forked from cirosantilli/python-cheat
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathre_cheat.py
More file actions
executable file
·157 lines (95 loc) · 3.32 KB
/
re_cheat.py
File metadata and controls
executable file
·157 lines (95 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env python
r"""
## re
## Regular expressions
Perl like.
General operation:
- *string regexes* must first be compiled into *pattern* objects. This has some overhead.
- compiled pattern objects can be used to find *match objects* on test strings.
## Regex methods
match() match only at the **BEGINNING** of the string; the match need not reach the end (Python 3.4+ has fullmatch() for the entire string)
search() first match anywhere in the string
findall() list of matching *strings* (or of groups, if the pattern has groups), **NOT**!!! match objects
finditer() iterator of match objects
## Predefined character classes
- \d [0-9]
- \D [^0-9]
- \s [ \t\n\r\f\v]
- \S
- \w [a-zA-Z0-9_].
- \W
"""
import re
if '## Syntax':

    if '## Lookahead':

        # A plain pattern consumes every character it matches, so the
        # character following each `a` is replaced together with the `a`.
        p = re.compile(r'a.')
        assert p.sub('0', 'abaac') == '00c'

        # `(?=...)` only checks that what follows matches: the looked-ahead
        # text is not consumed and is not part of the match, so only the `a`
        # itself is replaced and the next character stays available.
        p = re.compile(r'a(?=.)')
        assert p.sub('0', 'abaac') == '0b00c'
        assert p.search('ab').group() == 'a'

        # An `a` at the very end has nothing after it, so the lookahead fails
        # and nothing is replaced.
        assert p.sub('0', 'ba') == 'ba'
if '## re module':

    if '## compile':

        # Returns a compiled pattern object (called RegexObject in the
        # Python 2 documentation).
        #
        # Caches the regex parsing to make it faster.
        #
        # Always use this unless you will only match once.
        #
        # The pattern object has basically the same methods as the `re`
        # module, minus the pattern argument.
        p = re.compile(r'a.c')
        assert p.match('abc')
        assert p.match('0abc') is None
        assert p.search('0abc')

        # The source string of the regex stays available on the object.
        assert p.pattern == r'a.c'
if '## flags':

    # Flags are passed as the last argument of the matching functions (or to
    # re.compile). Each example shows the behavior without and with the flag.

    # ## IGNORECASE: letters match regardless of case.
    assert re.match(r'a', 'A') is None
    assert re.match(r'a', 'A', re.IGNORECASE)

    # ## DOTALL: dot matches all characters, including newlines.
    assert re.match(r'a.c', 'a\nc') is None
    assert re.match(r'a.c', 'a\nc', re.DOTALL)

    # ## MULTILINE: ^ and $ also match at newlines.
    assert re.search(r'^b$', 'a\nb\nc') is None
    assert re.search(r'^b$', 'a\nb\nc', re.MULTILINE)

    # Several flags are combined with bitwise or.
    assert re.search(r'^B.c$', 'a\nb\nc', re.IGNORECASE | re.DOTALL | re.MULTILINE)
if '## sub':

    # Replace what was matched.

    p = re.compile(r'(a.|b.)')

    # By string:
    assert p.sub('0', 'a_b_abc') == '000c'

    # By string with a backreference to a captured group:
    assert p.sub(r'<\1>', 'a_c') == '<a_>c'

    # By callable: it receives each match object and returns the replacement.
    assert p.sub(lambda m: m.group(1)[1:], 'a_b-abc') == '_-bc'

    # Count: replace at most that many matches, left to right.
    assert p.sub('0', 'a_b_abc', count=1) == '0b_abc'

    if '## subn':

        # Same as sub, but also returns number of subs made.
        # Reuses the pattern `p` compiled just above.
        assert p.subn('0', 'a_b_abc') == ('000c', 3)
if '## match':

    # Returns a match object (truthy) on success, None otherwise.
    assert re.match(r'a.c', 'abc')

    # Must match from beginning of string!
    # Consider re.search instead.
    # http://stackoverflow.com/questions/28840903/python-regex-match-middle-of-string
    assert re.match(r'a.c', '0abc') is None

    # Does not however have to match until the end:
    # whatever follows the match is simply not part of it.
    assert re.match(r'a.c', 'abc0')
    assert re.match(r'a.c', 'abc0').group() == 'abc'
if '## search':

    # Like match, but also matches in the middle.
    assert re.search(r'a.c', '0abc')
    assert re.search(r'a.c', '0abc').span() == (1, 4)

    # Returns None when the pattern occurs nowhere in the string.
    assert re.search(r'a.c', 'ac') is None

    # Stops at the first match: here . == b, and the later `aBc` is ignored.
    # To find all matches, use finditer.
    assert re.search(r'a.c', 'abcaBc').group() == 'abc'
    assert re.search(r'a.c', 'abcaBc').span() == (0, 3)
if '## finditer':

    # finditer returns an iterator over all non-overlapping match objects,
    # in the order they are found; list() collects them.
    matches = list(re.finditer(r'a.c', 'abcaBc'))
    assert len(matches) == 2
    assert [m.group() for m in matches] == ['abc', 'aBc']
    assert [m.span() for m in matches] == [(0, 3), (3, 6)]
if '## split':

    # Split the string at every match of the pattern.
    assert re.split(r'[ab]+', '0abba1aaaaa2') == ['0', '1', '2']

    # A separator at the very start or end yields an empty string there.
    assert re.split(r'[ab]+', 'a0b') == ['', '0', '']

    # maxsplit limits the number of splits; the remainder is kept whole.
    assert re.split(r'[ab]+', '0abba1aaaaa2', maxsplit=1) == ['0', '1aaaaa2']
"""
## Match object
## MatchObject
https://docs.python.org/2/library/re.html#re.MatchObject
Impossible to access this class: http://stackoverflow.com/questions/4835352/how-to-subclass-the-matchobject-in-python ...
Important methods: TODO examples
group() Return the string matched by the RE
start() Return the starting position of the match
end() Return the ending position of the match
span() Return a tuple containing the (start, end) positions of the match
"""
"""
## RegexObject
Returned by compile.
https://docs.python.org/2/library/re.html#re.RegexObject
"""