1 """ |
1 """ |
2 Iterator based sre token scanner |
2 Iterator based sre token scanner |
3 """ |
3 """ |
4 import sre_parse, sre_compile, sre_constants |
4 import re |
|
5 from re import VERBOSE, MULTILINE, DOTALL |
|
6 import sre_parse |
|
7 import sre_compile |
|
8 import sre_constants |
5 from sre_constants import BRANCH, SUBPATTERN |
9 from sre_constants import BRANCH, SUBPATTERN |
6 from re import VERBOSE, MULTILINE, DOTALL |
|
7 import re |
|
8 |
10 |
9 __all__ = ['Scanner', 'pattern'] |
11 __all__ = ['Scanner', 'pattern'] |
10 |
12 |
11 FLAGS = (VERBOSE | MULTILINE | DOTALL) |
13 FLAGS = (VERBOSE | MULTILINE | DOTALL) |
|
14 |
12 class Scanner(object): |
15 class Scanner(object): |
13 def __init__(self, lexicon, flags=FLAGS): |
16 def __init__(self, lexicon, flags=FLAGS): |
14 self.actions = [None] |
17 self.actions = [None] |
15 # combine phrases into a compound pattern |
18 # Combine phrases into a compound pattern |
16 s = sre_parse.Pattern() |
19 s = sre_parse.Pattern() |
17 s.flags = flags |
20 s.flags = flags |
18 p = [] |
21 p = [] |
19 for idx, token in enumerate(lexicon): |
22 for idx, token in enumerate(lexicon): |
20 phrase = token.pattern |
23 phrase = token.pattern |
24 except sre_constants.error: |
27 except sre_constants.error: |
25 raise |
28 raise |
26 p.append(subpattern) |
29 p.append(subpattern) |
27 self.actions.append(token) |
30 self.actions.append(token) |
28 |
31 |
|
32 s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work |
29 p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) |
33 p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) |
30 self.scanner = sre_compile.compile(p) |
34 self.scanner = sre_compile.compile(p) |
31 |
|
32 |
35 |
33 def iterscan(self, string, idx=0, context=None): |
36 def iterscan(self, string, idx=0, context=None): |
34 """ |
37 """ |
35 Yield match, end_idx for each match |
38 Yield match, end_idx for each match |
36 """ |
39 """ |
52 # "fast forward" the scanner |
55 # "fast forward" the scanner |
53 matchend = next_pos |
56 matchend = next_pos |
54 match = self.scanner.scanner(string, matchend).match |
57 match = self.scanner.scanner(string, matchend).match |
55 yield rval, matchend |
58 yield rval, matchend |
56 lastend = matchend |
59 lastend = matchend |
57 |
60 |
|
61 |
58 def pattern(pattern, flags=FLAGS): |
62 def pattern(pattern, flags=FLAGS): |
59 def decorator(fn): |
63 def decorator(fn): |
60 fn.pattern = pattern |
64 fn.pattern = pattern |
61 fn.regex = re.compile(pattern, flags) |
65 fn.regex = re.compile(pattern, flags) |
62 return fn |
66 return fn |