author | Todd Larsen <tlarsen@google.com> |
Wed, 13 Aug 2008 20:33:25 +0000 | |
changeset 68 | 0ae506b51e97 |
parent 54 | 03e267d67478 |
child 323 | ff1a9aa48cfd |
permissions | -rw-r--r-- |
54
03e267d67478
Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff
changeset
|
1 |
""" |
03e267d67478
Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff
changeset
|
2 |
Iterator based sre token scanner |
03e267d67478
Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff
changeset
|
3 |
""" |
03e267d67478
Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff
changeset
|
4 |
import sre_parse, sre_compile, sre_constants |
03e267d67478
Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff
changeset
|
5 |
from sre_constants import BRANCH, SUBPATTERN |
03e267d67478
Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff
changeset
|
6 |
from re import VERBOSE, MULTILINE, DOTALL |
03e267d67478
Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff
changeset
|
7 |
import re |
03e267d67478
Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff
changeset
|
8 |
|
03e267d67478
Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff
changeset
|
9 |
__all__ = ['Scanner', 'pattern'] |
03e267d67478
Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff
changeset
|
10 |
|
03e267d67478
Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff
changeset
|
11 |
# Default regex flags used by Scanner and pattern() when the caller gives
# none: verbose pattern syntax, per-line ^/$ anchors, and "." matching
# newlines.
FLAGS = VERBOSE | MULTILINE | DOTALL
03e267d67478
Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff
changeset
|
12 |
class Scanner(object):
    """
    Token scanner that tries every lexicon pattern at one position using a
    single compiled alternation.

    ``self.actions`` is indexed by regex group number: slot 0 holds a
    ``None`` placeholder and slot ``i + 1`` holds the i-th lexicon entry,
    so ``m.lastindex`` of a match selects the entry to call.
    """

    def __init__(self, lexicon, flags=FLAGS):
        """
        Compile ``lexicon`` into one compound pattern.

        lexicon -- iterable of callables; each must expose its regex source
                   as a ``pattern`` attribute and is later invoked by
                   iterscan() as ``action(match, context)``.
        flags   -- regex flags used to parse every phrase and stored on the
                   shared parser state.

        Any error raised while parsing a phrase (e.g. sre_constants.error)
        propagates to the caller unchanged.
        """
        # NOTE(review): sre_parse.Pattern and the two-element SUBPATTERN
        # payload are private sre internals; presumably they match the
        # Python version this module targets -- confirm before porting.
        self.actions = [None]
        # combine phrases into a compound pattern
        state = sre_parse.Pattern()
        state.flags = flags
        branches = []
        for idx, token in enumerate(lexicon):
            # Group numbers start at 1 so they line up with self.actions.
            parsed = sre_parse.parse(token.pattern, flags)
            branches.append(
                sre_parse.SubPattern(state, [(SUBPATTERN, (idx + 1, parsed))]))
            self.actions.append(token)

        compound = sre_parse.SubPattern(state, [(BRANCH, (None, branches))])
        self.scanner = sre_compile.compile(compound)

    def iterscan(self, string, idx=0, context=None):
        """
        Yield (rval, end_idx) for each match found in ``string`` starting
        at ``idx``.

        ``rval`` is the first item of the pair returned by the matched
        lexicon entry, called as ``action(match, context)``. The second
        item, when it is not None and differs from the end of the match,
        is the position at which scanning resumes; ``end_idx`` is then that
        position instead of the end of the match.

        Scanning stops when nothing matches at the current position or when
        a match ends exactly where the previous one ended (no progress).
        """
        match = self.scanner.scanner(string, idx).match
        actions = self.actions
        lastend = idx
        while True:
            m = match()
            if m is None:
                break
            matchend = m.end()
            if lastend == matchend:
                # No progress since the previous match; stop instead of
                # looping forever.
                break
            action = actions[m.lastindex]
            if action is not None:
                rval, next_pos = action(m, context)
                if next_pos is not None and next_pos != matchend:
                    # "fast forward" the scanner
                    matchend = next_pos
                    match = self.scanner.scanner(string, matchend).match
                yield rval, matchend
            lastend = matchend
03e267d67478
Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff
changeset
|
57 |
|
03e267d67478
Major reorganization of the soc svn repo, to merge into a single App Engine
Todd Larsen <tlarsen@google.com>
parents:
diff
changeset
|
58 |
def pattern(pattern, flags=FLAGS):
    """
    Build a decorator that tags a function with a regular expression.

    The decorated function is returned unchanged apart from two new
    attributes: ``pattern`` (the regex source passed in) and ``regex``
    (that source compiled with ``flags``).
    """
    def decorator(func):
        # Store the raw source first, then the compiled form; compilation
        # happens each time the decorator is applied.
        setattr(func, 'pattern', pattern)
        setattr(func, 'regex', re.compile(pattern, flags))
        return func

    return decorator