app/django/utils/simplejson/scanner.py
changeset 54 03e267d67478
child 323 ff1a9aa48cfd
equal deleted inserted replaced
53:57b4279d8c4e 54:03e267d67478
       
     1 """
       
     2 Iterator based sre token scanner
       
     3 """
       
     4 import sre_parse, sre_compile, sre_constants
       
     5 from sre_constants import BRANCH, SUBPATTERN
       
     6 from re import VERBOSE, MULTILINE, DOTALL
       
     7 import re
       
     8 
       
     9 __all__ = ['Scanner', 'pattern']
       
    10 
       
    11 FLAGS = (VERBOSE | MULTILINE | DOTALL)
       
    12 class Scanner(object):
       
    13     def __init__(self, lexicon, flags=FLAGS):
       
    14         self.actions = [None]
       
    15         # combine phrases into a compound pattern
       
    16         s = sre_parse.Pattern()
       
    17         s.flags = flags
       
    18         p = []
       
    19         for idx, token in enumerate(lexicon):
       
    20             phrase = token.pattern
       
    21             try:
       
    22                 subpattern = sre_parse.SubPattern(s,
       
    23                     [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
       
    24             except sre_constants.error:
       
    25                 raise
       
    26             p.append(subpattern)
       
    27             self.actions.append(token)
       
    28 
       
    29         p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
       
    30         self.scanner = sre_compile.compile(p)
       
    31 
       
    32 
       
    33     def iterscan(self, string, idx=0, context=None):
       
    34         """
       
    35         Yield match, end_idx for each match
       
    36         """
       
    37         match = self.scanner.scanner(string, idx).match
       
    38         actions = self.actions
       
    39         lastend = idx
       
    40         end = len(string)
       
    41         while True:
       
    42             m = match()
       
    43             if m is None:
       
    44                 break
       
    45             matchbegin, matchend = m.span()
       
    46             if lastend == matchend:
       
    47                 break
       
    48             action = actions[m.lastindex]
       
    49             if action is not None:
       
    50                 rval, next_pos = action(m, context)
       
    51                 if next_pos is not None and next_pos != matchend:
       
    52                     # "fast forward" the scanner
       
    53                     matchend = next_pos
       
    54                     match = self.scanner.scanner(string, matchend).match
       
    55                 yield rval, matchend
       
    56             lastend = matchend
       
    57             
       
    58 def pattern(pattern, flags=FLAGS):
       
    59     def decorator(fn):
       
    60         fn.pattern = pattern
       
    61         fn.regex = re.compile(pattern, flags)
       
    62         return fn
       
    63     return decorator