eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/mercurial/parser.py
changeset 69 c6bca38c1cbf
equal deleted inserted replaced
68:5ff1fc726848 69:c6bca38c1cbf
       
     1 # parser.py - simple top-down operator precedence parser for mercurial
       
     2 #
       
     3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 
       
     8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
       
     9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
       
    10 # for background
       
    11 
       
    12 # takes a tokenizer and elements
       
    13 # tokenizer is an iterator that returns type, value pairs
       
    14 # elements is a mapping of types to binding strength, prefix and infix actions
       
    15 # an action is a tree node name, a tree label, and an optional match
       
    16 # __call__(program) parses program into a labelled tree
       
    17 
       
    18 import error
       
    19 
       
    20 class parser(object):
       
    21     def __init__(self, tokenizer, elements, methods=None):
       
    22         self._tokenizer = tokenizer
       
    23         self._elements = elements
       
    24         self._methods = methods
       
    25     def _advance(self):
       
    26         'advance the tokenizer'
       
    27         t = self.current
       
    28         try:
       
    29             self.current = self._iter.next()
       
    30         except StopIteration:
       
    31             pass
       
    32         return t
       
    33     def _match(self, m, pos):
       
    34         'make sure the tokenizer matches an end condition'
       
    35         if self.current[0] != m:
       
    36             raise error.ParseError("unexpected token: %s" % self.current[0],
       
    37                                    self.current[2])
       
    38         self._advance()
       
    39     def _parse(self, bind=0):
       
    40         token, value, pos = self._advance()
       
    41         # handle prefix rules on current token
       
    42         prefix = self._elements[token][1]
       
    43         if not prefix:
       
    44             raise error.ParseError("not a prefix: %s" % token, pos)
       
    45         if len(prefix) == 1:
       
    46             expr = (prefix[0], value)
       
    47         else:
       
    48             if len(prefix) > 2 and prefix[2] == self.current[0]:
       
    49                 self._match(prefix[2], pos)
       
    50                 expr = (prefix[0], None)
       
    51             else:
       
    52                 expr = (prefix[0], self._parse(prefix[1]))
       
    53                 if len(prefix) > 2:
       
    54                     self._match(prefix[2], pos)
       
    55         # gather tokens until we meet a lower binding strength
       
    56         while bind < self._elements[self.current[0]][0]:
       
    57             token, value, pos = self._advance()
       
    58             e = self._elements[token]
       
    59             # check for suffix - next token isn't a valid prefix
       
    60             if len(e) == 4 and not self._elements[self.current[0]][1]:
       
    61                 suffix = e[3]
       
    62                 expr = (suffix[0], expr)
       
    63             else:
       
    64                 # handle infix rules
       
    65                 if len(e) < 3 or not e[2]:
       
    66                     raise error.ParseError("not an infix: %s" % token, pos)
       
    67                 infix = e[2]
       
    68                 if len(infix) == 3 and infix[2] == self.current[0]:
       
    69                     self._match(infix[2], pos)
       
    70                     expr = (infix[0], expr, (None))
       
    71                 else:
       
    72                     expr = (infix[0], expr, self._parse(infix[1]))
       
    73                     if len(infix) == 3:
       
    74                         self._match(infix[2], pos)
       
    75         return expr
       
    76     def parse(self, message):
       
    77         'generate a parse tree from a message'
       
    78         self._iter = self._tokenizer(message)
       
    79         self.current = self._iter.next()
       
    80         return self._parse()
       
    81     def eval(self, tree):
       
    82         'recursively evaluate a parse tree using node methods'
       
    83         if not isinstance(tree, tuple):
       
    84             return tree
       
    85         return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
       
    86     def __call__(self, message):
       
    87         'parse a message into a parse tree and evaluate if methods given'
       
    88         t = self.parse(message)
       
    89         if self._methods:
       
    90             return self.eval(t)
       
    91         return t