author | Nishanth Amuluru <nishanth@fossee.in> |
Sun, 09 Jan 2011 11:13:36 +0530 | |
changeset 114 | 1a88b04adb39 |
parent 69 | c6bca38c1cbf |
permissions | -rw-r--r-- |
69
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
1 |
# parser.py - simple top-down operator precedence parser for mercurial |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
2 |
# |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
3 |
# Copyright 2010 Matt Mackall <mpm@selenic.com> |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
4 |
# |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
5 |
# This software may be used and distributed according to the terms of the |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
6 |
# GNU General Public License version 2 or any later version. |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
7 |
|
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
8 |
# see http://effbot.org/zone/simple-top-down-parsing.htm and |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
9 |
# http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/ |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
10 |
# for background |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
11 |
|
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
12 |
# takes a tokenizer and elements |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
13 |
# tokenizer is an iterator that returns type, value pairs |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
14 |
# elements is a mapping of types to binding strength, prefix and infix actions |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
15 |
# an action is a tree node name, a tree label, and an optional match |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
16 |
# __call__(program) parses program into a labelled tree |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
17 |
|
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
18 |
import error |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
19 |
|
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
20 |
class parser(object): |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
21 |
def __init__(self, tokenizer, elements, methods=None): |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
22 |
self._tokenizer = tokenizer |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
23 |
self._elements = elements |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
24 |
self._methods = methods |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
25 |
def _advance(self): |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
26 |
'advance the tokenizer' |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
27 |
t = self.current |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
28 |
try: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
29 |
self.current = self._iter.next() |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
30 |
except StopIteration: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
31 |
pass |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
32 |
return t |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
33 |
def _match(self, m, pos): |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
34 |
'make sure the tokenizer matches an end condition' |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
35 |
if self.current[0] != m: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
36 |
raise error.ParseError("unexpected token: %s" % self.current[0], |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
37 |
self.current[2]) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
38 |
self._advance() |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
39 |
def _parse(self, bind=0): |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
40 |
token, value, pos = self._advance() |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
41 |
# handle prefix rules on current token |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
42 |
prefix = self._elements[token][1] |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
43 |
if not prefix: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
44 |
raise error.ParseError("not a prefix: %s" % token, pos) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
45 |
if len(prefix) == 1: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
46 |
expr = (prefix[0], value) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
47 |
else: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
48 |
if len(prefix) > 2 and prefix[2] == self.current[0]: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
49 |
self._match(prefix[2], pos) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
50 |
expr = (prefix[0], None) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
51 |
else: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
52 |
expr = (prefix[0], self._parse(prefix[1])) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
53 |
if len(prefix) > 2: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
54 |
self._match(prefix[2], pos) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
55 |
# gather tokens until we meet a lower binding strength |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
56 |
while bind < self._elements[self.current[0]][0]: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
57 |
token, value, pos = self._advance() |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
58 |
e = self._elements[token] |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
59 |
# check for suffix - next token isn't a valid prefix |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
60 |
if len(e) == 4 and not self._elements[self.current[0]][1]: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
61 |
suffix = e[3] |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
62 |
expr = (suffix[0], expr) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
63 |
else: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
64 |
# handle infix rules |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
65 |
if len(e) < 3 or not e[2]: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
66 |
raise error.ParseError("not an infix: %s" % token, pos) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
67 |
infix = e[2] |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
68 |
if len(infix) == 3 and infix[2] == self.current[0]: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
69 |
self._match(infix[2], pos) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
70 |
expr = (infix[0], expr, (None)) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
71 |
else: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
72 |
expr = (infix[0], expr, self._parse(infix[1])) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
73 |
if len(infix) == 3: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
74 |
self._match(infix[2], pos) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
75 |
return expr |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
76 |
def parse(self, message): |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
77 |
'generate a parse tree from a message' |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
78 |
self._iter = self._tokenizer(message) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
79 |
self.current = self._iter.next() |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
80 |
return self._parse() |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
81 |
def eval(self, tree): |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
82 |
'recursively evaluate a parse tree using node methods' |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
83 |
if not isinstance(tree, tuple): |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
84 |
return tree |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
85 |
return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]]) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
86 |
def __call__(self, message): |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
87 |
'parse a message into a parse tree and evaluate if methods given' |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
88 |
t = self.parse(message) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
89 |
if self._methods: |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
90 |
return self.eval(t) |
c6bca38c1cbf
Added buildout stuff and made changes accordingly
Nishanth Amuluru <nishanth@fossee.in>
parents:
diff
changeset
|
91 |
return t |