|
1 """ |
|
2 Implementation of JSONDecoder |
|
3 """ |
|
4 import re |
|
5 |
|
6 from django.utils.simplejson.scanner import Scanner, pattern |
|
7 |
|
8 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL |
|
9 |
|
def _floatconstants():
    """
    Build the IEEE 754 special values used by the decoder.

    Returns a ``(nan, inf, -inf)`` tuple obtained by unpacking the raw
    bit patterns of a NaN and of positive infinity.
    """
    import binascii
    import struct
    import sys
    # Two big-endian doubles: 7FF8000000000000 (NaN) followed by
    # 7FF0000000000000 (+Infinity).  binascii.unhexlify is used instead
    # of the ``str.decode('hex')`` codec, which only exists on Python 2.
    _BYTES = binascii.unhexlify('7FF80000000000007FF0000000000000')
    if sys.byteorder != 'big':
        # The 'd' struct code reads native byte order, so reverse each
        # 8-byte half on non-big-endian hosts.
        _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
    nan, inf = struct.unpack('dd', _BYTES)
    return nan, inf, -inf

NaN, PosInf, NegInf = _floatconstants()
|
20 |
|
def linecol(doc, pos):
    """
    Translate a character offset into a ``(lineno, colno)`` pair.

    ``doc`` is the text being decoded and ``pos`` an index into it.
    Both returned numbers are 1-based, on every line of the document.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind yields -1 when no newline precedes ``pos``, which makes the
    # first line's column ``pos + 1`` -- the same 1-based convention the
    # later lines get from subtracting the index of their newline.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
|
28 |
|
def errmsg(msg, doc, pos, end=None):
    """
    Format ``msg`` together with the location it refers to in ``doc``.

    ``pos`` is the offset of the problem.  When ``end`` is given the
    message describes the range from ``pos`` to ``end``; otherwise only
    the single position is reported.  Line and column numbers come from
    ``linecol``.
    """
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        location = (msg, start_line, start_col, stop_line, stop_col, pos, end)
        return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
            location)
    return '%s: line %d column %d (char %d)' % (msg, start_line, start_col, pos)
|
36 |
|
# Literal tokens accepted by JSONConstant, mapped to their Python values.
_CONSTANTS = {
    'null': None,
    'true': True,
    'false': False,
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}

def JSONConstant(match, context, c=_CONSTANTS):
    """
    Convert a matched constant token into its Python value.

    The whole matched text is looked up in ``c``.  Returns
    ``(value, None)``; ``context`` is not used.
    """
    # NOTE(review): the None second element is presumably interpreted by
    # the Scanner as "continue from the end of the match" -- confirm.
    token = match.group(0)
    return c[token], None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
|
49 |
|
def JSONNumber(match, context):
    """
    Convert a matched JSON number into an ``int`` or a ``float``.

    A number with a fractional part or an exponent becomes a ``float``;
    anything else goes through ``int``.  Returns ``(number, None)``;
    ``context`` is not used.
    """
    # Re-match the same span with this function's own regex so that the
    # integer / fraction / exponent groups are available.
    start, stop = match.span()
    pieces = JSONNumber.regex.match(match.string, start, stop)
    integer, frac, exp = pieces.groups()
    if not (frac or exp):
        return int(integer), None
    return float(integer + (frac or '') + (exp or '')), None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
|
59 |
|
# A (possibly empty) run of ordinary characters followed by the quote or
# backslash that ends the run.
STRINGCHUNK = re.compile(r'(.*?)(["\\])', FLAGS)
# Exactly four ASCII hex digits: the payload of a \uXXXX escape.
_HEX4 = re.compile(r'[0-9A-Fa-f]{4}')
# Single-character escapes and the text they stand for.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

DEFAULT_ENCODING = "utf-8"

def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """
    Decode the JSON string literal whose body starts at index ``end``.

    ``s`` is the document and ``end`` the index of the first character
    after the opening quote.  ``encoding`` (``DEFAULT_ENCODING`` when
    ``None``) is used to convert non-``unicode`` chunks to ``unicode``.
    ``_b`` and ``_m`` default to the module-level escape table and
    chunk matcher.

    Returns ``(text, end)``: the decoded ``unicode`` string and the
    index just past the closing quote.

    Raises ``ValueError`` when the string is unterminated or contains an
    invalid escape sequence.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "unterminated" error messages.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        if terminator == '"':
            break
        # The terminator was a backslash; the next character selects the
        # escape.
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            try:
                m = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc,), s, end))
            end += 1
        else:
            # Require exactly four ASCII hex digits.  Handing the raw
            # slice to int(..., 16) would also accept forms such as
            # "0x12", which is not a valid \uXXXX payload.
            hexdigits = _HEX4.match(s, end + 1)
            if hexdigits is None:
                raise ValueError(errmsg("Invalid \\uXXXX escape", s, end))
            # NOTE: each escape is converted on its own; two escapes
            # forming a surrogate pair are not combined here.
            m = unichr(int(hexdigits.group(0), 16))
            end += 5
        _append(m)
    return u''.join(chunks), end
|
110 |
|
def JSONString(match, context):
    """
    Parse the string literal whose opening quote was just matched.

    Delegates to ``scanstring``, starting right after the match and
    using ``context.encoding`` when the context defines it (``None``
    otherwise).  Returns whatever ``scanstring`` returns.
    """
    document = match.string
    body_start = match.end()
    return scanstring(document, body_start, getattr(context, 'encoding', None))
pattern(r'"')(JSONString)
|
115 |
|
# Matches any (possibly empty) run of whitespace.
WHITESPACE = re.compile(r'\s*', FLAGS)

def JSONObject(match, context, _w=WHITESPACE.match):
    """
    Parse a JSON object whose opening brace was just matched.

    ``match`` is the regex match for the opening brace and ``context``
    an object whose optional ``encoding`` and ``object_hook`` attributes
    are honoured.  ``_w`` defaults to the module-level whitespace
    matcher.

    Returns ``(obj, end)`` where ``obj`` is the ``dict`` of decoded
    members -- or the result of ``object_hook(dict)`` when a hook is
    set -- and ``end`` is the index just past the closing brace.

    Raises ``ValueError`` when a property name, a ``:`` or ``,``
    delimiter, or a member value is missing.
    """
    pairs = {}
    s = match.string
    object_hook = getattr(context, 'object_hook', None)
    end = _w(s, match.end()).end()
    nextchar = s[end:end + 1]
    # trivial empty object
    if nextchar == '}':
        # The hook must see empty objects too: it is documented as being
        # called with the result of every JSON object decoded.
        if object_hook is not None:
            pairs = object_hook(pairs)
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    iterscan = JSONScanner.iterscan
    while True:
        # ``end`` points just past the opening quote of the key.
        key, end = scanstring(s, end, encoding)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            # ``end`` already moved past the offending character.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
pattern(r'{')(JSONObject)
|
159 |
|
def JSONArray(match, context, _w=WHITESPACE.match):
    """
    Parse a JSON array whose opening bracket was just matched.

    ``match`` is the regex match for the opening bracket; ``context`` is
    passed through to the scanner for every element.  ``_w`` defaults to
    the module-level whitespace matcher.

    Returns ``(values, end)`` where ``values`` is the ``list`` of
    decoded elements and ``end`` the index just past the closing
    bracket.

    Raises ``ValueError`` when an element or a ``,`` delimiter is
    missing.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # look-ahead for trivial empty array
    nextchar = s[end:end + 1]
    if nextchar == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            # ``end`` already moved past the offending character, so
            # report ``end - 1`` (the same convention JSONObject uses).
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
    return values, end
pattern(r'\[')(JSONArray)
|
185 |
|
# Every value parser defined above, in the order handed to the Scanner.
ANYTHING = [JSONObject, JSONArray, JSONString, JSONConstant, JSONNumber]

# Module-level scanner used by JSONObject and JSONArray for nested values.
JSONScanner = Scanner(ANYTHING)
|
195 |
|
class JSONDecoder(object):
    """
    Decoder for JSON <http://json.org> documents.

    JSON values are translated to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    Although they are not part of the JSON spec, ``NaN``, ``Infinity``
    and ``-Infinity`` are also accepted and decoded to the matching
    ``float`` values.
    """

    __all__ = ['__init__', 'decode', 'raw_decode']
    _scanner = Scanner(ANYTHING)

    def __init__(self, encoding=None, object_hook=None):
        """
        ``encoding`` names the encoding used to interpret ``str``
        input decoded by this instance (utf-8 when left as ``None``).
        It is ignored for ``unicode`` input.

        Only encodings that are a superset of ASCII currently work;
        text in any other encoding should be passed in as ``unicode``.

        ``object_hook``, when given, is called with the result of every
        JSON object decoded, and its return value is used in place of
        the ``dict``.  This allows custom deserializations (e.g. to
        support JSON-RPC class hinting).
        """
        self.object_hook = object_hook
        self.encoding = encoding

    def decode(self, s, _w=WHITESPACE.match):
        """
        Decode ``s`` (a ``str`` or ``unicode`` instance holding one JSON
        document) and return its Python representation.

        Whitespace around the document is skipped; anything else after
        the document raises ``ValueError``.
        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        total = len(s)
        if end == total:
            return obj
        raise ValueError(errmsg("Extra data", s, end, total))

    def raw_decode(self, s, **kw):
        """
        Decode the JSON document that ``s`` (a ``str`` or ``unicode``)
        begins with.  Returns a 2-tuple of the Python representation
        and the index in ``s`` at which the document ended.

        Useful for strings that may carry extraneous data after the
        document.  Raises ``ValueError`` when nothing can be decoded.
        """
        # Unless the caller supplied one, the decoder itself is the
        # scanning context.
        kw.setdefault('context', self)
        try:
            first = self._scanner.iterscan(s, **kw).next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        obj, end = first
        return obj, end

__all__ = ['JSONDecoder']