py_tasks_melange: comparison app/simplejson/decoder.py

equal deleted inserted replaced

-:2f86cbc90b65
+:295d67509412
+"""Implementation of JSONDecoder
+"""
+import re
+import sys
+import struct
+from simplejson.scanner import make_scanner
+try:
+from simplejson._speedups import scanstring as c_scanstring
+except ImportError:
+c_scanstring = None
# JSONDecoder is the only name exported by ``from ... import *``.
__all__ = ['JSONDecoder']

# Flag set shared by every regular expression compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE
def _floatconstants():
    """Build the float values behind the NaN / Infinity / -Infinity constants.

    The values are decoded from their raw IEEE 754 double bit patterns.

    Returns a ``(nan, inf, -inf)`` tuple.
    """
    # Function-scope import: unhexlify handles the hex text on both
    # Python 2 and Python 3, unlike ``str.decode('hex')``.
    from binascii import unhexlify

    # Two big-endian doubles back to back: 7FF8... followed by 7FF0...
    raw = unhexlify('7FF80000000000007FF0000000000000')
    # The '>' prefix tells struct the bytes are big-endian, so no manual
    # byte reversal is needed on little-endian hosts.
    nan, inf = struct.unpack('>dd', raw)
    return nan, inf, -inf


NaN, PosInf, NegInf = _floatconstants()
def linecol(doc, pos):
    """Convert a character offset into a ``(line, column)`` pair.

    ``doc`` is the full document text and ``pos`` an index into it.
    Both the line and the column are 1-based, on every line: the first
    character of any line is reported as column 1.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() returns -1 when no newline precedes pos, which makes the
    # first line come out as pos + 1 -- the same 1-based convention the
    # subtraction already produces for every later line.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with the location it refers to in ``doc``.

    With only ``pos`` the result names a single line/column/char position.
    When ``end`` is given as well, the result describes the span from
    ``pos`` to ``end``.
    """
    # Note that this function is called from _speedups
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        span = (msg, start_line, start_col, stop_line, stop_col, pos, end)
        return '%s: line %d column %d - line %d column %d (char %d - %d)' % span
    return '%s: line %d column %d (char %d)' % (msg, start_line, start_col, pos)
# Literal names outside the JSON spec mapped to their float values;
# used as the default ``parse_constant`` lookup of JSONDecoder.
_CONSTANTS = {
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}

# Group 1: a (possibly empty) run of ordinary characters.
# Group 2: the character that ends the run -- a double quote, a backslash
# or a control character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes and the text each one stands for.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding assumed for byte strings when the caller does not name one.
DEFAULT_ENCODING = "utf-8"
# Matches exactly four hexadecimal digits and nothing else.
_HEX4 = re.compile(r'[0-9A-Fa-f]{4}\Z')


def _hex4_value(digits):
    """Return the integer for ``digits``, or None unless it is four hex digits."""
    # int(x, 16) alone is too lenient: it also accepts forms such as
    # '0x1f', '+12f' or digits padded with whitespace.
    if _HEX4.match(digits) is None:
        return None
    return int(digits, 16)


def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` is used to decode chunks that are not already unicode
    (DEFAULT_ENCODING when None). ``_b`` and ``_m`` are the escape table
    and chunk matcher, bound as defaults for fast local lookup.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote.

    Raises ValueError, with a message built by errmsg(), for an
    unterminated string, a literal control character in strict mode, an
    unknown escape, a malformed u-escape or an invalid surrogate pair.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "unterminated" error messages
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                # Format through errmsg() like every other error raised
                # here, so the message carries line/column information.
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc,), s, end))
            end += 1
        else:
            # Unicode escape sequence: 'u' followed by four hex digits
            uni = _hex4_value(s[end + 1:end + 5])
            if uni is None:
                msg = "Invalid \\uXXXX escape"
                raise ValueError(errmsg(msg, s, end))
            next_end = end + 5
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise ValueError(errmsg(msg, s, end))
                uni2 = _hex4_value(s[end + 7:end + 11])
                # The second half must be a low surrogate, otherwise the
                # arithmetic below would produce an unrelated code point.
                if uni2 is None or not 0xdc00 <= uni2 <= 0xdfff:
                    raise ValueError(errmsg(msg, s, end))
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
# Pick the C implementation when the optional import provided one,
# otherwise fall back to the pure-Python scanner.
scanstring = c_scanstring or py_scanstring

# The four whitespace characters skipped between tokens, as a plain string
# for cheap membership tests and as a compiled pattern for skipping runs.
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
def JSONObject(state, encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object and return ``(result, end)``.

    ``state`` is a ``(s, end)`` pair: the document and the index just
    after the opening brace. It is an ordinary positional parameter
    unpacked in the body, so callers keep passing the same tuple.
    ``encoding`` and ``strict`` are forwarded to scanstring() for the keys.
    ``scan_once(s, idx)`` must return ``(value, end)`` and raise
    StopIteration when no value starts at ``idx``.
    ``object_hook``, when not None, is applied to the dict of every
    object parsed here -- including an empty one -- and its return value
    is used in place of the dict.

    Raises ValueError when a property name, the ':' or ',' delimiter, or
    a value is missing.
    """
    s, end = state
    pairs = {}
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            # The hook must see empty objects too, exactly as it sees
            # non-empty ones at the end of this function.
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)
        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting : delimiter", s, end))
        end += 1
        # Skip whitespace before the value; running off the end of the
        # document is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        # Find the character after the value: '}' or ','
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1
        if nextchar == '}':
            break
        elif nextchar != ',':
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        # After a comma the next token must be the quote opening a key
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array and return ``(values, end)``.

    ``state`` is a ``(s, end)`` pair: the document and the index just
    after the opening bracket. It is an ordinary positional parameter
    unpacked in the body, so callers keep passing the same tuple.
    ``scan_once(s, idx)`` must return ``(value, end)`` and raise
    StopIteration when no value starts at ``idx``.

    Returns the list of parsed values and the index just after the
    closing bracket. Raises ValueError when a value or the ',' delimiter
    is missing.
    """
    s, end = state
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end was already advanced past the offending character, so
            # step back one to report where the delimiter was expected
            # (the same position JSONObject reports).
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        # Skip whitespace before the next element; running off the end
        # of the document is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass
    return values, end
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default JSON values are translated to Python as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also understood and map to
    their corresponding ``float`` values, which is outside the JSON spec.
    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True):
        """Configure the decoder.

        ``encoding`` names the encoding used to interpret ``str`` objects
        decoded by this instance (utf-8 by default); it has no effect on
        ``unicode`` input. Only encodings that are a superset of ASCII
        currently work; text in any other encoding should be passed in as
        ``unicode``.

        ``object_hook``, if specified, is called with the result of every
        JSON object decoded, and its return value replaces the ``dict``.
        This allows custom deserializations (e.g. JSON-RPC class hinting).

        ``parse_float``, if specified, is called with the string of every
        JSON float to be decoded. The default is equivalent to
        float(num_str). Use it to plug in another datatype or parser
        (e.g. decimal.Decimal).

        ``parse_int``, if specified, is called with the string of every
        JSON int to be decoded. The default is equivalent to int(num_str).
        Use it to plug in another datatype or parser (e.g. float).

        ``parse_constant``, if specified, is called with one of the
        strings -Infinity, Infinity, NaN. It can be used to raise an
        exception when such invalid JSON numbers are encountered.

        ``strict`` is stored on the instance unchanged.
        """
        self.encoding = encoding
        self.object_hook = object_hook
        self.strict = strict
        # Any falsy parser argument selects the built-in conversion.
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        # Created last, after every other attribute is set, because the
        # scanner is built from this instance.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document).

        Whitespace around the document is ignored; anything else after
        it raises ValueError ("Extra data").
        """
        start = _w(s, 0).end()
        obj, stop = self.raw_decode(s, idx=start)
        stop = _w(s, stop).end()
        if stop == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, stop, len(s)))

    def raw_decode(self, s, idx=0):
        """Decode a JSON document starting at index ``idx`` of ``s`` (a
        ``str`` or ``unicode``) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        Unlike decode(), trailing data after the document is allowed.
        Raises ValueError when no JSON value can be scanned at ``idx``.
        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return obj, end