app/django/utils/simplejson/decoder.py
changeset 323 ff1a9aa48cfd
parent 54 03e267d67478
equal deleted inserted replaced
322:6641e941ef1e 323:ff1a9aa48cfd
     1 """
     1 """
     2 Implementation of JSONDecoder
     2 Implementation of JSONDecoder
     3 """
     3 """
     4 import re
     4 import re
       
     5 import sys
     5 
     6 
     6 from django.utils.simplejson.scanner import Scanner, pattern
     7 from django.utils.simplejson.scanner import Scanner, pattern
       
     8 try:
       
     9     from django.utils.simplejson._speedups import scanstring as c_scanstring
       
    10 except ImportError:
       
    11     pass
     7 
    12 
     8 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
    13 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
     9 
    14 
    10 def _floatconstants():
    15 def _floatconstants():
    11     import struct
    16     import struct
    16     nan, inf = struct.unpack('dd', _BYTES)
    21     nan, inf = struct.unpack('dd', _BYTES)
    17     return nan, inf, -inf
    22     return nan, inf, -inf
    18 
    23 
    19 NaN, PosInf, NegInf = _floatconstants()
    24 NaN, PosInf, NegInf = _floatconstants()
    20 
    25 
       
    26 
    21 def linecol(doc, pos):
    27 def linecol(doc, pos):
    22     lineno = doc.count('\n', 0, pos) + 1
    28     lineno = doc.count('\n', 0, pos) + 1
    23     if lineno == 1:
    29     if lineno == 1:
    24         colno = pos
    30         colno = pos
    25     else:
    31     else:
    26         colno = pos - doc.rindex('\n', 0, pos)
    32         colno = pos - doc.rindex('\n', 0, pos)
    27     return lineno, colno
    33     return lineno, colno
    28 
    34 
       
    35 
    29 def errmsg(msg, doc, pos, end=None):
    36 def errmsg(msg, doc, pos, end=None):
    30     lineno, colno = linecol(doc, pos)
    37     lineno, colno = linecol(doc, pos)
    31     if end is None:
    38     if end is None:
    32         return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
    39         return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
    33     endlineno, endcolno = linecol(doc, end)
    40     endlineno, endcolno = linecol(doc, end)
    34     return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
    41     return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
    35         msg, lineno, colno, endlineno, endcolno, pos, end)
    42         msg, lineno, colno, endlineno, endcolno, pos, end)
       
    43 
    36 
    44 
    37 _CONSTANTS = {
    45 _CONSTANTS = {
    38     '-Infinity': NegInf,
    46     '-Infinity': NegInf,
    39     'Infinity': PosInf,
    47     'Infinity': PosInf,
    40     'NaN': NaN,
    48     'NaN': NaN,
    42     'false': False,
    50     'false': False,
    43     'null': None,
    51     'null': None,
    44 }
    52 }
    45 
    53 
    46 def JSONConstant(match, context, c=_CONSTANTS):
    54 def JSONConstant(match, context, c=_CONSTANTS):
    47     return c[match.group(0)], None
    55     s = match.group(0)
       
    56     fn = getattr(context, 'parse_constant', None)
       
    57     if fn is None:
       
    58         rval = c[s]
       
    59     else:
       
    60         rval = fn(s)
       
    61     return rval, None
    48 pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
    62 pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
       
    63 
    49 
    64 
    50 def JSONNumber(match, context):
    65 def JSONNumber(match, context):
    51     match = JSONNumber.regex.match(match.string, *match.span())
    66     match = JSONNumber.regex.match(match.string, *match.span())
    52     integer, frac, exp = match.groups()
    67     integer, frac, exp = match.groups()
    53     if frac or exp:
    68     if frac or exp:
    54         res = float(integer + (frac or '') + (exp or ''))
    69         fn = getattr(context, 'parse_float', None) or float
       
    70         res = fn(integer + (frac or '') + (exp or ''))
    55     else:
    71     else:
    56         res = int(integer)
    72         fn = getattr(context, 'parse_int', None) or int
       
    73         res = fn(integer)
    57     return res, None
    74     return res, None
    58 pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
    75 pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
    59 
    76 
    60 STRINGCHUNK = re.compile(r'(.*?)(["\\])', FLAGS)
    77 
       
    78 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
    61 BACKSLASH = {
    79 BACKSLASH = {
    62     '"': u'"', '\\': u'\\', '/': u'/',
    80     '"': u'"', '\\': u'\\', '/': u'/',
    63     'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
    81     'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
    64 }
    82 }
    65 
    83 
    66 DEFAULT_ENCODING = "utf-8"
    84 DEFAULT_ENCODING = "utf-8"
    67 
    85 
    68 def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match):
    86 def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    69     if encoding is None:
    87     if encoding is None:
    70         encoding = DEFAULT_ENCODING
    88         encoding = DEFAULT_ENCODING
    71     chunks = []
    89     chunks = []
    72     _append = chunks.append
    90     _append = chunks.append
    73     begin = end - 1
    91     begin = end - 1
    82             if not isinstance(content, unicode):
   100             if not isinstance(content, unicode):
    83                 content = unicode(content, encoding)
   101                 content = unicode(content, encoding)
    84             _append(content)
   102             _append(content)
    85         if terminator == '"':
   103         if terminator == '"':
    86             break
   104             break
       
   105         elif terminator != '\\':
       
   106             if strict:
       
   107                 raise ValueError(errmsg("Invalid control character %r at", s, end))
       
   108             else:
       
   109                 _append(terminator)
       
   110                 continue
    87         try:
   111         try:
    88             esc = s[end]
   112             esc = s[end]
    89         except IndexError:
   113         except IndexError:
    90             raise ValueError(
   114             raise ValueError(
    91                 errmsg("Unterminated string starting at", s, begin))
   115                 errmsg("Unterminated string starting at", s, begin))
    96                 raise ValueError(
   120                 raise ValueError(
    97                     errmsg("Invalid \\escape: %r" % (esc,), s, end))
   121                     errmsg("Invalid \\escape: %r" % (esc,), s, end))
    98             end += 1
   122             end += 1
    99         else:
   123         else:
   100             esc = s[end + 1:end + 5]
   124             esc = s[end + 1:end + 5]
       
   125             next_end = end + 5
       
   126             msg = "Invalid \\uXXXX escape"
   101             try:
   127             try:
   102                 m = unichr(int(esc, 16))
   128                 if len(esc) != 4:
   103                 if len(esc) != 4 or not esc.isalnum():
       
   104                     raise ValueError
   129                     raise ValueError
       
   130                 uni = int(esc, 16)
       
   131                 if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
       
   132                     msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
       
   133                     if not s[end + 5:end + 7] == '\\u':
       
   134                         raise ValueError
       
   135                     esc2 = s[end + 7:end + 11]
       
   136                     if len(esc2) != 4:
       
   137                         raise ValueError
       
   138                     uni2 = int(esc2, 16)
       
   139                     uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
       
   140                     next_end += 6
       
   141                 m = unichr(uni)
   105             except ValueError:
   142             except ValueError:
   106                 raise ValueError(errmsg("Invalid \\uXXXX escape", s, end))
   143                 raise ValueError(errmsg(msg, s, end))
   107             end += 5
   144             end = next_end
   108         _append(m)
   145         _append(m)
   109     return u''.join(chunks), end
   146     return u''.join(chunks), end
   110 
   147 
       
   148 
       
   149 # Use speedup
       
   150 try:
       
   151     scanstring = c_scanstring
       
   152 except NameError:
       
   153     scanstring = py_scanstring
       
   154 
   111 def JSONString(match, context):
   155 def JSONString(match, context):
   112     encoding = getattr(context, 'encoding', None)
   156     encoding = getattr(context, 'encoding', None)
   113     return scanstring(match.string, match.end(), encoding)
   157     strict = getattr(context, 'strict', True)
       
   158     return scanstring(match.string, match.end(), encoding, strict)
   114 pattern(r'"')(JSONString)
   159 pattern(r'"')(JSONString)
       
   160 
   115 
   161 
   116 WHITESPACE = re.compile(r'\s*', FLAGS)
   162 WHITESPACE = re.compile(r'\s*', FLAGS)
   117 
   163 
   118 def JSONObject(match, context, _w=WHITESPACE.match):
   164 def JSONObject(match, context, _w=WHITESPACE.match):
   119     pairs = {}
   165     pairs = {}
   120     s = match.string
   166     s = match.string
   121     end = _w(s, match.end()).end()
   167     end = _w(s, match.end()).end()
   122     nextchar = s[end:end + 1]
   168     nextchar = s[end:end + 1]
   123     # trivial empty object
   169     # Trivial empty object
   124     if nextchar == '}':
   170     if nextchar == '}':
   125         return pairs, end + 1
   171         return pairs, end + 1
   126     if nextchar != '"':
   172     if nextchar != '"':
   127         raise ValueError(errmsg("Expecting property name", s, end))
   173         raise ValueError(errmsg("Expecting property name", s, end))
   128     end += 1
   174     end += 1
   129     encoding = getattr(context, 'encoding', None)
   175     encoding = getattr(context, 'encoding', None)
       
   176     strict = getattr(context, 'strict', True)
   130     iterscan = JSONScanner.iterscan
   177     iterscan = JSONScanner.iterscan
   131     while True:
   178     while True:
   132         key, end = scanstring(s, end, encoding)
   179         key, end = scanstring(s, end, encoding, strict)
   133         end = _w(s, end).end()
   180         end = _w(s, end).end()
   134         if s[end:end + 1] != ':':
   181         if s[end:end + 1] != ':':
   135             raise ValueError(errmsg("Expecting : delimiter", s, end))
   182             raise ValueError(errmsg("Expecting : delimiter", s, end))
   136         end = _w(s, end + 1).end()
   183         end = _w(s, end + 1).end()
   137         try:
   184         try:
   154     object_hook = getattr(context, 'object_hook', None)
   201     object_hook = getattr(context, 'object_hook', None)
   155     if object_hook is not None:
   202     if object_hook is not None:
   156         pairs = object_hook(pairs)
   203         pairs = object_hook(pairs)
   157     return pairs, end
   204     return pairs, end
   158 pattern(r'{')(JSONObject)
   205 pattern(r'{')(JSONObject)
   159             
   206 
       
   207 
   160 def JSONArray(match, context, _w=WHITESPACE.match):
   208 def JSONArray(match, context, _w=WHITESPACE.match):
   161     values = []
   209     values = []
   162     s = match.string
   210     s = match.string
   163     end = _w(s, match.end()).end()
   211     end = _w(s, match.end()).end()
   164     # look-ahead for trivial empty array
   212     # Look-ahead for trivial empty array
   165     nextchar = s[end:end + 1]
   213     nextchar = s[end:end + 1]
   166     if nextchar == ']':
   214     if nextchar == ']':
   167         return values, end + 1
   215         return values, end + 1
   168     iterscan = JSONScanner.iterscan
   216     iterscan = JSONScanner.iterscan
   169     while True:
   217     while True:
   180         if nextchar != ',':
   228         if nextchar != ',':
   181             raise ValueError(errmsg("Expecting , delimiter", s, end))
   229             raise ValueError(errmsg("Expecting , delimiter", s, end))
   182         end = _w(s, end).end()
   230         end = _w(s, end).end()
   183     return values, end
   231     return values, end
   184 pattern(r'\[')(JSONArray)
   232 pattern(r'\[')(JSONArray)
   185  
   233 
       
   234 
   186 ANYTHING = [
   235 ANYTHING = [
   187     JSONObject,
   236     JSONObject,
   188     JSONArray,
   237     JSONArray,
   189     JSONString,
   238     JSONString,
   190     JSONConstant,
   239     JSONConstant,
   191     JSONNumber,
   240     JSONNumber,
   192 ]
   241 ]
   193 
   242 
   194 JSONScanner = Scanner(ANYTHING)
   243 JSONScanner = Scanner(ANYTHING)
   195 
   244 
       
   245 
   196 class JSONDecoder(object):
   246 class JSONDecoder(object):
   197     """
   247     """
   198     Simple JSON <http://json.org> decoder
   248     Simple JSON <http://json.org> decoder
   199 
   249 
   200     Performs the following translations in decoding:
   250     Performs the following translations in decoding by default:
   201     
   251     
   202     +---------------+-------------------+
   252     +---------------+-------------------+
   203     | JSON          | Python            |
   253     | JSON          | Python            |
   204     +===============+===================+
   254     +===============+===================+
   205     | object        | dict              |
   255     | object        | dict              |
   224     """
   274     """
   225 
   275 
   226     _scanner = Scanner(ANYTHING)
   276     _scanner = Scanner(ANYTHING)
   227     __all__ = ['__init__', 'decode', 'raw_decode']
   277     __all__ = ['__init__', 'decode', 'raw_decode']
   228 
   278 
   229     def __init__(self, encoding=None, object_hook=None):
   279     def __init__(self, encoding=None, object_hook=None, parse_float=None,
       
   280             parse_int=None, parse_constant=None, strict=True):
   230         """
   281         """
   231         ``encoding`` determines the encoding used to interpret any ``str``
   282         ``encoding`` determines the encoding used to interpret any ``str``
   232         objects decoded by this instance (utf-8 by default).  It has no
   283         objects decoded by this instance (utf-8 by default).  It has no
   233         effect when decoding ``unicode`` objects.
   284         effect when decoding ``unicode`` objects.
   234         
   285         
   237 
   288 
   238         ``object_hook``, if specified, will be called with the result
   289         ``object_hook``, if specified, will be called with the result
   239         of every JSON object decoded and its return value will be used in
   290         of every JSON object decoded and its return value will be used in
   240         place of the given ``dict``.  This can be used to provide custom
   291         place of the given ``dict``.  This can be used to provide custom
   241         deserializations (e.g. to support JSON-RPC class hinting).
   292         deserializations (e.g. to support JSON-RPC class hinting).
       
   293 
       
   294         ``parse_float``, if specified, will be called with the string
       
   295         of every JSON float to be decoded. By default this is equivalent to
       
   296         float(num_str). This can be used to use another datatype or parser
       
   297         for JSON floats (e.g. decimal.Decimal).
       
   298 
       
   299         ``parse_int``, if specified, will be called with the string
       
   300         of every JSON int to be decoded. By default this is equivalent to
       
   301         int(num_str). This can be used to use another datatype or parser
       
   302         for JSON integers (e.g. float).
       
   303 
       
   304         ``parse_constant``, if specified, will be called with one of the
       
   305         following strings: -Infinity, Infinity, NaN, null, true, false.
       
   306         This can be used to raise an exception if invalid JSON numbers
       
   307         are encountered.
   242         """
   308         """
   243         self.encoding = encoding
   309         self.encoding = encoding
   244         self.object_hook = object_hook
   310         self.object_hook = object_hook
       
   311         self.parse_float = parse_float
       
   312         self.parse_int = parse_int
       
   313         self.parse_constant = parse_constant
       
   314         self.strict = strict
   245 
   315 
   246     def decode(self, s, _w=WHITESPACE.match):
   316     def decode(self, s, _w=WHITESPACE.match):
   247         """
   317         """
   248         Return the Python representation of ``s`` (a ``str`` or ``unicode``
   318         Return the Python representation of ``s`` (a ``str`` or ``unicode``
   249         instance containing a JSON document)
   319         instance containing a JSON document)