app/django/utils/simplejson/encoder.py
changeset 54 03e267d67478
child 323 ff1a9aa48cfd
equal deleted inserted replaced
53:57b4279d8c4e 54:03e267d67478
       
     1 """
       
     2 Implementation of JSONEncoder
       
     3 """
       
     4 import re
       
     5 
       
     6 ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]')
       
     7 ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
       
     8 ESCAPE_DCT = {
       
     9     # escape all forward slashes to prevent </script> attack
       
    10     '/': '\\/',
       
    11     '\\': '\\\\',
       
    12     '"': '\\"',
       
    13     '\b': '\\b',
       
    14     '\f': '\\f',
       
    15     '\n': '\\n',
       
    16     '\r': '\\r',
       
    17     '\t': '\\t',
       
    18 }
       
    19 for i in range(0x20):
       
    20     ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
       
    21 
       
    22 # assume this produces an infinity on all machines (probably not guaranteed)
       
    23 INFINITY = float('1e66666')
       
    24 
       
    25 def floatstr(o, allow_nan=True):
       
    26     # Check for specials.  Note that this type of test is processor- and/or
       
    27     # platform-specific, so do tests which don't depend on the internals.
       
    28 
       
    29     if o != o:
       
    30         text = 'NaN'
       
    31     elif o == INFINITY:
       
    32         text = 'Infinity'
       
    33     elif o == -INFINITY:
       
    34         text = '-Infinity'
       
    35     else:
       
    36         return str(o)
       
    37 
       
    38     if not allow_nan:
       
    39         raise ValueError("Out of range float values are not JSON compliant: %r"
       
    40             % (o,))
       
    41 
       
    42     return text
       
    43 
       
    44 
       
    45 def encode_basestring(s):
       
    46     """
       
    47     Return a JSON representation of a Python string
       
    48     """
       
    49     def replace(match):
       
    50         return ESCAPE_DCT[match.group(0)]
       
    51     return '"' + ESCAPE.sub(replace, s) + '"'
       
    52 
       
    53 def encode_basestring_ascii(s):
       
    54     def replace(match):
       
    55         s = match.group(0)
       
    56         try:
       
    57             return ESCAPE_DCT[s]
       
    58         except KeyError:
       
    59             return '\\u%04x' % (ord(s),)
       
    60     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
       
    61         
       
    62 
       
    63 class JSONEncoder(object):
       
    64     """
       
    65     Extensible JSON <http://json.org> encoder for Python data structures.
       
    66 
       
    67     Supports the following objects and types by default:
       
    68     
       
    69     +-------------------+---------------+
       
    70     | Python            | JSON          |
       
    71     +===================+===============+
       
    72     | dict              | object        |
       
    73     +-------------------+---------------+
       
    74     | list, tuple       | array         |
       
    75     +-------------------+---------------+
       
    76     | str, unicode      | string        |
       
    77     +-------------------+---------------+
       
    78     | int, long, float  | number        |
       
    79     +-------------------+---------------+
       
    80     | True              | true          |
       
    81     +-------------------+---------------+
       
    82     | False             | false         |
       
    83     +-------------------+---------------+
       
    84     | None              | null          |
       
    85     +-------------------+---------------+
       
    86 
       
    87     To extend this to recognize other objects, subclass and implement a
       
    88     ``.default()`` method with another method that returns a serializable
       
    89     object for ``o`` if possible, otherwise it should call the superclass
       
    90     implementation (to raise ``TypeError``).
       
    91     """
       
    92     __all__ = ['__init__', 'default', 'encode', 'iterencode']
       
    93     item_separator = ', '
       
    94     key_separator = ': '
       
    95     def __init__(self, skipkeys=False, ensure_ascii=True,
       
    96             check_circular=True, allow_nan=True, sort_keys=False,
       
    97             indent=None, separators=None):
       
    98         """
       
    99         Constructor for JSONEncoder, with sensible defaults.
       
   100 
       
   101         If skipkeys is False, then it is a TypeError to attempt
       
   102         encoding of keys that are not str, int, long, float or None.  If
       
   103         skipkeys is True, such items are simply skipped.
       
   104 
       
   105         If ensure_ascii is True, the output is guaranteed to be str
       
   106         objects with all incoming unicode characters escaped.  If
       
   107         ensure_ascii is false, the output will be unicode object.
       
   108 
       
   109         If check_circular is True, then lists, dicts, and custom encoded
       
   110         objects will be checked for circular references during encoding to
       
   111         prevent an infinite recursion (which would cause an OverflowError).
       
   112         Otherwise, no such check takes place.
       
   113 
       
   114         If allow_nan is True, then NaN, Infinity, and -Infinity will be
       
   115         encoded as such.  This behavior is not JSON specification compliant,
       
   116         but is consistent with most JavaScript based encoders and decoders.
       
   117         Otherwise, it will be a ValueError to encode such floats.
       
   118 
       
   119         If sort_keys is True, then the output of dictionaries will be
       
   120         sorted by key; this is useful for regression tests to ensure
       
   121         that JSON serializations can be compared on a day-to-day basis.
       
   122 
       
   123         If indent is a non-negative integer, then JSON array
       
   124         elements and object members will be pretty-printed with that
       
   125         indent level.  An indent level of 0 will only insert newlines.
       
   126         None is the most compact representation.
       
   127 
       
   128         If specified, separators should be a (item_separator, key_separator)
       
   129         tuple. The default is (', ', ': '). To get the most compact JSON
       
   130         representation you should specify (',', ':') to eliminate whitespace.
       
   131         """
       
   132 
       
   133         self.skipkeys = skipkeys
       
   134         self.ensure_ascii = ensure_ascii
       
   135         self.check_circular = check_circular
       
   136         self.allow_nan = allow_nan
       
   137         self.sort_keys = sort_keys
       
   138         self.indent = indent
       
   139         self.current_indent_level = 0
       
   140         if separators is not None:
       
   141             self.item_separator, self.key_separator = separators
       
   142 
       
   143     def _newline_indent(self):
       
   144         return '\n' + (' ' * (self.indent * self.current_indent_level))
       
   145 
       
   146     def _iterencode_list(self, lst, markers=None):
       
   147         if not lst:
       
   148             yield '[]'
       
   149             return
       
   150         if markers is not None:
       
   151             markerid = id(lst)
       
   152             if markerid in markers:
       
   153                 raise ValueError("Circular reference detected")
       
   154             markers[markerid] = lst
       
   155         yield '['
       
   156         if self.indent is not None:
       
   157             self.current_indent_level += 1
       
   158             newline_indent = self._newline_indent()
       
   159             separator = self.item_separator + newline_indent
       
   160             yield newline_indent
       
   161         else:
       
   162             newline_indent = None
       
   163             separator = self.item_separator
       
   164         first = True
       
   165         for value in lst:
       
   166             if first:
       
   167                 first = False
       
   168             else:
       
   169                 yield separator
       
   170             for chunk in self._iterencode(value, markers):
       
   171                 yield chunk
       
   172         if newline_indent is not None:
       
   173             self.current_indent_level -= 1
       
   174             yield self._newline_indent()
       
   175         yield ']'
       
   176         if markers is not None:
       
   177             del markers[markerid]
       
   178 
       
   179     def _iterencode_dict(self, dct, markers=None):
       
   180         if not dct:
       
   181             yield '{}'
       
   182             return
       
   183         if markers is not None:
       
   184             markerid = id(dct)
       
   185             if markerid in markers:
       
   186                 raise ValueError("Circular reference detected")
       
   187             markers[markerid] = dct
       
   188         yield '{'
       
   189         key_separator = self.key_separator
       
   190         if self.indent is not None:
       
   191             self.current_indent_level += 1
       
   192             newline_indent = self._newline_indent()
       
   193             item_separator = self.item_separator + newline_indent
       
   194             yield newline_indent
       
   195         else:
       
   196             newline_indent = None
       
   197             item_separator = self.item_separator
       
   198         first = True
       
   199         if self.ensure_ascii:
       
   200             encoder = encode_basestring_ascii
       
   201         else:
       
   202             encoder = encode_basestring
       
   203         allow_nan = self.allow_nan
       
   204         if self.sort_keys:
       
   205             keys = dct.keys()
       
   206             keys.sort()
       
   207             items = [(k, dct[k]) for k in keys]
       
   208         else:
       
   209             items = dct.iteritems()
       
   210         for key, value in items:
       
   211             if isinstance(key, basestring):
       
   212                 pass
       
   213             # JavaScript is weakly typed for these, so it makes sense to
       
   214             # also allow them.  Many encoders seem to do something like this.
       
   215             elif isinstance(key, float):
       
   216                 key = floatstr(key, allow_nan)
       
   217             elif isinstance(key, (int, long)):
       
   218                 key = str(key)
       
   219             elif key is True:
       
   220                 key = 'true'
       
   221             elif key is False:
       
   222                 key = 'false'
       
   223             elif key is None:
       
   224                 key = 'null'
       
   225             elif self.skipkeys:
       
   226                 continue
       
   227             else:
       
   228                 raise TypeError("key %r is not a string" % (key,))
       
   229             if first:
       
   230                 first = False
       
   231             else:
       
   232                 yield item_separator
       
   233             yield encoder(key)
       
   234             yield key_separator
       
   235             for chunk in self._iterencode(value, markers):
       
   236                 yield chunk
       
   237         if newline_indent is not None:
       
   238             self.current_indent_level -= 1
       
   239             yield self._newline_indent()
       
   240         yield '}'
       
   241         if markers is not None:
       
   242             del markers[markerid]
       
   243 
       
   244     def _iterencode(self, o, markers=None):
       
   245         if isinstance(o, basestring):
       
   246             if self.ensure_ascii:
       
   247                 encoder = encode_basestring_ascii
       
   248             else:
       
   249                 encoder = encode_basestring
       
   250             yield encoder(o)
       
   251         elif o is None:
       
   252             yield 'null'
       
   253         elif o is True:
       
   254             yield 'true'
       
   255         elif o is False:
       
   256             yield 'false'
       
   257         elif isinstance(o, (int, long)):
       
   258             yield str(o)
       
   259         elif isinstance(o, float):
       
   260             yield floatstr(o, self.allow_nan)
       
   261         elif isinstance(o, (list, tuple)):
       
   262             for chunk in self._iterencode_list(o, markers):
       
   263                 yield chunk
       
   264         elif isinstance(o, dict):
       
   265             for chunk in self._iterencode_dict(o, markers):
       
   266                 yield chunk
       
   267         else:
       
   268             if markers is not None:
       
   269                 markerid = id(o)
       
   270                 if markerid in markers:
       
   271                     raise ValueError("Circular reference detected")
       
   272                 markers[markerid] = o
       
   273             for chunk in self._iterencode_default(o, markers):
       
   274                 yield chunk
       
   275             if markers is not None:
       
   276                 del markers[markerid]
       
   277 
       
   278     def _iterencode_default(self, o, markers=None):
       
   279         newobj = self.default(o)
       
   280         return self._iterencode(newobj, markers)
       
   281 
       
   282     def default(self, o):
       
   283         """
       
   284         Implement this method in a subclass such that it returns
       
   285         a serializable object for ``o``, or calls the base implementation
       
   286         (to raise a ``TypeError``).
       
   287 
       
   288         For example, to support arbitrary iterators, you could
       
   289         implement default like this::
       
   290             
       
   291             def default(self, o):
       
   292                 try:
       
   293                     iterable = iter(o)
       
   294                 except TypeError:
       
   295                     pass
       
   296                 else:
       
   297                     return list(iterable)
       
   298                 return JSONEncoder.default(self, o)
       
   299         """
       
   300         raise TypeError("%r is not JSON serializable" % (o,))
       
   301 
       
   302     def encode(self, o):
       
   303         """
       
   304         Return a JSON string representation of a Python data structure.
       
   305 
       
   306         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
       
   307         '{"foo":["bar", "baz"]}'
       
   308         """
       
   309         # This doesn't pass the iterator directly to ''.join() because it
       
   310         # sucks at reporting exceptions.  It's going to do this internally
       
   311         # anyway because it uses PySequence_Fast or similar.
       
   312         chunks = list(self.iterencode(o))
       
   313         return ''.join(chunks)
       
   314 
       
   315     def iterencode(self, o):
       
   316         """
       
   317         Encode the given object and yield each string
       
   318         representation as available.
       
   319         
       
   320         For example::
       
   321             
       
   322             for chunk in JSONEncoder().iterencode(bigobject):
       
   323                 mysocket.write(chunk)
       
   324         """
       
   325         if self.check_circular:
       
   326             markers = {}
       
   327         else:
       
   328             markers = None
       
   329         return self._iterencode(o, markers)
       
   330 
       
# Names exported by ``from <this module> import *``.
__all__ = ['JSONEncoder']