app/simplejson/encoder.py
changeset 975 295d67509412
equal deleted inserted replaced
974:2f86cbc90b65 975:295d67509412
       
     1 """Implementation of JSONEncoder
       
     2 """
       
     3 import re
       
     4 
       
     5 try:
       
     6     from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
       
     7 except ImportError:
       
     8     c_encode_basestring_ascii = None
       
     9 try:
       
    10     from simplejson._speedups import make_encoder as c_make_encoder
       
    11 except ImportError:
       
    12     c_make_encoder = None
       
    13 
       
    14 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
       
    15 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
       
    16 HAS_UTF8 = re.compile(r'[\x80-\xff]')
       
    17 ESCAPE_DCT = {
       
    18     '\\': '\\\\',
       
    19     '"': '\\"',
       
    20     '\b': '\\b',
       
    21     '\f': '\\f',
       
    22     '\n': '\\n',
       
    23     '\r': '\\r',
       
    24     '\t': '\\t',
       
    25 }
       
    26 for i in range(0x20):
       
    27     ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
       
    28 
       
    29 # Assume this produces an infinity on all machines (probably not guaranteed)
       
    30 INFINITY = float('1e66666')
       
    31 FLOAT_REPR = repr
       
    32 
       
    33 def encode_basestring(s):
       
    34     """Return a JSON representation of a Python string
       
    35 
       
    36     """
       
    37     def replace(match):
       
    38         return ESCAPE_DCT[match.group(0)]
       
    39     return '"' + ESCAPE.sub(replace, s) + '"'
       
    40 
       
    41 
       
    42 def py_encode_basestring_ascii(s):
       
    43     """Return an ASCII-only JSON representation of a Python string
       
    44 
       
    45     """
       
    46     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
       
    47         s = s.decode('utf-8')
       
    48     def replace(match):
       
    49         s = match.group(0)
       
    50         try:
       
    51             return ESCAPE_DCT[s]
       
    52         except KeyError:
       
    53             n = ord(s)
       
    54             if n < 0x10000:
       
    55                 return '\\u%04x' % (n,)
       
    56             else:
       
    57                 # surrogate pair
       
    58                 n -= 0x10000
       
    59                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
       
    60                 s2 = 0xdc00 | (n & 0x3ff)
       
    61                 return '\\u%04x\\u%04x' % (s1, s2)
       
    62     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
       
    63 
       
    64 
       
    65 encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
       
    66 
       
    67 class JSONEncoder(object):
       
    68     """Extensible JSON <http://json.org> encoder for Python data structures.
       
    69 
       
    70     Supports the following objects and types by default:
       
    71 
       
    72     +-------------------+---------------+
       
    73     | Python            | JSON          |
       
    74     +===================+===============+
       
    75     | dict              | object        |
       
    76     +-------------------+---------------+
       
    77     | list, tuple       | array         |
       
    78     +-------------------+---------------+
       
    79     | str, unicode      | string        |
       
    80     +-------------------+---------------+
       
    81     | int, long, float  | number        |
       
    82     +-------------------+---------------+
       
    83     | True              | true          |
       
    84     +-------------------+---------------+
       
    85     | False             | false         |
       
    86     +-------------------+---------------+
       
    87     | None              | null          |
       
    88     +-------------------+---------------+
       
    89 
       
    90     To extend this to recognize other objects, subclass and implement a
       
    91     ``.default()`` method with another method that returns a serializable
       
    92     object for ``o`` if possible, otherwise it should call the superclass
       
    93     implementation (to raise ``TypeError``).
       
    94 
       
    95     """
       
    96     item_separator = ', '
       
    97     key_separator = ': '
       
    98     def __init__(self, skipkeys=False, ensure_ascii=True,
       
    99             check_circular=True, allow_nan=True, sort_keys=False,
       
   100             indent=None, separators=None, encoding='utf-8', default=None):
       
   101         """Constructor for JSONEncoder, with sensible defaults.
       
   102 
       
   103         If skipkeys is False, then it is a TypeError to attempt
       
   104         encoding of keys that are not str, int, long, float or None.  If
       
   105         skipkeys is True, such items are simply skipped.
       
   106 
       
   107         If ensure_ascii is True, the output is guaranteed to be str
       
   108         objects with all incoming unicode characters escaped.  If
       
   109         ensure_ascii is false, the output will be unicode object.
       
   110 
       
   111         If check_circular is True, then lists, dicts, and custom encoded
       
   112         objects will be checked for circular references during encoding to
       
   113         prevent an infinite recursion (which would cause an OverflowError).
       
   114         Otherwise, no such check takes place.
       
   115 
       
   116         If allow_nan is True, then NaN, Infinity, and -Infinity will be
       
   117         encoded as such.  This behavior is not JSON specification compliant,
       
   118         but is consistent with most JavaScript based encoders and decoders.
       
   119         Otherwise, it will be a ValueError to encode such floats.
       
   120 
       
   121         If sort_keys is True, then the output of dictionaries will be
       
   122         sorted by key; this is useful for regression tests to ensure
       
   123         that JSON serializations can be compared on a day-to-day basis.
       
   124 
       
   125         If indent is a non-negative integer, then JSON array
       
   126         elements and object members will be pretty-printed with that
       
   127         indent level.  An indent level of 0 will only insert newlines.
       
   128         None is the most compact representation.
       
   129 
       
   130         If specified, separators should be a (item_separator, key_separator)
       
   131         tuple.  The default is (', ', ': ').  To get the most compact JSON
       
   132         representation you should specify (',', ':') to eliminate whitespace.
       
   133 
       
   134         If specified, default is a function that gets called for objects
       
   135         that can't otherwise be serialized.  It should return a JSON encodable
       
   136         version of the object or raise a ``TypeError``.
       
   137 
       
   138         If encoding is not None, then all input strings will be
       
   139         transformed into unicode using that encoding prior to JSON-encoding.
       
   140         The default is UTF-8.
       
   141 
       
   142         """
       
   143 
       
   144         self.skipkeys = skipkeys
       
   145         self.ensure_ascii = ensure_ascii
       
   146         self.check_circular = check_circular
       
   147         self.allow_nan = allow_nan
       
   148         self.sort_keys = sort_keys
       
   149         self.indent = indent
       
   150         if separators is not None:
       
   151             self.item_separator, self.key_separator = separators
       
   152         if default is not None:
       
   153             self.default = default
       
   154         self.encoding = encoding
       
   155 
       
   156     def default(self, o):
       
   157         """Implement this method in a subclass such that it returns
       
   158         a serializable object for ``o``, or calls the base implementation
       
   159         (to raise a ``TypeError``).
       
   160 
       
   161         For example, to support arbitrary iterators, you could
       
   162         implement default like this::
       
   163 
       
   164             def default(self, o):
       
   165                 try:
       
   166                     iterable = iter(o)
       
   167                 except TypeError:
       
   168                     pass
       
   169                 else:
       
   170                     return list(iterable)
       
   171                 return JSONEncoder.default(self, o)
       
   172 
       
   173         """
       
   174         raise TypeError("%r is not JSON serializable" % (o,))
       
   175 
       
   176     def encode(self, o):
       
   177         """Return a JSON string representation of a Python data structure.
       
   178 
       
   179         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
       
   180         '{"foo": ["bar", "baz"]}'
       
   181 
       
   182         """
       
   183         # This is for extremely simple cases and benchmarks.
       
   184         if isinstance(o, basestring):
       
   185             if isinstance(o, str):
       
   186                 _encoding = self.encoding
       
   187                 if (_encoding is not None
       
   188                         and not (_encoding == 'utf-8')):
       
   189                     o = o.decode(_encoding)
       
   190             if self.ensure_ascii:
       
   191                 return encode_basestring_ascii(o)
       
   192             else:
       
   193                 return encode_basestring(o)
       
   194         # This doesn't pass the iterator directly to ''.join() because the
       
   195         # exceptions aren't as detailed.  The list call should be roughly
       
   196         # equivalent to the PySequence_Fast that ''.join() would do.
       
   197         chunks = self.iterencode(o, _one_shot=True)
       
   198         if not isinstance(chunks, (list, tuple)):
       
   199             chunks = list(chunks)
       
   200         return ''.join(chunks)
       
   201 
       
   202     def iterencode(self, o, _one_shot=False):
       
   203         """Encode the given object and yield each string
       
   204         representation as available.
       
   205 
       
   206         For example::
       
   207 
       
   208             for chunk in JSONEncoder().iterencode(bigobject):
       
   209                 mysocket.write(chunk)
       
   210 
       
   211         """
       
   212         if self.check_circular:
       
   213             markers = {}
       
   214         else:
       
   215             markers = None
       
   216         if self.ensure_ascii:
       
   217             _encoder = encode_basestring_ascii
       
   218         else:
       
   219             _encoder = encode_basestring
       
   220         if self.encoding != 'utf-8':
       
   221             def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
       
   222                 if isinstance(o, str):
       
   223                     o = o.decode(_encoding)
       
   224                 return _orig_encoder(o)
       
   225 
       
   226         def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
       
   227             # Check for specials.  Note that this type of test is processor- and/or
       
   228             # platform-specific, so do tests which don't depend on the internals.
       
   229 
       
   230             if o != o:
       
   231                 text = 'NaN'
       
   232             elif o == _inf:
       
   233                 text = 'Infinity'
       
   234             elif o == _neginf:
       
   235                 text = '-Infinity'
       
   236             else:
       
   237                 return _repr(o)
       
   238 
       
   239             if not allow_nan:
       
   240                 raise ValueError("Out of range float values are not JSON compliant: %r"
       
   241                     % (o,))
       
   242 
       
   243             return text
       
   244 
       
   245 
       
   246         if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys:
       
   247             _iterencode = c_make_encoder(
       
   248                 markers, self.default, _encoder, self.indent,
       
   249                 self.key_separator, self.item_separator, self.sort_keys,
       
   250                 self.skipkeys, self.allow_nan)
       
   251         else:
       
   252             _iterencode = _make_iterencode(
       
   253                 markers, self.default, _encoder, self.indent, floatstr,
       
   254                 self.key_separator, self.item_separator, self.sort_keys,
       
   255                 self.skipkeys, _one_shot)
       
   256         return _iterencode(o, 0)
       
   257 
       
   258 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
       
   259         ## HACK: hand-optimized bytecode; turn globals into locals
       
   260         False=False,
       
   261         True=True,
       
   262         ValueError=ValueError,
       
   263         basestring=basestring,
       
   264         dict=dict,
       
   265         float=float,
       
   266         id=id,
       
   267         int=int,
       
   268         isinstance=isinstance,
       
   269         list=list,
       
   270         long=long,
       
   271         str=str,
       
   272         tuple=tuple,
       
   273     ):
       
   274 
       
   275     def _iterencode_list(lst, _current_indent_level):
       
   276         if not lst:
       
   277             yield '[]'
       
   278             return
       
   279         if markers is not None:
       
   280             markerid = id(lst)
       
   281             if markerid in markers:
       
   282                 raise ValueError("Circular reference detected")
       
   283             markers[markerid] = lst
       
   284         buf = '['
       
   285         if _indent is not None:
       
   286             _current_indent_level += 1
       
   287             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
       
   288             separator = _item_separator + newline_indent
       
   289             buf += newline_indent
       
   290         else:
       
   291             newline_indent = None
       
   292             separator = _item_separator
       
   293         first = True
       
   294         for value in lst:
       
   295             if first:
       
   296                 first = False
       
   297             else:
       
   298                 buf = separator
       
   299             if isinstance(value, basestring):
       
   300                 yield buf + _encoder(value)
       
   301             elif value is None:
       
   302                 yield buf + 'null'
       
   303             elif value is True:
       
   304                 yield buf + 'true'
       
   305             elif value is False:
       
   306                 yield buf + 'false'
       
   307             elif isinstance(value, (int, long)):
       
   308                 yield buf + str(value)
       
   309             elif isinstance(value, float):
       
   310                 yield buf + _floatstr(value)
       
   311             else:
       
   312                 yield buf
       
   313                 if isinstance(value, (list, tuple)):
       
   314                     chunks = _iterencode_list(value, _current_indent_level)
       
   315                 elif isinstance(value, dict):
       
   316                     chunks = _iterencode_dict(value, _current_indent_level)
       
   317                 else:
       
   318                     chunks = _iterencode(value, _current_indent_level)
       
   319                 for chunk in chunks:
       
   320                     yield chunk
       
   321         if newline_indent is not None:
       
   322             _current_indent_level -= 1
       
   323             yield '\n' + (' ' * (_indent * _current_indent_level))
       
   324         yield ']'
       
   325         if markers is not None:
       
   326             del markers[markerid]
       
   327 
       
   328     def _iterencode_dict(dct, _current_indent_level):
       
   329         if not dct:
       
   330             yield '{}'
       
   331             return
       
   332         if markers is not None:
       
   333             markerid = id(dct)
       
   334             if markerid in markers:
       
   335                 raise ValueError("Circular reference detected")
       
   336             markers[markerid] = dct
       
   337         yield '{'
       
   338         if _indent is not None:
       
   339             _current_indent_level += 1
       
   340             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
       
   341             item_separator = _item_separator + newline_indent
       
   342             yield newline_indent
       
   343         else:
       
   344             newline_indent = None
       
   345             item_separator = _item_separator
       
   346         first = True
       
   347         if _sort_keys:
       
   348             items = dct.items()
       
   349             items.sort(key=lambda kv: kv[0])
       
   350         else:
       
   351             items = dct.iteritems()
       
   352         for key, value in items:
       
   353             if isinstance(key, basestring):
       
   354                 pass
       
   355             # JavaScript is weakly typed for these, so it makes sense to
       
   356             # also allow them.  Many encoders seem to do something like this.
       
   357             elif isinstance(key, float):
       
   358                 key = _floatstr(key)
       
   359             elif isinstance(key, (int, long)):
       
   360                 key = str(key)
       
   361             elif key is True:
       
   362                 key = 'true'
       
   363             elif key is False:
       
   364                 key = 'false'
       
   365             elif key is None:
       
   366                 key = 'null'
       
   367             elif _skipkeys:
       
   368                 continue
       
   369             else:
       
   370                 raise TypeError("key %r is not a string" % (key,))
       
   371             if first:
       
   372                 first = False
       
   373             else:
       
   374                 yield item_separator
       
   375             yield _encoder(key)
       
   376             yield _key_separator
       
   377             if isinstance(value, basestring):
       
   378                 yield _encoder(value)
       
   379             elif value is None:
       
   380                 yield 'null'
       
   381             elif value is True:
       
   382                 yield 'true'
       
   383             elif value is False:
       
   384                 yield 'false'
       
   385             elif isinstance(value, (int, long)):
       
   386                 yield str(value)
       
   387             elif isinstance(value, float):
       
   388                 yield _floatstr(value)
       
   389             else:
       
   390                 if isinstance(value, (list, tuple)):
       
   391                     chunks = _iterencode_list(value, _current_indent_level)
       
   392                 elif isinstance(value, dict):
       
   393                     chunks = _iterencode_dict(value, _current_indent_level)
       
   394                 else:
       
   395                     chunks = _iterencode(value, _current_indent_level)
       
   396                 for chunk in chunks:
       
   397                     yield chunk
       
   398         if newline_indent is not None:
       
   399             _current_indent_level -= 1
       
   400             yield '\n' + (' ' * (_indent * _current_indent_level))
       
   401         yield '}'
       
   402         if markers is not None:
       
   403             del markers[markerid]
       
   404 
       
   405     def _iterencode(o, _current_indent_level):
       
   406         if isinstance(o, basestring):
       
   407             yield _encoder(o)
       
   408         elif o is None:
       
   409             yield 'null'
       
   410         elif o is True:
       
   411             yield 'true'
       
   412         elif o is False:
       
   413             yield 'false'
       
   414         elif isinstance(o, (int, long)):
       
   415             yield str(o)
       
   416         elif isinstance(o, float):
       
   417             yield _floatstr(o)
       
   418         elif isinstance(o, (list, tuple)):
       
   419             for chunk in _iterencode_list(o, _current_indent_level):
       
   420                 yield chunk
       
   421         elif isinstance(o, dict):
       
   422             for chunk in _iterencode_dict(o, _current_indent_level):
       
   423                 yield chunk
       
   424         else:
       
   425             if markers is not None:
       
   426                 markerid = id(o)
       
   427                 if markerid in markers:
       
   428                     raise ValueError("Circular reference detected")
       
   429                 markers[markerid] = o
       
   430             o = _default(o)
       
   431             for chunk in _iterencode(o, _current_indent_level):
       
   432                 yield chunk
       
   433             if markers is not None:
       
   434                 del markers[markerid]
       
   435 
       
   436     return _iterencode