app/django/utils/simplejson/encoder.py
changeset 323 ff1a9aa48cfd
parent 54 03e267d67478
equal deleted inserted replaced
322:6641e941ef1e 323:ff1a9aa48cfd
     1 """
     1 """
     2 Implementation of JSONEncoder
     2 Implementation of JSONEncoder
     3 """
     3 """
     4 import re
     4 import re
     5 
     5 
     6 ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]')
     6 try:
     7 ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
     7     from django.utils.simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
       
     8 except ImportError:
       
     9     pass
       
    10 
       
    11 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
       
    12 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
       
    13 HAS_UTF8 = re.compile(r'[\x80-\xff]')
     8 ESCAPE_DCT = {
    14 ESCAPE_DCT = {
     9     # escape all forward slashes to prevent </script> attack
       
    10     '/': '\\/',
       
    11     '\\': '\\\\',
    15     '\\': '\\\\',
    12     '"': '\\"',
    16     '"': '\\"',
    13     '\b': '\\b',
    17     '\b': '\\b',
    14     '\f': '\\f',
    18     '\f': '\\f',
    15     '\n': '\\n',
    19     '\n': '\\n',
    17     '\t': '\\t',
    21     '\t': '\\t',
    18 }
    22 }
    19 for i in range(0x20):
    23 for i in range(0x20):
    20     ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
    24     ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
    21 
    25 
    22 # assume this produces an infinity on all machines (probably not guaranteed)
    26 # Assume this produces an infinity on all machines (probably not guaranteed)
    23 INFINITY = float('1e66666')
    27 INFINITY = float('1e66666')
       
    28 FLOAT_REPR = repr
    24 
    29 
    25 def floatstr(o, allow_nan=True):
    30 def floatstr(o, allow_nan=True):
    26     # Check for specials.  Note that this type of test is processor- and/or
    31     # Check for specials.  Note that this type of test is processor- and/or
    27     # platform-specific, so do tests which don't depend on the internals.
    32     # platform-specific, so do tests which don't depend on the internals.
    28 
    33 
    31     elif o == INFINITY:
    36     elif o == INFINITY:
    32         text = 'Infinity'
    37         text = 'Infinity'
    33     elif o == -INFINITY:
    38     elif o == -INFINITY:
    34         text = '-Infinity'
    39         text = '-Infinity'
    35     else:
    40     else:
    36         return str(o)
    41         return FLOAT_REPR(o)
    37 
    42 
    38     if not allow_nan:
    43     if not allow_nan:
    39         raise ValueError("Out of range float values are not JSON compliant: %r"
    44         raise ValueError("Out of range float values are not JSON compliant: %r"
    40             % (o,))
    45             % (o,))
    41 
    46 
    48     """
    53     """
    49     def replace(match):
    54     def replace(match):
    50         return ESCAPE_DCT[match.group(0)]
    55         return ESCAPE_DCT[match.group(0)]
    51     return '"' + ESCAPE.sub(replace, s) + '"'
    56     return '"' + ESCAPE.sub(replace, s) + '"'
    52 
    57 
    53 def encode_basestring_ascii(s):
    58 
       
    59 def py_encode_basestring_ascii(s):
       
    60     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
       
    61         s = s.decode('utf-8')
    54     def replace(match):
    62     def replace(match):
    55         s = match.group(0)
    63         s = match.group(0)
    56         try:
    64         try:
    57             return ESCAPE_DCT[s]
    65             return ESCAPE_DCT[s]
    58         except KeyError:
    66         except KeyError:
    59             return '\\u%04x' % (ord(s),)
    67             n = ord(s)
       
    68             if n < 0x10000:
       
    69                 return '\\u%04x' % (n,)
       
    70             else:
       
    71                 # surrogate pair
       
    72                 n -= 0x10000
       
    73                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
       
    74                 s2 = 0xdc00 | (n & 0x3ff)
       
    75                 return '\\u%04x\\u%04x' % (s1, s2)
    60     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
    76     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
    61         
    77 
       
    78 
       
    79 try:
       
    80     encode_basestring_ascii = c_encode_basestring_ascii
       
    81 except NameError:
       
    82     encode_basestring_ascii = py_encode_basestring_ascii
       
    83 
    62 
    84 
    63 class JSONEncoder(object):
    85 class JSONEncoder(object):
    64     """
    86     """
    65     Extensible JSON <http://json.org> encoder for Python data structures.
    87     Extensible JSON <http://json.org> encoder for Python data structures.
    66 
    88 
    92     __all__ = ['__init__', 'default', 'encode', 'iterencode']
   114     __all__ = ['__init__', 'default', 'encode', 'iterencode']
    93     item_separator = ', '
   115     item_separator = ', '
    94     key_separator = ': '
   116     key_separator = ': '
    95     def __init__(self, skipkeys=False, ensure_ascii=True,
   117     def __init__(self, skipkeys=False, ensure_ascii=True,
    96             check_circular=True, allow_nan=True, sort_keys=False,
   118             check_circular=True, allow_nan=True, sort_keys=False,
    97             indent=None, separators=None):
   119             indent=None, separators=None, encoding='utf-8', default=None):
    98         """
   120         """
    99         Constructor for JSONEncoder, with sensible defaults.
   121         Constructor for JSONEncoder, with sensible defaults.
   100 
   122 
   101         If skipkeys is False, then it is a TypeError to attempt
   123         If skipkeys is False, then it is a TypeError to attempt
   102         encoding of keys that are not str, int, long, float or None.  If
   124         encoding of keys that are not str, int, long, float or None.  If
   124         elements and object members will be pretty-printed with that
   146         elements and object members will be pretty-printed with that
   125         indent level.  An indent level of 0 will only insert newlines.
   147         indent level.  An indent level of 0 will only insert newlines.
   126         None is the most compact representation.
   148         None is the most compact representation.
   127 
   149 
   128         If specified, separators should be a (item_separator, key_separator)
   150         If specified, separators should be a (item_separator, key_separator)
   129         tuple. The default is (', ', ': '). To get the most compact JSON
   151         tuple.  The default is (', ', ': ').  To get the most compact JSON
   130         representation you should specify (',', ':') to eliminate whitespace.
   152         representation you should specify (',', ':') to eliminate whitespace.
       
   153 
       
   154         If specified, default is a function that gets called for objects
       
   155         that can't otherwise be serialized.  It should return a JSON encodable
       
   156         version of the object or raise a ``TypeError``.
       
   157 
       
   158         If encoding is not None, then all input strings will be
       
   159         transformed into unicode using that encoding prior to JSON-encoding.
       
   160         The default is UTF-8.
   131         """
   161         """
   132 
   162 
   133         self.skipkeys = skipkeys
   163         self.skipkeys = skipkeys
   134         self.ensure_ascii = ensure_ascii
   164         self.ensure_ascii = ensure_ascii
   135         self.check_circular = check_circular
   165         self.check_circular = check_circular
   137         self.sort_keys = sort_keys
   167         self.sort_keys = sort_keys
   138         self.indent = indent
   168         self.indent = indent
   139         self.current_indent_level = 0
   169         self.current_indent_level = 0
   140         if separators is not None:
   170         if separators is not None:
   141             self.item_separator, self.key_separator = separators
   171             self.item_separator, self.key_separator = separators
       
   172         if default is not None:
       
   173             self.default = default
       
   174         self.encoding = encoding
   142 
   175 
   143     def _newline_indent(self):
   176     def _newline_indent(self):
   144         return '\n' + (' ' * (self.indent * self.current_indent_level))
   177         return '\n' + (' ' * (self.indent * self.current_indent_level))
   145 
   178 
   146     def _iterencode_list(self, lst, markers=None):
   179     def _iterencode_list(self, lst, markers=None):
   205             keys = dct.keys()
   238             keys = dct.keys()
   206             keys.sort()
   239             keys.sort()
   207             items = [(k, dct[k]) for k in keys]
   240             items = [(k, dct[k]) for k in keys]
   208         else:
   241         else:
   209             items = dct.iteritems()
   242             items = dct.iteritems()
       
   243         _encoding = self.encoding
       
   244         _do_decode = (_encoding is not None
       
   245             and not (_encoding == 'utf-8'))
   210         for key, value in items:
   246         for key, value in items:
   211             if isinstance(key, basestring):
   247             if isinstance(key, str):
       
   248                 if _do_decode:
       
   249                     key = key.decode(_encoding)
       
   250             elif isinstance(key, basestring):
   212                 pass
   251                 pass
   213             # JavaScript is weakly typed for these, so it makes sense to
   252             # JavaScript is weakly typed for these, so it makes sense to
   214             # also allow them.  Many encoders seem to do something like this.
   253             # also allow them.  Many encoders seem to do something like this.
   215             elif isinstance(key, float):
   254             elif isinstance(key, float):
   216                 key = floatstr(key, allow_nan)
   255                 key = floatstr(key, allow_nan)
   245         if isinstance(o, basestring):
   284         if isinstance(o, basestring):
   246             if self.ensure_ascii:
   285             if self.ensure_ascii:
   247                 encoder = encode_basestring_ascii
   286                 encoder = encode_basestring_ascii
   248             else:
   287             else:
   249                 encoder = encode_basestring
   288                 encoder = encode_basestring
       
   289             _encoding = self.encoding
       
   290             if (_encoding is not None and isinstance(o, str)
       
   291                     and not (_encoding == 'utf-8')):
       
   292                 o = o.decode(_encoding)
   250             yield encoder(o)
   293             yield encoder(o)
   251         elif o is None:
   294         elif o is None:
   252             yield 'null'
   295             yield 'null'
   253         elif o is True:
   296         elif o is True:
   254             yield 'true'
   297             yield 'true'
   302     def encode(self, o):
   345     def encode(self, o):
   303         """
   346         """
   304         Return a JSON string representation of a Python data structure.
   347         Return a JSON string representation of a Python data structure.
   305 
   348 
   306         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
   349         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
   307         '{"foo":["bar", "baz"]}'
   350         '{"foo": ["bar", "baz"]}'
   308         """
   351         """
   309         # This doesn't pass the iterator directly to ''.join() because it
   352         # This is for extremely simple cases and benchmarks.
   310         # sucks at reporting exceptions.  It's going to do this internally
   353         if isinstance(o, basestring):
   311         # anyway because it uses PySequence_Fast or similar.
   354             if isinstance(o, str):
       
   355                 _encoding = self.encoding
       
   356                 if (_encoding is not None 
       
   357                         and not (_encoding == 'utf-8')):
       
   358                     o = o.decode(_encoding)
       
   359             if self.ensure_ascii:
       
   360                 return encode_basestring_ascii(o)
       
   361             else:
       
   362                 return encode_basestring(o)
       
   363         # This doesn't pass the iterator directly to ''.join() because the
       
   364         # exceptions aren't as detailed.  The list call should be roughly
       
   365         # equivalent to the PySequence_Fast that ''.join() would do.
   312         chunks = list(self.iterencode(o))
   366         chunks = list(self.iterencode(o))
   313         return ''.join(chunks)
   367         return ''.join(chunks)
   314 
   368 
   315     def iterencode(self, o):
   369     def iterencode(self, o):
   316         """
   370         """