diff -r 6641e941ef1e -r ff1a9aa48cfd app/django/utils/simplejson/encoder.py --- a/app/django/utils/simplejson/encoder.py Tue Oct 14 12:36:55 2008 +0000 +++ b/app/django/utils/simplejson/encoder.py Tue Oct 14 16:00:59 2008 +0000 @@ -3,11 +3,15 @@ """ import re -ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]') -ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])') +try: + from django.utils.simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii +except ImportError: + pass + +ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { - # escape all forward slashes to prevent attack - '/': '\\/', '\\': '\\\\', '"': '\\"', '\b': '\\b', @@ -19,8 +23,9 @@ for i in range(0x20): ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) -# assume this produces an infinity on all machines (probably not guaranteed) +# Assume this produces an infinity on all machines (probably not guaranteed) INFINITY = float('1e66666') +FLOAT_REPR = repr def floatstr(o, allow_nan=True): # Check for specials. Note that this type of test is processor- and/or @@ -33,7 +38,7 @@ elif o == -INFINITY: text = '-Infinity' else: - return str(o) + return FLOAT_REPR(o) if not allow_nan: raise ValueError("Out of range float values are not JSON compliant: %r" @@ -50,15 +55,32 @@ return ESCAPE_DCT[match.group(0)] return '"' + ESCAPE.sub(replace, s) + '"' -def encode_basestring_ascii(s): + +def py_encode_basestring_ascii(s): + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): s = match.group(0) try: return ESCAPE_DCT[s] except KeyError: - return '\\u%04x' % (ord(s),) + n = ord(s) + if n < 0x10000: + return '\\u%04x' % (n,) + else: + # surrogate pair + n -= 0x10000 + s1 = 0xd800 | ((n >> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + return '\\u%04x\\u%04x' % (s1, s2) return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' - + + +try: + encode_basestring_ascii = c_encode_basestring_ascii +except NameError: + encode_basestring_ascii = py_encode_basestring_ascii + class JSONEncoder(object): """ @@ -94,7 +116,7 @@ key_separator = ': ' def __init__(self, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None): + indent=None, separators=None, encoding='utf-8', default=None): """ Constructor for JSONEncoder, with sensible defaults. @@ -126,8 +148,16 @@ None is the most compact representation. If specified, separators should be a (item_separator, key_separator) - tuple. The default is (', ', ': '). To get the most compact JSON + tuple. The default is (', ', ': '). To get the most compact JSON representation you should specify (',', ':') to eliminate whitespace. + + If specified, default is a function that gets called for objects + that can't otherwise be serialized. It should return a JSON encodable + version of the object or raise a ``TypeError``. + + If encoding is not None, then all input strings will be + transformed into unicode using that encoding prior to JSON-encoding. + The default is UTF-8. """ self.skipkeys = skipkeys @@ -139,6 +169,9 @@ self.current_indent_level = 0 if separators is not None: self.item_separator, self.key_separator = separators + if default is not None: + self.default = default + self.encoding = encoding def _newline_indent(self): return '\n' + (' ' * (self.indent * self.current_indent_level)) @@ -207,8 +240,14 @@ items = [(k, dct[k]) for k in keys] else: items = dct.iteritems() + _encoding = self.encoding + _do_decode = (_encoding is not None + and not (_encoding == 'utf-8')) for key, value in items: - if isinstance(key, basestring): + if isinstance(key, str): + if _do_decode: + key = key.decode(_encoding) + elif isinstance(key, basestring): pass # JavaScript is weakly typed for these, so it makes sense to # also allow them. Many encoders seem to do something like this. @@ -247,6 +286,10 @@ encoder = encode_basestring_ascii else: encoder = encode_basestring + _encoding = self.encoding + if (_encoding is not None and isinstance(o, str) + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) yield encoder(o) elif o is None: yield 'null' @@ -304,11 +347,22 @@ Return a JSON string representation of a Python data structure. >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) - '{"foo":["bar", "baz"]}' + '{"foo": ["bar", "baz"]}' """ - # This doesn't pass the iterator directly to ''.join() because it - # sucks at reporting exceptions. It's going to do this internally - # anyway because it uses PySequence_Fast or similar. + # This is for extremely simple cases and benchmarks. + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. chunks = list(self.iterencode(o)) return ''.join(chunks)