1 """ |
1 """ |
2 Implementation of JSONEncoder |
2 Implementation of JSONEncoder |
3 """ |
3 """ |
4 import re |
4 import re |
5 |
5 |
6 ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]') |
6 try: |
7 ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])') |
7 from django.utils.simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii |
|
8 except ImportError: |
|
9 pass |
|
10 |
|
11 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') |
|
12 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') |
|
13 HAS_UTF8 = re.compile(r'[\x80-\xff]') |
8 ESCAPE_DCT = { |
14 ESCAPE_DCT = { |
9 # escape all forward slashes to prevent </script> attack |
|
10 '/': '\\/', |
|
11 '\\': '\\\\', |
15 '\\': '\\\\', |
12 '"': '\\"', |
16 '"': '\\"', |
13 '\b': '\\b', |
17 '\b': '\\b', |
14 '\f': '\\f', |
18 '\f': '\\f', |
15 '\n': '\\n', |
19 '\n': '\\n', |
17 '\t': '\\t', |
21 '\t': '\\t', |
18 } |
22 } |
19 for i in range(0x20): |
23 for i in range(0x20): |
20 ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) |
24 ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) |
21 |
25 |
22 # assume this produces an infinity on all machines (probably not guaranteed) |
26 # Assume this produces an infinity on all machines (probably not guaranteed) |
23 INFINITY = float('1e66666') |
27 INFINITY = float('1e66666') |
|
28 FLOAT_REPR = repr |
24 |
29 |
25 def floatstr(o, allow_nan=True): |
30 def floatstr(o, allow_nan=True): |
26 # Check for specials. Note that this type of test is processor- and/or |
31 # Check for specials. Note that this type of test is processor- and/or |
27 # platform-specific, so do tests which don't depend on the internals. |
32 # platform-specific, so do tests which don't depend on the internals. |
28 |
33 |
48 """ |
53 """ |
49 def replace(match): |
54 def replace(match): |
50 return ESCAPE_DCT[match.group(0)] |
55 return ESCAPE_DCT[match.group(0)] |
51 return '"' + ESCAPE.sub(replace, s) + '"' |
56 return '"' + ESCAPE.sub(replace, s) + '"' |
52 |
57 |
53 def encode_basestring_ascii(s): |
58 |
|
59 def py_encode_basestring_ascii(s): |
|
60 if isinstance(s, str) and HAS_UTF8.search(s) is not None: |
|
61 s = s.decode('utf-8') |
54 def replace(match): |
62 def replace(match): |
55 s = match.group(0) |
63 s = match.group(0) |
56 try: |
64 try: |
57 return ESCAPE_DCT[s] |
65 return ESCAPE_DCT[s] |
58 except KeyError: |
66 except KeyError: |
59 return '\\u%04x' % (ord(s),) |
67 n = ord(s) |
|
68 if n < 0x10000: |
|
69 return '\\u%04x' % (n,) |
|
70 else: |
|
71 # surrogate pair |
|
72 n -= 0x10000 |
|
73 s1 = 0xd800 | ((n >> 10) & 0x3ff) |
|
74 s2 = 0xdc00 | (n & 0x3ff) |
|
75 return '\\u%04x\\u%04x' % (s1, s2) |
60 return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' |
76 return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' |
61 |
77 |
|
78 |
|
79 try: |
|
80 encode_basestring_ascii = c_encode_basestring_ascii |
|
81 except NameError: |
|
82 encode_basestring_ascii = py_encode_basestring_ascii |
|
83 |
62 |
84 |
63 class JSONEncoder(object): |
85 class JSONEncoder(object): |
64 """ |
86 """ |
65 Extensible JSON <http://json.org> encoder for Python data structures. |
87 Extensible JSON <http://json.org> encoder for Python data structures. |
66 |
88 |
92 __all__ = ['__init__', 'default', 'encode', 'iterencode'] |
114 __all__ = ['__init__', 'default', 'encode', 'iterencode'] |
93 item_separator = ', ' |
115 item_separator = ', ' |
94 key_separator = ': ' |
116 key_separator = ': ' |
95 def __init__(self, skipkeys=False, ensure_ascii=True, |
117 def __init__(self, skipkeys=False, ensure_ascii=True, |
96 check_circular=True, allow_nan=True, sort_keys=False, |
118 check_circular=True, allow_nan=True, sort_keys=False, |
97 indent=None, separators=None): |
119 indent=None, separators=None, encoding='utf-8', default=None): |
98 """ |
120 """ |
99 Constructor for JSONEncoder, with sensible defaults. |
121 Constructor for JSONEncoder, with sensible defaults. |
100 |
122 |
101 If skipkeys is False, then it is a TypeError to attempt |
123 If skipkeys is False, then it is a TypeError to attempt |
102 encoding of keys that are not str, int, long, float or None. If |
124 encoding of keys that are not str, int, long, float or None. If |
124 elements and object members will be pretty-printed with that |
146 elements and object members will be pretty-printed with that |
125 indent level. An indent level of 0 will only insert newlines. |
147 indent level. An indent level of 0 will only insert newlines. |
126 None is the most compact representation. |
148 None is the most compact representation. |
127 |
149 |
128 If specified, separators should be a (item_separator, key_separator) |
150 If specified, separators should be a (item_separator, key_separator) |
129 tuple. The default is (', ', ': '). To get the most compact JSON |
151 tuple. The default is (', ', ': '). To get the most compact JSON |
130 representation you should specify (',', ':') to eliminate whitespace. |
152 representation you should specify (',', ':') to eliminate whitespace. |
|
153 |
|
154 If specified, default is a function that gets called for objects |
|
155 that can't otherwise be serialized. It should return a JSON encodable |
|
156 version of the object or raise a ``TypeError``. |
|
157 |
|
158 If encoding is not None, then all input strings will be |
|
159 transformed into unicode using that encoding prior to JSON-encoding. |
|
160 The default is UTF-8. |
131 """ |
161 """ |
132 |
162 |
133 self.skipkeys = skipkeys |
163 self.skipkeys = skipkeys |
134 self.ensure_ascii = ensure_ascii |
164 self.ensure_ascii = ensure_ascii |
135 self.check_circular = check_circular |
165 self.check_circular = check_circular |
137 self.sort_keys = sort_keys |
167 self.sort_keys = sort_keys |
138 self.indent = indent |
168 self.indent = indent |
139 self.current_indent_level = 0 |
169 self.current_indent_level = 0 |
140 if separators is not None: |
170 if separators is not None: |
141 self.item_separator, self.key_separator = separators |
171 self.item_separator, self.key_separator = separators |
|
172 if default is not None: |
|
173 self.default = default |
|
174 self.encoding = encoding |
142 |
175 |
143 def _newline_indent(self): |
176 def _newline_indent(self): |
144 return '\n' + (' ' * (self.indent * self.current_indent_level)) |
177 return '\n' + (' ' * (self.indent * self.current_indent_level)) |
145 |
178 |
146 def _iterencode_list(self, lst, markers=None): |
179 def _iterencode_list(self, lst, markers=None): |
205 keys = dct.keys() |
238 keys = dct.keys() |
206 keys.sort() |
239 keys.sort() |
207 items = [(k, dct[k]) for k in keys] |
240 items = [(k, dct[k]) for k in keys] |
208 else: |
241 else: |
209 items = dct.iteritems() |
242 items = dct.iteritems() |
|
243 _encoding = self.encoding |
|
244 _do_decode = (_encoding is not None |
|
245 and not (_encoding == 'utf-8')) |
210 for key, value in items: |
246 for key, value in items: |
211 if isinstance(key, basestring): |
247 if isinstance(key, str): |
|
248 if _do_decode: |
|
249 key = key.decode(_encoding) |
|
250 elif isinstance(key, basestring): |
212 pass |
251 pass |
213 # JavaScript is weakly typed for these, so it makes sense to |
252 # JavaScript is weakly typed for these, so it makes sense to |
214 # also allow them. Many encoders seem to do something like this. |
253 # also allow them. Many encoders seem to do something like this. |
215 elif isinstance(key, float): |
254 elif isinstance(key, float): |
216 key = floatstr(key, allow_nan) |
255 key = floatstr(key, allow_nan) |
245 if isinstance(o, basestring): |
284 if isinstance(o, basestring): |
246 if self.ensure_ascii: |
285 if self.ensure_ascii: |
247 encoder = encode_basestring_ascii |
286 encoder = encode_basestring_ascii |
248 else: |
287 else: |
249 encoder = encode_basestring |
288 encoder = encode_basestring |
|
289 _encoding = self.encoding |
|
290 if (_encoding is not None and isinstance(o, str) |
|
291 and not (_encoding == 'utf-8')): |
|
292 o = o.decode(_encoding) |
250 yield encoder(o) |
293 yield encoder(o) |
251 elif o is None: |
294 elif o is None: |
252 yield 'null' |
295 yield 'null' |
253 elif o is True: |
296 elif o is True: |
254 yield 'true' |
297 yield 'true' |
302 def encode(self, o): |
345 def encode(self, o): |
303 """ |
346 """ |
304 Return a JSON string representation of a Python data structure. |
347 Return a JSON string representation of a Python data structure. |
305 |
348 |
306 >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) |
349 >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) |
307 '{"foo":["bar", "baz"]}' |
350 '{"foo": ["bar", "baz"]}' |
308 """ |
351 """ |
309 # This doesn't pass the iterator directly to ''.join() because it |
352 # This is for extremely simple cases and benchmarks. |
310 # sucks at reporting exceptions. It's going to do this internally |
353 if isinstance(o, basestring): |
311 # anyway because it uses PySequence_Fast or similar. |
354 if isinstance(o, str): |
|
355 _encoding = self.encoding |
|
356 if (_encoding is not None |
|
357 and not (_encoding == 'utf-8')): |
|
358 o = o.decode(_encoding) |
|
359 if self.ensure_ascii: |
|
360 return encode_basestring_ascii(o) |
|
361 else: |
|
362 return encode_basestring(o) |
|
363 # This doesn't pass the iterator directly to ''.join() because the |
|
364 # exceptions aren't as detailed. The list call should be roughly |
|
365 # equivalent to the PySequence_Fast that ''.join() would do. |
312 chunks = list(self.iterencode(o)) |
366 chunks = list(self.iterencode(o)) |
313 return ''.join(chunks) |
367 return ''.join(chunks) |
314 |
368 |
315 def iterencode(self, o): |
369 def iterencode(self, o): |
316 """ |
370 """ |