From d57ce3d6a9e4533e1814cbf39194e99b91b30f4e Mon Sep 17 00:00:00 2001 From: Malcolm Tredinnick Date: Sun, 27 Jul 2008 23:38:28 +0000 Subject: [PATCH] Fixed #7131 -- Updated included simplejson code to match the simplejson-1.9.2 release. This should be fully backwards-compatible for people using the public interfaces. git-svn-id: http://code.djangoproject.com/svn/django/trunk@8124 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/utils/simplejson/LICENSE.txt | 1 - django/utils/simplejson/__init__.py | 186 +++++++++++++++++++++----- django/utils/simplejson/decoder.py | 104 +++++++++++--- django/utils/simplejson/encoder.py | 86 +++++++++--- django/utils/simplejson/jsonfilter.py | 40 ------ django/utils/simplejson/scanner.py | 18 ++- django/utils/simplejson/tool.py | 44 ++++++ 7 files changed, 367 insertions(+), 112 deletions(-) delete mode 100644 django/utils/simplejson/jsonfilter.py create mode 100644 django/utils/simplejson/tool.py diff --git a/django/utils/simplejson/LICENSE.txt b/django/utils/simplejson/LICENSE.txt index 1fa4fd5ba2..ad95f29c17 100644 --- a/django/utils/simplejson/LICENSE.txt +++ b/django/utils/simplejson/LICENSE.txt @@ -1,4 +1,3 @@ -simplejson 1.5 Copyright (c) 2006 Bob Ippolito Permission is hereby granted, free of charge, to any person obtaining a copy of diff --git a/django/utils/simplejson/__init__.py b/django/utils/simplejson/__init__.py index 15b7173976..130940f358 100644 --- a/django/utils/simplejson/__init__.py +++ b/django/utils/simplejson/__init__.py @@ -65,6 +65,9 @@ Specializing JSON object decoding:: >>> simplejson.loads('{"__complex__": true, "real": 1, "imag": 2}', ... object_hook=as_complex) (1+2j) + >>> import decimal + >>> simplejson.loads('1.1', parse_float=decimal.Decimal) + Decimal("1.1") Extending JSONEncoder:: @@ -83,20 +86,48 @@ Extending JSONEncoder:: ['[', '2.0', ', ', '1.0', ']'] +Using simplejson from the shell to validate and +pretty-print:: + + $ echo '{"json":"obj"}' | python -msimplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -msimplejson.tool + Expecting property name: line 1 column 2 (char 2) + Note that the JSON produced by this module's default settings is a subset of YAML, so it may be used as a serializer for that as well. """ -__version__ = '1.5' +__version__ = '1.9.2' __all__ = [ 'dump', 'dumps', 'load', 'loads', 'JSONDecoder', 'JSONEncoder', ] -from django.utils.simplejson.decoder import JSONDecoder -from django.utils.simplejson.encoder import JSONEncoder +if __name__ == '__main__': + import warnings + warnings.warn('python -msimplejson is deprecated, use python -msiplejson.tool', DeprecationWarning) + from django.utils.simplejson.decoder import JSONDecoder + from django.utils.simplejson.encoder import JSONEncoder +else: + from decoder import JSONDecoder + from encoder import JSONEncoder + +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, +) def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): """ Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). @@ -107,7 +138,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, If ``ensure_ascii`` is ``False``, then the some chunks written to ``fp`` may be ``unicode`` instances, subject to normal Python ``str`` to - ``unicode`` coercion rules. Unless ``fp.write()`` explicitly + ``unicode`` coercion rules. Unless ``fp.write()`` explicitly understands ``unicode`` (as in ``codecs.getwriter()``) this is likely to cause an error. @@ -121,25 +152,44 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). If ``indent`` is a non-negative integer, then JSON array elements and object - members will be pretty-printed with that indent level. An indent level - of 0 will only insert newlines. ``None`` is the most compact representation. + members will be pretty-printed with that indent level. An indent level + of 0 will only insert newlines. ``None`` is the most compact representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with the ``cls`` kwarg. """ - if cls is None: - cls = JSONEncoder - iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, - check_circular=check_circular, allow_nan=allow_nan, indent=indent, - **kw).iterencode(obj) + # cached encoder + if (skipkeys is False and ensure_ascii is True and + check_circular is True and allow_nan is True and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + default=default, **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost for chunk in iterable: fp.write(chunk) + def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): """ Serialize ``obj`` to a JSON formatted ``str``. @@ -161,88 +211,159 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). If ``indent`` is a non-negative integer, then JSON array elements and - object members will be pretty-printed with that indent level. An indent - level of 0 will only insert newlines. ``None`` is the most compact + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact representation. If ``separators`` is an ``(item_separator, dict_separator)`` tuple then it will be used instead of the default ``(', ', ': ')`` separators. ``(',', ':')`` is the most compact JSON representation. + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with the ``cls`` kwarg. """ + # cached encoder + if (skipkeys is False and ensure_ascii is True and + check_circular is True and allow_nan is True and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder return cls( skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, - separators=separators, + separators=separators, encoding=encoding, default=default, **kw).encode(obj) -def load(fp, encoding=None, cls=None, object_hook=None, **kw): + +_default_decoder = JSONDecoder(encoding=None, object_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): """ Deserialize ``fp`` (a ``.read()``-supporting file-like object containing a JSON document) to a Python object. If the contents of ``fp`` is encoded with an ASCII based encoding other than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must - be specified. Encodings that are not ASCII based (such as UCS-2) are + be specified. Encodings that are not ASCII based (such as UCS-2) are not allowed, and should be wrapped with ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` object and passed to ``loads()`` ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature can be used to implement custom decoders (e.g. JSON-RPC class hinting). To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg. """ - if cls is None: - cls = JSONDecoder - if object_hook is not None: - kw['object_hook'] = object_hook - return cls(encoding=encoding, **kw).decode(fp.read()) + return loads(fp.read(), + encoding=encoding, cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, **kw) -def loads(s, encoding=None, cls=None, object_hook=None, **kw): + +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): """ Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) to a Python object. If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name - must be specified. Encodings that are not ASCII based (such as UCS-2) + must be specified. Encodings that are not ASCII based (such as UCS-2) are not allowed and should be decoded to ``unicode`` first. ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature can be used to implement custom decoders (e.g. JSON-RPC class hinting). + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN, null, true, false. + This can be used to raise an exception if invalid JSON numbers + are encountered. + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg. """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and not kw): + return _default_decoder.decode(s) if cls is None: cls = JSONDecoder if object_hook is not None: kw['object_hook'] = object_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant return cls(encoding=encoding, **kw).decode(s) + +# +# Compatibility cruft from other libraries +# + + +def decode(s): + """ + demjson, python-cjson API compatibility hook. Use loads(s) instead. + """ + import warnings + warnings.warn("simplejson.loads(s) should be used instead of decode(s)", + DeprecationWarning) + return loads(s) + + +def encode(obj): + """ + demjson, python-cjson compatibility hook. Use dumps(s) instead. + """ + import warnings + warnings.warn("simplejson.dumps(s) should be used instead of encode(s)", + DeprecationWarning) + return dumps(obj) + + def read(s): """ - json-py API compatibility hook. Use loads(s) instead. + jsonlib, JsonUtils, python-json, json-py API compatibility hook. + Use loads(s) instead. """ import warnings warnings.warn("simplejson.loads(s) should be used instead of read(s)", DeprecationWarning) return loads(s) + def write(obj): """ - json-py API compatibility hook. Use dumps(s) instead. + jsonlib, JsonUtils, python-json, json-py API compatibility hook. + Use dumps(s) instead. """ import warnings warnings.warn("simplejson.dumps(s) should be used instead of write(s)", @@ -250,3 +371,6 @@ def write(obj): return dumps(obj) +if __name__ == '__main__': + import simplejson.tool + simplejson.tool.main() diff --git a/django/utils/simplejson/decoder.py b/django/utils/simplejson/decoder.py index 66f68a200b..cef9fc002b 100644 --- a/django/utils/simplejson/decoder.py +++ b/django/utils/simplejson/decoder.py @@ -2,8 +2,13 @@ Implementation of JSONDecoder """ import re +import sys from django.utils.simplejson.scanner import Scanner, pattern +try: + from django.utils.simplejson._speedups import scanstring as c_scanstring +except ImportError: + pass FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL @@ -18,6 +23,7 @@ def _floatconstants(): NaN, PosInf, NegInf = _floatconstants() + def linecol(doc, pos): lineno = doc.count('\n', 0, pos) + 1 if lineno == 1: @@ -26,6 +32,7 @@ def linecol(doc, pos): colno = pos - doc.rindex('\n', 0, pos) return lineno, colno + def errmsg(msg, doc, pos, end=None): lineno, colno = linecol(doc, pos) if end is None: @@ -34,6 +41,7 @@ def errmsg(msg, doc, pos, end=None): return '%s: line %d column %d - line %d column %d (char %d - %d)' % ( msg, lineno, colno, endlineno, endcolno, pos, end) + _CONSTANTS = { '-Infinity': NegInf, 'Infinity': PosInf, @@ -44,20 +52,30 @@ _CONSTANTS = { } def JSONConstant(match, context, c=_CONSTANTS): - return c[match.group(0)], None + s = match.group(0) + fn = getattr(context, 'parse_constant', None) + if fn is None: + rval = c[s] + else: + rval = fn(s) + return rval, None pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant) + def JSONNumber(match, context): match = JSONNumber.regex.match(match.string, *match.span()) integer, frac, exp = match.groups() if frac or exp: - res = float(integer + (frac or '') + (exp or '')) + fn = getattr(context, 'parse_float', None) or float + res = fn(integer + (frac or '') + (exp or '')) else: - res = int(integer) + fn = getattr(context, 'parse_int', None) or int + res = fn(integer) return res, None pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber) -STRINGCHUNK = re.compile(r'(.*?)(["\\])', FLAGS) + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) BACKSLASH = { '"': u'"', '\\': u'\\', '/': u'/', 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', @@ -65,7 +83,7 @@ BACKSLASH = { DEFAULT_ENCODING = "utf-8" -def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match): +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): if encoding is None: encoding = DEFAULT_ENCODING chunks = [] @@ -84,6 +102,12 @@ def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match): _append(content) if terminator == '"': break + elif terminator != '\\': + if strict: + raise ValueError(errmsg("Invalid control character %r at", s, end)) + else: + _append(terminator) + continue try: esc = s[end] except IndexError: @@ -98,21 +122,43 @@ def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match): end += 1 else: esc = s[end + 1:end + 5] + next_end = end + 5 + msg = "Invalid \\uXXXX escape" try: - m = unichr(int(esc, 16)) - if len(esc) != 4 or not esc.isalnum(): + if len(esc) != 4: raise ValueError + uni = int(esc, 16) + if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: + msg = "Invalid \\uXXXX\\uXXXX surrogate pair" + if not s[end + 5:end + 7] == '\\u': + raise ValueError + esc2 = s[end + 7:end + 11] + if len(esc2) != 4: + raise ValueError + uni2 = int(esc2, 16) + uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) + next_end += 6 + m = unichr(uni) except ValueError: - raise ValueError(errmsg("Invalid \\uXXXX escape", s, end)) - end += 5 + raise ValueError(errmsg(msg, s, end)) + end = next_end _append(m) return u''.join(chunks), end + +# Use speedup +try: + scanstring = c_scanstring +except NameError: + scanstring = py_scanstring + def JSONString(match, context): encoding = getattr(context, 'encoding', None) - return scanstring(match.string, match.end(), encoding) + strict = getattr(context, 'strict', True) + return scanstring(match.string, match.end(), encoding, strict) pattern(r'"')(JSONString) + WHITESPACE = re.compile(r'\s*', FLAGS) def JSONObject(match, context, _w=WHITESPACE.match): @@ -120,16 +166,17 @@ def JSONObject(match, context, _w=WHITESPACE.match): s = match.string end = _w(s, match.end()).end() nextchar = s[end:end + 1] - # trivial empty object + # Trivial empty object if nextchar == '}': return pairs, end + 1 if nextchar != '"': raise ValueError(errmsg("Expecting property name", s, end)) end += 1 encoding = getattr(context, 'encoding', None) + strict = getattr(context, 'strict', True) iterscan = JSONScanner.iterscan while True: - key, end = scanstring(s, end, encoding) + key, end = scanstring(s, end, encoding, strict) end = _w(s, end).end() if s[end:end + 1] != ':': raise ValueError(errmsg("Expecting : delimiter", s, end)) @@ -156,12 +203,13 @@ def JSONObject(match, context, _w=WHITESPACE.match): pairs = object_hook(pairs) return pairs, end pattern(r'{')(JSONObject) - + + def JSONArray(match, context, _w=WHITESPACE.match): values = [] s = match.string end = _w(s, match.end()).end() - # look-ahead for trivial empty array + # Look-ahead for trivial empty array nextchar = s[end:end + 1] if nextchar == ']': return values, end + 1 @@ -182,7 +230,8 @@ def JSONArray(match, context, _w=WHITESPACE.match): end = _w(s, end).end() return values, end pattern(r'\[')(JSONArray) - + + ANYTHING = [ JSONObject, JSONArray, @@ -193,11 +242,12 @@ ANYTHING = [ JSONScanner = Scanner(ANYTHING) + class JSONDecoder(object): """ Simple JSON decoder - Performs the following translations in decoding: + Performs the following translations in decoding by default: +---------------+-------------------+ | JSON | Python | @@ -226,7 +276,8 @@ class JSONDecoder(object): _scanner = Scanner(ANYTHING) __all__ = ['__init__', 'decode', 'raw_decode'] - def __init__(self, encoding=None, object_hook=None): + def __init__(self, encoding=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True): """ ``encoding`` determines the encoding used to interpret any ``str`` objects decoded by this instance (utf-8 by default). It has no @@ -239,9 +290,28 @@ class JSONDecoder(object): of every JSON object decoded and its return value will be used in place of the given ``dict``. This can be used to provide custom deserializations (e.g. to support JSON-RPC class hinting). + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN, null, true, false. + This can be used to raise an exception if invalid JSON numbers + are encountered. """ self.encoding = encoding self.object_hook = object_hook + self.parse_float = parse_float + self.parse_int = parse_int + self.parse_constant = parse_constant + self.strict = strict def decode(self, s, _w=WHITESPACE.match): """ diff --git a/django/utils/simplejson/encoder.py b/django/utils/simplejson/encoder.py index c83c6873eb..e6c01f6138 100644 --- a/django/utils/simplejson/encoder.py +++ b/django/utils/simplejson/encoder.py @@ -3,11 +3,15 @@ Implementation of JSONEncoder """ import re -ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]') -ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])') +try: + from django.utils.simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii +except ImportError: + pass + +ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { - # escape all forward slashes to prevent attack - '/': '\\/', '\\': '\\\\', '"': '\\"', '\b': '\\b', @@ -19,8 +23,9 @@ ESCAPE_DCT = { for i in range(0x20): ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) -# assume this produces an infinity on all machines (probably not guaranteed) +# Assume this produces an infinity on all machines (probably not guaranteed) INFINITY = float('1e66666') +FLOAT_REPR = repr def floatstr(o, allow_nan=True): # Check for specials. Note that this type of test is processor- and/or @@ -33,7 +38,7 @@ def floatstr(o, allow_nan=True): elif o == -INFINITY: text = '-Infinity' else: - return str(o) + return FLOAT_REPR(o) if not allow_nan: raise ValueError("Out of range float values are not JSON compliant: %r" @@ -50,15 +55,32 @@ def encode_basestring(s): return ESCAPE_DCT[match.group(0)] return '"' + ESCAPE.sub(replace, s) + '"' -def encode_basestring_ascii(s): + +def py_encode_basestring_ascii(s): + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): s = match.group(0) try: return ESCAPE_DCT[s] except KeyError: - return '\\u%04x' % (ord(s),) + n = ord(s) + if n < 0x10000: + return '\\u%04x' % (n,) + else: + # surrogate pair + n -= 0x10000 + s1 = 0xd800 | ((n >> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + return '\\u%04x\\u%04x' % (s1, s2) return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' - + + +try: + encode_basestring_ascii = c_encode_basestring_ascii +except NameError: + encode_basestring_ascii = py_encode_basestring_ascii + class JSONEncoder(object): """ @@ -94,7 +116,7 @@ class JSONEncoder(object): key_separator = ': ' def __init__(self, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None): + indent=None, separators=None, encoding='utf-8', default=None): """ Constructor for JSONEncoder, with sensible defaults. @@ -126,8 +148,16 @@ class JSONEncoder(object): None is the most compact representation. If specified, separators should be a (item_separator, key_separator) - tuple. The default is (', ', ': '). To get the most compact JSON + tuple. The default is (', ', ': '). To get the most compact JSON representation you should specify (',', ':') to eliminate whitespace. + + If specified, default is a function that gets called for objects + that can't otherwise be serialized. It should return a JSON encodable + version of the object or raise a ``TypeError``. + + If encoding is not None, then all input strings will be + transformed into unicode using that encoding prior to JSON-encoding. + The default is UTF-8. """ self.skipkeys = skipkeys @@ -139,6 +169,9 @@ class JSONEncoder(object): self.current_indent_level = 0 if separators is not None: self.item_separator, self.key_separator = separators + if default is not None: + self.default = default + self.encoding = encoding def _newline_indent(self): return '\n' + (' ' * (self.indent * self.current_indent_level)) @@ -207,8 +240,14 @@ class JSONEncoder(object): items = [(k, dct[k]) for k in keys] else: items = dct.iteritems() + _encoding = self.encoding + _do_decode = (_encoding is not None + and not (_encoding == 'utf-8')) for key, value in items: - if isinstance(key, basestring): + if isinstance(key, str): + if _do_decode: + key = key.decode(_encoding) + elif isinstance(key, basestring): pass # JavaScript is weakly typed for these, so it makes sense to # also allow them. Many encoders seem to do something like this. @@ -247,6 +286,10 @@ class JSONEncoder(object): encoder = encode_basestring_ascii else: encoder = encode_basestring + _encoding = self.encoding + if (_encoding is not None and isinstance(o, str) + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) yield encoder(o) elif o is None: yield 'null' @@ -304,11 +347,22 @@ class JSONEncoder(object): Return a JSON string representation of a Python data structure. >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) - '{"foo":["bar", "baz"]}' + '{"foo": ["bar", "baz"]}' """ - # This doesn't pass the iterator directly to ''.join() because it - # sucks at reporting exceptions. It's going to do this internally - # anyway because it uses PySequence_Fast or similar. + # This is for extremely simple cases and benchmarks. + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. chunks = list(self.iterencode(o)) return ''.join(chunks) diff --git a/django/utils/simplejson/jsonfilter.py b/django/utils/simplejson/jsonfilter.py deleted file mode 100644 index d02ae2033a..0000000000 --- a/django/utils/simplejson/jsonfilter.py +++ /dev/null @@ -1,40 +0,0 @@ -from django.utils import simplejson -import cgi - -class JSONFilter(object): - def __init__(self, app, mime_type='text/x-json'): - self.app = app - self.mime_type = mime_type - - def __call__(self, environ, start_response): - # Read JSON POST input to jsonfilter.json if matching mime type - response = {'status': '200 OK', 'headers': []} - def json_start_response(status, headers): - response['status'] = status - response['headers'].extend(headers) - environ['jsonfilter.mime_type'] = self.mime_type - if environ.get('REQUEST_METHOD', '') == 'POST': - if environ.get('CONTENT_TYPE', '') == self.mime_type: - args = [_ for _ in [environ.get('CONTENT_LENGTH')] if _] - data = environ['wsgi.input'].read(*map(int, args)) - environ['jsonfilter.json'] = simplejson.loads(data) - res = simplejson.dumps(self.app(environ, json_start_response)) - jsonp = cgi.parse_qs(environ.get('QUERY_STRING', '')).get('jsonp') - if jsonp: - content_type = 'text/javascript' - res = ''.join(jsonp + ['(', res, ')']) - elif 'Opera' in environ.get('HTTP_USER_AGENT', ''): - # Opera has bunk XMLHttpRequest support for most mime types - content_type = 'text/plain' - else: - content_type = self.mime_type - headers = [ - ('Content-type', content_type), - ('Content-length', len(res)), - ] - headers.extend(response['headers']) - start_response(response['status'], headers) - return [res] - -def factory(app, global_conf, **kw): - return JSONFilter(app, **kw) diff --git a/django/utils/simplejson/scanner.py b/django/utils/simplejson/scanner.py index 64f4999fb5..2a18390d0d 100644 --- a/django/utils/simplejson/scanner.py +++ b/django/utils/simplejson/scanner.py @@ -1,18 +1,21 @@ """ Iterator based sre token scanner """ -import sre_parse, sre_compile, sre_constants -from sre_constants import BRANCH, SUBPATTERN -from re import VERBOSE, MULTILINE, DOTALL import re +from re import VERBOSE, MULTILINE, DOTALL +import sre_parse +import sre_compile +import sre_constants +from sre_constants import BRANCH, SUBPATTERN __all__ = ['Scanner', 'pattern'] FLAGS = (VERBOSE | MULTILINE | DOTALL) + class Scanner(object): def __init__(self, lexicon, flags=FLAGS): self.actions = [None] - # combine phrases into a compound pattern + # Combine phrases into a compound pattern s = sre_parse.Pattern() s.flags = flags p = [] @@ -26,10 +29,10 @@ class Scanner(object): p.append(subpattern) self.actions.append(token) + s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) self.scanner = sre_compile.compile(p) - def iterscan(self, string, idx=0, context=None): """ Yield match, end_idx for each match @@ -54,10 +57,11 @@ class Scanner(object): match = self.scanner.scanner(string, matchend).match yield rval, matchend lastend = matchend - + + def pattern(pattern, flags=FLAGS): def decorator(fn): fn.pattern = pattern fn.regex = re.compile(pattern, flags) return fn - return decorator + return decorator \ No newline at end of file diff --git a/django/utils/simplejson/tool.py b/django/utils/simplejson/tool.py new file mode 100644 index 0000000000..d0eb8ad9ee --- /dev/null +++ b/django/utils/simplejson/tool.py @@ -0,0 +1,44 @@ +r""" +Using simplejson from the shell to validate and +pretty-print:: + + $ echo '{"json":"obj"}' | python -msimplejson + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -msimplejson + Expecting property name: line 1 column 2 (char 2) + +Note that the JSON produced by this module's default settings +is a subset of YAML, so it may be used as a serializer for that as well. +""" +import django.utils.simplejson + +# +# Pretty printer: +# curl http://mochikit.com/examples/ajax_tables/domains.json | python -msimplejson.tool +# + +def main(): + import sys + if len(sys.argv) == 1: + infile = sys.stdin + outfile = sys.stdout + elif len(sys.argv) == 2: + infile = open(sys.argv[1], 'rb') + outfile = sys.stdout + elif len(sys.argv) == 3: + infile = open(sys.argv[1], 'rb') + outfile = open(sys.argv[2], 'wb') + else: + raise SystemExit("%s [infile [outfile]]" % (sys.argv[0],)) + try: + obj = simplejson.load(infile) + except ValueError, e: + raise SystemExit(e) + simplejson.dump(obj, outfile, sort_keys=True, indent=4) + outfile.write('\n') + + +if __name__ == '__main__': + main()