diff options
author | Nick Babcock <nbabcock19@hotmail.com> | 2015-07-10 17:25:50 -0400 |
---|---|---|
committer | Nick Babcock <nbabcock19@hotmail.com> | 2015-07-10 17:25:50 -0400 |
commit | a9e5e7256c9fb27126813c9430be1dab5f27f7ba (patch) | |
tree | 488eb7357fd5e781395161b7aa6be5aabea6a8ed /simplejson | |
parent | 49724ee06f038d27ab3d4adbe4ed403692799aa1 (diff) | |
parent | 0bcdf20cc525c1343b796cb8f247ea5213c6557e (diff) | |
download | simplejson-a9e5e7256c9fb27126813c9430be1dab5f27f7ba.tar.gz |
Merge branch 'master' into iterable_as_array-gh1
Conflicts:
CHANGES.txt
conf.py
index.rst
setup.py
simplejson/__init__.py
simplejson/_speedups.c
simplejson/encoder.py
simplejson/tests/test_tuple.py
Diffstat (limited to 'simplejson')
34 files changed, 2864 insertions, 1005 deletions
diff --git a/simplejson/__init__.py b/simplejson/__init__.py index a1c0578..cac96ca 100644 --- a/simplejson/__init__.py +++ b/simplejson/__init__.py @@ -13,15 +13,15 @@ Encoding basic Python object hierarchies:: >>> import simplejson as json >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print json.dumps("\"foo\bar") + >>> print(json.dumps("\"foo\bar")) "\"foo\bar" - >>> print json.dumps(u'\u1234') + >>> print(json.dumps(u'\u1234')) "\u1234" - >>> print json.dumps('\\') + >>> print(json.dumps('\\')) "\\" - >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) {"a": 0, "b": 0, "c": 0} - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO() >>> json.dump(['streaming API'], io) >>> io.getvalue() @@ -30,14 +30,14 @@ Encoding basic Python object hierarchies:: Compact encoding:: >>> import simplejson as json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + >>> obj = [1,2,3,{'4': 5, '6': 7}] + >>> json.dumps(obj, separators=(',',':'), sort_keys=True) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: >>> import simplejson as json - >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ') - >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ')) { "4": 5, "6": 7 @@ -51,7 +51,7 @@ Decoding JSON:: True >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' True - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO('["streaming API"]') >>> json.load(io)[0] == 'streaming API' True @@ -94,33 +94,35 @@ Using simplejson.tool from the shell to validate and pretty-print:: "json": "obj" } $ echo '{ 1.2:3.4}' | python -m simplejson.tool - Expecting property name: line 1 column 2 (char 2) + Expecting property name: line 1 column 3 (char 2) """ -__version__ = '2.3.0' +from __future__ import absolute_import +__version__ = '3.7.4' __all__ = [ 'dump', 'dumps', 'load', 'loads', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', - 'OrderedDict', + 'OrderedDict', 'simple_first', ] __author__ = 'Bob Ippolito <bob@redivi.com>' from decimal import Decimal -from decoder import JSONDecoder, JSONDecodeError -from encoder import JSONEncoder +from .scanner import JSONDecodeError +from .decoder import JSONDecoder +from .encoder import JSONEncoder, JSONEncoderForHTML def _import_OrderedDict(): import collections try: return collections.OrderedDict except AttributeError: - import ordered_dict + from . import ordered_dict return ordered_dict.OrderedDict OrderedDict = _import_OrderedDict() def _import_c_make_encoder(): try: - from simplejson._speedups import make_encoder + from ._speedups import make_encoder return make_encoder except ImportError: return None @@ -138,35 +140,42 @@ _default_encoder = JSONEncoder( namedtuple_as_object=True, tuple_as_array=True, iterable_as_array=False, + bigint_as_string=False, + item_sort_key=None, + for_json=False, + ignore_nan=False, + int_as_string_bitcount=None, ) def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, use_decimal=True, - namedtuple_as_object=True, tuple_as_array=True, - iterable_as_array=False, - **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, + iterable_as_array=False, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). - If ``skipkeys`` is true then ``dict`` keys that are not basic types + If *skipkeys* is true then ``dict`` keys that are not basic types (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) will be skipped instead of raising a ``TypeError``. - If ``ensure_ascii`` is false, then the some chunks written to ``fp`` + If *ensure_ascii* is false, then the some chunks written to ``fp`` may be ``unicode`` instances, subject to normal Python ``str`` to ``unicode`` coercion rules. Unless ``fp.write()`` explicitly understands ``unicode`` (as in ``codecs.getwriter()``) this is likely to cause an error. - If ``check_circular`` is false, then the circular reference check + If *check_circular* is false, then the circular reference check for container types will be skipped and a circular reference will result in an ``OverflowError`` (or worse). - If ``allow_nan`` is false, then it will be a ``ValueError`` to + If *allow_nan* is false, then it will be a ``ValueError`` to serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) - in strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + in strict compliance of the original JSON specification, instead of using + the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See + *ignore_nan* for ECMA-262 compliant behavior. If *indent* is a string, then JSON array elements and object members will be pretty-printed with a newline followed by that string repeated @@ -175,14 +184,16 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, versions of simplejson earlier than 2.1.0, an integer is also accepted and is converted to a string with that many spaces. - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. + If specified, *separators* should be an + ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` + if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most + compact JSON representation, you should specify ``(',', ':')`` to eliminate + whitespace. - ``encoding`` is the character encoding for str instances, default is UTF-8. + *encoding* is the character encoding for str instances, default is UTF-8. - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. + *default(obj)* is a function that should return a serializable version + of obj or raise ``TypeError``. The default simply raises ``TypeError``. If *use_decimal* is true (default: ``True``) then decimal.Decimal will be natively serialized to JSON with full precision. @@ -198,18 +209,50 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, any object not in the above table that implements ``__iter__()`` will be encoded as a JSON array. + If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. Note that this is still a + lossy operation that will not round-trip correctly and should be used + sparingly. + + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precedence over + *sort_keys*. + + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. + the ``cls`` kwarg. NOTE: You should use *default* or *for_json* instead + of subclassing whenever possible. """ # cached encoder if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and use_decimal and - namedtuple_as_object and tuple_as_array and - not iterable_as_array and not kw): + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array and not iterable_as_array + and not bigint_as_string and not sort_keys + and not item_sort_key and not for_json + and not ignore_nan and int_as_string_bitcount is None + and not kw + ): iterable = _default_encoder.iterencode(obj) else: if cls is None: @@ -221,6 +264,12 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, namedtuple_as_object=namedtuple_as_object, tuple_as_array=tuple_as_array, iterable_as_array=iterable_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost @@ -229,12 +278,12 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, use_decimal=True, - namedtuple_as_object=True, - tuple_as_array=True, - iterable_as_array=False, - **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, + iterable_as_array=False, **kw): """Serialize ``obj`` to a JSON formatted ``str``. If ``skipkeys`` is false then ``dict`` keys that are not basic types @@ -261,9 +310,11 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, versions of simplejson earlier than 2.1.0, an integer is also accepted and is converted to a string with that many spaces. - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. + If specified, ``separators`` should be an + ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` + if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most + compact JSON representation, you should specify ``(',', ':')`` to eliminate + whitespace. ``encoding`` is the character encoding for str instances, default is UTF-8. @@ -284,18 +335,48 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, any object not in the above table that implements ``__iter__()`` will be encoded as a JSON array. + If *bigint_as_string* is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. + + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precendence over + *sort_keys*. + + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. + the ``cls`` kwarg. NOTE: You should use *default* instead of subclassing + whenever possible. """ # cached encoder if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and use_decimal and - namedtuple_as_object and tuple_as_array and - not iterable_as_array and not kw): + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array and not iterable_as_array + and not bigint_as_string and not sort_keys + and not item_sort_key and not for_json + and not ignore_nan and int_as_string_bitcount is None + and not kw + ): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder @@ -307,6 +388,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, namedtuple_as_object=namedtuple_as_object, tuple_as_array=tuple_as_array, iterable_as_array=iterable_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, **kw).encode(obj) @@ -361,7 +448,8 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, parse_float=decimal.Decimal for parity with ``dump``. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. """ return loads(fp.read(), @@ -417,7 +505,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, parse_float=decimal.Decimal for parity with ``dump``. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. """ if (cls is None and encoding is None and object_hook is None and @@ -445,9 +534,9 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, def _toggle_speedups(enabled): - import simplejson.decoder as dec - import simplejson.encoder as enc - import simplejson.scanner as scan + from . import decoder as dec + from . import encoder as enc + from . import scanner as scan c_make_encoder = _import_c_make_encoder() if enabled: dec.scanstring = dec.c_scanstring or dec.py_scanstring @@ -478,3 +567,9 @@ def _toggle_speedups(enabled): encoding='utf-8', default=None, ) + +def simple_first(kv): + """Helper function to pass to item_sort_key to sort simple + elements to the top, then container elements. + """ + return (isinstance(kv[1], (list, dict, tuple)), kv[0]) diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c index 783bac8..691c73c 100644 --- a/simplejson/_speedups.c +++ b/simplejson/_speedups.c @@ -1,11 +1,50 @@ +/* -*- mode: C; c-file-style: "python"; c-basic-offset: 4 -*- */ #include "Python.h" #include "structmember.h" -#if PY_VERSION_HEX < 0x02070000 && !defined(PyOS_string_to_double) + +#if PY_MAJOR_VERSION >= 3 +#define PyInt_FromSsize_t PyLong_FromSsize_t +#define PyInt_AsSsize_t PyLong_AsSsize_t +#define PyString_Check PyBytes_Check +#define PyString_GET_SIZE PyBytes_GET_SIZE +#define PyString_AS_STRING PyBytes_AS_STRING +#define PyString_FromStringAndSize PyBytes_FromStringAndSize +#define PyInt_Check(obj) 0 +#define PyInt_CheckExact(obj) 0 +#define JSON_UNICHR Py_UCS4 +#define JSON_InternFromString PyUnicode_InternFromString +#define JSON_Intern_GET_SIZE PyUnicode_GET_SIZE +#define JSON_ASCII_Check PyUnicode_Check +#define JSON_ASCII_AS_STRING PyUnicode_AsUTF8 +#define PyInt_Type PyLong_Type +#define PyInt_FromString PyLong_FromString +#define PY2_UNUSED +#define PY3_UNUSED UNUSED +#define JSON_NewEmptyUnicode() PyUnicode_New(0, 127) +#else /* PY_MAJOR_VERSION >= 3 */ +#define PY2_UNUSED UNUSED +#define PY3_UNUSED +#define PyUnicode_READY(obj) 0 +#define PyUnicode_KIND(obj) (sizeof(Py_UNICODE)) +#define PyUnicode_DATA(obj) ((void *)(PyUnicode_AS_UNICODE(obj))) +#define PyUnicode_READ(kind, data, index) ((JSON_UNICHR)((const Py_UNICODE *)(data))[(index)]) +#define PyUnicode_GetLength PyUnicode_GET_SIZE +#define JSON_UNICHR Py_UNICODE +#define JSON_ASCII_Check PyString_Check +#define JSON_ASCII_AS_STRING PyString_AS_STRING +#define JSON_InternFromString PyString_InternFromString +#define JSON_Intern_GET_SIZE PyString_GET_SIZE +#define JSON_NewEmptyUnicode() PyUnicode_FromUnicode(NULL, 0) +#endif /* PY_MAJOR_VERSION < 3 */ + +#if PY_VERSION_HEX < 0x02070000 +#if !defined(PyOS_string_to_double) #define PyOS_string_to_double json_PyOS_string_to_double static double json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception); static double -json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) { +json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) +{ double x; assert(endptr == NULL); assert(overflow_exception == NULL); @@ -15,22 +54,32 @@ json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exce return x; } #endif -#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) +#endif /* PY_VERSION_HEX < 0x02070000 */ + +#if PY_VERSION_HEX < 0x02060000 +#if !defined(Py_TYPE) #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) #endif -#if PY_VERSION_HEX < 0x02060000 && !defined(Py_SIZE) +#if !defined(Py_SIZE) #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) #endif -#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +#if !defined(PyVarObject_HEAD_INIT) +#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, +#endif +#endif /* PY_VERSION_HEX < 0x02060000 */ + +#if PY_VERSION_HEX < 0x02050000 +#if !defined(PY_SSIZE_T_MIN) typedef int Py_ssize_t; #define PY_SSIZE_T_MAX INT_MAX #define PY_SSIZE_T_MIN INT_MIN #define PyInt_FromSsize_t PyInt_FromLong #define PyInt_AsSsize_t PyInt_AsLong #endif -#ifndef Py_IS_FINITE +#if !defined(Py_IS_FINITE) #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) #endif +#endif /* PY_VERSION_HEX < 0x02050000 */ #ifdef __GNUC__ #define UNUSED __attribute__((__unused__)) @@ -44,11 +93,38 @@ typedef int Py_ssize_t; #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) -#define Decimal_Check(op) (PyObject_TypeCheck(op, DecimalTypePtr)) + +#define JSON_ALLOW_NAN 1 +#define JSON_IGNORE_NAN 2 static PyTypeObject PyScannerType; static PyTypeObject PyEncoderType; -static PyTypeObject *DecimalTypePtr; + +typedef struct { + PyObject *large_strings; /* A list of previously accumulated large strings */ + PyObject *small_strings; /* Pending small strings */ +} JSON_Accu; + +static int +JSON_Accu_Init(JSON_Accu *acc); +static int +JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode); +static PyObject * +JSON_Accu_FinishAsList(JSON_Accu *acc); +static void +JSON_Accu_Destroy(JSON_Accu *acc); + +#define ERR_EXPECTING_VALUE "Expecting value" +#define ERR_ARRAY_DELIMITER "Expecting ',' delimiter or ']'" +#define ERR_ARRAY_VALUE_FIRST "Expecting value or ']'" +#define ERR_OBJECT_DELIMITER "Expecting ',' delimiter or '}'" +#define ERR_OBJECT_PROPERTY "Expecting property name enclosed in double quotes" +#define ERR_OBJECT_PROPERTY_FIRST "Expecting property name enclosed in double quotes or '}'" +#define ERR_OBJECT_PROPERTY_DELIMITER "Expecting ':' delimiter" +#define ERR_STRING_UNTERMINATED "Unterminated string starting at" +#define ERR_STRING_CONTROL "Invalid control character %r at" +#define ERR_STRING_ESC1 "Invalid \\X escape sequence %r" +#define ERR_STRING_ESC4 "Invalid \\uXXXX escape sequence" typedef struct _PyScannerObject { PyObject_HEAD @@ -82,41 +158,74 @@ typedef struct _PyEncoderObject { PyObject *key_separator; PyObject *item_separator; PyObject *sort_keys; - PyObject *skipkeys; PyObject *key_memo; + PyObject *encoding; + PyObject *Decimal; + PyObject *skipkeys_bool; + int skipkeys; int fast_encode; - int allow_nan; + /* 0, JSON_ALLOW_NAN, JSON_IGNORE_NAN */ + int allow_or_ignore_nan; int use_decimal; int namedtuple_as_object; int tuple_as_array; int iterable_as_array; + PyObject *max_long_size; + PyObject *min_long_size; + PyObject *item_sort_key; + PyObject *item_sort_kw; + int for_json; } PyEncoderObject; static PyMemberDef encoder_members[] = { {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, + {"encoding", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoding"}, {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, - {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, + /* Python 2.5 does not support T_BOOl */ + {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys_bool), READONLY, "skipkeys"}, {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"}, + {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"}, + {"max_long_size", T_OBJECT, offsetof(PyEncoderObject, max_long_size), READONLY, "max_long_size"}, + {"min_long_size", T_OBJECT, offsetof(PyEncoderObject, min_long_size), READONLY, "min_long_size"}, {NULL} }; +static PyObject * +join_list_unicode(PyObject *lst); +static PyObject * +JSON_ParseEncoding(PyObject *encoding); +static PyObject * +JSON_UnicodeFromChar(JSON_UNICHR c); +static PyObject * +maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj); +static Py_ssize_t +ascii_char_size(JSON_UNICHR c); static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); +ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars); static PyObject * ascii_escape_unicode(PyObject *pystr); static PyObject * ascii_escape_str(PyObject *pystr); static PyObject * py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); -void init_speedups(void); +#if PY_MAJOR_VERSION < 3 +static PyObject * +join_list_string(PyObject *lst); static PyObject * scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr); +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +#endif +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr); +static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); @@ -136,12 +245,14 @@ static void encoder_dealloc(PyObject *self); static int encoder_clear(PyObject *self); +static PyObject * +encoder_stringify_key(PyEncoderObject *s, PyObject *key); static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); +encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level); static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); +encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level); static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); +encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level); static PyObject * _encoded_const(PyObject *obj); static void @@ -156,21 +267,173 @@ static PyObject * encoder_encode_float(PyEncoderObject *s, PyObject *obj); static int _is_namedtuple(PyObject *obj); +static int +_has_for_json_hook(PyObject *obj); +static PyObject * +moduleinit(void); #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) #define MIN_EXPANSION 6 -#ifdef Py_UNICODE_WIDE -#define MAX_EXPANSION (2 * MIN_EXPANSION) + +static int +JSON_Accu_Init(JSON_Accu *acc) +{ + /* Lazily allocated */ + acc->large_strings = NULL; + acc->small_strings = PyList_New(0); + if (acc->small_strings == NULL) + return -1; + return 0; +} + +static int +flush_accumulator(JSON_Accu *acc) +{ + Py_ssize_t nsmall = PyList_GET_SIZE(acc->small_strings); + if (nsmall) { + int ret; + PyObject *joined; + if (acc->large_strings == NULL) { + acc->large_strings = PyList_New(0); + if (acc->large_strings == NULL) + return -1; + } +#if PY_MAJOR_VERSION >= 3 + joined = join_list_unicode(acc->small_strings); +#else /* PY_MAJOR_VERSION >= 3 */ + joined = join_list_string(acc->small_strings); +#endif /* PY_MAJOR_VERSION < 3 */ + if (joined == NULL) + return -1; + if (PyList_SetSlice(acc->small_strings, 0, nsmall, NULL)) { + Py_DECREF(joined); + return -1; + } + ret = PyList_Append(acc->large_strings, joined); + Py_DECREF(joined); + return ret; + } + return 0; +} + +static int +JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode) +{ + Py_ssize_t nsmall; +#if PY_MAJOR_VERSION >= 3 + assert(PyUnicode_Check(unicode)); +#else /* PY_MAJOR_VERSION >= 3 */ + assert(JSON_ASCII_Check(unicode) || PyUnicode_Check(unicode)); +#endif /* PY_MAJOR_VERSION < 3 */ + + if (PyList_Append(acc->small_strings, unicode)) + return -1; + nsmall = PyList_GET_SIZE(acc->small_strings); + /* Each item in a list of unicode objects has an overhead (in 64-bit + * builds) of: + * - 8 bytes for the list slot + * - 56 bytes for the header of the unicode object + * that is, 64 bytes. 100000 such objects waste more than 6MB + * compared to a single concatenated string. + */ + if (nsmall < 100000) + return 0; + return flush_accumulator(acc); +} + +static PyObject * +JSON_Accu_FinishAsList(JSON_Accu *acc) +{ + int ret; + PyObject *res; + + ret = flush_accumulator(acc); + Py_CLEAR(acc->small_strings); + if (ret) { + Py_CLEAR(acc->large_strings); + return NULL; + } + res = acc->large_strings; + acc->large_strings = NULL; + if (res == NULL) + return PyList_New(0); + return res; +} + +static void +JSON_Accu_Destroy(JSON_Accu *acc) +{ + Py_CLEAR(acc->small_strings); + Py_CLEAR(acc->large_strings); +} + +static int +IS_DIGIT(JSON_UNICHR c) +{ + return c >= '0' && c <= '9'; +} + +static PyObject * +JSON_UnicodeFromChar(JSON_UNICHR c) +{ +#if PY_MAJOR_VERSION >= 3 + PyObject *rval = PyUnicode_New(1, c); + if (rval) + PyUnicode_WRITE(PyUnicode_KIND(rval), PyUnicode_DATA(rval), 0, c); + return rval; +#else /* PY_MAJOR_VERSION >= 3 */ + return PyUnicode_FromUnicode(&c, 1); +#endif /* PY_MAJOR_VERSION < 3 */ +} + +static PyObject * +maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj) +{ + if (s->max_long_size != Py_None && s->min_long_size != Py_None) { + if (PyObject_RichCompareBool(obj, s->max_long_size, Py_GE) || + PyObject_RichCompareBool(obj, s->min_long_size, Py_LE)) { +#if PY_MAJOR_VERSION >= 3 + PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded); #else -#define MAX_EXPANSION MIN_EXPANSION + PyObject* quoted = PyString_FromFormat("\"%s\"", + PyString_AsString(encoded)); #endif + Py_DECREF(encoded); + encoded = quoted; + } + } + + return encoded; +} static int _is_namedtuple(PyObject *obj) { - return PyTuple_Check(obj) && PyObject_HasAttrString(obj, "_asdict"); + int rval = 0; + PyObject *_asdict = PyObject_GetAttrString(obj, "_asdict"); + if (_asdict == NULL) { + PyErr_Clear(); + return 0; + } + rval = PyCallable_Check(_asdict); + Py_DECREF(_asdict); + return rval; +} + +static int +_has_for_json_hook(PyObject *obj) +{ + int rval = 0; + PyObject *for_json = PyObject_GetAttrString(obj, "for_json"); + if (for_json == NULL) { + PyErr_Clear(); + return 0; + } + rval = PyCallable_Check(for_json); + Py_DECREF(for_json); + return rval; } static int @@ -191,44 +454,74 @@ _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) } static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) +ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars) { /* Escape unicode code point c to ASCII escape sequences in char *output. output must have at least 12 bytes unused to accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ - output[chars++] = '\\'; - switch (c) { - case '\\': output[chars++] = (char)c; break; - case '"': output[chars++] = (char)c; break; - case '\b': output[chars++] = 'b'; break; - case '\f': output[chars++] = 'f'; break; - case '\n': output[chars++] = 'n'; break; - case '\r': output[chars++] = 'r'; break; - case '\t': output[chars++] = 't'; break; - default: -#ifdef Py_UNICODE_WIDE - if (c >= 0x10000) { - /* UTF-16 surrogate pair */ - Py_UNICODE v = c - 0x10000; - c = 0xd800 | ((v >> 10) & 0x3ff); + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3 + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + JSON_UNICHR v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif output[chars++] = 'u'; output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; output[chars++] = "0123456789abcdef"[(c ) & 0xf]; - c = 0xdc00 | (v & 0x3ff); - output[chars++] = '\\'; - } -#endif - output[chars++] = 'u'; - output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; - output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } } return chars; } +static Py_ssize_t +ascii_char_size(JSON_UNICHR c) +{ + if (S_CHAR(c)) { + return 1; + } + else if (c == '\\' || + c == '"' || + c == '\b' || + c == '\f' || + c == '\n' || + c == '\r' || + c == '\t') { + return 2; + } +#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3 + else if (c >= 0x10000U) { + return 2 * MIN_EXPANSION; + } +#endif + else { + return MIN_EXPANSION; + } +} + static PyObject * ascii_escape_unicode(PyObject *pystr) { @@ -236,57 +529,62 @@ ascii_escape_unicode(PyObject *pystr) Py_ssize_t i; Py_ssize_t input_chars; Py_ssize_t output_size; - Py_ssize_t max_output_size; Py_ssize_t chars; + PY2_UNUSED int kind; + void *data; PyObject *rval; char *output; - Py_UNICODE *input_unicode; - input_chars = PyUnicode_GET_SIZE(pystr); - input_unicode = PyUnicode_AS_UNICODE(pystr); + if (PyUnicode_READY(pystr)) + return NULL; - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - max_output_size = 2 + (input_chars * MAX_EXPANSION); + kind = PyUnicode_KIND(pystr); + data = PyUnicode_DATA(pystr); + input_chars = PyUnicode_GetLength(pystr); + output_size = 2; + for (i = 0; i < input_chars; i++) { + output_size += ascii_char_size(PyUnicode_READ(kind, data, i)); + } +#if PY_MAJOR_VERSION >= 3 + rval = PyUnicode_New(output_size, 127); + if (rval == NULL) { + return NULL; + } + assert(PyUnicode_KIND(rval) == PyUnicode_1BYTE_KIND); + output = (char *)PyUnicode_DATA(rval); +#else rval = PyString_FromStringAndSize(NULL, output_size); if (rval == NULL) { return NULL; } output = PyString_AS_STRING(rval); +#endif chars = 0; output[chars++] = '"'; for (i = 0; i < input_chars; i++) { - Py_UNICODE c = input_unicode[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - if (output_size - chars < (1 + MAX_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - Py_ssize_t new_output_size = output_size * 2; - /* This is an upper bound */ - if (new_output_size > max_output_size) { - new_output_size = max_output_size; - } - /* Make sure that the output size changed before resizing */ - if (new_output_size != output_size) { - output_size = new_output_size; - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; - } - output = PyString_AS_STRING(rval); - } - } + chars = ascii_escape_char(PyUnicode_READ(kind, data, i), output, chars); } output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { + assert(chars == output_size); + return rval; +} + +#if PY_MAJOR_VERSION >= 3 + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + PyObject *rval; + PyObject *input = PyUnicode_DecodeUTF8(PyString_AS_STRING(pystr), PyString_GET_SIZE(pystr), NULL); + if (input == NULL) return NULL; - } + rval = ascii_escape_unicode(input); + Py_DECREF(input); return rval; } +#else /* PY_MAJOR_VERSION >= 3 */ + static PyObject * ascii_escape_str(PyObject *pystr) { @@ -301,91 +599,201 @@ ascii_escape_str(PyObject *pystr) input_chars = PyString_GET_SIZE(pystr); input_str = PyString_AS_STRING(pystr); + output_size = 2; /* Fast path for a string that's already ASCII */ for (i = 0; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (!S_CHAR(c)) { - /* If we have to escape something, scan the string for unicode */ - Py_ssize_t j; - for (j = i; j < input_chars; j++) { - c = (Py_UNICODE)(unsigned char)input_str[j]; - if (c > 0x7f) { - /* We hit a non-ASCII character, bail to unicode mode */ - PyObject *uni; - uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); - if (uni == NULL) { - return NULL; - } - rval = ascii_escape_unicode(uni); - Py_DECREF(uni); - return rval; - } + JSON_UNICHR c = (JSON_UNICHR)input_str[i]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; } - break; + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; } + output_size += ascii_char_size(c); } - if (i == input_chars) { - /* Input is already ASCII */ - output_size = 2 + input_chars; - } - else { - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - } rval = PyString_FromStringAndSize(NULL, output_size); if (rval == NULL) { return NULL; } + chars = 0; output = PyString_AS_STRING(rval); - output[0] = '"'; - - /* We know that everything up to i is ASCII already */ - chars = i + 1; - memcpy(&output[1], input_str, i); + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + chars = ascii_escape_char((JSON_UNICHR)input_str[i], output, chars); + } + output[chars++] = '"'; + assert(chars == output_size); + return rval; +} +#endif /* PY_MAJOR_VERSION < 3 */ - for (; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - /* An ASCII char can't possibly expand to a surrogate! */ - if (output_size - chars < (1 + MIN_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - output_size *= 2; - if (output_size > 2 + (input_chars * MIN_EXPANSION)) { - output_size = 2 + (input_chars * MIN_EXPANSION); - } - if (_PyString_Resize(&rval, output_size) == -1) { +static PyObject * +encoder_stringify_key(PyEncoderObject *s, PyObject *key) +{ + if (PyUnicode_Check(key)) { + Py_INCREF(key); + return key; + } + else if (PyString_Check(key)) { +#if PY_MAJOR_VERSION >= 3 + return PyUnicode_Decode( + PyString_AS_STRING(key), + PyString_GET_SIZE(key), + JSON_ASCII_AS_STRING(s->encoding), + NULL); +#else /* PY_MAJOR_VERSION >= 3 */ + Py_INCREF(key); + return key; +#endif /* PY_MAJOR_VERSION < 3 */ + } + else if (PyFloat_Check(key)) { + return encoder_encode_float(s, key); + } + else if (key == Py_True || key == Py_False || key == Py_None) { + /* This must come before the PyInt_Check because + True and False are also 1 and 0.*/ + return _encoded_const(key); + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + if (!(PyInt_CheckExact(key) || PyLong_CheckExact(key))) { + /* See #118, do not trust custom str/repr */ + PyObject *res; + PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyLong_Type, key, NULL); + if (tmp == NULL) { return NULL; } - output = PyString_AS_STRING(rval); + res = PyObject_Str(tmp); + Py_DECREF(tmp); + return res; + } + else { + return PyObject_Str(key); } } - output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { + else if (s->use_decimal && PyObject_TypeCheck(key, (PyTypeObject *)s->Decimal)) { + return PyObject_Str(key); + } + else if (s->skipkeys) { + Py_INCREF(Py_None); + return Py_None; + } + PyErr_SetString(PyExc_TypeError, "keys must be a string"); + return NULL; +} + +static PyObject * +encoder_dict_iteritems(PyEncoderObject *s, PyObject *dct) +{ + PyObject *items; + PyObject *iter = NULL; + PyObject *lst = NULL; + PyObject *item = NULL; + PyObject *kstr = NULL; + static PyObject *sortfun = NULL; + static PyObject *sortargs = NULL; + + if (sortargs == NULL) { + sortargs = PyTuple_New(0); + if (sortargs == NULL) + return NULL; + } + + if (PyDict_CheckExact(dct)) + items = PyDict_Items(dct); + else + items = PyMapping_Items(dct); + if (items == NULL) + return NULL; + iter = PyObject_GetIter(items); + Py_DECREF(items); + if (iter == NULL) return NULL; + if (s->item_sort_kw == Py_None) + return iter; + lst = PyList_New(0); + if (lst == NULL) + goto bail; + while ((item = PyIter_Next(iter))) { + PyObject *key, *value; + if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + goto bail; + } + key = PyTuple_GET_ITEM(item, 0); + if (key == NULL) + goto bail; +#if PY_MAJOR_VERSION < 3 + else if (PyString_Check(key)) { + /* item can be added as-is */ + } +#endif /* PY_MAJOR_VERSION < 3 */ + else if (PyUnicode_Check(key)) { + /* item can be added as-is */ + } + else { + PyObject *tpl; + kstr = encoder_stringify_key(s, key); + if (kstr == NULL) + goto bail; + else if (kstr == Py_None) { + /* skipkeys */ + Py_DECREF(kstr); + continue; + } + value = PyTuple_GET_ITEM(item, 1); + if (value == NULL) + goto bail; + tpl = PyTuple_Pack(2, kstr, value); + if (tpl == NULL) + goto bail; + Py_CLEAR(kstr); + Py_DECREF(item); + item = tpl; + } + if (PyList_Append(lst, item)) + goto bail; + Py_DECREF(item); } - return rval; + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; + sortfun = PyObject_GetAttrString(lst, "sort"); + if (sortfun == NULL) + goto bail; + if (!PyObject_Call(sortfun, sortargs, s->item_sort_kw)) + goto bail; + Py_CLEAR(sortfun); + iter = PyObject_GetIter(lst); + Py_CLEAR(lst); + return iter; +bail: + Py_XDECREF(sortfun); + Py_XDECREF(kstr); + Py_XDECREF(item); + Py_XDECREF(lst); + Py_XDECREF(iter); + return NULL; } static void raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) { - /* Use the Python function simplejson.decoder.errmsg to raise a nice - looking ValueError exception */ + /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */ static PyObject *JSONDecodeError = NULL; PyObject *exc; if (JSONDecodeError == NULL) { - PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); - if (decoder == NULL) + PyObject *scanner = PyImport_ImportModule("simplejson.scanner"); + if (scanner == NULL) return; - JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError"); - Py_DECREF(decoder); + JSONDecodeError = PyObject_GetAttrString(scanner, "JSONDecodeError"); + Py_DECREF(scanner); if (JSONDecodeError == NULL) return; } @@ -402,7 +810,7 @@ join_list_unicode(PyObject *lst) /* return u''.join(lst) */ static PyObject *joinfn = NULL; if (joinfn == NULL) { - PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); + PyObject *ustr = JSON_NewEmptyUnicode(); if (ustr == NULL) return NULL; @@ -414,6 +822,9 @@ join_list_unicode(PyObject *lst) return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); } +#if PY_MAJOR_VERSION >= 3 +#define join_list_string join_list_unicode +#else /* PY_MAJOR_VERSION >= 3 */ static PyObject * join_list_string(PyObject *lst) { @@ -431,9 +842,11 @@ join_list_string(PyObject *lst) } return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * -_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) +{ /* return (rval, idx) tuple, stealing reference to rval */ PyObject *tpl; PyObject *pyidx; @@ -441,6 +854,7 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { steal a reference to rval, returns (rval, idx) */ if (rval == NULL) { + assert(PyErr_Occurred()); return NULL; } pyidx = PyInt_FromSsize_t(idx); @@ -473,6 +887,7 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { Py_CLEAR(chunk); \ } +#if PY_MAJOR_VERSION < 3 static PyObject * scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) { @@ -493,9 +908,11 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s char *buf = PyString_AS_STRING(pystr); PyObject *chunks = NULL; PyObject *chunk = NULL; + PyObject *strchunk = NULL; if (len == end) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; } else if (end < 0 || len < end) { PyErr_SetString(PyExc_ValueError, "end is out of bounds"); @@ -510,7 +927,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s break; } else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); + raise_errmsg(ERR_STRING_CONTROL, pystr, next); goto bail; } else if (c > 0x7f) { @@ -518,13 +935,23 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s } } if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } /* Pick up this chunk if it's not zero length */ if (next != end) { - PyObject *strchunk; APPEND_OLD_CHUNK +#if PY_MAJOR_VERSION >= 3 + if (!has_unicode) { + chunk = PyUnicode_DecodeASCII(&buf[end], next - end, NULL); + } + else { + chunk = PyUnicode_Decode(&buf[end], next - end, encoding, NULL); + } + if (chunk == NULL) { + goto bail; + } +#else /* PY_MAJOR_VERSION >= 3 */ strchunk = PyString_FromStringAndSize(&buf[end], next - end); if (strchunk == NULL) { goto bail; @@ -539,6 +966,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s else { chunk = strchunk; } +#endif /* PY_MAJOR_VERSION < 3 */ } next++; if (c == '"') { @@ -546,7 +974,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s break; } if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } c = buf[next]; @@ -565,7 +993,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s default: c = 0; } if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); + raise_errmsg(ERR_STRING_ESC1, pystr, end - 2); goto bail; } } @@ -574,12 +1002,12 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s next++; end = next + 4; if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + raise_errmsg(ERR_STRING_ESC4, pystr, next - 1); goto bail; } /* Decode 4 hex digits */ for (; next < end; next++) { - Py_UNICODE digit = buf[next]; + JSON_UNICHR digit = (JSON_UNICHR)buf[next]; c <<= 4; switch (digit) { case '0': case '1': case '2': case '3': case '4': @@ -592,28 +1020,21 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s case 'F': c |= (digit - 'A' + 10); break; default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); goto bail; } } -#ifdef Py_UNICODE_WIDE +#if (PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)) /* Surrogate pair */ if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; - if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - if (buf[next++] != '\\' || buf[next++] != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - end += 6; - /* Decode 4 hex digits */ - for (; next < end; next++) { - c2 <<= 4; - Py_UNICODE digit = buf[next]; - switch (digit) { + if (end + 6 < len && buf[next] == '\\' && buf[next+1] == 'u') { + JSON_UNICHR c2 = 0; + end += 6; + /* Decode 4 hex digits */ + for (next += 2; next < end; next++) { + c2 <<= 4; + JSON_UNICHR digit = buf[next]; + switch (digit) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': c2 |= (digit - '0'); break; @@ -624,28 +1045,34 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s case 'F': c2 |= (digit - 'A' + 10); break; default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + /* not a low surrogate, rewind */ + end -= 6; + next = end; + } + else { + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); } } - if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); } - else if ((c & 0xfc00) == 0xdc00) { - raise_errmsg("Unpaired low surrogate", pystr, end - 5); - goto bail; - } -#endif +#endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */ } if (c > 0x7f) { has_unicode = 1; } APPEND_OLD_CHUNK +#if PY_MAJOR_VERSION >= 3 + chunk = JSON_UnicodeFromChar(c); + if (chunk == NULL) { + goto bail; + } +#else /* PY_MAJOR_VERSION >= 3 */ if (has_unicode) { - chunk = PyUnicode_FromUnicode(&c, 1); + chunk = JSON_UnicodeFromChar(c); if (chunk == NULL) { goto bail; } @@ -657,13 +1084,14 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s goto bail; } } +#endif } if (chunks == NULL) { if (chunk != NULL) rval = chunk; else - rval = PyString_FromStringAndSize("", 0); + rval = JSON_NewEmptyUnicode(); } else { APPEND_OLD_CHUNK @@ -682,7 +1110,7 @@ bail: Py_XDECREF(chunks); return NULL; } - +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) @@ -696,15 +1124,17 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next Return value is a new PyUnicode */ PyObject *rval; - Py_ssize_t len = PyUnicode_GET_SIZE(pystr); Py_ssize_t begin = end - 1; Py_ssize_t next = begin; - const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + Py_ssize_t len = PyUnicode_GetLength(pystr); + void *buf = PyUnicode_DATA(pystr); PyObject *chunks = NULL; PyObject *chunk = NULL; if (len == end) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; } else if (end < 0 || len < end) { PyErr_SetString(PyExc_ValueError, "end is out of bounds"); @@ -712,25 +1142,29 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next } while (1) { /* Find the end of the string or the next escape */ - Py_UNICODE c = 0; + JSON_UNICHR c = 0; for (next = end; next < len; next++) { - c = buf[next]; + c = PyUnicode_READ(kind, buf, next); if (c == '"' || c == '\\') { break; } else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); + raise_errmsg(ERR_STRING_CONTROL, pystr, next); goto bail; } } if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } /* Pick up this chunk if it's not zero length */ if (next != end) { APPEND_OLD_CHUNK - chunk = PyUnicode_FromUnicode(&buf[end], next - end); +#if PY_MAJOR_VERSION < 3 + chunk = PyUnicode_FromUnicode(&((const Py_UNICODE *)buf)[end], next - end); +#else + chunk = PyUnicode_Substring(pystr, end, next); +#endif if (chunk == NULL) { goto bail; } @@ -741,10 +1175,10 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next break; } if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } - c = buf[next]; + c = PyUnicode_READ(kind, buf, next); if (c != 'u') { /* Non-unicode backslash escapes */ end = next + 1; @@ -760,7 +1194,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next default: c = 0; } if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); + raise_errmsg(ERR_STRING_ESC1, pystr, end - 2); goto bail; } } @@ -769,12 +1203,12 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next next++; end = next + 4; if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + raise_errmsg(ERR_STRING_ESC4, pystr, next - 1); goto bail; } /* Decode 4 hex digits */ for (; next < end; next++) { - Py_UNICODE digit = buf[next]; + JSON_UNICHR digit = PyUnicode_READ(kind, buf, next); c <<= 4; switch (digit) { case '0': case '1': case '2': case '3': case '4': @@ -787,28 +1221,23 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next case 'F': c |= (digit - 'A' + 10); break; default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); goto bail; } } -#ifdef Py_UNICODE_WIDE +#if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE) /* Surrogate pair */ if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; - if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - if (buf[next++] != '\\' || buf[next++] != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - end += 6; - /* Decode 4 hex digits */ - for (; next < end; next++) { - c2 <<= 4; - Py_UNICODE digit = buf[next]; - switch (digit) { + JSON_UNICHR c2 = 0; + if (end + 6 < len && + PyUnicode_READ(kind, buf, next) == '\\' && + PyUnicode_READ(kind, buf, next + 1) == 'u') { + end += 6; + /* Decode 4 hex digits */ + for (next += 2; next < end; next++) { + JSON_UNICHR digit = PyUnicode_READ(kind, buf, next); + c2 <<= 4; + switch (digit) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': c2 |= (digit - '0'); break; @@ -819,24 +1248,24 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next case 'F': c2 |= (digit - 'A' + 10); break; default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + /* not a low surrogate, rewind */ + end -= 6; + next = end; + } + else { + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); } } - if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); - } - else if ((c & 0xfc00) == 0xdc00) { - raise_errmsg("Unpaired low surrogate", pystr, end - 5); - goto bail; } #endif } APPEND_OLD_CHUNK - chunk = PyUnicode_FromUnicode(&c, 1); + chunk = JSON_UnicodeFromChar(c); if (chunk == NULL) { goto bail; } @@ -846,7 +1275,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next if (chunk != NULL) rval = chunk; else - rval = PyUnicode_FromUnicode(NULL, 0); + rval = JSON_NewEmptyUnicode(); } else { APPEND_OLD_CHUNK @@ -893,12 +1322,16 @@ py_scanstring(PyObject* self UNUSED, PyObject *args) if (encoding == NULL) { encoding = DEFAULT_ENCODING; } - if (PyString_Check(pystr)) { - rval = scanstring_str(pystr, end, encoding, strict, &next_end); - } - else if (PyUnicode_Check(pystr)) { + if (PyUnicode_Check(pystr)) { rval = scanstring_unicode(pystr, end, strict, &next_end); } +#if PY_MAJOR_VERSION < 3 + /* Using a bytes input is unsupported for scanning in Python 3. + It is coerced to str in the decoder before it gets here. */ + else if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } +#endif else { PyErr_Format(PyExc_TypeError, "first argument must be a string, not %.80s", @@ -975,8 +1408,10 @@ scanner_clear(PyObject *self) return 0; } +#if PY_MAJOR_VERSION < 3 static PyObject * -_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON object from PyString pystr. idx is the index of the first character after the opening curly brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -992,9 +1427,10 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ PyObject *item; PyObject *key = NULL; PyObject *val = NULL; - char *encoding = PyString_AS_STRING(s->encoding); + char *encoding = JSON_ASCII_AS_STRING(s->encoding); int strict = PyObject_IsTrue(s->strict); int has_pairs_hook = (s->pairs_hook != Py_None); + int did_parse = 0; Py_ssize_t next_idx; if (has_pairs_hook) { pairs = PyList_New(0); @@ -1012,12 +1448,14 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ /* only loop if the object is non-empty */ if (idx <= end_idx && str[idx] != '}') { + int trailing_delimiter = 0; while (idx <= end_idx) { PyObject *memokey; + trailing_delimiter = 0; /* read key */ if (str[idx] != '"') { - raise_errmsg("Expecting property name", pystr, idx); + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); goto bail; } key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); @@ -1038,7 +1476,7 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ /* skip whitespace between key and : delimiter, read :, skip whitespace */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; if (idx > end_idx || str[idx] != ':') { - raise_errmsg("Expecting : delimiter", pystr, idx); + raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx); goto bail; } idx++; @@ -1073,23 +1511,33 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; /* bail if the object is closed or we didn't get the , delimiter */ + did_parse = 1; if (idx > end_idx) break; if (str[idx] == '}') { break; } else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); goto bail; } idx++; /* skip whitespace after , delimiter */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; } } /* verify that idx < end_idx, str[idx] should be '}' */ if (idx > end_idx || str[idx] != '}') { - raise_errmsg("Expecting object", pystr, end_idx); + if (did_parse) { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx); + } goto bail; } @@ -1121,9 +1569,11 @@ bail: Py_XDECREF(pairs); return NULL; } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * -_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON object from PyUnicode pystr. idx is the index of the first character after the opening curly brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -1131,8 +1581,9 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss Returns a new PyObject (usually a dict, but object_hook can change that) */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); PyObject *rval = NULL; PyObject *pairs = NULL; PyObject *item; @@ -1140,6 +1591,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss PyObject *val = NULL; int strict = PyObject_IsTrue(s->strict); int has_pairs_hook = (s->pairs_hook != Py_None); + int did_parse = 0; Py_ssize_t next_idx; if (has_pairs_hook) { @@ -1152,18 +1604,20 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss if (rval == NULL) return NULL; } - + /* skip whitespace after { */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* only loop if the object is non-empty */ - if (idx <= end_idx && str[idx] != '}') { + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') { + int trailing_delimiter = 0; while (idx <= end_idx) { PyObject *memokey; + trailing_delimiter = 0; /* read key */ - if (str[idx] != '"') { - raise_errmsg("Expecting property name", pystr, idx); + if (PyUnicode_READ(kind, str, idx) != '"') { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); goto bail; } key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); @@ -1181,14 +1635,15 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss } idx = next_idx; - /* skip whitespace between key and : delimiter, read :, skip whitespace */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - if (idx > end_idx || str[idx] != ':') { - raise_errmsg("Expecting : delimiter", pystr, idx); + /* skip whitespace between key and : delimiter, read :, skip + whitespace */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') { + raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx); goto bail; } idx++; - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* read any JSON term */ val = scan_once_unicode(s, pystr, idx, &next_idx); @@ -1216,27 +1671,38 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss idx = next_idx; /* skip whitespace before } or , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; - /* bail if the object is closed or we didn't get the , delimiter */ + /* bail if the object is closed or we didn't get the , + delimiter */ + did_parse = 1; if (idx > end_idx) break; - if (str[idx] == '}') { + if (PyUnicode_READ(kind, str, idx) == '}') { break; } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); + else if (PyUnicode_READ(kind, str, idx) != ',') { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); goto bail; } idx++; /* skip whitespace after , delimiter */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; } } /* verify that idx < end_idx, str[idx] should be '}' */ - if (idx > end_idx || str[idx] != '}') { - raise_errmsg("Expecting object", pystr, end_idx); + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') { + if (did_parse) { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx); + } goto bail; } @@ -1269,8 +1735,10 @@ bail: return NULL; } +#if PY_MAJOR_VERSION < 3 static PyObject * -_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON array from PyString pystr. idx is the index of the first character after the opening brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -1291,15 +1759,12 @@ _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t /* only loop if the array is non-empty */ if (idx <= end_idx && str[idx] != ']') { + int trailing_delimiter = 0; while (idx <= end_idx) { - + trailing_delimiter = 0; /* read any JSON term and de-tuplefy the (rval, idx) */ val = scan_once_str(s, pystr, idx, &next_idx); if (val == NULL) { - if (PyErr_ExceptionMatches(PyExc_StopIteration)) { - PyErr_Clear(); - raise_errmsg("Expecting object", pystr, idx); - } goto bail; } @@ -1318,19 +1783,28 @@ _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t break; } else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); goto bail; } idx++; /* skip whitespace after , */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + goto bail; } } /* verify that idx < end_idx, str[idx] should be ']' */ if (idx > end_idx || str[idx] != ']') { - raise_errmsg("Expecting object", pystr, end_idx); + if (PyList_GET_SIZE(rval)) { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx); + } goto bail; } *next_idx_ptr = idx + 1; @@ -1340,9 +1814,11 @@ bail: Py_DECREF(rval); return NULL; } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * -_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON array from PyString pystr. idx is the index of the first character after the opening brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -1350,8 +1826,9 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi Returns a new PyList */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; PyObject *val = NULL; PyObject *rval = PyList_New(0); Py_ssize_t next_idx; @@ -1359,19 +1836,16 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi return NULL; /* skip whitespace after [ */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* only loop if the array is non-empty */ - if (idx <= end_idx && str[idx] != ']') { + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') { + int trailing_delimiter = 0; while (idx <= end_idx) { - + trailing_delimiter = 0; /* read any JSON term */ val = scan_once_unicode(s, pystr, idx, &next_idx); if (val == NULL) { - if (PyErr_ExceptionMatches(PyExc_StopIteration)) { - PyErr_Clear(); - raise_errmsg("Expecting object", pystr, idx); - } goto bail; } @@ -1382,27 +1856,36 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi idx = next_idx; /* skip whitespace between term and , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* bail if the array is closed or we didn't get the , delimiter */ if (idx > end_idx) break; - if (str[idx] == ']') { + if (PyUnicode_READ(kind, str, idx) == ']') { break; } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); + else if (PyUnicode_READ(kind, str, idx) != ',') { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); goto bail; } idx++; /* skip whitespace after , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + goto bail; } } /* verify that idx < end_idx, str[idx] should be ']' */ - if (idx > end_idx || str[idx] != ']') { - raise_errmsg("Expecting object", pystr, end_idx); + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') { + if (PyList_GET_SIZE(rval)) { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx); + } goto bail; } *next_idx_ptr = idx + 1; @@ -1414,7 +1897,8 @@ bail: } static PyObject * -_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON constant from PyString pystr. constant is the constant string that was found ("NaN", "Infinity", "-Infinity"). @@ -1427,20 +1911,22 @@ _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t * PyObject *cstr; PyObject *rval; /* constant is "NaN", "Infinity", or "-Infinity" */ - cstr = PyString_InternFromString(constant); + cstr = JSON_InternFromString(constant); if (cstr == NULL) return NULL; /* rval = parse_constant(constant) */ rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); - idx += PyString_GET_SIZE(cstr); + idx += JSON_Intern_GET_SIZE(cstr); Py_DECREF(cstr); *next_idx_ptr = idx; return rval; } +#if PY_MAJOR_VERSION < 3 static PyObject * -_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON number from PyString pystr. idx is the index of the first character of the number *next_idx_ptr is a return-by-reference index to the first character after @@ -1459,11 +1945,11 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz /* read a sign if it's there, make sure it's not the end of the string */ if (str[idx] == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); + if (idx >= end_idx) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } + idx++; } /* read as many integer digits as we find as long as it doesn't start with 0 */ @@ -1477,7 +1963,7 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz } /* no integer digits, error */ else { - PyErr_SetNone(PyExc_StopIteration); + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } @@ -1541,9 +2027,11 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz *next_idx_ptr = idx; return rval; } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * -_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { +_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON number from PyUnicode pystr. idx is the index of the first character of the number *next_idx_ptr is a return-by-reference index to the first character after @@ -1553,57 +2041,68 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ PyInt, PyLong, or PyFloat. May return other types if parse_int or parse_float are set */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; Py_ssize_t idx = start; int is_float = 0; + JSON_UNICHR c; PyObject *rval; PyObject *numstr; /* read a sign if it's there, make sure it's not the end of the string */ - if (str[idx] == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); + if (PyUnicode_READ(kind, str, idx) == '-') { + if (idx >= end_idx) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } + idx++; } /* read as many integer digits as we find as long as it doesn't start with 0 */ - if (str[idx] >= '1' && str[idx] <= '9') { + c = PyUnicode_READ(kind, str, idx); + if (c == '0') { + /* if it starts with 0 we only expect one integer digit */ idx++; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; } - /* if it starts with 0 we only expect one integer digit */ - else if (str[idx] == '0') { + else if (IS_DIGIT(c)) { idx++; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) { + idx++; + } } - /* no integer digits, error */ else { - PyErr_SetNone(PyExc_StopIteration); + /* no integer digits, error */ + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } /* if the next char is '.' followed by a digit then read all float digits */ - if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + if (idx < end_idx && + PyUnicode_READ(kind, str, idx) == '.' && + IS_DIGIT(PyUnicode_READ(kind, str, idx + 1))) { is_float = 1; idx += 2; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++; } /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ - if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + if (idx < end_idx && + (PyUnicode_READ(kind, str, idx) == 'e' || + PyUnicode_READ(kind, str, idx) == 'E')) { Py_ssize_t e_start = idx; idx++; /* read an exponent sign if present */ - if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + if (idx < end_idx && + (PyUnicode_READ(kind, str, idx) == '-' || + PyUnicode_READ(kind, str, idx) == '+')) idx++; /* read all digits */ - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++; /* if we got a digit, then parse as float. if not, backtrack */ - if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + if (IS_DIGIT(PyUnicode_READ(kind, str, idx - 1))) { is_float = 1; } else { @@ -1612,7 +2111,11 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ } /* copy the section we determined to be a number */ - numstr = PyUnicode_FromUnicode(&str[start], idx - start); +#if PY_MAJOR_VERSION >= 3 + numstr = PyUnicode_Substring(pystr, start, idx); +#else + numstr = PyUnicode_FromUnicode(&((Py_UNICODE *)str)[start], idx - start); +#endif if (numstr == NULL) return NULL; if (is_float) { @@ -1621,7 +2124,11 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); } else { +#if PY_MAJOR_VERSION >= 3 + rval = PyFloat_FromString(numstr); +#else rval = PyFloat_FromString(numstr, NULL); +#endif } } else { @@ -1633,6 +2140,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ return rval; } +#if PY_MAJOR_VERSION < 3 static PyObject * scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { @@ -1647,27 +2155,33 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n Py_ssize_t length = PyString_GET_SIZE(pystr); PyObject *rval = NULL; int fallthrough = 0; - if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); + if (idx < 0 || idx >= length) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } - if (Py_EnterRecursiveCall(" while decoding a JSON document")) - return NULL; switch (str[idx]) { case '"': /* string */ rval = scanstring_str(pystr, idx + 1, - PyString_AS_STRING(s->encoding), + JSON_ASCII_AS_STRING(s->encoding), PyObject_IsTrue(s->strict), next_idx_ptr); break; case '{': /* object */ + if (Py_EnterRecursiveCall(" while decoding a JSON object " + "from a string")) + return NULL; rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); break; case '[': /* array */ + if (Py_EnterRecursiveCall(" while decoding a JSON array " + "from a string")) + return NULL; rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); break; case 'n': /* null */ @@ -1729,9 +2243,10 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n /* Didn't find a string, object, array, or named constant. Look for a number. */ if (fallthrough) rval = _match_number_str(s, pystr, idx, next_idx_ptr); - Py_LeaveRecursiveCall(); return rval; } +#endif /* PY_MAJOR_VERSION < 3 */ + static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) @@ -1743,17 +2258,16 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ Returns a new PyObject representation of the term. */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t length = PyUnicode_GetLength(pystr); PyObject *rval = NULL; int fallthrough = 0; - if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); + if (idx < 0 || idx >= length) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } - if (Py_EnterRecursiveCall(" while decoding a JSON document")) - return NULL; - switch (str[idx]) { + switch (PyUnicode_READ(kind, str, idx)) { case '"': /* string */ rval = scanstring_unicode(pystr, idx + 1, @@ -1762,15 +2276,26 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case '{': /* object */ + if (Py_EnterRecursiveCall(" while decoding a JSON object " + "from a unicode string")) + return NULL; rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); break; case '[': /* array */ + if (Py_EnterRecursiveCall(" while decoding a JSON array " + "from a unicode string")) + return NULL; rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); break; case 'n': /* null */ - if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + if ((idx + 3 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'u' && + PyUnicode_READ(kind, str, idx + 2) == 'l' && + PyUnicode_READ(kind, str, idx + 3) == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; rval = Py_None; @@ -1780,7 +2305,10 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case 't': /* true */ - if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + if ((idx + 3 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'r' && + PyUnicode_READ(kind, str, idx + 2) == 'u' && + PyUnicode_READ(kind, str, idx + 3) == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; rval = Py_True; @@ -1790,7 +2318,11 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case 'f': /* false */ - if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + if ((idx + 4 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'a' && + PyUnicode_READ(kind, str, idx + 2) == 'l' && + PyUnicode_READ(kind, str, idx + 3) == 's' && + PyUnicode_READ(kind, str, idx + 4) == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; rval = Py_False; @@ -1800,7 +2332,9 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case 'N': /* NaN */ - if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + if ((idx + 2 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'a' && + PyUnicode_READ(kind, str, idx + 2) == 'N') { rval = _parse_constant(s, "NaN", idx, next_idx_ptr); } else @@ -1808,7 +2342,14 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case 'I': /* Infinity */ - if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + if ((idx + 7 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'n' && + PyUnicode_READ(kind, str, idx + 2) == 'f' && + PyUnicode_READ(kind, str, idx + 3) == 'i' && + PyUnicode_READ(kind, str, idx + 4) == 'n' && + PyUnicode_READ(kind, str, idx + 5) == 'i' && + PyUnicode_READ(kind, str, idx + 6) == 't' && + PyUnicode_READ(kind, str, idx + 7) == 'y') { rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); } else @@ -1816,7 +2357,15 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case '-': /* -Infinity */ - if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + if ((idx + 8 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'I' && + PyUnicode_READ(kind, str, idx + 2) == 'n' && + PyUnicode_READ(kind, str, idx + 3) == 'f' && + PyUnicode_READ(kind, str, idx + 4) == 'i' && + PyUnicode_READ(kind, str, idx + 5) == 'n' && + PyUnicode_READ(kind, str, idx + 6) == 'i' && + PyUnicode_READ(kind, str, idx + 7) == 't' && + PyUnicode_READ(kind, str, idx + 8) == 'y') { rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); } else @@ -1828,7 +2377,6 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ /* Didn't find a string, object, array, or named constant. Look for a number. */ if (fallthrough) rval = _match_number_unicode(s, pystr, idx, next_idx_ptr); - Py_LeaveRecursiveCall(); return rval; } @@ -1847,12 +2395,14 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) return NULL; - if (PyString_Check(pystr)) { - rval = scan_once_str(s, pystr, idx, &next_idx); - } - else if (PyUnicode_Check(pystr)) { + if (PyUnicode_Check(pystr)) { rval = scan_once_unicode(s, pystr, idx, &next_idx); } +#if PY_MAJOR_VERSION < 3 + else if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } +#endif /* PY_MAJOR_VERSION < 3 */ else { PyErr_Format(PyExc_TypeError, "first argument must be a string, not %.80s", @@ -1880,6 +2430,25 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return (PyObject *)s; } +static PyObject * +JSON_ParseEncoding(PyObject *encoding) +{ + if (encoding == NULL) + return NULL; + if (encoding == Py_None) + return JSON_InternFromString(DEFAULT_ENCODING); +#if PY_MAJOR_VERSION < 3 + if (PyUnicode_Check(encoding)) + return PyUnicode_AsEncodedString(encoding, NULL, NULL); +#endif + if (JSON_ASCII_Check(encoding)) { + Py_INCREF(encoding); + return encoding; + } + PyErr_SetString(PyExc_TypeError, "encoding must be a string"); + return NULL; +} + static int scanner_init(PyObject *self, PyObject *args, PyObject *kwds) { @@ -1887,34 +2456,26 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) PyObject *ctx; static char *kwlist[] = {"context", NULL}; PyScannerObject *s; + PyObject *encoding; assert(PyScanner_Check(self)); s = (PyScannerObject *)self; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) return -1; - + if (s->memo == NULL) { s->memo = PyDict_New(); if (s->memo == NULL) goto bail; } - /* PyString_AS_STRING is used on encoding */ - s->encoding = PyObject_GetAttrString(ctx, "encoding"); + /* JSON_ASCII_AS_STRING is used on encoding */ + encoding = PyObject_GetAttrString(ctx, "encoding"); + s->encoding = JSON_ParseEncoding(encoding); + Py_XDECREF(encoding); if (s->encoding == NULL) goto bail; - if (s->encoding == Py_None) { - Py_DECREF(Py_None); - s->encoding = PyString_InternFromString(DEFAULT_ENCODING); - } - else if (PyUnicode_Check(s->encoding)) { - PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); - Py_DECREF(s->encoding); - s->encoding = tmp; - } - if (s->encoding == NULL || !PyString_Check(s->encoding)) - goto bail; /* All of these will fail "gracefully" so we don't need to verify them */ s->strict = PyObject_GetAttrString(ctx, "strict"); @@ -1953,8 +2514,7 @@ PyDoc_STRVAR(scanner_doc, "JSON scanner object"); static PyTypeObject PyScannerType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ + PyVarObject_HEAD_INIT(NULL, 0) "simplejson._speedups.Scanner", /* tp_name */ sizeof(PyScannerObject), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -2004,12 +2564,17 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->markers = NULL; s->defaultfn = NULL; s->encoder = NULL; + s->encoding = NULL; s->indent = NULL; s->key_separator = NULL; s->item_separator = NULL; - s->sort_keys = NULL; - s->skipkeys = NULL; s->key_memo = NULL; + s->sort_keys = NULL; + s->item_sort_key = NULL; + s->item_sort_kw = NULL; + s->Decimal = NULL; + s->max_long_size = NULL; + s->min_long_size = NULL; } return (PyObject *)s; } @@ -2017,47 +2582,140 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) static int encoder_init(PyObject *self, PyObject *args, PyObject *kwds) { - /* initialize Encoder object */ - static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", "iterable_as_array", NULL}; + static char *kwlist[] = { + "markers", + "default", + "encoder", + "indent", + "key_separator", + "item_separator", + "sort_keys", + "skipkeys", + "allow_nan", + "key_memo", + "use_decimal", + "namedtuple_as_object", + "tuple_as_array", + "iterable_as_array" + "int_as_string_bitcount", + "item_sort_key", + "encoding", + "for_json", + "ignore_nan", + "Decimal", + NULL}; PyEncoderObject *s; PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; - PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo, *use_decimal, *namedtuple_as_object, *tuple_as_array, *iterable_as_array; + PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo; + PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array, *iterable_as_array; + PyObject *int_as_string_bitcount, *item_sort_key, *encoding, *for_json; + PyObject *ignore_nan, *Decimal; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOO:make_encoder", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOO:make_encoder", kwlist, &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal, - &namedtuple_as_object, &tuple_as_array, &iterable_as_array)) + &namedtuple_as_object, &tuple_as_array, + &int_as_string_bitcount, &item_sort_key, &encoding, &for_json, + &ignore_nan, &Decimal, &iterable_as_array)) return -1; + Py_INCREF(markers); s->markers = markers; + Py_INCREF(defaultfn); s->defaultfn = defaultfn; + Py_INCREF(encoder); s->encoder = encoder; + s->encoding = JSON_ParseEncoding(encoding); + if (s->encoding == NULL) + return -1; + Py_INCREF(indent); s->indent = indent; + Py_INCREF(key_separator); s->key_separator = key_separator; + Py_INCREF(item_separator); s->item_separator = item_separator; - s->sort_keys = sort_keys; - s->skipkeys = skipkeys; + Py_INCREF(skipkeys); + s->skipkeys_bool = skipkeys; + s->skipkeys = PyObject_IsTrue(skipkeys); + Py_INCREF(key_memo); s->key_memo = key_memo; s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); - s->allow_nan = PyObject_IsTrue(allow_nan); + s->allow_or_ignore_nan = ( + (PyObject_IsTrue(ignore_nan) ? JSON_IGNORE_NAN : 0) | + (PyObject_IsTrue(allow_nan) ? JSON_ALLOW_NAN : 0)); s->use_decimal = PyObject_IsTrue(use_decimal); s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object); s->tuple_as_array = PyObject_IsTrue(tuple_as_array); s->iterable_as_array = PyObject_IsTrue(iterable_as_array); + if (PyInt_Check(int_as_string_bitcount) || PyLong_Check(int_as_string_bitcount)) { + static const unsigned int long_long_bitsize = SIZEOF_LONG_LONG * 8; + int int_as_string_bitcount_val = (int)PyLong_AsLong(int_as_string_bitcount); + if (int_as_string_bitcount_val > 0 && int_as_string_bitcount_val < long_long_bitsize) { + s->max_long_size = PyLong_FromUnsignedLongLong(1ULL << int_as_string_bitcount_val); + s->min_long_size = PyLong_FromLongLong(-1LL << int_as_string_bitcount_val); + if (s->min_long_size == NULL || s->max_long_size == NULL) { + return -1; + } + } + else { + PyErr_Format(PyExc_TypeError, + "int_as_string_bitcount (%d) must be greater than 0 and less than the number of bits of a `long long` type (%u bits)", + int_as_string_bitcount_val, long_long_bitsize); + return -1; + } + } + else if (int_as_string_bitcount == Py_None) { + Py_INCREF(Py_None); + s->max_long_size = Py_None; + Py_INCREF(Py_None); + s->min_long_size = Py_None; + } + else { + PyErr_SetString(PyExc_TypeError, "int_as_string_bitcount must be None or an integer"); + return -1; + } + if (item_sort_key != Py_None) { + if (!PyCallable_Check(item_sort_key)) { + PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable"); + return -1; + } + } + else if (PyObject_IsTrue(sort_keys)) { + static PyObject *itemgetter0 = NULL; + if (!itemgetter0) { + PyObject *operator = PyImport_ImportModule("operator"); + if (!operator) + return -1; + itemgetter0 = PyObject_CallMethod(operator, "itemgetter", "i", 0); + Py_DECREF(operator); + } + item_sort_key = itemgetter0; + if (!item_sort_key) + return -1; + } + if (item_sort_key == Py_None) { + Py_INCREF(Py_None); + s->item_sort_kw = Py_None; + } + else { + s->item_sort_kw = PyDict_New(); + if (s->item_sort_kw == NULL) + return -1; + if (PyDict_SetItemString(s->item_sort_kw, "key", item_sort_key)) + return -1; + } + Py_INCREF(sort_keys); + s->sort_keys = sort_keys; + Py_INCREF(item_sort_key); + s->item_sort_key = item_sort_key; + Py_INCREF(Decimal); + s->Decimal = Decimal; + s->for_json = PyObject_IsTrue(for_json); - Py_INCREF(s->markers); - Py_INCREF(s->defaultfn); - Py_INCREF(s->encoder); - Py_INCREF(s->indent); - Py_INCREF(s->key_separator); - Py_INCREF(s->item_separator); - Py_INCREF(s->sort_keys); - Py_INCREF(s->skipkeys); - Py_INCREF(s->key_memo); return 0; } @@ -2067,22 +2725,21 @@ encoder_call(PyObject *self, PyObject *args, PyObject *kwds) /* Python callable interface to encode_listencode_obj */ static char *kwlist[] = {"obj", "_current_indent_level", NULL}; PyObject *obj; - PyObject *rval; Py_ssize_t indent_level; PyEncoderObject *s; + JSON_Accu rval; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, &obj, _convertPyInt_AsSsize_t, &indent_level)) return NULL; - rval = PyList_New(0); - if (rval == NULL) + if (JSON_Accu_Init(&rval)) return NULL; - if (encoder_listencode_obj(s, rval, obj, indent_level)) { - Py_DECREF(rval); + if (encoder_listencode_obj(s, &rval, obj, indent_level)) { + JSON_Accu_Destroy(&rval); return NULL; } - return rval; + return JSON_Accu_FinishAsList(&rval); } static PyObject * @@ -2092,7 +2749,7 @@ _encoded_const(PyObject *obj) if (obj == Py_None) { static PyObject *s_null = NULL; if (s_null == NULL) { - s_null = PyString_InternFromString("null"); + s_null = JSON_InternFromString("null"); } Py_INCREF(s_null); return s_null; @@ -2100,7 +2757,7 @@ _encoded_const(PyObject *obj) else if (obj == Py_True) { static PyObject *s_true = NULL; if (s_true == NULL) { - s_true = PyString_InternFromString("true"); + s_true = JSON_InternFromString("true"); } Py_INCREF(s_true); return s_true; @@ -2108,7 +2765,7 @@ _encoded_const(PyObject *obj) else if (obj == Py_False) { static PyObject *s_false = NULL; if (s_false == NULL) { - s_false = PyString_InternFromString("false"); + s_false = JSON_InternFromString("false"); } Py_INCREF(s_false); return s_false; @@ -2125,22 +2782,54 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj) /* Return the JSON representation of a PyFloat */ double i = PyFloat_AS_DOUBLE(obj); if (!Py_IS_FINITE(i)) { - if (!s->allow_nan) { + if (!s->allow_or_ignore_nan) { PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); return NULL; } - if (i > 0) { - return PyString_FromString("Infinity"); + if (s->allow_or_ignore_nan & JSON_IGNORE_NAN) { + return _encoded_const(Py_None); + } + /* JSON_ALLOW_NAN is set */ + else if (i > 0) { + static PyObject *sInfinity = NULL; + if (sInfinity == NULL) + sInfinity = JSON_InternFromString("Infinity"); + if (sInfinity) + Py_INCREF(sInfinity); + return sInfinity; } else if (i < 0) { - return PyString_FromString("-Infinity"); + static PyObject *sNegInfinity = NULL; + if (sNegInfinity == NULL) + sNegInfinity = JSON_InternFromString("-Infinity"); + if (sNegInfinity) + Py_INCREF(sNegInfinity); + return sNegInfinity; } else { - return PyString_FromString("NaN"); + static PyObject *sNaN = NULL; + if (sNaN == NULL) + sNaN = JSON_InternFromString("NaN"); + if (sNaN) + Py_INCREF(sNaN); + return sNaN; } } /* Use a better float format here? */ - return PyObject_Repr(obj); + if (PyFloat_CheckExact(obj)) { + return PyObject_Repr(obj); + } + else { + /* See #118, do not trust custom str/repr */ + PyObject *res; + PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyFloat_Type, obj, NULL); + if (tmp == NULL) { + return NULL; + } + res = PyObject_Repr(tmp); + Py_DECREF(tmp); + return res; + } } static PyObject * @@ -2154,60 +2843,97 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj) } static int -_steal_list_append(PyObject *lst, PyObject *stolen) +_steal_accumulate(JSON_Accu *accu, PyObject *stolen) { /* Append stolen and then decrement its reference count */ - int rval = PyList_Append(lst, stolen); + int rval = JSON_Accu_Accumulate(accu, stolen); Py_DECREF(stolen); return rval; } static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) +encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level) { /* Encode Python object obj to a JSON term, rval is a PyList */ int rv = -1; - if (Py_EnterRecursiveCall(" while encoding a JSON document")) - return rv; do { if (obj == Py_None || obj == Py_True || obj == Py_False) { PyObject *cstr = _encoded_const(obj); if (cstr != NULL) - rv = _steal_list_append(rval, cstr); + rv = _steal_accumulate(rval, cstr); } else if (PyString_Check(obj) || PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); if (encoded != NULL) - rv = _steal_list_append(rval, encoded); + rv = _steal_accumulate(rval, encoded); } else if (PyInt_Check(obj) || PyLong_Check(obj)) { - PyObject *encoded = PyObject_Str(obj); - if (encoded != NULL) - rv = _steal_list_append(rval, encoded); + PyObject *encoded; + if (PyInt_CheckExact(obj) || PyLong_CheckExact(obj)) { + encoded = PyObject_Str(obj); + } + else { + /* See #118, do not trust custom str/repr */ + PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyLong_Type, obj, NULL); + if (tmp == NULL) { + encoded = NULL; + } + else { + encoded = PyObject_Str(tmp); + Py_DECREF(tmp); + } + } + if (encoded != NULL) { + encoded = maybe_quote_bigint(s, encoded, obj); + if (encoded == NULL) + break; + rv = _steal_accumulate(rval, encoded); + } } else if (PyFloat_Check(obj)) { PyObject *encoded = encoder_encode_float(s, obj); if (encoded != NULL) - rv = _steal_list_append(rval, encoded); + rv = _steal_accumulate(rval, encoded); + } + else if (s->for_json && _has_for_json_hook(obj)) { + PyObject *newobj; + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + newobj = PyObject_CallMethod(obj, "for_json", NULL); + if (newobj != NULL) { + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + } + Py_LeaveRecursiveCall(); } else if (s->namedtuple_as_object && _is_namedtuple(obj)) { - PyObject *newobj = PyObject_CallMethod(obj, "_asdict", NULL); + PyObject *newobj; + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + newobj = PyObject_CallMethod(obj, "_asdict", NULL); if (newobj != NULL) { rv = encoder_listencode_dict(s, rval, newobj, indent_level); Py_DECREF(newobj); } + Py_LeaveRecursiveCall(); } else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) { + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; rv = encoder_listencode_list(s, rval, obj, indent_level); + Py_LeaveRecursiveCall(); } else if (PyDict_Check(obj)) { + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; rv = encoder_listencode_dict(s, rval, obj, indent_level); + Py_LeaveRecursiveCall(); } - else if (s->use_decimal && Decimal_Check(obj)) { + else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) { PyObject *encoded = PyObject_Str(obj); if (encoded != NULL) - rv = _steal_list_append(rval, encoded); + rv = _steal_accumulate(rval, encoded); } else { PyObject *ident = NULL; @@ -2239,12 +2965,16 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi break; } } + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); if (newobj == NULL) { Py_XDECREF(ident); + Py_LeaveRecursiveCall(); break; } rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_LeaveRecursiveCall(); Py_DECREF(newobj); if (rv) { Py_XDECREF(ident); @@ -2259,37 +2989,33 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi } } } while (0); - Py_LeaveRecursiveCall(); return rv; } static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) +encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level) { - /* Encode Python dict dct a JSON term, rval is a PyList */ + /* Encode Python dict dct a JSON term */ static PyObject *open_dict = NULL; static PyObject *close_dict = NULL; static PyObject *empty_dict = NULL; - static PyObject *iteritems = NULL; PyObject *kstr = NULL; PyObject *ident = NULL; PyObject *iter = NULL; PyObject *item = NULL; PyObject *items = NULL; PyObject *encoded = NULL; - int skipkeys; Py_ssize_t idx; - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) { - open_dict = PyString_InternFromString("{"); - close_dict = PyString_InternFromString("}"); - empty_dict = PyString_InternFromString("{}"); - iteritems = PyString_InternFromString("iteritems"); - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = JSON_InternFromString("{"); + close_dict = JSON_InternFromString("}"); + empty_dict = JSON_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) return -1; } if (PyDict_Size(dct) == 0) - return PyList_Append(rval, empty_dict); + return JSON_Accu_Accumulate(rval, empty_dict); if (s->markers != Py_None) { int has_key; @@ -2307,7 +3033,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss } } - if (PyList_Append(rval, open_dict)) + if (JSON_Accu_Accumulate(rval, open_dict)) goto bail; if (s->indent != Py_None) { @@ -2320,47 +3046,10 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss */ } - if (PyObject_IsTrue(s->sort_keys)) { - /* First sort the keys then replace them with (key, value) tuples. */ - Py_ssize_t i, nitems; - if (PyDict_CheckExact(dct)) - items = PyDict_Keys(dct); - else - items = PyMapping_Keys(dct); - if (items == NULL) - goto bail; - if (!PyList_Check(items)) { - PyErr_SetString(PyExc_ValueError, "keys must return list"); - goto bail; - } - if (PyList_Sort(items) < 0) - goto bail; - nitems = PyList_GET_SIZE(items); - for (i = 0; i < nitems; i++) { - PyObject *key, *value; - key = PyList_GET_ITEM(items, i); - value = PyDict_GetItem(dct, key); - item = PyTuple_Pack(2, key, value); - if (item == NULL) - goto bail; - PyList_SET_ITEM(items, i, item); - Py_DECREF(key); - } - } - else { - if (PyDict_CheckExact(dct)) - items = PyDict_Items(dct); - else - items = PyMapping_Items(dct); - } - if (items == NULL) - goto bail; - iter = PyObject_GetIter(items); - Py_DECREF(items); + iter = encoder_dict_iteritems(s, dct); if (iter == NULL) goto bail; - skipkeys = PyObject_IsTrue(s->skipkeys); idx = 0; while ((item = PyIter_Next(iter))) { PyObject *encoded, *key, *value; @@ -2378,43 +3067,21 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss encoded = PyDict_GetItem(s->key_memo, key); if (encoded != NULL) { Py_INCREF(encoded); - } - else if (PyString_Check(key) || PyUnicode_Check(key)) { - Py_INCREF(key); - kstr = key; - } - else if (PyFloat_Check(key)) { - kstr = encoder_encode_float(s, key); - if (kstr == NULL) - goto bail; - } - else if (key == Py_True || key == Py_False || key == Py_None) { - /* This must come before the PyInt_Check because - True and False are also 1 and 0.*/ - kstr = _encoded_const(key); - if (kstr == NULL) - goto bail; - } - else if (PyInt_Check(key) || PyLong_Check(key)) { - kstr = PyObject_Str(key); + } else { + kstr = encoder_stringify_key(s, key); if (kstr == NULL) goto bail; + else if (kstr == Py_None) { + /* skipkeys */ + Py_DECREF(item); + Py_DECREF(kstr); + continue; + } } - else if (skipkeys) { - Py_DECREF(item); - continue; - } - else { - /* TODO: include repr of key */ - PyErr_SetString(PyExc_TypeError, "keys must be a string"); - goto bail; - } - if (idx) { - if (PyList_Append(rval, s->item_separator)) + if (JSON_Accu_Accumulate(rval, s->item_separator)) goto bail; } - if (encoded == NULL) { encoded = encoder_encode_string(s, kstr); Py_CLEAR(kstr); @@ -2423,11 +3090,11 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss if (PyDict_SetItem(s->key_memo, key, encoded)) goto bail; } - if (PyList_Append(rval, encoded)) { + if (JSON_Accu_Accumulate(rval, encoded)) { goto bail; } Py_CLEAR(encoded); - if (PyList_Append(rval, s->key_separator)) + if (JSON_Accu_Accumulate(rval, s->key_separator)) goto bail; if (encoder_listencode_obj(s, rval, value, indent_level)) goto bail; @@ -2449,13 +3116,14 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss yield '\n' + (_indent * _current_indent_level) */ } - if (PyList_Append(rval, close_dict)) + if (JSON_Accu_Accumulate(rval, close_dict)) goto bail; return 0; bail: Py_XDECREF(encoded); Py_XDECREF(items); + Py_XDECREF(item); Py_XDECREF(iter); Py_XDECREF(kstr); Py_XDECREF(ident); @@ -2464,9 +3132,9 @@ bail: static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) +encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level) { - /* Encode Python list seq to a JSON term, rval is a PyList */ + /* Encode Python list seq to a JSON term */ static PyObject *open_array = NULL; static PyObject *close_array = NULL; static PyObject *empty_array = NULL; @@ -2477,9 +3145,9 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss int i = 0; if (open_array == NULL || close_array == NULL || empty_array == NULL) { - open_array = PyString_InternFromString("["); - close_array = PyString_InternFromString("]"); - empty_array = PyString_InternFromString("[]"); + open_array = JSON_InternFromString("["); + close_array = JSON_InternFromString("]"); + empty_array = JSON_InternFromString("[]"); if (open_array == NULL || close_array == NULL || empty_array == NULL) return -1; } @@ -2488,7 +3156,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss if (is_true == -1) return -1; else if (is_true == 0) - return PyList_Append(rval, empty_array); + return JSON_Accu_Accumulate(rval, empty_array); if (s->markers != Py_None) { int has_key; @@ -2510,7 +3178,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss if (iter == NULL) goto bail; - if (PyList_Append(rval, open_array)) + if (JSON_Accu_Accumulate(rval, open_array)) goto bail; if (s->indent != Py_None) { /* TODO: DOES NOT RUN */ @@ -2523,7 +3191,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss } while ((obj = PyIter_Next(iter))) { if (i) { - if (PyList_Append(rval, s->item_separator)) + if (JSON_Accu_Accumulate(rval, s->item_separator)) goto bail; } if (encoder_listencode_obj(s, rval, obj, indent_level)) @@ -2546,7 +3214,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss yield '\n' + (_indent * _current_indent_level) */ } - if (PyList_Append(rval, close_array)) + if (JSON_Accu_Accumulate(rval, close_array)) goto bail; return 0; @@ -2574,12 +3242,17 @@ encoder_traverse(PyObject *self, visitproc visit, void *arg) Py_VISIT(s->markers); Py_VISIT(s->defaultfn); Py_VISIT(s->encoder); + Py_VISIT(s->encoding); Py_VISIT(s->indent); Py_VISIT(s->key_separator); Py_VISIT(s->item_separator); - Py_VISIT(s->sort_keys); - Py_VISIT(s->skipkeys); Py_VISIT(s->key_memo); + Py_VISIT(s->sort_keys); + Py_VISIT(s->item_sort_kw); + Py_VISIT(s->item_sort_key); + Py_VISIT(s->max_long_size); + Py_VISIT(s->min_long_size); + Py_VISIT(s->Decimal); return 0; } @@ -2593,12 +3266,18 @@ encoder_clear(PyObject *self) Py_CLEAR(s->markers); Py_CLEAR(s->defaultfn); Py_CLEAR(s->encoder); + Py_CLEAR(s->encoding); Py_CLEAR(s->indent); Py_CLEAR(s->key_separator); Py_CLEAR(s->item_separator); - Py_CLEAR(s->sort_keys); - Py_CLEAR(s->skipkeys); Py_CLEAR(s->key_memo); + Py_CLEAR(s->skipkeys_bool); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->item_sort_kw); + Py_CLEAR(s->item_sort_key); + Py_CLEAR(s->max_long_size); + Py_CLEAR(s->min_long_size); + Py_CLEAR(s->Decimal); return 0; } @@ -2606,8 +3285,7 @@ PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable") static PyTypeObject PyEncoderType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ + PyVarObject_HEAD_INIT(NULL, 0) "simplejson._speedups.Encoder", /* tp_name */ sizeof(PyEncoderObject), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -2663,28 +3341,53 @@ static PyMethodDef speedups_methods[] = { PyDoc_STRVAR(module_doc, "simplejson speedups\n"); -void -init_speedups(void) +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_speedups", /* m_name */ + module_doc, /* m_doc */ + -1, /* m_size */ + speedups_methods, /* m_methods */ + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear*/ + NULL, /* m_free */ +}; +#endif + +static PyObject * +moduleinit(void) { - PyObject *m, *decimal; + PyObject *m; PyScannerType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyScannerType) < 0) - return; + return NULL; PyEncoderType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyEncoderType) < 0) - return; - - decimal = PyImport_ImportModule("decimal"); - if (decimal == NULL) - return; - DecimalTypePtr = (PyTypeObject*)PyObject_GetAttrString(decimal, "Decimal"); - Py_DECREF(decimal); - if (DecimalTypePtr == NULL) - return; + return NULL; +#if PY_MAJOR_VERSION >= 3 + m = PyModule_Create(&moduledef); +#else m = Py_InitModule3("_speedups", speedups_methods, module_doc); +#endif Py_INCREF((PyObject*)&PyScannerType); PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); Py_INCREF((PyObject*)&PyEncoderType); PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); + return m; +} + +#if PY_MAJOR_VERSION >= 3 +PyMODINIT_FUNC +PyInit__speedups(void) +{ + return moduleinit(); } +#else +void +init_speedups(void) +{ + moduleinit(); +} +#endif diff --git a/simplejson/compat.py b/simplejson/compat.py new file mode 100644 index 0000000..a0af4a1 --- /dev/null +++ b/simplejson/compat.py @@ -0,0 +1,46 @@ +"""Python 3 compatibility shims +""" +import sys +if sys.version_info[0] < 3: + PY3 = False + def b(s): + return s + def u(s): + return unicode(s, 'unicode_escape') + import cStringIO as StringIO + StringIO = BytesIO = StringIO.StringIO + text_type = unicode + binary_type = str + string_types = (basestring,) + integer_types = (int, long) + unichr = unichr + reload_module = reload + def fromhex(s): + return s.decode('hex') + +else: + PY3 = True + if sys.version_info[:2] >= (3, 4): + from importlib import reload as reload_module + else: + from imp import reload as reload_module + import codecs + def b(s): + return codecs.latin_1_encode(s)[0] + def u(s): + return s + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + text_type = str + binary_type = bytes + string_types = (str,) + integer_types = (int,) + + def unichr(s): + return u(chr(s)) + + def fromhex(s): + return bytes.fromhex(s) + +long_type = integer_types[-1] diff --git a/simplejson/decoder.py b/simplejson/decoder.py index e5496d6..545e658 100644 --- a/simplejson/decoder.py +++ b/simplejson/decoder.py @@ -1,24 +1,28 @@ """Implementation of JSONDecoder """ +from __future__ import absolute_import import re import sys import struct +from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr +from .scanner import make_scanner, JSONDecodeError -from simplejson.scanner import make_scanner def _import_c_scanstring(): try: - from simplejson._speedups import scanstring + from ._speedups import scanstring return scanstring except ImportError: return None c_scanstring = _import_c_scanstring() +# NOTE (3.1.0): JSONDecodeError may still be imported from this module for +# compatibility, but it was never in the __all__ __all__ = ['JSONDecoder'] FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL def _floatconstants(): - _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + _BYTES = fromhex('7FF80000000000007FF0000000000000') # The struct module in Python 2.4 would get frexp() out of range here # when an endian is specified in the format string. Fixed in Python 2.5+ if sys.byteorder != 'big': @@ -28,57 +32,6 @@ def _floatconstants(): NaN, PosInf, NegInf = _floatconstants() - -class JSONDecodeError(ValueError): - """Subclass of ValueError with the following additional properties: - - msg: The unformatted error message - doc: The JSON document being parsed - pos: The start index of doc where parsing failed - end: The end index of doc where parsing failed (may be None) - lineno: The line corresponding to pos - colno: The column corresponding to pos - endlineno: The line corresponding to end (may be None) - endcolno: The column corresponding to end (may be None) - - """ - def __init__(self, msg, doc, pos, end=None): - ValueError.__init__(self, errmsg(msg, doc, pos, end=end)) - self.msg = msg - self.doc = doc - self.pos = pos - self.end = end - self.lineno, self.colno = linecol(doc, pos) - if end is not None: - self.endlineno, self.endcolno = linecol(doc, end) - else: - self.endlineno, self.endcolno = None, None - - -def linecol(doc, pos): - lineno = doc.count('\n', 0, pos) + 1 - if lineno == 1: - colno = pos - else: - colno = pos - doc.rindex('\n', 0, pos) - return lineno, colno - - -def errmsg(msg, doc, pos, end=None): - # Note that this function is called from _speedups - lineno, colno = linecol(doc, pos) - if end is None: - #fmt = '{0}: line {1} column {2} (char {3})' - #return fmt.format(msg, lineno, colno, pos) - fmt = '%s: line %d column %d (char %d)' - return fmt % (msg, lineno, colno, pos) - endlineno, endcolno = linecol(doc, end) - #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' - #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) - fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' - return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) - - _CONSTANTS = { '-Infinity': NegInf, 'Infinity': PosInf, @@ -87,14 +40,15 @@ _CONSTANTS = { STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) BACKSLASH = { - '"': u'"', '\\': u'\\', '/': u'/', - 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', + '"': u('"'), '\\': u('\u005c'), '/': u('/'), + 'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'), } DEFAULT_ENCODING = "utf-8" def py_scanstring(s, end, encoding=None, strict=True, - _b=BACKSLASH, _m=STRINGCHUNK.match): + _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join, + _PY3=PY3, _maxunicode=sys.maxunicode): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. Unescapes all valid JSON string escape sequences and raises ValueError @@ -117,8 +71,8 @@ def py_scanstring(s, end, encoding=None, strict=True, content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters if content: - if not isinstance(content, unicode): - content = unicode(content, encoding) + if not _PY3 and not isinstance(content, text_type): + content = text_type(content, encoding) _append(content) # Terminator is the end of string, a literal control character, # or a backslash denoting that an escape sequence follows @@ -126,8 +80,7 @@ def py_scanstring(s, end, encoding=None, strict=True, break elif terminator != '\\': if strict: - msg = "Invalid control character %r at" % (terminator,) - #msg = "Invalid control character {0!r} at".format(terminator) + msg = "Invalid control character %r at" raise JSONDecodeError(msg, s, end) else: _append(terminator) @@ -142,33 +95,42 @@ def py_scanstring(s, end, encoding=None, strict=True, try: char = _b[esc] except KeyError: - msg = "Invalid \\escape: " + repr(esc) + msg = "Invalid \\X escape sequence %r" raise JSONDecodeError(msg, s, end) end += 1 else: # Unicode escape sequence + msg = "Invalid \\uXXXX escape sequence" esc = s[end + 1:end + 5] - next_end = end + 5 - if len(esc) != 4: - msg = "Invalid \\uXXXX escape" - raise JSONDecodeError(msg, s, end) - uni = int(esc, 16) + escX = esc[1:2] + if len(esc) != 4 or escX == 'x' or escX == 'X': + raise JSONDecodeError(msg, s, end - 1) + try: + uni = int(esc, 16) + except ValueError: + raise JSONDecodeError(msg, s, end - 1) + end += 5 # Check for surrogate pair on UCS-4 systems - if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: - msg = "Invalid \\uXXXX\\uXXXX surrogate pair" - if not s[end + 5:end + 7] == '\\u': - raise JSONDecodeError(msg, s, end) - esc2 = s[end + 7:end + 11] - if len(esc2) != 4: - raise JSONDecodeError(msg, s, end) - uni2 = int(esc2, 16) - uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) - next_end += 6 + # Note that this will join high/low surrogate pairs + # but will also pass unpaired surrogates through + if (_maxunicode > 65535 and + uni & 0xfc00 == 0xd800 and + s[end:end + 2] == '\\u'): + esc2 = s[end + 2:end + 6] + escX = esc2[1:2] + if len(esc2) == 4 and not (escX == 'x' or escX == 'X'): + try: + uni2 = int(esc2, 16) + except ValueError: + raise JSONDecodeError(msg, s, end) + if uni2 & 0xfc00 == 0xdc00: + uni = 0x10000 + (((uni - 0xd800) << 10) | + (uni2 - 0xdc00)) + end += 6 char = unichr(uni) - end = next_end # Append the unescaped character _append(char) - return u''.join(chunks), end + return _join(chunks), end # Use speedup if available @@ -177,9 +139,10 @@ scanstring = c_scanstring or py_scanstring WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) WHITESPACE_STR = ' \t\n\r' -def JSONObject((s, end), encoding, strict, scan_once, object_hook, +def JSONObject(state, encoding, strict, scan_once, object_hook, object_pairs_hook, memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state # Backwards compatibility if memo is None: memo = {} @@ -203,7 +166,9 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, pairs = object_hook(pairs) return pairs, end + 1 elif nextchar != '"': - raise JSONDecodeError("Expecting property name", s, end) + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", + s, end) end += 1 while True: key, end = scanstring(s, end, encoding, strict) @@ -214,7 +179,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, if s[end:end + 1] != ':': end = _w(s, end).end() if s[end:end + 1] != ':': - raise JSONDecodeError("Expecting : delimiter", s, end) + raise JSONDecodeError("Expecting ':' delimiter", s, end) end += 1 @@ -226,10 +191,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, except IndexError: pass - try: - value, end = scan_once(s, end) - except StopIteration: - raise JSONDecodeError("Expecting object", s, end) + value, end = scan_once(s, end) pairs.append((key, value)) try: @@ -244,7 +206,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, if nextchar == '}': break elif nextchar != ',': - raise JSONDecodeError("Expecting , delimiter", s, end - 1) + raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1) try: nextchar = s[end] @@ -259,7 +221,9 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, end += 1 if nextchar != '"': - raise JSONDecodeError("Expecting property name", s, end - 1) + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", + s, end - 1) if object_pairs_hook is not None: result = object_pairs_hook(pairs) @@ -269,7 +233,8 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, pairs = object_hook(pairs) return pairs, end -def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): +def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state values = [] nextchar = s[end:end + 1] if nextchar in _ws: @@ -278,12 +243,11 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): # Look-ahead for trivial empty array if nextchar == ']': return values, end + 1 + elif nextchar == '': + raise JSONDecodeError("Expecting value or ']'", s, end) _append = values.append while True: - try: - value, end = scan_once(s, end) - except StopIteration: - raise JSONDecodeError("Expecting object", s, end) + value, end = scan_once(s, end) _append(value) nextchar = s[end:end + 1] if nextchar in _ws: @@ -293,7 +257,7 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): if nextchar == ']': break elif nextchar != ',': - raise JSONDecodeError("Expecting , delimiter", s, end) + raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1) try: if s[end] in _ws: @@ -317,7 +281,7 @@ class JSONDecoder(object): +---------------+-------------------+ | array | list | +---------------+-------------------+ - | string | unicode | + | string | str, unicode | +---------------+-------------------+ | number (int) | int, long | +---------------+-------------------+ @@ -381,6 +345,8 @@ class JSONDecoder(object): ``False`` then control characters will be allowed in strings. """ + if encoding is None: + encoding = DEFAULT_ENCODING self.encoding = encoding self.object_hook = object_hook self.object_pairs_hook = object_pairs_hook @@ -394,28 +360,41 @@ class JSONDecoder(object): self.memo = {} self.scan_once = make_scanner(self) - def decode(self, s, _w=WHITESPACE.match): + def decode(self, s, _w=WHITESPACE.match, _PY3=PY3): """Return the Python representation of ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) """ - obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + if _PY3 and isinstance(s, binary_type): + s = s.decode(self.encoding) + obj, end = self.raw_decode(s) end = _w(s, end).end() if end != len(s): raise JSONDecodeError("Extra data", s, end, len(s)) return obj - def raw_decode(self, s, idx=0): + def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3): """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning with a JSON document) and return a 2-tuple of the Python representation and the index in ``s`` where the document ended. + Optionally, ``idx`` can be used to specify an offset in ``s`` where + the JSON document begins. This can be used to decode a JSON document from a string that may have extraneous data at the end. """ - try: - obj, end = self.scan_once(s, idx) - except StopIteration: - raise JSONDecodeError("No JSON object could be decoded", s, idx) - return obj, end + if idx < 0: + # Ensure that raw_decode bails on negative indexes, the regex + # would otherwise mask this behavior. #98 + raise JSONDecodeError('Expecting value', s, idx) + if _PY3 and not isinstance(s, text_type): + raise TypeError("Input string must be text, not bytes") + # strip UTF-8 bom + if len(s) > idx: + ord0 = ord(s[idx]) + if ord0 == 0xfeff: + idx += 1 + elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf': + idx += 3 + return self.scan_once(s, idx=_w(s, idx).end()) diff --git a/simplejson/encoder.py b/simplejson/encoder.py index 75ba993..d771bb4 100644 --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -1,11 +1,14 @@ """Implementation of JSONEncoder """ +from __future__ import absolute_import import re -from decimal import Decimal - +from operator import itemgetter +# Do not import Decimal directly to avoid reload issues +import decimal +from .compat import u, unichr, binary_type, string_types, integer_types, PY3 def _import_speedups(): try: - from simplejson import _speedups + from . import _speedups return _speedups.encode_basestring_ascii, _speedups.make_encoder except ImportError: return None, None @@ -13,7 +16,10 @@ c_encode_basestring_ascii, c_make_encoder = _import_speedups() from simplejson.decoder import PosInf -ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') +#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') +# This is required because u() will mangle the string and ur'' isn't valid +# python3 syntax +ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { @@ -24,32 +30,40 @@ ESCAPE_DCT = { '\n': '\\n', '\r': '\\r', '\t': '\\t', - u'\u2028': '\\u2028', - u'\u2029': '\\u2029', } for i in range(0x20): #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) +for i in [0x2028, 0x2029]: + ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,)) FLOAT_REPR = repr -def encode_basestring(s): +def encode_basestring(s, _PY3=PY3, _q=u('"')): """Return a JSON representation of a Python string """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') + if _PY3: + if isinstance(s, binary_type): + s = s.decode('utf-8') + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): return ESCAPE_DCT[match.group(0)] - return u'"' + ESCAPE.sub(replace, s) + u'"' + return _q + ESCAPE.sub(replace, s) + _q -def py_encode_basestring_ascii(s): +def py_encode_basestring_ascii(s, _PY3=PY3): """Return an ASCII-only JSON representation of a Python string """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') + if _PY3: + if isinstance(s, binary_type): + s = s.decode('utf-8') + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): s = match.group(0) try: @@ -103,11 +117,14 @@ class JSONEncoder(object): """ item_separator = ', ' key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, - check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, encoding='utf-8', default=None, - use_decimal=True, namedtuple_as_object=True, - tuple_as_array=True, iterable_as_array=False): + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None, + use_decimal=True, namedtuple_as_object=True, + tuple_as_array=True, bigint_as_string=False, + item_sort_key=None, for_json=False, ignore_nan=False, + int_as_string_bitcount=None, iterable_as_array=False): """Constructor for JSONEncoder, with sensible defaults. If skipkeys is false, then it is a TypeError to attempt @@ -139,9 +156,10 @@ class JSONEncoder(object): versions of simplejson earlier than 2.1.0, an integer is also accepted and is converted to a string with that many spaces. - If specified, separators should be a (item_separator, key_separator) - tuple. The default is (', ', ': '). To get the most compact JSON - representation you should specify (',', ':') to eliminate whitespace. + If specified, separators should be an (item_separator, key_separator) + tuple. The default is (', ', ': ') if *indent* is ``None`` and + (',', ': ') otherwise. To get the most compact JSON representation, + you should specify (',', ':') to eliminate whitespace. If specified, default is a function that gets called for objects that can't otherwise be serialized. It should return a JSON encodable @@ -155,7 +173,7 @@ class JSONEncoder(object): be supported directly by the encoder. For the inverse, decode JSON with ``parse_float=decimal.Decimal``. - If namedtuple_as_object is true (the default), tuple subclasses with + If namedtuple_as_object is true (the default), objects with ``_asdict()`` methods will be encoded as JSON objects. If tuple_as_array is true (the default), tuple (and subclasses) will @@ -165,6 +183,27 @@ class JSONEncoder(object): any object not in the above table that implements ``__iter__()`` will be encoded as a JSON array. + If bigint_as_string is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. + + If int_as_string_bitcount is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, item_sort_key is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. + + If for_json is true (not the default), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized + as ``null`` in compliance with the ECMA-262 specification. If true, + this will override *allow_nan*. + """ self.skipkeys = skipkeys @@ -176,8 +215,13 @@ class JSONEncoder(object): self.namedtuple_as_object = namedtuple_as_object self.tuple_as_array = tuple_as_array self.iterable_as_array = iterable_as_array - if isinstance(indent, (int, long)): - indent = ' ' * indent + self.bigint_as_string = bigint_as_string + self.item_sort_key = item_sort_key + self.for_json = for_json + self.ignore_nan = ignore_nan + self.int_as_string_bitcount = int_as_string_bitcount + if indent is not None and not isinstance(indent, string_types): + indent = indent * ' ' self.indent = indent if separators is not None: self.item_separator, self.key_separator = separators @@ -216,12 +260,11 @@ class JSONEncoder(object): """ # This is for extremely simple cases and benchmarks. - if isinstance(o, basestring): - if isinstance(o, str): - _encoding = self.encoding - if (_encoding is not None - and not (_encoding == 'utf-8')): - o = o.decode(_encoding) + if isinstance(o, binary_type): + _encoding = self.encoding + if (_encoding is not None and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if isinstance(o, string_types): if self.ensure_ascii: return encode_basestring_ascii(o) else: @@ -257,11 +300,11 @@ class JSONEncoder(object): _encoder = encode_basestring if self.encoding != 'utf-8': def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): - if isinstance(o, str): + if isinstance(o, binary_type): o = o.decode(_encoding) return _orig_encoder(o) - def floatstr(o, allow_nan=self.allow_nan, + def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan, _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): # Check for specials. Note that this type of test is processor # and/or platform-specific, so do tests which don't depend on @@ -274,17 +317,23 @@ class JSONEncoder(object): elif o == _neginf: text = '-Infinity' else: + if type(o) != float: + # See #118, do not trust custom str/repr + o = float(o) return _repr(o) - if not allow_nan: + if ignore_nan: + text = 'null' + elif not allow_nan: raise ValueError( "Out of range float values are not JSON compliant: " + repr(o)) return text - key_memo = {} + int_as_string_bitcount = ( + 53 if self.bigint_as_string else self.int_as_string_bitcount) if (_one_shot and c_make_encoder is not None and self.indent is None): _iterencode = c_make_encoder( @@ -292,14 +341,18 @@ class JSONEncoder(object): self.key_separator, self.item_separator, self.sort_keys, self.skipkeys, self.allow_nan, key_memo, self.use_decimal, self.namedtuple_as_object, self.tuple_as_array, - self.iterable_as_array) + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + self.ignore_nan, decimal.Decimal, self.iterable_as_array) else: _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, self.skipkeys, _one_shot, self.use_decimal, self.namedtuple_as_object, self.tuple_as_array, - self.iterable_as_array) + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + self.iterable_as_array, Decimal=decimal.Decimal) try: return _iterencode(o, 0) finally: @@ -336,24 +389,53 @@ class JSONEncoderForHTML(JSONEncoder): def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, _use_decimal, _namedtuple_as_object, _tuple_as_array, + _int_as_string_bitcount, _item_sort_key, + _encoding,_for_json, _iterable_as_array, ## HACK: hand-optimized bytecode; turn globals into locals - False=False, - True=True, + _PY3=PY3, ValueError=ValueError, - basestring=basestring, - Decimal=Decimal, + string_types=string_types, + Decimal=None, dict=dict, float=float, id=id, - int=int, + integer_types=integer_types, isinstance=isinstance, list=list, - long=long, str=str, tuple=tuple, iter=iter, ): + if _use_decimal and Decimal is None: + Decimal = decimal.Decimal + if _item_sort_key and not callable(_item_sort_key): + raise TypeError("item_sort_key must be None or callable") + elif _sort_keys and not _item_sort_key: + _item_sort_key = itemgetter(0) + + if (_int_as_string_bitcount is not None and + (_int_as_string_bitcount <= 0 or + not isinstance(_int_as_string_bitcount, integer_types))): + raise TypeError("int_as_string_bitcount must be a positive integer") + + def _encode_int(value): + skip_quoting = ( + _int_as_string_bitcount is None + or + _int_as_string_bitcount < 1 + ) + if type(value) not in integer_types: + # See #118, do not trust custom str/repr + value = int(value) + if ( + skip_quoting or + (-1 << _int_as_string_bitcount) + < value < + (1 << _int_as_string_bitcount) + ): + return str(value) + return '"' + str(value) + '"' def _iterencode_list(lst, _current_indent_level): if not lst: @@ -379,7 +461,8 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, first = False else: buf = separator - if isinstance(value, basestring): + if (isinstance(value, string_types) or + (_PY3 and isinstance(value, binary_type))): yield buf + _encoder(value) elif value is None: yield buf + 'null' @@ -387,26 +470,30 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield buf + 'true' elif value is False: yield buf + 'false' - elif isinstance(value, (int, long)): - yield buf + str(value) + elif isinstance(value, integer_types): + yield buf + _encode_int(value) elif isinstance(value, float): yield buf + _floatstr(value) elif _use_decimal and isinstance(value, Decimal): yield buf + str(value) else: yield buf - if isinstance(value, list): - chunks = _iterencode_list(value, _current_indent_level) - elif (_namedtuple_as_object and isinstance(value, tuple) and - hasattr(value, '_asdict')): - chunks = _iterencode_dict(value._asdict(), - _current_indent_level) - elif _tuple_as_array and isinstance(value, tuple): + for_json = _for_json and getattr(value, 'for_json', None) + if for_json and callable(for_json): + chunks = _iterencode(for_json(), _current_indent_level) + elif isinstance(value, list): chunks = _iterencode_list(value, _current_indent_level) - elif isinstance(value, dict): - chunks = _iterencode_dict(value, _current_indent_level) else: - chunks = _iterencode(value, _current_indent_level) + _asdict = _namedtuple_as_object and getattr(value, '_asdict', None) + if _asdict and callable(_asdict): + chunks = _iterencode_dict(_asdict(), + _current_indent_level) + elif _tuple_as_array and isinstance(value, tuple): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) for chunk in chunks: yield chunk if newline_indent is not None: @@ -416,6 +503,32 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, if markers is not None: del markers[markerid] + def _stringify_key(key): + if isinstance(key, string_types): # pragma: no cover + pass + elif isinstance(key, binary_type): + key = key.decode(_encoding) + elif isinstance(key, float): + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, integer_types): + if type(key) not in integer_types: + # See #118, do not trust custom str/repr + key = int(key) + key = str(key) + elif _use_decimal and isinstance(key, Decimal): + key = str(key) + elif _skipkeys: + key = None + else: + raise TypeError("key " + repr(key) + " is not a string") + return key + def _iterencode_dict(dct, _current_indent_level): if not dct: yield '{}' @@ -435,37 +548,35 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, newline_indent = None item_separator = _item_separator first = True - if _sort_keys: - items = dct.items() - items.sort(key=lambda kv: kv[0]) + if _PY3: + iteritems = dct.items() + else: + iteritems = dct.iteritems() + if _item_sort_key: + items = [] + for k, v in dct.items(): + if not isinstance(k, string_types): + k = _stringify_key(k) + if k is None: + continue + items.append((k, v)) + items.sort(key=_item_sort_key) else: - items = dct.iteritems() + items = iteritems for key, value in items: - if isinstance(key, basestring): - pass - # JavaScript is weakly typed for these, so it makes sense to - # also allow them. Many encoders seem to do something like this. - elif isinstance(key, float): - key = _floatstr(key) - elif key is True: - key = 'true' - elif key is False: - key = 'false' - elif key is None: - key = 'null' - elif isinstance(key, (int, long)): - key = str(key) - elif _skipkeys: - continue - else: - raise TypeError("key " + repr(key) + " is not a string") + if not (_item_sort_key or isinstance(key, string_types)): + key = _stringify_key(key) + if key is None: + # _skipkeys must be True + continue if first: first = False else: yield item_separator yield _encoder(key) yield _key_separator - if isinstance(value, basestring): + if (isinstance(value, string_types) or + (_PY3 and isinstance(value, binary_type))): yield _encoder(value) elif value is None: yield 'null' @@ -473,25 +584,29 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield 'true' elif value is False: yield 'false' - elif isinstance(value, (int, long)): - yield str(value) + elif isinstance(value, integer_types): + yield _encode_int(value) elif isinstance(value, float): yield _floatstr(value) elif _use_decimal and isinstance(value, Decimal): yield str(value) else: - if isinstance(value, list): - chunks = _iterencode_list(value, _current_indent_level) - elif (_namedtuple_as_object and isinstance(value, tuple) and - hasattr(value, '_asdict')): - chunks = _iterencode_dict(value._asdict(), - _current_indent_level) - elif _tuple_as_array and isinstance(value, tuple): + for_json = _for_json and getattr(value, 'for_json', None) + if for_json and callable(for_json): + chunks = _iterencode(for_json(), _current_indent_level) + elif isinstance(value, list): chunks = _iterencode_list(value, _current_indent_level) - elif isinstance(value, dict): - chunks = _iterencode_dict(value, _current_indent_level) else: - chunks = _iterencode(value, _current_indent_level) + _asdict = _namedtuple_as_object and getattr(value, '_asdict', None) + if _asdict and callable(_asdict): + chunks = _iterencode_dict(_asdict(), + _current_indent_level) + elif _tuple_as_array and isinstance(value, tuple): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) for chunk in chunks: yield chunk if newline_indent is not None: @@ -502,7 +617,8 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, del markers[markerid] def _iterencode(o, _current_indent_level): - if isinstance(o, basestring): + if (isinstance(o, string_types) or + (_PY3 and isinstance(o, binary_type))): yield _encoder(o) elif o is None: yield 'null' @@ -510,45 +626,52 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield 'true' elif o is False: yield 'false' - elif isinstance(o, (int, long)): - yield str(o) + elif isinstance(o, integer_types): + yield _encode_int(o) elif isinstance(o, float): yield _floatstr(o) - elif isinstance(o, list): - for chunk in _iterencode_list(o, _current_indent_level): - yield chunk - elif (_namedtuple_as_object and isinstance(o, tuple) and - hasattr(o, '_asdict')): - for chunk in _iterencode_dict(o._asdict(), _current_indent_level): - yield chunk - elif (_tuple_as_array and isinstance(o, tuple)): - for chunk in _iterencode_list(o, _current_indent_level): - yield chunk - elif isinstance(o, dict): - for chunk in _iterencode_dict(o, _current_indent_level): - yield chunk - elif _use_decimal and isinstance(o, Decimal): - yield str(o) else: - while _iterable_as_array: - # Markers are not checked here because it is valid for an - # iterable to return self. - try: - o = iter(o) - except TypeError: - break + for_json = _for_json and getattr(o, 'for_json', None) + if for_json and callable(for_json): + for chunk in _iterencode(for_json(), _current_indent_level): + yield chunk + elif isinstance(o, list): for chunk in _iterencode_list(o, _current_indent_level): yield chunk - return - if markers is not None: - markerid = id(o) - if markerid in markers: - raise ValueError("Circular reference detected") - markers[markerid] = o - o = _default(o) - for chunk in _iterencode(o, _current_indent_level): - yield chunk - if markers is not None: - del markers[markerid] + else: + _asdict = _namedtuple_as_object and getattr(o, '_asdict', None) + if _asdict and callable(_asdict): + for chunk in _iterencode_dict(_asdict(), + _current_indent_level): + yield chunk + elif (_tuple_as_array and isinstance(o, tuple)): + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk + elif isinstance(o, dict): + for chunk in _iterencode_dict(o, _current_indent_level): + yield chunk + elif _use_decimal and isinstance(o, Decimal): + yield str(o) + else: + while _iterable_as_array: + # Markers are not checked here because it is valid for + # an iterable to return self. + try: + o = iter(o) + except TypeError: + break + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk + return + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + o = _default(o) + for chunk in _iterencode(o, _current_indent_level): + yield chunk + if markers is not None: + del markers[markerid] return _iterencode diff --git a/simplejson/scanner.py b/simplejson/scanner.py index 54593a3..5abed35 100644 --- a/simplejson/scanner.py +++ b/simplejson/scanner.py @@ -9,12 +9,62 @@ def _import_c_make_scanner(): return None c_make_scanner = _import_c_make_scanner() -__all__ = ['make_scanner'] +__all__ = ['make_scanner', 'JSONDecodeError'] NUMBER_RE = re.compile( r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', (re.VERBOSE | re.MULTILINE | re.DOTALL)) +class JSONDecodeError(ValueError): + """Subclass of ValueError with the following additional properties: + + msg: The unformatted error message + doc: The JSON document being parsed + pos: The start index of doc where parsing failed + end: The end index of doc where parsing failed (may be None) + lineno: The line corresponding to pos + colno: The column corresponding to pos + endlineno: The line corresponding to end (may be None) + endcolno: The column corresponding to end (may be None) + + """ + # Note that this exception is used from _speedups + def __init__(self, msg, doc, pos, end=None): + ValueError.__init__(self, errmsg(msg, doc, pos, end=end)) + self.msg = msg + self.doc = doc + self.pos = pos + self.end = end + self.lineno, self.colno = linecol(doc, pos) + if end is not None: + self.endlineno, self.endcolno = linecol(doc, end) + else: + self.endlineno, self.endcolno = None, None + + def __reduce__(self): + return self.__class__, (self.msg, self.doc, self.pos, self.end) + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + 1 + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + lineno, colno = linecol(doc, pos) + msg = msg.replace('%r', repr(doc[pos:pos + 1])) + if end is None: + fmt = '%s: line %d column %d (char %d)' + return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + def py_make_scanner(context): parse_object = context.parse_object parse_array = context.parse_array @@ -30,10 +80,11 @@ def py_make_scanner(context): memo = context.memo def _scan_once(string, idx): + errmsg = 'Expecting value' try: nextchar = string[idx] except IndexError: - raise StopIteration + raise JSONDecodeError(errmsg, string, idx) if nextchar == '"': return parse_string(string, idx + 1, encoding, strict) @@ -64,9 +115,14 @@ def py_make_scanner(context): elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': return parse_constant('-Infinity'), idx + 9 else: - raise StopIteration + raise JSONDecodeError(errmsg, string, idx) def scan_once(string, idx): + if idx < 0: + # Ensure the same behavior as the C speedup, otherwise + # this would work for *some* negative string indices due + # to the behavior of __getitem__ for strings. #98 + raise JSONDecodeError('Expecting value', string, idx) try: return _scan_once(string, idx) finally: diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py index 8d2e443..8c1a4f1 100644 --- a/simplejson/tests/__init__.py +++ b/simplejson/tests/__init__.py @@ -1,18 +1,26 @@ +from __future__ import absolute_import import unittest import doctest +import sys -class OptionalExtensionTestSuite(unittest.TestSuite): +class NoExtensionTestSuite(unittest.TestSuite): def run(self, result): import simplejson - run = unittest.TestSuite.run - run(self, result) simplejson._toggle_speedups(False) - run(self, result) + result = unittest.TestSuite.run(self, result) simplejson._toggle_speedups(True) return result +class TestMissingSpeedups(unittest.TestCase): + def runTest(self): + if hasattr(sys, 'pypy_translation_info'): + "PyPy doesn't need speedups! :)" + elif hasattr(self, 'skipTest'): + self.skipTest('_speedups.so is missing!') + + def additional_tests(suite=None): import simplejson import simplejson.encoder @@ -26,35 +34,50 @@ def additional_tests(suite=None): def all_tests_suite(): - suite = unittest.TestLoader().loadTestsFromNames([ - 'simplejson.tests.test_check_circular', - 'simplejson.tests.test_decode', - 'simplejson.tests.test_default', - 'simplejson.tests.test_dump', - 'simplejson.tests.test_encode_basestring_ascii', - 'simplejson.tests.test_encode_for_html', - 'simplejson.tests.test_errors', - 'simplejson.tests.test_fail', - 'simplejson.tests.test_float', - 'simplejson.tests.test_indent', - 'simplejson.tests.test_pass1', - 'simplejson.tests.test_pass2', - 'simplejson.tests.test_pass3', - 'simplejson.tests.test_recursion', - 'simplejson.tests.test_scanstring', - 'simplejson.tests.test_separators', - 'simplejson.tests.test_speedups', - 'simplejson.tests.test_unicode', - 'simplejson.tests.test_decimal', - 'simplejson.tests.test_tuple', - 'simplejson.tests.test_namedtuple', - ]) - suite = additional_tests(suite) - return OptionalExtensionTestSuite([suite]) + def get_suite(): + return additional_tests( + unittest.TestLoader().loadTestsFromNames([ + 'simplejson.tests.test_bitsize_int_as_string', + 'simplejson.tests.test_bigint_as_string', + 'simplejson.tests.test_check_circular', + 'simplejson.tests.test_decode', + 'simplejson.tests.test_default', + 'simplejson.tests.test_dump', + 'simplejson.tests.test_encode_basestring_ascii', + 'simplejson.tests.test_encode_for_html', + 'simplejson.tests.test_errors', + 'simplejson.tests.test_fail', + 'simplejson.tests.test_float', + 'simplejson.tests.test_indent', + 'simplejson.tests.test_pass1', + 'simplejson.tests.test_pass2', + 'simplejson.tests.test_pass3', + 'simplejson.tests.test_recursion', + 'simplejson.tests.test_scanstring', + 'simplejson.tests.test_separators', + 'simplejson.tests.test_speedups', + 'simplejson.tests.test_unicode', + 'simplejson.tests.test_decimal', + 'simplejson.tests.test_tuple', + 'simplejson.tests.test_namedtuple', + 'simplejson.tests.test_tool', + 'simplejson.tests.test_for_json', + 'simplejson.tests.test_subclass', + ])) + suite = get_suite() + import simplejson + if simplejson._import_c_make_encoder() is None: + suite.addTest(TestMissingSpeedups()) + else: + suite = unittest.TestSuite([ + suite, + NoExtensionTestSuite([get_suite()]), + ]) + return suite def main(): - runner = unittest.TextTestRunner() + runner = unittest.TextTestRunner(verbosity=1 + sys.argv.count('-v')) suite = all_tests_suite() raise SystemExit(not runner.run(suite).wasSuccessful()) diff --git a/simplejson/tests/test_bigint_as_string.py b/simplejson/tests/test_bigint_as_string.py new file mode 100644 index 0000000..2cf2cc2 --- /dev/null +++ b/simplejson/tests/test_bigint_as_string.py @@ -0,0 +1,67 @@ +from unittest import TestCase + +import simplejson as json + + +class TestBigintAsString(TestCase): + # Python 2.5, at least the one that ships on Mac OS X, calculates + # 2 ** 53 as 0! It manages to calculate 1 << 53 correctly. + values = [(200, 200), + ((1 << 53) - 1, 9007199254740991), + ((1 << 53), '9007199254740992'), + ((1 << 53) + 1, '9007199254740993'), + (-100, -100), + ((-1 << 53), '-9007199254740992'), + ((-1 << 53) - 1, '-9007199254740993'), + ((-1 << 53) + 1, -9007199254740991)] + + options = ( + {"bigint_as_string": True}, + {"int_as_string_bitcount": 53} + ) + + def test_ints(self): + for opts in self.options: + for val, expect in self.values: + self.assertEqual( + val, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, **opts))) + + def test_lists(self): + for opts in self.options: + for val, expect in self.values: + val = [val, val] + expect = [expect, expect] + self.assertEqual( + val, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, **opts))) + + def test_dicts(self): + for opts in self.options: + for val, expect in self.values: + val = {'k': val} + expect = {'k': expect} + self.assertEqual( + val, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, **opts))) + + def test_dict_keys(self): + for opts in self.options: + for val, _ in self.values: + expect = {str(val): 'value'} + val = {val: 'value'} + self.assertEqual( + expect, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, **opts))) diff --git a/simplejson/tests/test_bitsize_int_as_string.py b/simplejson/tests/test_bitsize_int_as_string.py new file mode 100644 index 0000000..fd7d103 --- /dev/null +++ b/simplejson/tests/test_bitsize_int_as_string.py @@ -0,0 +1,73 @@ +from unittest import TestCase + +import simplejson as json + + +class TestBitSizeIntAsString(TestCase): + # Python 2.5, at least the one that ships on Mac OS X, calculates + # 2 ** 31 as 0! It manages to calculate 1 << 31 correctly. + values = [ + (200, 200), + ((1 << 31) - 1, (1 << 31) - 1), + ((1 << 31), str(1 << 31)), + ((1 << 31) + 1, str((1 << 31) + 1)), + (-100, -100), + ((-1 << 31), str(-1 << 31)), + ((-1 << 31) - 1, str((-1 << 31) - 1)), + ((-1 << 31) + 1, (-1 << 31) + 1), + ] + + def test_invalid_counts(self): + for n in ['foo', -1, 0, 1.0]: + self.assertRaises( + TypeError, + json.dumps, 0, int_as_string_bitcount=n) + + def test_ints_outside_range_fails(self): + self.assertNotEqual( + str(1 << 15), + json.loads(json.dumps(1 << 15, int_as_string_bitcount=16)), + ) + + def test_ints(self): + for val, expect in self.values: + self.assertEqual( + val, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, int_as_string_bitcount=31)), + ) + + def test_lists(self): + for val, expect in self.values: + val = [val, val] + expect = [expect, expect] + self.assertEqual( + val, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, int_as_string_bitcount=31))) + + def test_dicts(self): + for val, expect in self.values: + val = {'k': val} + expect = {'k': expect} + self.assertEqual( + val, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, int_as_string_bitcount=31))) + + def test_dict_keys(self): + for val, _ in self.values: + expect = {str(val): 'value'} + val = {val: 'value'} + self.assertEqual( + expect, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, int_as_string_bitcount=31))) diff --git a/simplejson/tests/test_decimal.py b/simplejson/tests/test_decimal.py index 09f062e..2b0940b 100644 --- a/simplejson/tests/test_decimal.py +++ b/simplejson/tests/test_decimal.py @@ -1,6 +1,7 @@ +import decimal from decimal import Decimal from unittest import TestCase -from StringIO import StringIO +from simplejson.compat import StringIO, reload_module import simplejson as json @@ -10,35 +11,40 @@ class TestDecimal(TestCase): sio = StringIO() json.dump(obj, sio, **kw) res = json.dumps(obj, **kw) - self.assertEquals(res, sio.getvalue()) + self.assertEqual(res, sio.getvalue()) return res def loads(self, s, **kw): sio = StringIO(s) res = json.loads(s, **kw) - self.assertEquals(res, json.load(sio, **kw)) + self.assertEqual(res, json.load(sio, **kw)) return res def test_decimal_encode(self): for d in map(Decimal, self.NUMS): - self.assertEquals(self.dumps(d, use_decimal=True), str(d)) - + self.assertEqual(self.dumps(d, use_decimal=True), str(d)) + def test_decimal_decode(self): for s in self.NUMS: - self.assertEquals(self.loads(s, parse_float=Decimal), Decimal(s)) - + self.assertEqual(self.loads(s, parse_float=Decimal), Decimal(s)) + + def test_stringify_key(self): + for d in map(Decimal, self.NUMS): + v = {d: d} + self.assertEqual( + self.loads( + self.dumps(v, use_decimal=True), parse_float=Decimal), + {str(d): d}) + def test_decimal_roundtrip(self): for d in map(Decimal, self.NUMS): # The type might not be the same (int and Decimal) but they # should still compare equal. - self.assertEquals( - self.loads( - self.dumps(d, use_decimal=True), parse_float=Decimal), - d) - self.assertEquals( - self.loads( - self.dumps([d], use_decimal=True), parse_float=Decimal), - [d]) + for v in [d, [d], {'': d}]: + self.assertEqual( + self.loads( + self.dumps(v, use_decimal=True), parse_float=Decimal), + v) def test_decimal_defaults(self): d = Decimal('1.1') @@ -46,10 +52,20 @@ class TestDecimal(TestCase): self.assertRaises(TypeError, json.dumps, d, use_decimal=False) self.assertEqual('1.1', json.dumps(d)) self.assertEqual('1.1', json.dumps(d, use_decimal=True)) - self.assertRaises(TypeError, json.dump, d, StringIO(), use_decimal=False) + self.assertRaises(TypeError, json.dump, d, StringIO(), + use_decimal=False) sio = StringIO() json.dump(d, sio) self.assertEqual('1.1', sio.getvalue()) sio = StringIO() json.dump(d, sio, use_decimal=True) self.assertEqual('1.1', sio.getvalue()) + + def test_decimal_reload(self): + # Simulate a subinterpreter that reloads the Python modules but not + # the C code https://github.com/simplejson/simplejson/issues/34 + global Decimal + Decimal = reload_module(decimal).Decimal + import simplejson.encoder + simplejson.encoder.Decimal = Decimal + self.test_decimal_roundtrip() diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py index a140a13..30b692a 100644 --- a/simplejson/tests/test_decode.py +++ b/simplejson/tests/test_decode.py @@ -1,8 +1,9 @@ +from __future__ import absolute_import import decimal from unittest import TestCase -from StringIO import StringIO import simplejson as json +from simplejson.compat import StringIO from simplejson import OrderedDict class TestDecode(TestCase): @@ -13,19 +14,19 @@ class TestDecode(TestCase): def test_decimal(self): rval = json.loads('1.1', parse_float=decimal.Decimal) self.assertTrue(isinstance(rval, decimal.Decimal)) - self.assertEquals(rval, decimal.Decimal('1.1')) + self.assertEqual(rval, decimal.Decimal('1.1')) def test_float(self): rval = json.loads('1', parse_int=float) self.assertTrue(isinstance(rval, float)) - self.assertEquals(rval, 1.0) + self.assertEqual(rval, 1.0) def test_decoder_optimizations(self): # Several optimizations were made that skip over calls to # the whitespace regex, so this test is designed to try and # exercise the uncommon cases. The array cases are already covered. rval = json.loads('{ "key" : "value" , "k":"v" }') - self.assertEquals(rval, {"key":"value", "k":"v"}) + self.assertEqual(rval, {"key":"value", "k":"v"}) def test_empty_objects(self): s = '{}' @@ -81,3 +82,18 @@ class TestDecode(TestCase): self.assertEqual( ({'a': {}}, 9), cls(object_pairs_hook=dict).raw_decode("{\"a\": {}}")) + # https://github.com/simplejson/simplejson/pull/38 + self.assertEqual( + ({'a': {}}, 11), + cls().raw_decode(" \n{\"a\": {}}")) + + def test_bounds_checking(self): + # https://github.com/simplejson/simplejson/issues/98 + j = json.decoder.JSONDecoder() + for i in [4, 5, 6, -1, -2, -3, -4, -5, -6]: + self.assertRaises(ValueError, j.scan_once, '1234', i) + self.assertRaises(ValueError, j.raw_decode, '1234', i) + x, y = sorted(['128931233', '472389423'], key=id) + diff = id(x) - id(y) + self.assertRaises(ValueError, j.scan_once, y, diff) + self.assertRaises(ValueError, j.raw_decode, y, i) diff --git a/simplejson/tests/test_default.py b/simplejson/tests/test_default.py index 139e42b..d1eacb8 100644 --- a/simplejson/tests/test_default.py +++ b/simplejson/tests/test_default.py @@ -4,6 +4,6 @@ import simplejson as json class TestDefault(TestCase): def test_default(self): - self.assertEquals( + self.assertEqual( json.dumps(type, default=repr), json.dumps(repr(type))) diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py index 8b34004..3661de0 100644 --- a/simplejson/tests/test_dump.py +++ b/simplejson/tests/test_dump.py @@ -1,27 +1,130 @@ from unittest import TestCase -from cStringIO import StringIO - +from simplejson.compat import StringIO, long_type, b, binary_type, PY3 import simplejson as json +def as_text_type(s): + if PY3 and isinstance(s, binary_type): + return s.decode('ascii') + return s + class TestDump(TestCase): def test_dump(self): sio = StringIO() json.dump({}, sio) - self.assertEquals(sio.getvalue(), '{}') + self.assertEqual(sio.getvalue(), '{}') + + def test_constants(self): + for c in [None, True, False]: + self.assertTrue(json.loads(json.dumps(c)) is c) + self.assertTrue(json.loads(json.dumps([c]))[0] is c) + self.assertTrue(json.loads(json.dumps({'a': c}))['a'] is c) + + def test_stringify_key(self): + items = [(b('bytes'), 'bytes'), + (1.0, '1.0'), + (10, '10'), + (True, 'true'), + (False, 'false'), + (None, 'null'), + (long_type(100), '100')] + for k, expect in items: + self.assertEqual( + json.loads(json.dumps({k: expect})), + {expect: expect}) + self.assertEqual( + json.loads(json.dumps({k: expect}, sort_keys=True)), + {expect: expect}) + self.assertRaises(TypeError, json.dumps, {json: 1}) + for v in [{}, {'other': 1}, {b('derp'): 1, 'herp': 2}]: + for sort_keys in [False, True]: + v0 = dict(v) + v0[json] = 1 + v1 = dict((as_text_type(key), val) for (key, val) in v.items()) + self.assertEqual( + json.loads(json.dumps(v0, skipkeys=True, sort_keys=sort_keys)), + v1) + self.assertEqual( + json.loads(json.dumps({'': v0}, skipkeys=True, sort_keys=sort_keys)), + {'': v1}) + self.assertEqual( + json.loads(json.dumps([v0], skipkeys=True, sort_keys=sort_keys)), + [v1]) def test_dumps(self): - self.assertEquals(json.dumps({}), '{}') + self.assertEqual(json.dumps({}), '{}') def test_encode_truefalse(self): - self.assertEquals(json.dumps( + self.assertEqual(json.dumps( {True: False, False: True}, sort_keys=True), '{"false": true, "true": false}') - self.assertEquals(json.dumps( - {2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True), - '{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}') + self.assertEqual( + json.dumps( + {2: 3.0, + 4.0: long_type(5), + False: 1, + long_type(6): True, + "7": 0}, + sort_keys=True), + '{"2": 3.0, "4.0": 5, "6": true, "7": 0, "false": 1}') def test_ordered_dict(self): # http://bugs.python.org/issue6105 items = [('one', 1), ('two', 2), ('three', 3), ('four', 4), ('five', 5)] s = json.dumps(json.OrderedDict(items)) - self.assertEqual(s, '{"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}')
\ No newline at end of file + self.assertEqual( + s, + '{"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}') + + def test_indent_unknown_type_acceptance(self): + """ + A test against the regression mentioned at `github issue 29`_. + + The indent parameter should accept any type which pretends to be + an instance of int or long when it comes to being multiplied by + strings, even if it is not actually an int or long, for + backwards compatibility. + + .. _github issue 29: + http://github.com/simplejson/simplejson/issue/29 + """ + + class AwesomeInt(object): + """An awesome reimplementation of integers""" + + def __init__(self, *args, **kwargs): + if len(args) > 0: + # [construct from literals, objects, etc.] + # ... + + # Finally, if args[0] is an integer, store it + if isinstance(args[0], int): + self._int = args[0] + + # [various methods] + + def __mul__(self, other): + # [various ways to multiply AwesomeInt objects] + # ... finally, if the right-hand operand is not awesome enough, + # try to do a normal integer multiplication + if hasattr(self, '_int'): + return self._int * other + else: + raise NotImplementedError("To do non-awesome things with" + " this object, please construct it from an integer!") + + s = json.dumps([0, 1, 2], indent=AwesomeInt(3)) + self.assertEqual(s, '[\n 0,\n 1,\n 2\n]') + + def test_accumulator(self): + # the C API uses an accumulator that collects after 100,000 appends + lst = [0] * 100000 + self.assertEqual(json.loads(json.dumps(lst)), lst) + + def test_sort_keys(self): + # https://github.com/simplejson/simplejson/issues/106 + for num_keys in range(2, 32): + p = dict((str(x), x) for x in range(num_keys)) + sio = StringIO() + json.dump(p, sio, sort_keys=True) + self.assertEqual(sio.getvalue(), json.dumps(p, sort_keys=True)) + self.assertEqual(json.loads(sio.getvalue()), p) diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py index 6c40961..49706bf 100644 --- a/simplejson/tests/test_encode_basestring_ascii.py +++ b/simplejson/tests/test_encode_basestring_ascii.py @@ -1,6 +1,7 @@ from unittest import TestCase import simplejson.encoder +from simplejson.compat import b CASES = [ (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'), @@ -11,9 +12,9 @@ CASES = [ (u' s p a c e d ', '" s p a c e d "'), (u'\U0001d120', '"\\ud834\\udd20"'), (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), - ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'), (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), - ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'), (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), (u"`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'), @@ -34,10 +35,10 @@ class TestEncodeBaseStringAscii(TestCase): fname = encode_basestring_ascii.__name__ for input_string, expect in CASES: result = encode_basestring_ascii(input_string) - #self.assertEquals(result, expect, + #self.assertEqual(result, expect, # '{0!r} != {1!r} for {2}({3!r})'.format( # result, expect, fname, input_string)) - self.assertEquals(result, expect, + self.assertEqual(result, expect, '%r != %r for %s(%r)' % (result, expect, fname, input_string)) def test_sorted_dict(self): diff --git a/simplejson/tests/test_encode_for_html.py b/simplejson/tests/test_encode_for_html.py index c2d5f80..f995254 100644 --- a/simplejson/tests/test_encode_for_html.py +++ b/simplejson/tests/test_encode_for_html.py @@ -1,14 +1,12 @@ import unittest -import simplejson.decoder -import simplejson.encoder - +import simplejson as json class TestEncodeForHTML(unittest.TestCase): def setUp(self): - self.decoder = simplejson.decoder.JSONDecoder() - self.encoder = simplejson.encoder.JSONEncoderForHTML() + self.decoder = json.JSONDecoder() + self.encoder = json.JSONEncoderForHTML() def test_basic_encode(self): self.assertEqual(r'"\u0026"', self.encoder.encode('&')) diff --git a/simplejson/tests/test_errors.py b/simplejson/tests/test_errors.py index 620ccf3..8dede38 100644 --- a/simplejson/tests/test_errors.py +++ b/simplejson/tests/test_errors.py @@ -1,6 +1,8 @@ +import sys, pickle from unittest import TestCase import simplejson as json +from simplejson.compat import u, b class TestErrors(TestCase): def test_string_keys_error(self): @@ -11,24 +13,39 @@ class TestErrors(TestCase): err = None try: json.loads('{}\na\nb') - except json.JSONDecodeError, e: - err = e + except json.JSONDecodeError: + err = sys.exc_info()[1] else: self.fail('Expected JSONDecodeError') - self.assertEquals(err.lineno, 2) - self.assertEquals(err.colno, 1) - self.assertEquals(err.endlineno, 3) - self.assertEquals(err.endcolno, 2) + self.assertEqual(err.lineno, 2) + self.assertEqual(err.colno, 1) + self.assertEqual(err.endlineno, 3) + self.assertEqual(err.endcolno, 2) def test_scan_error(self): err = None - for t in (str, unicode): + for t in (u, b): try: json.loads(t('{"asdf": "')) - except json.JSONDecodeError, e: - err = e + except json.JSONDecodeError: + err = sys.exc_info()[1] else: self.fail('Expected JSONDecodeError') - self.assertEquals(err.lineno, 1) - self.assertEquals(err.colno, 9) -
\ No newline at end of file + self.assertEqual(err.lineno, 1) + self.assertEqual(err.colno, 10) + + def test_error_is_pickable(self): + err = None + try: + json.loads('{}\na\nb') + except json.JSONDecodeError: + err = sys.exc_info()[1] + else: + self.fail('Expected JSONDecodeError') + s = pickle.dumps(err) + e = pickle.loads(s) + + self.assertEqual(err.msg, e.msg) + self.assertEqual(err.doc, e.doc) + self.assertEqual(err.pos, e.pos) + self.assertEqual(err.end, e.end) diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py index 646c0f4..788f3a5 100644 --- a/simplejson/tests/test_fail.py +++ b/simplejson/tests/test_fail.py @@ -1,15 +1,16 @@ +import sys from unittest import TestCase import simplejson as json -# Fri Dec 30 18:57:26 2005 +# 2007-10-05 JSONDOCS = [ # http://json.org/JSON_checker/test/fail1.json '"A JSON payload should be an object or array, not a string."', # http://json.org/JSON_checker/test/fail2.json '["Unclosed array"', # http://json.org/JSON_checker/test/fail3.json - '{unquoted_key: "keys must be quoted}', + '{unquoted_key: "keys must be quoted"}', # http://json.org/JSON_checker/test/fail4.json '["extra comma",]', # http://json.org/JSON_checker/test/fail5.json @@ -35,7 +36,7 @@ JSONDOCS = [ # http://json.org/JSON_checker/test/fail15.json '["Illegal backslash escape: \\x15"]', # http://json.org/JSON_checker/test/fail16.json - '["Illegal backslash escape: \\\'"]', + '[\\naked]', # http://json.org/JSON_checker/test/fail17.json '["Illegal backslash escape: \\017"]', # http://json.org/JSON_checker/test/fail18.json @@ -52,8 +53,51 @@ JSONDOCS = [ '["Bad value", truth]', # http://json.org/JSON_checker/test/fail24.json "['single quote']", + # http://json.org/JSON_checker/test/fail25.json + '["\ttab\tcharacter\tin\tstring\t"]', + # http://json.org/JSON_checker/test/fail26.json + '["tab\\ character\\ in\\ string\\ "]', + # http://json.org/JSON_checker/test/fail27.json + '["line\nbreak"]', + # http://json.org/JSON_checker/test/fail28.json + '["line\\\nbreak"]', + # http://json.org/JSON_checker/test/fail29.json + '[0e]', + # http://json.org/JSON_checker/test/fail30.json + '[0e+]', + # http://json.org/JSON_checker/test/fail31.json + '[0e+-1]', + # http://json.org/JSON_checker/test/fail32.json + '{"Comma instead if closing brace": true,', + # http://json.org/JSON_checker/test/fail33.json + '["mismatch"}', # http://code.google.com/p/simplejson/issues/detail?id=3 u'["A\u001FZ control characters in string"]', + # misc based on coverage + '{', + '{]', + '{"foo": "bar"]', + '{"foo": "bar"', + 'nul', + 'nulx', + '-', + '-x', + '-e', + '-e0', + '-Infinite', + '-Inf', + 'Infinit', + 'Infinite', + 'NaM', + 'NuN', + 'falsy', + 'fal', + 'trug', + 'tru', + '1e', + '1ex', + '1e-', + '1e-x', ] SKIPS = { @@ -73,7 +117,6 @@ class TestFail(TestCase): except json.JSONDecodeError: pass else: - #self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc)) self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) def test_array_decoder_issue46(self): @@ -81,11 +124,53 @@ class TestFail(TestCase): for doc in [u'[,]', '[,]']: try: json.loads(doc) - except json.JSONDecodeError, e: - self.assertEquals(e.pos, 1) - self.assertEquals(e.lineno, 1) - self.assertEquals(e.colno, 1) - except Exception, e: + except json.JSONDecodeError: + e = sys.exc_info()[1] + self.assertEqual(e.pos, 1) + self.assertEqual(e.lineno, 1) + self.assertEqual(e.colno, 2) + except Exception: + e = sys.exc_info()[1] + self.fail("Unexpected exception raised %r %s" % (e, e)) + else: + self.fail("Unexpected success parsing '[,]'") + + def test_truncated_input(self): + test_cases = [ + ('', 'Expecting value', 0), + ('[', "Expecting value or ']'", 1), + ('[42', "Expecting ',' delimiter", 3), + ('[42,', 'Expecting value', 4), + ('["', 'Unterminated string starting at', 1), + ('["spam', 'Unterminated string starting at', 1), + ('["spam"', "Expecting ',' delimiter", 7), + ('["spam",', 'Expecting value', 8), + ('{', 'Expecting property name enclosed in double quotes', 1), + ('{"', 'Unterminated string starting at', 1), + ('{"spam', 'Unterminated string starting at', 1), + ('{"spam"', "Expecting ':' delimiter", 7), + ('{"spam":', 'Expecting value', 8), + ('{"spam":42', "Expecting ',' delimiter", 10), + ('{"spam":42,', 'Expecting property name enclosed in double quotes', + 11), + ('"', 'Unterminated string starting at', 0), + ('"spam', 'Unterminated string starting at', 0), + ('[,', "Expecting value", 1), + ] + for data, msg, idx in test_cases: + try: + json.loads(data) + except json.JSONDecodeError: + e = sys.exc_info()[1] + self.assertEqual( + e.msg[:len(msg)], + msg, + "%r doesn't start with %r for %r" % (e.msg, msg, data)) + self.assertEqual( + e.pos, idx, + "pos %r != %r for %r" % (e.pos, idx, data)) + except Exception: + e = sys.exc_info()[1] self.fail("Unexpected exception raised %r %s" % (e, e)) else: - self.fail("Unexpected success parsing '[,]'")
\ No newline at end of file + self.fail("Unexpected success parsing '%r'" % (data,)) diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py index 94502c6..e382ec2 100644 --- a/simplejson/tests/test_float.py +++ b/simplejson/tests/test_float.py @@ -1,19 +1,35 @@ import math from unittest import TestCase - +from simplejson.compat import long_type, text_type import simplejson as json +from simplejson.decoder import NaN, PosInf, NegInf class TestFloat(TestCase): + def test_degenerates_allow(self): + for inf in (PosInf, NegInf): + self.assertEqual(json.loads(json.dumps(inf)), inf) + # Python 2.5 doesn't have math.isnan + nan = json.loads(json.dumps(NaN)) + self.assertTrue((0 + nan) != nan) + + def test_degenerates_ignore(self): + for f in (PosInf, NegInf, NaN): + self.assertEqual(json.loads(json.dumps(f, ignore_nan=True)), None) + + def test_degenerates_deny(self): + for f in (PosInf, NegInf, NaN): + self.assertRaises(ValueError, json.dumps, f, allow_nan=False) + def test_floats(self): for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]: - self.assertEquals(float(json.dumps(num)), num) - self.assertEquals(json.loads(json.dumps(num)), num) - self.assertEquals(json.loads(unicode(json.dumps(num))), num) + self.assertEqual(float(json.dumps(num)), num) + self.assertEqual(json.loads(json.dumps(num)), num) + self.assertEqual(json.loads(text_type(json.dumps(num))), num) def test_ints(self): - for num in [1, 1L, 1<<32, 1<<64]: - self.assertEquals(json.dumps(num), str(num)) - self.assertEquals(int(json.dumps(num)), num) - self.assertEquals(json.loads(json.dumps(num)), num) - self.assertEquals(json.loads(unicode(json.dumps(num))), num) + for num in [1, long_type(1), 1<<32, 1<<64]: + self.assertEqual(json.dumps(num), str(num)) + self.assertEqual(int(json.dumps(num)), num) + self.assertEqual(json.loads(json.dumps(num)), num) + self.assertEqual(json.loads(text_type(json.dumps(num))), num) diff --git a/simplejson/tests/test_for_json.py b/simplejson/tests/test_for_json.py new file mode 100644 index 0000000..b791b88 --- /dev/null +++ b/simplejson/tests/test_for_json.py @@ -0,0 +1,97 @@ +import unittest +import simplejson as json + + +class ForJson(object): + def for_json(self): + return {'for_json': 1} + + +class NestedForJson(object): + def for_json(self): + return {'nested': ForJson()} + + +class ForJsonList(object): + def for_json(self): + return ['list'] + + +class DictForJson(dict): + def for_json(self): + return {'alpha': 1} + + +class ListForJson(list): + def for_json(self): + return ['list'] + + +class TestForJson(unittest.TestCase): + def assertRoundTrip(self, obj, other, for_json=True): + if for_json is None: + # None will use the default + s = json.dumps(obj) + else: + s = json.dumps(obj, for_json=for_json) + self.assertEqual( + json.loads(s), + other) + + def test_for_json_encodes_stand_alone_object(self): + self.assertRoundTrip( + ForJson(), + ForJson().for_json()) + + def test_for_json_encodes_object_nested_in_dict(self): + self.assertRoundTrip( + {'hooray': ForJson()}, + {'hooray': ForJson().for_json()}) + + def test_for_json_encodes_object_nested_in_list_within_dict(self): + self.assertRoundTrip( + {'list': [0, ForJson(), 2, 3]}, + {'list': [0, ForJson().for_json(), 2, 3]}) + + def test_for_json_encodes_object_nested_within_object(self): + self.assertRoundTrip( + NestedForJson(), + {'nested': {'for_json': 1}}) + + def test_for_json_encodes_list(self): + self.assertRoundTrip( + ForJsonList(), + ForJsonList().for_json()) + + def test_for_json_encodes_list_within_object(self): + self.assertRoundTrip( + {'nested': ForJsonList()}, + {'nested': ForJsonList().for_json()}) + + def test_for_json_encodes_dict_subclass(self): + self.assertRoundTrip( + DictForJson(a=1), + DictForJson(a=1).for_json()) + + def test_for_json_encodes_list_subclass(self): + self.assertRoundTrip( + ListForJson(['l']), + ListForJson(['l']).for_json()) + + def test_for_json_ignored_if_not_true_with_dict_subclass(self): + for for_json in (None, False): + self.assertRoundTrip( + DictForJson(a=1), + {'a': 1}, + for_json=for_json) + + def test_for_json_ignored_if_not_true_with_list_subclass(self): + for for_json in (None, False): + self.assertRoundTrip( + ListForJson(['l']), + ['l'], + for_json=for_json) + + def test_raises_typeerror_if_for_json_not_true_with_object(self): + self.assertRaises(TypeError, json.dumps, ForJson()) + self.assertRaises(TypeError, json.dumps, ForJson(), for_json=False) diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py index 1e6bdb1..cea25a5 100644 --- a/simplejson/tests/test_indent.py +++ b/simplejson/tests/test_indent.py @@ -1,8 +1,8 @@ from unittest import TestCase +import textwrap import simplejson as json -import textwrap -from StringIO import StringIO +from simplejson.compat import StringIO class TestIndent(TestCase): def test_indent(self): @@ -42,26 +42,26 @@ class TestIndent(TestCase): h3 = json.loads(d3) h4 = json.loads(d4) - self.assertEquals(h1, h) - self.assertEquals(h2, h) - self.assertEquals(h3, h) - self.assertEquals(h4, h) - self.assertEquals(d3, expect.replace('\t', ' ')) - self.assertEquals(d4, expect.replace('\t', ' ')) + self.assertEqual(h1, h) + self.assertEqual(h2, h) + self.assertEqual(h3, h) + self.assertEqual(h4, h) + self.assertEqual(d3, expect.replace('\t', ' ')) + self.assertEqual(d4, expect.replace('\t', ' ')) # NOTE: Python 2.4 textwrap.dedent converts tabs to spaces, # so the following is expected to fail. Python 2.4 is not a # supported platform in simplejson 2.1.0+. - self.assertEquals(d2, expect) + self.assertEqual(d2, expect) def test_indent0(self): h = {3: 1} def check(indent, expected): d1 = json.dumps(h, indent=indent) - self.assertEquals(d1, expected) + self.assertEqual(d1, expected) sio = StringIO() json.dump(h, sio, indent=indent) - self.assertEquals(sio.getvalue(), expected) + self.assertEqual(sio.getvalue(), expected) # indent=0 should emit newlines check(0, '{\n"3": 1\n}') @@ -73,14 +73,14 @@ class TestIndent(TestCase): expect = '[\n1,\n2,\n3,\n4\n]' expect_spaces = '[\n1, \n2, \n3, \n4\n]' # Ensure that separators still works - self.assertEquals( + self.assertEqual( expect_spaces, json.dumps(lst, indent=0, separators=(', ', ': '))) # Force the new defaults - self.assertEquals( + self.assertEqual( expect, json.dumps(lst, indent=0, separators=(',', ': '))) # Added in 2.1.4 - self.assertEquals( + self.assertEqual( expect, - json.dumps(lst, indent=0))
\ No newline at end of file + json.dumps(lst, indent=0)) diff --git a/simplejson/tests/test_item_sort_key.py b/simplejson/tests/test_item_sort_key.py new file mode 100644 index 0000000..b05bfc8 --- /dev/null +++ b/simplejson/tests/test_item_sort_key.py @@ -0,0 +1,20 @@ +from unittest import TestCase + +import simplejson as json +from operator import itemgetter + +class TestItemSortKey(TestCase): + def test_simple_first(self): + a = {'a': 1, 'c': 5, 'jack': 'jill', 'pick': 'axe', 'array': [1, 5, 6, 9], 'tuple': (83, 12, 3), 'crate': 'dog', 'zeak': 'oh'} + self.assertEqual( + '{"a": 1, "c": 5, "crate": "dog", "jack": "jill", "pick": "axe", "zeak": "oh", "array": [1, 5, 6, 9], "tuple": [83, 12, 3]}', + json.dumps(a, item_sort_key=json.simple_first)) + + def test_case(self): + a = {'a': 1, 'c': 5, 'Jack': 'jill', 'pick': 'axe', 'Array': [1, 5, 6, 9], 'tuple': (83, 12, 3), 'crate': 'dog', 'zeak': 'oh'} + self.assertEqual( + '{"Array": [1, 5, 6, 9], "Jack": "jill", "a": 1, "c": 5, "crate": "dog", "pick": "axe", "tuple": [83, 12, 3], "zeak": "oh"}', + json.dumps(a, item_sort_key=itemgetter(0))) + self.assertEqual( + '{"a": 1, "Array": [1, 5, 6, 9], "c": 5, "crate": "dog", "Jack": "jill", "pick": "axe", "tuple": [83, 12, 3], "zeak": "oh"}', + json.dumps(a, item_sort_key=lambda kv: kv[0].lower())) diff --git a/simplejson/tests/test_namedtuple.py b/simplejson/tests/test_namedtuple.py index 18da218..4387894 100644 --- a/simplejson/tests/test_namedtuple.py +++ b/simplejson/tests/test_namedtuple.py @@ -1,6 +1,7 @@ +from __future__ import absolute_import import unittest import simplejson as json -from StringIO import StringIO +from simplejson.compat import StringIO try: from collections import namedtuple @@ -21,11 +22,36 @@ else: Value = namedtuple('Value', ['value']) Point = namedtuple('Point', ['x', 'y']) +class DuckValue(object): + def __init__(self, *args): + self.value = Value(*args) + + def _asdict(self): + return self.value._asdict() + +class DuckPoint(object): + def __init__(self, *args): + self.point = Point(*args) + + def _asdict(self): + return self.point._asdict() + +class DeadDuck(object): + _asdict = None + +class DeadDict(dict): + _asdict = None + +CONSTRUCTORS = [ + lambda v: v, + lambda v: [v], + lambda v: [{'key': v}], +] + class TestNamedTuple(unittest.TestCase): def test_namedtuple_dumps(self): - for v in [Value(1), Point(1, 2)]: + for v in [Value(1), Point(1, 2), DuckValue(1), DuckPoint(1, 2)]: d = v._asdict() - l = list(v) self.assertEqual(d, json.loads(json.dumps(v))) self.assertEqual( d, @@ -35,6 +61,10 @@ class TestNamedTuple(unittest.TestCase): d, json.loads(json.dumps(v, namedtuple_as_object=True, tuple_as_array=False))) + + def test_namedtuple_dumps_false(self): + for v in [Value(1), Point(1, 2)]: + l = list(v) self.assertEqual( l, json.loads(json.dumps(v, namedtuple_as_object=False))) @@ -42,9 +72,8 @@ class TestNamedTuple(unittest.TestCase): tuple_as_array=False, namedtuple_as_object=False) def test_namedtuple_dump(self): - for v in [Value(1), Point(1, 2)]: + for v in [Value(1), Point(1, 2), DuckValue(1), DuckPoint(1, 2)]: d = v._asdict() - l = list(v) sio = StringIO() json.dump(v, sio) self.assertEqual(d, json.loads(sio.getvalue())) @@ -62,6 +91,10 @@ class TestNamedTuple(unittest.TestCase): self.assertEqual( d, json.loads(sio.getvalue())) + + def test_namedtuple_dump_false(self): + for v in [Value(1), Point(1, 2)]: + l = list(v) sio = StringIO() json.dump(v, sio, namedtuple_as_object=False) self.assertEqual( @@ -69,3 +102,21 @@ class TestNamedTuple(unittest.TestCase): json.loads(sio.getvalue())) self.assertRaises(TypeError, json.dump, v, StringIO(), tuple_as_array=False, namedtuple_as_object=False) + + def test_asdict_not_callable_dump(self): + for f in CONSTRUCTORS: + self.assertRaises(TypeError, + json.dump, f(DeadDuck()), StringIO(), namedtuple_as_object=True) + sio = StringIO() + json.dump(f(DeadDict()), sio, namedtuple_as_object=True) + self.assertEqual( + json.dumps(f({})), + sio.getvalue()) + + def test_asdict_not_callable_dumps(self): + for f in CONSTRUCTORS: + self.assertRaises(TypeError, + json.dumps, f(DeadDuck()), namedtuple_as_object=True) + self.assertEqual( + json.dumps(f({})), + json.dumps(f(DeadDict()), namedtuple_as_object=True)) diff --git a/simplejson/tests/test_pass1.py b/simplejson/tests/test_pass1.py index c3d6302..f0b5b10 100644 --- a/simplejson/tests/test_pass1.py +++ b/simplejson/tests/test_pass1.py @@ -18,7 +18,7 @@ JSON = r''' "real": -9876.543210, "e": 0.123456789e-12, "E": 1.234567890E+34, - "": 23456789012E666, + "": 23456789012E66, "zero": 0, "one": 1, "space": " ", @@ -44,8 +44,7 @@ JSON = r''' , -4 , 5 , 6 ,7 ], - "compact": [1,2,3,4,5,6,7], +4 , 5 , 6 ,7 ],"compact": [1,2,3,4,5,6,7], "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", "quotes": "" \u0022 %22 0x22 034 "", "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" @@ -56,9 +55,11 @@ JSON = r''' 99.44 , -1066 - - +1066, +1e1, +0.1e1, +1e-1, +1e00,2e+00,2e-00 ,"rosebud"] ''' @@ -67,10 +68,4 @@ class TestPass1(TestCase): # test in/out equivalence and parsing res = json.loads(JSON) out = json.dumps(res) - self.assertEquals(res, json.loads(out)) - try: - json.dumps(res, allow_nan=False) - except ValueError: - pass - else: - self.fail("23456789012E666 should be out of range") + self.assertEqual(res, json.loads(out)) diff --git a/simplejson/tests/test_pass2.py b/simplejson/tests/test_pass2.py index de4ee00..5d812b3 100644 --- a/simplejson/tests/test_pass2.py +++ b/simplejson/tests/test_pass2.py @@ -11,4 +11,4 @@ class TestPass2(TestCase): # test in/out equivalence and parsing res = json.loads(JSON) out = json.dumps(res) - self.assertEquals(res, json.loads(out)) + self.assertEqual(res, json.loads(out)) diff --git a/simplejson/tests/test_pass3.py b/simplejson/tests/test_pass3.py index f591aba..821d60b 100644 --- a/simplejson/tests/test_pass3.py +++ b/simplejson/tests/test_pass3.py @@ -17,4 +17,4 @@ class TestPass3(TestCase): # test in/out equivalence and parsing res = json.loads(JSON) out = json.dumps(res) - self.assertEquals(res, json.loads(out)) + self.assertEqual(res, json.loads(out)) diff --git a/simplejson/tests/test_recursion.py b/simplejson/tests/test_recursion.py index 83a1d88..662eb66 100644 --- a/simplejson/tests/test_recursion.py +++ b/simplejson/tests/test_recursion.py @@ -57,7 +57,7 @@ class TestRecursion(TestCase): def test_defaultrecursion(self): enc = RecursiveJSONEncoder() - self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"') + self.assertEqual(enc.encode(JSONTestObject), '"JSONTestObject"') enc.recurse = True try: enc.encode(JSONTestObject) diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py index a7fcd46..3d98f0d 100644 --- a/simplejson/tests/test_scanstring.py +++ b/simplejson/tests/test_scanstring.py @@ -3,8 +3,17 @@ from unittest import TestCase import simplejson as json import simplejson.decoder +from simplejson.compat import b, PY3 class TestScanString(TestCase): + # The bytes type is intentionally not used in most of these tests + # under Python 3 because the decoder immediately coerces to str before + # calling scanstring. In Python 2 we are testing the code paths + # for both unicode and str. + # + # The reason this is done is because Python 3 would require + # entirely different code paths for parsing bytes and str. + # def test_py_scanstring(self): self._test_scanstring(simplejson.decoder.py_scanstring) @@ -14,104 +23,172 @@ class TestScanString(TestCase): self._test_scanstring(simplejson.decoder.c_scanstring) def _test_scanstring(self, scanstring): - self.assertEquals( - scanstring('"z\\ud834\\udd20x"', 1, None, True), - (u'z\U0001d120x', 16)) - if sys.maxunicode == 65535: - self.assertEquals( + self.assertEqual( scanstring(u'"z\U0001d120x"', 1, None, True), (u'z\U0001d120x', 6)) else: - self.assertEquals( + self.assertEqual( scanstring(u'"z\U0001d120x"', 1, None, True), (u'z\U0001d120x', 5)) - self.assertEquals( + self.assertEqual( scanstring('"\\u007b"', 1, None, True), (u'{', 8)) - self.assertEquals( + self.assertEqual( scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True), (u'A JSON payload should be an object or array, not a string.', 60)) - self.assertEquals( + self.assertEqual( scanstring('["Unclosed array"', 2, None, True), (u'Unclosed array', 17)) - self.assertEquals( + self.assertEqual( scanstring('["extra comma",]', 2, None, True), (u'extra comma', 14)) - self.assertEquals( + self.assertEqual( scanstring('["double extra comma",,]', 2, None, True), (u'double extra comma', 21)) - self.assertEquals( + self.assertEqual( scanstring('["Comma after the close"],', 2, None, True), (u'Comma after the close', 24)) - self.assertEquals( + self.assertEqual( scanstring('["Extra close"]]', 2, None, True), (u'Extra close', 14)) - self.assertEquals( + self.assertEqual( scanstring('{"Extra comma": true,}', 2, None, True), (u'Extra comma', 14)) - self.assertEquals( + self.assertEqual( scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True), (u'Extra value after close', 26)) - self.assertEquals( + self.assertEqual( scanstring('{"Illegal expression": 1 + 2}', 2, None, True), (u'Illegal expression', 21)) - self.assertEquals( + self.assertEqual( scanstring('{"Illegal invocation": alert()}', 2, None, True), (u'Illegal invocation', 21)) - self.assertEquals( + self.assertEqual( scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True), (u'Numbers cannot have leading zeroes', 37)) - self.assertEquals( + self.assertEqual( scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True), (u'Numbers cannot be hex', 24)) - self.assertEquals( + self.assertEqual( scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True), (u'Too deep', 30)) - self.assertEquals( + self.assertEqual( scanstring('{"Missing colon" null}', 2, None, True), (u'Missing colon', 16)) - self.assertEquals( + self.assertEqual( scanstring('{"Double colon":: null}', 2, None, True), (u'Double colon', 15)) - self.assertEquals( + self.assertEqual( scanstring('{"Comma instead of colon", null}', 2, None, True), (u'Comma instead of colon', 25)) - self.assertEquals( + self.assertEqual( scanstring('["Colon instead of comma": false]', 2, None, True), (u'Colon instead of comma', 25)) - self.assertEquals( + self.assertEqual( scanstring('["Bad value", truth]', 2, None, True), (u'Bad value', 12)) + for c in map(chr, range(0x00, 0x1f)): + self.assertEqual( + scanstring(c + '"', 0, None, False), + (c, 2)) + self.assertRaises( + ValueError, + scanstring, c + '"', 0, None, True) + + self.assertRaises(ValueError, scanstring, '', 0, None, True) + self.assertRaises(ValueError, scanstring, 'a', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u0', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u01', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u012', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u0123', 0, None, True) + if sys.maxunicode > 65535: + self.assertRaises(ValueError, + scanstring, '\\ud834\\u"', 0, None, True) + self.assertRaises(ValueError, + scanstring, '\\ud834\\x0123"', 0, None, True) + def test_issue3623(self): self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, "xxx") self.assertRaises(UnicodeDecodeError, - json.encoder.encode_basestring_ascii, "xx\xff") + json.encoder.encode_basestring_ascii, b("xx\xff")) def test_overflow(self): - # Python 2.5 does not have maxsize - maxsize = getattr(sys, 'maxsize', sys.maxint) + # Python 2.5 does not have maxsize, Python 3 does not have maxint + maxsize = getattr(sys, 'maxsize', getattr(sys, 'maxint', None)) + assert maxsize is not None self.assertRaises(OverflowError, json.decoder.scanstring, "xxx", maxsize + 1) + def test_surrogates(self): + scanstring = json.decoder.scanstring + + def assertScan(given, expect, test_utf8=True): + givens = [given] + if not PY3 and test_utf8: + givens.append(given.encode('utf8')) + for given in givens: + (res, count) = scanstring(given, 1, None, True) + self.assertEqual(len(given), count) + self.assertEqual(res, expect) + + assertScan( + u'"z\\ud834\\u0079x"', + u'z\ud834yx') + assertScan( + u'"z\\ud834\\udd20x"', + u'z\U0001d120x') + assertScan( + u'"z\\ud834\\ud834\\udd20x"', + u'z\ud834\U0001d120x') + assertScan( + u'"z\\ud834x"', + u'z\ud834x') + assertScan( + u'"z\\udd20x"', + u'z\udd20x') + assertScan( + u'"z\ud834x"', + u'z\ud834x') + # It may look strange to join strings together, but Python is drunk. + # https://gist.github.com/etrepum/5538443 + assertScan( + u'"z\\ud834\udd20x12345"', + u''.join([u'z\ud834', u'\udd20x12345'])) + assertScan( + u'"z\ud834\\udd20x"', + u''.join([u'z\ud834', u'\udd20x'])) + # these have different behavior given UTF8 input, because the surrogate + # pair may be joined (in maxunicode > 65535 builds) + assertScan( + u''.join([u'"z\ud834', u'\udd20x"']), + u''.join([u'z\ud834', u'\udd20x']), + test_utf8=False) + + self.assertRaises(ValueError, + scanstring, u'"z\\ud83x"', 1, None, True) + self.assertRaises(ValueError, + scanstring, u'"z\\ud834\\udd2x"', 1, None, True) diff --git a/simplejson/tests/test_separators.py b/simplejson/tests/test_separators.py index cbda93c..91b4d4f 100644 --- a/simplejson/tests/test_separators.py +++ b/simplejson/tests/test_separators.py @@ -37,6 +37,6 @@ class TestSeparators(TestCase): h1 = json.loads(d1) h2 = json.loads(d2) - self.assertEquals(h1, h) - self.assertEquals(h2, h) - self.assertEquals(d2, expect) + self.assertEqual(h1, h) + self.assertEqual(h2, h) + self.assertEqual(d2, expect) diff --git a/simplejson/tests/test_speedups.py b/simplejson/tests/test_speedups.py index 825ecf2..0a2b63b 100644 --- a/simplejson/tests/test_speedups.py +++ b/simplejson/tests/test_speedups.py @@ -1,20 +1,39 @@ +import sys +import unittest from unittest import TestCase from simplejson import encoder, scanner + def has_speedups(): return encoder.c_make_encoder is not None + +def skip_if_speedups_missing(func): + def wrapper(*args, **kwargs): + if not has_speedups(): + if hasattr(unittest, 'SkipTest'): + raise unittest.SkipTest("C Extension not available") + else: + sys.stdout.write("C Extension not available") + return + return func(*args, **kwargs) + + return wrapper + + class TestDecode(TestCase): + @skip_if_speedups_missing def test_make_scanner(self): - if not has_speedups(): - return self.assertRaises(AttributeError, scanner.c_make_scanner, 1) + @skip_if_speedups_missing def test_make_encoder(self): - if not has_speedups(): - return - self.assertRaises(TypeError, encoder.c_make_encoder, + self.assertRaises( + TypeError, + encoder.c_make_encoder, None, - "\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75", - None) + ("\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7" + "\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75"), + None + ) diff --git a/simplejson/tests/test_subclass.py b/simplejson/tests/test_subclass.py new file mode 100644 index 0000000..2bae3b6 --- /dev/null +++ b/simplejson/tests/test_subclass.py @@ -0,0 +1,37 @@ +from unittest import TestCase +import simplejson as json + +from decimal import Decimal + +class AlternateInt(int): + def __repr__(self): + return 'invalid json' + __str__ = __repr__ + + +class AlternateFloat(float): + def __repr__(self): + return 'invalid json' + __str__ = __repr__ + + +# class AlternateDecimal(Decimal): +# def __repr__(self): +# return 'invalid json' + + +class TestSubclass(TestCase): + def test_int(self): + self.assertEqual(json.dumps(AlternateInt(1)), '1') + self.assertEqual(json.dumps(AlternateInt(-1)), '-1') + self.assertEqual(json.loads(json.dumps({AlternateInt(1): 1})), {'1': 1}) + + def test_float(self): + self.assertEqual(json.dumps(AlternateFloat(1.0)), '1.0') + self.assertEqual(json.dumps(AlternateFloat(-1.0)), '-1.0') + self.assertEqual(json.loads(json.dumps({AlternateFloat(1.0): 1})), {'1.0': 1}) + + # NOTE: Decimal subclasses are not supported as-is + # def test_decimal(self): + # self.assertEqual(json.dumps(AlternateDecimal('1.0')), '1.0') + # self.assertEqual(json.dumps(AlternateDecimal('-1.0')), '-1.0') diff --git a/simplejson/tests/test_tool.py b/simplejson/tests/test_tool.py new file mode 100644 index 0000000..ac2a14c --- /dev/null +++ b/simplejson/tests/test_tool.py @@ -0,0 +1,97 @@ +from __future__ import with_statement +import os +import sys +import textwrap +import unittest +import subprocess +import tempfile +try: + # Python 3.x + from test.support import strip_python_stderr +except ImportError: + # Python 2.6+ + try: + from test.test_support import strip_python_stderr + except ImportError: + # Python 2.5 + import re + def strip_python_stderr(stderr): + return re.sub( + r"\[\d+ refs\]\r?\n?$".encode(), + "".encode(), + stderr).strip() + +class TestTool(unittest.TestCase): + data = """ + + [["blorpie"],[ "whoops" ] , [ + ],\t"d-shtaeou",\r"d-nthiouh", + "i-vhbjkhnth", {"nifty":87}, {"morefield" :\tfalse,"field" + :"yes"} ] + """ + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ], + [ + "whoops" + ], + [], + "d-shtaeou", + "d-nthiouh", + "i-vhbjkhnth", + { + "nifty": 87 + }, + { + "field": "yes", + "morefield": false + } + ] + """) + + def runTool(self, args=None, data=None): + argv = [sys.executable, '-m', 'simplejson.tool'] + if args: + argv.extend(args) + proc = subprocess.Popen(argv, + stdin=subprocess.PIPE, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE) + out, err = proc.communicate(data) + self.assertEqual(strip_python_stderr(err), ''.encode()) + self.assertEqual(proc.returncode, 0) + return out + + def test_stdin_stdout(self): + self.assertEqual( + self.runTool(data=self.data.encode()), + self.expect.encode()) + + def test_infile_stdout(self): + with tempfile.NamedTemporaryFile() as infile: + infile.write(self.data.encode()) + infile.flush() + self.assertEqual( + self.runTool(args=[infile.name]), + self.expect.encode()) + + def test_infile_outfile(self): + with tempfile.NamedTemporaryFile() as infile: + infile.write(self.data.encode()) + infile.flush() + # outfile will get overwritten by tool, so the delete + # may not work on some platforms. Do it manually. + outfile = tempfile.NamedTemporaryFile() + try: + self.assertEqual( + self.runTool(args=[infile.name, outfile.name]), + ''.encode()) + with open(outfile.name, 'rb') as f: + self.assertEqual(f.read(), self.expect.encode()) + finally: + outfile.close() + if os.path.exists(outfile.name): + os.unlink(outfile.name) diff --git a/simplejson/tests/test_tuple.py b/simplejson/tests/test_tuple.py index cff9a75..4ad7b0e 100644 --- a/simplejson/tests/test_tuple.py +++ b/simplejson/tests/test_tuple.py @@ -1,6 +1,6 @@ import unittest -from StringIO import StringIO +from simplejson.compat import StringIO import simplejson as json class TestTuples(unittest.TestCase): @@ -13,7 +13,8 @@ class TestTuples(unittest.TestCase): self.assertRaises(TypeError, json.dumps, t, tuple_as_array=False) # Ensure that the "default" does not get called self.assertEqual(expect, json.dumps(t, default=repr)) - self.assertEqual(expect, json.dumps(t, tuple_as_array=True, default=repr)) + self.assertEqual(expect, json.dumps(t, tuple_as_array=True, + default=repr)) # Ensure that the "default" gets called self.assertEqual( json.dumps(repr(t)), @@ -29,7 +30,8 @@ class TestTuples(unittest.TestCase): sio = StringIO() json.dump(t, sio, tuple_as_array=True) self.assertEqual(expect, sio.getvalue()) - self.assertRaises(TypeError, json.dump, t, StringIO(), tuple_as_array=False) + self.assertRaises(TypeError, json.dump, t, StringIO(), + tuple_as_array=False) # Ensure that the "default" does not get called sio = StringIO() json.dump(t, sio, default=repr) diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py index 83fe65b..3b37f65 100644 --- a/simplejson/tests/test_unicode.py +++ b/simplejson/tests/test_unicode.py @@ -1,6 +1,9 @@ +import sys +import codecs from unittest import TestCase import simplejson as json +from simplejson.compat import unichr, text_type, b, u, BytesIO class TestUnicode(TestCase): def test_encoding1(self): @@ -9,51 +12,51 @@ class TestUnicode(TestCase): s = u.encode('utf-8') ju = encoder.encode(u) js = encoder.encode(s) - self.assertEquals(ju, js) + self.assertEqual(ju, js) def test_encoding2(self): u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' s = u.encode('utf-8') ju = json.dumps(u, encoding='utf-8') js = json.dumps(s, encoding='utf-8') - self.assertEquals(ju, js) + self.assertEqual(ju, js) def test_encoding3(self): u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' j = json.dumps(u) - self.assertEquals(j, '"\\u03b1\\u03a9"') + self.assertEqual(j, '"\\u03b1\\u03a9"') def test_encoding4(self): u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' j = json.dumps([u]) - self.assertEquals(j, '["\\u03b1\\u03a9"]') + self.assertEqual(j, '["\\u03b1\\u03a9"]') def test_encoding5(self): u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' j = json.dumps(u, ensure_ascii=False) - self.assertEquals(j, u'"' + u + u'"') + self.assertEqual(j, u'"' + u + u'"') def test_encoding6(self): u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' j = json.dumps([u], ensure_ascii=False) - self.assertEquals(j, u'["' + u + u'"]') + self.assertEqual(j, u'["' + u + u'"]') def test_big_unicode_encode(self): u = u'\U0001d120' - self.assertEquals(json.dumps(u), '"\\ud834\\udd20"') - self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"') + self.assertEqual(json.dumps(u), '"\\ud834\\udd20"') + self.assertEqual(json.dumps(u, ensure_ascii=False), u'"\U0001d120"') def test_big_unicode_decode(self): u = u'z\U0001d120x' - self.assertEquals(json.loads('"' + u + '"'), u) - self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u) + self.assertEqual(json.loads('"' + u + '"'), u) + self.assertEqual(json.loads('"z\\ud834\\udd20x"'), u) def test_unicode_decode(self): for i in range(0, 0xd7ff): u = unichr(i) #s = '"\\u{0:04x}"'.format(i) s = '"\\u%04x"' % (i,) - self.assertEquals(json.loads(s), u) + self.assertEqual(json.loads(s), u) def test_object_pairs_hook_with_unicode(self): s = u'{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}' @@ -72,38 +75,79 @@ class TestUnicode(TestCase): def test_default_encoding(self): - self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')), + self.assertEqual(json.loads(u'{"a": "\xe9"}'.encode('utf-8')), {'a': u'\xe9'}) def test_unicode_preservation(self): - self.assertEquals(type(json.loads(u'""')), unicode) - self.assertEquals(type(json.loads(u'"a"')), unicode) - self.assertEquals(type(json.loads(u'["a"]')[0]), unicode) + self.assertEqual(type(json.loads(u'""')), text_type) + self.assertEqual(type(json.loads(u'"a"')), text_type) + self.assertEqual(type(json.loads(u'["a"]')[0]), text_type) def test_ensure_ascii_false_returns_unicode(self): # http://code.google.com/p/simplejson/issues/detail?id=48 - self.assertEquals(type(json.dumps([], ensure_ascii=False)), unicode) - self.assertEquals(type(json.dumps(0, ensure_ascii=False)), unicode) - self.assertEquals(type(json.dumps({}, ensure_ascii=False)), unicode) - self.assertEquals(type(json.dumps("", ensure_ascii=False)), unicode) + self.assertEqual(type(json.dumps([], ensure_ascii=False)), text_type) + self.assertEqual(type(json.dumps(0, ensure_ascii=False)), text_type) + self.assertEqual(type(json.dumps({}, ensure_ascii=False)), text_type) + self.assertEqual(type(json.dumps("", ensure_ascii=False)), text_type) def test_ensure_ascii_false_bytestring_encoding(self): # http://code.google.com/p/simplejson/issues/detail?id=48 - doc1 = {u'quux': 'Arr\xc3\xaat sur images'} - doc2 = {u'quux': u'Arr\xeat sur images'} + doc1 = {u'quux': b('Arr\xc3\xaat sur images')} + doc2 = {u'quux': u('Arr\xeat sur images')} doc_ascii = '{"quux": "Arr\\u00eat sur images"}' doc_unicode = u'{"quux": "Arr\xeat sur images"}' - self.assertEquals(json.dumps(doc1), doc_ascii) - self.assertEquals(json.dumps(doc2), doc_ascii) - self.assertEquals(json.dumps(doc1, ensure_ascii=False), doc_unicode) - self.assertEquals(json.dumps(doc2, ensure_ascii=False), doc_unicode) + self.assertEqual(json.dumps(doc1), doc_ascii) + self.assertEqual(json.dumps(doc2), doc_ascii) + self.assertEqual(json.dumps(doc1, ensure_ascii=False), doc_unicode) + self.assertEqual(json.dumps(doc2, ensure_ascii=False), doc_unicode) def test_ensure_ascii_linebreak_encoding(self): # http://timelessrepo.com/json-isnt-a-javascript-subset s1 = u'\u2029\u2028' s2 = s1.encode('utf8') expect = '"\\u2029\\u2028"' - self.assertEquals(json.dumps(s1), expect) - self.assertEquals(json.dumps(s2), expect) - self.assertEquals(json.dumps(s1, ensure_ascii=False), expect) - self.assertEquals(json.dumps(s2, ensure_ascii=False), expect) + self.assertEqual(json.dumps(s1), expect) + self.assertEqual(json.dumps(s2), expect) + self.assertEqual(json.dumps(s1, ensure_ascii=False), expect) + self.assertEqual(json.dumps(s2, ensure_ascii=False), expect) + + def test_invalid_escape_sequences(self): + # incomplete escape sequence + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1234') + # invalid escape sequence + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123x"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12x4"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1x34"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ux234"') + if sys.maxunicode > 65535: + # invalid escape sequence for low surrogate + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000x"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00x0"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0x00"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ux000"') + + def test_ensure_ascii_still_works(self): + # in the ascii range, ensure that everything is the same + for c in map(unichr, range(0, 127)): + self.assertEqual( + json.dumps(c, ensure_ascii=False), + json.dumps(c)) + snowman = u'\N{SNOWMAN}' + self.assertEqual( + json.dumps(c, ensure_ascii=False), + '"' + c + '"') + + def test_strip_bom(self): + content = u"\u3053\u3093\u306b\u3061\u308f" + json_doc = codecs.BOM_UTF8 + b(json.dumps(content)) + self.assertEqual(json.load(BytesIO(json_doc)), content) + for doc in json_doc, json_doc.decode('utf8'): + self.assertEqual(json.loads(doc), content) diff --git a/simplejson/tool.py b/simplejson/tool.py index 73370db..062e8e2 100644 --- a/simplejson/tool.py +++ b/simplejson/tool.py @@ -10,6 +10,7 @@ Usage:: Expecting property name: line 1 column 2 (char 2) """ +from __future__ import with_statement import sys import simplejson as json @@ -18,21 +19,23 @@ def main(): infile = sys.stdin outfile = sys.stdout elif len(sys.argv) == 2: - infile = open(sys.argv[1], 'rb') + infile = open(sys.argv[1], 'r') outfile = sys.stdout elif len(sys.argv) == 3: - infile = open(sys.argv[1], 'rb') - outfile = open(sys.argv[2], 'wb') + infile = open(sys.argv[1], 'r') + outfile = open(sys.argv[2], 'w') else: raise SystemExit(sys.argv[0] + " [infile [outfile]]") - try: - obj = json.load(infile, - object_pairs_hook=json.OrderedDict, - use_decimal=True) - except ValueError, e: - raise SystemExit(e) - json.dump(obj, outfile, sort_keys=True, indent=' ', use_decimal=True) - outfile.write('\n') + with infile: + try: + obj = json.load(infile, + object_pairs_hook=json.OrderedDict, + use_decimal=True) + except ValueError: + raise SystemExit(sys.exc_info()[1]) + with outfile: + json.dump(obj, outfile, sort_keys=True, indent=' ', use_decimal=True) + outfile.write('\n') if __name__ == '__main__': |