diff options
author | Bob Ippolito <bob@redivi.com> | 2012-12-30 19:55:51 -0800 |
---|---|---|
committer | Bob Ippolito <bob@redivi.com> | 2012-12-30 19:55:51 -0800 |
commit | 02e24f4f856b065aa31d79d4726ce6668a20779e (patch) | |
tree | 28fb30571a6bdb0e6b032ae62d5a92b3edf52791 | |
parent | 57d980277f8d1d6324904edd43e1a730572b64b6 (diff) | |
parent | dd76051b3f65625635d39277daf60135215c9d10 (diff) | |
download | simplejson-02e24f4f856b065aa31d79d4726ce6668a20779e.tar.gz |
Merge branch 'py3'v3.0.0
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | .travis.yml | 3 | ||||
-rw-r--r-- | CHANGES.txt | 10 | ||||
-rw-r--r-- | README.rst | 6 | ||||
-rw-r--r-- | conf.py | 4 | ||||
-rw-r--r-- | index.rst | 39 | ||||
-rw-r--r-- | setup.py | 36 | ||||
-rw-r--r-- | simplejson/__init__.py | 34 | ||||
-rw-r--r-- | simplejson/_speedups.c | 1024 | ||||
-rw-r--r-- | simplejson/compat.py | 43 | ||||
-rw-r--r-- | simplejson/decoder.py | 73 | ||||
-rw-r--r-- | simplejson/encoder.py | 141 | ||||
-rw-r--r-- | simplejson/tests/__init__.py | 20 | ||||
-rw-r--r-- | simplejson/tests/test_decimal.py | 25 | ||||
-rw-r--r-- | simplejson/tests/test_decode.py | 3 | ||||
-rw-r--r-- | simplejson/tests/test_dump.py | 63 | ||||
-rw-r--r-- | simplejson/tests/test_encode_basestring_ascii.py | 5 | ||||
-rw-r--r-- | simplejson/tests/test_errors.py | 13 | ||||
-rw-r--r-- | simplejson/tests/test_fail.py | 34 | ||||
-rw-r--r-- | simplejson/tests/test_float.py | 16 | ||||
-rw-r--r-- | simplejson/tests/test_indent.py | 6 | ||||
-rw-r--r-- | simplejson/tests/test_namedtuple.py | 3 | ||||
-rw-r--r-- | simplejson/tests/test_scanstring.py | 38 | ||||
-rw-r--r-- | simplejson/tests/test_tuple.py | 10 | ||||
-rw-r--r-- | simplejson/tests/test_unicode.py | 54 |
25 files changed, 1181 insertions, 524 deletions
@@ -2,9 +2,11 @@ *.egg *.pyc *.so +.DS_Store /MANIFEST /.coverage /coverage.xml +/htmlcov /build /dist /docs diff --git a/.travis.yml b/.travis.yml index d81b6d1..8d649d1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,8 @@ language: python python: + - "2.5" - "2.6" - "2.7" + - "3.3" + - "pypy" script: python setup.py test diff --git a/CHANGES.txt b/CHANGES.txt index f9566a9..37b5e65 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,13 @@ +Version 3.0.0 released 2012-12-30 + +* Python 3.3 is now supported, thanks to Vinay Sajip #8 +* `sort_keys`/`item_sort_key` now sort on the stringified verison of the + key, rather than the original object. This ensures that the sort + only compares string types and makes the behavior consistent between + Python 2.x and Python 3.x. +* Like other number types, Decimal instances used as keys are now + coerced to strings when use_decimal is True. + Version 2.6.2 released 2012-09-21 * JSONEncoderForHTML was not exported in the simplejson module @@ -1,7 +1,7 @@ simplejson is a simple, fast, complete, correct and extensible -JSON <http://json.org> encoder and decoder for Python 2.5+. It is -pure Python code with no dependencies, but includes an optional C -extension for a serious speed boost. +JSON <http://json.org> encoder and decoder for Python 2.5+ +and Python 3.3+. It is pure Python code with no dependencies, +but includes an optional C extension for a serious speed boost. The latest documentation for simplejson can be read online here: http://simplejson.readthedocs.org/ @@ -42,9 +42,9 @@ copyright = '2012, Bob Ippolito' # other places throughout the built documents. # # The short X.Y version. -version = '2.6' +version = '3.0' # The full version, including alpha/beta/rc tags. -release = '2.6.2' +release = '3.0.0' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: @@ -14,7 +14,7 @@ syntax (ECMA-262 3rd edition) used as a lightweight data interchange format. version of the :mod:`json` library contained in Python 2.6, but maintains compatibility with Python 2.5 and (currently) has significant performance advantages, even without using the optional C -extension for speedups. +extension for speedups. :mod:`simplejson` is also supported on Python 3.3+. Development of simplejson happens on Github: http://github.com/simplejson/simplejson @@ -24,15 +24,15 @@ Encoding basic Python object hierarchies:: >>> import simplejson as json >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print json.dumps("\"foo\bar") + >>> print(json.dumps("\"foo\bar")) "\"foo\bar" - >>> print json.dumps(u'\u1234') + >>> print(json.dumps(u'\u1234')) "\u1234" - >>> print json.dumps('\\') + >>> print(json.dumps('\\')) "\\" - >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) {"a": 0, "b": 0, "c": 0} - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO() >>> json.dump(['streaming API'], io) >>> io.getvalue() @@ -41,14 +41,15 @@ Encoding basic Python object hierarchies:: Compact encoding:: >>> import simplejson as json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',', ':')) + >>> obj = [1,2,3,{'4': 5, '6': 7}] + >>> json.dumps(obj, separators=(',', ':'), sort_keys=True) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: >>> import simplejson as json >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4 * ' ') - >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + >>> print('\n'.join([l.rstrip() for l in s.splitlines()])) { "4": 5, "6": 7 @@ -62,7 +63,7 @@ Decoding JSON:: True >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' True - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO('["streaming API"]') >>> json.load(io)[0] == 'streaming API' True @@ -218,6 +219,11 @@ Basic Usage will be sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis. + .. versionchanged:: 3.0.0 + Sorting now happens after the keys have been coerced to + strings, to avoid comparison of heterogeneously typed objects + (since this does not work in Python 3.3+) + If *item_sort_key* is a callable (not the default), then the output of dictionaries will be sorted with it. The callable will be used like this: ``sorted(dct.items(), key=item_sort_key)``. This option takes precedence @@ -226,6 +232,11 @@ Basic Usage .. versionchanged:: 2.5.0 *item_sort_key* is new in 2.5.0. + .. versionchanged:: 3.0.0 + Sorting now happens after the keys have been coerced to + strings, to avoid comparison of heterogeneously typed objects + (since this does not work in Python 3.3+) + .. note:: JSON is not a framed protocol so unlike :mod:`pickle` or :mod:`marshal` it @@ -487,6 +498,11 @@ Encoders and decoders will be sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis. + .. versionchanged:: 3.0.0 + Sorting now happens after the keys have been coerced to + strings, to avoid comparison of heterogeneously typed objects + (since this does not work in Python 3.3+) + If *item_sort_key* is a callable (not the default), then the output of dictionaries will be sorted with it. The callable will be used like this: ``sorted(dct.items(), key=item_sort_key)``. This option takes precedence @@ -495,6 +511,11 @@ Encoders and decoders .. versionchanged:: 2.5.0 *item_sort_key* is new in 2.5.0. + .. versionchanged:: 3.0.0 + Sorting now happens after the keys have been coerced to + strings, to avoid comparison of heterogeneously typed objects + (since this does not work in Python 3.3+) + If *indent* is a string, then JSON array elements and object members will be pretty-printed with a newline followed by that string repeated for each level of nesting. ``None`` (the default) selects the most compact @@ -7,15 +7,24 @@ from distutils.errors import CCompilerError, DistutilsExecError, \ DistutilsPlatformError IS_PYPY = hasattr(sys, 'pypy_translation_info') -VERSION = '2.6.2' +VERSION = '3.0.0' DESCRIPTION = "Simple, fast, extensible JSON encoder/decoder for Python" LONG_DESCRIPTION = open('README.rst', 'r').read() CLASSIFIERS = filter(None, map(str.strip, """ +Development Status :: 5 - Production/Stable Intended Audience :: Developers License :: OSI Approved :: MIT License Programming Language :: Python +Programming Language :: Python :: 2 +Programming Language :: Python :: 2.5 +Programming Language :: Python :: 2.6 +Programming Language :: Python :: 2.7 +Programming Language :: Python :: 3 +Programming Language :: Python :: 3.3 +Programming Language :: Python :: Implementation :: CPython +Programming Language :: Python :: Implementation :: PyPy Topic :: Software Development :: Libraries :: Python Modules """.splitlines())) @@ -36,13 +45,13 @@ class ve_build_ext(build_ext): def run(self): try: build_ext.run(self) - except DistutilsPlatformError, x: + except DistutilsPlatformError: raise BuildFailed() def build_extension(self, ext): try: build_ext.build_extension(self, ext) - except ext_errors, x: + except ext_errors: raise BuildFailed() @@ -89,16 +98,17 @@ def run_setup(with_binary): try: run_setup(not IS_PYPY) except BuildFailed: - BUILD_EXT_WARNING = "WARNING: The C extension could not be compiled, speedups are not enabled." - print '*' * 75 - print BUILD_EXT_WARNING - print "Failure information, if any, is above." - print "I'm retrying the build without the C extension now." - print '*' * 75 + BUILD_EXT_WARNING = ("WARNING: The C extension could not be compiled, " + "speedups are not enabled.") + print('*' * 75) + print(BUILD_EXT_WARNING) + print("Failure information, if any, is above.") + print("I'm retrying the build without the C extension now.") + print('*' * 75) run_setup(False) - print '*' * 75 - print BUILD_EXT_WARNING - print "Plain-Python installation succeeded." - print '*' * 75 + print('*' * 75) + print(BUILD_EXT_WARNING) + print("Plain-Python installation succeeded.") + print('*' * 75) diff --git a/simplejson/__init__.py b/simplejson/__init__.py index fe2bd5a..c655e92 100644 --- a/simplejson/__init__.py +++ b/simplejson/__init__.py @@ -14,15 +14,15 @@ Encoding basic Python object hierarchies:: >>> import simplejson as json >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print json.dumps("\"foo\bar") + >>> print(json.dumps("\"foo\bar")) "\"foo\bar" - >>> print json.dumps(u'\u1234') + >>> print(json.dumps(u'\u1234')) "\u1234" - >>> print json.dumps('\\') + >>> print(json.dumps('\\')) "\\" - >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) {"a": 0, "b": 0, "c": 0} - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO() >>> json.dump(['streaming API'], io) >>> io.getvalue() @@ -31,14 +31,15 @@ Encoding basic Python object hierarchies:: Compact encoding:: >>> import simplejson as json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + >>> obj = [1,2,3,{'4': 5, '6': 7}] + >>> json.dumps(obj, separators=(',',':'), sort_keys=True) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: >>> import simplejson as json >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ') - >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + >>> print('\n'.join([l.rstrip() for l in s.splitlines()])) { "4": 5, "6": 7 @@ -52,7 +53,7 @@ Decoding JSON:: True >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' True - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO('["streaming API"]') >>> json.load(io)[0] == 'streaming API' True @@ -97,7 +98,8 @@ Using simplejson.tool from the shell to validate and pretty-print:: $ echo '{ 1.2:3.4}' | python -m simplejson.tool Expecting property name: line 1 column 2 (char 2) """ -__version__ = '2.6.2' +from __future__ import absolute_import +__version__ = '3.0.0' __all__ = [ 'dump', 'dumps', 'load', 'loads', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', @@ -108,20 +110,20 @@ __author__ = 'Bob Ippolito <bob@redivi.com>' from decimal import Decimal -from decoder import JSONDecoder, JSONDecodeError -from encoder import JSONEncoder, JSONEncoderForHTML +from .decoder import JSONDecoder, JSONDecodeError +from .encoder import JSONEncoder, JSONEncoderForHTML def _import_OrderedDict(): import collections try: return collections.OrderedDict except AttributeError: - import ordered_dict + from . import ordered_dict return ordered_dict.OrderedDict OrderedDict = _import_OrderedDict() def _import_c_make_encoder(): try: - from simplejson._speedups import make_encoder + from ._speedups import make_encoder return make_encoder except ImportError: return None @@ -469,9 +471,9 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, def _toggle_speedups(enabled): - import simplejson.decoder as dec - import simplejson.encoder as enc - import simplejson.scanner as scan + from . import decoder as dec + from . import encoder as enc + from . import scanner as scan c_make_encoder = _import_c_make_encoder() if enabled: dec.scanstring = dec.c_scanstring or dec.py_scanstring diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c index be68b2d..37a43ef 100644 --- a/simplejson/_speedups.c +++ b/simplejson/_speedups.c @@ -1,11 +1,48 @@ #include "Python.h" #include "structmember.h" -#if PY_VERSION_HEX < 0x02070000 && !defined(PyOS_string_to_double) + +#if PY_MAJOR_VERSION >= 3 +#define PyInt_FromSsize_t PyLong_FromSsize_t +#define PyInt_AsSsize_t PyLong_AsSsize_t +#define PyString_Check PyBytes_Check +#define PyString_GET_SIZE PyBytes_GET_SIZE +#define PyString_AS_STRING PyBytes_AS_STRING +#define PyString_FromStringAndSize PyBytes_FromStringAndSize +#define PyInt_Check(obj) 0 +#define JSON_UNICHR Py_UCS4 +#define JSON_InternFromString PyUnicode_InternFromString +#define JSON_Intern_GET_SIZE PyUnicode_GET_SIZE +#define JSON_ASCII_Check PyUnicode_Check +#define JSON_ASCII_AS_STRING PyUnicode_AsUTF8 +#define PyInt_Type PyLong_Type +#define PyInt_FromString PyLong_FromString +#define PY2_UNUSED +#define PY3_UNUSED UNUSED +#define JSON_NewEmptyUnicode() PyUnicode_New(0, 127) +#else /* PY_MAJOR_VERSION >= 3 */ +#define PY2_UNUSED UNUSED +#define PY3_UNUSED +#define PyUnicode_READY(obj) 0 +#define PyUnicode_KIND(obj) (sizeof(Py_UNICODE)) +#define PyUnicode_DATA(obj) ((void *)(PyUnicode_AS_UNICODE(obj))) +#define PyUnicode_READ(kind, data, index) ((JSON_UNICHR)((const Py_UNICODE *)(data))[(index)]) +#define PyUnicode_GetLength PyUnicode_GET_SIZE +#define JSON_UNICHR Py_UNICODE +#define JSON_ASCII_Check PyString_Check +#define JSON_ASCII_AS_STRING PyString_AS_STRING +#define JSON_InternFromString PyString_InternFromString +#define JSON_Intern_GET_SIZE PyString_GET_SIZE +#define JSON_NewEmptyUnicode() PyUnicode_FromUnicode(NULL, 0) +#endif /* PY_MAJOR_VERSION < 3 */ + +#if PY_VERSION_HEX < 0x02070000 +#if !defined(PyOS_string_to_double) #define PyOS_string_to_double json_PyOS_string_to_double static double json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception); static double -json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) { +json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) +{ double x; assert(endptr == NULL); assert(overflow_exception == NULL); @@ -15,22 +52,32 @@ json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exce return x; } #endif -#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) +#endif /* PY_VERSION_HEX < 0x02070000 */ + +#if PY_VERSION_HEX < 0x02060000 +#if !defined(Py_TYPE) #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) #endif -#if PY_VERSION_HEX < 0x02060000 && !defined(Py_SIZE) +#if !defined(Py_SIZE) #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) #endif -#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +#if !defined(PyVarObject_HEAD_INIT) +#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, +#endif +#endif /* PY_VERSION_HEX < 0x02060000 */ + +#if PY_VERSION_HEX < 0x02050000 +#if !defined(PY_SSIZE_T_MIN) typedef int Py_ssize_t; #define PY_SSIZE_T_MAX INT_MAX #define PY_SSIZE_T_MIN INT_MIN #define PyInt_FromSsize_t PyInt_FromLong #define PyInt_AsSsize_t PyInt_AsLong #endif -#ifndef Py_IS_FINITE +#if !defined(Py_IS_FINITE) #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) #endif +#endif /* PY_VERSION_HEX < 0x02050000 */ #ifdef __GNUC__ #define UNUSED __attribute__((__unused__)) @@ -80,9 +127,10 @@ typedef struct _PyEncoderObject { PyObject *key_separator; PyObject *item_separator; PyObject *sort_keys; - PyObject *skipkeys; PyObject *key_memo; + PyObject *encoding; PyObject *Decimal; + char skipkeys; int fast_encode; int allow_nan; int use_decimal; @@ -90,37 +138,51 @@ typedef struct _PyEncoderObject { int tuple_as_array; int bigint_as_string; PyObject *item_sort_key; + PyObject *item_sort_kw; } PyEncoderObject; static PyMemberDef encoder_members[] = { {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, + {"encoding", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoding"}, {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, - {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, + {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"}, {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"}, {NULL} }; static PyObject * +JSON_ParseEncoding(PyObject *encoding); +static PyObject * +JSON_UnicodeFromChar(JSON_UNICHR c); +static PyObject * maybe_quote_bigint(PyObject *encoded, PyObject *obj); - static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); +ascii_char_size(JSON_UNICHR c); +static Py_ssize_t +ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars); static PyObject * ascii_escape_unicode(PyObject *pystr); static PyObject * ascii_escape_str(PyObject *pystr); static PyObject * py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); -void init_speedups(void); +#if PY_MAJOR_VERSION < 3 static PyObject * scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr); +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +#endif +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr); +static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); @@ -140,6 +202,8 @@ static void encoder_dealloc(PyObject *self); static int encoder_clear(PyObject *self); +static PyObject * +encoder_stringify_key(PyEncoderObject *s, PyObject *key); static int encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); static int @@ -160,16 +224,32 @@ static PyObject * encoder_encode_float(PyEncoderObject *s, PyObject *obj); static int _is_namedtuple(PyObject *obj); +static PyObject * +moduleinit(void); #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) #define MIN_EXPANSION 6 -#ifdef Py_UNICODE_WIDE -#define MAX_EXPANSION (2 * MIN_EXPANSION) -#else -#define MAX_EXPANSION MIN_EXPANSION -#endif + +static int +IS_DIGIT(JSON_UNICHR c) +{ + return c >= '0' && c <= '9'; +} + +static PyObject * +JSON_UnicodeFromChar(JSON_UNICHR c) +{ +#if PY_MAJOR_VERSION >= 3 + PyObject *rval = PyUnicode_New(1, c); + if (rval) + PyUnicode_WRITE(PyUnicode_KIND(rval), PyUnicode_DATA(rval), 0, c); + return rval; +#else /* PY_MAJOR_VERSION >= 3 */ + return PyUnicode_FromUnicode(&c, 1); +#endif /* PY_MAJOR_VERSION < 3 */ +} static PyObject * maybe_quote_bigint(PyObject *encoded, PyObject *obj) @@ -192,8 +272,12 @@ maybe_quote_bigint(PyObject *encoded, PyObject *obj) } if (PyObject_RichCompareBool(obj, big_long, Py_GE) || PyObject_RichCompareBool(obj, small_long, Py_LE)) { +#if PY_MAJOR_VERSION >= 3 + PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded); +#else PyObject* quoted = PyString_FromFormat("\"%s\"", PyString_AsString(encoded)); +#endif Py_DECREF(encoded); encoded = quoted; } @@ -232,44 +316,72 @@ _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) } static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) +ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars) { /* Escape unicode code point c to ASCII escape sequences in char *output. output must have at least 12 bytes unused to accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ - output[chars++] = '\\'; - switch (c) { - case '\\': output[chars++] = (char)c; break; - case '"': output[chars++] = (char)c; break; - case '\b': output[chars++] = 'b'; break; - case '\f': output[chars++] = 'f'; break; - case '\n': output[chars++] = 'n'; break; - case '\r': output[chars++] = 'r'; break; - case '\t': output[chars++] = 't'; break; - default: -#ifdef Py_UNICODE_WIDE - if (c >= 0x10000) { - /* UTF-16 surrogate pair */ - Py_UNICODE v = c - 0x10000; - c = 0xd800 | ((v >> 10) & 0x3ff); + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3 + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + JSON_UNICHR v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif output[chars++] = 'u'; output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; output[chars++] = "0123456789abcdef"[(c ) & 0xf]; - c = 0xdc00 | (v & 0x3ff); - output[chars++] = '\\'; - } -#endif - output[chars++] = 'u'; - output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; - output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } } return chars; } +static Py_ssize_t +ascii_char_size(JSON_UNICHR c) +{ + if (S_CHAR(c)) { + return 1; + } + else if (c == '\\' || + c == '"' || + c == '\b' || + c == '\f' || + c == '\n' || + c == '\r' || + c == '\t') { + return 2; + } + else if (c < 0x10000U) { + return MIN_EXPANSION; + } + else { + return 2 * MIN_EXPANSION; + } +} + static PyObject * ascii_escape_unicode(PyObject *pystr) { @@ -277,57 +389,62 @@ ascii_escape_unicode(PyObject *pystr) Py_ssize_t i; Py_ssize_t input_chars; Py_ssize_t output_size; - Py_ssize_t max_output_size; Py_ssize_t chars; + int kind; + void *data; PyObject *rval; char *output; - Py_UNICODE *input_unicode; - input_chars = PyUnicode_GET_SIZE(pystr); - input_unicode = PyUnicode_AS_UNICODE(pystr); + if (PyUnicode_READY(pystr)) + return NULL; - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - max_output_size = 2 + (input_chars * MAX_EXPANSION); + kind = PyUnicode_KIND(pystr); + data = PyUnicode_DATA(pystr); + input_chars = PyUnicode_GetLength(pystr); + output_size = 2; + for (i = 0; i < input_chars; i++) { + output_size += ascii_char_size(PyUnicode_READ(kind, data, i)); + } +#if PY_MAJOR_VERSION >= 3 + rval = PyUnicode_New(output_size, 127); + if (rval == NULL) { + return NULL; + } + assert(PyUnicode_KIND(rval) == PyUnicode_1BYTE_KIND); + output = (char *)PyUnicode_DATA(rval); +#else rval = PyString_FromStringAndSize(NULL, output_size); if (rval == NULL) { return NULL; } output = PyString_AS_STRING(rval); +#endif chars = 0; output[chars++] = '"'; for (i = 0; i < input_chars; i++) { - Py_UNICODE c = input_unicode[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - if (output_size - chars < (1 + MAX_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - Py_ssize_t new_output_size = output_size * 2; - /* This is an upper bound */ - if (new_output_size > max_output_size) { - new_output_size = max_output_size; - } - /* Make sure that the output size changed before resizing */ - if (new_output_size != output_size) { - output_size = new_output_size; - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; - } - output = PyString_AS_STRING(rval); - } - } + chars = ascii_escape_char(PyUnicode_READ(kind, data, i), output, chars); } output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { + assert(chars == output_size); + return rval; +} + +#if PY_MAJOR_VERSION >= 3 + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + PyObject *rval; + PyObject *input = PyUnicode_DecodeUTF8(PyString_AS_STRING(pystr), PyString_GET_SIZE(pystr), NULL); + if (input == NULL) return NULL; - } + rval = ascii_escape_unicode(input); + Py_DECREF(input); return rval; } +#else /* PY_MAJOR_VERSION >= 3 */ + static PyObject * ascii_escape_str(PyObject *pystr) { @@ -342,76 +459,173 @@ ascii_escape_str(PyObject *pystr) input_chars = PyString_GET_SIZE(pystr); input_str = PyString_AS_STRING(pystr); + output_size = 2; /* Fast path for a string that's already ASCII */ for (i = 0; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (!S_CHAR(c)) { - /* If we have to escape something, scan the string for unicode */ - Py_ssize_t j; - for (j = i; j < input_chars; j++) { - c = (Py_UNICODE)(unsigned char)input_str[j]; - if (c > 0x7f) { - /* We hit a non-ASCII character, bail to unicode mode */ - PyObject *uni; - uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); - if (uni == NULL) { - return NULL; - } - rval = ascii_escape_unicode(uni); - Py_DECREF(uni); - return rval; - } + JSON_UNICHR c = (JSON_UNICHR)input_str[i]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; } - break; + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; } + output_size += ascii_char_size(c); } - if (i == input_chars) { - /* Input is already ASCII */ - output_size = 2 + input_chars; - } - else { - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - } rval = PyString_FromStringAndSize(NULL, output_size); if (rval == NULL) { return NULL; } + chars = 0; output = PyString_AS_STRING(rval); - output[0] = '"'; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + chars = ascii_escape_char((JSON_UNICHR)input_str[i], output, chars); + } + output[chars++] = '"'; + assert(chars == output_size); + return rval; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +encoder_stringify_key(PyEncoderObject *s, PyObject *key) +{ + if (PyUnicode_Check(key)) { + Py_INCREF(key); + return key; + } + else if (PyString_Check(key)) { +#if PY_MAJOR_VERSION >= 3 + return PyUnicode_Decode( + PyString_AS_STRING(key), + PyString_GET_SIZE(key), + JSON_ASCII_AS_STRING(s->encoding), + NULL); +#else /* PY_MAJOR_VERSION >= 3 */ + Py_INCREF(key); + return key; +#endif /* PY_MAJOR_VERSION < 3 */ + } + else if (PyFloat_Check(key)) { + return encoder_encode_float(s, key); + } + else if (key == Py_True || key == Py_False || key == Py_None) { + /* This must come before the PyInt_Check because + True and False are also 1 and 0.*/ + return _encoded_const(key); + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + return PyObject_Str(key); + } + else if (s->use_decimal && PyObject_TypeCheck(key, (PyTypeObject *)s->Decimal)) { + return PyObject_Str(key); + } + else if (s->skipkeys) { + Py_INCREF(Py_None); + return Py_None; + } + PyErr_SetString(PyExc_TypeError, "keys must be a string"); + return NULL; +} + +static PyObject * +encoder_dict_iteritems(PyEncoderObject *s, PyObject *dct) +{ + PyObject *items; + PyObject *iter = NULL; + PyObject *lst = NULL; + PyObject *item = NULL; + PyObject *kstr = NULL; + static PyObject *sortfun = NULL; + static PyObject *sortargs = NULL; - /* We know that everything up to i is ASCII already */ - chars = i + 1; - memcpy(&output[1], input_str, i); + if (sortargs == NULL) { + sortargs = PyTuple_New(0); + if (sortargs == NULL) + return NULL; + } - for (; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; + if (PyDict_CheckExact(dct)) + items = PyDict_Items(dct); + else + items = PyMapping_Items(dct); + if (items == NULL) + return NULL; + iter = PyObject_GetIter(items); + Py_DECREF(items); + if (iter == NULL) + return NULL; + if (s->item_sort_kw == Py_None) + return iter; + lst = PyList_New(0); + if (lst == NULL) + goto bail; + while ((item = PyIter_Next(iter))) { + PyObject *key, *value; + if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + goto bail; } - else { - chars = ascii_escape_char(c, output, chars); + key = PyTuple_GET_ITEM(item, 0); + if (key == NULL) + goto bail; +#if PY_MAJOR_VERSION < 3 + else if (PyString_Check(key)) { + // item can be added as-is } - /* An ASCII char can't possibly expand to a surrogate! */ - if (output_size - chars < (1 + MIN_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - output_size *= 2; - if (output_size > 2 + (input_chars * MIN_EXPANSION)) { - output_size = 2 + (input_chars * MIN_EXPANSION); - } - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; +#endif /* PY_MAJOR_VERSION < 3 */ + else if (PyUnicode_Check(key)) { + // item can be added as-is + } + else { + kstr = encoder_stringify_key(s, key); + if (kstr == NULL) + goto bail; + else if (kstr == Py_None) { + // skipkeys + Py_DECREF(kstr); + continue; } - output = PyString_AS_STRING(rval); + value = PyTuple_GET_ITEM(item, 1); + if (value == NULL) + goto bail; + PyObject *tpl = PyTuple_Pack(2, kstr, value); + if (tpl == NULL) + goto bail; + Py_CLEAR(kstr); + Py_DECREF(item); + item = tpl; } + if (PyList_Append(lst, item)) + goto bail; + Py_DECREF(item); } - output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { - return NULL; - } - return rval; + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; + sortfun = PyObject_GetAttrString(lst, "sort"); + if (sortfun == NULL) + goto bail; + if (!PyObject_Call(sortfun, sortargs, s->item_sort_kw)) + goto bail; + Py_CLEAR(sortfun); + iter = PyObject_GetIter(lst); + Py_CLEAR(lst); + return iter; +bail: + Py_XDECREF(sortfun); + Py_XDECREF(kstr); + Py_XDECREF(item); + Py_XDECREF(lst); + Py_XDECREF(iter); + return NULL; } static void @@ -443,7 +657,7 @@ join_list_unicode(PyObject *lst) /* return u''.join(lst) */ static PyObject *joinfn = NULL; if (joinfn == NULL) { - PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); + PyObject *ustr = JSON_NewEmptyUnicode(); if (ustr == NULL) return NULL; @@ -455,6 +669,9 @@ join_list_unicode(PyObject *lst) return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); } +#if PY_MAJOR_VERSION >= 3 +#define join_list_string join_list_unicode +#else /* PY_MAJOR_VERSION >= 3 */ static PyObject * join_list_string(PyObject *lst) { @@ -472,9 +689,11 @@ join_list_string(PyObject *lst) } return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * -_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) +{ /* return (rval, idx) tuple, stealing reference to rval */ PyObject *tpl; PyObject *pyidx; @@ -514,6 +733,7 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { Py_CLEAR(chunk); \ } +#if PY_MAJOR_VERSION < 3 static PyObject * scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) { @@ -537,6 +757,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s if (len == end) { raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; } else if (end < 0 || len < end) { PyErr_SetString(PyExc_ValueError, "end is out of bounds"); @@ -564,9 +785,19 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s } /* Pick up this chunk if it's not zero length */ if (next != end) { - PyObject *strchunk; APPEND_OLD_CHUNK - strchunk = PyString_FromStringAndSize(&buf[end], next - end); +#if PY_MAJOR_VERSION >= 3 + if (!has_unicode) { + chunk = PyUnicode_DecodeASCII(&buf[end], next - end, NULL); + } + else { + chunk = PyUnicode_Decode(&buf[end], next - end, encoding, NULL); + } + if (chunk == NULL) { + goto bail; + } +#else /* PY_MAJOR_VERSION >= 3 */ + PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); if (strchunk == NULL) { goto bail; } @@ -580,6 +811,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s else { chunk = strchunk; } +#endif /* PY_MAJOR_VERSION < 3 */ } next++; if (c == '"') { @@ -620,7 +852,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s } /* Decode 4 hex digits */ for (; next < end; next++) { - Py_UNICODE digit = buf[next]; + JSON_UNICHR digit = (JSON_UNICHR)buf[next]; c <<= 4; switch (digit) { case '0': case '1': case '2': case '3': case '4': @@ -637,10 +869,10 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s goto bail; } } -#ifdef Py_UNICODE_WIDE +#if (PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)) /* Surrogate pair */ if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; + JSON_UNICHR c2 = 0; if (end + 6 >= len) { raise_errmsg("Unpaired high surrogate", pystr, end - 5); goto bail; @@ -653,7 +885,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s /* Decode 4 hex digits */ for (; next < end; next++) { c2 <<= 4; - Py_UNICODE digit = buf[next]; + JSON_UNICHR digit = buf[next]; switch (digit) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -679,14 +911,20 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s raise_errmsg("Unpaired low surrogate", pystr, end - 5); goto bail; } -#endif +#endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */ } if (c > 0x7f) { has_unicode = 1; } APPEND_OLD_CHUNK +#if PY_MAJOR_VERSION >= 3 + chunk = JSON_UnicodeFromChar(c); + if (chunk == NULL) { + goto bail; + } +#else /* PY_MAJOR_VERSION >= 3 */ if (has_unicode) { - chunk = PyUnicode_FromUnicode(&c, 1); + chunk = JSON_UnicodeFromChar(c); if (chunk == NULL) { goto bail; } @@ -698,13 +936,14 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s goto bail; } } +#endif } if (chunks == NULL) { if (chunk != NULL) rval = chunk; else - rval = PyString_FromStringAndSize("", 0); + rval = JSON_NewEmptyUnicode(); } else { APPEND_OLD_CHUNK @@ -723,7 +962,7 @@ bail: Py_XDECREF(chunks); return NULL; } - +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) @@ -737,15 +976,17 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next Return value is a new PyUnicode */ PyObject *rval; - Py_ssize_t len = PyUnicode_GET_SIZE(pystr); Py_ssize_t begin = end - 1; Py_ssize_t next = begin; - const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + Py_ssize_t len = PyUnicode_GetLength(pystr); + void *buf = PyUnicode_DATA(pystr); PyObject *chunks = NULL; PyObject *chunk = NULL; if (len == end) { raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; } else if (end < 0 || len < end) { PyErr_SetString(PyExc_ValueError, "end is out of bounds"); @@ -753,9 +994,9 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next } while (1) { /* Find the end of the string or the next escape */ - Py_UNICODE c = 0; + JSON_UNICHR c = 0; for (next = end; next < len; next++) { - c = buf[next]; + c = PyUnicode_READ(kind, buf, next); if (c == '"' || c == '\\') { break; } @@ -771,7 +1012,11 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next /* Pick up this chunk if it's not zero length */ if (next != end) { APPEND_OLD_CHUNK - chunk = PyUnicode_FromUnicode(&buf[end], next - end); +#if PY_MAJOR_VERSION < 3 + chunk = PyUnicode_FromUnicode(&((const Py_UNICODE *)buf)[end], next - end); +#else + chunk = PyUnicode_Substring(pystr, end, next); +#endif if (chunk == NULL) { goto bail; } @@ -785,7 +1030,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next raise_errmsg("Unterminated string starting at", pystr, begin); goto bail; } - c = buf[next]; + c = PyUnicode_READ(kind, buf, next); if (c != 'u') { /* Non-unicode backslash escapes */ end = next + 1; @@ -815,7 +1060,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next } /* Decode 4 hex digits */ for (; next < end; next++) { - Py_UNICODE digit = buf[next]; + JSON_UNICHR digit = PyUnicode_READ(kind, buf, next); c <<= 4; switch (digit) { case '0': case '1': case '2': case '3': case '4': @@ -832,15 +1077,16 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next goto bail; } } -#ifdef Py_UNICODE_WIDE +#if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE) /* Surrogate pair */ if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; + JSON_UNICHR c2 = 0; if (end + 6 >= len) { raise_errmsg("Unpaired high surrogate", pystr, end - 5); goto bail; } - if (buf[next++] != '\\' || buf[next++] != 'u') { + if (PyUnicode_READ(kind, buf, next++) != '\\' || + PyUnicode_READ(kind, buf, next++) != 'u') { raise_errmsg("Unpaired high surrogate", pystr, end - 5); goto bail; } @@ -848,7 +1094,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next /* Decode 4 hex digits */ for (; next < end; next++) { c2 <<= 4; - Py_UNICODE digit = buf[next]; + JSON_UNICHR digit = PyUnicode_READ(kind, buf, next); switch (digit) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -877,7 +1123,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next #endif } APPEND_OLD_CHUNK - chunk = PyUnicode_FromUnicode(&c, 1); + chunk = JSON_UnicodeFromChar(c); if (chunk == NULL) { goto bail; } @@ -887,7 +1133,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next if (chunk != NULL) rval = chunk; else - rval = PyUnicode_FromUnicode(NULL, 0); + rval = JSON_NewEmptyUnicode(); } else { APPEND_OLD_CHUNK @@ -934,12 +1180,16 @@ py_scanstring(PyObject* self UNUSED, PyObject *args) if (encoding == NULL) { encoding = DEFAULT_ENCODING; } - if (PyString_Check(pystr)) { - rval = scanstring_str(pystr, end, encoding, strict, &next_end); - } - else if (PyUnicode_Check(pystr)) { + if (PyUnicode_Check(pystr)) { rval = scanstring_unicode(pystr, end, strict, &next_end); } +#if PY_MAJOR_VERSION < 3 + /* Using a bytes input is unsupported for scanning in Python 3. + It is coerced to str in the decoder before it gets here. */ + else if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } +#endif else { PyErr_Format(PyExc_TypeError, "first argument must be a string, not %.80s", @@ -1016,8 +1266,10 @@ scanner_clear(PyObject *self) return 0; } +#if PY_MAJOR_VERSION < 3 static PyObject * -_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON object from PyString pystr. idx is the index of the first character after the opening curly brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -1033,7 +1285,7 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ PyObject *item; PyObject *key = NULL; PyObject *val = NULL; - char *encoding = PyString_AS_STRING(s->encoding); + char *encoding = JSON_ASCII_AS_STRING(s->encoding); int strict = PyObject_IsTrue(s->strict); int has_pairs_hook = (s->pairs_hook != Py_None); Py_ssize_t next_idx; @@ -1164,9 +1416,11 @@ bail: Py_XDECREF(pairs); return NULL; } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * -_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON object from PyUnicode pystr. idx is the index of the first character after the opening curly brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -1174,8 +1428,9 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss Returns a new PyObject (usually a dict, but object_hook can change that) */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); PyObject *rval = NULL; PyObject *pairs = NULL; PyObject *item; @@ -1197,15 +1452,15 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss } /* skip whitespace after { */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* only loop if the object is non-empty */ - if (idx <= end_idx && str[idx] != '}') { + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') { while (idx <= end_idx) { PyObject *memokey; /* read key */ - if (str[idx] != '"') { + if (PyUnicode_READ(kind, str, idx) != '"') { raise_errmsg( "Expecting property name enclosed in double quotes", pystr, idx); @@ -1228,13 +1483,13 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss /* skip whitespace between key and : delimiter, read :, skip whitespace */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - if (idx > end_idx || str[idx] != ':') { + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') { raise_errmsg("Expecting ':' delimiter", pystr, idx); goto bail; } idx++; - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* read any JSON term */ val = scan_once_unicode(s, pystr, idx, &next_idx); @@ -1262,27 +1517,27 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss idx = next_idx; /* skip whitespace before } or , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* bail if the object is closed or we didn't get the , delimiter */ if (idx > end_idx) break; - if (str[idx] == '}') { + if (PyUnicode_READ(kind, str, idx) == '}') { break; } - else if (str[idx] != ',') { + else if (PyUnicode_READ(kind, str, idx) != ',') { raise_errmsg("Expecting ',' delimiter", pystr, idx); goto bail; } idx++; /* skip whitespace after , delimiter */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; } } /* verify that idx < end_idx, str[idx] should be '}' */ - if (idx > end_idx || str[idx] != '}') { + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') { raise_errmsg("Expecting object", pystr, end_idx); goto bail; } @@ -1316,8 +1571,10 @@ bail: return NULL; } +#if PY_MAJOR_VERSION < 3 static PyObject * -_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON array from PyString pystr. idx is the index of the first character after the opening brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -1387,9 +1644,11 @@ bail: Py_DECREF(rval); return NULL; } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * -_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON array from PyString pystr. idx is the index of the first character after the opening brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -1397,8 +1656,9 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi Returns a new PyList */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; PyObject *val = NULL; PyObject *rval = PyList_New(0); Py_ssize_t next_idx; @@ -1406,10 +1666,10 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi return NULL; /* skip whitespace after [ */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* only loop if the array is non-empty */ - if (idx <= end_idx && str[idx] != ']') { + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') { while (idx <= end_idx) { /* read any JSON term */ @@ -1429,26 +1689,26 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi idx = next_idx; /* skip whitespace between term and , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* bail if the array is closed or we didn't get the , delimiter */ if (idx > end_idx) break; - if (str[idx] == ']') { + if (PyUnicode_READ(kind, str, idx) == ']') { break; } - else if (str[idx] != ',') { + else if (PyUnicode_READ(kind, str, idx) != ',') { raise_errmsg("Expecting ',' delimiter", pystr, idx); goto bail; } idx++; /* skip whitespace after , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; } } /* verify that idx < end_idx, str[idx] should be ']' */ - if (idx > end_idx || str[idx] != ']') { + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') { raise_errmsg("Expecting object", pystr, end_idx); goto bail; } @@ -1461,7 +1721,8 @@ bail: } static PyObject * -_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON constant from PyString pystr. constant is the constant string that was found ("NaN", "Infinity", "-Infinity"). @@ -1474,20 +1735,22 @@ _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t * PyObject *cstr; PyObject *rval; /* constant is "NaN", "Infinity", or "-Infinity" */ - cstr = PyString_InternFromString(constant); + cstr = JSON_InternFromString(constant); if (cstr == NULL) return NULL; /* rval = parse_constant(constant) */ rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); - idx += PyString_GET_SIZE(cstr); + idx += JSON_Intern_GET_SIZE(cstr); Py_DECREF(cstr); *next_idx_ptr = idx; return rval; } +#if PY_MAJOR_VERSION < 3 static PyObject * -_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON number from PyString pystr. idx is the index of the first character of the number *next_idx_ptr is a return-by-reference index to the first character after @@ -1588,9 +1851,11 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz *next_idx_ptr = idx; return rval; } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * -_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { +_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON number from PyUnicode pystr. idx is the index of the first character of the number *next_idx_ptr is a return-by-reference index to the first character after @@ -1600,15 +1865,17 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ PyInt, PyLong, or PyFloat. May return other types if parse_int or parse_float are set */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; Py_ssize_t idx = start; int is_float = 0; + JSON_UNICHR c; PyObject *rval; PyObject *numstr; /* read a sign if it's there, make sure it's not the end of the string */ - if (str[idx] == '-') { + if (PyUnicode_READ(kind, str, idx) == '-') { idx++; if (idx > end_idx) { PyErr_SetNone(PyExc_StopIteration); @@ -1617,40 +1884,49 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ } /* read as many integer digits as we find as long as it doesn't start with 0 */ - if (str[idx] >= '1' && str[idx] <= '9') { + c = PyUnicode_READ(kind, str, idx); + if (c == '0') { + /* if it starts with 0 we only expect one integer digit */ idx++; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; } - /* if it starts with 0 we only expect one integer digit */ - else if (str[idx] == '0') { + else if (IS_DIGIT(c)) { idx++; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) { + idx++; + } } - /* no integer digits, error */ else { + /* no integer digits, error */ PyErr_SetNone(PyExc_StopIteration); return NULL; } /* if the next char is '.' followed by a digit then read all float digits */ - if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + if (idx < end_idx && + PyUnicode_READ(kind, str, idx) == '.' && + IS_DIGIT(PyUnicode_READ(kind, str, idx + 1))) { is_float = 1; idx += 2; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++; } /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ - if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + if (idx < end_idx && + (PyUnicode_READ(kind, str, idx) == 'e' || + PyUnicode_READ(kind, str, idx) == 'E')) { Py_ssize_t e_start = idx; idx++; /* read an exponent sign if present */ - if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + if (idx < end_idx && + (PyUnicode_READ(kind, str, idx) == '-' || + PyUnicode_READ(kind, str, idx) == '+')) idx++; /* read all digits */ - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++; /* if we got a digit, then parse as float. if not, backtrack */ - if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + if (IS_DIGIT(PyUnicode_READ(kind, str, idx - 1))) { is_float = 1; } else { @@ -1659,7 +1935,11 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ } /* copy the section we determined to be a number */ - numstr = PyUnicode_FromUnicode(&str[start], idx - start); +#if PY_MAJOR_VERSION >= 3 + numstr = PyUnicode_Substring(pystr, start, idx); +#else + numstr = PyUnicode_FromUnicode(&((Py_UNICODE *)str)[start], idx - start); +#endif if (numstr == NULL) return NULL; if (is_float) { @@ -1668,7 +1948,11 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); } else { +#if PY_MAJOR_VERSION >= 3 + rval = PyFloat_FromString(numstr); +#else rval = PyFloat_FromString(numstr, NULL); +#endif } } else { @@ -1680,6 +1964,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ return rval; } +#if PY_MAJOR_VERSION < 3 static PyObject * scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { @@ -1704,7 +1989,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n case '"': /* string */ rval = scanstring_str(pystr, idx + 1, - PyString_AS_STRING(s->encoding), + JSON_ASCII_AS_STRING(s->encoding), PyObject_IsTrue(s->strict), next_idx_ptr); break; @@ -1779,6 +2064,8 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n Py_LeaveRecursiveCall(); return rval; } +#endif /* PY_MAJOR_VERSION < 3 */ + static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) @@ -1790,8 +2077,9 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ Returns a new PyObject representation of the term. */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t length = PyUnicode_GetLength(pystr); PyObject *rval = NULL; int fallthrough = 0; if (idx >= length) { @@ -1800,7 +2088,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ } if (Py_EnterRecursiveCall(" while decoding a JSON document")) return NULL; - switch (str[idx]) { + switch (PyUnicode_READ(kind, str, idx)) { case '"': /* string */ rval = scanstring_unicode(pystr, idx + 1, @@ -1817,7 +2105,10 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case 'n': /* null */ - if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + if ((idx + 3 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'u' && + PyUnicode_READ(kind, str, idx + 2) == 'l' && + PyUnicode_READ(kind, str, idx + 3) == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; rval = Py_None; @@ -1827,7 +2118,10 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case 't': /* true */ - if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + if ((idx + 3 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'r' && + PyUnicode_READ(kind, str, idx + 2) == 'u' && + PyUnicode_READ(kind, str, idx + 3) == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; rval = Py_True; @@ -1837,7 +2131,11 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case 'f': /* false */ - if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + if ((idx + 4 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'a' && + PyUnicode_READ(kind, str, idx + 2) == 'l' && + PyUnicode_READ(kind, str, idx + 3) == 's' && + PyUnicode_READ(kind, str, idx + 4) == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; rval = Py_False; @@ -1847,7 +2145,9 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case 'N': /* NaN */ - if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + if ((idx + 2 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'a' && + PyUnicode_READ(kind, str, idx + 2) == 'N') { rval = _parse_constant(s, "NaN", idx, next_idx_ptr); } else @@ -1855,7 +2155,14 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case 'I': /* Infinity */ - if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + if ((idx + 7 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'n' && + PyUnicode_READ(kind, str, idx + 2) == 'f' && + PyUnicode_READ(kind, str, idx + 3) == 'i' && + PyUnicode_READ(kind, str, idx + 4) == 'n' && + PyUnicode_READ(kind, str, idx + 5) == 'i' && + PyUnicode_READ(kind, str, idx + 6) == 't' && + PyUnicode_READ(kind, str, idx + 7) == 'y') { rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); } else @@ -1863,7 +2170,15 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case '-': /* -Infinity */ - if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + if ((idx + 8 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'I' && + PyUnicode_READ(kind, str, idx + 2) == 'n' && + PyUnicode_READ(kind, str, idx + 3) == 'f' && + PyUnicode_READ(kind, str, idx + 4) == 'i' && + PyUnicode_READ(kind, str, idx + 5) == 'n' && + PyUnicode_READ(kind, str, idx + 6) == 'i' && + PyUnicode_READ(kind, str, idx + 7) == 't' && + PyUnicode_READ(kind, str, idx + 8) == 'y') { rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); } else @@ -1894,12 +2209,14 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) return NULL; - if (PyString_Check(pystr)) { - rval = scan_once_str(s, pystr, idx, &next_idx); - } - else if (PyUnicode_Check(pystr)) { + if (PyUnicode_Check(pystr)) { rval = scan_once_unicode(s, pystr, idx, &next_idx); } +#if PY_MAJOR_VERSION < 3 + else if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } +#endif /* PY_MAJOR_VERSION < 3 */ else { PyErr_Format(PyExc_TypeError, "first argument must be a string, not %.80s", @@ -1927,6 +2244,25 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return (PyObject *)s; } +static PyObject * +JSON_ParseEncoding(PyObject *encoding) +{ + if (encoding == NULL) + return NULL; + if (encoding == Py_None) + return JSON_InternFromString(DEFAULT_ENCODING); +#if PY_MAJOR_VERSION < 3 + if (PyUnicode_Check(encoding)) + return PyUnicode_AsEncodedString(encoding, NULL, NULL); +#endif + if (JSON_ASCII_Check(encoding)) { + Py_INCREF(encoding); + return encoding; + } + PyErr_SetString(PyExc_TypeError, "encoding must be a string"); + return NULL; +} + static int scanner_init(PyObject *self, PyObject *args, PyObject *kwds) { @@ -1934,6 +2270,7 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) PyObject *ctx; static char *kwlist[] = {"context", NULL}; PyScannerObject *s; + PyObject *encoding; assert(PyScanner_Check(self)); s = (PyScannerObject *)self; @@ -1947,21 +2284,12 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) goto bail; } - /* PyString_AS_STRING is used on encoding */ - s->encoding = PyObject_GetAttrString(ctx, "encoding"); + /* JSON_ASCII_AS_STRING is used on encoding */ + encoding = PyObject_GetAttrString(ctx, "encoding"); + s->encoding = JSON_ParseEncoding(encoding); + Py_XDECREF(encoding); if (s->encoding == NULL) goto bail; - if (s->encoding == Py_None) { - Py_DECREF(Py_None); - s->encoding = PyString_InternFromString(DEFAULT_ENCODING); - } - else if (PyUnicode_Check(s->encoding)) { - PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); - Py_DECREF(s->encoding); - s->encoding = tmp; - } - if (s->encoding == NULL || !PyString_Check(s->encoding)) - goto bail; /* All of these will fail "gracefully" so we don't need to verify them */ s->strict = PyObject_GetAttrString(ctx, "strict"); @@ -2000,8 +2328,7 @@ PyDoc_STRVAR(scanner_doc, "JSON scanner object"); static PyTypeObject PyScannerType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ + PyVarObject_HEAD_INIT(NULL, 0) "simplejson._speedups.Scanner", /* tp_name */ sizeof(PyScannerObject), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -2051,13 +2378,14 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->markers = NULL; s->defaultfn = NULL; s->encoder = NULL; + s->encoding = NULL; s->indent = NULL; s->key_separator = NULL; s->item_separator = NULL; - s->sort_keys = NULL; - s->skipkeys = NULL; s->key_memo = NULL; + s->sort_keys = NULL; s->item_sort_key = NULL; + s->item_sort_kw = NULL; s->Decimal = NULL; } return (PyObject *)s; @@ -2067,32 +2395,34 @@ static int encoder_init(PyObject *self, PyObject *args, PyObject *kwds) { /* initialize Encoder object */ - static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", "bigint_as_string", "item_sort_key", "Decimal", NULL}; + static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", "bigint_as_string", "item_sort_key", "encoding", "Decimal", NULL}; PyEncoderObject *s; PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo; PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array; - PyObject *bigint_as_string, *item_sort_key, *Decimal; + PyObject *bigint_as_string, *item_sort_key, *encoding, *Decimal; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOO:make_encoder", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOO:make_encoder", kwlist, &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal, &namedtuple_as_object, &tuple_as_array, &bigint_as_string, - &item_sort_key, &Decimal)) + &item_sort_key, &encoding, &Decimal)) return -1; s->markers = markers; s->defaultfn = defaultfn; s->encoder = encoder; + s->encoding = JSON_ParseEncoding(encoding); + if (s->encoding == NULL) + return -1; s->indent = indent; s->key_separator = key_separator; s->item_separator = item_separator; - s->sort_keys = sort_keys; - s->skipkeys = skipkeys; + s->skipkeys = (char)PyObject_IsTrue(skipkeys); s->key_memo = key_memo; s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); s->allow_nan = PyObject_IsTrue(allow_nan); @@ -2100,6 +2430,35 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds) s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object); s->tuple_as_array = PyObject_IsTrue(tuple_as_array); s->bigint_as_string = PyObject_IsTrue(bigint_as_string); + if (item_sort_key != Py_None) { + if (!PyCallable_Check(item_sort_key)) + PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable"); + } + else if (PyObject_IsTrue(sort_keys)) { + static PyObject *itemgetter0 = NULL; + if (!itemgetter0) { + PyObject *operator = PyImport_ImportModule("operator"); + if (!operator) + return -1; + itemgetter0 = PyObject_CallMethod(operator, "itemgetter", "i", 0); + Py_DECREF(operator); + } + item_sort_key = itemgetter0; + if (!item_sort_key) + return -1; + } + if (item_sort_key == Py_None) { + Py_INCREF(Py_None); + s->item_sort_kw = Py_None; + } + else { + s->item_sort_kw = PyDict_New(); + if (s->item_sort_kw == NULL) + return -1; + if (PyDict_SetItemString(s->item_sort_kw, "key", item_sort_key)) + return -1; + } + s->sort_keys = sort_keys; s->item_sort_key = item_sort_key; s->Decimal = Decimal; @@ -2109,9 +2468,8 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds) Py_INCREF(s->indent); Py_INCREF(s->key_separator); Py_INCREF(s->item_separator); - Py_INCREF(s->sort_keys); - Py_INCREF(s->skipkeys); Py_INCREF(s->key_memo); + Py_INCREF(s->sort_keys); Py_INCREF(s->item_sort_key); Py_INCREF(s->Decimal); return 0; @@ -2148,7 +2506,7 @@ _encoded_const(PyObject *obj) if (obj == Py_None) { static PyObject *s_null = NULL; if (s_null == NULL) { - s_null = PyString_InternFromString("null"); + s_null = JSON_InternFromString("null"); } Py_INCREF(s_null); return s_null; @@ -2156,7 +2514,7 @@ _encoded_const(PyObject *obj) else if (obj == Py_True) { static PyObject *s_true = NULL; if (s_true == NULL) { - s_true = PyString_InternFromString("true"); + s_true = JSON_InternFromString("true"); } Py_INCREF(s_true); return s_true; @@ -2164,7 +2522,7 @@ _encoded_const(PyObject *obj) else if (obj == Py_False) { static PyObject *s_false = NULL; if (s_false == NULL) { - s_false = PyString_InternFromString("false"); + s_false = JSON_InternFromString("false"); } Py_INCREF(s_false); return s_false; @@ -2186,13 +2544,28 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj) return NULL; } if (i > 0) { - return PyString_FromString("Infinity"); + static PyObject *sInfinity = NULL; + if (sInfinity == NULL) + sInfinity = JSON_InternFromString("Infinity"); + if (sInfinity) + Py_INCREF(sInfinity); + return sInfinity; } else if (i < 0) { - return PyString_FromString("-Infinity"); + static PyObject *sNegInfinity = NULL; + if (sNegInfinity == NULL) + sNegInfinity = JSON_InternFromString("-Infinity"); + if (sNegInfinity) + Py_INCREF(sNegInfinity); + return sNegInfinity; } else { - return PyString_FromString("NaN"); + static PyObject *sNaN = NULL; + if (sNaN == NULL) + sNaN = JSON_InternFromString("NaN"); + if (sNaN) + Py_INCREF(sNaN); + return sNaN; } } /* Use a better float format here? */ @@ -2266,7 +2639,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi else if (PyDict_Check(obj)) { rv = encoder_listencode_dict(s, rval, obj, indent_level); } - else if (s->use_decimal && PyObject_TypeCheck(obj, s->Decimal)) { + else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) { PyObject *encoded = PyObject_Str(obj); if (encoded != NULL) rv = _steal_list_append(rval, encoded); @@ -2322,22 +2695,19 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss static PyObject *open_dict = NULL; static PyObject *close_dict = NULL; static PyObject *empty_dict = NULL; - static PyObject *iteritems = NULL; PyObject *kstr = NULL; PyObject *ident = NULL; PyObject *iter = NULL; PyObject *item = NULL; PyObject *items = NULL; PyObject *encoded = NULL; - int skipkeys; Py_ssize_t idx; - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) { - open_dict = PyString_InternFromString("{"); - close_dict = PyString_InternFromString("}"); - empty_dict = PyString_InternFromString("{}"); - iteritems = PyString_InternFromString("iteritems"); - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = JSON_InternFromString("{"); + close_dict = JSON_InternFromString("}"); + empty_dict = JSON_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) return -1; } if (PyDict_Size(dct) == 0) @@ -2372,54 +2742,10 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss */ } - if (PyCallable_Check(s->item_sort_key)) { - if (PyDict_CheckExact(dct)) - items = PyDict_Items(dct); - else - items = PyMapping_Items(dct); - PyObject_CallMethod(items, "sort", "OO", Py_None, s->item_sort_key); - } - else if (PyObject_IsTrue(s->sort_keys)) { - /* First sort the keys then replace them with (key, value) tuples. */ - Py_ssize_t i, nitems; - if (PyDict_CheckExact(dct)) - items = PyDict_Keys(dct); - else - items = PyMapping_Keys(dct); - if (items == NULL) - goto bail; - if (!PyList_Check(items)) { - PyErr_SetString(PyExc_ValueError, "keys must return list"); - goto bail; - } - if (PyList_Sort(items) < 0) - goto bail; - nitems = PyList_GET_SIZE(items); - for (i = 0; i < nitems; i++) { - PyObject *key, *value; - key = PyList_GET_ITEM(items, i); - value = PyDict_GetItem(dct, key); - item = PyTuple_Pack(2, key, value); - if (item == NULL) - goto bail; - PyList_SET_ITEM(items, i, item); - Py_DECREF(key); - } - } - else { - if (PyDict_CheckExact(dct)) - items = PyDict_Items(dct); - else - items = PyMapping_Items(dct); - } - if (items == NULL) - goto bail; - iter = PyObject_GetIter(items); - Py_DECREF(items); + iter = encoder_dict_iteritems(s, dct); if (iter == NULL) goto bail; - skipkeys = PyObject_IsTrue(s->skipkeys); idx = 0; while ((item = PyIter_Next(iter))) { PyObject *encoded, *key, *value; @@ -2437,43 +2763,21 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss encoded = PyDict_GetItem(s->key_memo, key); if (encoded != NULL) { Py_INCREF(encoded); - } - else if (PyString_Check(key) || PyUnicode_Check(key)) { - Py_INCREF(key); - kstr = key; - } - else if (PyFloat_Check(key)) { - kstr = encoder_encode_float(s, key); - if (kstr == NULL) - goto bail; - } - else if (key == Py_True || key == Py_False || key == Py_None) { - /* This must come before the PyInt_Check because - True and False are also 1 and 0.*/ - kstr = _encoded_const(key); + } else { + kstr = encoder_stringify_key(s, key); if (kstr == NULL) goto bail; + else if (kstr == Py_None) { + // skipkeys + Py_DECREF(item); + Py_DECREF(kstr); + continue; + } } - else if (PyInt_Check(key) || PyLong_Check(key)) { - kstr = PyObject_Str(key); - if (kstr == NULL) - goto bail; - } - else if (skipkeys) { - Py_DECREF(item); - continue; - } - else { - /* TODO: include repr of key */ - PyErr_SetString(PyExc_TypeError, "keys must be a string"); - goto bail; - } - if (idx) { if (PyList_Append(rval, s->item_separator)) goto bail; } - if (encoded == NULL) { encoded = encoder_encode_string(s, kstr); Py_CLEAR(kstr); @@ -2536,9 +2840,9 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss int i = 0; if (open_array == NULL || close_array == NULL || empty_array == NULL) { - open_array = PyString_InternFromString("["); - close_array = PyString_InternFromString("]"); - empty_array = PyString_InternFromString("[]"); + open_array = JSON_InternFromString("["); + close_array = JSON_InternFromString("]"); + empty_array = JSON_InternFromString("[]"); if (open_array == NULL || close_array == NULL || empty_array == NULL) return -1; } @@ -2633,13 +2937,15 @@ encoder_traverse(PyObject *self, visitproc visit, void *arg) Py_VISIT(s->markers); Py_VISIT(s->defaultfn); Py_VISIT(s->encoder); + Py_VISIT(s->encoding); Py_VISIT(s->indent); Py_VISIT(s->key_separator); Py_VISIT(s->item_separator); - Py_VISIT(s->sort_keys); - Py_VISIT(s->skipkeys); Py_VISIT(s->key_memo); + Py_VISIT(s->sort_keys); + Py_VISIT(s->item_sort_kw); Py_VISIT(s->item_sort_key); + Py_VISIT(s->Decimal); return 0; } @@ -2653,12 +2959,13 @@ encoder_clear(PyObject *self) Py_CLEAR(s->markers); Py_CLEAR(s->defaultfn); Py_CLEAR(s->encoder); + Py_CLEAR(s->encoding); Py_CLEAR(s->indent); Py_CLEAR(s->key_separator); Py_CLEAR(s->item_separator); - Py_CLEAR(s->sort_keys); - Py_CLEAR(s->skipkeys); Py_CLEAR(s->key_memo); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->item_sort_kw); Py_CLEAR(s->item_sort_key); Py_CLEAR(s->Decimal); return 0; @@ -2668,8 +2975,7 @@ PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable") static PyTypeObject PyEncoderType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ + PyVarObject_HEAD_INIT(NULL, 0) "simplejson._speedups.Encoder", /* tp_name */ sizeof(PyEncoderObject), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -2725,21 +3031,53 @@ static PyMethodDef speedups_methods[] = { PyDoc_STRVAR(module_doc, "simplejson speedups\n"); -void -init_speedups(void) +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_speedups", /* m_name */ + module_doc, /* m_doc */ + -1, /* m_size */ + speedups_methods, /* m_methods */ + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear*/ + NULL, /* m_free */ +}; +#endif + +static PyObject * +moduleinit(void) { PyObject *m; PyScannerType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyScannerType) < 0) - return; + return NULL; PyEncoderType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyEncoderType) < 0) - return; - + return NULL; +#if PY_MAJOR_VERSION >= 3 + m = PyModule_Create(&moduledef); +#else m = Py_InitModule3("_speedups", speedups_methods, module_doc); +#endif Py_INCREF((PyObject*)&PyScannerType); PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); Py_INCREF((PyObject*)&PyEncoderType); PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); + return m; } + +#if PY_MAJOR_VERSION >= 3 +PyMODINIT_FUNC +PyInit__speedups(void) +{ + return moduleinit(); +} +#else +void +init_speedups(void) +{ + moduleinit(); +} +#endif diff --git a/simplejson/compat.py b/simplejson/compat.py new file mode 100644 index 0000000..449e48a --- /dev/null +++ b/simplejson/compat.py @@ -0,0 +1,43 @@ +"""Python 3 compatibility shims +""" +import sys +if sys.version_info[0] < 3: + PY3 = False + def b(s): + return s + def u(s): + return unicode(s, 'unicode_escape') + import cStringIO as StringIO + StringIO = BytesIO = StringIO.StringIO + text_type = unicode + binary_type = str + string_types = (basestring,) + integer_types = (int, long) + unichr = unichr + reload_module = reload + def fromhex(s): + return s.decode('hex') + +else: + PY3 = True + from imp import reload as reload_module + import codecs + def b(s): + return codecs.latin_1_encode(s)[0] + def u(s): + return s + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + text_type = str + binary_type = bytes + string_types = (str,) + integer_types = (int,) + + def unichr(s): + return u(chr(s)) + + def fromhex(s): + return bytes.fromhex(s) + +long_type = integer_types[-1] diff --git a/simplejson/decoder.py b/simplejson/decoder.py index 1c1526c..546a168 100644 --- a/simplejson/decoder.py +++ b/simplejson/decoder.py @@ -1,13 +1,14 @@ """Implementation of JSONDecoder """ +from __future__ import absolute_import import re import sys import struct - -from simplejson.scanner import make_scanner +from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr +from .scanner import make_scanner def _import_c_scanstring(): try: - from simplejson._speedups import scanstring + from ._speedups import scanstring return scanstring except ImportError: return None @@ -18,7 +19,7 @@ __all__ = ['JSONDecoder'] FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL def _floatconstants(): - _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + _BYTES = fromhex('7FF80000000000007FF0000000000000') # The struct module in Python 2.4 would get frexp() out of range here # when an endian is specified in the format string. Fixed in Python 2.5+ if sys.byteorder != 'big': @@ -87,14 +88,15 @@ _CONSTANTS = { STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) BACKSLASH = { - '"': u'"', '\\': u'\\', '/': u'/', - 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', + '"': u('"'), '\\': u('\u005c'), '/': u('/'), + 'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'), } DEFAULT_ENCODING = "utf-8" def py_scanstring(s, end, encoding=None, strict=True, - _b=BACKSLASH, _m=STRINGCHUNK.match): + _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join, + _PY3=PY3, _maxunicode=sys.maxunicode): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. Unescapes all valid JSON string escape sequences and raises ValueError @@ -117,8 +119,8 @@ def py_scanstring(s, end, encoding=None, strict=True, content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters if content: - if not isinstance(content, unicode): - content = unicode(content, encoding) + if not _PY3 and not isinstance(content, text_type): + content = text_type(content, encoding) _append(content) # Terminator is the end of string, a literal control character, # or a backslash denoting that an escape sequence follows @@ -152,23 +154,38 @@ def py_scanstring(s, end, encoding=None, strict=True, if len(esc) != 4: msg = "Invalid \\uXXXX escape" raise JSONDecodeError(msg, s, end) - uni = int(esc, 16) + try: + uni = int(esc, 16) + except ValueError: + msg = "Invalid \\uXXXX escape" + raise JSONDecodeError(msg, s, end) # Check for surrogate pair on UCS-4 systems - if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: - msg = "Invalid \\uXXXX\\uXXXX surrogate pair" - if not s[end + 5:end + 7] == '\\u': - raise JSONDecodeError(msg, s, end) - esc2 = s[end + 7:end + 11] - if len(esc2) != 4: + if _maxunicode > 65535: + unimask = uni & 0xfc00 + if unimask == 0xd800: + msg = "Invalid \\uXXXX\\uXXXX surrogate pair" + if not s[end + 5:end + 7] == '\\u': + raise JSONDecodeError(msg, s, end) + esc2 = s[end + 7:end + 11] + if len(esc2) != 4: + raise JSONDecodeError(msg, s, end) + try: + uni2 = int(esc2, 16) + except ValueError: + raise JSONDecodeError(msg, s, end) + if uni2 & 0xfc00 != 0xdc00: + msg = "Unpaired high surrogate" + raise JSONDecodeError(msg, s, end) + uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) + next_end += 6 + elif unimask == 0xdc00: + msg = "Unpaired low surrogate" raise JSONDecodeError(msg, s, end) - uni2 = int(esc2, 16) - uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) - next_end += 6 char = unichr(uni) end = next_end # Append the unescaped character _append(char) - return u''.join(chunks), end + return _join(chunks), end # Use speedup if available @@ -177,9 +194,10 @@ scanstring = c_scanstring or py_scanstring WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) WHITESPACE_STR = ' \t\n\r' -def JSONObject((s, end), encoding, strict, scan_once, object_hook, +def JSONObject(state, encoding, strict, scan_once, object_hook, object_pairs_hook, memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state # Backwards compatibility if memo is None: memo = {} @@ -273,7 +291,8 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, pairs = object_hook(pairs) return pairs, end -def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): +def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state values = [] nextchar = s[end:end + 1] if nextchar in _ws: @@ -385,6 +404,8 @@ class JSONDecoder(object): ``False`` then control characters will be allowed in strings. """ + if encoding is None: + encoding = DEFAULT_ENCODING self.encoding = encoding self.object_hook = object_hook self.object_pairs_hook = object_pairs_hook @@ -398,18 +419,20 @@ class JSONDecoder(object): self.memo = {} self.scan_once = make_scanner(self) - def decode(self, s, _w=WHITESPACE.match): + def decode(self, s, _w=WHITESPACE.match, _PY3=PY3): """Return the Python representation of ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) """ + if _PY3 and isinstance(s, binary_type): + s = s.decode(self.encoding) obj, end = self.raw_decode(s) end = _w(s, end).end() if end != len(s): raise JSONDecodeError("Extra data", s, end, len(s)) return obj - def raw_decode(self, s, idx=0, _w=WHITESPACE.match): + def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3): """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning with a JSON document) and return a 2-tuple of the Python representation and the index in ``s`` where the document ended. @@ -420,6 +443,8 @@ class JSONDecoder(object): have extraneous data at the end. """ + if _PY3 and not isinstance(s, text_type): + raise TypeError("Input string must be text, not bytes") try: obj, end = self.scan_once(s, idx=_w(s, idx).end()) except StopIteration: diff --git a/simplejson/encoder.py b/simplejson/encoder.py index 6b4a6a4..432838b 100644 --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -1,11 +1,13 @@ """Implementation of JSONEncoder """ +from __future__ import absolute_import import re +from operator import itemgetter from decimal import Decimal - +from .compat import u, unichr, binary_type, string_types, integer_types, PY3 def _import_speedups(): try: - from simplejson import _speedups + from . import _speedups return _speedups.encode_basestring_ascii, _speedups.make_encoder except ImportError: return None, None @@ -13,7 +15,7 @@ c_encode_basestring_ascii, c_make_encoder = _import_speedups() from simplejson.decoder import PosInf -ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') +ESCAPE = re.compile(u(r'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')) ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { @@ -24,32 +26,40 @@ ESCAPE_DCT = { '\n': '\\n', '\r': '\\r', '\t': '\\t', - u'\u2028': '\\u2028', - u'\u2029': '\\u2029', } for i in range(0x20): #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) +for i in [0x2028, 0x2029]: + ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,)) FLOAT_REPR = repr -def encode_basestring(s): +def encode_basestring(s, _PY3=PY3, _q=u('"')): """Return a JSON representation of a Python string """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') + if _PY3: + if isinstance(s, binary_type): + s = s.decode('utf-8') + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): return ESCAPE_DCT[match.group(0)] - return u'"' + ESCAPE.sub(replace, s) + u'"' + return _q + ESCAPE.sub(replace, s) + _q -def py_encode_basestring_ascii(s): +def py_encode_basestring_ascii(s, _PY3=PY3): """Return an ASCII-only JSON representation of a Python string """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') + if _PY3: + if isinstance(s, binary_type): + s = s.decode('utf-8') + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): s = match.group(0) try: @@ -181,7 +191,7 @@ class JSONEncoder(object): self.tuple_as_array = tuple_as_array self.bigint_as_string = bigint_as_string self.item_sort_key = item_sort_key - if indent is not None and not isinstance(indent, basestring): + if indent is not None and not isinstance(indent, string_types): indent = indent * ' ' self.indent = indent if separators is not None: @@ -221,12 +231,11 @@ class JSONEncoder(object): """ # This is for extremely simple cases and benchmarks. - if isinstance(o, basestring): - if isinstance(o, str): - _encoding = self.encoding - if (_encoding is not None - and not (_encoding == 'utf-8')): - o = o.decode(_encoding) + if isinstance(o, binary_type): + _encoding = self.encoding + if (_encoding is not None and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if isinstance(o, string_types): if self.ensure_ascii: return encode_basestring_ascii(o) else: @@ -262,7 +271,7 @@ class JSONEncoder(object): _encoder = encode_basestring if self.encoding != 'utf-8': def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): - if isinstance(o, str): + if isinstance(o, binary_type): o = o.decode(_encoding) return _orig_encoder(o) @@ -298,6 +307,7 @@ class JSONEncoder(object): self.skipkeys, self.allow_nan, key_memo, self.use_decimal, self.namedtuple_as_object, self.tuple_as_array, self.bigint_as_string, self.item_sort_key, + self.encoding, Decimal) else: _iterencode = _make_iterencode( @@ -306,6 +316,7 @@ class JSONEncoder(object): self.skipkeys, _one_shot, self.use_decimal, self.namedtuple_as_object, self.tuple_as_array, self.bigint_as_string, self.item_sort_key, + self.encoding, Decimal=Decimal) try: return _iterencode(o, 0) @@ -343,25 +354,25 @@ class JSONEncoderForHTML(JSONEncoder): def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, _use_decimal, _namedtuple_as_object, _tuple_as_array, - _bigint_as_string, _item_sort_key, + _bigint_as_string, _item_sort_key, _encoding, ## HACK: hand-optimized bytecode; turn globals into locals - False=False, - True=True, + _PY3=PY3, ValueError=ValueError, - basestring=basestring, + string_types=string_types, Decimal=Decimal, dict=dict, float=float, id=id, - int=int, + integer_types=integer_types, isinstance=isinstance, list=list, - long=long, str=str, tuple=tuple, ): if _item_sort_key and not callable(_item_sort_key): raise TypeError("item_sort_key must be None or callable") + elif _sort_keys and not _item_sort_key: + _item_sort_key = itemgetter(0) def _iterencode_list(lst, _current_indent_level): if not lst: @@ -387,7 +398,8 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, first = False else: buf = separator - if isinstance(value, basestring): + if (isinstance(value, string_types) or + (_PY3 and isinstance(value, binary_type))): yield buf + _encoder(value) elif value is None: yield buf + 'null' @@ -395,7 +407,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield buf + 'true' elif value is False: yield buf + 'false' - elif isinstance(value, (int, long)): + elif isinstance(value, integer_types): yield ((buf + str(value)) if (not _bigint_as_string or (-1 << 53) < value < (1 << 53)) @@ -428,6 +440,29 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, if markers is not None: del markers[markerid] + def _stringify_key(key): + if isinstance(key, string_types): # pragma: no cover + pass + elif isinstance(key, binary_type): + key = key.decode(_encoding) + elif isinstance(key, float): + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, integer_types): + key = str(key) + elif _use_decimal and isinstance(key, Decimal): + key = str(key) + elif _skipkeys: + key = None + else: + raise TypeError("key " + repr(key) + " is not a string") + return key + def _iterencode_dict(dct, _current_indent_level): if not dct: yield '{}' @@ -447,40 +482,35 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, newline_indent = None item_separator = _item_separator first = True + if _PY3: + iteritems = dct.items() + else: + iteritems = dct.iteritems() if _item_sort_key: - items = dct.items() + items = [] + for k, v in dct.items(): + if not isinstance(k, string_types): + k = _stringify_key(k) + if k is None: + continue + items.append((k, v)) items.sort(key=_item_sort_key) - elif _sort_keys: - items = dct.items() - items.sort(key=lambda kv: kv[0]) else: - items = dct.iteritems() + items = iteritems for key, value in items: - if isinstance(key, basestring): - pass - # JavaScript is weakly typed for these, so it makes sense to - # also allow them. Many encoders seem to do something like this. - elif isinstance(key, float): - key = _floatstr(key) - elif key is True: - key = 'true' - elif key is False: - key = 'false' - elif key is None: - key = 'null' - elif isinstance(key, (int, long)): - key = str(key) - elif _skipkeys: - continue - else: - raise TypeError("key " + repr(key) + " is not a string") + if not (_item_sort_key or isinstance(key, string_types)): + key = _stringify_key(key) + if key is None: + # _skipkeys must be True + continue if first: first = False else: yield item_separator yield _encoder(key) yield _key_separator - if isinstance(value, basestring): + if (isinstance(value, string_types) or + (_PY3 and isinstance(value, binary_type))): yield _encoder(value) elif value is None: yield 'null' @@ -488,7 +518,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield 'true' elif value is False: yield 'false' - elif isinstance(value, (int, long)): + elif isinstance(value, integer_types): yield (str(value) if (not _bigint_as_string or (-1 << 53) < value < (1 << 53)) @@ -521,7 +551,8 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, del markers[markerid] def _iterencode(o, _current_indent_level): - if isinstance(o, basestring): + if (isinstance(o, string_types) or + (_PY3 and isinstance(o, binary_type))): yield _encoder(o) elif o is None: yield 'null' @@ -529,7 +560,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield 'true' elif o is False: yield 'false' - elif isinstance(o, (int, long)): + elif isinstance(o, integer_types): yield (str(o) if (not _bigint_as_string or (-1 << 53) < o < (1 << 53)) diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py index 12289b6..da21365 100644 --- a/simplejson/tests/__init__.py +++ b/simplejson/tests/__init__.py @@ -1,17 +1,27 @@ +from __future__ import absolute_import import unittest import doctest - +import sys class OptionalExtensionTestSuite(unittest.TestSuite): def run(self, result): import simplejson run = unittest.TestSuite.run run(self, result) - simplejson._toggle_speedups(False) - run(self, result) - simplejson._toggle_speedups(True) + if simplejson._import_c_make_encoder() is None: + TestMissingSpeedups().run(result) + else: + simplejson._toggle_speedups(False) + run(self, result) + simplejson._toggle_speedups(True) return result +class TestMissingSpeedups(unittest.TestCase): + def runTest(self): + if hasattr(sys, 'pypy_translation_info'): + "PyPy doesn't need speedups! :)" + elif hasattr(self, 'skipTest'): + self.skipTest('_speedups.so is missing!') def additional_tests(suite=None): import simplejson @@ -55,7 +65,7 @@ def all_tests_suite(): def main(): - runner = unittest.TextTestRunner() + runner = unittest.TextTestRunner(verbosity=1 + sys.argv.count('-v')) suite = all_tests_suite() raise SystemExit(not runner.run(suite).wasSuccessful()) diff --git a/simplejson/tests/test_decimal.py b/simplejson/tests/test_decimal.py index 5fbe36c..ed1dada 100644 --- a/simplejson/tests/test_decimal.py +++ b/simplejson/tests/test_decimal.py @@ -1,7 +1,7 @@ import decimal from decimal import Decimal from unittest import TestCase -from StringIO import StringIO +from simplejson.compat import StringIO, reload_module import simplejson as json @@ -28,18 +28,23 @@ class TestDecimal(TestCase): for s in self.NUMS: self.assertEquals(self.loads(s, parse_float=Decimal), Decimal(s)) + def test_stringify_key(self): + for d in map(Decimal, self.NUMS): + v = {d: d} + self.assertEquals( + self.loads( + self.dumps(v, use_decimal=True), parse_float=Decimal), + {str(d): d}) + def test_decimal_roundtrip(self): for d in map(Decimal, self.NUMS): # The type might not be the same (int and Decimal) but they # should still compare equal. - self.assertEquals( - self.loads( - self.dumps(d, use_decimal=True), parse_float=Decimal), - d) - self.assertEquals( - self.loads( - self.dumps([d], use_decimal=True), parse_float=Decimal), - [d]) + for v in [d, [d], {'': d}]: + self.assertEquals( + self.loads( + self.dumps(v, use_decimal=True), parse_float=Decimal), + v) def test_decimal_defaults(self): d = Decimal('1.1') @@ -60,7 +65,7 @@ class TestDecimal(TestCase): # Simulate a subinterpreter that reloads the Python modules but not # the C code https://github.com/simplejson/simplejson/issues/34 global Decimal - Decimal = reload(decimal).Decimal + Decimal = reload_module(decimal).Decimal import simplejson.encoder simplejson.encoder.Decimal = Decimal self.test_decimal_roundtrip() diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py index 37b231b..5c488c6 100644 --- a/simplejson/tests/test_decode.py +++ b/simplejson/tests/test_decode.py @@ -1,8 +1,9 @@ +from __future__ import absolute_import import decimal from unittest import TestCase -from StringIO import StringIO import simplejson as json +from simplejson.compat import StringIO from simplejson import OrderedDict class TestDecode(TestCase): diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py index eab040c..e8c6054 100644 --- a/simplejson/tests/test_dump.py +++ b/simplejson/tests/test_dump.py @@ -1,14 +1,55 @@ from unittest import TestCase -from cStringIO import StringIO - +from simplejson.compat import StringIO, long_type, b, binary_type, PY3 import simplejson as json +def as_text_type(s): + if PY3 and isinstance(s, binary_type): + return s.decode('ascii') + return s + class TestDump(TestCase): def test_dump(self): sio = StringIO() json.dump({}, sio) self.assertEquals(sio.getvalue(), '{}') + def test_constants(self): + for c in [None, True, False]: + self.assert_(json.loads(json.dumps(c)) is c) + self.assert_(json.loads(json.dumps([c]))[0] is c) + self.assert_(json.loads(json.dumps({'a': c}))['a'] is c) + + def test_stringify_key(self): + items = [(b('bytes'), 'bytes'), + (1.0, '1.0'), + (10, '10'), + (True, 'true'), + (False, 'false'), + (None, 'null'), + (long_type(100), '100')] + for k, expect in items: + self.assertEquals( + json.loads(json.dumps({k: expect})), + {expect: expect}) + self.assertEquals( + json.loads(json.dumps({k: expect}, sort_keys=True)), + {expect: expect}) + self.assertRaises(TypeError, json.dumps, {json: 1}) + for v in [{}, {'other': 1}, {b('derp'): 1, 'herp': 2}]: + for sort_keys in [False, True]: + v0 = dict(v) + v0[json] = 1 + v1 = dict((as_text_type(key), val) for (key, val) in v.items()) + self.assertEquals( + json.loads(json.dumps(v0, skipkeys=True, sort_keys=sort_keys)), + v1) + self.assertEquals( + json.loads(json.dumps({'': v0}, skipkeys=True, sort_keys=sort_keys)), + {'': v1}) + self.assertEquals( + json.loads(json.dumps([v0], skipkeys=True, sort_keys=sort_keys)), + [v1]) + def test_dumps(self): self.assertEquals(json.dumps({}), '{}') @@ -16,15 +57,23 @@ class TestDump(TestCase): self.assertEquals(json.dumps( {True: False, False: True}, sort_keys=True), '{"false": true, "true": false}') - self.assertEquals(json.dumps( - {2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True), - '{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}') + self.assertEquals( + json.dumps( + {2: 3.0, + 4.0: long_type(5), + False: 1, + long_type(6): True, + "7": 0}, + sort_keys=True), + '{"2": 3.0, "4.0": 5, "6": true, "7": 0, "false": 1}') def test_ordered_dict(self): # http://bugs.python.org/issue6105 items = [('one', 1), ('two', 2), ('three', 3), ('four', 4), ('five', 5)] s = json.dumps(json.OrderedDict(items)) - self.assertEqual(s, '{"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}') + self.assertEqual( + s, + '{"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}') def test_indent_unknown_type_acceptance(self): """ @@ -63,5 +112,5 @@ class TestDump(TestCase): raise NotImplementedError("To do non-awesome things with" " this object, please construct it from an integer!") - s = json.dumps(range(3), indent=AwesomeInt(3)) + s = json.dumps([0, 1, 2], indent=AwesomeInt(3)) self.assertEqual(s, '[\n 0,\n 1,\n 2\n]') diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py index 6c40961..780bb0a 100644 --- a/simplejson/tests/test_encode_basestring_ascii.py +++ b/simplejson/tests/test_encode_basestring_ascii.py @@ -1,6 +1,7 @@ from unittest import TestCase import simplejson.encoder +from simplejson.compat import b CASES = [ (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'), @@ -11,9 +12,9 @@ CASES = [ (u' s p a c e d ', '" s p a c e d "'), (u'\U0001d120', '"\\ud834\\udd20"'), (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), - ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'), (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), - ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'), (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), (u"`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'), diff --git a/simplejson/tests/test_errors.py b/simplejson/tests/test_errors.py index 620ccf3..86c14e5 100644 --- a/simplejson/tests/test_errors.py +++ b/simplejson/tests/test_errors.py @@ -1,6 +1,8 @@ +import sys from unittest import TestCase import simplejson as json +from simplejson.compat import u, b class TestErrors(TestCase): def test_string_keys_error(self): @@ -11,8 +13,8 @@ class TestErrors(TestCase): err = None try: json.loads('{}\na\nb') - except json.JSONDecodeError, e: - err = e + except json.JSONDecodeError: + err = sys.exc_info()[1] else: self.fail('Expected JSONDecodeError') self.assertEquals(err.lineno, 2) @@ -22,13 +24,12 @@ class TestErrors(TestCase): def test_scan_error(self): err = None - for t in (str, unicode): + for t in (u, b): try: json.loads(t('{"asdf": "')) - except json.JSONDecodeError, e: - err = e + except json.JSONDecodeError: + err = sys.exc_info()[1] else: self.fail('Expected JSONDecodeError') self.assertEquals(err.lineno, 1) self.assertEquals(err.colno, 9) -
\ No newline at end of file diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py index 646c0f4..8d281b7 100644 --- a/simplejson/tests/test_fail.py +++ b/simplejson/tests/test_fail.py @@ -1,3 +1,4 @@ +import sys from unittest import TestCase import simplejson as json @@ -54,6 +55,31 @@ JSONDOCS = [ "['single quote']", # http://code.google.com/p/simplejson/issues/detail?id=3 u'["A\u001FZ control characters in string"]', + # misc based on coverage + '{', + '{]', + '{"foo": "bar"]', + '{"foo": "bar"', + 'nul', + 'nulx', + '-', + '-x', + '-e', + '-e0', + '-Infinite', + '-Inf', + 'Infinit', + 'Infinite', + 'NaM', + 'NuN', + 'falsy', + 'fal', + 'trug', + 'tru', + '1e', + '1ex', + '1e-', + '1e-x', ] SKIPS = { @@ -81,11 +107,13 @@ class TestFail(TestCase): for doc in [u'[,]', '[,]']: try: json.loads(doc) - except json.JSONDecodeError, e: + except json.JSONDecodeError: + e = sys.exc_info()[1] self.assertEquals(e.pos, 1) self.assertEquals(e.lineno, 1) self.assertEquals(e.colno, 1) - except Exception, e: + except Exception: + e = sys.exc_info()[1] self.fail("Unexpected exception raised %r %s" % (e, e)) else: - self.fail("Unexpected success parsing '[,]'")
\ No newline at end of file + self.fail("Unexpected success parsing '[,]'") diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py index 94502c6..5d50223 100644 --- a/simplejson/tests/test_float.py +++ b/simplejson/tests/test_float.py @@ -1,19 +1,27 @@ import math from unittest import TestCase - +from simplejson.compat import long_type, text_type import simplejson as json +from simplejson.decoder import NaN, PosInf, NegInf class TestFloat(TestCase): + def test_degenerates(self): + for inf in (PosInf, NegInf): + self.assertEquals(json.loads(json.dumps(inf)), inf) + # Python 2.5 doesn't have math.isnan + nan = json.loads(json.dumps(NaN)) + self.assert_((0 + nan) != nan) + def test_floats(self): for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]: self.assertEquals(float(json.dumps(num)), num) self.assertEquals(json.loads(json.dumps(num)), num) - self.assertEquals(json.loads(unicode(json.dumps(num))), num) + self.assertEquals(json.loads(text_type(json.dumps(num))), num) def test_ints(self): - for num in [1, 1L, 1<<32, 1<<64]: + for num in [1, long_type(1), 1<<32, 1<<64]: self.assertEquals(json.dumps(num), str(num)) self.assertEquals(int(json.dumps(num)), num) self.assertEquals(json.loads(json.dumps(num)), num) - self.assertEquals(json.loads(unicode(json.dumps(num))), num) + self.assertEquals(json.loads(text_type(json.dumps(num))), num) diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py index 1e6bdb1..a397cca 100644 --- a/simplejson/tests/test_indent.py +++ b/simplejson/tests/test_indent.py @@ -1,8 +1,8 @@ from unittest import TestCase +import textwrap import simplejson as json -import textwrap -from StringIO import StringIO +from simplejson.compat import StringIO class TestIndent(TestCase): def test_indent(self): @@ -83,4 +83,4 @@ class TestIndent(TestCase): # Added in 2.1.4 self.assertEquals( expect, - json.dumps(lst, indent=0))
\ No newline at end of file + json.dumps(lst, indent=0)) diff --git a/simplejson/tests/test_namedtuple.py b/simplejson/tests/test_namedtuple.py index 54a9a12..4387894 100644 --- a/simplejson/tests/test_namedtuple.py +++ b/simplejson/tests/test_namedtuple.py @@ -1,6 +1,7 @@ +from __future__ import absolute_import import unittest import simplejson as json -from StringIO import StringIO +from simplejson.compat import StringIO try: from collections import namedtuple diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py index a7fcd46..045725a 100644 --- a/simplejson/tests/test_scanstring.py +++ b/simplejson/tests/test_scanstring.py @@ -3,8 +3,17 @@ from unittest import TestCase import simplejson as json import simplejson.decoder +from simplejson.compat import b, PY3 class TestScanString(TestCase): + # The bytes type is intentionally not used in most of these tests + # under Python 3 because the decoder immediately coerces to str before + # calling scanstring. In Python 2 we are testing the code paths + # for both unicode and str. + # + # The reason this is done is because Python 3 would require + # entirely different code paths for parsing bytes and str. + # def test_py_scanstring(self): self._test_scanstring(simplejson.decoder.py_scanstring) @@ -103,15 +112,36 @@ class TestScanString(TestCase): scanstring('["Bad value", truth]', 2, None, True), (u'Bad value', 12)) + for c in map(chr, range(0x00, 0x1f)): + self.assertEquals( + scanstring(c + '"', 0, None, False), + (c, 2)) + self.assertRaises( + ValueError, + scanstring, c + '"', 0, None, True) + + self.assertRaises(ValueError, scanstring, '', 0, None, True) + self.assertRaises(ValueError, scanstring, 'a', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u0', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u01', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u012', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u0123', 0, None, True) + if sys.maxunicode > 65535: + self.assertRaises(ValueError, scanstring, '\\ud834"', 0, None, True), + self.assertRaises(ValueError, scanstring, '\\ud834\\u"', 0, None, True), + self.assertRaises(ValueError, scanstring, '\\ud834\\x0123"', 0, None, True), + def test_issue3623(self): self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, "xxx") self.assertRaises(UnicodeDecodeError, - json.encoder.encode_basestring_ascii, "xx\xff") + json.encoder.encode_basestring_ascii, b("xx\xff")) def test_overflow(self): - # Python 2.5 does not have maxsize - maxsize = getattr(sys, 'maxsize', sys.maxint) + # Python 2.5 does not have maxsize, Python 3 does not have maxint + maxsize = getattr(sys, 'maxsize', getattr(sys, 'maxint', None)) + assert maxsize is not None self.assertRaises(OverflowError, json.decoder.scanstring, "xxx", maxsize + 1) - diff --git a/simplejson/tests/test_tuple.py b/simplejson/tests/test_tuple.py index 92856a7..a6a9910 100644 --- a/simplejson/tests/test_tuple.py +++ b/simplejson/tests/test_tuple.py @@ -1,6 +1,6 @@ import unittest -from StringIO import StringIO +from simplejson.compat import StringIO import simplejson as json class TestTuples(unittest.TestCase): @@ -13,7 +13,8 @@ class TestTuples(unittest.TestCase): self.assertRaises(TypeError, json.dumps, t, tuple_as_array=False) # Ensure that the "default" does not get called self.assertEqual(expect, json.dumps(t, default=repr)) - self.assertEqual(expect, json.dumps(t, tuple_as_array=True, default=repr)) + self.assertEqual(expect, json.dumps(t, tuple_as_array=True, + default=repr)) # Ensure that the "default" gets called self.assertEqual( json.dumps(repr(t)), @@ -29,7 +30,8 @@ class TestTuples(unittest.TestCase): sio = StringIO() json.dump(t, sio, tuple_as_array=True) self.assertEqual(expect, sio.getvalue()) - self.assertRaises(TypeError, json.dump, t, StringIO(), tuple_as_array=False) + self.assertRaises(TypeError, json.dump, t, StringIO(), + tuple_as_array=False) # Ensure that the "default" does not get called sio = StringIO() json.dump(t, sio, default=repr) @@ -46,4 +48,4 @@ class TestTuples(unittest.TestCase): class TestNamedTuple(unittest.TestCase): def test_namedtuple_dump(self): - pass
\ No newline at end of file + pass diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py index 83fe65b..9afc3ac 100644 --- a/simplejson/tests/test_unicode.py +++ b/simplejson/tests/test_unicode.py @@ -1,6 +1,8 @@ +import sys from unittest import TestCase import simplejson as json +from simplejson.compat import unichr, text_type, b, u class TestUnicode(TestCase): def test_encoding1(self): @@ -76,21 +78,21 @@ class TestUnicode(TestCase): {'a': u'\xe9'}) def test_unicode_preservation(self): - self.assertEquals(type(json.loads(u'""')), unicode) - self.assertEquals(type(json.loads(u'"a"')), unicode) - self.assertEquals(type(json.loads(u'["a"]')[0]), unicode) + self.assertEquals(type(json.loads(u'""')), text_type) + self.assertEquals(type(json.loads(u'"a"')), text_type) + self.assertEquals(type(json.loads(u'["a"]')[0]), text_type) def test_ensure_ascii_false_returns_unicode(self): # http://code.google.com/p/simplejson/issues/detail?id=48 - self.assertEquals(type(json.dumps([], ensure_ascii=False)), unicode) - self.assertEquals(type(json.dumps(0, ensure_ascii=False)), unicode) - self.assertEquals(type(json.dumps({}, ensure_ascii=False)), unicode) - self.assertEquals(type(json.dumps("", ensure_ascii=False)), unicode) + self.assertEquals(type(json.dumps([], ensure_ascii=False)), text_type) + self.assertEquals(type(json.dumps(0, ensure_ascii=False)), text_type) + self.assertEquals(type(json.dumps({}, ensure_ascii=False)), text_type) + self.assertEquals(type(json.dumps("", ensure_ascii=False)), text_type) def test_ensure_ascii_false_bytestring_encoding(self): # http://code.google.com/p/simplejson/issues/detail?id=48 - doc1 = {u'quux': 'Arr\xc3\xaat sur images'} - doc2 = {u'quux': u'Arr\xeat sur images'} + doc1 = {u'quux': b('Arr\xc3\xaat sur images')} + doc2 = {u'quux': u('Arr\xeat sur images')} doc_ascii = '{"quux": "Arr\\u00eat sur images"}' doc_unicode = u'{"quux": "Arr\xeat sur images"}' self.assertEquals(json.dumps(doc1), doc_ascii) @@ -107,3 +109,37 @@ class TestUnicode(TestCase): self.assertEquals(json.dumps(s2), expect) self.assertEquals(json.dumps(s1, ensure_ascii=False), expect) self.assertEquals(json.dumps(s2, ensure_ascii=False), expect) + + def test_invalid_escape_sequences(self): + # incomplete escape sequence + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1234') + # invalid escape sequence + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123x"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12x4"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1x34"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ux234"') + if sys.maxunicode > 65535: + # unpaired low surrogate + self.assertRaises(json.JSONDecodeError, json.loads, '"\\udc00"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\udcff"') + # unpaired high surrogate + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800x"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800xx"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800xxxxxx"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000"') + # invalid escape sequence for low surrogate + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000x"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00x0"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0x00"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ux000"') + # invalid value for low surrogate + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0000"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ufc00"')
\ No newline at end of file |