From 5846148410234f6466aefcddc68683811577998e Mon Sep 17 00:00:00 2001 From: Bob Ippolito Date: Wed, 26 Dec 2012 12:48:47 -0800 Subject: First pass at Python 3.3 compatibility First pass at Python 3.3 compatibility, bump to 3.0.0 --- .travis.yml | 3 + CHANGES.txt | 8 + README.rst | 6 +- conf.py | 4 +- index.rst | 39 +- setup.py | 27 +- simplejson/__init__.py | 34 +- simplejson/_speedups.c | 643 +++++++++++++++-------- simplejson/compat.py | 43 ++ simplejson/decoder.py | 31 +- simplejson/encoder.py | 138 +++-- simplejson/tests/__init__.py | 2 +- simplejson/tests/test_decimal.py | 4 +- simplejson/tests/test_decode.py | 3 +- simplejson/tests/test_dump.py | 21 +- simplejson/tests/test_encode_basestring_ascii.py | 5 +- simplejson/tests/test_errors.py | 13 +- simplejson/tests/test_fail.py | 9 +- simplejson/tests/test_float.py | 8 +- simplejson/tests/test_indent.py | 6 +- simplejson/tests/test_namedtuple.py | 3 +- simplejson/tests/test_scanstring.py | 8 +- simplejson/tests/test_tuple.py | 10 +- simplejson/tests/test_unicode.py | 19 +- 24 files changed, 723 insertions(+), 364 deletions(-) create mode 100644 simplejson/compat.py diff --git a/.travis.yml b/.travis.yml index d81b6d1..8d649d1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,8 @@ language: python python: + - "2.5" - "2.6" - "2.7" + - "3.3" + - "pypy" script: python setup.py test diff --git a/CHANGES.txt b/CHANGES.txt index f9566a9..a47102b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,11 @@ +Version 3.0.0 released 201X-XX-XX + +* Python 3.3 is now supported, thanks to Vinay Sajip #8 +* `sort_keys`/`item_sort_key` now sort on the stringified version of the + key, rather than the original object. This ensures that the sort + only compares string types and makes the behavior consistent between + Python 2.x and Python 3.x. + Version 2.6.2 released 2012-09-21 * JSONEncoderForHTML was not exported in the simplejson module diff --git a/README.rst b/README.rst index 955221f..5fdaf21 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ simplejson is a simple, fast, complete, correct and extensible -JSON encoder and decoder for Python 2.5+. It is -pure Python code with no dependencies, but includes an optional C -extension for a serious speed boost. +JSON encoder and decoder for Python 2.5+ +and Python 3.3+. It is pure Python code with no dependencies, +but includes an optional C extension for a serious speed boost. The latest documentation for simplejson can be read online here: http://simplejson.readthedocs.org/ diff --git a/conf.py b/conf.py index 2309f88..e552769 100644 --- a/conf.py +++ b/conf.py @@ -42,9 +42,9 @@ copyright = '2012, Bob Ippolito' # other places throughout the built documents. # # The short X.Y version. -version = '2.6' +version = '3.0' # The full version, including alpha/beta/rc tags. -release = '2.6.2' +release = '3.0.0' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: diff --git a/index.rst b/index.rst index 4a8b5c7..e7deccc 100644 --- a/index.rst +++ b/index.rst @@ -14,7 +14,7 @@ syntax (ECMA-262 3rd edition) used as a lightweight data interchange format. version of the :mod:`json` library contained in Python 2.6, but maintains compatibility with Python 2.5 and (currently) has significant performance advantages, even without using the optional C -extension for speedups. +extension for speedups. :mod:`simplejson` is also supported on Python 3.3+. 
Development of simplejson happens on Github: http://github.com/simplejson/simplejson @@ -24,15 +24,15 @@ Encoding basic Python object hierarchies:: >>> import simplejson as json >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print json.dumps("\"foo\bar") + >>> print(json.dumps("\"foo\bar")) "\"foo\bar" - >>> print json.dumps(u'\u1234') + >>> print(json.dumps(u'\u1234')) "\u1234" - >>> print json.dumps('\\') + >>> print(json.dumps('\\')) "\\" - >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) {"a": 0, "b": 0, "c": 0} - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO() >>> json.dump(['streaming API'], io) >>> io.getvalue() @@ -41,14 +41,15 @@ Encoding basic Python object hierarchies:: Compact encoding:: >>> import simplejson as json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',', ':')) + >>> obj = [1,2,3,{'4': 5, '6': 7}] + >>> json.dumps(obj, separators=(',', ':'), sort_keys=True) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: >>> import simplejson as json >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4 * ' ') - >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + >>> print('\n'.join([l.rstrip() for l in s.splitlines()])) { "4": 5, "6": 7 @@ -62,7 +63,7 @@ Decoding JSON:: True >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' True - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO('["streaming API"]') >>> json.load(io)[0] == 'streaming API' True @@ -218,6 +219,11 @@ Basic Usage will be sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis. + .. versionchanged:: 3.0.0 + Sorting now happens after the keys have been coerced to + strings, to avoid comparison of heterogeneously typed objects + (since this does not work in Python 3.3+) + If *item_sort_key* is a callable (not the default), then the output of dictionaries will be sorted with it. The callable will be used like this: ``sorted(dct.items(), key=item_sort_key)``. This option takes precedence @@ -226,6 +232,11 @@ Basic Usage .. versionchanged:: 2.5.0 *item_sort_key* is new in 2.5.0. + .. versionchanged:: 3.0.0 + Sorting now happens after the keys have been coerced to + strings, to avoid comparison of heterogeneously typed objects + (since this does not work in Python 3.3+) + .. note:: JSON is not a framed protocol so unlike :mod:`pickle` or :mod:`marshal` it @@ -487,6 +498,11 @@ Encoders and decoders will be sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis. + .. versionchanged:: 3.0.0 + Sorting now happens after the keys have been coerced to + strings, to avoid comparison of heterogeneously typed objects + (since this does not work in Python 3.3+) + If *item_sort_key* is a callable (not the default), then the output of dictionaries will be sorted with it. The callable will be used like this: ``sorted(dct.items(), key=item_sort_key)``. This option takes precedence @@ -495,6 +511,11 @@ Encoders and decoders .. versionchanged:: 2.5.0 *item_sort_key* is new in 2.5.0. + .. 
versionchanged:: 3.0.0 + Sorting now happens after the keys have been coerced to + strings, to avoid comparison of heterogeneously typed objects + (since this does not work in Python 3.3+) + If *indent* is a string, then JSON array elements and object members will be pretty-printed with a newline followed by that string repeated for each level of nesting. ``None`` (the default) selects the most compact diff --git a/setup.py b/setup.py index f799e29..0086943 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ from distutils.errors import CCompilerError, DistutilsExecError, \ DistutilsPlatformError IS_PYPY = hasattr(sys, 'pypy_translation_info') -VERSION = '2.6.2' +VERSION = '3.0.0' DESCRIPTION = "Simple, fast, extensible JSON encoder/decoder for Python" LONG_DESCRIPTION = open('README.rst', 'r').read() @@ -36,13 +36,13 @@ class ve_build_ext(build_ext): def run(self): try: build_ext.run(self) - except DistutilsPlatformError, x: + except DistutilsPlatformError: raise BuildFailed() def build_extension(self, ext): try: build_ext.build_extension(self, ext) - except ext_errors, x: + except ext_errors: raise BuildFailed() @@ -89,16 +89,17 @@ def run_setup(with_binary): try: run_setup(not IS_PYPY) except BuildFailed: - BUILD_EXT_WARNING = "WARNING: The C extension could not be compiled, speedups are not enabled." - print '*' * 75 - print BUILD_EXT_WARNING - print "Failure information, if any, is above." - print "I'm retrying the build without the C extension now." - print '*' * 75 + BUILD_EXT_WARNING = ("WARNING: The C extension could not be compiled, " + "speedups are not enabled.") + print('*' * 75) + print(BUILD_EXT_WARNING) + print("Failure information, if any, is above.") + print("I'm retrying the build without the C extension now.") + print('*' * 75) run_setup(False) - print '*' * 75 - print BUILD_EXT_WARNING - print "Plain-Python installation succeeded." 
- print '*' * 75 + print('*' * 75) + print(BUILD_EXT_WARNING) + print("Plain-Python installation succeeded.") + print('*' * 75) diff --git a/simplejson/__init__.py b/simplejson/__init__.py index fe2bd5a..c655e92 100644 --- a/simplejson/__init__.py +++ b/simplejson/__init__.py @@ -14,15 +14,15 @@ Encoding basic Python object hierarchies:: >>> import simplejson as json >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print json.dumps("\"foo\bar") + >>> print(json.dumps("\"foo\bar")) "\"foo\bar" - >>> print json.dumps(u'\u1234') + >>> print(json.dumps(u'\u1234')) "\u1234" - >>> print json.dumps('\\') + >>> print(json.dumps('\\')) "\\" - >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) {"a": 0, "b": 0, "c": 0} - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO() >>> json.dump(['streaming API'], io) >>> io.getvalue() @@ -31,14 +31,15 @@ Encoding basic Python object hierarchies:: Compact encoding:: >>> import simplejson as json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + >>> obj = [1,2,3,{'4': 5, '6': 7}] + >>> json.dumps(obj, separators=(',',':'), sort_keys=True) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: >>> import simplejson as json >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ') - >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + >>> print('\n'.join([l.rstrip() for l in s.splitlines()])) { "4": 5, "6": 7 @@ -52,7 +53,7 @@ Decoding JSON:: True >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' True - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO('["streaming API"]') >>> json.load(io)[0] == 'streaming API' True @@ -97,7 +98,8 @@ Using simplejson.tool from the shell to validate and pretty-print:: $ echo '{ 1.2:3.4}' | python -m simplejson.tool Expecting property name: line 1 column 2 (char 2) """ -__version__ = '2.6.2' +from __future__ import absolute_import +__version__ = '3.0.0' __all__ = [ 'dump', 'dumps', 'load', 'loads', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', @@ -108,20 +110,20 @@ __author__ = 'Bob Ippolito ' from decimal import Decimal -from decoder import JSONDecoder, JSONDecodeError -from encoder import JSONEncoder, JSONEncoderForHTML +from .decoder import JSONDecoder, JSONDecodeError +from .encoder import JSONEncoder, JSONEncoderForHTML def _import_OrderedDict(): import collections try: return collections.OrderedDict except AttributeError: - import ordered_dict + from . import ordered_dict return ordered_dict.OrderedDict OrderedDict = _import_OrderedDict() def _import_c_make_encoder(): try: - from simplejson._speedups import make_encoder + from ._speedups import make_encoder return make_encoder except ImportError: return None @@ -469,9 +471,9 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, def _toggle_speedups(enabled): - import simplejson.decoder as dec - import simplejson.encoder as enc - import simplejson.scanner as scan + from . import decoder as dec + from . import encoder as enc + from . 
import scanner as scan c_make_encoder = _import_c_make_encoder() if enabled: dec.scanstring = dec.c_scanstring or dec.py_scanstring diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c index be68b2d..c3b6c09 100644 --- a/simplejson/_speedups.c +++ b/simplejson/_speedups.c @@ -1,6 +1,31 @@ #include "Python.h" #include "structmember.h" -#if PY_VERSION_HEX < 0x02070000 && !defined(PyOS_string_to_double) + +#if PY_MAJOR_VERSION >= 3 +#define PyInt_FromSsize_t PyLong_FromSsize_t +#define PyInt_AsSsize_t PyLong_AsSsize_t +#define PyString_Check PyBytes_Check +#define PyString_GET_SIZE PyBytes_GET_SIZE +#define PyString_AS_STRING PyBytes_AS_STRING +#define PyString_FromStringAndSize PyBytes_FromStringAndSize +#define PyInt_Check(obj) 0 +#define JSON_UNICHR Py_UCS4 +#define JSON_InternFromString PyUnicode_InternFromString +#define JSON_Intern_GET_SIZE PyUnicode_GET_SIZE +#define JSON_ASCII_Check PyUnicode_Check +#define JSON_ASCII_AS_STRING PyUnicode_AsUTF8 +#define PyInt_Type PyLong_Type +#define PyInt_FromString PyLong_FromString +#else /* PY_MAJOR_VERSION >= 3 */ +#define JSON_UNICHR Py_UNICODE +#define JSON_ASCII_Check PyString_Check +#define JSON_ASCII_AS_STRING PyString_AS_STRING +#define JSON_InternFromString PyString_InternFromString +#define JSON_Intern_GET_SIZE PyString_GET_SIZE +#endif /* PY_MAJOR_VERSION < 3 */ + +#if PY_VERSION_HEX < 0x02070000 +#if !defined(PyOS_string_to_double) #define PyOS_string_to_double json_PyOS_string_to_double static double json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception); @@ -15,22 +40,32 @@ json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exce return x; } #endif -#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) +#endif /* PY_VERSION_HEX < 0x02070000 */ + +#if PY_VERSION_HEX < 0x02060000 +#if !defined(Py_TYPE) #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) #endif -#if PY_VERSION_HEX < 0x02060000 && !defined(Py_SIZE) +#if !defined(Py_SIZE) #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) #endif -#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +#if !defined(PyVarObject_HEAD_INIT) +#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, +#endif +#endif /* PY_VERSION_HEX < 0x02060000 */ + +#if PY_VERSION_HEX < 0x02050000 +#if !defined(PY_SSIZE_T_MIN) typedef int Py_ssize_t; #define PY_SSIZE_T_MAX INT_MAX #define PY_SSIZE_T_MIN INT_MIN #define PyInt_FromSsize_t PyInt_FromLong #define PyInt_AsSsize_t PyInt_AsLong #endif -#ifndef Py_IS_FINITE +#if !defined(Py_IS_FINITE) #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) #endif +#endif /* PY_VERSION_HEX < 0x02050000 */ #ifdef __GNUC__ #define UNUSED __attribute__((__unused__)) @@ -90,6 +125,7 @@ typedef struct _PyEncoderObject { int tuple_as_array; int bigint_as_string; PyObject *item_sort_key; + PyObject *item_sort_kw; } PyEncoderObject; static PyMemberDef encoder_members[] = { @@ -110,14 +146,15 @@ static PyObject * maybe_quote_bigint(PyObject *encoded, PyObject *obj); static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); +ascii_char_size(JSON_UNICHR c); +static Py_ssize_t +ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars); static PyObject * ascii_escape_unicode(PyObject *pystr); static PyObject * ascii_escape_str(PyObject *pystr); static PyObject * py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); -void init_speedups(void); static PyObject * scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t 
*next_idx_ptr); static PyObject * @@ -160,16 +197,13 @@ static PyObject * encoder_encode_float(PyEncoderObject *s, PyObject *obj); static int _is_namedtuple(PyObject *obj); +static PyObject * +moduleinit(void); #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) #define MIN_EXPANSION 6 -#ifdef Py_UNICODE_WIDE -#define MAX_EXPANSION (2 * MIN_EXPANSION) -#else -#define MAX_EXPANSION MIN_EXPANSION -#endif static PyObject * maybe_quote_bigint(PyObject *encoded, PyObject *obj) @@ -192,8 +226,12 @@ maybe_quote_bigint(PyObject *encoded, PyObject *obj) } if (PyObject_RichCompareBool(obj, big_long, Py_GE) || PyObject_RichCompareBool(obj, small_long, Py_LE)) { +#if PY_MAJOR_VERSION >= 3 + PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded); +#else PyObject* quoted = PyString_FromFormat("\"%s\"", PyString_AsString(encoded)); +#endif Py_DECREF(encoded); encoded = quoted; } @@ -232,44 +270,126 @@ _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) } static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) +ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars) { /* Escape unicode code point c to ASCII escape sequences in char *output. output must have at least 12 bytes unused to accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ - output[chars++] = '\\'; - switch (c) { - case '\\': output[chars++] = (char)c; break; - case '"': output[chars++] = (char)c; break; - case '\b': output[chars++] = 'b'; break; - case '\f': output[chars++] = 'f'; break; - case '\n': output[chars++] = 'n'; break; - case '\r': output[chars++] = 'r'; break; - case '\t': output[chars++] = 't'; break; - default: -#ifdef Py_UNICODE_WIDE - if (c >= 0x10000) { - /* UTF-16 surrogate pair */ - Py_UNICODE v = c - 0x10000; - c = 0xd800 | ((v >> 10) & 0x3ff); + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3 + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + JSON_UNICHR v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif output[chars++] = 'u'; output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; output[chars++] = "0123456789abcdef"[(c ) & 0xf]; - c = 0xdc00 | (v & 0x3ff); - output[chars++] = '\\'; - } -#endif - output[chars++] = 'u'; - output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; - output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } } return chars; } +static Py_ssize_t +ascii_char_size(JSON_UNICHR c) +{ + if (S_CHAR(c)) { + return 1; + } + else if (c == '\\' || + c == '"' || + c == '\b' || + c == '\f' || + c == '\n' || + c == '\r' || + c 
== '\t') { + return 2; + } + else if (c < 0x10000U) { + return MIN_EXPANSION; + } + else { + return 2 * MIN_EXPANSION; + } +} + +#if PY_MAJOR_VERSION >= 3 + +static PyObject * +ascii_escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + int kind; + void *data; + PyObject *rval; + char *output; + + if (PyUnicode_READY(pystr)) + return NULL; + + kind = PyUnicode_KIND(pystr); + data = PyUnicode_DATA(pystr); + input_chars = PyUnicode_GetLength(pystr); + output_size = 2; + for (i = 0; i < input_chars; i++) { + output_size += ascii_char_size(PyUnicode_READ(kind, data, i)); + } + rval = PyUnicode_New(output_size, 127); + if (rval == NULL) { + return NULL; + } + output = (char *)PyUnicode_DATA(rval); + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + chars = ascii_escape_char(PyUnicode_READ(kind, data, i), output, chars); + } + output[chars++] = '"'; + assert(chars == output_size); + return rval; +} + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + PyObject *rval; + PyObject *input = PyUnicode_DecodeUTF8(PyString_AS_STRING(pystr), PyString_GET_SIZE(pystr), NULL); + if (input == NULL) + return NULL; + rval = ascii_escape_unicode(input); + Py_DECREF(input); + return rval; +} + +#else /* PY_MAJOR_VERSION >= 3 */ + static PyObject * ascii_escape_unicode(PyObject *pystr) { @@ -277,7 +397,6 @@ ascii_escape_unicode(PyObject *pystr) Py_ssize_t i; Py_ssize_t input_chars; Py_ssize_t output_size; - Py_ssize_t max_output_size; Py_ssize_t chars; PyObject *rval; char *output; @@ -286,9 +405,10 @@ ascii_escape_unicode(PyObject *pystr) input_chars = PyUnicode_GET_SIZE(pystr); input_unicode = PyUnicode_AS_UNICODE(pystr); - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - max_output_size = 2 + (input_chars * MAX_EXPANSION); + output_size = 2; + for (i = 0; i < input_chars; i++) { + output_size += ascii_char_size((JSON_UNICHR)input_unicode[i]); + } rval = PyString_FromStringAndSize(NULL, output_size); if (rval == NULL) { return NULL; @@ -297,34 +417,10 @@ ascii_escape_unicode(PyObject *pystr) chars = 0; output[chars++] = '"'; for (i = 0; i < input_chars; i++) { - Py_UNICODE c = input_unicode[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - if (output_size - chars < (1 + MAX_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - Py_ssize_t new_output_size = output_size * 2; - /* This is an upper bound */ - if (new_output_size > max_output_size) { - new_output_size = max_output_size; - } - /* Make sure that the output size changed before resizing */ - if (new_output_size != output_size) { - output_size = new_output_size; - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; - } - output = PyString_AS_STRING(rval); - } - } + chars = ascii_escape_char((JSON_UNICHR)input_unicode[i], output, chars); } output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { - return NULL; - } + assert(chars == output_size); return rval; } @@ -342,76 +438,146 @@ ascii_escape_str(PyObject *pystr) input_chars = PyString_GET_SIZE(pystr); input_str = PyString_AS_STRING(pystr); + output_size = 2; /* Fast path for a string that's already ASCII */ for (i = 0; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (!S_CHAR(c)) { - /* If we have to 
escape something, scan the string for unicode */ - Py_ssize_t j; - for (j = i; j < input_chars; j++) { - c = (Py_UNICODE)(unsigned char)input_str[j]; - if (c > 0x7f) { - /* We hit a non-ASCII character, bail to unicode mode */ - PyObject *uni; - uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); - if (uni == NULL) { - return NULL; - } - rval = ascii_escape_unicode(uni); - Py_DECREF(uni); - return rval; - } + JSON_UNICHR c = (JSON_UNICHR)input_str[i]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; } - break; + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; } + output_size += ascii_char_size(c); } - if (i == input_chars) { - /* Input is already ASCII */ - output_size = 2 + input_chars; - } - else { - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - } rval = PyString_FromStringAndSize(NULL, output_size); if (rval == NULL) { return NULL; } + chars = 0; output = PyString_AS_STRING(rval); - output[0] = '"'; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + chars = ascii_escape_char((JSON_UNICHR)input_str[i], output, chars); + } + output[chars++] = '"'; + assert(chars == output_size); + return rval; +} +#endif /* PY_MAJOR_VERSION < 3 */ - /* We know that everything up to i is ASCII already */ - chars = i + 1; - memcpy(&output[1], input_str, i); +static PyObject * +encoder_dict_iteritems(PyEncoderObject *s, PyObject *dct) +{ + PyObject *items; + PyObject *iter = NULL; + PyObject *lst = NULL; + PyObject *item = NULL; + PyObject *kstr = NULL; + static PyObject *sortfun = NULL; + static PyObject *sortargs = NULL; + int skipkeys; - for (; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; + if (sortargs == NULL) { + sortargs = PyTuple_New(0); + if (sortargs == NULL) + return NULL; + } + + if (PyDict_CheckExact(dct)) + items = PyDict_Items(dct); + else + items = PyMapping_Items(dct); + if (items == NULL) + return NULL; + iter = PyObject_GetIter(items); + Py_DECREF(items); + if (iter == NULL) + return NULL; + if (s->item_sort_kw == Py_None) + return iter; + skipkeys = PyObject_IsTrue(s->skipkeys); + lst = PyList_New(0); + if (lst == NULL) + goto bail; + while ((item = PyIter_Next(iter))) { + PyObject *key, *value; + if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + goto bail; } - else { - chars = ascii_escape_char(c, output, chars); + key = PyTuple_GET_ITEM(item, 0); + if (key == NULL) + goto bail; + if (PyString_Check(key) || PyUnicode_Check(key)) { + // item can be added as-is } - /* An ASCII char can't possibly expand to a surrogate! 
*/ - if (output_size - chars < (1 + MIN_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - output_size *= 2; - if (output_size > 2 + (input_chars * MIN_EXPANSION)) { - output_size = 2 + (input_chars * MIN_EXPANSION); + else { + if (PyFloat_Check(key)) { + kstr = encoder_encode_float(s, key); } - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; + else if (key == Py_True || key == Py_False || key == Py_None) { + /* This must come before the PyInt_Check because + True and False are also 1 and 0.*/ + kstr = _encoded_const(key); } - output = PyString_AS_STRING(rval); + else if (PyInt_Check(key) || PyLong_Check(key)) { + kstr = PyObject_Str(key); + } + else if (skipkeys) { + Py_DECREF(item); + continue; + } + else { + /* TODO: include repr of key */ + PyErr_SetString(PyExc_TypeError, "keys must be a string"); + goto bail; + } + if (kstr == NULL) + goto bail; + value = PyTuple_GET_ITEM(item, 1); + if (value == NULL) + goto bail; + PyObject *tpl = PyTuple_New(2); + if (tpl == NULL) + goto bail; + PyTuple_SET_ITEM(tpl, 0, kstr); + kstr = NULL; + Py_INCREF(value); + PyTuple_SET_ITEM(tpl, 1, value); + Py_DECREF(item); + item = tpl; } + if (PyList_Append(lst, item)) + goto bail; + Py_DECREF(item); } - output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { - return NULL; - } - return rval; + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; + sortfun = PyObject_GetAttrString(lst, "sort"); + if (sortfun == NULL) + goto bail; + if (!PyObject_Call(sortfun, sortargs, s->item_sort_kw)) + goto bail; + Py_CLEAR(sortfun); + iter = PyObject_GetIter(lst); + Py_CLEAR(lst); + return iter; +bail: + Py_XDECREF(sortfun); + Py_XDECREF(kstr); + Py_XDECREF(item); + Py_XDECREF(lst); + Py_XDECREF(iter); + return NULL; } static void @@ -455,6 +621,9 @@ join_list_unicode(PyObject *lst) return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); } +#if PY_MAJOR_VERSION >= 3 +#define join_list_string join_list_unicode +#else /* PY_MAJOR_VERSION >= 3 */ static PyObject * join_list_string(PyObject *lst) { @@ -472,6 +641,7 @@ join_list_string(PyObject *lst) } return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { @@ -564,9 +734,19 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s } /* Pick up this chunk if it's not zero length */ if (next != end) { - PyObject *strchunk; APPEND_OLD_CHUNK - strchunk = PyString_FromStringAndSize(&buf[end], next - end); +#if PY_MAJOR_VERSION >= 3 + if (!has_unicode) { + chunk = PyUnicode_DecodeASCII(&buf[end], next - end, NULL); + } + else { + chunk = PyUnicode_Decode(&buf[end], next - end, encoding, NULL); + } + if (chunk == NULL) { + goto bail; + } +#else /* PY_MAJOR_VERSION >= 3 */ + PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); if (strchunk == NULL) { goto bail; } @@ -580,6 +760,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s else { chunk = strchunk; } +#endif /* PY_MAJOR_VERSION < 3 */ } next++; if (c == '"') { @@ -620,7 +801,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s } /* Decode 4 hex digits */ for (; next < end; next++) { - Py_UNICODE digit = buf[next]; + JSON_UNICHR digit = (JSON_UNICHR)buf[next]; c <<= 4; switch (digit) { case '0': case '1': case '2': case '3': case '4': @@ -637,10 +818,10 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s goto bail; } } 
-#ifdef Py_UNICODE_WIDE +#if (PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)) /* Surrogate pair */ if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; + JSON_UNICHR c2 = 0; if (end + 6 >= len) { raise_errmsg("Unpaired high surrogate", pystr, end - 5); goto bail; @@ -653,7 +834,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s /* Decode 4 hex digits */ for (; next < end; next++) { c2 <<= 4; - Py_UNICODE digit = buf[next]; + JSON_UNICHR digit = buf[next]; switch (digit) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -679,12 +860,19 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s raise_errmsg("Unpaired low surrogate", pystr, end - 5); goto bail; } -#endif +#endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */ } if (c > 0x7f) { has_unicode = 1; } APPEND_OLD_CHUNK +#if PY_MAJOR_VERSION >= 3 + chunk = PyUnicode_New(1, c); + if (chunk == NULL) { + goto bail; + } + PyUnicode_WRITE(PyUnicode_KIND(chunk), PyUnicode_DATA(chunk), 0, c); +#else /* PY_MAJOR_VERSION >= 3 */ if (has_unicode) { chunk = PyUnicode_FromUnicode(&c, 1); if (chunk == NULL) { @@ -698,13 +886,18 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s goto bail; } } +#endif } if (chunks == NULL) { if (chunk != NULL) rval = chunk; else +#if PY_MAJOR_VERSION >= 3 + rval = PyUnicode_New(0, 127); +#else rval = PyString_FromStringAndSize("", 0); +#endif } else { APPEND_OLD_CHUNK @@ -1033,7 +1226,7 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ PyObject *item; PyObject *key = NULL; PyObject *val = NULL; - char *encoding = PyString_AS_STRING(s->encoding); + char *encoding = JSON_ASCII_AS_STRING(s->encoding); int strict = PyObject_IsTrue(s->strict); int has_pairs_hook = (s->pairs_hook != Py_None); Py_ssize_t next_idx; @@ -1474,13 +1667,13 @@ _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t * PyObject *cstr; PyObject *rval; /* constant is "NaN", "Infinity", or "-Infinity" */ - cstr = PyString_InternFromString(constant); + cstr = JSON_InternFromString(constant); if (cstr == NULL) return NULL; /* rval = parse_constant(constant) */ rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); - idx += PyString_GET_SIZE(cstr); + idx += JSON_Intern_GET_SIZE(cstr); Py_DECREF(cstr); *next_idx_ptr = idx; return rval; @@ -1668,7 +1861,11 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); } else { +#if PY_MAJOR_VERSION >= 3 + rval = PyFloat_FromString(numstr); +#else rval = PyFloat_FromString(numstr, NULL); +#endif } } else { @@ -1704,7 +1901,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n case '"': /* string */ rval = scanstring_str(pystr, idx + 1, - PyString_AS_STRING(s->encoding), + JSON_ASCII_AS_STRING(s->encoding), PyObject_IsTrue(s->strict), next_idx_ptr); break; @@ -1947,20 +2144,22 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) goto bail; } - /* PyString_AS_STRING is used on encoding */ + /* JSON_ASCII_AS_STRING is used on encoding */ s->encoding = PyObject_GetAttrString(ctx, "encoding"); if (s->encoding == NULL) goto bail; if (s->encoding == Py_None) { Py_DECREF(Py_None); - s->encoding = PyString_InternFromString(DEFAULT_ENCODING); + s->encoding = JSON_InternFromString(DEFAULT_ENCODING); } +#if PY_MAJOR_VERSION < 3 else if (PyUnicode_Check(s->encoding)) { 
PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); Py_DECREF(s->encoding); s->encoding = tmp; } - if (s->encoding == NULL || !PyString_Check(s->encoding)) +#endif + if (s->encoding == NULL || !JSON_ASCII_Check(s->encoding)) goto bail; /* All of these will fail "gracefully" so we don't need to verify them */ @@ -2000,8 +2199,7 @@ PyDoc_STRVAR(scanner_doc, "JSON scanner object"); static PyTypeObject PyScannerType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ + PyVarObject_HEAD_INIT(NULL, 0) "simplejson._speedups.Scanner", /* tp_name */ sizeof(PyScannerObject), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -2054,10 +2252,11 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->indent = NULL; s->key_separator = NULL; s->item_separator = NULL; - s->sort_keys = NULL; s->skipkeys = NULL; s->key_memo = NULL; + s->sort_keys = NULL; s->item_sort_key = NULL; + s->item_sort_kw = NULL; s->Decimal = NULL; } return (PyObject *)s; @@ -2091,7 +2290,6 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds) s->indent = indent; s->key_separator = key_separator; s->item_separator = item_separator; - s->sort_keys = sort_keys; s->skipkeys = skipkeys; s->key_memo = key_memo; s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); @@ -2100,6 +2298,35 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds) s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object); s->tuple_as_array = PyObject_IsTrue(tuple_as_array); s->bigint_as_string = PyObject_IsTrue(bigint_as_string); + if (item_sort_key != Py_None) { + if (!PyCallable_Check(item_sort_key)) + PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable"); + } + else if (PyObject_IsTrue(sort_keys)) { + static PyObject *itemgetter0 = NULL; + if (!itemgetter0) { + PyObject *operator = PyImport_ImportModule("operator"); + if (!operator) + return -1; + itemgetter0 = PyObject_CallMethod(operator, "itemgetter", "i", 0); + Py_DECREF(operator); + } + item_sort_key = itemgetter0; + if (!item_sort_key) + return -1; + } + if (item_sort_key == Py_None) { + Py_INCREF(Py_None); + s->item_sort_kw = Py_None; + } + else { + s->item_sort_kw = PyDict_New(); + if (s->item_sort_kw == NULL) + return -1; + if (PyDict_SetItemString(s->item_sort_kw, "key", item_sort_key)) + return -1; + } + s->sort_keys = sort_keys; s->item_sort_key = item_sort_key; s->Decimal = Decimal; @@ -2109,9 +2336,9 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds) Py_INCREF(s->indent); Py_INCREF(s->key_separator); Py_INCREF(s->item_separator); - Py_INCREF(s->sort_keys); Py_INCREF(s->skipkeys); Py_INCREF(s->key_memo); + Py_INCREF(s->sort_keys); Py_INCREF(s->item_sort_key); Py_INCREF(s->Decimal); return 0; @@ -2148,7 +2375,7 @@ _encoded_const(PyObject *obj) if (obj == Py_None) { static PyObject *s_null = NULL; if (s_null == NULL) { - s_null = PyString_InternFromString("null"); + s_null = JSON_InternFromString("null"); } Py_INCREF(s_null); return s_null; @@ -2156,7 +2383,7 @@ _encoded_const(PyObject *obj) else if (obj == Py_True) { static PyObject *s_true = NULL; if (s_true == NULL) { - s_true = PyString_InternFromString("true"); + s_true = JSON_InternFromString("true"); } Py_INCREF(s_true); return s_true; @@ -2164,7 +2391,7 @@ _encoded_const(PyObject *obj) else if (obj == Py_False) { static PyObject *s_false = NULL; if (s_false == NULL) { - s_false = PyString_InternFromString("false"); + s_false = JSON_InternFromString("false"); } Py_INCREF(s_false); 
return s_false; @@ -2186,13 +2413,28 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj) return NULL; } if (i > 0) { - return PyString_FromString("Infinity"); + static PyObject *sInfinity = NULL; + if (sInfinity == NULL) + sInfinity = JSON_InternFromString("Infinity"); + if (sInfinity) + Py_INCREF(sInfinity); + return sInfinity; } else if (i < 0) { - return PyString_FromString("-Infinity"); + static PyObject *sNegInfinity = NULL; + if (sNegInfinity == NULL) + sNegInfinity = JSON_InternFromString("-Infinity"); + if (sNegInfinity) + Py_INCREF(sNegInfinity); + return sNegInfinity; } else { - return PyString_FromString("NaN"); + static PyObject *sNaN = NULL; + if (sNaN == NULL) + sNaN = JSON_InternFromString("NaN"); + if (sNaN) + Py_INCREF(sNaN); + return sNaN; } } /* Use a better float format here? */ @@ -2266,7 +2508,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi else if (PyDict_Check(obj)) { rv = encoder_listencode_dict(s, rval, obj, indent_level); } - else if (s->use_decimal && PyObject_TypeCheck(obj, s->Decimal)) { + else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) { PyObject *encoded = PyObject_Str(obj); if (encoded != NULL) rv = _steal_list_append(rval, encoded); @@ -2322,7 +2564,6 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss static PyObject *open_dict = NULL; static PyObject *close_dict = NULL; static PyObject *empty_dict = NULL; - static PyObject *iteritems = NULL; PyObject *kstr = NULL; PyObject *ident = NULL; PyObject *iter = NULL; @@ -2332,12 +2573,11 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss int skipkeys; Py_ssize_t idx; - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) { - open_dict = PyString_InternFromString("{"); - close_dict = PyString_InternFromString("}"); - empty_dict = PyString_InternFromString("{}"); - iteritems = PyString_InternFromString("iteritems"); - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = JSON_InternFromString("{"); + close_dict = JSON_InternFromString("}"); + empty_dict = JSON_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) return -1; } if (PyDict_Size(dct) == 0) @@ -2372,50 +2612,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss */ } - if (PyCallable_Check(s->item_sort_key)) { - if (PyDict_CheckExact(dct)) - items = PyDict_Items(dct); - else - items = PyMapping_Items(dct); - PyObject_CallMethod(items, "sort", "OO", Py_None, s->item_sort_key); - } - else if (PyObject_IsTrue(s->sort_keys)) { - /* First sort the keys then replace them with (key, value) tuples. 
*/ - Py_ssize_t i, nitems; - if (PyDict_CheckExact(dct)) - items = PyDict_Keys(dct); - else - items = PyMapping_Keys(dct); - if (items == NULL) - goto bail; - if (!PyList_Check(items)) { - PyErr_SetString(PyExc_ValueError, "keys must return list"); - goto bail; - } - if (PyList_Sort(items) < 0) - goto bail; - nitems = PyList_GET_SIZE(items); - for (i = 0; i < nitems; i++) { - PyObject *key, *value; - key = PyList_GET_ITEM(items, i); - value = PyDict_GetItem(dct, key); - item = PyTuple_Pack(2, key, value); - if (item == NULL) - goto bail; - PyList_SET_ITEM(items, i, item); - Py_DECREF(key); - } - } - else { - if (PyDict_CheckExact(dct)) - items = PyDict_Items(dct); - else - items = PyMapping_Items(dct); - } - if (items == NULL) - goto bail; - iter = PyObject_GetIter(items); - Py_DECREF(items); + iter = encoder_dict_iteritems(s, dct); if (iter == NULL) goto bail; @@ -2536,9 +2733,9 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss int i = 0; if (open_array == NULL || close_array == NULL || empty_array == NULL) { - open_array = PyString_InternFromString("["); - close_array = PyString_InternFromString("]"); - empty_array = PyString_InternFromString("[]"); + open_array = JSON_InternFromString("["); + close_array = JSON_InternFromString("]"); + empty_array = JSON_InternFromString("[]"); if (open_array == NULL || close_array == NULL || empty_array == NULL) return -1; } @@ -2636,10 +2833,12 @@ encoder_traverse(PyObject *self, visitproc visit, void *arg) Py_VISIT(s->indent); Py_VISIT(s->key_separator); Py_VISIT(s->item_separator); - Py_VISIT(s->sort_keys); Py_VISIT(s->skipkeys); Py_VISIT(s->key_memo); + Py_VISIT(s->sort_keys); + Py_VISIT(s->item_sort_kw); Py_VISIT(s->item_sort_key); + Py_VISIT(s->Decimal); return 0; } @@ -2656,9 +2855,10 @@ encoder_clear(PyObject *self) Py_CLEAR(s->indent); Py_CLEAR(s->key_separator); Py_CLEAR(s->item_separator); - Py_CLEAR(s->sort_keys); Py_CLEAR(s->skipkeys); Py_CLEAR(s->key_memo); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->item_sort_kw); Py_CLEAR(s->item_sort_key); Py_CLEAR(s->Decimal); return 0; @@ -2668,8 +2868,7 @@ PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable") static PyTypeObject PyEncoderType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ + PyVarObject_HEAD_INIT(NULL, 0) "simplejson._speedups.Encoder", /* tp_name */ sizeof(PyEncoderObject), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -2725,21 +2924,53 @@ static PyMethodDef speedups_methods[] = { PyDoc_STRVAR(module_doc, "simplejson speedups\n"); -void -init_speedups(void) +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_speedups", /* m_name */ + module_doc, /* m_doc */ + -1, /* m_size */ + speedups_methods, /* m_methods */ + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear*/ + NULL, /* m_free */ +}; +#endif + +static PyObject * +moduleinit(void) { PyObject *m; PyScannerType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyScannerType) < 0) - return; + return NULL; PyEncoderType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyEncoderType) < 0) - return; - + return NULL; +#if PY_MAJOR_VERSION >= 3 + m = PyModule_Create(&moduledef); +#else m = Py_InitModule3("_speedups", speedups_methods, module_doc); +#endif Py_INCREF((PyObject*)&PyScannerType); PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); Py_INCREF((PyObject*)&PyEncoderType); PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); + return m; +} + +#if PY_MAJOR_VERSION >= 3 +PyMODINIT_FUNC 
+PyInit__speedups(void) +{ + return moduleinit(); } +#else +void +init_speedups(void) +{ + moduleinit(); +} +#endif diff --git a/simplejson/compat.py b/simplejson/compat.py new file mode 100644 index 0000000..449e48a --- /dev/null +++ b/simplejson/compat.py @@ -0,0 +1,43 @@ +"""Python 3 compatibility shims +""" +import sys +if sys.version_info[0] < 3: + PY3 = False + def b(s): + return s + def u(s): + return unicode(s, 'unicode_escape') + import cStringIO as StringIO + StringIO = BytesIO = StringIO.StringIO + text_type = unicode + binary_type = str + string_types = (basestring,) + integer_types = (int, long) + unichr = unichr + reload_module = reload + def fromhex(s): + return s.decode('hex') + +else: + PY3 = True + from imp import reload as reload_module + import codecs + def b(s): + return codecs.latin_1_encode(s)[0] + def u(s): + return s + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + text_type = str + binary_type = bytes + string_types = (str,) + integer_types = (int,) + + def unichr(s): + return u(chr(s)) + + def fromhex(s): + return bytes.fromhex(s) + +long_type = integer_types[-1] diff --git a/simplejson/decoder.py b/simplejson/decoder.py index 1c1526c..bd14788 100644 --- a/simplejson/decoder.py +++ b/simplejson/decoder.py @@ -1,13 +1,14 @@ """Implementation of JSONDecoder """ +from __future__ import absolute_import import re import sys import struct - -from simplejson.scanner import make_scanner +from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr +from .scanner import make_scanner def _import_c_scanstring(): try: - from simplejson._speedups import scanstring + from ._speedups import scanstring return scanstring except ImportError: return None @@ -18,7 +19,7 @@ __all__ = ['JSONDecoder'] FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL def _floatconstants(): - _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + _BYTES = fromhex('7FF80000000000007FF0000000000000') # The struct module in Python 2.4 would get frexp() out of range here # when an endian is specified in the format string. Fixed in Python 2.5+ if sys.byteorder != 'big': @@ -87,14 +88,14 @@ _CONSTANTS = { STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) BACKSLASH = { - '"': u'"', '\\': u'\\', '/': u'/', - 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', + '"': u('"'), '\\': u('\u005c'), '/': u('/'), + 'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'), } DEFAULT_ENCODING = "utf-8" def py_scanstring(s, end, encoding=None, strict=True, - _b=BACKSLASH, _m=STRINGCHUNK.match): + _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. 
Unescapes all valid JSON string escape sequences and raises ValueError @@ -117,8 +118,8 @@ def py_scanstring(s, end, encoding=None, strict=True, content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters if content: - if not isinstance(content, unicode): - content = unicode(content, encoding) + if not isinstance(content, text_type): + content = text_type(content, encoding) _append(content) # Terminator is the end of string, a literal control character, # or a backslash denoting that an escape sequence follows @@ -168,7 +169,7 @@ def py_scanstring(s, end, encoding=None, strict=True, end = next_end # Append the unescaped character _append(char) - return u''.join(chunks), end + return _join(chunks), end # Use speedup if available @@ -177,9 +178,10 @@ scanstring = c_scanstring or py_scanstring WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) WHITESPACE_STR = ' \t\n\r' -def JSONObject((s, end), encoding, strict, scan_once, object_hook, +def JSONObject(state, encoding, strict, scan_once, object_hook, object_pairs_hook, memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state # Backwards compatibility if memo is None: memo = {} @@ -273,7 +275,8 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, pairs = object_hook(pairs) return pairs, end -def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): +def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state values = [] nextchar = s[end:end + 1] if nextchar in _ws: @@ -398,11 +401,13 @@ class JSONDecoder(object): self.memo = {} self.scan_once = make_scanner(self) - def decode(self, s, _w=WHITESPACE.match): + def decode(self, s, _w=WHITESPACE.match, _PY3=PY3): """Return the Python representation of ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) """ + if _PY3 and isinstance(s, binary_type): + s = s.decode('utf-8') obj, end = self.raw_decode(s) end = _w(s, end).end() if end != len(s): diff --git a/simplejson/encoder.py b/simplejson/encoder.py index 6b4a6a4..dad59fc 100644 --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -1,11 +1,13 @@ """Implementation of JSONEncoder """ +from __future__ import absolute_import import re +from operator import itemgetter from decimal import Decimal - +from .compat import u, unichr, binary_type, string_types, integer_types, PY3 def _import_speedups(): try: - from simplejson import _speedups + from . 
import _speedups return _speedups.encode_basestring_ascii, _speedups.make_encoder except ImportError: return None, None @@ -13,7 +15,7 @@ c_encode_basestring_ascii, c_make_encoder = _import_speedups() from simplejson.decoder import PosInf -ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') +ESCAPE = re.compile(u(r'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')) ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { @@ -24,32 +26,40 @@ ESCAPE_DCT = { '\n': '\\n', '\r': '\\r', '\t': '\\t', - u'\u2028': '\\u2028', - u'\u2029': '\\u2029', } for i in range(0x20): #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) +for i in [0x2028, 0x2029]: + ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,)) FLOAT_REPR = repr -def encode_basestring(s): +def encode_basestring(s, _PY3=PY3, _q=u('"')): """Return a JSON representation of a Python string """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') + if _PY3: + if isinstance(s, binary_type): + s = s.decode('utf-8') + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): return ESCAPE_DCT[match.group(0)] - return u'"' + ESCAPE.sub(replace, s) + u'"' + return _q + ESCAPE.sub(replace, s) + _q -def py_encode_basestring_ascii(s): +def py_encode_basestring_ascii(s, _PY3=PY3): """Return an ASCII-only JSON representation of a Python string """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') + if _PY3: + if isinstance(s, binary_type): + s = s.decode('utf-8') + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): s = match.group(0) try: @@ -181,7 +191,7 @@ class JSONEncoder(object): self.tuple_as_array = tuple_as_array self.bigint_as_string = bigint_as_string self.item_sort_key = item_sort_key - if indent is not None and not isinstance(indent, basestring): + if indent is not None and not isinstance(indent, string_types): indent = indent * ' ' self.indent = indent if separators is not None: @@ -221,12 +231,11 @@ class JSONEncoder(object): """ # This is for extremely simple cases and benchmarks. 
- if isinstance(o, basestring): - if isinstance(o, str): - _encoding = self.encoding - if (_encoding is not None - and not (_encoding == 'utf-8')): - o = o.decode(_encoding) + if isinstance(o, binary_type): + _encoding = self.encoding + if (_encoding is not None and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if isinstance(o, string_types): if self.ensure_ascii: return encode_basestring_ascii(o) else: @@ -262,7 +271,7 @@ class JSONEncoder(object): _encoder = encode_basestring if self.encoding != 'utf-8': def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): - if isinstance(o, str): + if isinstance(o, binary_type): o = o.decode(_encoding) return _orig_encoder(o) @@ -306,6 +315,7 @@ class JSONEncoder(object): self.skipkeys, _one_shot, self.use_decimal, self.namedtuple_as_object, self.tuple_as_array, self.bigint_as_string, self.item_sort_key, + self.encoding, Decimal=Decimal) try: return _iterencode(o, 0) @@ -343,25 +353,25 @@ class JSONEncoderForHTML(JSONEncoder): def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, _use_decimal, _namedtuple_as_object, _tuple_as_array, - _bigint_as_string, _item_sort_key, + _bigint_as_string, _item_sort_key, _encoding, ## HACK: hand-optimized bytecode; turn globals into locals - False=False, - True=True, + _PY3=PY3, ValueError=ValueError, - basestring=basestring, + string_types=string_types, Decimal=Decimal, dict=dict, float=float, id=id, - int=int, + integer_types=integer_types, isinstance=isinstance, list=list, - long=long, str=str, tuple=tuple, ): if _item_sort_key and not callable(_item_sort_key): raise TypeError("item_sort_key must be None or callable") + elif _sort_keys and not _item_sort_key: + _item_sort_key = itemgetter(0) def _iterencode_list(lst, _current_indent_level): if not lst: @@ -387,7 +397,8 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, first = False else: buf = separator - if isinstance(value, basestring): + if (isinstance(value, string_types) or + (_PY3 and isinstance(value, binary_type))): yield buf + _encoder(value) elif value is None: yield buf + 'null' @@ -395,7 +406,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield buf + 'true' elif value is False: yield buf + 'false' - elif isinstance(value, (int, long)): + elif isinstance(value, integer_types): yield ((buf + str(value)) if (not _bigint_as_string or (-1 << 53) < value < (1 << 53)) @@ -428,6 +439,27 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, if markers is not None: del markers[markerid] + def _stringify_key(key): + if isinstance(key, string_types): + pass + elif isinstance(key, binary_type): + key = key.decode(_encoding) + elif isinstance(key, float): + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, integer_types): + key = str(key) + elif _skipkeys: + key = None + else: + raise TypeError("key " + repr(key) + " is not a string") + return key + def _iterencode_dict(dct, _current_indent_level): if not dct: yield '{}' @@ -447,40 +479,35 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, newline_indent = None item_separator = _item_separator first = True + if _PY3: + iteritems = dct.items() + else: + iteritems = dct.iteritems() if _item_sort_key: - items = dct.items() + items = [] + for k, v in dct.items(): + if not isinstance(k, string_types): + k = _stringify_key(k) + 
if k is None: + continue + items.append((k, v)) items.sort(key=_item_sort_key) - elif _sort_keys: - items = dct.items() - items.sort(key=lambda kv: kv[0]) else: - items = dct.iteritems() + items = iteritems for key, value in items: - if isinstance(key, basestring): - pass - # JavaScript is weakly typed for these, so it makes sense to - # also allow them. Many encoders seem to do something like this. - elif isinstance(key, float): - key = _floatstr(key) - elif key is True: - key = 'true' - elif key is False: - key = 'false' - elif key is None: - key = 'null' - elif isinstance(key, (int, long)): - key = str(key) - elif _skipkeys: - continue - else: - raise TypeError("key " + repr(key) + " is not a string") + if not (_item_sort_key or isinstance(key, string_types)): + key = _stringify_key(key) + if key is None: + # _skipkeys must be True + continue if first: first = False else: yield item_separator yield _encoder(key) yield _key_separator - if isinstance(value, basestring): + if (isinstance(value, string_types) or + (_PY3 and isinstance(value, binary_type))): yield _encoder(value) elif value is None: yield 'null' @@ -488,7 +515,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield 'true' elif value is False: yield 'false' - elif isinstance(value, (int, long)): + elif isinstance(value, integer_types): yield (str(value) if (not _bigint_as_string or (-1 << 53) < value < (1 << 53)) @@ -521,7 +548,8 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, del markers[markerid] def _iterencode(o, _current_indent_level): - if isinstance(o, basestring): + if (isinstance(o, string_types) or + (_PY3 and isinstance(o, binary_type))): yield _encoder(o) elif o is None: yield 'null' @@ -529,7 +557,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield 'true' elif o is False: yield 'false' - elif isinstance(o, (int, long)): + elif isinstance(o, integer_types): yield (str(o) if (not _bigint_as_string or (-1 << 53) < o < (1 << 53)) diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py index 12289b6..064b598 100644 --- a/simplejson/tests/__init__.py +++ b/simplejson/tests/__init__.py @@ -1,7 +1,7 @@ +from __future__ import absolute_import import unittest import doctest - class OptionalExtensionTestSuite(unittest.TestSuite): def run(self, result): import simplejson diff --git a/simplejson/tests/test_decimal.py b/simplejson/tests/test_decimal.py index 5fbe36c..8d76f4e 100644 --- a/simplejson/tests/test_decimal.py +++ b/simplejson/tests/test_decimal.py @@ -1,7 +1,7 @@ import decimal from decimal import Decimal from unittest import TestCase -from StringIO import StringIO +from simplejson.compat import StringIO, reload_module import simplejson as json @@ -60,7 +60,7 @@ class TestDecimal(TestCase): # Simulate a subinterpreter that reloads the Python modules but not # the C code https://github.com/simplejson/simplejson/issues/34 global Decimal - Decimal = reload(decimal).Decimal + Decimal = reload_module(decimal).Decimal import simplejson.encoder simplejson.encoder.Decimal = Decimal self.test_decimal_roundtrip() diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py index 37b231b..5c488c6 100644 --- a/simplejson/tests/test_decode.py +++ b/simplejson/tests/test_decode.py @@ -1,8 +1,9 @@ +from __future__ import absolute_import import decimal from unittest import TestCase -from StringIO import StringIO import simplejson as json +from simplejson.compat import StringIO from simplejson import OrderedDict class 
TestDecode(TestCase): diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py index eab040c..9f1e826 100644 --- a/simplejson/tests/test_dump.py +++ b/simplejson/tests/test_dump.py @@ -1,6 +1,5 @@ from unittest import TestCase -from cStringIO import StringIO - +from simplejson.compat import StringIO, long_type import simplejson as json class TestDump(TestCase): @@ -16,15 +15,23 @@ class TestDump(TestCase): self.assertEquals(json.dumps( {True: False, False: True}, sort_keys=True), '{"false": true, "true": false}') - self.assertEquals(json.dumps( - {2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True), - '{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}') + self.assertEquals( + json.dumps( + {2: 3.0, + 4.0: long_type(5), + False: 1, + long_type(6): True, + "7": 0}, + sort_keys=True), + '{"2": 3.0, "4.0": 5, "6": true, "7": 0, "false": 1}') def test_ordered_dict(self): # http://bugs.python.org/issue6105 items = [('one', 1), ('two', 2), ('three', 3), ('four', 4), ('five', 5)] s = json.dumps(json.OrderedDict(items)) - self.assertEqual(s, '{"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}') + self.assertEqual( + s, + '{"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}') def test_indent_unknown_type_acceptance(self): """ @@ -63,5 +70,5 @@ class TestDump(TestCase): raise NotImplementedError("To do non-awesome things with" " this object, please construct it from an integer!") - s = json.dumps(range(3), indent=AwesomeInt(3)) + s = json.dumps([0, 1, 2], indent=AwesomeInt(3)) self.assertEqual(s, '[\n 0,\n 1,\n 2\n]') diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py index 6c40961..780bb0a 100644 --- a/simplejson/tests/test_encode_basestring_ascii.py +++ b/simplejson/tests/test_encode_basestring_ascii.py @@ -1,6 +1,7 @@ from unittest import TestCase import simplejson.encoder +from simplejson.compat import b CASES = [ (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'), @@ -11,9 +12,9 @@ CASES = [ (u' s p a c e d ', '" s p a c e d "'), (u'\U0001d120', '"\\ud834\\udd20"'), (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), - ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'), (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), - ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'), (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), (u"`1~!@#$%^&*()_+-={':[,]}|;.?", '"`1~!@#$%^&*()_+-={\':[,]}|;.?"'), diff --git a/simplejson/tests/test_errors.py b/simplejson/tests/test_errors.py index 620ccf3..86c14e5 100644 --- a/simplejson/tests/test_errors.py +++ b/simplejson/tests/test_errors.py @@ -1,6 +1,8 @@ +import sys from unittest import TestCase import simplejson as json +from simplejson.compat import u, b class TestErrors(TestCase): def test_string_keys_error(self): @@ -11,8 +13,8 @@ class TestErrors(TestCase): err = None try: json.loads('{}\na\nb') - except json.JSONDecodeError, e: - err = e + except json.JSONDecodeError: + err = sys.exc_info()[1] else: self.fail('Expected JSONDecodeError') self.assertEquals(err.lineno, 2) @@ -22,13 +24,12 @@ class TestErrors(TestCase): def test_scan_error(self): err = None - for t in (str, unicode): + for t in (u, b): try: json.loads(t('{"asdf": "')) - except json.JSONDecodeError, e: - err = e + except json.JSONDecodeError: + err = sys.exc_info()[1] else: 
self.fail('Expected JSONDecodeError') self.assertEquals(err.lineno, 1) self.assertEquals(err.colno, 9) - \ No newline at end of file diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py index 646c0f4..d388f19 100644 --- a/simplejson/tests/test_fail.py +++ b/simplejson/tests/test_fail.py @@ -1,3 +1,4 @@ +import sys from unittest import TestCase import simplejson as json @@ -81,11 +82,13 @@ class TestFail(TestCase): for doc in [u'[,]', '[,]']: try: json.loads(doc) - except json.JSONDecodeError, e: + except json.JSONDecodeError: + e = sys.exc_info()[1] self.assertEquals(e.pos, 1) self.assertEquals(e.lineno, 1) self.assertEquals(e.colno, 1) - except Exception, e: + except Exception: + e = sys.exc_info()[1] self.fail("Unexpected exception raised %r %s" % (e, e)) else: - self.fail("Unexpected success parsing '[,]'") \ No newline at end of file + self.fail("Unexpected success parsing '[,]'") diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py index 94502c6..f7e29d6 100644 --- a/simplejson/tests/test_float.py +++ b/simplejson/tests/test_float.py @@ -1,6 +1,6 @@ import math from unittest import TestCase - +from simplejson.compat import long_type, text_type import simplejson as json class TestFloat(TestCase): @@ -9,11 +9,11 @@ class TestFloat(TestCase): math.pi**-100, 3.1]: self.assertEquals(float(json.dumps(num)), num) self.assertEquals(json.loads(json.dumps(num)), num) - self.assertEquals(json.loads(unicode(json.dumps(num))), num) + self.assertEquals(json.loads(text_type(json.dumps(num))), num) def test_ints(self): - for num in [1, 1L, 1<<32, 1<<64]: + for num in [1, long_type(1), 1<<32, 1<<64]: self.assertEquals(json.dumps(num), str(num)) self.assertEquals(int(json.dumps(num)), num) self.assertEquals(json.loads(json.dumps(num)), num) - self.assertEquals(json.loads(unicode(json.dumps(num))), num) + self.assertEquals(json.loads(text_type(json.dumps(num))), num) diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py index 1e6bdb1..a397cca 100644 --- a/simplejson/tests/test_indent.py +++ b/simplejson/tests/test_indent.py @@ -1,8 +1,8 @@ from unittest import TestCase +import textwrap import simplejson as json -import textwrap -from StringIO import StringIO +from simplejson.compat import StringIO class TestIndent(TestCase): def test_indent(self): @@ -83,4 +83,4 @@ class TestIndent(TestCase): # Added in 2.1.4 self.assertEquals( expect, - json.dumps(lst, indent=0)) \ No newline at end of file + json.dumps(lst, indent=0)) diff --git a/simplejson/tests/test_namedtuple.py b/simplejson/tests/test_namedtuple.py index 54a9a12..4387894 100644 --- a/simplejson/tests/test_namedtuple.py +++ b/simplejson/tests/test_namedtuple.py @@ -1,6 +1,7 @@ +from __future__ import absolute_import import unittest import simplejson as json -from StringIO import StringIO +from simplejson.compat import StringIO try: from collections import namedtuple diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py index a7fcd46..0e34937 100644 --- a/simplejson/tests/test_scanstring.py +++ b/simplejson/tests/test_scanstring.py @@ -3,6 +3,7 @@ from unittest import TestCase import simplejson as json import simplejson.decoder +from simplejson.compat import b class TestScanString(TestCase): def test_py_scanstring(self): @@ -107,11 +108,10 @@ class TestScanString(TestCase): self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, "xxx") self.assertRaises(UnicodeDecodeError, - json.encoder.encode_basestring_ascii, "xx\xff") + 
json.encoder.encode_basestring_ascii, b("xx\xff")) def test_overflow(self): - # Python 2.5 does not have maxsize - maxsize = getattr(sys, 'maxsize', sys.maxint) + # Python 2.5 does not have maxsize, Python 3 does not have maxint + maxsize = getattr(sys, 'maxsize') or getattr(sys, 'maxint') self.assertRaises(OverflowError, json.decoder.scanstring, "xxx", maxsize + 1) - diff --git a/simplejson/tests/test_tuple.py b/simplejson/tests/test_tuple.py index 92856a7..a6a9910 100644 --- a/simplejson/tests/test_tuple.py +++ b/simplejson/tests/test_tuple.py @@ -1,6 +1,6 @@ import unittest -from StringIO import StringIO +from simplejson.compat import StringIO import simplejson as json class TestTuples(unittest.TestCase): @@ -13,7 +13,8 @@ class TestTuples(unittest.TestCase): self.assertRaises(TypeError, json.dumps, t, tuple_as_array=False) # Ensure that the "default" does not get called self.assertEqual(expect, json.dumps(t, default=repr)) - self.assertEqual(expect, json.dumps(t, tuple_as_array=True, default=repr)) + self.assertEqual(expect, json.dumps(t, tuple_as_array=True, + default=repr)) # Ensure that the "default" gets called self.assertEqual( json.dumps(repr(t)), @@ -29,7 +30,8 @@ class TestTuples(unittest.TestCase): sio = StringIO() json.dump(t, sio, tuple_as_array=True) self.assertEqual(expect, sio.getvalue()) - self.assertRaises(TypeError, json.dump, t, StringIO(), tuple_as_array=False) + self.assertRaises(TypeError, json.dump, t, StringIO(), + tuple_as_array=False) # Ensure that the "default" does not get called sio = StringIO() json.dump(t, sio, default=repr) @@ -46,4 +48,4 @@ class TestTuples(unittest.TestCase): class TestNamedTuple(unittest.TestCase): def test_namedtuple_dump(self): - pass \ No newline at end of file + pass diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py index 83fe65b..6558709 100644 --- a/simplejson/tests/test_unicode.py +++ b/simplejson/tests/test_unicode.py @@ -1,6 +1,7 @@ from unittest import TestCase import simplejson as json +from simplejson.compat import unichr, text_type, b, u class TestUnicode(TestCase): def test_encoding1(self): @@ -76,21 +77,21 @@ class TestUnicode(TestCase): {'a': u'\xe9'}) def test_unicode_preservation(self): - self.assertEquals(type(json.loads(u'""')), unicode) - self.assertEquals(type(json.loads(u'"a"')), unicode) - self.assertEquals(type(json.loads(u'["a"]')[0]), unicode) + self.assertEquals(type(json.loads(u'""')), text_type) + self.assertEquals(type(json.loads(u'"a"')), text_type) + self.assertEquals(type(json.loads(u'["a"]')[0]), text_type) def test_ensure_ascii_false_returns_unicode(self): # http://code.google.com/p/simplejson/issues/detail?id=48 - self.assertEquals(type(json.dumps([], ensure_ascii=False)), unicode) - self.assertEquals(type(json.dumps(0, ensure_ascii=False)), unicode) - self.assertEquals(type(json.dumps({}, ensure_ascii=False)), unicode) - self.assertEquals(type(json.dumps("", ensure_ascii=False)), unicode) + self.assertEquals(type(json.dumps([], ensure_ascii=False)), text_type) + self.assertEquals(type(json.dumps(0, ensure_ascii=False)), text_type) + self.assertEquals(type(json.dumps({}, ensure_ascii=False)), text_type) + self.assertEquals(type(json.dumps("", ensure_ascii=False)), text_type) def test_ensure_ascii_false_bytestring_encoding(self): # http://code.google.com/p/simplejson/issues/detail?id=48 - doc1 = {u'quux': 'Arr\xc3\xaat sur images'} - doc2 = {u'quux': u'Arr\xeat sur images'} + doc1 = {u'quux': b('Arr\xc3\xaat sur images')} + doc2 = {u'quux': u('Arr\xeat sur images')} 
         doc_ascii = '{"quux": "Arr\\u00eat sur images"}'
         doc_unicode = u'{"quux": "Arr\xeat sur images"}'
         self.assertEquals(json.dumps(doc1), doc_ascii)
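
For reference, the key-sorting change implemented in the encoder hunks above (dict keys are coerced to strings before sorting) can be exercised with a short doctest-style sketch. This is a minimal illustration, assuming the patched simplejson 3.0.0 is importable; the expected output mirrors the updated case in simplejson/tests/test_dump.py::

    >>> import simplejson as json
    >>> from simplejson.compat import long_type
    >>> # Non-string keys are stringified first and only then sorted, so the
    >>> # mixed-type comparison that raises TypeError on Python 3.3 is avoided.
    >>> json.dumps({2: 3.0, 4.0: long_type(5), False: 1, long_type(6): True, "7": 0},
    ...            sort_keys=True)
    '{"2": 3.0, "4.0": 5, "6": true, "7": 0, "false": 1}'

Under the old code the sort compared the original key objects, so on Python 2.x the boolean and numeric keys ordered ahead of the string key "7" (the '{"false": 1, "2": 3.0, ...}' expectation the previous test encoded), while on Python 3.3+ the mixed comparison raises TypeError; sorting the stringified keys makes the output identical on both interpreter lines. The helpers used here (long_type, along with the b(), u(), text_type and StringIO names seen throughout the test changes) come from the new simplejson/compat.py module introduced by this commit.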