diff options
| field | value | date |
|---|---|---|
| author | Colin Deasy <cdeasy@demonware.net> | 2014-05-09 16:47:16 +0100 |
| committer | Colin Deasy <cdeasy@demonware.net> | 2014-05-14 17:03:44 +0100 |
| commit | 426849c9134e203903a00595f345013327068aaa (patch) | |
| tree | 6b1436c4819ec2d8e45fdbd7a13b09d4cedbf426 | |
| parent | 72b03f213ec0def34930f1b04f66f71a27f27665 (diff) | |
| download | simplejson-426849c9134e203903a00595f345013327068aaa.tar.gz | |
Adding `int_as_string_bitcount` option

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | simplejson/__init__.py | 43 |
| -rw-r--r-- | simplejson/_speedups.c | 77 |
| -rw-r--r-- | simplejson/encoder.py | 68 |
| -rw-r--r-- | simplejson/tests/test_bigint_as_string.py | 79 |
| -rw-r--r-- | simplejson/tests/test_bitsize_int_as_string.py | 67 |
| -rw-r--r-- | simplejson/tests/test_speedups.py | 32 |

6 files changed, 262 insertions, 104 deletions
diff --git a/simplejson/__init__.py b/simplejson/__init__.py index ed9c9a8..31c391e 100644 --- a/simplejson/__init__.py +++ b/simplejson/__init__.py @@ -144,14 +144,15 @@ _default_encoder = JSONEncoder( item_sort_key=None, for_json=False, ignore_nan=False, + int_as_string_bitcount=None, ) def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, use_decimal=True, - namedtuple_as_object=True, tuple_as_array=True, - bigint_as_string=False, sort_keys=False, item_sort_key=None, - for_json=False, ignore_nan=False, **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). @@ -209,6 +210,9 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, lossy operation that will not round-trip correctly and should be used sparingly. + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than 2**n or lower than -2**n will be encoded as strings. + If specified, *item_sort_key* is a callable used to sort the items in each dictionary. This is useful if you want to sort items other than in alphabetical order by key. 
This option takes precedence over @@ -238,8 +242,8 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, cls is None and indent is None and separators is None and encoding == 'utf-8' and default is None and use_decimal and namedtuple_as_object and tuple_as_array - and not bigint_as_string and not item_sort_key - and not for_json and not ignore_nan and not kw): + and not bigint_as_string and int_as_string_bitcount is None + and not item_sort_key and not for_json and not ignore_nan and not kw): iterable = _default_encoder.iterencode(obj) else: if cls is None: @@ -255,6 +259,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, item_sort_key=item_sort_key, for_json=for_json, ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost @@ -263,11 +268,11 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, use_decimal=True, - namedtuple_as_object=True, tuple_as_array=True, - bigint_as_string=False, sort_keys=False, item_sort_key=None, - for_json=False, ignore_nan=False, **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, **kw): """Serialize ``obj`` to a JSON formatted ``str``. If ``skipkeys`` is false then ``dict`` keys that are not basic types @@ -319,6 +324,9 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, or lower than -2**53 will be encoded as strings. This is to avoid the rounding that happens in Javascript otherwise. 
+ If *int_as_string_bitcount* is a positive number (n), then int of size + greater than 2**n or lower than -2**n will be encoded as strings. + If specified, *item_sort_key* is a callable used to sort the items in each dictionary. This is useful if you want to sort items other than in alphabetical order by key. This option takes precendence over @@ -343,14 +351,16 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, """ # cached encoder - if (not skipkeys and ensure_ascii and + if ( + not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and encoding == 'utf-8' and default is None and use_decimal and namedtuple_as_object and tuple_as_array - and not bigint_as_string and not sort_keys - and not item_sort_key and not for_json - and not ignore_nan and not kw): + and not bigint_as_string and int_as_string_bitcount is None + and not sort_keys and not item_sort_key and not for_json + and not ignore_nan and not kw + ): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder @@ -366,6 +376,7 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, item_sort_key=item_sort_key, for_json=for_json, ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, **kw).encode(obj) diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c index e888873..5ee2eb7 100644 --- a/simplejson/_speedups.c +++ b/simplejson/_speedups.c @@ -168,7 +168,7 @@ typedef struct _PyEncoderObject { int use_decimal; int namedtuple_as_object; int tuple_as_array; - int bigint_as_string; + int int_as_string_bitcount; PyObject *item_sort_key; PyObject *item_sort_kw; int for_json; @@ -197,7 +197,7 @@ JSON_ParseEncoding(PyObject *encoding); static PyObject * JSON_UnicodeFromChar(JSON_UNICHR c); static PyObject * -maybe_quote_bigint(PyObject *encoded, PyObject *obj); +maybe_quote_bigint(PyObject *encoded, PyObject *obj, int max_int_bits); static Py_ssize_t 
ascii_char_size(JSON_UNICHR c); static Py_ssize_t @@ -384,24 +384,19 @@ JSON_UnicodeFromChar(JSON_UNICHR c) } static PyObject * -maybe_quote_bigint(PyObject *encoded, PyObject *obj) +maybe_quote_bigint(PyObject *encoded, PyObject *obj, int max_int_bits) { - static PyObject *big_long = NULL; - static PyObject *small_long = NULL; + PyObject *big_long = PyLong_FromUnsignedLongLong(1LLU << max_int_bits); if (big_long == NULL) { - big_long = PyLong_FromLongLong(1LL << 53); - if (big_long == NULL) { - Py_DECREF(encoded); - return NULL; - } + Py_DECREF(encoded); + return NULL; } + PyObject *small_long = PyLong_FromLongLong(-1LL << max_int_bits); if (small_long == NULL) { - small_long = PyLong_FromLongLong(-1LL << 53); - if (small_long == NULL) { - Py_DECREF(encoded); - return NULL; - } + Py_DECREF(encoded); + return NULL; } + if (PyObject_RichCompareBool(obj, big_long, Py_GE) || PyObject_RichCompareBool(obj, small_long, Py_LE)) { #if PY_MAJOR_VERSION >= 3 @@ -413,6 +408,9 @@ maybe_quote_bigint(PyObject *encoded, PyObject *obj) Py_DECREF(encoded); encoded = quoted; } + + Py_DECREF(big_long); + Py_DECREF(small_long); return encoded; } @@ -2567,6 +2565,7 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->sort_keys = NULL; s->item_sort_key = NULL; s->item_sort_kw = NULL; + s->int_as_string_bitcount = -1; s->Decimal = NULL; } return (PyObject *)s; @@ -2576,13 +2575,33 @@ static int encoder_init(PyObject *self, PyObject *args, PyObject *kwds) { /* initialize Encoder object */ - static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", "bigint_as_string", "item_sort_key", "encoding", "for_json", "ignore_nan", "Decimal", NULL}; + static char *kwlist[] = { + "markers", + "default", + "encoder", + "indent", + "key_separator", + "item_separator", + "sort_keys", + "skipkeys", + "allow_nan", + "key_memo", + 
"use_decimal", + "namedtuple_as_object", + "tuple_as_array", + "int_as_string_bitcount", + "item_sort_key", + "encoding", + "for_json", + "ignore_nan", + "Decimal", + NULL}; PyEncoderObject *s; PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo; PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array; - PyObject *bigint_as_string, *item_sort_key, *encoding, *for_json; + PyObject *int_as_string_bitcount, *item_sort_key, *encoding, *for_json; PyObject *ignore_nan, *Decimal; assert(PyEncoder_Check(self)); @@ -2591,8 +2610,9 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOO:make_encoder", kwlist, &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal, - &namedtuple_as_object, &tuple_as_array, &bigint_as_string, - &item_sort_key, &encoding, &for_json, &ignore_nan, &Decimal)) + &namedtuple_as_object, &tuple_as_array, + &int_as_string_bitcount, &item_sort_key, &encoding, &for_json, + &ignore_nan, &Decimal)) return -1; s->markers = markers; @@ -2614,7 +2634,20 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds) s->use_decimal = PyObject_IsTrue(use_decimal); s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object); s->tuple_as_array = PyObject_IsTrue(tuple_as_array); - s->bigint_as_string = PyObject_IsTrue(bigint_as_string); + s->int_as_string_bitcount = -1; + if (PyInt_Check(int_as_string_bitcount) || PyLong_Check(int_as_string_bitcount)) { + static const unsigned int long_long_bitsize = SIZEOF_LONG_LONG * 8; + int int_as_string_bitcount_val = PyLong_AsLong(int_as_string_bitcount); + if (int_as_string_bitcount_val > 0 && int_as_string_bitcount_val < long_long_bitsize) { + s->int_as_string_bitcount = int_as_string_bitcount_val; + } + else { + PyErr_Format(PyExc_TypeError, + "int_as_string_bitcount (%d) must be 
greater than 0 and less than the number of bits of a `long long` type (%u bits)", + int_as_string_bitcount_val, long_long_bitsize); + } + } + if (item_sort_key != Py_None) { if (!PyCallable_Check(item_sort_key)) PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable"); @@ -2801,8 +2834,8 @@ encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ss else if (PyInt_Check(obj) || PyLong_Check(obj)) { PyObject *encoded = PyObject_Str(obj); if (encoded != NULL) { - if (s->bigint_as_string) { - encoded = maybe_quote_bigint(encoded, obj); + if (s->int_as_string_bitcount > 0) { + encoded = maybe_quote_bigint(encoded, obj, s->int_as_string_bitcount); if (encoded == NULL) break; } diff --git a/simplejson/encoder.py b/simplejson/encoder.py index 9815ee5..6b2f76d 100644 --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -116,12 +116,14 @@ class JSONEncoder(object): """ item_separator = ', ' key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, - check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, encoding='utf-8', default=None, - use_decimal=True, namedtuple_as_object=True, - tuple_as_array=True, bigint_as_string=False, - item_sort_key=None, for_json=False, ignore_nan=False): + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None, + use_decimal=True, namedtuple_as_object=True, + tuple_as_array=True, bigint_as_string=False, + item_sort_key=None, for_json=False, ignore_nan=False, + int_as_string_bitcount=None): """Constructor for JSONEncoder, with sensible defaults. If skipkeys is false, then it is a TypeError to attempt @@ -180,6 +182,9 @@ class JSONEncoder(object): or lower than -2**53 will be encoded as strings. This is to avoid the rounding that happens in Javascript otherwise. 
+ If int_as_string_bitcount is a positive number (n), then int of size + greater than 2**n or lower than -2**n will be encoded as strings. + If specified, item_sort_key is a callable used to sort the items in each dictionary. This is useful if you want to sort items other than in alphabetical order by key. @@ -207,6 +212,7 @@ class JSONEncoder(object): self.item_sort_key = item_sort_key self.for_json = for_json self.ignore_nan = ignore_nan + self.int_as_string_bitcount = int_as_string_bitcount if indent is not None and not isinstance(indent, string_types): indent = indent * ' ' self.indent = indent @@ -315,8 +321,12 @@ class JSONEncoder(object): return text - key_memo = {} + int_as_string_bitcount = ( + (self.bigint_as_string and 53) + or + self.int_as_string_bitcount + ) if (_one_shot and c_make_encoder is not None and self.indent is None): _iterencode = c_make_encoder( @@ -324,17 +334,17 @@ class JSONEncoder(object): self.key_separator, self.item_separator, self.sort_keys, self.skipkeys, self.allow_nan, key_memo, self.use_decimal, self.namedtuple_as_object, self.tuple_as_array, - self.bigint_as_string, self.item_sort_key, - self.encoding, self.for_json, self.ignore_nan, - Decimal) + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + self.ignore_nan, Decimal) else: _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, self.skipkeys, _one_shot, self.use_decimal, self.namedtuple_as_object, self.tuple_as_array, - self.bigint_as_string, self.item_sort_key, - self.encoding, self.for_json, + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, Decimal=Decimal) try: return _iterencode(o, 0) @@ -372,7 +382,8 @@ class JSONEncoderForHTML(JSONEncoder): def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, _use_decimal, _namedtuple_as_object, 
_tuple_as_array, - _bigint_as_string, _item_sort_key, _encoding, _for_json, + _int_as_string_bitcount, _item_sort_key, + _encoding,_for_json, ## HACK: hand-optimized bytecode; turn globals into locals _PY3=PY3, ValueError=ValueError, @@ -392,6 +403,24 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, elif _sort_keys and not _item_sort_key: _item_sort_key = itemgetter(0) + if _int_as_string_bitcount is not None and _int_as_string_bitcount < 0: + raise TypeError("int_as_string_bitcount must be a positive integer") + + def _encode_int(value): + skip_quoting = ( + _int_as_string_bitcount is None + or + _int_as_string_bitcount < 1 + ) + if ( + skip_quoting or + (-1 << _int_as_string_bitcount) + < value < + (1 << _int_as_string_bitcount) + ): + return str(value) + return '"' + str(value) + '"' + def _iterencode_list(lst, _current_indent_level): if not lst: yield '[]' @@ -426,10 +455,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, elif value is False: yield buf + 'false' elif isinstance(value, integer_types): - yield ((buf + str(value)) - if (not _bigint_as_string or - (-1 << 53) < value < (1 << 53)) - else (buf + '"' + str(value) + '"')) + yield buf + _encode_int(value) elif isinstance(value, float): yield buf + _floatstr(value) elif _use_decimal and isinstance(value, Decimal): @@ -540,10 +566,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, elif value is False: yield 'false' elif isinstance(value, integer_types): - yield (str(value) - if (not _bigint_as_string or - (-1 << 53) < value < (1 << 53)) - else ('"' + str(value) + '"')) + yield _encode_int(value) elif isinstance(value, float): yield _floatstr(value) elif _use_decimal and isinstance(value, Decimal): @@ -585,10 +608,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, elif o is False: yield 'false' elif isinstance(o, integer_types): - yield (str(o) - if (not _bigint_as_string or - (-1 << 53) < o < (1 << 53)) - else 
('"' + str(o) + '"')) + yield _encode_int(o) elif isinstance(o, float): yield _floatstr(o) else: diff --git a/simplejson/tests/test_bigint_as_string.py b/simplejson/tests/test_bigint_as_string.py index 20ea64c..2cf2cc2 100644 --- a/simplejson/tests/test_bigint_as_string.py +++ b/simplejson/tests/test_bigint_as_string.py @@ -1,7 +1,7 @@ from unittest import TestCase import simplejson as json -from simplejson.compat import long_type + class TestBigintAsString(TestCase): # Python 2.5, at least the one that ships on Mac OS X, calculates @@ -15,44 +15,53 @@ class TestBigintAsString(TestCase): ((-1 << 53) - 1, '-9007199254740993'), ((-1 << 53) + 1, -9007199254740991)] + options = ( + {"bigint_as_string": True}, + {"int_as_string_bitcount": 53} + ) + def test_ints(self): - for val, expect in self.values: - self.assertEqual( - val, - json.loads(json.dumps(val))) - self.assertEqual( - expect, - json.loads(json.dumps(val, bigint_as_string=True))) + for opts in self.options: + for val, expect in self.values: + self.assertEqual( + val, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, **opts))) def test_lists(self): - for val, expect in self.values: - val = [val, val] - expect = [expect, expect] - self.assertEqual( - val, - json.loads(json.dumps(val))) - self.assertEqual( - expect, - json.loads(json.dumps(val, bigint_as_string=True))) + for opts in self.options: + for val, expect in self.values: + val = [val, val] + expect = [expect, expect] + self.assertEqual( + val, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, **opts))) def test_dicts(self): - for val, expect in self.values: - val = {'k': val} - expect = {'k': expect} - self.assertEqual( - val, - json.loads(json.dumps(val))) - self.assertEqual( - expect, - json.loads(json.dumps(val, bigint_as_string=True))) + for opts in self.options: + for val, expect in self.values: + val = {'k': val} + expect = {'k': expect} + self.assertEqual( + val, + 
json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, **opts))) def test_dict_keys(self): - for val, _ in self.values: - expect = {str(val): 'value'} - val = {val: 'value'} - self.assertEqual( - expect, - json.loads(json.dumps(val))) - self.assertEqual( - expect, - json.loads(json.dumps(val, bigint_as_string=True))) + for opts in self.options: + for val, _ in self.values: + expect = {str(val): 'value'} + val = {val: 'value'} + self.assertEqual( + expect, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, **opts))) diff --git a/simplejson/tests/test_bitsize_int_as_string.py b/simplejson/tests/test_bitsize_int_as_string.py new file mode 100644 index 0000000..7bb4d2c --- /dev/null +++ b/simplejson/tests/test_bitsize_int_as_string.py @@ -0,0 +1,67 @@ +from unittest import TestCase + +import simplejson as json + + +class TestBitSizeIntAsString(TestCase): + # Python 2.5, at least the one that ships on Mac OS X, calculates + # 2 ** 31 as 0! It manages to calculate 1 << 31 correctly. 
+ values = [ + (200, 200), + ((1 << 31) - 1, (1 << 31) - 1), + ((1 << 31), str(1 << 31)), + ((1 << 31) + 1, str((1 << 31) + 1)), + (-100, -100), + ((-1 << 31), str(-1 << 31)), + ((-1 << 31) - 1, str((-1 << 31) - 1)), + ((-1 << 31) + 1, (-1 << 31) + 1), + ] + + def test_ints_outside_range_fails(self): + self.assertNotEqual( + str(1 << 15), + json.loads(json.dumps(1 << 15, int_as_string_bitcount=16)), + ) + + def test_ints(self): + for val, expect in self.values: + self.assertEqual( + val, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, int_as_string_bitcount=31)), + ) + + def test_lists(self): + for val, expect in self.values: + val = [val, val] + expect = [expect, expect] + self.assertEqual( + val, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, int_as_string_bitcount=31))) + + def test_dicts(self): + for val, expect in self.values: + val = {'k': val} + expect = {'k': expect} + self.assertEqual( + val, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, int_as_string_bitcount=31))) + + def test_dict_keys(self): + for val, _ in self.values: + expect = {str(val): 'value'} + val = {val: 'value'} + self.assertEqual( + expect, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, int_as_string_bitcount=31))) diff --git a/simplejson/tests/test_speedups.py b/simplejson/tests/test_speedups.py index 825ecf2..df9637f 100644 --- a/simplejson/tests/test_speedups.py +++ b/simplejson/tests/test_speedups.py @@ -1,20 +1,38 @@ +import unittest from unittest import TestCase from simplejson import encoder, scanner + def has_speedups(): return encoder.c_make_encoder is not None + +def skip_if_speedups_missing(func): + def wrapper(*args, **kwargs): + if not has_speedups(): + if hasattr(unittest, 'SkipTest'): + raise unittest.SkipTest("C Extension not available") + else: + print "C Extension not available" + return + return 
func(*args, **kwargs) + + return wrapper + + class TestDecode(TestCase): + @skip_if_speedups_missing def test_make_scanner(self): - if not has_speedups(): - return self.assertRaises(AttributeError, scanner.c_make_scanner, 1) + @skip_if_speedups_missing def test_make_encoder(self): - if not has_speedups(): - return - self.assertRaises(TypeError, encoder.c_make_encoder, + self.assertRaises( + TypeError, + encoder.c_make_encoder, None, - "\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75", - None) + ("\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7" + "\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75"), + None + ) |