summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2018-04-25 00:39:48 +0300
committer: Serhiy Storchaka <storchaka@gmail.com> 2018-04-25 01:01:59 +0300
commit: 12e30827bc6224a133a1ccfb977c0bccb0c84576 (patch)
tree: 57e111e7df0b5fae214cde92767dc3d52c1971b7
parent: 2ec708bbc884b956c453eb057547cf0150294be9 (diff)
download: simplejson-bytes-encoding.tar.gz
Allow to disable serializing bytes by default in Python 3. (branch: bytes-encoding)
If encoding is None, then bytes objects will be passed to the default() method instead of being transformed into unicode.
-rw-r--r-- index.rst 22
-rw-r--r-- simplejson/_speedups.c 29
-rw-r--r-- simplejson/encoder.py 19
-rw-r--r-- simplejson/tests/test_dump.py 76
4 files changed, 125 insertions, 21 deletions
diff --git a/index.rst b/index.rst
index 8c9c7ab..94c792a 100644
--- a/index.rst
+++ b/index.rst
@@ -192,8 +192,16 @@ Basic Usage
.. versionchanged:: 2.1.4
Use ``(',', ': ')`` as default if *indent* is not ``None``.
- *encoding* is the character encoding for str instances, default is
- ``'utf-8'``.
+ If *encoding* is not ``None``, then all input :class:`bytes` objects in
+ Python 3 and 8-bit strings in Python 2 will be transformed
+ into unicode using that encoding prior to JSON-encoding. The default is
+ ``'utf-8'``. If *encoding* is ``None``, then all :class:`bytes` objects
+ will be passed to the *default* function in Python 3.
+
+ .. versionchanged:: 3.15.0
+ ``encoding=None`` disables serializing :class:`bytes` by default in
+ Python 3.
+
*default(obj)* is a function that should return a serializable version of
*obj* or raise :exc:`TypeError`. The default simply raises :exc:`TypeError`.
@@ -656,9 +664,15 @@ Encoders and decoders
that can't otherwise be serialized. It should return a JSON encodable
version of the object or raise a :exc:`TypeError`.
- If *encoding* is not ``None``, then all input strings will be transformed
+ If *encoding* is not ``None``, then all input :class:`bytes` objects in
+ Python 3 and 8-bit strings in Python 2 will be transformed
into unicode using that encoding prior to JSON-encoding. The default is
- ``'utf-8'``.
+ ``'utf-8'``. If *encoding* is ``None``, then all :class:`bytes` objects
+ will be passed to the :meth:`default` method in Python 3.
+
+ .. versionchanged:: 3.15.0
+ ``encoding=None`` disables serializing :class:`bytes` by default in
+ Python 3.
If *namedtuple_as_object* is true (default: ``True``),
objects with ``_asdict()`` methods will be encoded
diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c
index df23d15..e791618 100644
--- a/simplejson/_speedups.c
+++ b/simplejson/_speedups.c
@@ -634,8 +634,8 @@ encoder_stringify_key(PyEncoderObject *s, PyObject *key)
Py_INCREF(key);
return key;
}
- else if (PyString_Check(key)) {
#if PY_MAJOR_VERSION >= 3
+ else if (PyString_Check(key) && s->encoding != NULL) {
const char *encoding = JSON_ASCII_AS_STRING(s->encoding);
if (encoding == NULL)
return NULL;
@@ -644,11 +644,13 @@ encoder_stringify_key(PyEncoderObject *s, PyObject *key)
PyString_GET_SIZE(key),
encoding,
NULL);
+ }
#else /* PY_MAJOR_VERSION >= 3 */
+ else if (PyString_Check(key)) {
Py_INCREF(key);
return key;
-#endif /* PY_MAJOR_VERSION < 3 */
}
+#endif /* PY_MAJOR_VERSION < 3 */
else if (PyFloat_Check(key)) {
return encoder_encode_float(s, key);
}
@@ -676,7 +678,7 @@ encoder_stringify_key(PyEncoderObject *s, PyObject *key)
else if (s->use_decimal && PyObject_TypeCheck(key, (PyTypeObject *)s->Decimal)) {
return PyObject_Str(key);
}
- else if (s->skipkeys) {
+ if (s->skipkeys) {
Py_INCREF(Py_None);
return Py_None;
}
@@ -2578,11 +2580,19 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
s->defaultfn = defaultfn;
Py_INCREF(encoder);
s->encoder = encoder;
- s->encoding = JSON_ParseEncoding(encoding);
- if (s->encoding == NULL)
- goto bail;
- if (JSON_ASCII_AS_STRING(s->encoding) == NULL)
- goto bail;
+#if PY_MAJOR_VERSION >= 3
+ if (encoding == Py_None) {
+ s->encoding = NULL;
+ }
+ else
+#endif /* PY_MAJOR_VERSION >= 3 */
+ {
+ s->encoding = JSON_ParseEncoding(encoding);
+ if (s->encoding == NULL)
+ goto bail;
+ if (JSON_ASCII_AS_STRING(s->encoding) == NULL)
+ goto bail;
+ }
Py_INCREF(indent);
s->indent = indent;
Py_INCREF(key_separator);
@@ -2854,7 +2864,8 @@ encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ss
if (cstr != NULL)
rv = _steal_accumulate(rval, cstr);
}
- else if (PyString_Check(obj) || PyUnicode_Check(obj))
+ else if ((PyString_Check(obj) && s->encoding != NULL) ||
+ PyUnicode_Check(obj))
{
PyObject *encoded = encoder_encode_string(s, obj);
if (encoded != NULL)
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
index 5693eb6..d2b6bca 100644
--- a/simplejson/encoder.py
+++ b/simplejson/encoder.py
@@ -309,7 +309,7 @@ class JSONEncoder(object):
_encoder = encode_basestring_ascii
else:
_encoder = encode_basestring
- if self.encoding != 'utf-8':
+ if self.encoding != 'utf-8' and self.encoding is not None:
def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
if isinstance(o, binary_type):
o = o.decode(_encoding)
@@ -482,8 +482,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
first = False
else:
buf = separator
- if (isinstance(value, string_types) or
- (_PY3 and isinstance(value, bytes))):
+ if isinstance(value, string_types):
+ yield buf + _encoder(value)
+ elif _PY3 and isinstance(value, bytes) and _encoding is not None:
yield buf + _encoder(value)
elif isinstance(value, RawJSON):
yield buf + value.encoded_json
@@ -533,7 +534,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
def _stringify_key(key):
if isinstance(key, string_types): # pragma: no cover
pass
- elif isinstance(key, binary_type):
+ elif _PY3 and isinstance(key, bytes) and _encoding is not None:
key = key.decode(_encoding)
elif isinstance(key, float):
key = _floatstr(key)
@@ -603,8 +604,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
yield item_separator
yield _encoder(key)
yield _key_separator
- if (isinstance(value, string_types) or
- (_PY3 and isinstance(value, bytes))):
+ if isinstance(value, string_types):
+ yield _encoder(value)
+ elif _PY3 and isinstance(value, bytes) and _encoding is not None:
yield _encoder(value)
elif isinstance(value, RawJSON):
yield value.encoded_json
@@ -647,8 +649,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
del markers[markerid]
def _iterencode(o, _current_indent_level):
- if (isinstance(o, string_types) or
- (_PY3 and isinstance(o, bytes))):
+ if isinstance(o, string_types):
+ yield _encoder(o)
+ elif _PY3 and isinstance(o, bytes) and _encoding is not None:
yield _encoder(o)
elif isinstance(o, RawJSON):
yield o.encoded_json
diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py
index 2a30125..6b36c20 100644
--- a/simplejson/tests/test_dump.py
+++ b/simplejson/tests/test_dump.py
@@ -11,6 +11,9 @@ def as_text_type(s):
return s.decode('ascii')
return s
+def decode_iso_8859_15(b):
+ return b.decode('iso-8859-15')
+
class TestDump(TestCase):
def test_dump(self):
sio = StringIO()
@@ -140,3 +143,76 @@ class TestDump(TestCase):
json.dumps(MisbehavingTextSubtype(text)),
json.dumps(text)
)
+
+ def test_bytes_toplevel(self):
+ self.assertEqual(json.dumps(b('\xe2\x82\xac')), r'"\u20ac"')
+ self.assertRaises(UnicodeDecodeError, json.dumps, b('\xa4'))
+ self.assertEqual(json.dumps(b('\xa4'), encoding='iso-8859-1'),
+ r'"\u00a4"')
+ self.assertEqual(json.dumps(b('\xa4'), encoding='iso-8859-15'),
+ r'"\u20ac"')
+ if PY3:
+ self.assertRaises(TypeError, json.dumps, b('\xe2\x82\xac'),
+ encoding=None)
+ self.assertRaises(TypeError, json.dumps, b('\xa4'),
+ encoding=None)
+ self.assertEqual(json.dumps(b('\xa4'), encoding=None,
+ default=decode_iso_8859_15),
+ r'"\u20ac"')
+ else:
+ self.assertEqual(json.dumps(b('\xe2\x82\xac'), encoding=None),
+ r'"\u20ac"')
+ self.assertRaises(UnicodeDecodeError, json.dumps, b('\xa4'),
+ encoding=None)
+ self.assertRaises(UnicodeDecodeError, json.dumps, b('\xa4'),
+ encoding=None, default=decode_iso_8859_15)
+
+ def test_bytes_nested(self):
+ self.assertEqual(json.dumps([b('\xe2\x82\xac')]), r'["\u20ac"]')
+ self.assertRaises(UnicodeDecodeError, json.dumps, [b('\xa4')])
+ self.assertEqual(json.dumps([b('\xa4')], encoding='iso-8859-1'),
+ r'["\u00a4"]')
+ self.assertEqual(json.dumps([b('\xa4')], encoding='iso-8859-15'),
+ r'["\u20ac"]')
+ if PY3:
+ self.assertRaises(TypeError, json.dumps, [b('\xe2\x82\xac')],
+ encoding=None)
+ self.assertRaises(TypeError, json.dumps, [b('\xa4')],
+ encoding=None)
+ self.assertEqual(json.dumps([b('\xa4')], encoding=None,
+ default=decode_iso_8859_15),
+ r'["\u20ac"]')
+ else:
+ self.assertEqual(json.dumps([b('\xe2\x82\xac')], encoding=None),
+ r'["\u20ac"]')
+ self.assertRaises(UnicodeDecodeError, json.dumps, [b('\xa4')],
+ encoding=None)
+ self.assertRaises(UnicodeDecodeError, json.dumps, [b('\xa4')],
+ encoding=None, default=decode_iso_8859_15)
+
+ def test_bytes_key(self):
+ self.assertEqual(json.dumps({b('\xe2\x82\xac'): 42}), r'{"\u20ac": 42}')
+ self.assertRaises(UnicodeDecodeError, json.dumps, {b('\xa4'): 42})
+ self.assertEqual(json.dumps({b('\xa4'): 42}, encoding='iso-8859-1'),
+ r'{"\u00a4": 42}')
+ self.assertEqual(json.dumps({b('\xa4'): 42}, encoding='iso-8859-15'),
+ r'{"\u20ac": 42}')
+ if PY3:
+ self.assertRaises(TypeError, json.dumps, {b('\xe2\x82\xac'): 42},
+ encoding=None)
+ self.assertRaises(TypeError, json.dumps, {b('\xa4'): 42},
+ encoding=None)
+ self.assertRaises(TypeError, json.dumps, {b('\xa4'): 42},
+ encoding=None, default=decode_iso_8859_15)
+ self.assertEqual(json.dumps({b('\xa4'): 42}, encoding=None,
+ skipkeys=True),
+ r'{}')
+ else:
+ self.assertEqual(json.dumps({b('\xe2\x82\xac'): 42}, encoding=None),
+ r'{"\u20ac": 42}')
+ self.assertRaises(UnicodeDecodeError, json.dumps, {b('\xa4'): 42},
+ encoding=None)
+ self.assertRaises(UnicodeDecodeError, json.dumps, {b('\xa4'): 42},
+ encoding=None, default=decode_iso_8859_15)
+ self.assertRaises(UnicodeDecodeError, json.dumps, {b('\xa4'): 42},
+ encoding=None, skipkeys=True)