diff options
author | Bob Ippolito <bob@redivi.com> | 2023-04-05 21:24:51 -0700 |
---|---|---|
committer | Bob Ippolito <bob@redivi.com> | 2023-04-05 21:24:51 -0700 |
commit | 2cbc419a31208dd9d0ed5706d5f3aa333ebd3e30 (patch) | |
tree | c3e879b38d37d8ae25eb76efc8dd2a6f21fce34a /simplejson | |
parent | 440a5e45be508bb0a0ed25af62b7dbfe8cf1e142 (diff) | |
download | simplejson-2cbc419a31208dd9d0ed5706d5f3aa333ebd3e30.tar.gz |
Additional security hardening improvements:
* Remove unused namedtuple_as_object and tuple_as_array arguments from
simplejson.load (SJ-PT-23-102)
* Remove vestigial _one_shot code from iterencode (SJ-PT-23-103)
* Change default of allow_nan from True to False and add allow_nan
to decoder (SJ-PT-23-107)
Diffstat (limited to 'simplejson')
-rw-r--r-- | simplejson/__init__.py | 100 | ||||
-rw-r--r-- | simplejson/_speedups.c | 18 | ||||
-rw-r--r-- | simplejson/decoder.py | 18 | ||||
-rw-r--r-- | simplejson/encoder.py | 28 | ||||
-rw-r--r-- | simplejson/scanner.py | 6 | ||||
-rw-r--r-- | simplejson/tests/test_float.py | 7 |
6 files changed, 83 insertions, 94 deletions
diff --git a/simplejson/__init__.py b/simplejson/__init__.py index 92b6009..206e22d 100644 --- a/simplejson/__init__.py +++ b/simplejson/__init__.py @@ -149,28 +149,10 @@ def _import_c_make_encoder(): except ImportError: return None -_default_encoder = JSONEncoder( - skipkeys=False, - ensure_ascii=True, - check_circular=True, - allow_nan=True, - indent=None, - separators=None, - encoding='utf-8', - default=None, - use_decimal=True, - namedtuple_as_object=True, - tuple_as_array=True, - iterable_as_array=False, - bigint_as_string=False, - item_sort_key=None, - for_json=False, - ignore_nan=False, - int_as_string_bitcount=None, -) +_default_encoder = JSONEncoder() def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, + allow_nan=False, cls=None, indent=None, separators=None, encoding='utf-8', default=None, use_decimal=True, namedtuple_as_object=True, tuple_as_array=True, bigint_as_string=False, sort_keys=False, item_sort_key=None, @@ -187,10 +169,10 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, contain non-ASCII characters, so long as they do not need to be escaped by JSON. When it is true, all non-ASCII characters are escaped. - If *allow_nan* is false, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) - in strict compliance of the original JSON specification, instead of using - the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See + If *allow_nan* is true (default: ``False``), then out of range ``float`` + values (``nan``, ``inf``, ``-inf``) will be serialized to + their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``) + instead of raising a ValueError. See *ignore_nan* for ECMA-262 compliant behavior. If *indent* is a string, then JSON array elements and object members @@ -258,7 +240,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, """ # cached encoder if (not skipkeys and ensure_ascii and - check_circular and allow_nan and + check_circular and not allow_nan and cls is None and indent is None and separators is None and encoding == 'utf-8' and default is None and use_decimal and namedtuple_as_object and tuple_as_array and not iterable_as_array @@ -292,7 +274,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, + allow_nan=False, cls=None, indent=None, separators=None, encoding='utf-8', default=None, use_decimal=True, namedtuple_as_object=True, tuple_as_array=True, bigint_as_string=False, sort_keys=False, item_sort_key=None, @@ -312,10 +294,11 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, for container types will be skipped and a circular reference will result in an ``OverflowError`` (or worse). - If ``allow_nan`` is false, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in - strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + If *allow_nan* is true (default: ``False``), then out of range ``float`` + values (``nan``, ``inf``, ``-inf``) will be serialized to + their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``) + instead of raising a ValueError. See + *ignore_nan* for ECMA-262 compliant behavior. If ``indent`` is a string, then JSON array elements and object members will be pretty-printed with a newline followed by that string repeated @@ -383,7 +366,7 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, """ # cached encoder if (not skipkeys and ensure_ascii and - check_circular and allow_nan and + check_circular and not allow_nan and cls is None and indent is None and separators is None and encoding == 'utf-8' and default is None and use_decimal and namedtuple_as_object and tuple_as_array and not iterable_as_array @@ -412,14 +395,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, **kw).encode(obj) -_default_decoder = JSONDecoder(encoding=None, object_hook=None, - object_pairs_hook=None) +_default_decoder = JSONDecoder() def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, - use_decimal=False, namedtuple_as_object=True, tuple_as_array=True, - **kw): + use_decimal=False, allow_nan=False, **kw): """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing a JSON document as `str` or `bytes`) to a Python object. @@ -451,14 +432,18 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, ``int(num_str)``. This can be used to use another datatype or parser for JSON integers (e.g. :class:`float`). - *parse_constant*, if specified, will be called with one of the - following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This - can be used to raise an exception if invalid JSON numbers are - encountered. + *allow_nan*, if True (default false), will allow the parser to + accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity`` + and enable the use of the deprecated *parse_constant*. If *use_decimal* is true (default: ``False``) then it implies parse_float=decimal.Decimal for parity with ``dump``. + *parse_constant*, if specified, will be + called with one of the following strings: ``'-Infinity'``, + ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature, + as it is rare to parse non-compliant JSON containing these values. + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead of subclassing whenever possible. @@ -468,12 +453,12 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, encoding=encoding, cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, - use_decimal=use_decimal, **kw) + use_decimal=use_decimal, allow_nan=allow_nan, **kw) def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, - use_decimal=False, **kw): + use_decimal=False, allow_nan=False, **kw): """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) to a Python object. @@ -505,14 +490,18 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, ``int(num_str)``. This can be used to use another datatype or parser for JSON integers (e.g. :class:`float`). - *parse_constant*, if specified, will be called with one of the - following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This - can be used to raise an exception if invalid JSON numbers are - encountered. + *allow_nan*, if True (default false), will allow the parser to + accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity`` + and enable the use of the deprecated *parse_constant*. If *use_decimal* is true (default: ``False``) then it implies parse_float=decimal.Decimal for parity with ``dump``. + *parse_constant*, if specified, will be + called with one of the following strings: ``'-Infinity'``, + ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature, + as it is rare to parse non-compliant JSON containing these values. + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead of subclassing whenever possible. @@ -521,7 +510,7 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, if (cls is None and encoding is None and object_hook is None and parse_int is None and parse_float is None and parse_constant is None and object_pairs_hook is None - and not use_decimal and not kw): + and not use_decimal and not allow_nan and not kw): return _default_decoder.decode(s) if cls is None: cls = JSONDecoder @@ -539,6 +528,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, if parse_float is not None: raise TypeError("use_decimal=True implies parse_float=Decimal") kw['parse_float'] = Decimal + if allow_nan: + kw['allow_nan'] = True return cls(encoding=encoding, **kw).decode(s) @@ -560,22 +551,9 @@ def _toggle_speedups(enabled): scan.make_scanner = scan.py_make_scanner dec.make_scanner = scan.make_scanner global _default_decoder - _default_decoder = JSONDecoder( - encoding=None, - object_hook=None, - object_pairs_hook=None, - ) + _default_decoder = JSONDecoder() global _default_encoder - _default_encoder = JSONEncoder( - skipkeys=False, - ensure_ascii=True, - check_circular=True, - allow_nan=True, - indent=None, - separators=None, - encoding='utf-8', - default=None, - ) + _default_encoder = JSONEncoder() def simple_first(kv): """Helper function to pass to item_sort_key to sort simple diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c index e08f4c4..bd56b4d 100644 --- a/simplejson/_speedups.c +++ b/simplejson/_speedups.c @@ -1843,7 +1843,7 @@ bail: } static PyObject * -_parse_constant(PyScannerObject *s, PyObject *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +_parse_constant(PyScannerObject *s, PyObject *pystr, PyObject *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read a JSON constant from PyString pystr. constant is the Python string that was found @@ -1855,6 +1855,10 @@ _parse_constant(PyScannerObject *s, PyObject *constant, Py_ssize_t idx, Py_ssize Returns the result of parse_constant */ PyObject *rval; + if (s->parse_constant == Py_None) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } /* rval = parse_constant(constant) */ rval = PyObject_CallOneArg(s->parse_constant, constant); @@ -2158,7 +2162,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n case 'N': /* NaN */ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - rval = _parse_constant(s, JSON_NaN, idx, next_idx_ptr); + rval = _parse_constant(s, pystr, JSON_NaN, idx, next_idx_ptr); } else fallthrough = 1; @@ -2166,7 +2170,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n case 'I': /* Infinity */ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - rval = _parse_constant(s, JSON_Infinity, idx, next_idx_ptr); + rval = _parse_constant(s, pystr, JSON_Infinity, idx, next_idx_ptr); } else fallthrough = 1; @@ -2174,7 +2178,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n case '-': /* -Infinity */ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - rval = _parse_constant(s, JSON_NegInfinity, idx, next_idx_ptr); + rval = _parse_constant(s, pystr, JSON_NegInfinity, idx, next_idx_ptr); } else fallthrough = 1; @@ -2277,7 +2281,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' && PyUnicode_READ(kind, str, idx + 2) == 'N') { - rval = _parse_constant(s, JSON_NaN, idx, next_idx_ptr); + rval = _parse_constant(s, pystr, JSON_NaN, idx, next_idx_ptr); } else fallthrough = 1; @@ -2292,7 +2296,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ PyUnicode_READ(kind, str, idx + 5) == 'i' && PyUnicode_READ(kind, str, idx + 6) == 't' && PyUnicode_READ(kind, str, idx + 7) == 'y') { - rval = _parse_constant(s, JSON_Infinity, idx, next_idx_ptr); + rval = _parse_constant(s, pystr, JSON_Infinity, idx, next_idx_ptr); } else fallthrough = 1; @@ -2308,7 +2312,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ PyUnicode_READ(kind, str, idx + 6) == 'i' && PyUnicode_READ(kind, str, idx + 7) == 't' && PyUnicode_READ(kind, str, idx + 8) == 'y') { - rval = _parse_constant(s, JSON_NegInfinity, idx, next_idx_ptr); + rval = _parse_constant(s, pystr, JSON_NegInfinity, idx, next_idx_ptr); } else fallthrough = 1; diff --git a/simplejson/decoder.py b/simplejson/decoder.py index bbde82c..c99a976 100644 --- a/simplejson/decoder.py +++ b/simplejson/decoder.py @@ -306,14 +306,15 @@ class JSONDecoder(object): | null | None | +---------------+-------------------+ - It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + When allow_nan=True, it also understands + ``NaN``, ``Infinity``, and ``-Infinity`` as their corresponding ``float`` values, which is outside the JSON spec. """ def __init__(self, encoding=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, strict=True, - object_pairs_hook=None): + object_pairs_hook=None, allow_nan=False): """ *encoding* determines the encoding used to interpret any :class:`str` objects decoded by this instance (``'utf-8'`` by @@ -346,10 +347,13 @@ class JSONDecoder(object): ``int(num_str)``. This can be used to use another datatype or parser for JSON integers (e.g. :class:`float`). - *parse_constant*, if specified, will be called with one of the - following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This - can be used to raise an exception if invalid JSON numbers are - encountered. + *allow_nan*, if True (default false), will allow the parser to + accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``. + + *parse_constant*, if specified, will be + called with one of the following strings: ``'-Infinity'``, + ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature, + as it is rare to parse non-compliant JSON containing these values. *strict* controls the parser's behavior when it encounters an invalid control character in a string. The default setting of @@ -364,7 +368,7 @@ class JSONDecoder(object): self.object_pairs_hook = object_pairs_hook self.parse_float = parse_float or float self.parse_int = parse_int or bounded_int - self.parse_constant = parse_constant or _CONSTANTS.__getitem__ + self.parse_constant = parse_constant or (allow_nan and _CONSTANTS.__getitem__ or None) self.strict = strict self.parse_object = JSONObject self.parse_array = JSONArray diff --git a/simplejson/encoder.py b/simplejson/encoder.py index 3f94f3a..661ff36 100644 --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -140,7 +140,7 @@ class JSONEncoder(object): key_separator = ': ' def __init__(self, skipkeys=False, ensure_ascii=True, - check_circular=True, allow_nan=True, sort_keys=False, + check_circular=True, allow_nan=False, sort_keys=False, indent=None, separators=None, encoding='utf-8', default=None, use_decimal=True, namedtuple_as_object=True, tuple_as_array=True, bigint_as_string=False, @@ -161,10 +161,11 @@ class JSONEncoder(object): prevent an infinite recursion (which would cause an OverflowError). Otherwise, no such check takes place. - If allow_nan is true, then NaN, Infinity, and -Infinity will be - encoded as such. This behavior is not JSON specification compliant, - but is consistent with most JavaScript based encoders and decoders. - Otherwise, it will be a ValueError to encode such floats. + If allow_nan is true (default: False), then out of range float + values (nan, inf, -inf) will be serialized to + their JavaScript equivalents (NaN, Infinity, -Infinity) + instead of raising a ValueError. See + ignore_nan for ECMA-262 compliant behavior. If sort_keys is true, then the output of dictionaries will be sorted by key; this is useful for regression tests to ensure @@ -294,7 +295,7 @@ class JSONEncoder(object): # This doesn't pass the iterator directly to ''.join() because the # exceptions aren't as detailed. The list call should be roughly # equivalent to the PySequence_Fast that ''.join() would do. - chunks = self.iterencode(o, _one_shot=True) + chunks = self.iterencode(o) if not isinstance(chunks, (list, tuple)): chunks = list(chunks) if self.ensure_ascii: @@ -302,7 +303,7 @@ class JSONEncoder(object): else: return u''.join(chunks) - def iterencode(self, o, _one_shot=False): + def iterencode(self, o): """Encode the given object and yield each string representation as available. @@ -356,8 +357,7 @@ class JSONEncoder(object): key_memo = {} int_as_string_bitcount = ( 53 if self.bigint_as_string else self.int_as_string_bitcount) - if (_one_shot and c_make_encoder is not None - and self.indent is None): + if (c_make_encoder is not None and self.indent is None): _iterencode = c_make_encoder( markers, self.default, _encoder, self.indent, self.key_separator, self.item_separator, self.sort_keys, @@ -370,7 +370,7 @@ class JSONEncoder(object): _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, _one_shot, self.use_decimal, + self.skipkeys, self.use_decimal, self.namedtuple_as_object, self.tuple_as_array, int_as_string_bitcount, self.item_sort_key, self.encoding, self.for_json, @@ -398,14 +398,14 @@ class JSONEncoderForHTML(JSONEncoder): def encode(self, o): # Override JSONEncoder.encode because it has hacks for # performance that make things more complicated. - chunks = self.iterencode(o, True) + chunks = self.iterencode(o) if self.ensure_ascii: return ''.join(chunks) else: return u''.join(chunks) - def iterencode(self, o, _one_shot=False): - chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot) + def iterencode(self, o): + chunks = super(JSONEncoderForHTML, self).iterencode(o) for chunk in chunks: chunk = chunk.replace('&', '\\u0026') chunk = chunk.replace('<', '\\u003c') @@ -419,7 +419,7 @@ class JSONEncoderForHTML(JSONEncoder): def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, - _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + _key_separator, _item_separator, _sort_keys, _skipkeys, _use_decimal, _namedtuple_as_object, _tuple_as_array, _int_as_string_bitcount, _item_sort_key, _encoding,_for_json, diff --git a/simplejson/scanner.py b/simplejson/scanner.py index 85e385e..34710d6 100644 --- a/simplejson/scanner.py +++ b/simplejson/scanner.py @@ -60,11 +60,11 @@ def py_make_scanner(context): else: res = parse_int(integer) return res, m.end() - elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': + elif parse_constant and nextchar == 'N' and string[idx:idx + 3] == 'NaN': return parse_constant('NaN'), idx + 3 - elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': + elif parse_constant and nextchar == 'I' and string[idx:idx + 8] == 'Infinity': return parse_constant('Infinity'), idx + 8 - elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': + elif parse_constant and nextchar == '-' and string[idx:idx + 9] == '-Infinity': return parse_constant('-Infinity'), idx + 9 else: raise JSONDecodeError(errmsg, string, idx) diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py index e382ec2..a977969 100644 --- a/simplejson/tests/test_float.py +++ b/simplejson/tests/test_float.py @@ -7,9 +7,9 @@ from simplejson.decoder import NaN, PosInf, NegInf class TestFloat(TestCase): def test_degenerates_allow(self): for inf in (PosInf, NegInf): - self.assertEqual(json.loads(json.dumps(inf)), inf) + self.assertEqual(json.loads(json.dumps(inf, allow_nan=True), allow_nan=True), inf) # Python 2.5 doesn't have math.isnan - nan = json.loads(json.dumps(NaN)) + nan = json.loads(json.dumps(NaN, allow_nan=True), allow_nan=True) self.assertTrue((0 + nan) != nan) def test_degenerates_ignore(self): @@ -19,6 +19,9 @@ class TestFloat(TestCase): def test_degenerates_deny(self): for f in (PosInf, NegInf, NaN): self.assertRaises(ValueError, json.dumps, f, allow_nan=False) + for s in ('Infinity', '-Infinity', 'NaN'): + self.assertRaises(ValueError, json.loads, s, allow_nan=False) + self.assertRaises(ValueError, json.loads, s) def test_floats(self): for num in [1617161771.7650001, math.pi, math.pi**100, |