diff options
author | Bob Ippolito <bob@redivi.com> | 2013-02-21 14:19:08 -0800 |
---|---|---|
committer | Bob Ippolito <bob@redivi.com> | 2013-02-21 14:19:08 -0800 |
commit | 104b40fcf6aa39d9ba7b240c3c528d1f85e86ef2 (patch) | |
tree | 3cf08092fb2b95376a2b93f88e400c98c53872d8 | |
parent | 44d7709a31f3a19f3d465411585ebb7be7fa2295 (diff) | |
download | simplejson-104b40fcf6aa39d9ba7b240c3c528d1f85e86ef2.tar.gz |
improve truncated input error messages, use JSONDecodeError instead of StopIteration (#61)
-rw-r--r-- | CHANGES.txt | 9 | ||||
-rw-r--r-- | conf.py | 6 | ||||
-rw-r--r-- | setup.py | 2 | ||||
-rw-r--r-- | simplejson/__init__.py | 5 | ||||
-rw-r--r-- | simplejson/_speedups.c | 181 | ||||
-rw-r--r-- | simplejson/decoder.py | 89 | ||||
-rw-r--r-- | simplejson/scanner.py | 54 | ||||
-rw-r--r-- | simplejson/tests/test_fail.py | 41 |
8 files changed, 237 insertions, 150 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index f26da7d..aeb131e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,12 @@ +Version 3.1.0 released XXXX-XX-XX + +* Improve error messages for certain kinds of truncated input + http://bugs.python.org/issue16009 +* Moved JSONDecodeError to json.scanner (still available for import + from json.decoder) +* Changed scanner to use JSONDecodeError directly rather than + StopIteration to improve error messages + Version 3.0.9 released 2013-02-21 * Fix an off-by-one error in the colno property of JSONDecodeError @@ -36,15 +36,15 @@ master_doc = 'index' # General substitutions. project = 'simplejson' -copyright = '2012, Bob Ippolito' +copyright = '2013, Bob Ippolito' # The default replacements for |version| and |release|, also used in various # other places throughout the built documents. # # The short X.Y version. -version = '3.0' +version = '3.1' # The full version, including alpha/beta/rc tags. -release = '3.0.9' +release = '3.1' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: @@ -8,7 +8,7 @@ from distutils.errors import CCompilerError, DistutilsExecError, \ DistutilsPlatformError IS_PYPY = hasattr(sys, 'pypy_translation_info') -VERSION = '3.0.9' +VERSION = '3.1.0' DESCRIPTION = "Simple, fast, extensible JSON encoder/decoder for Python" with open('README.rst', 'r') as f: diff --git a/simplejson/__init__.py b/simplejson/__init__.py index 5574457..a908826 100644 --- a/simplejson/__init__.py +++ b/simplejson/__init__.py @@ -99,7 +99,7 @@ Using simplejson.tool from the shell to validate and pretty-print:: Expecting property name: line 1 column 3 (char 2) """ from __future__ import absolute_import -__version__ = '3.0.9' +__version__ = '3.1.0' __all__ = [ 'dump', 'dumps', 'load', 'loads', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', @@ -110,7 +110,8 @@ __author__ = 'Bob Ippolito <bob@redivi.com>' from decimal import Decimal -from .decoder import JSONDecoder, JSONDecodeError +from .scanner import JSONDecodeError +from .decoder import JSONDecoder from .encoder import JSONEncoder, JSONEncoderForHTML def _import_OrderedDict(): import collections diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c index ae6044b..403e08d 100644 --- a/simplejson/_speedups.c +++ b/simplejson/_speedups.c @@ -1,3 +1,4 @@ +/* -*- mode: C; c-file-style: "python"; c-basic-offset: 4 -*- */ #include "Python.h" #include "structmember.h" @@ -109,6 +110,21 @@ JSON_Accu_FinishAsList(JSON_Accu *acc); static void JSON_Accu_Destroy(JSON_Accu *acc); +#define ERR_EXPECTING_VALUE "Expecting value" +#define ERR_ARRAY_DELIMITER "Expecting ',' delimiter or ']'" +#define ERR_ARRAY_VALUE_FIRST "Expecting value or ']'" +#define ERR_OBJECT_DELIMITER "Expecting ',' delimiter or '}'" +#define ERR_OBJECT_PROPERTY "Expecting property name enclosed in double quotes" +#define ERR_OBJECT_PROPERTY_FIRST "Expecting property name enclosed in double quotes or '}'" +#define ERR_OBJECT_PROPERTY_DELIMITER "Expecting ':' delimiter" +#define ERR_STRING_UNTERMINATED "Unterminated string starting at" +#define ERR_STRING_CONTROL "Invalid control character %r at" +#define ERR_STRING_ESC1 "Invalid \\X escape sequence %r" +#define ERR_STRING_ESC4 "Invalid \\uXXXX escape sequence" +#define ERR_STRING_SURROGATE "Invalid \\uXXXX\\uXXXX surrogate pair" +#define ERR_STRING_HIGH_SURROGATE "Unpaired high surrogate" +#define ERR_STRING_LOW_SURROGATE "Unpaired low surrogate" + typedef struct _PyScannerObject { PyObject_HEAD PyObject *encoding; @@ -746,16 +762,15 @@ bail: static void raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) { - /* Use the Python function simplejson.decoder.errmsg to raise a nice - looking ValueError exception */ + /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */ static PyObject *JSONDecodeError = NULL; PyObject *exc; if (JSONDecodeError == NULL) { - PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); - if (decoder == NULL) + PyObject *scanner = PyImport_ImportModule("simplejson.scanner"); + if (scanner == NULL) return; - JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError"); - Py_DECREF(decoder); + JSONDecodeError = PyObject_GetAttrString(scanner, "JSONDecodeError"); + Py_DECREF(scanner); if (JSONDecodeError == NULL) return; } @@ -873,7 +888,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s PyObject *strchunk = NULL; if (len == end) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } else if (end < 0 || len < end) { @@ -889,7 +904,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s break; } else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); + raise_errmsg(ERR_STRING_CONTROL, pystr, next); goto bail; } else if (c > 0x7f) { @@ -897,7 +912,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s } } if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } /* Pick up this chunk if it's not zero length */ @@ -936,7 +951,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s break; } if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } c = buf[next]; @@ -955,7 +970,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s default: c = 0; } if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); + raise_errmsg(ERR_STRING_ESC1, pystr, end - 2); goto bail; } } @@ -964,7 +979,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s next++; end = next + 4; if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + raise_errmsg(ERR_STRING_ESC4, pystr, next - 1); goto bail; } /* Decode 4 hex digits */ @@ -982,7 +997,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s case 'F': c |= (digit - 'A' + 10); break; default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); goto bail; } } @@ -991,11 +1006,11 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s if ((c & 0xfc00) == 0xd800) { JSON_UNICHR c2 = 0; if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); + raise_errmsg(ERR_STRING_HIGH_SURROGATE, pystr, end - 5); goto bail; } if (buf[next++] != '\\' || buf[next++] != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); + raise_errmsg(ERR_STRING_HIGH_SURROGATE, pystr, end - 5); goto bail; } end += 6; @@ -1014,18 +1029,18 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s case 'F': c2 |= (digit - 'A' + 10); break; default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); goto bail; } } if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); + raise_errmsg(ERR_STRING_HIGH_SURROGATE, pystr, end - 5); goto bail; } c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); } else if ((c & 0xfc00) == 0xdc00) { - raise_errmsg("Unpaired low surrogate", pystr, end - 5); + raise_errmsg(ERR_STRING_LOW_SURROGATE, pystr, end - 5); goto bail; } #endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */ @@ -1102,7 +1117,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next PyObject *chunk = NULL; if (len == end) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } else if (end < 0 || len < end) { @@ -1118,12 +1133,12 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next break; } else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); + raise_errmsg(ERR_STRING_CONTROL, pystr, next); goto bail; } } if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } /* Pick up this chunk if it's not zero length */ @@ -1144,7 +1159,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next break; } if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } c = PyUnicode_READ(kind, buf, next); @@ -1163,7 +1178,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next default: c = 0; } if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); + raise_errmsg(ERR_STRING_ESC1, pystr, end - 2); goto bail; } } @@ -1172,7 +1187,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next next++; end = next + 4; if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + raise_errmsg(ERR_STRING_ESC4, pystr, next - 1); goto bail; } /* Decode 4 hex digits */ @@ -1190,7 +1205,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next case 'F': c |= (digit - 'A' + 10); break; default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); goto bail; } } @@ -1199,12 +1214,12 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next if ((c & 0xfc00) == 0xd800) { JSON_UNICHR c2 = 0; if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); + raise_errmsg(ERR_STRING_HIGH_SURROGATE, pystr, end - 5); goto bail; } if (PyUnicode_READ(kind, buf, next++) != '\\' || PyUnicode_READ(kind, buf, next++) != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); + raise_errmsg(ERR_STRING_HIGH_SURROGATE, pystr, end - 5); goto bail; } end += 6; @@ -1223,18 +1238,18 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next case 'F': c2 |= (digit - 'A' + 10); break; default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); goto bail; } } if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); + raise_errmsg(ERR_STRING_HIGH_SURROGATE, pystr, end - 5); goto bail; } c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); } else if ((c & 0xfc00) == 0xdc00) { - raise_errmsg("Unpaired low surrogate", pystr, end - 5); + raise_errmsg(ERR_STRING_LOW_SURROGATE, pystr, end - 5); goto bail; } #endif @@ -1405,6 +1420,7 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ char *encoding = JSON_ASCII_AS_STRING(s->encoding); int strict = PyObject_IsTrue(s->strict); int has_pairs_hook = (s->pairs_hook != Py_None); + int did_parse = 0; Py_ssize_t next_idx; if (has_pairs_hook) { pairs = PyList_New(0); @@ -1422,14 +1438,14 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ /* only loop if the object is non-empty */ if (idx <= end_idx && str[idx] != '}') { + int trailing_delimiter = 0; while (idx <= end_idx) { PyObject *memokey; + trailing_delimiter = 0; /* read key */ if (str[idx] != '"') { - raise_errmsg( - "Expecting property name enclosed in double quotes", - pystr, idx); + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); goto bail; } key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); @@ -1450,7 +1466,7 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ /* skip whitespace between key and : delimiter, read :, skip whitespace */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; if (idx > end_idx || str[idx] != ':') { - raise_errmsg("Expecting ':' delimiter", pystr, idx); + raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx); goto bail; } idx++; @@ -1485,23 +1501,33 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; /* bail if the object is closed or we didn't get the , delimiter */ + did_parse = 1; if (idx > end_idx) break; if (str[idx] == '}') { break; } else if (str[idx] != ',') { - raise_errmsg("Expecting ',' delimiter", pystr, idx); + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); goto bail; } idx++; /* skip whitespace after , delimiter */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + trailing_delimiter = 1; } + if (trailing_delimiter) { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; + } } /* verify that idx < end_idx, str[idx] should be '}' */ if (idx > end_idx || str[idx] != '}') { - raise_errmsg("Expecting object", pystr, end_idx); + if (did_parse) { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx); + } goto bail; } @@ -1555,6 +1581,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss PyObject *val = NULL; int strict = PyObject_IsTrue(s->strict); int has_pairs_hook = (s->pairs_hook != Py_None); + int did_parse = 0; Py_ssize_t next_idx; if (has_pairs_hook) { @@ -1573,14 +1600,14 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss /* only loop if the object is non-empty */ if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') { + int trailing_delimiter = 0; while (idx <= end_idx) { PyObject *memokey; + trailing_delimiter = 0; /* read key */ if (PyUnicode_READ(kind, str, idx) != '"') { - raise_errmsg( - "Expecting property name enclosed in double quotes", - pystr, idx); + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); goto bail; } key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); @@ -1602,7 +1629,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss whitespace */ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') { - raise_errmsg("Expecting ':' delimiter", pystr, idx); + raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx); goto bail; } idx++; @@ -1638,24 +1665,34 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss /* bail if the object is closed or we didn't get the , delimiter */ + did_parse = 1; if (idx > end_idx) break; if (PyUnicode_READ(kind, str, idx) == '}') { break; } else if (PyUnicode_READ(kind, str, idx) != ',') { - raise_errmsg("Expecting ',' delimiter", pystr, idx); + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); goto bail; } idx++; /* skip whitespace after , delimiter */ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + trailing_delimiter = 1; } + if (trailing_delimiter) { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; + } } /* verify that idx < end_idx, str[idx] should be '}' */ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') { - raise_errmsg("Expecting object", pystr, end_idx); + if (did_parse) { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx); + } goto bail; } @@ -1712,15 +1749,12 @@ _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t /* only loop if the array is non-empty */ if (idx <= end_idx && str[idx] != ']') { + int trailing_delimiter = 0; while (idx <= end_idx) { - + trailing_delimiter = 0; /* read any JSON term and de-tuplefy the (rval, idx) */ val = scan_once_str(s, pystr, idx, &next_idx); if (val == NULL) { - if (PyErr_ExceptionMatches(PyExc_StopIteration)) { - PyErr_Clear(); - raise_errmsg("Expecting object", pystr, idx); - } goto bail; } @@ -1739,19 +1773,28 @@ _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t break; } else if (str[idx] != ',') { - raise_errmsg("Expecting ',' delimiter", pystr, idx); + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); goto bail; } idx++; /* skip whitespace after , */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + trailing_delimiter = 1; } + if (trailing_delimiter) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + goto bail; + } } /* verify that idx < end_idx, str[idx] should be ']' */ if (idx > end_idx || str[idx] != ']') { - raise_errmsg("Expecting object", pystr, end_idx); + if (PyList_GET_SIZE(rval)) { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx); + } goto bail; } *next_idx_ptr = idx + 1; @@ -1787,15 +1830,12 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi /* only loop if the array is non-empty */ if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') { + int trailing_delimiter = 0; while (idx <= end_idx) { - + trailing_delimiter = 0; /* read any JSON term */ val = scan_once_unicode(s, pystr, idx, &next_idx); if (val == NULL) { - if (PyErr_ExceptionMatches(PyExc_StopIteration)) { - PyErr_Clear(); - raise_errmsg("Expecting object", pystr, idx); - } goto bail; } @@ -1814,19 +1854,28 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi break; } else if (PyUnicode_READ(kind, str, idx) != ',') { - raise_errmsg("Expecting ',' delimiter", pystr, idx); + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); goto bail; } idx++; /* skip whitespace after , */ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + trailing_delimiter = 1; } + if (trailing_delimiter) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + goto bail; + } } /* verify that idx < end_idx, str[idx] should be ']' */ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') { - raise_errmsg("Expecting object", pystr, end_idx); + if (PyList_GET_SIZE(rval)) { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx); + } goto bail; } *next_idx_ptr = idx + 1; @@ -1886,11 +1935,11 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz /* read a sign if it's there, make sure it's not the end of the string */ if (str[idx] == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); + if (idx >= end_idx) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } + idx++; } /* read as many integer digits as we find as long as it doesn't start with 0 */ @@ -1904,7 +1953,7 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz } /* no integer digits, error */ else { - PyErr_SetNone(PyExc_StopIteration); + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } @@ -1993,11 +2042,11 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ /* read a sign if it's there, make sure it's not the end of the string */ if (PyUnicode_READ(kind, str, idx) == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); + if (idx >= end_idx) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } + idx++; } /* read as many integer digits as we find as long as it doesn't start with 0 */ @@ -2014,7 +2063,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ } else { /* no integer digits, error */ - PyErr_SetNone(PyExc_StopIteration); + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } @@ -2097,7 +2146,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n PyObject *rval = NULL; int fallthrough = 0; if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } switch (str[idx]) { @@ -2205,7 +2254,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ PyObject *rval = NULL; int fallthrough = 0; if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } switch (PyUnicode_READ(kind, str, idx)) { diff --git a/simplejson/decoder.py b/simplejson/decoder.py index d5a1968..54ced0a 100644 --- a/simplejson/decoder.py +++ b/simplejson/decoder.py @@ -5,7 +5,8 @@ import re import sys import struct from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr -from .scanner import make_scanner +from .scanner import make_scanner, JSONDecodeError + def _import_c_scanstring(): try: from ._speedups import scanstring @@ -14,6 +15,8 @@ def _import_c_scanstring(): return None c_scanstring = _import_c_scanstring() +# NOTE (3.1.0): JSONDecodeError may still be imported from this module for +# compatibility, but it was never in the __all__ __all__ = ['JSONDecoder'] FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL @@ -29,57 +32,6 @@ def _floatconstants(): NaN, PosInf, NegInf = _floatconstants() - -class JSONDecodeError(ValueError): - """Subclass of ValueError with the following additional properties: - - msg: The unformatted error message - doc: The JSON document being parsed - pos: The start index of doc where parsing failed - end: The end index of doc where parsing failed (may be None) - lineno: The line corresponding to pos - colno: The column corresponding to pos - endlineno: The line corresponding to end (may be None) - endcolno: The column corresponding to end (may be None) - - """ - def __init__(self, msg, doc, pos, end=None): - ValueError.__init__(self, errmsg(msg, doc, pos, end=end)) - self.msg = msg - self.doc = doc - self.pos = pos - self.end = end - self.lineno, self.colno = linecol(doc, pos) - if end is not None: - self.endlineno, self.endcolno = linecol(doc, end) - else: - self.endlineno, self.endcolno = None, None - - -def linecol(doc, pos): - lineno = doc.count('\n', 0, pos) + 1 - if lineno == 1: - colno = pos + 1 - else: - colno = pos - doc.rindex('\n', 0, pos) - return lineno, colno - - -def errmsg(msg, doc, pos, end=None): - # Note that this function is called from _speedups - lineno, colno = linecol(doc, pos) - if end is None: - #fmt = '{0}: line {1} column {2} (char {3})' - #return fmt.format(msg, lineno, colno, pos) - fmt = '%s: line %d column %d (char %d)' - return fmt % (msg, lineno, colno, pos) - endlineno, endcolno = linecol(doc, end) - #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' - #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) - fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' - return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) - - _CONSTANTS = { '-Infinity': NegInf, 'Infinity': PosInf, @@ -128,8 +80,7 @@ def py_scanstring(s, end, encoding=None, strict=True, break elif terminator != '\\': if strict: - msg = "Invalid control character %r at" % (terminator,) - #msg = "Invalid control character {0!r} at".format(terminator) + msg = "Invalid control character %r at" raise JSONDecodeError(msg, s, end) else: _append(terminator) @@ -144,26 +95,25 @@ def py_scanstring(s, end, encoding=None, strict=True, try: char = _b[esc] except KeyError: - msg = "Invalid \\escape: " + repr(esc) + msg = "Invalid \\X escape sequence %r" raise JSONDecodeError(msg, s, end) end += 1 else: # Unicode escape sequence + msg = "Invalid \\uXXXX escape sequence" esc = s[end + 1:end + 5] next_end = end + 5 if len(esc) != 4: - msg = "Invalid \\uXXXX escape" raise JSONDecodeError(msg, s, end) try: uni = int(esc, 16) except ValueError: - msg = "Invalid \\uXXXX escape" raise JSONDecodeError(msg, s, end) # Check for surrogate pair on UCS-4 systems if _maxunicode > 65535: unimask = uni & 0xfc00 if unimask == 0xd800: - msg = "Invalid \\uXXXX\\uXXXX surrogate pair" + msg = "Unpaired high surrogate" if not s[end + 5:end + 7] == '\\u': raise JSONDecodeError(msg, s, end) esc2 = s[end + 7:end + 11] @@ -174,7 +124,6 @@ def py_scanstring(s, end, encoding=None, strict=True, except ValueError: raise JSONDecodeError(msg, s, end) if uni2 & 0xfc00 != 0xdc00: - msg = "Unpaired high surrogate" raise JSONDecodeError(msg, s, end) uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) next_end += 6 @@ -246,10 +195,7 @@ def JSONObject(state, encoding, strict, scan_once, object_hook, except IndexError: pass - try: - value, end = scan_once(s, end) - except StopIteration: - raise JSONDecodeError("Expecting object", s, end) + value, end = scan_once(s, end) pairs.append((key, value)) try: @@ -264,7 +210,7 @@ def JSONObject(state, encoding, strict, scan_once, object_hook, if nextchar == '}': break elif nextchar != ',': - raise JSONDecodeError("Expecting ',' delimiter", s, end - 1) + raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1) try: nextchar = s[end] @@ -301,12 +247,11 @@ def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): # Look-ahead for trivial empty array if nextchar == ']': return values, end + 1 + elif nextchar == '': + raise JSONDecodeError("Expecting value or ']'", s, end) _append = values.append while True: - try: - value, end = scan_once(s, end) - except StopIteration: - raise JSONDecodeError("Expecting object", s, end) + value, end = scan_once(s, end) _append(value) nextchar = s[end:end + 1] if nextchar in _ws: @@ -316,7 +261,7 @@ def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): if nextchar == ']': break elif nextchar != ',': - raise JSONDecodeError("Expecting ',' delimiter", s, end) + raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1) try: if s[end] in _ws: @@ -445,8 +390,4 @@ class JSONDecoder(object): """ if _PY3 and not isinstance(s, text_type): raise TypeError("Input string must be text, not bytes") - try: - obj, end = self.scan_once(s, idx=_w(s, idx).end()) - except StopIteration: - raise JSONDecodeError("No JSON object could be decoded", s, idx) - return obj, end + return self.scan_once(s, idx=_w(s, idx).end()) diff --git a/simplejson/scanner.py b/simplejson/scanner.py index 54593a3..6a0099f 100644 --- a/simplejson/scanner.py +++ b/simplejson/scanner.py @@ -9,12 +9,59 @@ def _import_c_make_scanner(): return None c_make_scanner = _import_c_make_scanner() -__all__ = ['make_scanner'] +__all__ = ['make_scanner', 'JSONDecodeError'] NUMBER_RE = re.compile( r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', (re.VERBOSE | re.MULTILINE | re.DOTALL)) +class JSONDecodeError(ValueError): + """Subclass of ValueError with the following additional properties: + + msg: The unformatted error message + doc: The JSON document being parsed + pos: The start index of doc where parsing failed + end: The end index of doc where parsing failed (may be None) + lineno: The line corresponding to pos + colno: The column corresponding to pos + endlineno: The line corresponding to end (may be None) + endcolno: The column corresponding to end (may be None) + + """ + # Note that this exception is used from _speedups + def __init__(self, msg, doc, pos, end=None): + ValueError.__init__(self, errmsg(msg, doc, pos, end=end)) + self.msg = msg + self.doc = doc + self.pos = pos + self.end = end + self.lineno, self.colno = linecol(doc, pos) + if end is not None: + self.endlineno, self.endcolno = linecol(doc, end) + else: + self.endlineno, self.endcolno = None, None + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + 1 + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + lineno, colno = linecol(doc, pos) + msg = msg.replace('%r', repr(doc[pos:pos + 1])) + if end is None: + fmt = '%s: line %d column %d (char %d)' + return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + def py_make_scanner(context): parse_object = context.parse_object parse_array = context.parse_array @@ -30,10 +77,11 @@ def py_make_scanner(context): memo = context.memo def _scan_once(string, idx): + errmsg = 'Expecting value' try: nextchar = string[idx] except IndexError: - raise StopIteration + raise JSONDecodeError(errmsg, string, idx) if nextchar == '"': return parse_string(string, idx + 1, encoding, strict) @@ -64,7 +112,7 @@ def py_make_scanner(context): elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': return parse_constant('-Infinity'), idx + 9 else: - raise StopIteration + raise JSONDecodeError(errmsg, string, idx) def scan_once(string, idx): try: diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py index f458a4b..d882ec0 100644 --- a/simplejson/tests/test_fail.py +++ b/simplejson/tests/test_fail.py @@ -99,7 +99,6 @@ class TestFail(TestCase): except json.JSONDecodeError: pass else: - #self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc)) self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) def test_array_decoder_issue46(self): @@ -117,3 +116,43 @@ class TestFail(TestCase): self.fail("Unexpected exception raised %r %s" % (e, e)) else: self.fail("Unexpected success parsing '[,]'") + + def test_truncated_input(self): + test_cases = [ + ('', 'Expecting value', 0), + ('[', "Expecting value or ']'", 1), + ('[42', "Expecting ',' delimiter", 3), + ('[42,', 'Expecting value', 4), + ('["', 'Unterminated string starting at', 1), + ('["spam', 'Unterminated string starting at', 1), + ('["spam"', "Expecting ',' delimiter", 7), + ('["spam",', 'Expecting value', 8), + ('{', 'Expecting property name enclosed in double quotes', 1), + ('{"', 'Unterminated string starting at', 1), + ('{"spam', 'Unterminated string starting at', 1), + ('{"spam"', "Expecting ':' delimiter", 7), + ('{"spam":', 'Expecting value', 8), + ('{"spam":42', "Expecting ',' delimiter", 10), + ('{"spam":42,', 'Expecting property name enclosed in double quotes', + 11), + ('"', 'Unterminated string starting at', 0), + ('"spam', 'Unterminated string starting at', 0), + ('[,', "Expecting value", 1), + ] + for data, msg, idx in test_cases: + try: + json.loads(data) + except json.JSONDecodeError: + e = sys.exc_info()[1] + self.assertEqual( + e.msg[:len(msg)], + msg, + "%r doesn't start with %r for %r" % (e.msg, msg, data)) + self.assertEqual( + e.pos, idx, + "pos %r != %r for %r" % (e.pos, idx, data)) + except Exception: + e = sys.exc_info()[1] + self.fail("Unexpected exception raised %r %s" % (e, e)) + else: + self.fail("Unexpected success parsing '%r'" % (data,)) |