summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBob Ippolito <bob@redivi.com>2013-02-21 14:19:08 -0800
committerBob Ippolito <bob@redivi.com>2013-02-21 14:19:08 -0800
commit104b40fcf6aa39d9ba7b240c3c528d1f85e86ef2 (patch)
tree3cf08092fb2b95376a2b93f88e400c98c53872d8
parent44d7709a31f3a19f3d465411585ebb7be7fa2295 (diff)
downloadsimplejson-104b40fcf6aa39d9ba7b240c3c528d1f85e86ef2.tar.gz
improve truncated input error messages, use JSONDecodeError instead of StopIteration (#61)
-rw-r--r--CHANGES.txt9
-rw-r--r--conf.py6
-rw-r--r--setup.py2
-rw-r--r--simplejson/__init__.py5
-rw-r--r--simplejson/_speedups.c181
-rw-r--r--simplejson/decoder.py89
-rw-r--r--simplejson/scanner.py54
-rw-r--r--simplejson/tests/test_fail.py41
8 files changed, 237 insertions, 150 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index f26da7d..aeb131e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,12 @@
+Version 3.1.0 released XXXX-XX-XX
+
+* Improve error messages for certain kinds of truncated input
+ http://bugs.python.org/issue16009
+* Moved JSONDecodeError to simplejson.scanner (still available for import
+ from simplejson.decoder)
+* Changed scanner to use JSONDecodeError directly rather than
+ StopIteration to improve error messages
+
Version 3.0.9 released 2013-02-21
* Fix an off-by-one error in the colno property of JSONDecodeError
diff --git a/conf.py b/conf.py
index 7ce0097..6f8024d 100644
--- a/conf.py
+++ b/conf.py
@@ -36,15 +36,15 @@ master_doc = 'index'
# General substitutions.
project = 'simplejson'
-copyright = '2012, Bob Ippolito'
+copyright = '2013, Bob Ippolito'
# The default replacements for |version| and |release|, also used in various
# other places throughout the built documents.
#
# The short X.Y version.
-version = '3.0'
+version = '3.1'
# The full version, including alpha/beta/rc tags.
-release = '3.0.9'
+release = '3.1'
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
diff --git a/setup.py b/setup.py
index 59579b8..1ef4f8a 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@ from distutils.errors import CCompilerError, DistutilsExecError, \
DistutilsPlatformError
IS_PYPY = hasattr(sys, 'pypy_translation_info')
-VERSION = '3.0.9'
+VERSION = '3.1.0'
DESCRIPTION = "Simple, fast, extensible JSON encoder/decoder for Python"
with open('README.rst', 'r') as f:
diff --git a/simplejson/__init__.py b/simplejson/__init__.py
index 5574457..a908826 100644
--- a/simplejson/__init__.py
+++ b/simplejson/__init__.py
@@ -99,7 +99,7 @@ Using simplejson.tool from the shell to validate and pretty-print::
Expecting property name: line 1 column 3 (char 2)
"""
from __future__ import absolute_import
-__version__ = '3.0.9'
+__version__ = '3.1.0'
__all__ = [
'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
@@ -110,7 +110,8 @@ __author__ = 'Bob Ippolito <bob@redivi.com>'
from decimal import Decimal
-from .decoder import JSONDecoder, JSONDecodeError
+from .scanner import JSONDecodeError
+from .decoder import JSONDecoder
from .encoder import JSONEncoder, JSONEncoderForHTML
def _import_OrderedDict():
import collections
diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c
index ae6044b..403e08d 100644
--- a/simplejson/_speedups.c
+++ b/simplejson/_speedups.c
@@ -1,3 +1,4 @@
+/* -*- mode: C; c-file-style: "python"; c-basic-offset: 4 -*- */
#include "Python.h"
#include "structmember.h"
@@ -109,6 +110,21 @@ JSON_Accu_FinishAsList(JSON_Accu *acc);
static void
JSON_Accu_Destroy(JSON_Accu *acc);
+#define ERR_EXPECTING_VALUE "Expecting value"
+#define ERR_ARRAY_DELIMITER "Expecting ',' delimiter or ']'"
+#define ERR_ARRAY_VALUE_FIRST "Expecting value or ']'"
+#define ERR_OBJECT_DELIMITER "Expecting ',' delimiter or '}'"
+#define ERR_OBJECT_PROPERTY "Expecting property name enclosed in double quotes"
+#define ERR_OBJECT_PROPERTY_FIRST "Expecting property name enclosed in double quotes or '}'"
+#define ERR_OBJECT_PROPERTY_DELIMITER "Expecting ':' delimiter"
+#define ERR_STRING_UNTERMINATED "Unterminated string starting at"
+#define ERR_STRING_CONTROL "Invalid control character %r at"
+#define ERR_STRING_ESC1 "Invalid \\X escape sequence %r"
+#define ERR_STRING_ESC4 "Invalid \\uXXXX escape sequence"
+#define ERR_STRING_SURROGATE "Invalid \\uXXXX\\uXXXX surrogate pair"
+#define ERR_STRING_HIGH_SURROGATE "Unpaired high surrogate"
+#define ERR_STRING_LOW_SURROGATE "Unpaired low surrogate"
+
typedef struct _PyScannerObject {
PyObject_HEAD
PyObject *encoding;
@@ -746,16 +762,15 @@ bail:
static void
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
{
- /* Use the Python function simplejson.decoder.errmsg to raise a nice
- looking ValueError exception */
+ /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
static PyObject *JSONDecodeError = NULL;
PyObject *exc;
if (JSONDecodeError == NULL) {
- PyObject *decoder = PyImport_ImportModule("simplejson.decoder");
- if (decoder == NULL)
+ PyObject *scanner = PyImport_ImportModule("simplejson.scanner");
+ if (scanner == NULL)
return;
- JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
- Py_DECREF(decoder);
+ JSONDecodeError = PyObject_GetAttrString(scanner, "JSONDecodeError");
+ Py_DECREF(scanner);
if (JSONDecodeError == NULL)
return;
}
@@ -873,7 +888,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
PyObject *strchunk = NULL;
if (len == end) {
- raise_errmsg("Unterminated string starting at", pystr, begin);
+ raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
goto bail;
}
else if (end < 0 || len < end) {
@@ -889,7 +904,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
break;
}
else if (strict && c <= 0x1f) {
- raise_errmsg("Invalid control character at", pystr, next);
+ raise_errmsg(ERR_STRING_CONTROL, pystr, next);
goto bail;
}
else if (c > 0x7f) {
@@ -897,7 +912,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
}
}
if (!(c == '"' || c == '\\')) {
- raise_errmsg("Unterminated string starting at", pystr, begin);
+ raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
goto bail;
}
/* Pick up this chunk if it's not zero length */
@@ -936,7 +951,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
break;
}
if (next == len) {
- raise_errmsg("Unterminated string starting at", pystr, begin);
+ raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
goto bail;
}
c = buf[next];
@@ -955,7 +970,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
default: c = 0;
}
if (c == 0) {
- raise_errmsg("Invalid \\escape", pystr, end - 2);
+ raise_errmsg(ERR_STRING_ESC1, pystr, end - 2);
goto bail;
}
}
@@ -964,7 +979,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
next++;
end = next + 4;
if (end >= len) {
- raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
+ raise_errmsg(ERR_STRING_ESC4, pystr, next - 1);
goto bail;
}
/* Decode 4 hex digits */
@@ -982,7 +997,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
case 'F':
c |= (digit - 'A' + 10); break;
default:
- raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+ raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
goto bail;
}
}
@@ -991,11 +1006,11 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
if ((c & 0xfc00) == 0xd800) {
JSON_UNICHR c2 = 0;
if (end + 6 >= len) {
- raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+ raise_errmsg(ERR_STRING_HIGH_SURROGATE, pystr, end - 5);
goto bail;
}
if (buf[next++] != '\\' || buf[next++] != 'u') {
- raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+ raise_errmsg(ERR_STRING_HIGH_SURROGATE, pystr, end - 5);
goto bail;
}
end += 6;
@@ -1014,18 +1029,18 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
case 'F':
c2 |= (digit - 'A' + 10); break;
default:
- raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+ raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
goto bail;
}
}
if ((c2 & 0xfc00) != 0xdc00) {
- raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+ raise_errmsg(ERR_STRING_HIGH_SURROGATE, pystr, end - 5);
goto bail;
}
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
}
else if ((c & 0xfc00) == 0xdc00) {
- raise_errmsg("Unpaired low surrogate", pystr, end - 5);
+ raise_errmsg(ERR_STRING_LOW_SURROGATE, pystr, end - 5);
goto bail;
}
#endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */
@@ -1102,7 +1117,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
PyObject *chunk = NULL;
if (len == end) {
- raise_errmsg("Unterminated string starting at", pystr, begin);
+ raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
goto bail;
}
else if (end < 0 || len < end) {
@@ -1118,12 +1133,12 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
break;
}
else if (strict && c <= 0x1f) {
- raise_errmsg("Invalid control character at", pystr, next);
+ raise_errmsg(ERR_STRING_CONTROL, pystr, next);
goto bail;
}
}
if (!(c == '"' || c == '\\')) {
- raise_errmsg("Unterminated string starting at", pystr, begin);
+ raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
goto bail;
}
/* Pick up this chunk if it's not zero length */
@@ -1144,7 +1159,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
break;
}
if (next == len) {
- raise_errmsg("Unterminated string starting at", pystr, begin);
+ raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
goto bail;
}
c = PyUnicode_READ(kind, buf, next);
@@ -1163,7 +1178,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
default: c = 0;
}
if (c == 0) {
- raise_errmsg("Invalid \\escape", pystr, end - 2);
+ raise_errmsg(ERR_STRING_ESC1, pystr, end - 2);
goto bail;
}
}
@@ -1172,7 +1187,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
next++;
end = next + 4;
if (end >= len) {
- raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
+ raise_errmsg(ERR_STRING_ESC4, pystr, next - 1);
goto bail;
}
/* Decode 4 hex digits */
@@ -1190,7 +1205,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
case 'F':
c |= (digit - 'A' + 10); break;
default:
- raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+ raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
goto bail;
}
}
@@ -1199,12 +1214,12 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
if ((c & 0xfc00) == 0xd800) {
JSON_UNICHR c2 = 0;
if (end + 6 >= len) {
- raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+ raise_errmsg(ERR_STRING_HIGH_SURROGATE, pystr, end - 5);
goto bail;
}
if (PyUnicode_READ(kind, buf, next++) != '\\' ||
PyUnicode_READ(kind, buf, next++) != 'u') {
- raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+ raise_errmsg(ERR_STRING_HIGH_SURROGATE, pystr, end - 5);
goto bail;
}
end += 6;
@@ -1223,18 +1238,18 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
case 'F':
c2 |= (digit - 'A' + 10); break;
default:
- raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+ raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
goto bail;
}
}
if ((c2 & 0xfc00) != 0xdc00) {
- raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+ raise_errmsg(ERR_STRING_HIGH_SURROGATE, pystr, end - 5);
goto bail;
}
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
}
else if ((c & 0xfc00) == 0xdc00) {
- raise_errmsg("Unpaired low surrogate", pystr, end - 5);
+ raise_errmsg(ERR_STRING_LOW_SURROGATE, pystr, end - 5);
goto bail;
}
#endif
@@ -1405,6 +1420,7 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
char *encoding = JSON_ASCII_AS_STRING(s->encoding);
int strict = PyObject_IsTrue(s->strict);
int has_pairs_hook = (s->pairs_hook != Py_None);
+ int did_parse = 0;
Py_ssize_t next_idx;
if (has_pairs_hook) {
pairs = PyList_New(0);
@@ -1422,14 +1438,14 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
/* only loop if the object is non-empty */
if (idx <= end_idx && str[idx] != '}') {
+ int trailing_delimiter = 0;
while (idx <= end_idx) {
PyObject *memokey;
+ trailing_delimiter = 0;
/* read key */
if (str[idx] != '"') {
- raise_errmsg(
- "Expecting property name enclosed in double quotes",
- pystr, idx);
+ raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
goto bail;
}
key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
@@ -1450,7 +1466,7 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
/* skip whitespace between key and : delimiter, read :, skip whitespace */
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
if (idx > end_idx || str[idx] != ':') {
- raise_errmsg("Expecting ':' delimiter", pystr, idx);
+ raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx);
goto bail;
}
idx++;
@@ -1485,23 +1501,33 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
/* bail if the object is closed or we didn't get the , delimiter */
+ did_parse = 1;
if (idx > end_idx) break;
if (str[idx] == '}') {
break;
}
else if (str[idx] != ',') {
- raise_errmsg("Expecting ',' delimiter", pystr, idx);
+ raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
goto bail;
}
idx++;
/* skip whitespace after , delimiter */
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ trailing_delimiter = 1;
}
+ if (trailing_delimiter) {
+ raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
+ goto bail;
+ }
}
/* verify that idx <= end_idx and that str[idx] is '}' */
if (idx > end_idx || str[idx] != '}') {
- raise_errmsg("Expecting object", pystr, end_idx);
+ if (did_parse) {
+ raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
+ } else {
+ raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx);
+ }
goto bail;
}
@@ -1555,6 +1581,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
PyObject *val = NULL;
int strict = PyObject_IsTrue(s->strict);
int has_pairs_hook = (s->pairs_hook != Py_None);
+ int did_parse = 0;
Py_ssize_t next_idx;
if (has_pairs_hook) {
@@ -1573,14 +1600,14 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
/* only loop if the object is non-empty */
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') {
+ int trailing_delimiter = 0;
while (idx <= end_idx) {
PyObject *memokey;
+ trailing_delimiter = 0;
/* read key */
if (PyUnicode_READ(kind, str, idx) != '"') {
- raise_errmsg(
- "Expecting property name enclosed in double quotes",
- pystr, idx);
+ raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
goto bail;
}
key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
@@ -1602,7 +1629,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
whitespace */
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
- raise_errmsg("Expecting ':' delimiter", pystr, idx);
+ raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx);
goto bail;
}
idx++;
@@ -1638,24 +1665,34 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
/* bail if the object is closed or we didn't get the ,
delimiter */
+ did_parse = 1;
if (idx > end_idx) break;
if (PyUnicode_READ(kind, str, idx) == '}') {
break;
}
else if (PyUnicode_READ(kind, str, idx) != ',') {
- raise_errmsg("Expecting ',' delimiter", pystr, idx);
+ raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
goto bail;
}
idx++;
/* skip whitespace after , delimiter */
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
+ trailing_delimiter = 1;
}
+ if (trailing_delimiter) {
+ raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
+ goto bail;
+ }
}
/* verify that idx <= end_idx and that str[idx] is '}' */
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
- raise_errmsg("Expecting object", pystr, end_idx);
+ if (did_parse) {
+ raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
+ } else {
+ raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx);
+ }
goto bail;
}
@@ -1712,15 +1749,12 @@ _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t
/* only loop if the array is non-empty */
if (idx <= end_idx && str[idx] != ']') {
+ int trailing_delimiter = 0;
while (idx <= end_idx) {
-
+ trailing_delimiter = 0;
/* read any JSON term and de-tuplefy the (rval, idx) */
val = scan_once_str(s, pystr, idx, &next_idx);
if (val == NULL) {
- if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
- PyErr_Clear();
- raise_errmsg("Expecting object", pystr, idx);
- }
goto bail;
}
@@ -1739,19 +1773,28 @@ _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t
break;
}
else if (str[idx] != ',') {
- raise_errmsg("Expecting ',' delimiter", pystr, idx);
+ raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
goto bail;
}
idx++;
/* skip whitespace after , */
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ trailing_delimiter = 1;
}
+ if (trailing_delimiter) {
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+ goto bail;
+ }
}
/* verify that idx <= end_idx and that str[idx] is ']' */
if (idx > end_idx || str[idx] != ']') {
- raise_errmsg("Expecting object", pystr, end_idx);
+ if (PyList_GET_SIZE(rval)) {
+ raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
+ } else {
+ raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx);
+ }
goto bail;
}
*next_idx_ptr = idx + 1;
@@ -1787,15 +1830,12 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
/* only loop if the array is non-empty */
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') {
+ int trailing_delimiter = 0;
while (idx <= end_idx) {
-
+ trailing_delimiter = 0;
/* read any JSON term */
val = scan_once_unicode(s, pystr, idx, &next_idx);
if (val == NULL) {
- if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
- PyErr_Clear();
- raise_errmsg("Expecting object", pystr, idx);
- }
goto bail;
}
@@ -1814,19 +1854,28 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
break;
}
else if (PyUnicode_READ(kind, str, idx) != ',') {
- raise_errmsg("Expecting ',' delimiter", pystr, idx);
+ raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
goto bail;
}
idx++;
/* skip whitespace after , */
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
+ trailing_delimiter = 1;
}
+ if (trailing_delimiter) {
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+ goto bail;
+ }
}
/* verify that idx <= end_idx and that str[idx] is ']' */
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
- raise_errmsg("Expecting object", pystr, end_idx);
+ if (PyList_GET_SIZE(rval)) {
+ raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
+ } else {
+ raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx);
+ }
goto bail;
}
*next_idx_ptr = idx + 1;
@@ -1886,11 +1935,11 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz
/* read a sign if it's there, make sure it's not the end of the string */
if (str[idx] == '-') {
- idx++;
- if (idx > end_idx) {
- PyErr_SetNone(PyExc_StopIteration);
+ if (idx >= end_idx) {
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
return NULL;
}
+ idx++;
}
/* read as many integer digits as we find as long as it doesn't start with 0 */
@@ -1904,7 +1953,7 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz
}
/* no integer digits, error */
else {
- PyErr_SetNone(PyExc_StopIteration);
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
return NULL;
}
@@ -1993,11 +2042,11 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
/* read a sign if it's there, make sure it's not the end of the string */
if (PyUnicode_READ(kind, str, idx) == '-') {
- idx++;
- if (idx > end_idx) {
- PyErr_SetNone(PyExc_StopIteration);
+ if (idx >= end_idx) {
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
return NULL;
}
+ idx++;
}
/* read as many integer digits as we find as long as it doesn't start with 0 */
@@ -2014,7 +2063,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
}
else {
/* no integer digits, error */
- PyErr_SetNone(PyExc_StopIteration);
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
return NULL;
}
@@ -2097,7 +2146,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n
PyObject *rval = NULL;
int fallthrough = 0;
if (idx >= length) {
- PyErr_SetNone(PyExc_StopIteration);
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
return NULL;
}
switch (str[idx]) {
@@ -2205,7 +2254,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
PyObject *rval = NULL;
int fallthrough = 0;
if (idx >= length) {
- PyErr_SetNone(PyExc_StopIteration);
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
return NULL;
}
switch (PyUnicode_READ(kind, str, idx)) {
diff --git a/simplejson/decoder.py b/simplejson/decoder.py
index d5a1968..54ced0a 100644
--- a/simplejson/decoder.py
+++ b/simplejson/decoder.py
@@ -5,7 +5,8 @@ import re
import sys
import struct
from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr
-from .scanner import make_scanner
+from .scanner import make_scanner, JSONDecodeError
+
def _import_c_scanstring():
try:
from ._speedups import scanstring
@@ -14,6 +15,8 @@ def _import_c_scanstring():
return None
c_scanstring = _import_c_scanstring()
+# NOTE (3.1.0): JSONDecodeError may still be imported from this module for
+# compatibility, but it was never listed in __all__
__all__ = ['JSONDecoder']
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
@@ -29,57 +32,6 @@ def _floatconstants():
NaN, PosInf, NegInf = _floatconstants()
-
-class JSONDecodeError(ValueError):
- """Subclass of ValueError with the following additional properties:
-
- msg: The unformatted error message
- doc: The JSON document being parsed
- pos: The start index of doc where parsing failed
- end: The end index of doc where parsing failed (may be None)
- lineno: The line corresponding to pos
- colno: The column corresponding to pos
- endlineno: The line corresponding to end (may be None)
- endcolno: The column corresponding to end (may be None)
-
- """
- def __init__(self, msg, doc, pos, end=None):
- ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
- self.msg = msg
- self.doc = doc
- self.pos = pos
- self.end = end
- self.lineno, self.colno = linecol(doc, pos)
- if end is not None:
- self.endlineno, self.endcolno = linecol(doc, end)
- else:
- self.endlineno, self.endcolno = None, None
-
-
-def linecol(doc, pos):
- lineno = doc.count('\n', 0, pos) + 1
- if lineno == 1:
- colno = pos + 1
- else:
- colno = pos - doc.rindex('\n', 0, pos)
- return lineno, colno
-
-
-def errmsg(msg, doc, pos, end=None):
- # Note that this function is called from _speedups
- lineno, colno = linecol(doc, pos)
- if end is None:
- #fmt = '{0}: line {1} column {2} (char {3})'
- #return fmt.format(msg, lineno, colno, pos)
- fmt = '%s: line %d column %d (char %d)'
- return fmt % (msg, lineno, colno, pos)
- endlineno, endcolno = linecol(doc, end)
- #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
- #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
- fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
- return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
-
-
_CONSTANTS = {
'-Infinity': NegInf,
'Infinity': PosInf,
@@ -128,8 +80,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
break
elif terminator != '\\':
if strict:
- msg = "Invalid control character %r at" % (terminator,)
- #msg = "Invalid control character {0!r} at".format(terminator)
+ msg = "Invalid control character %r at"
raise JSONDecodeError(msg, s, end)
else:
_append(terminator)
@@ -144,26 +95,25 @@ def py_scanstring(s, end, encoding=None, strict=True,
try:
char = _b[esc]
except KeyError:
- msg = "Invalid \\escape: " + repr(esc)
+ msg = "Invalid \\X escape sequence %r"
raise JSONDecodeError(msg, s, end)
end += 1
else:
# Unicode escape sequence
+ msg = "Invalid \\uXXXX escape sequence"
esc = s[end + 1:end + 5]
next_end = end + 5
if len(esc) != 4:
- msg = "Invalid \\uXXXX escape"
raise JSONDecodeError(msg, s, end)
try:
uni = int(esc, 16)
except ValueError:
- msg = "Invalid \\uXXXX escape"
raise JSONDecodeError(msg, s, end)
# Check for surrogate pair on UCS-4 systems
if _maxunicode > 65535:
unimask = uni & 0xfc00
if unimask == 0xd800:
- msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
+ msg = "Unpaired high surrogate"
if not s[end + 5:end + 7] == '\\u':
raise JSONDecodeError(msg, s, end)
esc2 = s[end + 7:end + 11]
@@ -174,7 +124,6 @@ def py_scanstring(s, end, encoding=None, strict=True,
except ValueError:
raise JSONDecodeError(msg, s, end)
if uni2 & 0xfc00 != 0xdc00:
- msg = "Unpaired high surrogate"
raise JSONDecodeError(msg, s, end)
uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
next_end += 6
@@ -246,10 +195,7 @@ def JSONObject(state, encoding, strict, scan_once, object_hook,
except IndexError:
pass
- try:
- value, end = scan_once(s, end)
- except StopIteration:
- raise JSONDecodeError("Expecting object", s, end)
+ value, end = scan_once(s, end)
pairs.append((key, value))
try:
@@ -264,7 +210,7 @@ def JSONObject(state, encoding, strict, scan_once, object_hook,
if nextchar == '}':
break
elif nextchar != ',':
- raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
+ raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)
try:
nextchar = s[end]
@@ -301,12 +247,11 @@ def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
# Look-ahead for trivial empty array
if nextchar == ']':
return values, end + 1
+ elif nextchar == '':
+ raise JSONDecodeError("Expecting value or ']'", s, end)
_append = values.append
while True:
- try:
- value, end = scan_once(s, end)
- except StopIteration:
- raise JSONDecodeError("Expecting object", s, end)
+ value, end = scan_once(s, end)
_append(value)
nextchar = s[end:end + 1]
if nextchar in _ws:
@@ -316,7 +261,7 @@ def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
if nextchar == ']':
break
elif nextchar != ',':
- raise JSONDecodeError("Expecting ',' delimiter", s, end)
+ raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1)
try:
if s[end] in _ws:
@@ -445,8 +390,4 @@ class JSONDecoder(object):
"""
if _PY3 and not isinstance(s, text_type):
raise TypeError("Input string must be text, not bytes")
- try:
- obj, end = self.scan_once(s, idx=_w(s, idx).end())
- except StopIteration:
- raise JSONDecodeError("No JSON object could be decoded", s, idx)
- return obj, end
+ return self.scan_once(s, idx=_w(s, idx).end())
diff --git a/simplejson/scanner.py b/simplejson/scanner.py
index 54593a3..6a0099f 100644
--- a/simplejson/scanner.py
+++ b/simplejson/scanner.py
@@ -9,12 +9,59 @@ def _import_c_make_scanner():
return None
c_make_scanner = _import_c_make_scanner()
-__all__ = ['make_scanner']
+__all__ = ['make_scanner', 'JSONDecodeError']
NUMBER_RE = re.compile(
r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
(re.VERBOSE | re.MULTILINE | re.DOTALL))
+class JSONDecodeError(ValueError):
+ """Subclass of ValueError with the following additional properties:
+
+ msg: The unformatted error message
+ doc: The JSON document being parsed
+ pos: The start index of doc where parsing failed
+ end: The end index of doc where parsing failed (may be None)
+ lineno: The line corresponding to pos
+ colno: The column corresponding to pos
+ endlineno: The line corresponding to end (may be None)
+ endcolno: The column corresponding to end (may be None)
+
+ """
+ # Note that this exception is used from _speedups
+ def __init__(self, msg, doc, pos, end=None):
+ ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
+ self.msg = msg
+ self.doc = doc
+ self.pos = pos
+ self.end = end
+ self.lineno, self.colno = linecol(doc, pos)
+ if end is not None:
+ self.endlineno, self.endcolno = linecol(doc, end)
+ else:
+ self.endlineno, self.endcolno = None, None
+
+
+def linecol(doc, pos):
+ lineno = doc.count('\n', 0, pos) + 1
+ if lineno == 1:
+ colno = pos + 1
+ else:
+ colno = pos - doc.rindex('\n', 0, pos)
+ return lineno, colno
+
+
+def errmsg(msg, doc, pos, end=None):
+ lineno, colno = linecol(doc, pos)
+ msg = msg.replace('%r', repr(doc[pos:pos + 1]))
+ if end is None:
+ fmt = '%s: line %d column %d (char %d)'
+ return fmt % (msg, lineno, colno, pos)
+ endlineno, endcolno = linecol(doc, end)
+ fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
+ return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
+
+
def py_make_scanner(context):
parse_object = context.parse_object
parse_array = context.parse_array
@@ -30,10 +77,11 @@ def py_make_scanner(context):
memo = context.memo
def _scan_once(string, idx):
+ errmsg = 'Expecting value'
try:
nextchar = string[idx]
except IndexError:
- raise StopIteration
+ raise JSONDecodeError(errmsg, string, idx)
if nextchar == '"':
return parse_string(string, idx + 1, encoding, strict)
@@ -64,7 +112,7 @@ def py_make_scanner(context):
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9
else:
- raise StopIteration
+ raise JSONDecodeError(errmsg, string, idx)
def scan_once(string, idx):
try:
diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py
index f458a4b..d882ec0 100644
--- a/simplejson/tests/test_fail.py
+++ b/simplejson/tests/test_fail.py
@@ -99,7 +99,6 @@ class TestFail(TestCase):
except json.JSONDecodeError:
pass
else:
- #self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc))
self.fail("Expected failure for fail%d.json: %r" % (idx, doc))
def test_array_decoder_issue46(self):
@@ -117,3 +116,43 @@ class TestFail(TestCase):
self.fail("Unexpected exception raised %r %s" % (e, e))
else:
self.fail("Unexpected success parsing '[,]'")
+
+ def test_truncated_input(self):
+ test_cases = [
+ ('', 'Expecting value', 0),
+ ('[', "Expecting value or ']'", 1),
+ ('[42', "Expecting ',' delimiter", 3),
+ ('[42,', 'Expecting value', 4),
+ ('["', 'Unterminated string starting at', 1),
+ ('["spam', 'Unterminated string starting at', 1),
+ ('["spam"', "Expecting ',' delimiter", 7),
+ ('["spam",', 'Expecting value', 8),
+ ('{', 'Expecting property name enclosed in double quotes', 1),
+ ('{"', 'Unterminated string starting at', 1),
+ ('{"spam', 'Unterminated string starting at', 1),
+ ('{"spam"', "Expecting ':' delimiter", 7),
+ ('{"spam":', 'Expecting value', 8),
+ ('{"spam":42', "Expecting ',' delimiter", 10),
+ ('{"spam":42,', 'Expecting property name enclosed in double quotes',
+ 11),
+ ('"', 'Unterminated string starting at', 0),
+ ('"spam', 'Unterminated string starting at', 0),
+ ('[,', "Expecting value", 1),
+ ]
+ for data, msg, idx in test_cases:
+ try:
+ json.loads(data)
+ except json.JSONDecodeError:
+ e = sys.exc_info()[1]
+ self.assertEqual(
+ e.msg[:len(msg)],
+ msg,
+ "%r doesn't start with %r for %r" % (e.msg, msg, data))
+ self.assertEqual(
+ e.pos, idx,
+ "pos %r != %r for %r" % (e.pos, idx, data))
+ except Exception:
+ e = sys.exc_info()[1]
+ self.fail("Unexpected exception raised %r %s" % (e, e))
+ else:
+ self.fail("Unexpected success parsing '%r'" % (data,))