diff options
author | Bob Ippolito <bob@redivi.com> | 2012-12-28 19:17:54 -0800 |
---|---|---|
committer | Bob Ippolito <bob@redivi.com> | 2012-12-28 19:17:54 -0800 |
commit | 0bc9e8a586a6c2e4a85b3896a71fc04ed718cbfd (patch) | |
tree | 8b146fcb9078ad4277c3b6c75856482053d43798 | |
parent | 77850638e824d23e3301962b3b142fe2e0520abb (diff) | |
download | simplejson-0bc9e8a586a6c2e4a85b3896a71fc04ed718cbfd.tar.gz |
start working on improving coverage, remove unused bytes code paths from py3
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | simplejson/_speedups.c | 60 | ||||
-rw-r--r-- | simplejson/decoder.py | 15 | ||||
-rw-r--r-- | simplejson/tests/__init__.py | 18 | ||||
-rw-r--r-- | simplejson/tests/test_scanstring.py | 8 |
5 files changed, 78 insertions, 24 deletions
@@ -5,6 +5,7 @@ /MANIFEST /.coverage /coverage.xml +/htmlcov /build /dist /docs diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c index c3b6c09..6f6d3ad 100644 --- a/simplejson/_speedups.c +++ b/simplejson/_speedups.c @@ -155,9 +155,17 @@ static PyObject * ascii_escape_str(PyObject *pystr); static PyObject * py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); +#if PY_MAJOR_VERSION < 3 static PyObject * scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr); +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +#endif +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr); +static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); @@ -684,6 +692,7 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { Py_CLEAR(chunk); \ } +#if PY_MAJOR_VERSION < 3 static PyObject * scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) { @@ -916,7 +925,7 @@ bail: Py_XDECREF(chunks); return NULL; } - +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) @@ -1127,12 +1136,16 @@ py_scanstring(PyObject* self UNUSED, PyObject *args) if (encoding == NULL) { encoding = DEFAULT_ENCODING; } - if (PyString_Check(pystr)) { - rval = scanstring_str(pystr, end, encoding, strict, &next_end); - } - else if (PyUnicode_Check(pystr)) { + if (PyUnicode_Check(pystr)) { rval = scanstring_unicode(pystr, end, strict, &next_end); } +#if PY_MAJOR_VERSION < 3 + /* Using a bytes input is unsupported for scanning in Python 3. + It is coerced to str in the decoder before it gets here. */ + else if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } +#endif else { PyErr_Format(PyExc_TypeError, "first argument must be a string, not %.80s", @@ -1209,8 +1222,10 @@ scanner_clear(PyObject *self) return 0; } +#if PY_MAJOR_VERSION < 3 static PyObject * -_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON object from PyString pystr. idx is the index of the first character after the opening curly brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -1357,9 +1372,11 @@ bail: Py_XDECREF(pairs); return NULL; } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * -_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON object from PyUnicode pystr. idx is the index of the first character after the opening curly brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -1509,8 +1526,10 @@ bail: return NULL; } +#if PY_MAJOR_VERSION < 3 static PyObject * -_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON array from PyString pystr. idx is the index of the first character after the opening brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -1580,9 +1599,11 @@ bail: Py_DECREF(rval); return NULL; } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * -_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON array from PyString pystr. idx is the index of the first character after the opening brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -1654,7 +1675,8 @@ bail: } static PyObject * -_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON constant from PyString pystr. constant is the constant string that was found ("NaN", "Infinity", "-Infinity"). @@ -1679,8 +1701,10 @@ _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t * return rval; } +#if PY_MAJOR_VERSION < 3 static PyObject * -_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON number from PyString pystr. idx is the index of the first character of the number *next_idx_ptr is a return-by-reference index to the first character after @@ -1781,6 +1805,7 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz *next_idx_ptr = idx; return rval; } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { @@ -1877,6 +1902,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ return rval; } +#if PY_MAJOR_VERSION < 3 static PyObject * scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { @@ -1976,6 +2002,8 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n Py_LeaveRecursiveCall(); return rval; } +#endif /* PY_MAJOR_VERSION < 3 */ + static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) @@ -2091,12 +2119,14 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) return NULL; - if (PyString_Check(pystr)) { - rval = scan_once_str(s, pystr, idx, &next_idx); - } - else if (PyUnicode_Check(pystr)) { + if (PyUnicode_Check(pystr)) { rval = scan_once_unicode(s, pystr, idx, &next_idx); } +#if PY_MAJOR_VERSION < 3 + else if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } +#endif /* PY_MAJOR_VERSION < 3 */ else { PyErr_Format(PyExc_TypeError, "first argument must be a string, not %.80s", diff --git a/simplejson/decoder.py b/simplejson/decoder.py index bd14788..c844b3c 100644 --- a/simplejson/decoder.py +++ b/simplejson/decoder.py @@ -95,7 +95,8 @@ BACKSLASH = { DEFAULT_ENCODING = "utf-8" def py_scanstring(s, end, encoding=None, strict=True, - _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join): + _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join, + _PY3=PY3, _maxunicode=sys.maxunicode): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. Unescapes all valid JSON string escape sequences and raises ValueError @@ -118,7 +119,7 @@ def py_scanstring(s, end, encoding=None, strict=True, content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters if content: - if not isinstance(content, text_type): + if not _PY3 and not isinstance(content, text_type): content = text_type(content, encoding) _append(content) # Terminator is the end of string, a literal control character, @@ -155,7 +156,7 @@ def py_scanstring(s, end, encoding=None, strict=True, raise JSONDecodeError(msg, s, end) uni = int(esc, 16) # Check for surrogate pair on UCS-4 systems - if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: + if 0xd800 <= uni <= 0xdbff and _maxunicode > 65535: msg = "Invalid \\uXXXX\\uXXXX surrogate pair" if not s[end + 5:end + 7] == '\\u': raise JSONDecodeError(msg, s, end) @@ -388,6 +389,8 @@ class JSONDecoder(object): ``False`` then control characters will be allowed in strings. """ + if encoding is None: + encoding = DEFAULT_ENCODING self.encoding = encoding self.object_hook = object_hook self.object_pairs_hook = object_pairs_hook @@ -407,14 +410,14 @@ class JSONDecoder(object): """ if _PY3 and isinstance(s, binary_type): - s = s.decode('utf-8') + s = s.decode(self.encoding) obj, end = self.raw_decode(s) end = _w(s, end).end() if end != len(s): raise JSONDecodeError("Extra data", s, end, len(s)) return obj - def raw_decode(self, s, idx=0, _w=WHITESPACE.match): + def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3): """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning with a JSON document) and return a 2-tuple of the Python representation and the index in ``s`` where the document ended. @@ -425,6 +428,8 @@ class JSONDecoder(object): have extraneous data at the end. """ + if _PY3 and not isinstance(s, text_type): + raise TypeError("Input string must be text, not bytes") try: obj, end = self.scan_once(s, idx=_w(s, idx).end()) except StopIteration: diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py index 064b598..da21365 100644 --- a/simplejson/tests/__init__.py +++ b/simplejson/tests/__init__.py @@ -1,17 +1,27 @@ from __future__ import absolute_import import unittest import doctest +import sys class OptionalExtensionTestSuite(unittest.TestSuite): def run(self, result): import simplejson run = unittest.TestSuite.run run(self, result) - simplejson._toggle_speedups(False) - run(self, result) - simplejson._toggle_speedups(True) + if simplejson._import_c_make_encoder() is None: + TestMissingSpeedups().run(result) + else: + simplejson._toggle_speedups(False) + run(self, result) + simplejson._toggle_speedups(True) return result +class TestMissingSpeedups(unittest.TestCase): + def runTest(self): + if hasattr(sys, 'pypy_translation_info'): + "PyPy doesn't need speedups! :)" + elif hasattr(self, 'skipTest'): + self.skipTest('_speedups.so is missing!') def additional_tests(suite=None): import simplejson @@ -55,7 +65,7 @@ def all_tests_suite(): def main(): - runner = unittest.TextTestRunner() + runner = unittest.TextTestRunner(verbosity=1 + sys.argv.count('-v')) suite = all_tests_suite() raise SystemExit(not runner.run(suite).wasSuccessful()) diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py index 4045fec..297bc1b 100644 --- a/simplejson/tests/test_scanstring.py +++ b/simplejson/tests/test_scanstring.py @@ -6,6 +6,14 @@ import simplejson.decoder from simplejson.compat import b class TestScanString(TestCase): + # The bytes type is intentionally not used in most of these tests + # under Python 3 because the decoder immediately coerces to str before + # calling scanstring. In Python 2 we are testing the code paths + # for both unicode and str. + # + # The reason this is done is because Python 3 would require + # entirely different code paths for parsing bytes and str. + # def test_py_scanstring(self): self._test_scanstring(simplejson.decoder.py_scanstring) |