summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Bob Ippolito <bob@redivi.com> 2012-12-28 19:17:54 -0800
committer Bob Ippolito <bob@redivi.com> 2012-12-28 19:17:54 -0800
commit 0bc9e8a586a6c2e4a85b3896a71fc04ed718cbfd (patch)
tree 8b146fcb9078ad4277c3b6c75856482053d43798
parent 77850638e824d23e3301962b3b142fe2e0520abb (diff)
download simplejson-0bc9e8a586a6c2e4a85b3896a71fc04ed718cbfd.tar.gz
start working on improving coverage, remove unused bytes code paths from py3
-rw-r--r-- .gitignore 1
-rw-r--r-- simplejson/_speedups.c 60
-rw-r--r-- simplejson/decoder.py 15
-rw-r--r-- simplejson/tests/__init__.py 18
-rw-r--r-- simplejson/tests/test_scanstring.py 8
5 files changed, 78 insertions, 24 deletions
diff --git a/.gitignore b/.gitignore
index c1f7933..fb20d6f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@
/MANIFEST
/.coverage
/coverage.xml
+/htmlcov
/build
/dist
/docs
diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c
index c3b6c09..6f6d3ad 100644
--- a/simplejson/_speedups.c
+++ b/simplejson/_speedups.c
@@ -155,9 +155,17 @@ static PyObject *
ascii_escape_str(PyObject *pystr);
static PyObject *
py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
+#if PY_MAJOR_VERSION < 3
static PyObject *
scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
static PyObject *
+scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr);
+static PyObject *
+_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+#endif
+static PyObject *
+scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr);
+static PyObject *
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
static PyObject *
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
@@ -684,6 +692,7 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
Py_CLEAR(chunk); \
}
+#if PY_MAJOR_VERSION < 3
static PyObject *
scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
{
@@ -916,7 +925,7 @@ bail:
Py_XDECREF(chunks);
return NULL;
}
-
+#endif /* PY_MAJOR_VERSION < 3 */
static PyObject *
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
@@ -1127,12 +1136,16 @@ py_scanstring(PyObject* self UNUSED, PyObject *args)
if (encoding == NULL) {
encoding = DEFAULT_ENCODING;
}
- if (PyString_Check(pystr)) {
- rval = scanstring_str(pystr, end, encoding, strict, &next_end);
- }
- else if (PyUnicode_Check(pystr)) {
+ if (PyUnicode_Check(pystr)) {
rval = scanstring_unicode(pystr, end, strict, &next_end);
}
+#if PY_MAJOR_VERSION < 3
+ /* Using a bytes input is unsupported for scanning in Python 3.
+ It is coerced to str in the decoder before it gets here. */
+ else if (PyString_Check(pystr)) {
+ rval = scanstring_str(pystr, end, encoding, strict, &next_end);
+ }
+#endif
else {
PyErr_Format(PyExc_TypeError,
"first argument must be a string, not %.80s",
@@ -1209,8 +1222,10 @@ scanner_clear(PyObject *self)
return 0;
}
+#if PY_MAJOR_VERSION < 3
static PyObject *
-_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
/* Read a JSON object from PyString pystr.
idx is the index of the first character after the opening curly brace.
*next_idx_ptr is a return-by-reference index to the first character after
@@ -1357,9 +1372,11 @@ bail:
Py_XDECREF(pairs);
return NULL;
}
+#endif /* PY_MAJOR_VERSION < 3 */
static PyObject *
-_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
/* Read a JSON object from PyUnicode pystr.
idx is the index of the first character after the opening curly brace.
*next_idx_ptr is a return-by-reference index to the first character after
@@ -1509,8 +1526,10 @@ bail:
return NULL;
}
+#if PY_MAJOR_VERSION < 3
static PyObject *
-_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
/* Read a JSON array from PyString pystr.
idx is the index of the first character after the opening brace.
*next_idx_ptr is a return-by-reference index to the first character after
@@ -1580,9 +1599,11 @@ bail:
Py_DECREF(rval);
return NULL;
}
+#endif /* PY_MAJOR_VERSION < 3 */
static PyObject *
-_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
/* Read a JSON array from PyUnicode pystr.
idx is the index of the first character after the opening brace.
*next_idx_ptr is a return-by-reference index to the first character after
@@ -1654,7 +1675,8 @@ bail:
}
static PyObject *
-_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
/* Read a JSON constant from PyString pystr.
constant is the constant string that was found
("NaN", "Infinity", "-Infinity").
@@ -1679,8 +1701,10 @@ _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *
return rval;
}
+#if PY_MAJOR_VERSION < 3
static PyObject *
-_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
+_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr)
+{
/* Read a JSON number from PyString pystr.
idx is the index of the first character of the number
*next_idx_ptr is a return-by-reference index to the first character after
@@ -1781,6 +1805,7 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz
*next_idx_ptr = idx;
return rval;
}
+#endif /* PY_MAJOR_VERSION < 3 */
static PyObject *
_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
@@ -1877,6 +1902,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
return rval;
}
+#if PY_MAJOR_VERSION < 3
static PyObject *
scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
{
@@ -1976,6 +2002,8 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n
Py_LeaveRecursiveCall();
return rval;
}
+#endif /* PY_MAJOR_VERSION < 3 */
+
static PyObject *
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
@@ -2091,12 +2119,14 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
return NULL;
- if (PyString_Check(pystr)) {
- rval = scan_once_str(s, pystr, idx, &next_idx);
- }
- else if (PyUnicode_Check(pystr)) {
+ if (PyUnicode_Check(pystr)) {
rval = scan_once_unicode(s, pystr, idx, &next_idx);
}
+#if PY_MAJOR_VERSION < 3
+ else if (PyString_Check(pystr)) {
+ rval = scan_once_str(s, pystr, idx, &next_idx);
+ }
+#endif /* PY_MAJOR_VERSION < 3 */
else {
PyErr_Format(PyExc_TypeError,
"first argument must be a string, not %.80s",
diff --git a/simplejson/decoder.py b/simplejson/decoder.py
index bd14788..c844b3c 100644
--- a/simplejson/decoder.py
+++ b/simplejson/decoder.py
@@ -95,7 +95,8 @@ BACKSLASH = {
DEFAULT_ENCODING = "utf-8"
def py_scanstring(s, end, encoding=None, strict=True,
- _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join):
+ _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join,
+ _PY3=PY3, _maxunicode=sys.maxunicode):
"""Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string.
Unescapes all valid JSON string escape sequences and raises ValueError
@@ -118,7 +119,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
content, terminator = chunk.groups()
# Content contains zero or more unescaped string characters
if content:
- if not isinstance(content, text_type):
+ if not _PY3 and not isinstance(content, text_type):
content = text_type(content, encoding)
_append(content)
# Terminator is the end of string, a literal control character,
@@ -155,7 +156,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
raise JSONDecodeError(msg, s, end)
uni = int(esc, 16)
# Check for surrogate pair on UCS-4 systems
- if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
+ if 0xd800 <= uni <= 0xdbff and _maxunicode > 65535:
msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
if not s[end + 5:end + 7] == '\\u':
raise JSONDecodeError(msg, s, end)
@@ -388,6 +389,8 @@ class JSONDecoder(object):
``False`` then control characters will be allowed in strings.
"""
+ if encoding is None:
+ encoding = DEFAULT_ENCODING
self.encoding = encoding
self.object_hook = object_hook
self.object_pairs_hook = object_pairs_hook
@@ -407,14 +410,14 @@ class JSONDecoder(object):
"""
if _PY3 and isinstance(s, binary_type):
- s = s.decode('utf-8')
+ s = s.decode(self.encoding)
obj, end = self.raw_decode(s)
end = _w(s, end).end()
if end != len(s):
raise JSONDecodeError("Extra data", s, end, len(s))
return obj
- def raw_decode(self, s, idx=0, _w=WHITESPACE.match):
+ def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
"""Decode a JSON document from ``s`` (a ``str`` or ``unicode``
beginning with a JSON document) and return a 2-tuple of the Python
representation and the index in ``s`` where the document ended.
@@ -425,6 +428,8 @@ class JSONDecoder(object):
have extraneous data at the end.
"""
+ if _PY3 and not isinstance(s, text_type):
+ raise TypeError("Input string must be text, not bytes")
try:
obj, end = self.scan_once(s, idx=_w(s, idx).end())
except StopIteration:
diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py
index 064b598..da21365 100644
--- a/simplejson/tests/__init__.py
+++ b/simplejson/tests/__init__.py
@@ -1,17 +1,27 @@
from __future__ import absolute_import
import unittest
import doctest
+import sys
class OptionalExtensionTestSuite(unittest.TestSuite):
def run(self, result):
import simplejson
run = unittest.TestSuite.run
run(self, result)
- simplejson._toggle_speedups(False)
- run(self, result)
- simplejson._toggle_speedups(True)
+ if simplejson._import_c_make_encoder() is None:
+ TestMissingSpeedups().run(result)
+ else:
+ simplejson._toggle_speedups(False)
+ run(self, result)
+ simplejson._toggle_speedups(True)
return result
+class TestMissingSpeedups(unittest.TestCase):
+ def runTest(self):
+ if hasattr(sys, 'pypy_translation_info'):
+ "PyPy doesn't need speedups! :)"
+ elif hasattr(self, 'skipTest'):
+ self.skipTest('_speedups.so is missing!')
def additional_tests(suite=None):
import simplejson
@@ -55,7 +65,7 @@ def all_tests_suite():
def main():
- runner = unittest.TextTestRunner()
+ runner = unittest.TextTestRunner(verbosity=1 + sys.argv.count('-v'))
suite = all_tests_suite()
raise SystemExit(not runner.run(suite).wasSuccessful())
diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py
index 4045fec..297bc1b 100644
--- a/simplejson/tests/test_scanstring.py
+++ b/simplejson/tests/test_scanstring.py
@@ -6,6 +6,14 @@ import simplejson.decoder
from simplejson.compat import b
class TestScanString(TestCase):
+ # The bytes type is intentionally not used in most of these tests
+ # under Python 3 because the decoder immediately coerces to str before
+ # calling scanstring. In Python 2 we are testing the code paths
+ # for both unicode and str.
+ #
+ # This is done because Python 3 would require
+ # entirely different code paths for parsing bytes and str.
+ #
def test_py_scanstring(self):
self._test_scanstring(simplejson.decoder.py_scanstring)