start working on improving coverage, remove unused bytes code paths from py3

author: Bob Ippolito <bob@redivi.com> 2012-12-28 19:17:54 -0800
committer: Bob Ippolito <bob@redivi.com> 2012-12-28 19:17:54 -0800
commit: 0bc9e8a586a6c2e4a85b3896a71fc04ed718cbfd (patch)
tree: 8b146fcb9078ad4277c3b6c75856482053d43798
parent: 77850638e824d23e3301962b3b142fe2e0520abb (diff)
download: simplejson-0bc9e8a586a6c2e4a85b3896a71fc04ed718cbfd.tar.gz
5 files changed, 78 insertions, 24 deletions
diff --git a/.gitignore b/.gitignore
index c1f7933..fb20d6f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@
 /MANIFEST
 /.coverage
 /coverage.xml
+/htmlcov
 /build
 /dist
 /docs
diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c
index c3b6c09..6f6d3ad 100644
--- a/simplejson/_speedups.c
+++ b/simplejson/_speedups.c
@@ -155,9 +155,17 @@ static PyObject *
 ascii_escape_str(PyObject *pystr);
 static PyObject *
 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
+#if PY_MAJOR_VERSION < 3
 static PyObject *
 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
 static PyObject *
+scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr);
+static PyObject *
+_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+#endif
+static PyObject *
+scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr);
+static PyObject *
 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
 static PyObject *
 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
@@ -684,6 +692,7 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
         Py_CLEAR(chunk); \
     }
 
+#if PY_MAJOR_VERSION < 3
 static PyObject *
 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
 {
@@ -916,7 +925,7 @@ bail:
     Py_XDECREF(chunks);
     return NULL;
 }
-
+#endif /* PY_MAJOR_VERSION < 3 */
 
 static PyObject *
 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
@@ -1127,12 +1136,16 @@ py_scanstring(PyObject* self UNUSED, PyObject *args)
     if (encoding == NULL) {
         encoding = DEFAULT_ENCODING;
     }
-    if (PyString_Check(pystr)) {
-        rval = scanstring_str(pystr, end, encoding, strict, &next_end);
-    }
-    else if (PyUnicode_Check(pystr)) {
+    if (PyUnicode_Check(pystr)) {
         rval = scanstring_unicode(pystr, end, strict, &next_end);
     }
+#if PY_MAJOR_VERSION < 3
+    /* Using a bytes input is unsupported for scanning in Python 3.
+       It is coerced to str in the decoder before it gets here. */
+    else if (PyString_Check(pystr)) {
+        rval = scanstring_str(pystr, end, encoding, strict, &next_end);
+    }
+#endif
     else {
         PyErr_Format(PyExc_TypeError,
                      "first argument must be a string, not %.80s",
@@ -1209,8 +1222,10 @@ scanner_clear(PyObject *self)
     return 0;
 }
 
+#if PY_MAJOR_VERSION < 3
 static PyObject *
-_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
     /* Read a JSON object from PyString pystr.
     idx is the index of the first character after the opening curly brace.
     *next_idx_ptr is a return-by-reference index to the first character after
@@ -1357,9 +1372,11 @@ bail:
     Py_XDECREF(pairs);
     return NULL;
 }
+#endif /* PY_MAJOR_VERSION < 3 */
 
 static PyObject *
-_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
     /* Read a JSON object from PyUnicode pystr.
     idx is the index of the first character after the opening curly brace.
     *next_idx_ptr is a return-by-reference index to the first character after
@@ -1509,8 +1526,10 @@ bail:
     return NULL;
 }
 
+#if PY_MAJOR_VERSION < 3
 static PyObject *
-_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
     /* Read a JSON array from PyString pystr.
     idx is the index of the first character after the opening brace.
     *next_idx_ptr is a return-by-reference index to the first character after
@@ -1580,9 +1599,11 @@ bail:
     Py_DECREF(rval);
     return NULL;
 }
+#endif /* PY_MAJOR_VERSION < 3 */
 
 static PyObject *
-_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
     /* Read a JSON array from PyString pystr.
     idx is the index of the first character after the opening brace.
     *next_idx_ptr is a return-by-reference index to the first character after
@@ -1654,7 +1675,8 @@ bail:
 }
 
 static PyObject *
-_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
     /* Read a JSON constant from PyString pystr.
     constant is the constant string that was found
         ("NaN", "Infinity", "-Infinity").
@@ -1679,8 +1701,10 @@ _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *
     return rval;
 }
 
+#if PY_MAJOR_VERSION < 3
 static PyObject *
-_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
+_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr)
+{
     /* Read a JSON number from PyString pystr.
     idx is the index of the first character of the number
     *next_idx_ptr is a return-by-reference index to the first character after
@@ -1781,6 +1805,7 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz
     *next_idx_ptr = idx;
     return rval;
 }
+#endif /* PY_MAJOR_VERSION < 3 */
 
 static PyObject *
 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
@@ -1877,6 +1902,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
     return rval;
 }
 
+#if PY_MAJOR_VERSION < 3
 static PyObject *
 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
 {
@@ -1976,6 +2002,8 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n
     Py_LeaveRecursiveCall();
     return rval;
 }
+#endif /* PY_MAJOR_VERSION < 3 */
+
 
 static PyObject *
 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
@@ -2091,12 +2119,14 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
         return NULL;
 
-    if (PyString_Check(pystr)) {
-        rval = scan_once_str(s, pystr, idx, &next_idx);
-    }
-    else if (PyUnicode_Check(pystr)) {
+    if (PyUnicode_Check(pystr)) {
         rval = scan_once_unicode(s, pystr, idx, &next_idx);
     }
+#if PY_MAJOR_VERSION < 3
+    else if (PyString_Check(pystr)) {
+        rval = scan_once_str(s, pystr, idx, &next_idx);
+    }
+#endif /* PY_MAJOR_VERSION < 3 */
     else {
         PyErr_Format(PyExc_TypeError,
                  "first argument must be a string, not %.80s",
diff --git a/simplejson/decoder.py b/simplejson/decoder.py
index bd14788..c844b3c 100644
--- a/simplejson/decoder.py
+++ b/simplejson/decoder.py
@@ -95,7 +95,8 @@ BACKSLASH = {
 DEFAULT_ENCODING = "utf-8"
 
 def py_scanstring(s, end, encoding=None, strict=True,
-        _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join):
+        _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join,
+        _PY3=PY3, _maxunicode=sys.maxunicode):
     """Scan the string s for a JSON string. End is the index of the
     character in s after the quote that started the JSON string.
     Unescapes all valid JSON string escape sequences and raises ValueError
@@ -118,7 +119,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
         content, terminator = chunk.groups()
         # Content is contains zero or more unescaped string characters
         if content:
-            if not isinstance(content, text_type):
+            if not _PY3 and not isinstance(content, text_type):
                 content = text_type(content, encoding)
             _append(content)
         # Terminator is the end of string, a literal control character,
@@ -155,7 +156,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
                 raise JSONDecodeError(msg, s, end)
             uni = int(esc, 16)
             # Check for surrogate pair on UCS-4 systems
-            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
+            if 0xd800 <= uni <= 0xdbff and _maxunicode > 65535:
                 msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                 if not s[end + 5:end + 7] == '\\u':
                     raise JSONDecodeError(msg, s, end)
@@ -388,6 +389,8 @@ class JSONDecoder(object):
         ``False`` then control characters will be allowed in strings.
 
         """
+        if encoding is None:
+            encoding = DEFAULT_ENCODING
         self.encoding = encoding
         self.object_hook = object_hook
         self.object_pairs_hook = object_pairs_hook
@@ -407,14 +410,14 @@ class JSONDecoder(object):
 
         """
         if _PY3 and isinstance(s, binary_type):
-            s = s.decode('utf-8')
+            s = s.decode(self.encoding)
         obj, end = self.raw_decode(s)
         end = _w(s, end).end()
         if end != len(s):
             raise JSONDecodeError("Extra data", s, end, len(s))
         return obj
 
-    def raw_decode(self, s, idx=0, _w=WHITESPACE.match):
+    def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
         """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
         beginning with a JSON document) and return a 2-tuple of the Python
         representation and the index in ``s`` where the document ended.
@@ -425,6 +428,8 @@ class JSONDecoder(object):
         have extraneous data at the end.
 
         """
+        if _PY3 and not isinstance(s, text_type):
+            raise TypeError("Input string must be text, not bytes")
         try:
             obj, end = self.scan_once(s, idx=_w(s, idx).end())
         except StopIteration:
diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py
index 064b598..da21365 100644
--- a/simplejson/tests/__init__.py
+++ b/simplejson/tests/__init__.py
@@ -1,17 +1,27 @@
 from __future__ import absolute_import
 import unittest
 import doctest
+import sys
 
 class OptionalExtensionTestSuite(unittest.TestSuite):
     def run(self, result):
         import simplejson
         run = unittest.TestSuite.run
         run(self, result)
-        simplejson._toggle_speedups(False)
-        run(self, result)
-        simplejson._toggle_speedups(True)
+        if simplejson._import_c_make_encoder() is None:
+            TestMissingSpeedups().run(result)
+        else:
+            simplejson._toggle_speedups(False)
+            run(self, result)
+            simplejson._toggle_speedups(True)
         return result
 
+class TestMissingSpeedups(unittest.TestCase):
+    def runTest(self):
+        if hasattr(sys, 'pypy_translation_info'):
+            "PyPy doesn't need speedups! :)"
+        elif hasattr(self, 'skipTest'):
+            self.skipTest('_speedups.so is missing!')
 
 def additional_tests(suite=None):
     import simplejson
@@ -55,7 +65,7 @@ def all_tests_suite():
 
 
 def main():
-    runner = unittest.TextTestRunner()
+    runner = unittest.TextTestRunner(verbosity=1 + sys.argv.count('-v'))
     suite = all_tests_suite()
     raise SystemExit(not runner.run(suite).wasSuccessful())
 
diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py
index 4045fec..297bc1b 100644
--- a/simplejson/tests/test_scanstring.py
+++ b/simplejson/tests/test_scanstring.py
@@ -6,6 +6,14 @@ import simplejson.decoder
 from simplejson.compat import b
 
 class TestScanString(TestCase):
+    # The bytes type is intentionally not used in most of these tests
+    # under Python 3 because the decoder immediately coerces to str before
+    # calling scanstring. In Python 2 we are testing the code paths
+    # for both unicode and str.
+    #
+    # The reason this is done is because Python 3 would require
+    # entirely different code paths for parsing bytes and str.
+    #
     def test_py_scanstring(self):
         self._test_scanstring(simplejson.decoder.py_scanstring)
author	Bob Ippolito <bob@redivi.com>	2012-12-28 19:17:54 -0800
committer	Bob Ippolito <bob@redivi.com>	2012-12-28 19:17:54 -0800
commit	0bc9e8a586a6c2e4a85b3896a71fc04ed718cbfd (patch)
tree	8b146fcb9078ad4277c3b6c75856482053d43798
parent	77850638e824d23e3301962b3b142fe2e0520abb (diff)
download	simplejson-0bc9e8a586a6c2e4a85b3896a71fc04ed718cbfd.tar.gz