summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBob Ippolito <bob@redivi.com>2007-03-18 04:51:52 +0000
committerBob Ippolito <bob@redivi.com>2007-03-18 04:51:52 +0000
commita0fac116e1e5fd038174312c5b52d15a163fcbc8 (patch)
tree615dcd6d0ab1843cc18507ed731eb00f2a12a13e
parentbe103d0368413157c2ac4df0c8b67e97f735bc34 (diff)
downloadsimplejson-a0fac116e1e5fd038174312c5b52d15a163fcbc8.tar.gz
optional C speedups
git-svn-id: http://simplejson.googlecode.com/svn/trunk@45 a4795897-2c25-0410-b006-0d3caba88fa1
-rw-r--r--setup.cfg4
-rw-r--r--setup.py16
-rw-r--r--simplejson/__init__.py42
-rw-r--r--simplejson/_speedups.c206
-rw-r--r--simplejson/encoder.py41
5 files changed, 291 insertions, 18 deletions
diff --git a/setup.cfg b/setup.cfg
index 406686a..01bb954 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,3 +1,3 @@
[egg_info]
-#tag_build = dev
-#tag_svn_revision = true
+tag_build = dev
+tag_svn_revision = true
diff --git a/setup.py b/setup.py
index 7cf6d49..4a8d7b5 100644
--- a/setup.py
+++ b/setup.py
@@ -3,14 +3,15 @@
import ez_setup
ez_setup.use_setuptools()
-from setuptools import setup, find_packages
+from setuptools import setup, find_packages, Extension, Feature
-VERSION = '1.6'
+VERSION = '1.7'
DESCRIPTION = "Simple, fast, extensible JSON encoder/decoder for Python"
LONG_DESCRIPTION = """
simplejson is a simple, fast, complete, correct and extensible
JSON <http://json.org> encoder and decoder for Python 2.3+. It is
-pure Python code with no dependencies.
+pure Python code with no dependencies, but includes an optional C
+extension for a serious speed boost.
simplejson was formerly known as simple_json, but changed its name to
comply with PEP 8 module naming guidelines.
@@ -31,6 +32,14 @@ Programming Language :: Python
Topic :: Software Development :: Libraries :: Python Modules
""".splitlines()))
+speedups = Feature(
+ "options C speed-enhancement modules",
+ standard=True,
+ ext_modules = [
+ Extension("simplejson._speedups", ["simplejson/_speedups.c"]),
+ ],
+)
+
setup(
name="simplejson",
version=VERSION,
@@ -48,4 +57,5 @@ setup(
entry_points={
'paste.filter_app_factory': ['json = simplejson.jsonfilter:factory'],
},
+ features={'speedups': speedups},
)
diff --git a/simplejson/__init__.py b/simplejson/__init__.py
index dc3d99d..8e4f77b 100644
--- a/simplejson/__init__.py
+++ b/simplejson/__init__.py
@@ -86,7 +86,7 @@ Extending JSONEncoder::
Note that the JSON produced by this module's default settings
is a subset of YAML, so it may be used as a serializer for that as well.
"""
-__version__ = '1.6'
+__version__ = '1.7'
__all__ = [
'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONEncoder',
@@ -95,8 +95,20 @@ __all__ = [
from decoder import JSONDecoder
from encoder import JSONEncoder
+_default_encoder = JSONEncoder(
+ skipkeys=False,
+ ensure_ascii=True,
+ check_circular=True,
+ allow_nan=True,
+ indent=None,
+ separators=None,
+ encoding='utf-8'
+)
+
+
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
- allow_nan=True, cls=None, indent=None, encoding='utf-8', **kw):
+ allow_nan=True, cls=None, indent=None, encoding='utf-8',
+ _iterencode=_default_encoder.iterencode, **kw):
"""
Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
``.write()``-supporting file-like object).
@@ -130,19 +142,27 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg.
"""
- if cls is None:
- cls = JSONEncoder
- iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
- check_circular=check_circular, allow_nan=allow_nan, indent=indent,
- encoding=encoding, **kw).iterencode(obj)
+ # cached encoder
+ if (skipkeys is False and ensure_ascii is True and
+ check_circular is True and allow_nan is True and
+ cls is None and indent is None and separators is None and
+ encoding == 'utf-8' and not kw):
+ iterable = _iterencode(obj)
+ else:
+ if cls is None:
+ cls = JSONEncoder
+ iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
+ check_circular=check_circular, allow_nan=allow_nan, indent=indent,
+ encoding=encoding, **kw).iterencode(obj)
# could accelerate with writelines in some versions of Python, at
# a debuggability cost
for chunk in iterable:
fp.write(chunk)
+
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
- encoding='utf-8', **kw):
+ encoding='utf-8', _encode=_default_encoder.encode, **kw):
"""
Serialize ``obj`` to a JSON formatted ``str``.
@@ -178,6 +198,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg.
"""
+ # cached encoder
+ if (skipkeys is False and ensure_ascii is True and
+ check_circular is True and allow_nan is True and
+ cls is None and indent is None and separators is None and
+ encoding == 'utf-8' and not kw):
+ return _encode(obj)
if cls is None:
cls = JSONEncoder
return cls(
diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c
new file mode 100644
index 0000000..780bb6b
--- /dev/null
+++ b/simplejson/_speedups.c
@@ -0,0 +1,206 @@
+#include "Python.h"
+#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
+typedef int Py_ssize_t;
+#define PY_SSIZE_T_MAX INT_MAX
+#define PY_SSIZE_T_MIN INT_MIN
+#endif
+
+static Py_ssize_t
+ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
+static PyObject *
+ascii_escape_unicode(PyObject *pystr);
+static PyObject *
+ascii_escape_str(PyObject *pystr);
+static PyObject *
+py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr);
+void init_speedups(void);
+
+#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '/' && c != '"')
+
+static Py_ssize_t
+ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) {
+ Py_UNICODE x;
+ output[chars++] = '\\';
+ switch (c) {
+ case '/': output[chars++] = (char)c; break;
+ case '\\': output[chars++] = (char)c; break;
+ case '"': output[chars++] = (char)c; break;
+ case '\b': output[chars++] = 'b'; break;
+ case '\f': output[chars++] = 'f'; break;
+ case '\n': output[chars++] = 'n'; break;
+ case '\r': output[chars++] = 'r'; break;
+ case '\t': output[chars++] = 't'; break;
+ default:
+#ifdef Py_UNICODE_WIDE
+ if (c >= 0x10000) {
+ /* UTF-16 surrogate pair */
+ Py_UNICODE v = c - 0x10000;
+ c = 0xd800 | ((v >> 10) & 0x3ff);
+ output[chars++] = 'u';
+ x = (c & 0xf000) >> 12;
+ output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
+ x = (c & 0x0f00) >> 8;
+ output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
+ x = (c & 0x00f0) >> 4;
+ output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
+ x = (c & 0x000f);
+ output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
+ c = 0xdc00 | (v & 0x3ff);
+ output[chars++] = '\\';
+ }
+#endif
+ output[chars++] = 'u';
+ x = (c & 0xf000) >> 12;
+ output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
+ x = (c & 0x0f00) >> 8;
+ output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
+ x = (c & 0x00f0) >> 4;
+ output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
+ x = (c & 0x000f);
+ output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
+ }
+ return chars;
+}
+
+static PyObject *
+ascii_escape_unicode(PyObject *pystr) {
+ Py_ssize_t i;
+ Py_ssize_t input_chars;
+ Py_ssize_t output_size;
+ Py_ssize_t chars;
+ PyObject *rval;
+ char *output;
+ Py_UNICODE *input_unicode;
+
+ input_chars = PyUnicode_GET_SIZE(pystr);
+ input_unicode = PyUnicode_AS_UNICODE(pystr);
+ /* One char input can be up to 6 chars output, estimate 4 of these */
+ output_size = 32 + input_chars;
+ rval = PyString_FromStringAndSize(NULL, output_size);
+ if (rval == NULL) {
+ return NULL;
+ }
+ output = PyString_AS_STRING(rval);
+ chars = 0;
+ output[chars++] = '"';
+ for (i = 0; i < input_chars; i++) {
+ Py_UNICODE c = input_unicode[i];
+ if (S_CHAR(c)) {
+ output[chars++] = (char)c;
+ } else {
+ chars = ascii_escape_char(c, output, chars);
+ }
+ if (output_size - chars < 7) {
+ /* There's more than four, so let's resize by a lot */
+ output_size *= 2;
+ if (output_size > 2 + (input_chars * 6)) {
+ output_size = 2 + (input_chars * 6);
+ }
+ if (_PyString_Resize(&rval, output_size) == -1) {
+ return NULL;
+ }
+ output = PyString_AS_STRING(rval);
+ }
+ }
+ output[chars++] = '"';
+ if (_PyString_Resize(&rval, chars) == -1) {
+ return NULL;
+ }
+ return rval;
+}
+
+static PyObject *
+ascii_escape_str(PyObject *pystr) {
+ Py_ssize_t i;
+ Py_ssize_t input_chars;
+ Py_ssize_t output_size;
+ Py_ssize_t chars;
+ PyObject *rval;
+ char *output;
+ char *input_str;
+
+ input_chars = PyString_GET_SIZE(pystr);
+ input_str = PyString_AS_STRING(pystr);
+ /* One char input can be up to 6 chars output, estimate 4 of these */
+ output_size = 32 + input_chars;
+ rval = PyString_FromStringAndSize(NULL, output_size);
+ if (rval == NULL) {
+ return NULL;
+ }
+ output = PyString_AS_STRING(rval);
+ chars = 0;
+ output[chars++] = '"';
+ for (i = 0; i < input_chars; i++) {
+ Py_UNICODE c = (Py_UNICODE)input_str[i];
+ if (S_CHAR(c)) {
+ output[chars++] = (char)c;
+ } else if (c > 0x7F) {
+ /* We hit a non-ASCII character, bail to unicode mode */
+ PyObject *uni;
+ Py_DECREF(rval);
+ uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
+ if (uni == NULL) {
+ return NULL;
+ }
+ rval = ascii_escape_unicode(uni);
+ Py_DECREF(uni);
+ return rval;
+ } else {
+ chars = ascii_escape_char(c, output, chars);
+ }
+ if (output_size - chars < 7) {
+ /* There's more than four, so let's resize by a lot */
+ output_size *= 2;
+ if (output_size > 2 + (input_chars * 6)) {
+ output_size = 2 + (input_chars * 6);
+ }
+ if (_PyString_Resize(&rval, output_size) == -1) {
+ return NULL;
+ }
+ output = PyString_AS_STRING(rval);
+ }
+ }
+ output[chars++] = '"';
+ if (_PyString_Resize(&rval, chars) == -1) {
+ return NULL;
+ }
+ return rval;
+}
+
+PyDoc_STRVAR(pydoc_encode_basestring_ascii,
+ "encode_basestring_ascii(basestring) -> str\n"
+ "\n"
+ "..."
+);
+
+static PyObject *
+py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) {
+ /* METH_O */
+ if (PyString_Check(pystr)) {
+ return ascii_escape_str(pystr);
+ } else if (PyUnicode_Check(pystr)) {
+ return ascii_escape_unicode(pystr);
+ }
+ PyErr_SetString(PyExc_TypeError, "first argument must be a string");
+ return NULL;
+}
+
+#define DEFN(n, k) \
+ { \
+ #n, \
+ (PyCFunction)py_ ##n, \
+ k, \
+ pydoc_ ##n \
+ }
+static PyMethodDef speedups_methods[] = {
+ DEFN(encode_basestring_ascii, METH_O),
+ {}
+};
+#undef DEFN
+
+void
+init_speedups(void)
+{
+ PyObject *m;
+ m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION);
+}
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
index 92ed9d4..eec9c7f 100644
--- a/simplejson/encoder.py
+++ b/simplejson/encoder.py
@@ -2,6 +2,10 @@
Implementation of JSONEncoder
"""
import re
+try:
+ from simplejson import _speedups
+except ImportError:
+ _speedups = None
ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]')
ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
@@ -56,9 +60,22 @@ def encode_basestring_ascii(s):
try:
return ESCAPE_DCT[s]
except KeyError:
- return '\\u%04x' % (ord(s),)
+ n = ord(s)
+ if n < 0x10000:
+ return '\\u%04x' % (n,)
+ else:
+ # surrogate pair
+ n -= 0x10000
+ s1 = 0xd800 | ((n >> 10) & 0x3ff)
+ s2 = 0xdc00 | (n & 0x3ff)
+ return '\\u%04x\\u%04x' % (s1, s2)
return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
+try:
+ encode_basestring_ascii = _speedups.encode_basestring_ascii
+ _need_utf8 = True
+except AttributeError:
+ _need_utf8 = False
class JSONEncoder(object):
"""
@@ -212,9 +229,13 @@ class JSONEncoder(object):
items = [(k, dct[k]) for k in keys]
else:
items = dct.iteritems()
+ _encoding = self.encoding
+ _do_decode = (_encoding is not None
+ and not (_need_utf8 and _encoding == 'utf-8'))
for key, value in items:
- if self.encoding is not None and isinstance(key, str):
- key = key.decode(self.encoding)
+ if isinstance(key, str):
+ if _do_decode:
+ key = key.decode(_encoding)
elif isinstance(key, basestring):
pass
# JavaScript is weakly typed for these, so it makes sense to
@@ -254,8 +275,10 @@ class JSONEncoder(object):
encoder = encode_basestring_ascii
else:
encoder = encode_basestring
- if self.encoding and isinstance(o, str):
- o = o.decode(self.encoding)
+ _encoding = self.encoding
+ if (_encoding is not None and isinstance(o, str)
+ and not (_need_utf8 and _encoding == 'utf-8')):
+ o = o.decode(_encoding)
yield encoder(o)
elif o is None:
yield 'null'
@@ -315,6 +338,14 @@ class JSONEncoder(object):
>>> JSONEncoder().encode({"foo": ["bar", "baz"]})
'{"foo":["bar", "baz"]}'
"""
+ # This is for extremely simple cases and benchmarks...
+ if isinstance(o, basestring):
+ if isinstance(o, str):
+ _encoding = self.encoding
+ if (_encoding is not None
+ and not (_encoding == 'utf-8' and _need_utf8)):
+ o = o.decode(_encoding)
+ return encode_basestring_ascii(o)
# This doesn't pass the iterator directly to ''.join() because it
# sucks at reporting exceptions. It's going to do this internally
# anyway because it uses PySequence_Fast or similar.