summaryrefslogtreecommitdiff
path: root/simplejson/_speedups.c
diff options
context:
space:
mode:
author: Bob Ippolito <bob@redivi.com> 2009-12-27 11:09:28 +0000
committer: Bob Ippolito <bob@redivi.com> 2009-12-27 11:09:28 +0000
commit: bc73ddee065b44769068e00b7f057aca650c5c23 (patch)
tree: 559d05476d44774d417cf6e00d6b3daf2dad4c7c /simplejson/_speedups.c
parent: bdc1425fcdc600562dc14818cad9627f01d9dd03 (diff)
download: simplejson-bc73ddee065b44769068e00b7f057aca650c5c23.tar.gz
http://bugs.python.org/issue7451
git-svn-id: http://simplejson.googlecode.com/svn/trunk@206 a4795897-2c25-0410-b006-0d3caba88fa1
Diffstat (limited to 'simplejson/_speedups.c')
-rw-r--r-- simplejson/_speedups.c 221
1 files changed, 146 insertions, 75 deletions
diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c
index ee57d9d..50df622 100644
--- a/simplejson/_speedups.c
+++ b/simplejson/_speedups.c
@@ -54,6 +54,7 @@ typedef struct _PyScannerObject {
PyObject *parse_float;
PyObject *parse_int;
PyObject *parse_constant;
+ PyObject *memo;
} PyScannerObject;
static PyMemberDef scanner_members[] = {
@@ -441,6 +442,21 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
return tpl;
}
+/*
+ * APPEND_OLD_CHUNK: flush the pending `chunk` (if any) into the `chunks`
+ * list, creating the list lazily on first use, then reset `chunk` to NULL.
+ *
+ * Expects the expanding function to declare `PyObject *chunk` and
+ * `PyObject *chunks` and to provide a `bail:` label.  On any failure it
+ * jumps to `bail` with `chunk` still owned by the caller.  The bail paths
+ * of the scanstring functions release it with Py_XDECREF(chunk), so it
+ * must NOT also be decref'd here: doing both would drop the same
+ * reference twice on a possibly already-freed object.
+ */
+#define APPEND_OLD_CHUNK \
+    if (chunk != NULL) { \
+        if (chunks == NULL) { \
+            chunks = PyList_New(0); \
+            if (chunks == NULL) { \
+                goto bail; \
+            } \
+        } \
+        if (PyList_Append(chunks, chunk)) { \
+            /* bail owns the cleanup of chunk */ \
+            goto bail; \
+        } \
+        /* the list holds its own reference now; drop ours */ \
+        Py_CLEAR(chunk); \
+    }
+
static PyObject *
scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
{
@@ -459,10 +475,8 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
Py_ssize_t next = begin;
int has_unicode = 0;
char *buf = PyString_AS_STRING(pystr);
- PyObject *chunks = PyList_New(0);
- if (chunks == NULL) {
- goto bail;
- }
+ PyObject *chunks = NULL;
+ PyObject *chunk = NULL;
if (end < 0 || len <= end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
goto bail;
@@ -470,7 +484,6 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
while (1) {
/* Find the end of the string or the next escape */
Py_UNICODE c = 0;
- PyObject *chunk = NULL;
for (next = end; next < len; next++) {
c = (unsigned char)buf[next];
if (c == '"' || c == '\\') {
@@ -490,6 +503,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
}
/* Pick up this chunk if it's not zero length */
if (next != end) {
+ APPEND_OLD_CHUNK
PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
if (strchunk == NULL) {
goto bail;
@@ -504,11 +518,6 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
else {
chunk = strchunk;
}
- if (PyList_Append(chunks, chunk)) {
- Py_DECREF(chunk);
- goto bail;
- }
- Py_DECREF(chunk);
}
next++;
if (c == '"') {
@@ -613,6 +622,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
if (c > 0x7f) {
has_unicode = 1;
}
+ APPEND_OLD_CHUNK
if (has_unicode) {
chunk = PyUnicode_FromUnicode(&c, 1);
if (chunk == NULL) {
@@ -626,22 +636,28 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
goto bail;
}
}
- if (PyList_Append(chunks, chunk)) {
- Py_DECREF(chunk);
+ }
+
+ if (chunks == NULL) {
+ if (chunk != NULL)
+ rval = chunk;
+ else
+ rval = PyString_FromStringAndSize("", 0);
+ }
+ else {
+ APPEND_OLD_CHUNK
+ rval = join_list_string(chunks);
+ if (rval == NULL) {
goto bail;
}
- Py_DECREF(chunk);
+ Py_CLEAR(chunks);
}
- rval = join_list_string(chunks);
- if (rval == NULL) {
- goto bail;
- }
- Py_CLEAR(chunks);
*next_end_ptr = end;
return rval;
bail:
*next_end_ptr = -1;
+ Py_XDECREF(chunk);
Py_XDECREF(chunks);
return NULL;
}
@@ -663,10 +679,9 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
Py_ssize_t begin = end - 1;
Py_ssize_t next = begin;
const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
- PyObject *chunks = PyList_New(0);
- if (chunks == NULL) {
- goto bail;
- }
+ PyObject *chunks = NULL;
+ PyObject *chunk = NULL;
+
if (end < 0 || len <= end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
goto bail;
@@ -674,7 +689,6 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
while (1) {
/* Find the end of the string or the next escape */
Py_UNICODE c = 0;
- PyObject *chunk = NULL;
for (next = end; next < len; next++) {
c = buf[next];
if (c == '"' || c == '\\') {
@@ -691,15 +705,11 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
}
/* Pick up this chunk if it's not zero length */
if (next != end) {
+ APPEND_OLD_CHUNK
chunk = PyUnicode_FromUnicode(&buf[end], next - end);
if (chunk == NULL) {
goto bail;
}
- if (PyList_Append(chunks, chunk)) {
- Py_DECREF(chunk);
- goto bail;
- }
- Py_DECREF(chunk);
}
next++;
if (c == '"') {
@@ -801,26 +811,32 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
}
#endif
}
+ APPEND_OLD_CHUNK
chunk = PyUnicode_FromUnicode(&c, 1);
if (chunk == NULL) {
goto bail;
}
- if (PyList_Append(chunks, chunk)) {
- Py_DECREF(chunk);
- goto bail;
- }
- Py_DECREF(chunk);
}
- rval = join_list_unicode(chunks);
- if (rval == NULL) {
- goto bail;
+ if (chunks == NULL) {
+ if (chunk != NULL)
+ rval = chunk;
+ else
+ rval = PyUnicode_FromStringAndSize("", 0);
+ }
+ else {
+ APPEND_OLD_CHUNK
+ rval = join_list_unicode(chunks);
+ if (rval == NULL) {
+ goto bail;
+ }
+ Py_CLEAR(chunks);
}
- Py_DECREF(chunks);
*next_end_ptr = end;
return rval;
bail:
*next_end_ptr = -1;
+ Py_XDECREF(chunk);
Py_XDECREF(chunks);
return NULL;
}
@@ -914,6 +930,7 @@ scanner_traverse(PyObject *self, visitproc visit, void *arg)
Py_VISIT(s->parse_float);
Py_VISIT(s->parse_int);
Py_VISIT(s->parse_constant);
+ Py_VISIT(s->memo);
return 0;
}
@@ -930,6 +947,7 @@ scanner_clear(PyObject *self)
Py_CLEAR(s->parse_float);
Py_CLEAR(s->parse_int);
Py_CLEAR(s->parse_constant);
+ Py_CLEAR(s->memo);
return 0;
}
@@ -945,17 +963,25 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
*/
char *str = PyString_AS_STRING(pystr);
Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
- PyObject *rval;
- PyObject *pairs;
+ PyObject *rval = NULL;
+ PyObject *pairs = NULL;
PyObject *item;
PyObject *key = NULL;
PyObject *val = NULL;
char *encoding = PyString_AS_STRING(s->encoding);
int strict = PyObject_IsTrue(s->strict);
+ int has_pairs_hook = (s->pairs_hook != Py_None);
Py_ssize_t next_idx;
- pairs = PyList_New(0);
- if (pairs == NULL)
- return NULL;
+ if (has_pairs_hook) {
+ pairs = PyList_New(0);
+ if (pairs == NULL)
+ return NULL;
+ }
+ else {
+ rval = PyDict_New();
+ if (rval == NULL)
+ return NULL;
+ }
/* skip whitespace after { */
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
@@ -963,6 +989,8 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
/* only loop if the object is non-empty */
if (idx <= end_idx && str[idx] != '}') {
while (idx <= end_idx) {
+ PyObject *memokey;
+
/* read key */
if (str[idx] != '"') {
raise_errmsg("Expecting property name", pystr, idx);
@@ -971,6 +999,16 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
if (key == NULL)
goto bail;
+ memokey = PyDict_GetItem(s->memo, key);
+ if (memokey != NULL) {
+ Py_INCREF(memokey);
+ Py_DECREF(key);
+ key = memokey;
+ }
+ else {
+ if (PyDict_SetItem(s->memo, key, key) < 0)
+ goto bail;
+ }
idx = next_idx;
/* skip whitespace between key and : delimiter, read :, skip whitespace */
@@ -987,16 +1025,24 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
if (val == NULL)
goto bail;
- item = PyTuple_Pack(2, key, val);
- if (item == NULL)
- goto bail;
- Py_CLEAR(key);
- Py_CLEAR(val);
- if (PyList_Append(pairs, item) == -1) {
+ if (has_pairs_hook) {
+ item = PyTuple_Pack(2, key, val);
+ if (item == NULL)
+ goto bail;
+ Py_CLEAR(key);
+ Py_CLEAR(val);
+ if (PyList_Append(pairs, item) == -1) {
+ Py_DECREF(item);
+ goto bail;
+ }
Py_DECREF(item);
- goto bail;
}
- Py_DECREF(item);
+ else {
+ if (PyDict_SetItem(rval, key, val) < 0)
+ goto bail;
+ Py_CLEAR(key);
+ Py_CLEAR(val);
+ }
idx = next_idx;
/* skip whitespace before } or , */
@@ -1033,12 +1079,6 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
return val;
}
- rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
- pairs, NULL);
- if (rval == NULL)
- goto bail;
- Py_CLEAR(pairs);
-
/* if object_hook is not None: rval = object_hook(rval) */
if (s->object_hook != Py_None) {
val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
@@ -1051,6 +1091,7 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
*next_idx_ptr = idx + 1;
return rval;
bail:
+ Py_XDECREF(rval);
Py_XDECREF(key);
Py_XDECREF(val);
Py_XDECREF(pairs);
@@ -1068,17 +1109,25 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
*/
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
- PyObject *rval;
- PyObject *pairs;
+ PyObject *rval = NULL;
+ PyObject *pairs = NULL;
PyObject *item;
PyObject *key = NULL;
PyObject *val = NULL;
int strict = PyObject_IsTrue(s->strict);
+ int has_pairs_hook = (s->pairs_hook != Py_None);
Py_ssize_t next_idx;
- pairs = PyList_New(0);
- if (pairs == NULL)
- return NULL;
+ if (has_pairs_hook) {
+ pairs = PyList_New(0);
+ if (pairs == NULL)
+ return NULL;
+ }
+ else {
+ rval = PyDict_New();
+ if (rval == NULL)
+ return NULL;
+ }
/* skip whitespace after { */
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
@@ -1086,6 +1135,8 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
/* only loop if the object is non-empty */
if (idx <= end_idx && str[idx] != '}') {
while (idx <= end_idx) {
+ PyObject *memokey;
+
/* read key */
if (str[idx] != '"') {
raise_errmsg("Expecting property name", pystr, idx);
@@ -1094,6 +1145,16 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
if (key == NULL)
goto bail;
+ memokey = PyDict_GetItem(s->memo, key);
+ if (memokey != NULL) {
+ Py_INCREF(memokey);
+ Py_DECREF(key);
+ key = memokey;
+ }
+ else {
+ if (PyDict_SetItem(s->memo, key, key) < 0)
+ goto bail;
+ }
idx = next_idx;
/* skip whitespace between key and : delimiter, read :, skip whitespace */
@@ -1110,16 +1171,24 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
if (val == NULL)
goto bail;
- item = PyTuple_Pack(2, key, val);
- if (item == NULL)
- goto bail;
- Py_CLEAR(key);
- Py_CLEAR(val);
- if (PyList_Append(pairs, item) == -1) {
+ if (has_pairs_hook) {
+ item = PyTuple_Pack(2, key, val);
+ if (item == NULL)
+ goto bail;
+ Py_CLEAR(key);
+ Py_CLEAR(val);
+ if (PyList_Append(pairs, item) == -1) {
+ Py_DECREF(item);
+ goto bail;
+ }
Py_DECREF(item);
- goto bail;
}
- Py_DECREF(item);
+ else {
+ if (PyDict_SetItem(rval, key, val) < 0)
+ goto bail;
+ Py_CLEAR(key);
+ Py_CLEAR(val);
+ }
idx = next_idx;
/* skip whitespace before } or , */
@@ -1157,12 +1226,6 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
return val;
}
- rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
- pairs, NULL);
- if (rval == NULL)
- goto bail;
- Py_CLEAR(pairs);
-
/* if object_hook is not None: rval = object_hook(rval) */
if (s->object_hook != Py_None) {
val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
@@ -1175,6 +1238,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
*next_idx_ptr = idx + 1;
return rval;
bail:
+ Py_XDECREF(rval);
Py_XDECREF(key);
Py_XDECREF(val);
Py_XDECREF(pairs);
@@ -1723,6 +1787,7 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
Py_TYPE(pystr)->tp_name);
return NULL;
}
+ PyDict_Clear(s->memo);
return _build_rval_index_tuple(rval, next_idx);
}
@@ -1756,6 +1821,12 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
return -1;
+
+ if (s->memo == NULL) {
+ s->memo = PyDict_New();
+ if (s->memo == NULL)
+ goto bail;
+ }
/* PyString_AS_STRING is used on encoding */
s->encoding = PyObject_GetAttrString(ctx, "encoding");