summaryrefslogtreecommitdiff
path: root/simplejson/_speedups.c
diff options
context:
space:
mode:
author: Bob Ippolito <bob@redivi.com> 2009-01-05 01:29:32 +0000
committer: Bob Ippolito <bob@redivi.com> 2009-01-05 01:29:32 +0000
commit: 823f4a283da154d0ea83d1537a3914762fbe0c76 (patch)
tree: 5496fff98ba864af5bb5fd8757b063a3fba107ba /simplejson/_speedups.c
parent: b919f3cbb861f3319a0af080e69652c839abf064 (diff)
download: simplejson-823f4a283da154d0ea83d1537a3914762fbe0c76.tar.gz
changes relevant to http://codereview.appspot.com/7311 -- mostly documentation improvements and a small error handling fix
git-svn-id: http://simplejson.googlecode.com/svn/trunk@161 a4795897-2c25-0410-b006-0d3caba88fa1
Diffstat (limited to 'simplejson/_speedups.c')
-rw-r--r-- simplejson/_speedups.c 150
1 files changed, 138 insertions, 12 deletions
diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c
index ebf35bc..679a4c8 100644
--- a/simplejson/_speedups.c
+++ b/simplejson/_speedups.c
@@ -131,6 +131,7 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj);
static int
_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
{
+ /* PyObject to Py_ssize_t converter */
*size_ptr = PyInt_AsSsize_t(o);
if (*size_ptr == -1 && PyErr_Occurred());
return 1;
@@ -140,12 +141,16 @@ _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
static PyObject *
_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
{
+ /* Py_ssize_t to PyObject converter */
return PyInt_FromSsize_t(*size_ptr);
}
static Py_ssize_t
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
{
+ /* Escape unicode code point c to ASCII escape sequences
+ in char *output. output must have at least 12 bytes unused to
+ accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
output[chars++] = '\\';
switch (c) {
case '\\': output[chars++] = (char)c; break;
@@ -182,9 +187,11 @@ ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
static PyObject *
ascii_escape_unicode(PyObject *pystr)
{
+ /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
Py_ssize_t i;
Py_ssize_t input_chars;
Py_ssize_t output_size;
+ Py_ssize_t max_output_size;
Py_ssize_t chars;
PyObject *rval;
char *output;
@@ -195,6 +202,7 @@ ascii_escape_unicode(PyObject *pystr)
/* One char input can be up to 6 chars output, estimate 4 of these */
output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
+ max_output_size = 2 + (input_chars * MAX_EXPANSION);
rval = PyString_FromStringAndSize(NULL, output_size);
if (rval == NULL) {
return NULL;
@@ -212,15 +220,19 @@ ascii_escape_unicode(PyObject *pystr)
}
if (output_size - chars < (1 + MAX_EXPANSION)) {
/* There's more than four, so let's resize by a lot */
- output_size *= 2;
+ Py_ssize_t new_output_size = output_size * 2;
/* This is an upper bound */
- if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
- output_size = 2 + (input_chars * MAX_EXPANSION);
+ if (new_output_size > max_output_size) {
+ new_output_size = max_output_size;
}
- if (_PyString_Resize(&rval, output_size) == -1) {
- return NULL;
+ /* Make sure that the output size changed before resizing */
+ if (new_output_size != output_size) {
+ output_size = new_output_size;
+ if (_PyString_Resize(&rval, output_size) == -1) {
+ return NULL;
+ }
+ output = PyString_AS_STRING(rval);
}
- output = PyString_AS_STRING(rval);
}
}
output[chars++] = '"';
@@ -233,6 +245,7 @@ ascii_escape_unicode(PyObject *pystr)
static PyObject *
ascii_escape_str(PyObject *pystr)
{
+ /* Take a PyString pystr and return a new ASCII-only escaped PyString */
Py_ssize_t i;
Py_ssize_t input_chars;
Py_ssize_t output_size;
@@ -318,6 +331,8 @@ ascii_escape_str(PyObject *pystr)
static void
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
{
+ /* Use the Python function simplejson.decoder.errmsg to raise a nice
+ looking ValueError exception */
static PyObject *errmsg_fn = NULL;
PyObject *pymsg;
if (errmsg_fn == NULL) {
@@ -339,6 +354,7 @@ raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
static PyObject *
join_list_unicode(PyObject *lst)
{
+ /* return u''.join(lst) */
static PyObject *joinfn = NULL;
if (joinfn == NULL) {
PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
@@ -356,6 +372,7 @@ join_list_unicode(PyObject *lst)
static PyObject *
join_list_string(PyObject *lst)
{
+ /* return ''.join(lst) */
static PyObject *joinfn = NULL;
if (joinfn == NULL) {
PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
@@ -372,6 +389,7 @@ join_list_string(PyObject *lst)
static PyObject *
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
+ /* return (rval, idx) tuple, stealing reference to rval */
PyObject *tpl;
PyObject *pyidx;
/*
@@ -399,6 +417,15 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
static PyObject *
scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
{
+ /* Read the JSON string from PyString pystr.
+ end is the index of the first character after the quote.
+ encoding is the encoding of pystr (must be an ASCII superset)
+ if strict is zero then literal control characters are allowed
+ *next_end_ptr is a return-by-reference index of the character
+ after the end quote
+
+ Return value is a new PyString (if ASCII-only) or PyUnicode
+ */
PyObject *rval;
Py_ssize_t len = PyString_GET_SIZE(pystr);
Py_ssize_t begin = end - 1;
@@ -596,6 +623,15 @@ bail:
static PyObject *
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
{
+ /* Read the JSON string from PyUnicode pystr.
+ end is the index of the first character after the quote.
+ (unlike scanstring_str, this function takes no encoding argument)
+ if strict is zero then literal control characters are allowed
+ *next_end_ptr is a return-by-reference index of the character
+ after the end quote
+
+ Return value is a new PyUnicode
+ */
PyObject *rval;
Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
Py_ssize_t begin = end - 1;
@@ -764,9 +800,16 @@ bail:
}
PyDoc_STRVAR(pydoc_scanstring,
- "scanstring(basestring, end, encoding) -> (str, end)\n"
+ "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
"\n"
- "..."
+ "Scan the string s for a JSON string. End is the index of the\n"
+ "character in s after the quote that started the JSON string.\n"
+ "Unescapes all valid JSON string escape sequences and raises ValueError\n"
+ "on attempt to decode an invalid string. If strict is False then literal\n"
+ "control characters are allowed in the string.\n"
+ "\n"
+ "Returns a tuple of the decoded string and the index of the character in s\n"
+ "after the end quote."
);
static PyObject *
@@ -777,7 +820,7 @@ py_scanstring(PyObject* self UNUSED, PyObject *args)
Py_ssize_t end;
Py_ssize_t next_end = -1;
char *encoding = NULL;
- int strict = 0;
+ int strict = 1;
if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
return NULL;
}
@@ -802,12 +845,13 @@ py_scanstring(PyObject* self UNUSED, PyObject *args)
PyDoc_STRVAR(pydoc_encode_basestring_ascii,
"encode_basestring_ascii(basestring) -> str\n"
"\n"
- "..."
+ "Return an ASCII-only JSON representation of a Python string"
);
static PyObject *
py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
{
+ /* Return an ASCII-only JSON representation of a Python string */
/* METH_O */
if (PyString_Check(pystr)) {
return ascii_escape_str(pystr);
@@ -826,6 +870,7 @@ py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
static void
scanner_dealloc(PyObject *self)
{
+ /* Deallocate scanner object */
PyScannerObject *s;
assert(PyScanner_Check(self));
s = (PyScannerObject *)self;
@@ -840,6 +885,13 @@ scanner_dealloc(PyObject *self)
static PyObject *
_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON object from PyString pystr.
+ idx is the index of the first character after the opening curly brace.
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the closing curly brace.
+
+ Returns a new PyObject (usually a dict, but object_hook can change that)
+ */
char *str = PyString_AS_STRING(pystr);
Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
PyObject *rval = PyDict_New();
@@ -931,6 +983,13 @@ bail:
static PyObject *
_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON object from PyUnicode pystr.
+ idx is the index of the first character after the opening curly brace.
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the closing curly brace.
+
+ Returns a new PyObject (usually a dict, but object_hook can change that)
+ */
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
PyObject *val = NULL;
@@ -1023,6 +1082,13 @@ bail:
static PyObject *
_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON array from PyString pystr.
+ idx is the index of the first character after the opening bracket.
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the closing bracket.
+
+ Returns a new PyList
+ */
char *str = PyString_AS_STRING(pystr);
Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
PyObject *val = NULL;
@@ -1083,6 +1149,13 @@ bail:
static PyObject *
_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON array from PyUnicode pystr.
+ idx is the index of the first character after the opening bracket.
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the closing bracket.
+
+ Returns a new PyList
+ */
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
PyObject *val = NULL;
@@ -1143,6 +1216,15 @@ bail:
static PyObject *
_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+ /* Process a JSON constant that the scanner has already matched.
+ constant is the constant string that was found
+ ("NaN", "Infinity", "-Infinity").
+ idx is the index of the first character of the constant
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the constant.
+
+ Returns the result of parse_constant
+ */
PyObject *cstr;
PyObject *rval;
/* constant is "NaN", "Infinity", or "-Infinity" */
@@ -1160,6 +1242,15 @@ _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *
static PyObject *
_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON number from PyString pystr.
+ start is the index of the first character of the number
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the number.
+
+ Returns a new PyObject representation of that number:
+ PyInt, PyLong, or PyFloat.
+ May return other types if parse_int or parse_float are set
+ */
char *str = PyString_AS_STRING(pystr);
Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
Py_ssize_t idx = start;
@@ -1249,6 +1340,15 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz
static PyObject *
_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON number from PyUnicode pystr.
+ start is the index of the first character of the number
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the number.
+
+ Returns a new PyObject representation of that number:
+ PyInt, PyLong, or PyFloat.
+ May return other types if parse_int or parse_float are set
+ */
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
Py_ssize_t idx = start;
@@ -1332,6 +1432,13 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
static PyObject *
scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
{
+ /* Read one JSON term (of any kind) from PyString pystr.
+ idx is the index of the first character of the term
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the term.
+
+ Returns a new PyObject representation of the term.
+ */
char *str = PyString_AS_STRING(pystr);
Py_ssize_t length = PyString_GET_SIZE(pystr);
if (idx >= length) {
@@ -1401,6 +1508,13 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n
static PyObject *
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
{
+ /* Read one JSON term (of any kind) from PyUnicode pystr.
+ idx is the index of the first character of the term
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the term.
+
+ Returns a new PyObject representation of the term.
+ */
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
if (idx >= length) {
@@ -1469,6 +1583,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
static PyObject *
scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
{
+ /* Python callable interface to scan_once_{str,unicode} */
PyObject *pystr;
PyObject *rval;
Py_ssize_t idx;
@@ -1498,6 +1613,7 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
static int
scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
{
+ /* Initialize Scanner object */
PyObject *ctx;
static char *kwlist[] = {"context", NULL};
PyScannerObject *s;
@@ -1564,7 +1680,7 @@ static
PyTypeObject PyScannerType = {
PyObject_HEAD_INIT(0)
0, /* tp_internal */
- "make_scanner", /* tp_name */
+ "Scanner", /* tp_name */
sizeof(PyScannerObject), /* tp_basicsize */
0, /* tp_itemsize */
scanner_dealloc, /* tp_dealloc */
@@ -1607,6 +1723,7 @@ PyTypeObject PyScannerType = {
static int
encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
{
+ /* initialize Encoder object */
static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
PyEncoderObject *s;
@@ -1644,6 +1761,7 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject *
encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
{
+ /* Python callable interface to encoder_listencode_obj */
static char *kwlist[] = {"obj", "_current_indent_level", NULL};
PyObject *obj;
PyObject *rval;
@@ -1667,6 +1785,7 @@ encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject *
_encoded_const(PyObject *obj)
{
+ /* Return the JSON string representation of None, True, False */
if (obj == Py_None) {
static PyObject *s_null = NULL;
if (s_null == NULL) {
@@ -1700,6 +1819,7 @@ _encoded_const(PyObject *obj)
static PyObject *
encoder_encode_float(PyEncoderObject *s, PyObject *obj)
{
+ /* Return the JSON representation of a PyFloat */
double i = PyFloat_AS_DOUBLE(obj);
if (!Py_IS_FINITE(i)) {
if (!s->allow_nan) {
@@ -1723,6 +1843,7 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj)
static PyObject *
encoder_encode_string(PyEncoderObject *s, PyObject *obj)
{
+ /* Return the JSON representation of a string */
if (s->fast_encode)
return py_encode_basestring_ascii(NULL, obj);
else
@@ -1732,6 +1853,7 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj)
static int
_steal_list_append(PyObject *lst, PyObject *stolen)
{
+ /* Append stolen and then decrement its reference count */
int rval = PyList_Append(lst, stolen);
Py_DECREF(stolen);
return rval;
@@ -1740,6 +1862,7 @@ _steal_list_append(PyObject *lst, PyObject *stolen)
static int
encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
{
+ /* Encode Python object obj to a JSON term, rval is a PyList */
PyObject *newobj;
int rv;
@@ -1818,6 +1941,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi
static int
encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
{
+ /* Encode Python dict dct to a JSON term, rval is a PyList */
static PyObject *open_dict = NULL;
static PyObject *close_dict = NULL;
static PyObject *empty_dict = NULL;
@@ -1949,6 +2073,7 @@ bail:
static int
encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
{
+ /* Encode Python list seq to a JSON term, rval is a PyList */
static PyObject *open_array = NULL;
static PyObject *close_array = NULL;
static PyObject *empty_array = NULL;
@@ -2038,6 +2163,7 @@ bail:
static void
encoder_dealloc(PyObject *self)
{
+ /* Deallocate Encoder */
PyEncoderObject *s;
assert(PyEncoder_Check(self));
s = (PyEncoderObject *)self;
@@ -2058,7 +2184,7 @@ static
PyTypeObject PyEncoderType = {
PyObject_HEAD_INIT(0)
0, /* tp_internal */
- "make_encoder", /* tp_name */
+ "Encoder", /* tp_name */
sizeof(PyEncoderObject), /* tp_basicsize */
0, /* tp_itemsize */
encoder_dealloc, /* tp_dealloc */