#include "Python.h"
#include "structmember.h"

/*
 * Python 2 / Python 3 compatibility layer.
 *
 * On Python 3 the Python 2 int/str C-API names used in this file are mapped
 * onto their PyLong / PyBytes equivalents.  On Python 2 the PEP 393 style
 * unicode accessors (PyUnicode_READY/KIND/DATA/READ) are emulated on top of
 * the Py_UNICODE buffer.
 */
#if PY_MAJOR_VERSION >= 3
#define PyInt_FromSsize_t PyLong_FromSsize_t
#define PyInt_AsSsize_t PyLong_AsSsize_t
#define PyString_Check PyBytes_Check
#define PyString_GET_SIZE PyBytes_GET_SIZE
#define PyString_AS_STRING PyBytes_AS_STRING
#define PyString_FromStringAndSize PyBytes_FromStringAndSize
/* Always false here: PyInt_Check call sites are paired with PyLong_Check. */
#define PyInt_Check(obj) 0
#define JSON_UNICHR Py_UCS4
#define JSON_InternFromString PyUnicode_InternFromString
#define JSON_Intern_GET_SIZE PyUnicode_GET_SIZE
#define JSON_ASCII_Check PyUnicode_Check
#define JSON_ASCII_AS_STRING PyUnicode_AsUTF8
#define PyInt_Type PyLong_Type
#define PyInt_FromString PyLong_FromString
/* PY2_UNUSED / PY3_UNUSED expand to UNUSED only on the named major version. */
#define PY2_UNUSED
#define PY3_UNUSED UNUSED
#define JSON_NewEmptyUnicode() PyUnicode_New(0, 127)
#else /* PY_MAJOR_VERSION >= 3 */
#define PY2_UNUSED UNUSED
#define PY3_UNUSED
#define PyUnicode_READY(obj) 0
#define PyUnicode_KIND(obj) (sizeof(Py_UNICODE))
#define PyUnicode_DATA(obj) ((void *)(PyUnicode_AS_UNICODE(obj)))
/* The kind argument is ignored: data is always read as Py_UNICODE. */
#define PyUnicode_READ(kind, data, index) ((JSON_UNICHR)((const Py_UNICODE *)(data))[(index)])
#define PyUnicode_GetLength PyUnicode_GET_SIZE
#define JSON_UNICHR Py_UNICODE
#define JSON_ASCII_Check PyString_Check
#define JSON_ASCII_AS_STRING PyString_AS_STRING
#define JSON_InternFromString PyString_InternFromString
#define JSON_Intern_GET_SIZE PyString_GET_SIZE
#define JSON_NewEmptyUnicode() PyUnicode_FromUnicode(NULL, 0)
#endif /* PY_MAJOR_VERSION < 3 */

#if PY_VERSION_HEX < 0x02070000
#if !defined(PyOS_string_to_double)
#define PyOS_string_to_double json_PyOS_string_to_double
static double
json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception);
/*
 * Fallback for PyOS_string_to_double on interpreters older than 2.7.
 *
 * Only the (s, NULL, NULL) calling form is supported, which the asserts
 * enforce; the string is parsed with PyOS_ascii_atof and the result is
 * returned as-is.
 */
static double
json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception)
{
    double x;
    assert(endptr == NULL);
    assert(overflow_exception == NULL);
    PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;)
    x = PyOS_ascii_atof(s);
    PyFPE_END_PROTECT(x)
    return x;
}
#endif
#endif /* PY_VERSION_HEX < 0x02070000 */

/* Object-header accessors missing before Python 2.6. */
#if PY_VERSION_HEX < 0x02060000
#if !defined(Py_TYPE)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif
#if !defined(Py_SIZE)
#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
#endif
#if !defined(PyVarObject_HEAD_INIT)
#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
#endif
#endif /* PY_VERSION_HEX < 0x02060000 */

/* Py_ssize_t and friends missing before Python 2.5. */
#if PY_VERSION_HEX < 0x02050000
#if !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif
#if !defined(Py_IS_FINITE)
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif
#endif /* PY_VERSION_HEX < 0x02050000 */

/* Marks a parameter or variable as intentionally unused (GCC-compatible compilers only). */
#ifdef __GNUC__
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif

#define DEFAULT_ENCODING "utf-8"

#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)

static PyTypeObject PyScannerType;
static PyTypeObject PyEncoderType;

#undef small /* defined by some Windows headers */

/* Function that joins a list of string objects into a single string. */
typedef PyObject *(*joinerfunc)(PyObject *);

/*
 * String accumulator: small strings are appended to `small`; once enough are
 * pending they are joined via `joiner` and the result is appended to `large`.
 */
typedef struct {
    PyObject *large;  /* A list of previously accumulated large strings */
    PyObject *small;  /* Pending small strings */
    joinerfunc joiner;
} JSON_Accu;

static int
JSON_Accu_Init(JSON_Accu *acc);
static int
JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode);
static PyObject *
JSON_Accu_FinishAsList(JSON_Accu *acc);
static void
JSON_Accu_Destroy(JSON_Accu *acc);

/* Instance layout of the scanner type; every field is an owned reference. */
typedef struct _PyScannerObject {
    PyObject_HEAD
    PyObject *encoding;
    PyObject *strict;
    PyObject *object_hook;
    PyObject *pairs_hook;
    PyObject *parse_float;
    PyObject *parse_int;
    PyObject *parse_constant;
    PyObject *memo;
} PyScannerObject;

/* Read-only attributes exposed on scanner instances. */
static PyMemberDef scanner_members[] = {
    {"encoding", T_OBJECT,
offsetof(PyScannerObject, encoding), READONLY, "encoding"}, {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"}, {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, {NULL} }; typedef struct _PyEncoderObject { PyObject_HEAD PyObject *markers; PyObject *defaultfn; PyObject *encoder; PyObject *indent; PyObject *key_separator; PyObject *item_separator; PyObject *sort_keys; PyObject *key_memo; PyObject *encoding; PyObject *Decimal; char skipkeys; int fast_encode; int allow_nan; int use_decimal; int namedtuple_as_object; int tuple_as_array; int bigint_as_string; PyObject *item_sort_key; PyObject *item_sort_kw; } PyEncoderObject; static PyMemberDef encoder_members[] = { {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, {"encoding", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoding"}, {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"}, {"item_sort_key", T_OBJECT, 
offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"}, {NULL} }; static PyObject * join_list_unicode(PyObject *lst); static PyObject * JSON_ParseEncoding(PyObject *encoding); static PyObject * JSON_UnicodeFromChar(JSON_UNICHR c); static PyObject * maybe_quote_bigint(PyObject *encoded, PyObject *obj); static Py_ssize_t ascii_char_size(JSON_UNICHR c); static Py_ssize_t ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars); static PyObject * ascii_escape_unicode(PyObject *pystr); static PyObject * ascii_escape_str(PyObject *pystr); static PyObject * py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); #if PY_MAJOR_VERSION < 3 static PyObject * join_list_string(PyObject *lst); static PyObject * scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr); static PyObject * _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); #endif static PyObject * scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr); static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); static PyObject * scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); static int scanner_init(PyObject *self, PyObject *args, PyObject *kwds); static void scanner_dealloc(PyObject *self); static int scanner_clear(PyObject *self); static PyObject * encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); static int encoder_init(PyObject *self, PyObject *args, PyObject *kwds); static void encoder_dealloc(PyObject *self); static int encoder_clear(PyObject *self); static PyObject * encoder_stringify_key(PyEncoderObject *s, PyObject *key); static int encoder_listencode_list(PyEncoderObject *s, 
JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level); static int encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level); static int encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level); static PyObject * _encoded_const(PyObject *obj); static void raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); static PyObject * encoder_encode_string(PyEncoderObject *s, PyObject *obj); static int _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); static PyObject * _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); static PyObject * encoder_encode_float(PyEncoderObject *s, PyObject *obj); static int _is_namedtuple(PyObject *obj); static PyObject * moduleinit(void); #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) #define MIN_EXPANSION 6 static int JSON_Accu_Init(JSON_Accu *acc) { /* Lazily allocated */ acc->large = NULL; acc->small = PyList_New(0); if (acc->small == NULL) return -1; #if PY_MAJOR_VERSION >= 3 acc->joiner = join_list_unicode; #else /* PY_MAJOR_VERSION >= 3 */ acc->joiner = join_list_string; #endif /* PY_MAJOR_VERSION < 3 */ return 0; } static int flush_accumulator(JSON_Accu *acc) { Py_ssize_t nsmall = PyList_GET_SIZE(acc->small); if (nsmall) { int ret; PyObject *joined; if (acc->large == NULL) { acc->large = PyList_New(0); if (acc->large == NULL) return -1; } joined = acc->joiner(acc->small); if (joined == NULL) return -1; if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) { Py_DECREF(joined); return -1; } ret = PyList_Append(acc->large, joined); Py_DECREF(joined); return ret; } return 0; } static int JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode) { Py_ssize_t nsmall; #if PY_MAJOR_VERSION >= 3 assert(PyUnicode_Check(unicode)); #else /* PY_MAJOR_VERSION >= 3 */ assert(JSON_ASCII_Check(unicode) || PyUnicode_Check(unicode)); #endif /* 
PY_MAJOR_VERSION < 3 */ if (PyList_Append(acc->small, unicode)) return -1; nsmall = PyList_GET_SIZE(acc->small); /* Each item in a list of unicode objects has an overhead (in 64-bit * builds) of: * - 8 bytes for the list slot * - 56 bytes for the header of the unicode object * that is, 64 bytes. 100000 such objects waste more than 6MB * compared to a single concatenated string. */ if (nsmall < 100000) return 0; return flush_accumulator(acc); } static PyObject * JSON_Accu_FinishAsList(JSON_Accu *acc) { int ret; PyObject *res; ret = flush_accumulator(acc); Py_CLEAR(acc->small); if (ret) { Py_CLEAR(acc->large); return NULL; } res = acc->large; acc->large = NULL; return res; } static void JSON_Accu_Destroy(JSON_Accu *acc) { Py_CLEAR(acc->small); Py_CLEAR(acc->large); } static int IS_DIGIT(JSON_UNICHR c) { return c >= '0' && c <= '9'; } static PyObject * JSON_UnicodeFromChar(JSON_UNICHR c) { #if PY_MAJOR_VERSION >= 3 PyObject *rval = PyUnicode_New(1, c); if (rval) PyUnicode_WRITE(PyUnicode_KIND(rval), PyUnicode_DATA(rval), 0, c); return rval; #else /* PY_MAJOR_VERSION >= 3 */ return PyUnicode_FromUnicode(&c, 1); #endif /* PY_MAJOR_VERSION < 3 */ } static PyObject * maybe_quote_bigint(PyObject *encoded, PyObject *obj) { static PyObject *big_long = NULL; static PyObject *small_long = NULL; if (big_long == NULL) { big_long = PyLong_FromLongLong(1LL << 53); if (big_long == NULL) { Py_DECREF(encoded); return NULL; } } if (small_long == NULL) { small_long = PyLong_FromLongLong(-1LL << 53); if (small_long == NULL) { Py_DECREF(encoded); return NULL; } } if (PyObject_RichCompareBool(obj, big_long, Py_GE) || PyObject_RichCompareBool(obj, small_long, Py_LE)) { #if PY_MAJOR_VERSION >= 3 PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded); #else PyObject* quoted = PyString_FromFormat("\"%s\"", PyString_AsString(encoded)); #endif Py_DECREF(encoded); encoded = quoted; } return encoded; } static int _is_namedtuple(PyObject *obj) { int rval = 0; PyObject *_asdict = 
PyObject_GetAttrString(obj, "_asdict"); if (_asdict == NULL) { PyErr_Clear(); return 0; } rval = PyCallable_Check(_asdict); Py_DECREF(_asdict); return rval; } static int _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) { /* PyObject to Py_ssize_t converter */ *size_ptr = PyInt_AsSsize_t(o); if (*size_ptr == -1 && PyErr_Occurred()) return 0; return 1; } static PyObject * _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) { /* Py_ssize_t to PyObject converter */ return PyInt_FromSsize_t(*size_ptr); } static Py_ssize_t ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars) { /* Escape unicode code point c to ASCII escape sequences in char *output. output must have at least 12 bytes unused to accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ if (S_CHAR(c)) { output[chars++] = (char)c; } else { output[chars++] = '\\'; switch (c) { case '\\': output[chars++] = (char)c; break; case '"': output[chars++] = (char)c; break; case '\b': output[chars++] = 'b'; break; case '\f': output[chars++] = 'f'; break; case '\n': output[chars++] = 'n'; break; case '\r': output[chars++] = 'r'; break; case '\t': output[chars++] = 't'; break; default: #if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3 if (c >= 0x10000) { /* UTF-16 surrogate pair */ JSON_UNICHR v = c - 0x10000; c = 0xd800 | ((v >> 10) & 0x3ff); output[chars++] = 'u'; output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; output[chars++] = "0123456789abcdef"[(c ) & 0xf]; c = 0xdc00 | (v & 0x3ff); output[chars++] = '\\'; } #endif output[chars++] = 'u'; output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; output[chars++] = "0123456789abcdef"[(c ) & 0xf]; } } return chars; } static Py_ssize_t ascii_char_size(JSON_UNICHR c) { if (S_CHAR(c)) { return 1; } else if (c == '\\' || c == '"' || c == 
'\b' || c == '\f' || c == '\n' || c == '\r' || c == '\t') { return 2; } else if (c < 0x10000U) { return MIN_EXPANSION; } else { return 2 * MIN_EXPANSION; } } static PyObject * ascii_escape_unicode(PyObject *pystr) { /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ Py_ssize_t i; Py_ssize_t input_chars; Py_ssize_t output_size; Py_ssize_t chars; int kind; void *data; PyObject *rval; char *output; if (PyUnicode_READY(pystr)) return NULL; kind = PyUnicode_KIND(pystr); data = PyUnicode_DATA(pystr); input_chars = PyUnicode_GetLength(pystr); output_size = 2; for (i = 0; i < input_chars; i++) { output_size += ascii_char_size(PyUnicode_READ(kind, data, i)); } #if PY_MAJOR_VERSION >= 3 rval = PyUnicode_New(output_size, 127); if (rval == NULL) { return NULL; } assert(PyUnicode_KIND(rval) == PyUnicode_1BYTE_KIND); output = (char *)PyUnicode_DATA(rval); #else rval = PyString_FromStringAndSize(NULL, output_size); if (rval == NULL) { return NULL; } output = PyString_AS_STRING(rval); #endif chars = 0; output[chars++] = '"'; for (i = 0; i < input_chars; i++) { chars = ascii_escape_char(PyUnicode_READ(kind, data, i), output, chars); } output[chars++] = '"'; assert(chars == output_size); return rval; } #if PY_MAJOR_VERSION >= 3 static PyObject * ascii_escape_str(PyObject *pystr) { PyObject *rval; PyObject *input = PyUnicode_DecodeUTF8(PyString_AS_STRING(pystr), PyString_GET_SIZE(pystr), NULL); if (input == NULL) return NULL; rval = ascii_escape_unicode(input); Py_DECREF(input); return rval; } #else /* PY_MAJOR_VERSION >= 3 */ static PyObject * ascii_escape_str(PyObject *pystr) { /* Take a PyString pystr and return a new ASCII-only escaped PyString */ Py_ssize_t i; Py_ssize_t input_chars; Py_ssize_t output_size; Py_ssize_t chars; PyObject *rval; char *output; char *input_str; input_chars = PyString_GET_SIZE(pystr); input_str = PyString_AS_STRING(pystr); output_size = 2; /* Fast path for a string that's already ASCII */ for (i = 0; i < input_chars; i++) { 
JSON_UNICHR c = (JSON_UNICHR)input_str[i]; if (c > 0x7f) { /* We hit a non-ASCII character, bail to unicode mode */ PyObject *uni; uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); if (uni == NULL) { return NULL; } rval = ascii_escape_unicode(uni); Py_DECREF(uni); return rval; } output_size += ascii_char_size(c); } rval = PyString_FromStringAndSize(NULL, output_size); if (rval == NULL) { return NULL; } chars = 0; output = PyString_AS_STRING(rval); output[chars++] = '"'; for (i = 0; i < input_chars; i++) { chars = ascii_escape_char((JSON_UNICHR)input_str[i], output, chars); } output[chars++] = '"'; assert(chars == output_size); return rval; } #endif /* PY_MAJOR_VERSION < 3 */ static PyObject * encoder_stringify_key(PyEncoderObject *s, PyObject *key) { if (PyUnicode_Check(key)) { Py_INCREF(key); return key; } else if (PyString_Check(key)) { #if PY_MAJOR_VERSION >= 3 return PyUnicode_Decode( PyString_AS_STRING(key), PyString_GET_SIZE(key), JSON_ASCII_AS_STRING(s->encoding), NULL); #else /* PY_MAJOR_VERSION >= 3 */ Py_INCREF(key); return key; #endif /* PY_MAJOR_VERSION < 3 */ } else if (PyFloat_Check(key)) { return encoder_encode_float(s, key); } else if (key == Py_True || key == Py_False || key == Py_None) { /* This must come before the PyInt_Check because True and False are also 1 and 0.*/ return _encoded_const(key); } else if (PyInt_Check(key) || PyLong_Check(key)) { return PyObject_Str(key); } else if (s->use_decimal && PyObject_TypeCheck(key, (PyTypeObject *)s->Decimal)) { return PyObject_Str(key); } else if (s->skipkeys) { Py_INCREF(Py_None); return Py_None; } PyErr_SetString(PyExc_TypeError, "keys must be a string"); return NULL; } static PyObject * encoder_dict_iteritems(PyEncoderObject *s, PyObject *dct) { PyObject *items; PyObject *iter = NULL; PyObject *lst = NULL; PyObject *item = NULL; PyObject *kstr = NULL; static PyObject *sortfun = NULL; static PyObject *sortargs = NULL; if (sortargs == NULL) { sortargs = PyTuple_New(0); if (sortargs == 
NULL) return NULL; } if (PyDict_CheckExact(dct)) items = PyDict_Items(dct); else items = PyMapping_Items(dct); if (items == NULL) return NULL; iter = PyObject_GetIter(items); Py_DECREF(items); if (iter == NULL) return NULL; if (s->item_sort_kw == Py_None) return iter; lst = PyList_New(0); if (lst == NULL) goto bail; while ((item = PyIter_Next(iter))) { PyObject *key, *value; if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); goto bail; } key = PyTuple_GET_ITEM(item, 0); if (key == NULL) goto bail; #if PY_MAJOR_VERSION < 3 else if (PyString_Check(key)) { // item can be added as-is } #endif /* PY_MAJOR_VERSION < 3 */ else if (PyUnicode_Check(key)) { // item can be added as-is } else { kstr = encoder_stringify_key(s, key); if (kstr == NULL) goto bail; else if (kstr == Py_None) { // skipkeys Py_DECREF(kstr); continue; } value = PyTuple_GET_ITEM(item, 1); if (value == NULL) goto bail; PyObject *tpl = PyTuple_Pack(2, kstr, value); if (tpl == NULL) goto bail; Py_CLEAR(kstr); Py_DECREF(item); item = tpl; } if (PyList_Append(lst, item)) goto bail; Py_DECREF(item); } Py_CLEAR(iter); if (PyErr_Occurred()) goto bail; sortfun = PyObject_GetAttrString(lst, "sort"); if (sortfun == NULL) goto bail; if (!PyObject_Call(sortfun, sortargs, s->item_sort_kw)) goto bail; Py_CLEAR(sortfun); iter = PyObject_GetIter(lst); Py_CLEAR(lst); return iter; bail: Py_XDECREF(sortfun); Py_XDECREF(kstr); Py_XDECREF(item); Py_XDECREF(lst); Py_XDECREF(iter); return NULL; } static void raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) { /* Use the Python function simplejson.decoder.errmsg to raise a nice looking ValueError exception */ static PyObject *JSONDecodeError = NULL; PyObject *exc; if (JSONDecodeError == NULL) { PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); if (decoder == NULL) return; JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError"); Py_DECREF(decoder); if (JSONDecodeError == NULL) return; 
}
    /* "z" passes msg as a C string, "O" passes s, and "O&" converts end
       through _convertPyInt_FromSsize_t. */
    exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
    if (exc) {
        PyErr_SetObject(JSONDecodeError, exc);
        Py_DECREF(exc);
    }
}

static PyObject *
join_list_unicode(PyObject *lst)
{
    /* return u''.join(lst) */
    /* The bound join method of an empty unicode string is cached on first use. */
    static PyObject *joinfn = NULL;
    if (joinfn == NULL) {
        PyObject *ustr = JSON_NewEmptyUnicode();
        if (ustr == NULL)
            return NULL;
        joinfn = PyObject_GetAttrString(ustr, "join");
        Py_DECREF(ustr);
        if (joinfn == NULL)
            return NULL;
    }
    return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
}

#if PY_MAJOR_VERSION >= 3
#define join_list_string join_list_unicode
#else /* PY_MAJOR_VERSION >= 3 */
static PyObject *
join_list_string(PyObject *lst)
{
    /* return ''.join(lst) */
    /* The bound join method of an empty byte string is cached on first use. */
    static PyObject *joinfn = NULL;
    if (joinfn == NULL) {
        PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
        if (ustr == NULL)
            return NULL;
        joinfn = PyObject_GetAttrString(ustr, "join");
        Py_DECREF(ustr);
        if (joinfn == NULL)
            return NULL;
    }
    return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
}
#endif /* PY_MAJOR_VERSION < 3 */

static PyObject *
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx)
{
    /* return (rval, idx) tuple, stealing reference to rval */
    PyObject *tpl;
    PyObject *pyidx;
    /*
    steal a reference to rval, returns (rval, idx)
    A NULL rval is passed straight through as NULL; on any allocation
    failure the stolen reference is released before returning NULL.
    */
    if (rval == NULL) {
        return NULL;
    }
    pyidx = PyInt_FromSsize_t(idx);
    if (pyidx == NULL) {
        Py_DECREF(rval);
        return NULL;
    }
    tpl = PyTuple_New(2);
    if (tpl == NULL) {
        Py_DECREF(pyidx);
        Py_DECREF(rval);
        return NULL;
    }
    /* PyTuple_SET_ITEM steals both references. */
    PyTuple_SET_ITEM(tpl, 0, rval);
    PyTuple_SET_ITEM(tpl, 1, pyidx);
    return tpl;
}

/*
 * Move the pending `chunk` (if any) onto the `chunks` list, creating the
 * list on first use, and reset `chunk` to NULL.  Expects locals named
 * `chunk` and `chunks` and a `bail` label in the enclosing function.
 */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        if (PyList_Append(chunks, chunk)) { \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }

#if PY_MAJOR_VERSION < 3
static PyObject *
scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
{
    /* Read the JSON string from PyString pystr.
    end is the index of the first character after the quote.
    encoding is the encoding of pystr (must be an ASCII superset)
    if strict is zero then literal control characters are allowed
    *next_end_ptr is a return-by-reference index of the character
        after the end quote; it is set to -1 on failure

    Return value is a new PyString (if ASCII-only) or PyUnicode
    */
    PyObject *rval;
    Py_ssize_t len = PyString_GET_SIZE(pystr);
    Py_ssize_t begin = end - 1;
    Py_ssize_t next = begin;
    /* Set once any byte or decoded escape above 0x7f has been seen. */
    int has_unicode = 0;
    char *buf = PyString_AS_STRING(pystr);
    PyObject *chunks = NULL;
    PyObject *chunk = NULL;

    if (len == end) {
        raise_errmsg("Unterminated string starting at", pystr, begin);
        goto bail;
    }
    else if (end < 0 || len < end) {
        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
        goto bail;
    }
    while (1) {
        /* Find the end of the string or the next escape */
        Py_UNICODE c = 0;
        for (next = end; next < len; next++) {
            c = (unsigned char)buf[next];
            if (c == '"' || c == '\\') {
                break;
            }
            else if (strict && c <= 0x1f) {
                raise_errmsg("Invalid control character at", pystr, next);
                goto bail;
            }
            else if (c > 0x7f) {
                has_unicode = 1;
            }
        }
        /* The loop ran off the end without finding a quote or backslash. */
        if (!(c == '"' || c == '\\')) {
            raise_errmsg("Unterminated string starting at", pystr, begin);
            goto bail;
        }
        /* Pick up this chunk if it's not zero length */
        if (next != end) {
            APPEND_OLD_CHUNK
#if PY_MAJOR_VERSION >= 3
            if (!has_unicode) {
                chunk = PyUnicode_DecodeASCII(&buf[end], next - end, NULL);
            }
            else {
                chunk = PyUnicode_Decode(&buf[end], next - end, encoding, NULL);
            }
            if (chunk == NULL) {
                goto bail;
            }
#else /* PY_MAJOR_VERSION >= 3 */
            PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
            if (strchunk == NULL) {
                goto bail;
            }
            if (has_unicode) {
                chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
                Py_DECREF(strchunk);
                if (chunk == NULL) {
                    goto bail;
                }
            }
            else {
                chunk = strchunk;
            }
#endif /* PY_MAJOR_VERSION < 3 */
        }
        next++;
        if (c == '"') {
            /* Closing quote: end now indexes the character after it. */
            end = next;
            break;
        }
        if (next == len) {
            raise_errmsg("Unterminated string starting at", pystr, begin);
            goto bail;
        }
        c = buf[next];
        if (c != 'u') {
            /* Non-unicode backslash escapes */
            end = next + 1;
            switch (c) {
                case '"': break;
                case '\\': break;
                case '/': break;
                case 'b': c = '\b'; break;
                case 'f': c = '\f'; break;
                case 'n': c = '\n'; break;
                case 'r': c = '\r'; break;
                case 't': c = '\t'; break;
                default: c = 0;
            }
            /* c == 0 marks an unrecognised escape character. */
            if (c == 0) {
                raise_errmsg("Invalid \\escape", pystr, end - 2);
                goto bail;
            }
        }
        else {
            c = 0;
            next++;
            end = next + 4;
            if (end >= len) {
                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
                goto bail;
            }
            /* Decode 4 hex digits */
            for (; next < end; next++) {
                JSON_UNICHR digit = (JSON_UNICHR)buf[next];
                c <<= 4;
                switch (digit) {
                    case '0': case '1': case '2': case '3': case '4':
                    case '5': case '6': case '7': case '8': case '9':
                        c |= (digit - '0'); break;
                    case 'a': case 'b': case 'c': case 'd': case 'e':
                    case 'f':
                        c |= (digit - 'a' + 10); break;
                    case 'A': case 'B': case 'C': case 'D': case 'E':
                    case 'F':
                        c |= (digit - 'A' + 10); break;
                    default:
                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
                        goto bail;
                }
            }
#if (PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE))
            /* Surrogate pair */
            if ((c & 0xfc00) == 0xd800) {
                JSON_UNICHR c2 = 0;
                if (end + 6 >= len) {
                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
                    goto bail;
                }
                if (buf[next++] != '\\' || buf[next++] != 'u') {
                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
                    goto bail;
                }
                end += 6;
                /* Decode 4 hex digits */
                for (; next < end; next++) {
                    c2 <<= 4;
                    JSON_UNICHR digit = buf[next];
                    switch (digit) {
                        case '0': case '1': case '2': case '3': case '4':
                        case '5': case '6': case '7': case '8': case '9':
                            c2 |= (digit - '0'); break;
                        case 'a': case 'b': case 'c': case 'd': case 'e':
                        case 'f':
                            c2 |= (digit - 'a' + 10); break;
                        case 'A': case 'B': case 'C': case 'D': case 'E':
                        case 'F':
                            c2 |= (digit - 'A' + 10); break;
                        default:
                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
                            goto bail;
                    }
                }
                if ((c2 & 0xfc00) != 0xdc00) {
                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
                    goto bail;
                }
                /* Combine the high and low surrogates into one code point. */
                c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
            }
            else if ((c & 0xfc00) == 0xdc00) {
                raise_errmsg("Unpaired low surrogate", pystr, end - 5);
                goto bail;
            }
#endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */
        }
        if (c > 0x7f) {
            has_unicode = 1;
        }
        APPEND_OLD_CHUNK
#if PY_MAJOR_VERSION >= 3
        chunk = JSON_UnicodeFromChar(c);
        if (chunk == NULL) {
            goto bail;
        }
#else /* PY_MAJOR_VERSION >= 3 */
        if (has_unicode) {
            chunk = JSON_UnicodeFromChar(c);
            if (chunk == NULL) {
                goto bail;
            }
        }
        else {
            char c_char = Py_CHARMASK(c);
            chunk = PyString_FromStringAndSize(&c_char, 1);
            if (chunk == NULL) {
                goto bail;
            }
        }
#endif
    }

    if (chunks == NULL) {
        /* Zero or one chunk: no join needed. */
        if (chunk != NULL)
            rval = chunk;
        else
            rval = JSON_NewEmptyUnicode();
    }
    else {
        APPEND_OLD_CHUNK
        rval = join_list_string(chunks);
        if (rval == NULL) {
            goto bail;
        }
        Py_CLEAR(chunks);
    }

    *next_end_ptr = end;
    return rval;
bail:
    *next_end_ptr = -1;
    Py_XDECREF(chunk);
    Py_XDECREF(chunks);
    return NULL;
}
#endif /* PY_MAJOR_VERSION < 3 */

static PyObject *
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
{
    /* Read the JSON string from PyUnicode pystr.
    end is the index of the first character after the quote.
    if strict is zero then literal control characters are allowed
    *next_end_ptr is a return-by-reference index of the character
        after the end quote; it is set to -1 on failure

    Return value is a new PyUnicode
    */
    PyObject *rval;
    Py_ssize_t begin = end - 1;
    Py_ssize_t next = begin;
    PY2_UNUSED int kind = PyUnicode_KIND(pystr);
    Py_ssize_t len = PyUnicode_GetLength(pystr);
    void *buf = PyUnicode_DATA(pystr);
    PyObject *chunks = NULL;
    PyObject *chunk = NULL;

    if (len == end) {
        raise_errmsg("Unterminated string starting at", pystr, begin);
        goto bail;
    }
    else if (end < 0 || len < end) {
        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
        goto bail;
    }
    while (1) {
        /* Find the end of the string or the next escape */
        JSON_UNICHR c = 0;
        for (next = end; next < len; next++) {
            c = PyUnicode_READ(kind, buf, next);
            if (c == '"' || c == '\\') {
                break;
            }
            else if (strict && c <= 0x1f) {
                raise_errmsg("Invalid control character at", pystr, next);
                goto bail;
            }
        }
        /* The loop ran off the end without finding a quote or backslash. */
        if (!(c == '"' || c == '\\')) {
            raise_errmsg("Unterminated string starting at", pystr, begin);
            goto bail;
        }
        /* Pick up this chunk if it's not zero length */
        if (next != end) {
            APPEND_OLD_CHUNK
#if PY_MAJOR_VERSION < 3
            chunk = PyUnicode_FromUnicode(&((const Py_UNICODE *)buf)[end], next - end);
#else
            chunk = PyUnicode_Substring(pystr, end, next);
#endif
            if (chunk == NULL) {
                goto bail;
            }
        }
        next++;
        if (c == '"') {
            /* Closing quote: end now indexes the character after it. */
            end = next;
            break;
        }
        if (next == len) {
            raise_errmsg("Unterminated string starting at", pystr, begin);
            goto bail;
        }
        c = PyUnicode_READ(kind, buf, next);
        if (c != 'u') {
            /* Non-unicode backslash escapes */
            end = next + 1;
            switch (c) {
                case '"': break;
                case '\\': break;
                case '/': break;
                case 'b': c = '\b'; break;
                case 'f': c = '\f'; break;
                case 'n': c = '\n'; break;
                case 'r': c = '\r'; break;
                case 't': c = '\t'; break;
                default: c = 0;
            }
            /* c == 0 marks an unrecognised escape character. */
            if (c == 0) {
                raise_errmsg("Invalid \\escape", pystr, end - 2);
                goto bail;
            }
        }
        else {
            c = 0;
            next++;
            end = next + 4;
            if (end >= len) {
                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
                goto bail;
            }
            /* Decode 4 hex digits */
            for (; next < end; next++) {
                JSON_UNICHR digit = PyUnicode_READ(kind, buf, next);
                c <<= 4;
                switch (digit) {
                    case '0': case '1': case '2': case '3': case '4':
                    case '5': case '6': case '7': case '8': case '9':
                        c |= (digit - '0'); break;
                    case 'a': case 'b': case 'c': case 'd': case 'e':
                    case 'f':
                        c |= (digit - 'a' + 10); break;
                    case 'A': case 'B': case 'C': case 'D': case 'E':
                    case 'F':
                        c |= (digit - 'A' + 10); break;
                    default:
                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
                        goto bail;
                }
            }
#if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)
            /* Surrogate pair */
            if ((c & 0xfc00) == 0xd800) {
                JSON_UNICHR c2 = 0;
                if (end + 6 >= len) {
                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
                    goto bail;
                }
                if (PyUnicode_READ(kind, buf, next++) != '\\' ||
                    PyUnicode_READ(kind, buf, next++) != 'u') {
                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
                    goto bail;
                }
                end += 6;
                /* Decode 4 hex digits */
                for (; next < end; next++) {
                    c2 <<= 4;
                    JSON_UNICHR digit = PyUnicode_READ(kind, buf, next);
                    switch (digit) {
                        case '0': case '1': case '2': case '3': case '4':
                        case '5': case '6': case '7': case '8': case '9':
                            c2 |= (digit - '0'); break;
                        case 'a': case 'b': case 'c': case 'd': case 'e':
                        case 'f':
                            c2 |= (digit - 'a' + 10); break;
                        case 'A': case 'B': case 'C': case 'D': case 'E':
                        case 'F':
                            c2 |= (digit - 'A' + 10); break;
                        default:
                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
                            goto bail;
                    }
                }
                if ((c2 & 0xfc00) != 0xdc00) {
                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
                    goto bail;
                }
                /* Combine the high and low surrogates into one code point. */
                c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
            }
            else if ((c & 0xfc00) == 0xdc00) {
                raise_errmsg("Unpaired low surrogate", pystr, end - 5);
                goto bail;
            }
#endif
        }
        APPEND_OLD_CHUNK
        chunk = JSON_UnicodeFromChar(c);
        if (chunk == NULL) {
            goto bail;
        }
    }

    if (chunks == NULL) {
        /* Zero or one chunk: no join needed. */
        if (chunk != NULL)
            rval = chunk;
        else
            rval = JSON_NewEmptyUnicode();
    }
    else {
        APPEND_OLD_CHUNK
        rval = join_list_unicode(chunks);
        if (rval == NULL) {
            goto bail;
        }
        Py_CLEAR(chunks);
    }
*next_end_ptr = end; return rval; bail: *next_end_ptr = -1; Py_XDECREF(chunk); Py_XDECREF(chunks); return NULL; } PyDoc_STRVAR(pydoc_scanstring, "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" "\n" "Scan the string s for a JSON string. End is the index of the\n" "character in s after the quote that started the JSON string.\n" "Unescapes all valid JSON string escape sequences and raises ValueError\n" "on attempt to decode an invalid string. If strict is False then literal\n" "control characters are allowed in the string.\n" "\n" "Returns a tuple of the decoded string and the index of the character in s\n" "after the end quote." ); static PyObject * py_scanstring(PyObject* self UNUSED, PyObject *args) { PyObject *pystr; PyObject *rval; Py_ssize_t end; Py_ssize_t next_end = -1; char *encoding = NULL; int strict = 1; if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { return NULL; } if (encoding == NULL) { encoding = DEFAULT_ENCODING; } if (PyUnicode_Check(pystr)) { rval = scanstring_unicode(pystr, end, strict, &next_end); } #if PY_MAJOR_VERSION < 3 /* Using a bytes input is unsupported for scanning in Python 3. It is coerced to str in the decoder before it gets here. 
*/ else if (PyString_Check(pystr)) { rval = scanstring_str(pystr, end, encoding, strict, &next_end); } #endif else { PyErr_Format(PyExc_TypeError, "first argument must be a string, not %.80s", Py_TYPE(pystr)->tp_name); return NULL; } return _build_rval_index_tuple(rval, next_end); } PyDoc_STRVAR(pydoc_encode_basestring_ascii, "encode_basestring_ascii(basestring) -> str\n" "\n" "Return an ASCII-only JSON representation of a Python string" ); static PyObject * py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) { /* Return an ASCII-only JSON representation of a Python string */ /* METH_O */ if (PyString_Check(pystr)) { return ascii_escape_str(pystr); } else if (PyUnicode_Check(pystr)) { return ascii_escape_unicode(pystr); } else { PyErr_Format(PyExc_TypeError, "first argument must be a string, not %.80s", Py_TYPE(pystr)->tp_name); return NULL; } } static void scanner_dealloc(PyObject *self) { /* Deallocate scanner object */ scanner_clear(self); Py_TYPE(self)->tp_free(self); } static int scanner_traverse(PyObject *self, visitproc visit, void *arg) { PyScannerObject *s; assert(PyScanner_Check(self)); s = (PyScannerObject *)self; Py_VISIT(s->encoding); Py_VISIT(s->strict); Py_VISIT(s->object_hook); Py_VISIT(s->pairs_hook); Py_VISIT(s->parse_float); Py_VISIT(s->parse_int); Py_VISIT(s->parse_constant); Py_VISIT(s->memo); return 0; } static int scanner_clear(PyObject *self) { PyScannerObject *s; assert(PyScanner_Check(self)); s = (PyScannerObject *)self; Py_CLEAR(s->encoding); Py_CLEAR(s->strict); Py_CLEAR(s->object_hook); Py_CLEAR(s->pairs_hook); Py_CLEAR(s->parse_float); Py_CLEAR(s->parse_int); Py_CLEAR(s->parse_constant); Py_CLEAR(s->memo); return 0; } #if PY_MAJOR_VERSION < 3 static PyObject * _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read a JSON object from PyString pystr. idx is the index of the first character after the opening curly brace. 
*next_idx_ptr is a return-by-reference index to the first character after the closing curly brace. Returns a new PyObject (usually a dict, but object_hook or object_pairs_hook can change that) */ char *str = PyString_AS_STRING(pystr); Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; PyObject *rval = NULL; PyObject *pairs = NULL; PyObject *item; PyObject *key = NULL; PyObject *val = NULL; char *encoding = JSON_ASCII_AS_STRING(s->encoding); int strict = PyObject_IsTrue(s->strict); int has_pairs_hook = (s->pairs_hook != Py_None); Py_ssize_t next_idx; if (has_pairs_hook) { pairs = PyList_New(0); if (pairs == NULL) return NULL; } else { rval = PyDict_New(); if (rval == NULL) return NULL; } /* skip whitespace after { */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; /* only loop if the object is non-empty */ if (idx <= end_idx && str[idx] != '}') { while (idx <= end_idx) { PyObject *memokey; /* read key */ if (str[idx] != '"') { raise_errmsg( "Expecting property name enclosed in double quotes", pystr, idx); goto bail; } key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); if (key == NULL) goto bail; memokey = PyDict_GetItem(s->memo, key); if (memokey != NULL) { Py_INCREF(memokey); Py_DECREF(key); key = memokey; } else { if (PyDict_SetItem(s->memo, key, key) < 0) goto bail; } idx = next_idx; /* skip whitespace between key and : delimiter, read :, skip whitespace */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; if (idx > end_idx || str[idx] != ':') { raise_errmsg("Expecting ':' delimiter", pystr, idx); goto bail; } idx++; while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; /* read any JSON data type */ val = scan_once_str(s, pystr, idx, &next_idx); if (val == NULL) goto bail; if (has_pairs_hook) { item = PyTuple_Pack(2, key, val); if (item == NULL) goto bail; Py_CLEAR(key); Py_CLEAR(val); if (PyList_Append(pairs, item) == -1) { Py_DECREF(item); goto bail; } Py_DECREF(item); } else { if (PyDict_SetItem(rval, key, val) < 0) goto 
bail; Py_CLEAR(key); Py_CLEAR(val); } idx = next_idx; /* skip whitespace before } or , */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; /* bail if the object is closed or we didn't get the , delimiter */ if (idx > end_idx) break; if (str[idx] == '}') { break; } else if (str[idx] != ',') { raise_errmsg("Expecting ',' delimiter", pystr, idx); goto bail; } idx++; /* skip whitespace after , delimiter */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; } } /* verify that idx < end_idx, str[idx] should be '}' */ if (idx > end_idx || str[idx] != '}') { raise_errmsg("Expecting object", pystr, end_idx); goto bail; } /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ if (s->pairs_hook != Py_None) { val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); if (val == NULL) goto bail; Py_DECREF(pairs); *next_idx_ptr = idx + 1; return val; } /* if object_hook is not None: rval = object_hook(rval) */ if (s->object_hook != Py_None) { val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); if (val == NULL) goto bail; Py_DECREF(rval); rval = val; val = NULL; } *next_idx_ptr = idx + 1; return rval; bail: Py_XDECREF(rval); Py_XDECREF(key); Py_XDECREF(val); Py_XDECREF(pairs); return NULL; } #endif /* PY_MAJOR_VERSION < 3 */ static PyObject * _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read a JSON object from PyUnicode pystr. idx is the index of the first character after the opening curly brace. *next_idx_ptr is a return-by-reference index to the first character after the closing curly brace. 
Returns a new PyObject (usually a dict, but object_hook can change that) */ void *str = PyUnicode_DATA(pystr); Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; PY2_UNUSED int kind = PyUnicode_KIND(pystr); PyObject *rval = NULL; PyObject *pairs = NULL; PyObject *item; PyObject *key = NULL; PyObject *val = NULL; int strict = PyObject_IsTrue(s->strict); int has_pairs_hook = (s->pairs_hook != Py_None); Py_ssize_t next_idx; if (has_pairs_hook) { pairs = PyList_New(0); if (pairs == NULL) return NULL; } else { rval = PyDict_New(); if (rval == NULL) return NULL; } /* skip whitespace after { */ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* only loop if the object is non-empty */ if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') { while (idx <= end_idx) { PyObject *memokey; /* read key */ if (PyUnicode_READ(kind, str, idx) != '"') { raise_errmsg( "Expecting property name enclosed in double quotes", pystr, idx); goto bail; } key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); if (key == NULL) goto bail; memokey = PyDict_GetItem(s->memo, key); if (memokey != NULL) { Py_INCREF(memokey); Py_DECREF(key); key = memokey; } else { if (PyDict_SetItem(s->memo, key, key) < 0) goto bail; } idx = next_idx; /* skip whitespace between key and : delimiter, read :, skip whitespace */ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') { raise_errmsg("Expecting ':' delimiter", pystr, idx); goto bail; } idx++; while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* read any JSON term */ val = scan_once_unicode(s, pystr, idx, &next_idx); if (val == NULL) goto bail; if (has_pairs_hook) { item = PyTuple_Pack(2, key, val); if (item == NULL) goto bail; Py_CLEAR(key); Py_CLEAR(val); if (PyList_Append(pairs, item) == -1) { Py_DECREF(item); goto bail; } Py_DECREF(item); } else { if (PyDict_SetItem(rval, key, val) < 0) goto bail; 
Py_CLEAR(key); Py_CLEAR(val); } idx = next_idx; /* skip whitespace before } or , */ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* bail if the object is closed or we didn't get the , delimiter */ if (idx > end_idx) break; if (PyUnicode_READ(kind, str, idx) == '}') { break; } else if (PyUnicode_READ(kind, str, idx) != ',') { raise_errmsg("Expecting ',' delimiter", pystr, idx); goto bail; } idx++; /* skip whitespace after , delimiter */ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; } } /* verify that idx < end_idx, str[idx] should be '}' */ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') { raise_errmsg("Expecting object", pystr, end_idx); goto bail; } /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ if (s->pairs_hook != Py_None) { val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); if (val == NULL) goto bail; Py_DECREF(pairs); *next_idx_ptr = idx + 1; return val; } /* if object_hook is not None: rval = object_hook(rval) */ if (s->object_hook != Py_None) { val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); if (val == NULL) goto bail; Py_DECREF(rval); rval = val; val = NULL; } *next_idx_ptr = idx + 1; return rval; bail: Py_XDECREF(rval); Py_XDECREF(key); Py_XDECREF(val); Py_XDECREF(pairs); return NULL; } #if PY_MAJOR_VERSION < 3 static PyObject * _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read a JSON array from PyString pystr. idx is the index of the first character after the opening brace. *next_idx_ptr is a return-by-reference index to the first character after the closing brace. 
Returns a new PyList */ char *str = PyString_AS_STRING(pystr); Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; PyObject *val = NULL; PyObject *rval = PyList_New(0); Py_ssize_t next_idx; if (rval == NULL) return NULL; /* skip whitespace after [ */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; /* only loop if the array is non-empty */ if (idx <= end_idx && str[idx] != ']') { while (idx <= end_idx) { /* read any JSON term and de-tuplefy the (rval, idx) */ val = scan_once_str(s, pystr, idx, &next_idx); if (val == NULL) { if (PyErr_ExceptionMatches(PyExc_StopIteration)) { PyErr_Clear(); raise_errmsg("Expecting object", pystr, idx); } goto bail; } if (PyList_Append(rval, val) == -1) goto bail; Py_CLEAR(val); idx = next_idx; /* skip whitespace between term and , */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; /* bail if the array is closed or we didn't get the , delimiter */ if (idx > end_idx) break; if (str[idx] == ']') { break; } else if (str[idx] != ',') { raise_errmsg("Expecting ',' delimiter", pystr, idx); goto bail; } idx++; /* skip whitespace after , */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; } } /* verify that idx < end_idx, str[idx] should be ']' */ if (idx > end_idx || str[idx] != ']') { raise_errmsg("Expecting object", pystr, end_idx); goto bail; } *next_idx_ptr = idx + 1; return rval; bail: Py_XDECREF(val); Py_DECREF(rval); return NULL; } #endif /* PY_MAJOR_VERSION < 3 */ static PyObject * _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read a JSON array from PyString pystr. idx is the index of the first character after the opening brace. *next_idx_ptr is a return-by-reference index to the first character after the closing brace. 
Returns a new PyList */ PY2_UNUSED int kind = PyUnicode_KIND(pystr); void *str = PyUnicode_DATA(pystr); Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; PyObject *val = NULL; PyObject *rval = PyList_New(0); Py_ssize_t next_idx; if (rval == NULL) return NULL; /* skip whitespace after [ */ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* only loop if the array is non-empty */ if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') { while (idx <= end_idx) { /* read any JSON term */ val = scan_once_unicode(s, pystr, idx, &next_idx); if (val == NULL) { if (PyErr_ExceptionMatches(PyExc_StopIteration)) { PyErr_Clear(); raise_errmsg("Expecting object", pystr, idx); } goto bail; } if (PyList_Append(rval, val) == -1) goto bail; Py_CLEAR(val); idx = next_idx; /* skip whitespace between term and , */ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* bail if the array is closed or we didn't get the , delimiter */ if (idx > end_idx) break; if (PyUnicode_READ(kind, str, idx) == ']') { break; } else if (PyUnicode_READ(kind, str, idx) != ',') { raise_errmsg("Expecting ',' delimiter", pystr, idx); goto bail; } idx++; /* skip whitespace after , */ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; } } /* verify that idx < end_idx, str[idx] should be ']' */ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') { raise_errmsg("Expecting object", pystr, end_idx); goto bail; } *next_idx_ptr = idx + 1; return rval; bail: Py_XDECREF(val); Py_DECREF(rval); return NULL; } static PyObject * _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read a JSON constant from PyString pystr. constant is the constant string that was found ("NaN", "Infinity", "-Infinity"). idx is the index of the first character of the constant *next_idx_ptr is a return-by-reference index to the first character after the constant. 
Returns the result of parse_constant */ PyObject *cstr; PyObject *rval; /* constant is "NaN", "Infinity", or "-Infinity" */ cstr = JSON_InternFromString(constant); if (cstr == NULL) return NULL; /* rval = parse_constant(constant) */ rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); idx += JSON_Intern_GET_SIZE(cstr); Py_DECREF(cstr); *next_idx_ptr = idx; return rval; } #if PY_MAJOR_VERSION < 3 static PyObject * _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { /* Read a JSON number from PyString pystr. idx is the index of the first character of the number *next_idx_ptr is a return-by-reference index to the first character after the number. Returns a new PyObject representation of that number: PyInt, PyLong, or PyFloat. May return other types if parse_int or parse_float are set */ char *str = PyString_AS_STRING(pystr); Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; Py_ssize_t idx = start; int is_float = 0; PyObject *rval; PyObject *numstr; /* read a sign if it's there, make sure it's not the end of the string */ if (str[idx] == '-') { idx++; if (idx > end_idx) { PyErr_SetNone(PyExc_StopIteration); return NULL; } } /* read as many integer digits as we find as long as it doesn't start with 0 */ if (str[idx] >= '1' && str[idx] <= '9') { idx++; while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; } /* if it starts with 0 we only expect one integer digit */ else if (str[idx] == '0') { idx++; } /* no integer digits, error */ else { PyErr_SetNone(PyExc_StopIteration); return NULL; } /* if the next char is '.' followed by a digit then read all float digits */ if (idx < end_idx && str[idx] == '.' 
&& str[idx + 1] >= '0' && str[idx + 1] <= '9') { is_float = 1; idx += 2; while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; } /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { /* save the index of the 'e' or 'E' just in case we need to backtrack */ Py_ssize_t e_start = idx; idx++; /* read an exponent sign if present */ if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; /* read all digits */ while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; /* if we got a digit, then parse as float. if not, backtrack */ if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { is_float = 1; } else { idx = e_start; } } /* copy the section we determined to be a number */ numstr = PyString_FromStringAndSize(&str[start], idx - start); if (numstr == NULL) return NULL; if (is_float) { /* parse as a float using a fast path if available, otherwise call user defined method */ if (s->parse_float != (PyObject *)&PyFloat_Type) { rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); } else { /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */ double d = PyOS_string_to_double(PyString_AS_STRING(numstr), NULL, NULL); if (d == -1.0 && PyErr_Occurred()) return NULL; rval = PyFloat_FromDouble(d); } } else { /* parse as an int using a fast path if available, otherwise call user defined method */ if (s->parse_int != (PyObject *)&PyInt_Type) { rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); } else { rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); } } Py_DECREF(numstr); *next_idx_ptr = idx; return rval; } #endif /* PY_MAJOR_VERSION < 3 */ static PyObject * _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { /* Read a JSON number from PyUnicode pystr. 
idx is the index of the first character of the number *next_idx_ptr is a return-by-reference index to the first character after the number. Returns a new PyObject representation of that number: PyInt, PyLong, or PyFloat. May return other types if parse_int or parse_float are set */ PY2_UNUSED int kind = PyUnicode_KIND(pystr); void *str = PyUnicode_DATA(pystr); Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; Py_ssize_t idx = start; int is_float = 0; JSON_UNICHR c; PyObject *rval; PyObject *numstr; /* read a sign if it's there, make sure it's not the end of the string */ if (PyUnicode_READ(kind, str, idx) == '-') { idx++; if (idx > end_idx) { PyErr_SetNone(PyExc_StopIteration); return NULL; } } /* read as many integer digits as we find as long as it doesn't start with 0 */ c = PyUnicode_READ(kind, str, idx); if (c == '0') { /* if it starts with 0 we only expect one integer digit */ idx++; } else if (IS_DIGIT(c)) { idx++; while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) { idx++; } } else { /* no integer digits, error */ PyErr_SetNone(PyExc_StopIteration); return NULL; } /* if the next char is '.' followed by a digit then read all float digits */ if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && IS_DIGIT(PyUnicode_READ(kind, str, idx + 1))) { is_float = 1; idx += 2; while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++; } /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) { Py_ssize_t e_start = idx; idx++; /* read an exponent sign if present */ if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++; /* read all digits */ while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++; /* if we got a digit, then parse as float. 
if not, backtrack */ if (IS_DIGIT(PyUnicode_READ(kind, str, idx - 1))) { is_float = 1; } else { idx = e_start; } } /* copy the section we determined to be a number */ #if PY_MAJOR_VERSION >= 3 numstr = PyUnicode_Substring(pystr, start, idx); #else numstr = PyUnicode_FromUnicode(&((Py_UNICODE *)str)[start], idx - start); #endif if (numstr == NULL) return NULL; if (is_float) { /* parse as a float using a fast path if available, otherwise call user defined method */ if (s->parse_float != (PyObject *)&PyFloat_Type) { rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); } else { #if PY_MAJOR_VERSION >= 3 rval = PyFloat_FromString(numstr); #else rval = PyFloat_FromString(numstr, NULL); #endif } } else { /* no fast path for unicode -> int, just call */ rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); } Py_DECREF(numstr); *next_idx_ptr = idx; return rval; } #if PY_MAJOR_VERSION < 3 static PyObject * scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read one JSON term (of any kind) from PyString pystr. idx is the index of the first character of the term *next_idx_ptr is a return-by-reference index to the first character after the number. Returns a new PyObject representation of the term. 
*/ char *str = PyString_AS_STRING(pystr); Py_ssize_t length = PyString_GET_SIZE(pystr); PyObject *rval = NULL; int fallthrough = 0; if (idx >= length) { PyErr_SetNone(PyExc_StopIteration); return NULL; } switch (str[idx]) { case '"': /* string */ rval = scanstring_str(pystr, idx + 1, JSON_ASCII_AS_STRING(s->encoding), PyObject_IsTrue(s->strict), next_idx_ptr); break; case '{': /* object */ if (Py_EnterRecursiveCall(" while decoding a JSON object " "from a string")) return NULL; rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); Py_LeaveRecursiveCall(); break; case '[': /* array */ if (Py_EnterRecursiveCall(" while decoding a JSON array " "from a string")) return NULL; rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); Py_LeaveRecursiveCall(); break; case 'n': /* null */ if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; rval = Py_None; } else fallthrough = 1; break; case 't': /* true */ if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; rval = Py_True; } else fallthrough = 1; break; case 'f': /* false */ if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; rval = Py_False; } else fallthrough = 1; break; case 'N': /* NaN */ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { rval = _parse_constant(s, "NaN", idx, next_idx_ptr); } else fallthrough = 1; break; case 'I': /* Infinity */ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); } else fallthrough = 1; break; case '-': /* -Infinity */ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && 
str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); } else fallthrough = 1; break; default: fallthrough = 1; } /* Didn't find a string, object, array, or named constant. Look for a number. */ if (fallthrough) rval = _match_number_str(s, pystr, idx, next_idx_ptr); return rval; } #endif /* PY_MAJOR_VERSION < 3 */ static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read one JSON term (of any kind) from PyUnicode pystr. idx is the index of the first character of the term *next_idx_ptr is a return-by-reference index to the first character after the number. Returns a new PyObject representation of the term. */ PY2_UNUSED int kind = PyUnicode_KIND(pystr); void *str = PyUnicode_DATA(pystr); Py_ssize_t length = PyUnicode_GetLength(pystr); PyObject *rval = NULL; int fallthrough = 0; if (idx >= length) { PyErr_SetNone(PyExc_StopIteration); return NULL; } switch (PyUnicode_READ(kind, str, idx)) { case '"': /* string */ rval = scanstring_unicode(pystr, idx + 1, PyObject_IsTrue(s->strict), next_idx_ptr); break; case '{': /* object */ if (Py_EnterRecursiveCall(" while decoding a JSON object " "from a unicode string")) rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); Py_LeaveRecursiveCall(); break; case '[': /* array */ if (Py_EnterRecursiveCall(" while decoding a JSON array " "from a unicode string")) rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); Py_LeaveRecursiveCall(); break; case 'n': /* null */ if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; rval = Py_None; } else fallthrough = 1; break; case 't': /* true */ if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && 
PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; rval = Py_True; } else fallthrough = 1; break; case 'f': /* false */ if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 's' && PyUnicode_READ(kind, str, idx + 4) == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; rval = Py_False; } else fallthrough = 1; break; case 'N': /* NaN */ if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' && PyUnicode_READ(kind, str, idx + 2) == 'N') { rval = _parse_constant(s, "NaN", idx, next_idx_ptr); } else fallthrough = 1; break; case 'I': /* Infinity */ if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' && PyUnicode_READ(kind, str, idx + 2) == 'f' && PyUnicode_READ(kind, str, idx + 3) == 'i' && PyUnicode_READ(kind, str, idx + 4) == 'n' && PyUnicode_READ(kind, str, idx + 5) == 'i' && PyUnicode_READ(kind, str, idx + 6) == 't' && PyUnicode_READ(kind, str, idx + 7) == 'y') { rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); } else fallthrough = 1; break; case '-': /* -Infinity */ if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' && PyUnicode_READ(kind, str, idx + 2) == 'n' && PyUnicode_READ(kind, str, idx + 3) == 'f' && PyUnicode_READ(kind, str, idx + 4) == 'i' && PyUnicode_READ(kind, str, idx + 5) == 'n' && PyUnicode_READ(kind, str, idx + 6) == 'i' && PyUnicode_READ(kind, str, idx + 7) == 't' && PyUnicode_READ(kind, str, idx + 8) == 'y') { rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); } else fallthrough = 1; break; default: fallthrough = 1; } /* Didn't find a string, object, array, or named constant. Look for a number. 
*/ if (fallthrough) rval = _match_number_unicode(s, pystr, idx, next_idx_ptr); return rval; } static PyObject * scanner_call(PyObject *self, PyObject *args, PyObject *kwds) { /* Python callable interface to scan_once_{str,unicode} */ PyObject *pystr; PyObject *rval; Py_ssize_t idx; Py_ssize_t next_idx = -1; static char *kwlist[] = {"string", "idx", NULL}; PyScannerObject *s; assert(PyScanner_Check(self)); s = (PyScannerObject *)self; if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) return NULL; if (PyUnicode_Check(pystr)) { rval = scan_once_unicode(s, pystr, idx, &next_idx); } #if PY_MAJOR_VERSION < 3 else if (PyString_Check(pystr)) { rval = scan_once_str(s, pystr, idx, &next_idx); } #endif /* PY_MAJOR_VERSION < 3 */ else { PyErr_Format(PyExc_TypeError, "first argument must be a string, not %.80s", Py_TYPE(pystr)->tp_name); return NULL; } PyDict_Clear(s->memo); return _build_rval_index_tuple(rval, next_idx); } static PyObject * scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyScannerObject *s; s = (PyScannerObject *)type->tp_alloc(type, 0); if (s != NULL) { s->encoding = NULL; s->strict = NULL; s->object_hook = NULL; s->pairs_hook = NULL; s->parse_float = NULL; s->parse_int = NULL; s->parse_constant = NULL; } return (PyObject *)s; } static PyObject * JSON_ParseEncoding(PyObject *encoding) { if (encoding == NULL) return NULL; if (encoding == Py_None) return JSON_InternFromString(DEFAULT_ENCODING); #if PY_MAJOR_VERSION < 3 if (PyUnicode_Check(encoding)) return PyUnicode_AsEncodedString(encoding, NULL, NULL); #endif if (JSON_ASCII_Check(encoding)) { Py_INCREF(encoding); return encoding; } PyErr_SetString(PyExc_TypeError, "encoding must be a string"); return NULL; } static int scanner_init(PyObject *self, PyObject *args, PyObject *kwds) { /* Initialize Scanner object */ PyObject *ctx; static char *kwlist[] = {"context", NULL}; PyScannerObject *s; PyObject *encoding; 
assert(PyScanner_Check(self)); s = (PyScannerObject *)self; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) return -1; if (s->memo == NULL) { s->memo = PyDict_New(); if (s->memo == NULL) goto bail; } /* JSON_ASCII_AS_STRING is used on encoding */ encoding = PyObject_GetAttrString(ctx, "encoding"); s->encoding = JSON_ParseEncoding(encoding); Py_XDECREF(encoding); if (s->encoding == NULL) goto bail; /* All of these will fail "gracefully" so we don't need to verify them */ s->strict = PyObject_GetAttrString(ctx, "strict"); if (s->strict == NULL) goto bail; s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); if (s->object_hook == NULL) goto bail; s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook"); if (s->pairs_hook == NULL) goto bail; s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); if (s->parse_float == NULL) goto bail; s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); if (s->parse_int == NULL) goto bail; s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); if (s->parse_constant == NULL) goto bail; return 0; bail: Py_CLEAR(s->encoding); Py_CLEAR(s->strict); Py_CLEAR(s->object_hook); Py_CLEAR(s->pairs_hook); Py_CLEAR(s->parse_float); Py_CLEAR(s->parse_int); Py_CLEAR(s->parse_constant); return -1; } PyDoc_STRVAR(scanner_doc, "JSON scanner object"); static PyTypeObject PyScannerType = { PyVarObject_HEAD_INIT(NULL, 0) "simplejson._speedups.Scanner", /* tp_name */ sizeof(PyScannerObject), /* tp_basicsize */ 0, /* tp_itemsize */ scanner_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ scanner_call, /* tp_call */ 0, /* tp_str */ 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ scanner_doc, /* tp_doc */ 
scanner_traverse, /* tp_traverse */ scanner_clear, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ 0, /* tp_methods */ scanner_members, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ scanner_init, /* tp_init */ 0,/* PyType_GenericAlloc, */ /* tp_alloc */ scanner_new, /* tp_new */ 0,/* PyObject_GC_Del, */ /* tp_free */ }; static PyObject * encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyEncoderObject *s; s = (PyEncoderObject *)type->tp_alloc(type, 0); if (s != NULL) { s->markers = NULL; s->defaultfn = NULL; s->encoder = NULL; s->encoding = NULL; s->indent = NULL; s->key_separator = NULL; s->item_separator = NULL; s->key_memo = NULL; s->sort_keys = NULL; s->item_sort_key = NULL; s->item_sort_kw = NULL; s->Decimal = NULL; } return (PyObject *)s; } static int encoder_init(PyObject *self, PyObject *args, PyObject *kwds) { /* initialize Encoder object */ static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", "bigint_as_string", "item_sort_key", "encoding", "Decimal", NULL}; PyEncoderObject *s; PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo; PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array; PyObject *bigint_as_string, *item_sort_key, *encoding, *Decimal; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOO:make_encoder", kwlist, &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal, &namedtuple_as_object, &tuple_as_array, &bigint_as_string, &item_sort_key, &encoding, &Decimal)) return -1; s->markers = 
markers; s->defaultfn = defaultfn; s->encoder = encoder; s->encoding = JSON_ParseEncoding(encoding); if (s->encoding == NULL) return -1; s->indent = indent; s->key_separator = key_separator; s->item_separator = item_separator; s->skipkeys = (char)PyObject_IsTrue(skipkeys); s->key_memo = key_memo; s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); s->allow_nan = PyObject_IsTrue(allow_nan); s->use_decimal = PyObject_IsTrue(use_decimal); s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object); s->tuple_as_array = PyObject_IsTrue(tuple_as_array); s->bigint_as_string = PyObject_IsTrue(bigint_as_string); if (item_sort_key != Py_None) { if (!PyCallable_Check(item_sort_key)) PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable"); } else if (PyObject_IsTrue(sort_keys)) { static PyObject *itemgetter0 = NULL; if (!itemgetter0) { PyObject *operator = PyImport_ImportModule("operator"); if (!operator) return -1; itemgetter0 = PyObject_CallMethod(operator, "itemgetter", "i", 0); Py_DECREF(operator); } item_sort_key = itemgetter0; if (!item_sort_key) return -1; } if (item_sort_key == Py_None) { Py_INCREF(Py_None); s->item_sort_kw = Py_None; } else { s->item_sort_kw = PyDict_New(); if (s->item_sort_kw == NULL) return -1; if (PyDict_SetItemString(s->item_sort_kw, "key", item_sort_key)) return -1; } s->sort_keys = sort_keys; s->item_sort_key = item_sort_key; s->Decimal = Decimal; Py_INCREF(s->markers); Py_INCREF(s->defaultfn); Py_INCREF(s->encoder); Py_INCREF(s->indent); Py_INCREF(s->key_separator); Py_INCREF(s->item_separator); Py_INCREF(s->key_memo); Py_INCREF(s->sort_keys); Py_INCREF(s->item_sort_key); Py_INCREF(s->Decimal); return 0; } static PyObject * encoder_call(PyObject *self, PyObject *args, PyObject *kwds) { /* Python callable interface to encode_listencode_obj */ static char *kwlist[] = {"obj", "_current_indent_level", NULL}; PyObject *obj; Py_ssize_t indent_level; 
PyEncoderObject *s; JSON_Accu rval; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, &obj, _convertPyInt_AsSsize_t, &indent_level)) return NULL; if (JSON_Accu_Init(&rval)) return NULL; if (encoder_listencode_obj(s, &rval, obj, indent_level)) { JSON_Accu_Destroy(&rval); return NULL; } return JSON_Accu_FinishAsList(&rval); } static PyObject * _encoded_const(PyObject *obj) { /* Return the JSON string representation of None, True, False */ if (obj == Py_None) { static PyObject *s_null = NULL; if (s_null == NULL) { s_null = JSON_InternFromString("null"); } Py_INCREF(s_null); return s_null; } else if (obj == Py_True) { static PyObject *s_true = NULL; if (s_true == NULL) { s_true = JSON_InternFromString("true"); } Py_INCREF(s_true); return s_true; } else if (obj == Py_False) { static PyObject *s_false = NULL; if (s_false == NULL) { s_false = JSON_InternFromString("false"); } Py_INCREF(s_false); return s_false; } else { PyErr_SetString(PyExc_ValueError, "not a const"); return NULL; } } static PyObject * encoder_encode_float(PyEncoderObject *s, PyObject *obj) { /* Return the JSON representation of a PyFloat */ double i = PyFloat_AS_DOUBLE(obj); if (!Py_IS_FINITE(i)) { if (!s->allow_nan) { PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); return NULL; } if (i > 0) { static PyObject *sInfinity = NULL; if (sInfinity == NULL) sInfinity = JSON_InternFromString("Infinity"); if (sInfinity) Py_INCREF(sInfinity); return sInfinity; } else if (i < 0) { static PyObject *sNegInfinity = NULL; if (sNegInfinity == NULL) sNegInfinity = JSON_InternFromString("-Infinity"); if (sNegInfinity) Py_INCREF(sNegInfinity); return sNegInfinity; } else { static PyObject *sNaN = NULL; if (sNaN == NULL) sNaN = JSON_InternFromString("NaN"); if (sNaN) Py_INCREF(sNaN); return sNaN; } } /* Use a better float format here? 
*/ return PyObject_Repr(obj); } static PyObject * encoder_encode_string(PyEncoderObject *s, PyObject *obj) { /* Return the JSON representation of a string */ if (s->fast_encode) return py_encode_basestring_ascii(NULL, obj); else return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); } static int _steal_accumulate(JSON_Accu *lst, PyObject *stolen) { /* Append stolen and then decrement its reference count */ int rval = JSON_Accu_Accumulate(lst, stolen); Py_DECREF(stolen); return rval; } static int encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level) { /* Encode Python object obj to a JSON term, rval is a PyList */ int rv = -1; do { if (obj == Py_None || obj == Py_True || obj == Py_False) { PyObject *cstr = _encoded_const(obj); if (cstr != NULL) rv = _steal_accumulate(rval, cstr); } else if (PyString_Check(obj) || PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); if (encoded != NULL) rv = _steal_accumulate(rval, encoded); } else if (PyInt_Check(obj) || PyLong_Check(obj)) { PyObject *encoded = PyObject_Str(obj); if (encoded != NULL) { if (s->bigint_as_string) { encoded = maybe_quote_bigint(encoded, obj); if (encoded == NULL) break; } rv = _steal_accumulate(rval, encoded); } } else if (PyFloat_Check(obj)) { PyObject *encoded = encoder_encode_float(s, obj); if (encoded != NULL) rv = _steal_accumulate(rval, encoded); } else if (s->namedtuple_as_object && _is_namedtuple(obj)) { if (Py_EnterRecursiveCall(" while encoding a JSON object")) return rv; PyObject *newobj = PyObject_CallMethod(obj, "_asdict", NULL); if (newobj != NULL) { rv = encoder_listencode_dict(s, rval, newobj, indent_level); Py_DECREF(newobj); } Py_LeaveRecursiveCall(); } else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) { if (Py_EnterRecursiveCall(" while encoding a JSON object")) return rv; rv = encoder_listencode_list(s, rval, obj, indent_level); Py_LeaveRecursiveCall(); } else if (PyDict_Check(obj)) { if 
(Py_EnterRecursiveCall(" while encoding a JSON object")) return rv; rv = encoder_listencode_dict(s, rval, obj, indent_level); Py_LeaveRecursiveCall(); } else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) { PyObject *encoded = PyObject_Str(obj); if (encoded != NULL) rv = _steal_accumulate(rval, encoded); } else { PyObject *ident = NULL; PyObject *newobj; if (s->markers != Py_None) { int has_key; ident = PyLong_FromVoidPtr(obj); if (ident == NULL) break; has_key = PyDict_Contains(s->markers, ident); if (has_key) { if (has_key != -1) PyErr_SetString(PyExc_ValueError, "Circular reference detected"); Py_DECREF(ident); break; } if (PyDict_SetItem(s->markers, ident, obj)) { Py_DECREF(ident); break; } } if (Py_EnterRecursiveCall(" while encoding a JSON object")) return rv; newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); if (newobj == NULL) { Py_XDECREF(ident); break; } rv = encoder_listencode_obj(s, rval, newobj, indent_level); Py_LeaveRecursiveCall(); Py_DECREF(newobj); if (rv) { Py_XDECREF(ident); rv = -1; } else if (ident != NULL) { if (PyDict_DelItem(s->markers, ident)) { Py_XDECREF(ident); rv = -1; } Py_XDECREF(ident); } } } while (0); return rv; } static int encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level) { /* Encode Python dict dct a JSON term */ static PyObject *open_dict = NULL; static PyObject *close_dict = NULL; static PyObject *empty_dict = NULL; PyObject *kstr = NULL; PyObject *ident = NULL; PyObject *iter = NULL; PyObject *item = NULL; PyObject *items = NULL; PyObject *encoded = NULL; Py_ssize_t idx; if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { open_dict = JSON_InternFromString("{"); close_dict = JSON_InternFromString("}"); empty_dict = JSON_InternFromString("{}"); if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) return -1; } if (PyDict_Size(dct) == 0) return JSON_Accu_Accumulate(rval, empty_dict); if (s->markers != 
Py_None) { int has_key; ident = PyLong_FromVoidPtr(dct); if (ident == NULL) goto bail; has_key = PyDict_Contains(s->markers, ident); if (has_key) { if (has_key != -1) PyErr_SetString(PyExc_ValueError, "Circular reference detected"); goto bail; } if (PyDict_SetItem(s->markers, ident, dct)) { goto bail; } } if (JSON_Accu_Accumulate(rval, open_dict)) goto bail; if (s->indent != Py_None) { /* TODO: DOES NOT RUN */ indent_level += 1; /* newline_indent = '\n' + (_indent * _current_indent_level) separator = _item_separator + newline_indent buf += newline_indent */ } iter = encoder_dict_iteritems(s, dct); if (iter == NULL) goto bail; idx = 0; while ((item = PyIter_Next(iter))) { PyObject *encoded, *key, *value; if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); goto bail; } key = PyTuple_GET_ITEM(item, 0); if (key == NULL) goto bail; value = PyTuple_GET_ITEM(item, 1); if (value == NULL) goto bail; encoded = PyDict_GetItem(s->key_memo, key); if (encoded != NULL) { Py_INCREF(encoded); } else { kstr = encoder_stringify_key(s, key); if (kstr == NULL) goto bail; else if (kstr == Py_None) { // skipkeys Py_DECREF(item); Py_DECREF(kstr); continue; } } if (idx) { if (JSON_Accu_Accumulate(rval, s->item_separator)) goto bail; } if (encoded == NULL) { encoded = encoder_encode_string(s, kstr); Py_CLEAR(kstr); if (encoded == NULL) goto bail; if (PyDict_SetItem(s->key_memo, key, encoded)) goto bail; } if (JSON_Accu_Accumulate(rval, encoded)) { goto bail; } Py_CLEAR(encoded); if (JSON_Accu_Accumulate(rval, s->key_separator)) goto bail; if (encoder_listencode_obj(s, rval, value, indent_level)) goto bail; Py_CLEAR(item); idx += 1; } Py_CLEAR(iter); if (PyErr_Occurred()) goto bail; if (ident != NULL) { if (PyDict_DelItem(s->markers, ident)) goto bail; Py_CLEAR(ident); } if (s->indent != Py_None) { /* TODO: DOES NOT RUN */ indent_level -= 1; /* yield '\n' + (_indent * _current_indent_level) */ } if (JSON_Accu_Accumulate(rval, 
close_dict)) goto bail; return 0; bail: Py_XDECREF(encoded); Py_XDECREF(items); Py_XDECREF(iter); Py_XDECREF(kstr); Py_XDECREF(ident); return -1; } static int encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level) { /* Encode Python list seq to a JSON term */ static PyObject *open_array = NULL; static PyObject *close_array = NULL; static PyObject *empty_array = NULL; PyObject *ident = NULL; PyObject *iter = NULL; PyObject *obj = NULL; int is_true; int i = 0; if (open_array == NULL || close_array == NULL || empty_array == NULL) { open_array = JSON_InternFromString("["); close_array = JSON_InternFromString("]"); empty_array = JSON_InternFromString("[]"); if (open_array == NULL || close_array == NULL || empty_array == NULL) return -1; } ident = NULL; is_true = PyObject_IsTrue(seq); if (is_true == -1) return -1; else if (is_true == 0) return JSON_Accu_Accumulate(rval, empty_array); if (s->markers != Py_None) { int has_key; ident = PyLong_FromVoidPtr(seq); if (ident == NULL) goto bail; has_key = PyDict_Contains(s->markers, ident); if (has_key) { if (has_key != -1) PyErr_SetString(PyExc_ValueError, "Circular reference detected"); goto bail; } if (PyDict_SetItem(s->markers, ident, seq)) { goto bail; } } iter = PyObject_GetIter(seq); if (iter == NULL) goto bail; if (JSON_Accu_Accumulate(rval, open_array)) goto bail; if (s->indent != Py_None) { /* TODO: DOES NOT RUN */ indent_level += 1; /* newline_indent = '\n' + (_indent * _current_indent_level) separator = _item_separator + newline_indent buf += newline_indent */ } while ((obj = PyIter_Next(iter))) { if (i) { if (JSON_Accu_Accumulate(rval, s->item_separator)) goto bail; } if (encoder_listencode_obj(s, rval, obj, indent_level)) goto bail; i++; Py_CLEAR(obj); } Py_CLEAR(iter); if (PyErr_Occurred()) goto bail; if (ident != NULL) { if (PyDict_DelItem(s->markers, ident)) goto bail; Py_CLEAR(ident); } if (s->indent != Py_None) { /* TODO: DOES NOT RUN */ indent_level -= 1; /* yield 
'\n' + (_indent * _current_indent_level) */ } if (JSON_Accu_Accumulate(rval, close_array)) goto bail; return 0; bail: Py_XDECREF(obj); Py_XDECREF(iter); Py_XDECREF(ident); return -1; } static void encoder_dealloc(PyObject *self) { /* Deallocate Encoder */ encoder_clear(self); Py_TYPE(self)->tp_free(self); } static int encoder_traverse(PyObject *self, visitproc visit, void *arg) { PyEncoderObject *s; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; Py_VISIT(s->markers); Py_VISIT(s->defaultfn); Py_VISIT(s->encoder); Py_VISIT(s->encoding); Py_VISIT(s->indent); Py_VISIT(s->key_separator); Py_VISIT(s->item_separator); Py_VISIT(s->key_memo); Py_VISIT(s->sort_keys); Py_VISIT(s->item_sort_kw); Py_VISIT(s->item_sort_key); Py_VISIT(s->Decimal); return 0; } static int encoder_clear(PyObject *self) { /* Deallocate Encoder */ PyEncoderObject *s; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; Py_CLEAR(s->markers); Py_CLEAR(s->defaultfn); Py_CLEAR(s->encoder); Py_CLEAR(s->encoding); Py_CLEAR(s->indent); Py_CLEAR(s->key_separator); Py_CLEAR(s->item_separator); Py_CLEAR(s->key_memo); Py_CLEAR(s->sort_keys); Py_CLEAR(s->item_sort_kw); Py_CLEAR(s->item_sort_key); Py_CLEAR(s->Decimal); return 0; } PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); static PyTypeObject PyEncoderType = { PyVarObject_HEAD_INIT(NULL, 0) "simplejson._speedups.Encoder", /* tp_name */ sizeof(PyEncoderObject), /* tp_basicsize */ 0, /* tp_itemsize */ encoder_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ encoder_call, /* tp_call */ 0, /* tp_str */ 0, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ encoder_doc, /* tp_doc */ encoder_traverse, /* tp_traverse */ encoder_clear, /* tp_clear */ 0, /* tp_richcompare */ 0, /* 
tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ 0, /* tp_methods */ encoder_members, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ encoder_init, /* tp_init */ 0, /* tp_alloc */ encoder_new, /* tp_new */ 0, /* tp_free */ }; static PyMethodDef speedups_methods[] = { {"encode_basestring_ascii", (PyCFunction)py_encode_basestring_ascii, METH_O, pydoc_encode_basestring_ascii}, {"scanstring", (PyCFunction)py_scanstring, METH_VARARGS, pydoc_scanstring}, {NULL, NULL, 0, NULL} }; PyDoc_STRVAR(module_doc, "simplejson speedups\n"); #if PY_MAJOR_VERSION >= 3 static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "_speedups", /* m_name */ module_doc, /* m_doc */ -1, /* m_size */ speedups_methods, /* m_methods */ NULL, /* m_reload */ NULL, /* m_traverse */ NULL, /* m_clear*/ NULL, /* m_free */ }; #endif static PyObject * moduleinit(void) { PyObject *m; PyScannerType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyScannerType) < 0) return NULL; PyEncoderType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyEncoderType) < 0) return NULL; #if PY_MAJOR_VERSION >= 3 m = PyModule_Create(&moduledef); #else m = Py_InitModule3("_speedups", speedups_methods, module_doc); #endif Py_INCREF((PyObject*)&PyScannerType); PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); Py_INCREF((PyObject*)&PyEncoderType); PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); return m; } #if PY_MAJOR_VERSION >= 3 PyMODINIT_FUNC PyInit__speedups(void) { return moduleinit(); } #else void init_speedups(void) { moduleinit(); } #endif