diff options
Diffstat (limited to 'simple_json/encoder.py')
-rw-r--r-- | simple_json/encoder.py | 275 |
1 files changed, 275 insertions, 0 deletions
diff --git a/simple_json/encoder.py b/simple_json/encoder.py new file mode 100644 index 0000000..71dfdb4 --- /dev/null +++ b/simple_json/encoder.py @@ -0,0 +1,275 @@ +""" +Implementation of JSONEncoder +""" +import re +import math + +# this should match any kind of infinity +INFCHARS = re.compile(r'[infINF]') +ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]') +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +ESCAPE_DCT = { + '\\': '\\\\', + '"': '\\"', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', +} +for i in range(20): + ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) + +def floatstr(o, allow_nan=True): + s = str(o) + # If the first non-sign is a digit then it's not a special value + if (o < 0.0 and s[1].isdigit()) or s[0].isdigit(): + return s + elif not allow_nan: + raise ValueError("Out of range float values are not JSON compliant: %r" + % (o,)) + # These are the string representations on the platforms I've tried + if s == 'nan': + return 'NaN' + if s == 'inf': + return 'Infinity' + if s == '-inf': + return '-Infinity' + # NaN should either be inequal to itself, or equal to everything + if o != o or o == 0.0: + return 'NaN' + # Last ditch effort, assume inf + if o < 0: + return '-Infinity' + return 'Infinity' + +def encode_basestring(s): + """ + Return a JSON representation of a Python string + """ + def replace(match): + return ESCAPE_DCT[match.group(0)] + return '"' + ESCAPE.sub(replace, s) + '"' + +def encode_basestring_ascii(s): + def replace(match): + s = match.group(0) + try: + return ESCAPE_DCT[s] + except KeyError: + return '\\u%04x' % (ord(s),) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + + +class JSONEncoder(object): + """ + Extensible JSON <http://json.org> encoder for Python data structures. + + Supports the following objects and types by default: + + +-------------------+---------------+ + | Python | JSON | + +===================+===============+ + | dict | object | + +-------------------+---------------+ + | list, tuple | array | + +-------------------+---------------+ + | str, unicode | string | + +-------------------+---------------+ + | int, long, float | number | + +-------------------+---------------+ + | True | true | + +-------------------+---------------+ + | False | false | + +-------------------+---------------+ + | None | null | + +-------------------+---------------+ + + To extend this to recognize other objects, subclass and implement a + ``.default(o)`` method with another method that returns a serializable + object for ``o`` if possible, otherwise it should call the superclass + implementation (to raise ``TypeError``). + """ + def __init__(self, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True): + """ + Constructor for JSONEncoder, with sensible defaults. + + If skipkeys is False, then it is a TypeError to attempt + encoding of keys that are not str, int, long, float or None. If + skipkeys is True, such items are simply skipped. + + If ensure_ascii is True, the output is guaranteed to be str + objects with all incoming unicode characters escaped. If ensure_ascii + is false, the output will be unicode object. + + If check_circular is True, then lists, dicts, and custom encoded + objects will be checked for circular references during encoding to + prevent an infinite recursion (which would cause an OverflowError). + Otherwise, no such check takes place. + + If allow_nan is True, then NaN, Infinity, and -Infinity will be + encoded as such. This behavior is not JSON specification compliant, + but is consistent with most JavaScript based encoders and decoders. + Otherwise, it will be a ValueError to encode such floats. + """ + + self.skipkeys = skipkeys + self.ensure_ascii = ensure_ascii + self.check_circular = check_circular + self.allow_nan = allow_nan + + def _iterencode_list(self, lst, markers=None): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + yield '[' + first = True + for value in lst: + if first: + first = False + else: + yield ', ' + for chunk in self._iterencode(value, markers): + yield chunk + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(self, dct, markers=None): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + first = True + if self.ensure_ascii: + encoder = encode_basestring_ascii + else: + encoder = encode_basestring + allow_nan = self.allow_nan + for key, value in dct.iteritems(): + if isinstance(key, basestring): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. + elif isinstance(key, float): + key = floatstr(key, allow_nan) + elif isinstance(key, (int, long)): + key = str(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif self.skipkeys: + continue + else: + raise TypeError("key %r is not a string" % (key,)) + if first: + first = False + else: + yield ', ' + yield encoder(key) + yield ':' + for chunk in self._iterencode(value, markers): + yield chunk + yield '}' + if markers is not None: + del markers[markerid] + + def iterencode(self, o): + """ + Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + """ + if self.check_circular: + markers = {} + else: + markers = None + return self._iterencode(o, markers) + + def _iterencode(self, o, markers=None): + if isinstance(o, basestring): + if self.ensure_ascii: + encoder = encode_basestring_ascii + else: + encoder = encode_basestring + yield encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, (int, long)): + yield str(o) + elif isinstance(o, float): + yield floatstr(o, self.allow_nan) + elif isinstance(o, (list, tuple)): + for chunk in self._iterencode_list(o, markers): + yield chunk + elif isinstance(o, dict): + for chunk in self._iterencode_dict(o, markers): + yield chunk + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + for chunk in self._iterencode_default(o, markers): + yield chunk + if markers is not None: + del markers[markerid] + + def _iterencode_default(self, o, markers=None): + newobj = self.default(o) + return self._iterencode(newobj, markers) + + def default(self, o): + """ + Implement this method in a subclass such that it returns + a serializable object for ``o``, or calls the base implementation + (to raise a ``TypeError``). + + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + """ + raise TypeError("%r is not JSON serializable" % (o,)) + + def encode(self, o): + """ + Return a JSON string representation of a Python data structure. + """ + # This doesn't pass the iterator directly to ''.join() because it + # sucks at reporting exceptions. It's going to do this internally + # anyway because it uses PySequence_Fast or similar. + chunks = list(self.iterencode(o)) + return ''.join(chunks) + +__all__ = ['JSONEncoder'] |