diff options
| author | Christian Heimes <christian@cheimes.de> | 2008-05-08 14:29:10 +0000 | 
|---|---|---|
| committer | Christian Heimes <christian@cheimes.de> | 2008-05-08 14:29:10 +0000 | 
| commit | 90540004d3e1cbc0bd5595838bca3d1970198120 (patch) | |
| tree | 9006c98a1c71cf39761d3eb35d4c227540b24947 /Lib/json/encoder.py | |
| parent | c848655eb05c38f9c31bca0df87f2013670a1efa (diff) | |
| download | cpython-git-90540004d3e1cbc0bd5595838bca3d1970198120.tar.gz | |
Merged revisions 62734,62736,62748,62769 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
  r62734 | brett.cannon | 2008-05-05 22:21:38 +0200 (Mon, 05 May 2008) | 5 lines
  Add the 'json' package. Code taken from simplejson 1.9 and contributed by Bob
  Ippolito.
  Closes issue #2750.
........
  r62736 | georg.brandl | 2008-05-05 22:53:39 +0200 (Mon, 05 May 2008) | 2 lines
  Fix JSON module docs.
........
  r62748 | benjamin.peterson | 2008-05-06 04:51:10 +0200 (Tue, 06 May 2008) | 2 lines
  PEP 8 nits in json package
........
  r62769 | christian.heimes | 2008-05-06 18:18:41 +0200 (Tue, 06 May 2008) | 2 lines
  Intern static string
  Use float constructors instead of magic code for float constants
........
Diffstat (limited to 'Lib/json/encoder.py')
| -rw-r--r-- | Lib/json/encoder.py | 384 | 
1 files changed, 384 insertions, 0 deletions
# From: diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py
#       (new file mode 100644, index 0000000000..4cb366016f)
"""Implementation of JSONEncoder
"""

import re

try:
    from _json import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
    c_encode_basestring_ascii = None

__all__ = ['JSONEncoder']

# Characters that must always be escaped inside a JSON string.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Same, plus everything outside printable ASCII (used when ensure_ascii).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(r'[\x80-\xff]')
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Remaining control characters have no short escape; use the \uXXXX form.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

# float('inf') states the intent directly instead of relying on overflow.
INFINITY = float('inf')
FLOAT_REPR = repr


def floatstr(o, allow_nan=True):
    """Return the JSON text for the float ``o``.

    NaN and the infinities are rendered as ``NaN``, ``Infinity`` and
    ``-Infinity``.  If ``allow_nan`` is false, those values raise
    ``ValueError`` instead.  Every other float is rendered by ``FLOAT_REPR``.

    """
    # Check for specials.  Note that this type of test is processor- and/or
    # platform-specific, so do tests which don't depend on the internals.
    if o != o:
        text = 'NaN'
    elif o == INFINITY:
        text = 'Infinity'
    elif o == -INFINITY:
        text = '-Infinity'
    else:
        return FLOAT_REPR(o)

    if not allow_nan:
        msg = "Out of range float values are not JSON compliant: " + repr(o)
        raise ValueError(msg)

    return text


def encode_basestring(s):
    """Return a JSON representation of a Python string

    Only the characters JSON requires to be escaped are escaped; non-ASCII
    characters are passed through unchanged.

    """
    def replace(match):
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'


def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    ``bytes`` input is decoded as UTF-8 first.  Characters outside printable
    ASCII are written as four-hex-digit unicode escapes, using a surrogate
    pair for code points above 0xFFFF.

    """
    if isinstance(s, bytes):
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        try:
            return ESCAPE_DCT[char]
        except KeyError:
            n = ord(char)
            if n < 0x10000:
                return '\\u{0:04x}'.format(n)
            else:
                # surrogate pair
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
    return '"' + (ESCAPE_ASCII.sub(replace, s)) + '"'


# Prefer the C accelerator when it is available.
if c_encode_basestring_ascii is not None:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii


class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, bytes        | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, bytes, int, float, bool or
        None.  If skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output contains only ASCII
        characters; every non-ASCII character is written as a unicode
        escape.  If ensure_ascii is False, such characters are written
        to the output unchanged.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause a RuntimeError).
        Otherwise, no such check takes place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        encoding is the codec used to turn bytes input (values and
        dict keys) into str prior to JSON-encoding.  The default is
        UTF-8, which is also used when encoding is None.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            self.default = default
        self.encoding = encoding

    def _newline_indent(self):
        """Return a newline followed by the current indentation."""
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _decode(self, s):
        """Return ``s`` as str, decoding bytes with the configured encoding."""
        if isinstance(s, bytes):
            encoding = self.encoding
            if encoding is None:
                # Same fallback py_encode_basestring_ascii applies to bytes.
                encoding = 'utf-8'
            return s.decode(encoding)
        return s

    def _encode_string(self, s):
        """Return the JSON string literal for the str ``s``."""
        if self.ensure_ascii:
            return encode_basestring_ascii(s)
        return encode_basestring(s)

    def _iterencode_list(self, lst, markers=None):
        """Yield the chunks of the JSON array for ``lst``.

        ``markers`` maps ids of containers currently being encoded to the
        containers themselves; it is None when circular checking is off.
        Raises ValueError if ``lst`` is already being encoded.

        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        indented = self.indent is not None
        if indented:
            self.current_indent_level += 1
        try:
            if indented:
                newline_indent = self._newline_indent()
                separator = self.item_separator + newline_indent
                yield newline_indent
            else:
                separator = self.item_separator
            first = True
            for value in lst:
                if first:
                    first = False
                else:
                    yield separator
                for chunk in self._iterencode(value, markers):
                    yield chunk
        finally:
            # Restore the shared indent level even when encoding fails or
            # the generator is abandoned, so the encoder stays reusable.
            if indented:
                self.current_indent_level -= 1
        if indented:
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """Yield the chunks of the JSON object for ``dct``.

        Keys that are str, bytes, float, bool, None or int are converted to
        JSON strings.  Any other key is skipped when ``self.skipkeys`` is
        true and raises TypeError otherwise.  Raises ValueError if ``dct``
        is already being encoded.

        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        allow_nan = self.allow_nan
        indented = self.indent is not None
        if indented:
            self.current_indent_level += 1
        try:
            if indented:
                newline_indent = self._newline_indent()
                item_separator = self.item_separator + newline_indent
                yield newline_indent
            else:
                item_separator = self.item_separator
            if self.sort_keys:
                items = [(k, dct[k]) for k in sorted(dct)]
            else:
                items = dct.items()
            first = True
            for key, value in items:
                if isinstance(key, bytes):
                    key = self._decode(key)
                if isinstance(key, str):
                    pass
                # JavaScript is weakly typed for these, so it makes sense to
                # also allow them.  Many encoders seem to do something like
                # this.
                elif isinstance(key, float):
                    key = floatstr(key, allow_nan)
                # bool is a subclass of int, so the singletons must be
                # tested before the int check to get 'true'/'false'.
                elif key is True:
                    key = 'true'
                elif key is False:
                    key = 'false'
                elif key is None:
                    key = 'null'
                elif isinstance(key, int):
                    key = str(key)
                elif self.skipkeys:
                    continue
                else:
                    raise TypeError(
                        "key {0!r} is not a string".format(key))
                if first:
                    first = False
                else:
                    yield item_separator
                yield self._encode_string(key)
                yield key_separator
                for chunk in self._iterencode(value, markers):
                    yield chunk
        finally:
            # Restore the shared indent level even when encoding fails or
            # the generator is abandoned, so the encoder stays reusable.
            if indented:
                self.current_indent_level -= 1
        if indented:
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """Yield the JSON chunks for ``o``, dispatching on its type.

        Objects of unsupported types are converted with ``self.default``
        and the result is encoded in their place.

        """
        if isinstance(o, bytes):
            o = self._decode(o)
        if isinstance(o, str):
            yield self._encode_string(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """Convert ``o`` with ``self.default`` and encode the result."""
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """Implement this method in a subclass such that it returns a serializable
        object for ``o``, or calls the base implementation (to raise a
        ``TypeError``).

        For example, to support arbitrary iterators, you could implement
        default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, bytes):
            o = self._decode(o)
        if isinstance(o, str):
            return self._encode_string(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """Encode the given object and yield each string representation as
        available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)
