summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Babcock <nbabcock19@hotmail.com>2015-07-10 17:25:50 -0400
committerNick Babcock <nbabcock19@hotmail.com>2015-07-10 17:25:50 -0400
commita9e5e7256c9fb27126813c9430be1dab5f27f7ba (patch)
tree488eb7357fd5e781395161b7aa6be5aabea6a8ed
parent49724ee06f038d27ab3d4adbe4ed403692799aa1 (diff)
parent0bcdf20cc525c1343b796cb8f247ea5213c6557e (diff)
downloadsimplejson-a9e5e7256c9fb27126813c9430be1dab5f27f7ba.tar.gz
Merge branch 'master' into iterable_as_array-gh1
Conflicts: CHANGES.txt conf.py index.rst setup.py simplejson/__init__.py simplejson/_speedups.c simplejson/encoder.py simplejson/tests/test_tuple.py
-rw-r--r--.gitignore2
-rw-r--r--.travis.yml11
-rw-r--r--CHANGES.txt285
-rw-r--r--LICENSE.txt60
-rw-r--r--MANIFEST.in1
-rw-r--r--README.rst22
-rw-r--r--conf.py6
-rw-r--r--index.rst546
-rw-r--r--setup.py52
-rw-r--r--simplejson/__init__.py205
-rw-r--r--simplejson/_speedups.c1707
-rw-r--r--simplejson/compat.py46
-rw-r--r--simplejson/decoder.py185
-rw-r--r--simplejson/encoder.py373
-rw-r--r--simplejson/scanner.py62
-rw-r--r--simplejson/tests/__init__.py83
-rw-r--r--simplejson/tests/test_bigint_as_string.py67
-rw-r--r--simplejson/tests/test_bitsize_int_as_string.py73
-rw-r--r--simplejson/tests/test_decimal.py48
-rw-r--r--simplejson/tests/test_decode.py24
-rw-r--r--simplejson/tests/test_default.py2
-rw-r--r--simplejson/tests/test_dump.py121
-rw-r--r--simplejson/tests/test_encode_basestring_ascii.py9
-rw-r--r--simplejson/tests/test_encode_for_html.py8
-rw-r--r--simplejson/tests/test_errors.py41
-rw-r--r--simplejson/tests/test_fail.py105
-rw-r--r--simplejson/tests/test_float.py34
-rw-r--r--simplejson/tests/test_for_json.py97
-rw-r--r--simplejson/tests/test_indent.py30
-rw-r--r--simplejson/tests/test_item_sort_key.py20
-rw-r--r--simplejson/tests/test_namedtuple.py61
-rw-r--r--simplejson/tests/test_pass1.py21
-rw-r--r--simplejson/tests/test_pass2.py2
-rw-r--r--simplejson/tests/test_pass3.py2
-rw-r--r--simplejson/tests/test_recursion.py2
-rw-r--r--simplejson/tests/test_scanstring.py133
-rw-r--r--simplejson/tests/test_separators.py6
-rw-r--r--simplejson/tests/test_speedups.py33
-rw-r--r--simplejson/tests/test_subclass.py37
-rw-r--r--simplejson/tests/test_tool.py97
-rw-r--r--simplejson/tests/test_tuple.py8
-rw-r--r--simplejson/tests/test_unicode.py102
-rw-r--r--simplejson/tool.py25
43 files changed, 3742 insertions, 1112 deletions
diff --git a/.gitignore b/.gitignore
index c1f7933..590f60f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,9 +2,11 @@
*.egg
*.pyc
*.so
+.DS_Store
/MANIFEST
/.coverage
/coverage.xml
+/htmlcov
/build
/dist
/docs
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..71fcb13
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,11 @@
+language: python
+python:
+ - "2.6"
+ - "2.7"
+ - "3.3"
+ - "3.4"
+ - "pypy"
+script:
+ - python setup.py build_ext -i
+ - python -m compileall -f .
+ - python setup.py test
diff --git a/CHANGES.txt b/CHANGES.txt
index 69ebb57..bbacbd1 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,8 +1,289 @@
-Version 2.3.0 released 2011-XX-XX
+Version 3.7.4 released 2015-XX-XX
* New iterable_as_array encoder option to perform lazy serialization of
any iterable objects, without having to convert to tuple or list.
+Version 3.7.3 released 2015-05-31
+
+* Fix typo introduced in 3.7.0 (behavior should be indistinguishable)
+ https://github.com/simplejson/simplejson/commit/e18cc09b688ea1f3305c27616fd3cadd2adc6d31#commitcomment-11443842
+
+Version 3.7.2 released 2015-05-22
+
+* Do not cache Decimal class in encoder, only reference the decimal module.
+ This may make reload work in more common scenarios.
+
+Version 3.7.1 released 2015-05-18
+
+* Fix compilation with MSVC
+ https://github.com/simplejson/simplejson/pull/119
+
+Version 3.7.0 released 2015-05-18
+
+* simplejson no longer trusts custom str/repr methods for int, long, float
+ subclasses. These instances are now formatted as if they were exact
+ instances of those types.
+ https://github.com/simplejson/simplejson/issues/118
+
+Version 3.6.5 released 2014-10-24
+
+* Important bug fix for reference leak when an error occurs during
+ dict encoding
+ https://github.com/simplejson/simplejson/issues/109
+
+Version 3.6.4 released 2014-09-29
+
+* Important bug fix for dump when only sort_keys is set
+ https://github.com/simplejson/simplejson/issues/106
+
+Version 3.6.3 released 2014-08-18
+
+* Documentation updates
+ https://github.com/simplejson/simplejson/issues/103
+
+Version 3.6.2 released 2014-08-09
+
+* Documentation updates
+ http://bugs.python.org/issue21514
+
+Version 3.6.1 released 2014-08-09
+
+* Documentation updates
+ https://github.com/simplejson/simplejson/issues/102
+
+Version 3.6.0 released 2014-07-21
+
+* Automatically strip any UTF-8 BOM from input to more closely
+ follow the latest specs
+ https://github.com/simplejson/simplejson/pull/101
+
+Version 3.5.3 released 2014-06-24
+
+* Fix lower bound checking in scan_once / raw_decode API
+ https://github.com/simplejson/simplejson/issues/98
+
+Version 3.5.2 released 2014-05-22
+
+* Fix Windows build with VS2008
+ https://github.com/simplejson/simplejson/pull/97
+
+Version 3.5.1 released 2014-05-21
+
+* Consistently reject int_as_string_bitcount settings that are not
+ positive integers
+
+Version 3.5.0 released 2014-05-20
+
+* Added int_as_string_bitcount encoder option
+ https://github.com/simplejson/simplejson/pull/96
+* Fixed potential crash when encoder created with incorrect options
+
+Version 3.4.1 released 2014-04-30
+
+* Fixed tests to run on Python 3.4
+
+Version 3.4.0 released 2014-04-02
+
+* Native setuptools support re-introduced
+ https://github.com/simplejson/simplejson/pull/92
+
+Version 3.3.3 released 2014-02-14
+
+* Improve test suite's Python 3.4 compatibility
+ https://github.com/simplejson/simplejson/issues/87
+
+Version 3.3.2 released 2014-01-06
+
+* Docstring fix for decoded string types
+ https://github.com/simplejson/simplejson/pull/82
+
+Version 3.3.1 released 2013-10-05
+
+* JSONDecodeError exceptions can now be pickled
+ https://github.com/simplejson/simplejson/pull/78
+
+Version 3.3.0 released 2013-05-07
+
+* Unpaired surrogates once again pass through the decoder, to match older
+ behavior and the RFC-4627 spec.
+ https://github.com/simplejson/simplejson/issues/62
+
+Version 3.2.0 released 2013-05-01
+
+* New ignore_nan kwarg in encoder that serializes out
+ of range floats (Infinity, -Infinity, NaN) as null for ECMA-262
+ compliance.
+ https://github.com/simplejson/simplejson/pull/63
+* New for_json kwarg in encoder to make it possible for
+ subclasses of dict and list to be specialized.
+ https://github.com/simplejson/simplejson/pull/69
+
+Version 3.1.3 released 2013-04-06
+
+* Updated documentation to discourage subclassing whenever possible.
+ default, object_hook, and object_pairs_hook provide almost all of
+ the functionality of subclassing.
+
+Version 3.1.2 released 2013-03-20
+
+* Updated documentation to reflect separators behavior when indent is
+ not None
+ https://github.com/simplejson/simplejson/issues/59
+* Test suite should be compatible with debug builds of Python 2.x and 3.x
+ https://github.com/simplejson/simplejson/pull/65
+
+Version 3.1.1 released 2013-02-21
+
+* setup.py now has another workaround for Windows machines without
+ MSVC installed
+ http://bugs.python.org/issue7511
+
+Version 3.1.0 released 2013-02-21
+
+* Updated JSON conformance test suite
+ http://bugs.python.org/issue16559
+* simplejson.tool tests and bugfix for Python 3.x
+ http://bugs.python.org/issue16549
+* Improve error messages for certain kinds of truncated input
+ http://bugs.python.org/issue16009
+* Moved JSONDecodeError to json.scanner (still available for import
+ from json.decoder)
+* Changed scanner to use JSONDecodeError directly rather than
+ StopIteration to improve error messages
+
+Version 3.0.9 released 2013-02-21
+
+* Fix an off-by-one error in the colno property of JSONDecodeError
+ (when lineno == 1)
+ http://bugs.python.org/issue17225
+
+Version 3.0.8 released 2013-02-19
+
+* Fix a Python 2.x compiler warning for narrow unicode builds
+ https://github.com/simplejson/simplejson/issues/56
+
+Version 3.0.7 released 2013-01-11
+
+* NOTE: this release only changes the license.
+* simplejson is now dual-licensed software, MIT or AFL v2.1. It is
+ also made explicit that this code is also licensed to the PSF under
+ a Contributor Agreement.
+
+Version 3.0.6 released 2013-01-11
+
+* Fix for major Python 2.x ensure_ascii=False encoding regression
+ introduced in simplejson 3.0.0. If you use this setting, please
+ upgrade immediately.
+ https://github.com/simplejson/simplejson/issues/50
+
+Version 3.0.5 released 2013-01-03
+
+* NOTE: this release only changes the tests, it is
+ not essential to upgrade
+* Tests now run with deprecation warnings printed
+* Fixed Python 3 syntax error in simplejson.tool
+ https://github.com/simplejson/simplejson/issues/49
+* Fixed Python 3.3 deprecation warnings in test suite
+ https://github.com/simplejson/simplejson/issues/48
+
+Version 3.0.4 released 2013-01-02
+
+* MSVC compatibility for Python 3.3
+ https://github.com/simplejson/simplejson/pull/47
+
+Version 3.0.3 released 2013-01-01
+
+* Fixes for bugs introduced in 3.0.2
+* Fixes for Python 2.5 compatibility
+* MSVC compatibility for Python 2.x
+ https://github.com/simplejson/simplejson/pull/46
+
+Version 3.0.2 released 2013-01-01
+
+* THIS VERSION HAS BEEN REMOVED
+* Missed a changeset to _speedups.c in the 3.0.1 branch cut
+
+Version 3.0.1 released 2013-01-01
+
+* THIS VERSION HAS BEEN REMOVED
+* Add accumulator optimization to encoder, equivalent to the usage of
+ `_Py_Accu` in the Python 3.3 json library. Only relevant if encoding
+ very large JSON documents.
+
+Version 3.0.0 released 2012-12-30
+
+* Python 3.3 is now supported, thanks to Vinay Sajip
+ https://github.com/simplejson/simplejson/issues/8
+* `sort_keys`/`item_sort_key` now sort on the stringified version of the
+ key, rather than the original object. This ensures that the sort
+ only compares string types and makes the behavior consistent between
+ Python 2.x and Python 3.x.
+* Like other number types, Decimal instances used as keys are now
+ coerced to strings when use_decimal is True.
+
+Version 2.6.2 released 2012-09-21
+
+* JSONEncoderForHTML was not exported in the simplejson module
+ https://github.com/simplejson/simplejson/issues/41
+
+Version 2.6.1 released 2012-07-27
+
+* raw_decode() now skips whitespace before the object
+ https://github.com/simplejson/simplejson/pull/38
+
+Version 2.6.0 released 2012-06-26
+
+* Error messages changed to match proposal for Python 3.3.1
+ http://bugs.python.org/issue5067
+
+Version 2.5.2 released 2012-05-10
+
+* Fix for regression introduced in 2.5.1
+ https://github.com/simplejson/simplejson/issues/35
+
+Version 2.5.1 released 2012-05-10
+
+* Support for use_decimal=True in environments that use Python
+ sub-interpreters such as uWSGI
+ https://github.com/simplejson/simplejson/issues/34
+
+Version 2.5.0 released 2012-03-29
+
+* New item_sort_key option for encoder to allow fine grained control of sorted
+ output
+
+Version 2.4.0 released 2012-03-06
+
+* New bigint_as_string option for encoder to trade JavaScript number precision
+ issues for type issues.
+ https://github.com/simplejson/simplejson/issues/31
+
+Version 2.3.3 released 2012-02-27
+
+* Allow unknown numerical types for indent parameter
+ https://github.com/simplejson/simplejson/pull/29
+
+Version 2.3.2 released 2011-12-30
+
+* Fix crashing regression in speedups introduced in 2.3.1
+
+Version 2.3.1 released 2011-12-29
+
+* namedtuple_as_object now checks _asdict to ensure that it
+ is callable.
+ https://github.com/simplejson/simplejson/issues/26
+
+Version 2.3.0 released 2011-12-05
+
+* Any objects with _asdict() methods are now considered for
+ namedtuple_as_object.
+ https://github.com/simplejson/simplejson/pull/22
+
+Version 2.2.1 released 2011-09-06
+
+* Fix MANIFEST.in issue when building a sdist from a sdist.
+ https://github.com/simplejson/simplejson/issues/16
+
Version 2.2.0 released 2011-09-04
* Remove setuptools requirement, reverted to pure distutils
@@ -237,7 +518,7 @@ Version 1.1 released 2005-12-31
* dump, dumps, load, loads now accept an optional cls kwarg to use an
alternate JSONEncoder or JSONDecoder class for convenience.
* The read/write compatibility shim for json-py now has deprecation warnings
-
+
Version 1.0 released 2005-12-25
* Initial release
diff --git a/LICENSE.txt b/LICENSE.txt
index ad95f29..e05f49c 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,3 +1,12 @@
+simplejson is dual-licensed software. It is available under the terms
+of the MIT license, or the Academic Free License version 2.1. The full
+text of each license agreement is included below. This code is also
+licensed to the Python Software Foundation (PSF) under a Contributor
+Agreement.
+
+MIT License
+===========
+
Copyright (c) 2006 Bob Ippolito
Permission is hereby granted, free of charge, to any person obtaining a copy of
@@ -17,3 +26,54 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+
+Academic Free License v. 2.1
+============================
+
+Copyright (c) 2006 Bob Ippolito. All rights reserved.
+
+This Academic Free License (the "License") applies to any original work of authorship (the "Original Work") whose owner (the "Licensor") has placed the following notice immediately following the copyright notice for the Original Work:
+
+Licensed under the Academic Free License version 2.1
+
+1) Grant of Copyright License. Licensor hereby grants You a world-wide, royalty-free, non-exclusive, perpetual, sublicenseable license to do the following:
+
+a) to reproduce the Original Work in copies;
+
+b) to prepare derivative works ("Derivative Works") based upon the Original Work;
+
+c) to distribute copies of the Original Work and Derivative Works to the public;
+
+d) to perform the Original Work publicly; and
+
+e) to display the Original Work publicly.
+
+2) Grant of Patent License. Licensor hereby grants You a world-wide, royalty-free, non-exclusive, perpetual, sublicenseable license, under patent claims owned or controlled by the Licensor that are embodied in the Original Work as furnished by the Licensor, to make, use, sell and offer for sale the Original Work and Derivative Works.
+
+3) Grant of Source Code License. The term "Source Code" means the preferred form of the Original Work for making modifications to it and all available documentation describing how to modify the Original Work. Licensor hereby agrees to provide a machine-readable copy of the Source Code of the Original Work along with each copy of the Original Work that Licensor distributes. Licensor reserves the right to satisfy this obligation by placing a machine-readable copy of the Source Code in an information repository reasonably calculated to permit inexpensive and convenient access by You for as long as Licensor continues to distribute the Original Work, and by publishing the address of that information repository in a notice immediately following the copyright notice that applies to the Original Work.
+
+4) Exclusions From License Grant. Neither the names of Licensor, nor the names of any contributors to the Original Work, nor any of their trademarks or service marks, may be used to endorse or promote products derived from this Original Work without express prior written permission of the Licensor. Nothing in this License shall be deemed to grant any rights to trademarks, copyrights, patents, trade secrets or any other intellectual property of Licensor except as expressly stated herein. No patent license is granted to make, use, sell or offer to sell embodiments of any patent claims other than the licensed claims defined in Section 2. No right is granted to the trademarks of Licensor even if such marks are included in the Original Work. Nothing in this License shall be interpreted to prohibit Licensor from licensing under different terms from this License any Original Work that Licensor otherwise would have a right to license.
+
+5) This section intentionally omitted.
+
+6) Attribution Rights. You must retain, in the Source Code of any Derivative Works that You create, all copyright, patent or trademark notices from the Source Code of the Original Work, as well as any notices of licensing and any descriptive text identified therein as an "Attribution Notice." You must cause the Source Code for any Derivative Works that You create to carry a prominent Attribution Notice reasonably calculated to inform recipients that You have modified the Original Work.
+
+7) Warranty of Provenance and Disclaimer of Warranty. Licensor warrants that the copyright in and to the Original Work and the patent rights granted herein by Licensor are owned by the Licensor or are sublicensed to You under the terms of this License with the permission of the contributor(s) of those copyrights and patent rights. Except as expressly stated in the immediately proceeding sentence, the Original Work is provided under this License on an "AS IS" BASIS and WITHOUT WARRANTY, either express or implied, including, without limitation, the warranties of NON-INFRINGEMENT, MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY OF THE ORIGINAL WORK IS WITH YOU. This DISCLAIMER OF WARRANTY constitutes an essential part of this License. No license to Original Work is granted hereunder except under this disclaimer.
+
+8) Limitation of Liability. Under no circumstances and under no legal theory, whether in tort (including negligence), contract, or otherwise, shall the Licensor be liable to any person for any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or the use of the Original Work including, without limitation, damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses. This limitation of liability shall not apply to liability for death or personal injury resulting from Licensor's negligence to the extent applicable law prohibits such limitation. Some jurisdictions do not allow the exclusion or limitation of incidental or consequential damages, so this exclusion and limitation may not apply to You.
+
+9) Acceptance and Termination. If You distribute copies of the Original Work or a Derivative Work, You must make a reasonable effort under the circumstances to obtain the express assent of recipients to the terms of this License. Nothing else but this License (or another written agreement between Licensor and You) grants You permission to create Derivative Works based upon the Original Work or to exercise any of the rights granted in Section 1 herein, and any attempt to do so except under the terms of this License (or another written agreement between Licensor and You) is expressly prohibited by U.S. copyright law, the equivalent laws of other countries, and by international treaty. Therefore, by exercising any of the rights granted to You in Section 1 herein, You indicate Your acceptance of this License and all of its terms and conditions.
+
+10) Termination for Patent Action. This License shall terminate automatically and You may no longer exercise any of the rights granted to You by this License as of the date You commence an action, including a cross-claim or counterclaim, against Licensor or any licensee alleging that the Original Work infringes a patent. This termination provision shall not apply for an action alleging patent infringement by combinations of the Original Work with other software or hardware.
+
+11) Jurisdiction, Venue and Governing Law. Any action or suit relating to this License may be brought only in the courts of a jurisdiction wherein the Licensor resides or in which Licensor conducts its primary business, and under the laws of that jurisdiction excluding its conflict-of-law provisions. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any use of the Original Work outside the scope of this License or after its termination shall be subject to the requirements and penalties of the U.S. Copyright Act, 17 U.S.C. § 101 et seq., the equivalent laws of other countries, and international treaty. This section shall survive the termination of this License.
+
+12) Attorneys Fees. In any action to enforce the terms of this License or seeking damages relating thereto, the prevailing party shall be entitled to recover its costs and expenses, including, without limitation, reasonable attorneys' fees and costs incurred in connection with such action, including any appeal of such action. This section shall survive the termination of this License.
+
+13) Miscellaneous. This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable.
+
+14) Definition of "You" in This License. "You" throughout this License, whether in upper or lower case, means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with you. For purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
+
+15) Right to Use. You may use the Original Work in all ways not otherwise restricted or conditioned by this License or by law, and Licensor promises not to interfere with or be responsible for such uses by You.
+
+This license is Copyright (C) 2003-2004 Lawrence E. Rosen. All rights reserved. Permission is hereby granted to copy and distribute this license without modification. This license may not be modified without the express written permission of its copyright owner.
diff --git a/MANIFEST.in b/MANIFEST.in
index 9d236b4..73d569f 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,3 +2,4 @@ include *.py
include *.txt
include *.rst
include scripts/*.py
+include MANIFEST.in
diff --git a/README.rst b/README.rst
index 955221f..e4b47fb 100644
--- a/README.rst
+++ b/README.rst
@@ -1,7 +1,7 @@
simplejson is a simple, fast, complete, correct and extensible
-JSON <http://json.org> encoder and decoder for Python 2.5+. It is
-pure Python code with no dependencies, but includes an optional C
-extension for a serious speed boost.
+JSON <http://json.org> encoder and decoder for Python 2.5+
+and Python 3.3+. It is pure Python code with no dependencies,
+but includes an optional C extension for a serious speed boost.
The latest documentation for simplejson can be read online here:
http://simplejson.readthedocs.org/
@@ -10,10 +10,20 @@ simplejson is the externally maintained development version of the
json library included with Python 2.6 and Python 3.0, but maintains
backwards compatibility with Python 2.5.
-The encoder may be subclassed to provide serialization in any kind of
+The encoder can be specialized to provide serialization in any kind of
situation, without any special support by the objects to be serialized
-(somewhat like pickle).
+(somewhat like pickle). This is best done with the ``default`` kwarg
+to dumps.
The decoder can handle incoming JSON strings of any specified encoding
-(UTF-8 by default).
+(UTF-8 by default). It can also be specialized to post-process JSON
+objects with the ``object_hook`` or ``object_pairs_hook`` kwargs. This
+is particularly useful for implementing protocols such as JSON-RPC
+that have a richer type system than JSON itself.
+For those of you that have legacy systems to maintain, there is a
+very old fork of simplejson in the `python2.2`_ branch that supports
+Python 2.2. This is based off of a very old version of simplejson,
+is not maintained, and should only be used as a last resort.
+
+.. _python2.2: https://github.com/simplejson/simplejson/tree/python2.2
diff --git a/conf.py b/conf.py
index 4b4537a..9156dca 100644
--- a/conf.py
+++ b/conf.py
@@ -36,15 +36,15 @@ master_doc = 'index'
# General substitutions.
project = 'simplejson'
-copyright = '2011, Bob Ippolito'
+copyright = '2015, Bob Ippolito'
# The default replacements for |version| and |release|, also used in various
# other places throughout the built documents.
#
# The short X.Y version.
-version = '2.3'
+version = '3.7'
# The full version, including alpha/beta/rc tags.
-release = '2.3.0'
+release = '3.7.4'
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
diff --git a/index.rst b/index.rst
index b965982..d36d0d1 100644
--- a/index.rst
+++ b/index.rst
@@ -6,15 +6,19 @@
.. moduleauthor:: Bob Ippolito <bob@redivi.com>
.. sectionauthor:: Bob Ippolito <bob@redivi.com>
-JSON (JavaScript Object Notation) <http://json.org> is a subset of JavaScript
-syntax (ECMA-262 3rd edition) used as a lightweight data interchange format.
+`JSON (JavaScript Object Notation) <http://json.org>`_, specified by
+:rfc:`7159` (which obsoletes :rfc:`4627`) and by
+`ECMA-404 <http://www.ecma-international.org/publications/standards/Ecma-404.htm>`_,
+is a lightweight data interchange format inspired by
+`JavaScript <http://en.wikipedia.org/wiki/JavaScript>`_ object literal syntax
+(although it is not a strict subset of JavaScript [#rfc-errata]_ ).
:mod:`simplejson` exposes an API familiar to users of the standard library
:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
version of the :mod:`json` library contained in Python 2.6, but maintains
compatibility with Python 2.5 and (currently) has
significant performance advantages, even without using the optional C
-extension for speedups.
+extension for speedups. :mod:`simplejson` is also supported on Python 3.3+.
Development of simplejson happens on Github:
http://github.com/simplejson/simplejson
@@ -24,15 +28,15 @@ Encoding basic Python object hierarchies::
>>> import simplejson as json
>>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
'["foo", {"bar": ["baz", null, 1.0, 2]}]'
- >>> print json.dumps("\"foo\bar")
+ >>> print(json.dumps("\"foo\bar"))
"\"foo\bar"
- >>> print json.dumps(u'\u1234')
+ >>> print(json.dumps(u'\u1234'))
"\u1234"
- >>> print json.dumps('\\')
+ >>> print(json.dumps('\\'))
"\\"
- >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
+ >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True))
{"a": 0, "b": 0, "c": 0}
- >>> from StringIO import StringIO
+ >>> from simplejson.compat import StringIO
>>> io = StringIO()
>>> json.dump(['streaming API'], io)
>>> io.getvalue()
@@ -41,14 +45,14 @@ Encoding basic Python object hierarchies::
Compact encoding::
>>> import simplejson as json
- >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
+ >>> obj = [1,2,3,{'4': 5, '6': 7}]
+ >>> json.dumps(obj, separators=(',', ':'), sort_keys=True)
'[1,2,3,{"4":5,"6":7}]'
Pretty printing::
>>> import simplejson as json
- >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4 * ' ')
- >>> print '\n'.join([l.rstrip() for l in s.splitlines()])
+ >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4 * ' '))
{
"4": 5,
"6": 7
@@ -62,7 +66,7 @@ Decoding JSON::
True
>>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
True
- >>> from StringIO import StringIO
+ >>> from simplejson.compat import StringIO
>>> io = StringIO('["streaming API"]')
>>> json.load(io)[0] == 'streaming API'
True
@@ -97,7 +101,7 @@ Specializing JSON object encoding::
>>> def encode_complex(obj):
... if isinstance(obj, complex):
... return [obj.real, obj.imag]
- ... raise TypeError(repr(o) + " is not JSON serializable")
+ ... raise TypeError(repr(obj) + " is not JSON serializable")
...
>>> json.dumps(2 + 1j, default=encode_complex)
'[2.0, 1.0]'
@@ -107,7 +111,7 @@ Specializing JSON object encoding::
'[2.0, 1.0]'
-.. highlight:: none
+.. highlight:: bash
Using :mod:`simplejson.tool` from the shell to validate and pretty-print::
@@ -116,29 +120,42 @@ Using :mod:`simplejson.tool` from the shell to validate and pretty-print::
"json": "obj"
}
$ echo '{ 1.2:3.4}' | python -m simplejson.tool
- Expecting property name: line 1 column 2 (char 2)
+ Expecting property name enclosed in double quotes: line 1 column 3 (char 2)
.. highlight:: python
.. note::
- The JSON produced by this module's default settings is a subset of
- YAML, so it may be used as a serializer for that as well.
+ JSON is a subset of `YAML <http://yaml.org/>`_ 1.2. The JSON produced by
+ this module's default settings (in particular, the default *separators*
+ value) is also a subset of YAML 1.0 and 1.1. This module can thus also be
+ used as a YAML serializer.
Basic Usage
-----------
-.. function:: dump(obj, fp[, skipkeys[, ensure_ascii[, check_circular[, allow_nan[, cls[, indent[, separators[, encoding[, default[, use_decimal[, namedtuple_as_object[, tuple_as_array[, iterable_as_array[, **kw]]]]]]]]]]]]]])
+.. function:: dump(obj, fp, skipkeys=False, ensure_ascii=True, \
+ check_circular=True, allow_nan=True, cls=None, \
+ indent=None, separators=None, encoding='utf-8', \
+ default=None, use_decimal=True, \
+ namedtuple_as_object=True, tuple_as_array=True, \
+ bigint_as_string=False, sort_keys=False, \
+ item_sort_key=None, for_json=None, ignore_nan=False, \
+ int_as_string_bitcount=None, iterable_as_array=False, **kw)
Serialize *obj* as a JSON formatted stream to *fp* (a ``.write()``-supporting
- file-like object).
+ file-like object) using this :ref:`conversion table <py-to-json-table>`.
If *skipkeys* is true (default: ``False``), then dict keys that are not
of a basic type (:class:`str`, :class:`unicode`, :class:`int`, :class:`long`,
:class:`float`, :class:`bool`, ``None``) will be skipped instead of raising a
:exc:`TypeError`.
+ The :mod:`simplejson` module will produce :class:`str` objects in Python 3,
+ not :class:`bytes` objects. Therefore, ``fp.write()`` must support
+ :class:`str` input.
+
If *ensure_ascii* is false (default: ``True``), then some chunks written
to *fp* may be :class:`unicode` instances, subject to normal Python
:class:`str` to :class:`unicode` coercion rules. Unless ``fp.write()``
@@ -152,9 +169,10 @@ Basic Usage
If *allow_nan* is false (default: ``True``), then it will be a
:exc:`ValueError` to serialize out of range :class:`float` values (``nan``,
- ``inf``, ``-inf``) in strict compliance of the JSON specification.
+ ``inf``, ``-inf``) in strict compliance with the original JSON specification.
If *allow_nan* is true, their JavaScript equivalents will be used
- (``NaN``, ``Infinity``, ``-Infinity``).
+ (``NaN``, ``Infinity``, ``-Infinity``). See also *ignore_nan* for ECMA-262
+ compliant behavior.
If *indent* is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated
@@ -166,20 +184,29 @@ Basic Usage
.. versionchanged:: 2.1.0
Changed *indent* from an integer number of spaces to a string.
- If specified, *separators* should be an ``(item_separator, dict_separator)``
- tuple. By default, ``(', ', ': ')`` are used. To get the most compact JSON
- representation, you should specify ``(',', ':')`` to eliminate whitespace.
+ If specified, *separators* should be an ``(item_separator, key_separator)``
+ tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and
+ ``(',', ': ')`` otherwise. To get the most compact JSON representation,
+ you should specify ``(',', ':')`` to eliminate whitespace.
+
+ .. versionchanged:: 2.1.4
+ Use ``(',', ': ')`` as default if *indent* is not ``None``.
*encoding* is the character encoding for str instances, default is
``'utf-8'``.
*default(obj)* is a function that should return a serializable version of
- *obj* or raise :exc:`TypeError`. The default simply raises :exc:`TypeError`.
+ *obj* or raise :exc:`TypeError`. The default simply raises :exc:`TypeError`.
To use a custom :class:`JSONEncoder` subclass (e.g. one that overrides the
:meth:`default` method to serialize additional types), specify it with the
*cls* kwarg.
+ .. note::
+
+ Subclassing is not recommended. Use the *default* kwarg
+ or *for_json* instead. This is faster and more portable.
+
If *use_decimal* is true (default: ``True``) then :class:`decimal.Decimal`
will be natively serialized to JSON with full precision.
@@ -189,48 +216,119 @@ Basic Usage
.. versionchanged:: 2.2.0
The default of *use_decimal* changed to ``True`` in 2.2.0.
- If *namedtuple_as_object* is true (default: ``True``),
- :class:`tuple` subclasses with ``_asdict()`` methods will be encoded
- as JSON objects.
+ If *namedtuple_as_object* is true (default: ``True``),
+ objects with ``_asdict()`` methods will be encoded
+ as JSON objects.
- .. versionchanged:: 2.2.0
- *namedtuple_as_object* is new in 2.2.0.
+ .. versionchanged:: 2.2.0
+ *namedtuple_as_object* is new in 2.2.0.
- If *tuple_as_array* is true (default: ``True``),
- :class:`tuple` (and subclasses) will be encoded as JSON arrays.
+ .. versionchanged:: 2.3.0
+ *namedtuple_as_object* no longer requires that these objects be
+ subclasses of :class:`tuple`.
- .. versionchanged:: 2.2.0
- *tuple_as_array* is new in 2.2.0.
+ If *tuple_as_array* is true (default: ``True``),
+ :class:`tuple` (and subclasses) will be encoded as JSON arrays.
If *iterable_as_array* is true (default: ``False``),
any object not in the above table that implements ``__iter__()``
will be encoded as a JSON array.
- .. versionchanged:: 2.3.0
- *iterable_as_array* is new in 2.3.0.
+ .. versionchanged:: 3.7.4
+ *iterable_as_array* is new in 3.7.4.
+ .. versionchanged:: 2.2.0
+ *tuple_as_array* is new in 2.2.0.
- .. note::
+ If *bigint_as_string* is true (default: ``False``), :class:`int` ``2**53``
+ and higher or lower than ``-2**53`` will be encoded as strings. This is to
+ avoid the rounding that happens in JavaScript otherwise. Note that this
+ option loses type information, so use with extreme caution.
+ See also *int_as_string_bitcount*.
+
+ .. versionchanged:: 2.4.0
+ *bigint_as_string* is new in 2.4.0.
+
+ If *sort_keys* is true (not the default), then the output of dictionaries
+ will be sorted by key; this is useful for regression tests to ensure that
+ JSON serializations can be compared on a day-to-day basis.
+
+ .. versionchanged:: 3.0.0
+ Sorting now happens after the keys have been coerced to
+ strings, to avoid comparison of heterogeneously typed objects
+ (since this does not work in Python 3.3+)
+
+ If *item_sort_key* is a callable (not the default), then the output of
+ dictionaries will be sorted with it. The callable will be used like this:
+ ``sorted(dct.items(), key=item_sort_key)``. This option takes precedence
+ over *sort_keys*.
+
+ .. versionchanged:: 2.5.0
+ *item_sort_key* is new in 2.5.0.
+
+ .. versionchanged:: 3.0.0
+ Sorting now happens after the keys have been coerced to
+ strings, to avoid comparison of heterogeneously typed objects
+ (since this does not work in Python 3.3+)
+
+ If *for_json* is true (not the default), objects with a ``for_json()``
+ method will use the return value of that method for encoding as JSON instead
+ of the object.
+
+ .. versionchanged:: 3.2.0
+ *for_json* is new in 3.2.0.
+
+ If *ignore_nan* is true (default: ``False``), then out of range
+ :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
+ ``null`` in compliance with the ECMA-262 specification. If true, this will
+ override *allow_nan*.
+
+ .. versionchanged:: 3.2.0
+ *ignore_nan* is new in 3.2.0.
+
+ If *int_as_string_bitcount* is a positive number ``n`` (default: ``None``),
+ :class:`int` ``2**n`` and higher or lower than ``-2**n`` will be encoded as strings. This is to
+ avoid the rounding that happens in JavaScript otherwise. Note that this
+ option loses type information, so use with extreme caution.
+ See also *bigint_as_string* (which is equivalent to ``int_as_string_bitcount=53``).
+
+ .. versionchanged:: 3.5.0
+ *int_as_string_bitcount* is new in 3.5.0.
+
+ .. note::
JSON is not a framed protocol so unlike :mod:`pickle` or :mod:`marshal` it
does not make sense to serialize more than one JSON document without some
container protocol to delimit them.
-.. function:: dumps(obj[, skipkeys[, ensure_ascii[, check_circular[, allow_nan[, cls[, indent[, separators[, encoding[, default[, use_decimal[, namedtuple_as_object[, tuple_as_array[, iterable_as_array[, **kw]]]]]]]]]]]]]])
+.. function:: dumps(obj, skipkeys=False, ensure_ascii=True, \
+ check_circular=True, allow_nan=True, cls=None, \
+ indent=None, separators=None, encoding='utf-8', \
+ default=None, use_decimal=True, \
+ namedtuple_as_object=True, tuple_as_array=True, \
+ bigint_as_string=False, sort_keys=False, \
+ item_sort_key=None, for_json=False, ignore_nan=False, \
+ int_as_string_bitcount=None, iterable_as_array=False, **kw)
Serialize *obj* to a JSON formatted :class:`str`.
If *ensure_ascii* is false, then the return value will be a
:class:`unicode` instance. The other arguments have the same meaning as in
:func:`dump`. Note that the default *ensure_ascii* setting has much
- better performance.
+ better performance in Python 2.
+ The other options have the same meaning as in :func:`dump`.
-.. function:: load(fp[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, object_pairs_hook[, use_decimal[, **kw]]]]]]]]])
+
+.. function:: load(fp, encoding='utf-8', cls=None, object_hook=None, \
+ parse_float=None, parse_int=None, \
+ parse_constant=None, object_pairs_hook=None, \
+ use_decimal=None, **kw)
Deserialize *fp* (a ``.read()``-supporting file-like object containing a JSON
- document) to a Python object. :exc:`JSONDecodeError` will be
+ document) to a Python object using this
+ :ref:`conversion table <json-to-py-table>`. :exc:`JSONDecodeError` will be
raised if the given JSON document is not valid.
If the contents of *fp* are encoded with an ASCII based encoding other than
@@ -248,7 +346,8 @@ Basic Usage
*object_hook* is an optional function that will be called with the result of
any object literal decode (a :class:`dict`). The return value of
*object_hook* will be used instead of the :class:`dict`. This feature can be used
- to implement custom decoders (e.g. JSON-RPC class hinting).
+ to implement custom decoders (e.g. `JSON-RPC <http://www.jsonrpc.org>`_
+ class hinting).
*object_pairs_hook* is an optional function that will be called with the
result of any object literal decode with an ordered list of pairs. The
@@ -292,7 +391,12 @@ Basic Usage
To use a custom :class:`JSONDecoder` subclass, specify it with the ``cls``
kwarg. Additional keyword arguments will be passed to the constructor of the
- class.
+ class. You probably shouldn't do this.
+
+ .. note::
+
+ Subclassing is not recommended. You should use *object_hook* or
+ *object_pairs_hook*. This is faster and more portable than subclassing.
.. note::
@@ -303,7 +407,10 @@ Basic Usage
only one JSON document, it is recommended to use :func:`loads`.
-.. function:: loads(s[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, object_pairs_hook[, use_decimal[, **kw]]]]]]]]])
+.. function:: loads(s, encoding='utf-8', cls=None, object_hook=None, \
+ parse_float=None, parse_int=None, \
+ parse_constant=None, object_pairs_hook=None, \
+ use_decimal=None, **kw)
Deserialize *s* (a :class:`str` or :class:`unicode` instance containing a JSON
document) to a Python object. :exc:`JSONDecodeError` will be
@@ -325,31 +432,35 @@ Basic Usage
Encoders and decoders
---------------------
-.. class:: JSONDecoder([encoding[, object_hook[, parse_float[, parse_int[, parse_constant[, object_pairs_hook[, strict]]]]]]])
+.. class:: JSONDecoder(encoding='utf-8', object_hook=None, parse_float=None, \
+ parse_int=None, parse_constant=None, \
+ object_pairs_hook=None, strict=True)
Simple JSON decoder.
Performs the following translations in decoding by default:
- +---------------+-------------------+
- | JSON | Python |
- +===============+===================+
- | object | dict |
- +---------------+-------------------+
- | array | list |
- +---------------+-------------------+
- | string | unicode |
- +---------------+-------------------+
- | number (int) | int, long |
- +---------------+-------------------+
- | number (real) | float |
- +---------------+-------------------+
- | true | True |
- +---------------+-------------------+
- | false | False |
- +---------------+-------------------+
- | null | None |
- +---------------+-------------------+
+ .. _json-to-py-table:
+
+ +---------------+-----------+-----------+
+ | JSON | Python 2 | Python 3 |
+ +===============+===========+===========+
+ | object | dict | dict |
+ +---------------+-----------+-----------+
+ | array | list | list |
+ +---------------+-----------+-----------+
+ | string | unicode | str |
+ +---------------+-----------+-----------+
+ | number (int) | int, long | int |
+ +---------------+-----------+-----------+
+ | number (real) | float | float |
+ +---------------+-----------+-----------+
+ | true | True | True |
+ +---------------+-----------+-----------+
+ | false | False | False |
+ +---------------+-----------+-----------+
+ | null | None | None |
+ +---------------+-----------+-----------+
It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as their
corresponding ``float`` values, which is outside the JSON spec.
@@ -410,24 +521,35 @@ Encoders and decoders
:exc:`JSONDecodeError` will be raised if the given JSON
document is not valid.
- .. method:: raw_decode(s)
+ .. method:: raw_decode(s[, idx=0])
Decode a JSON document from *s* (a :class:`str` or :class:`unicode`
- beginning with a JSON document) and return a 2-tuple of the Python
- representation and the index in *s* where the document ended.
+ beginning with a JSON document) starting from the index *idx* and return
+ a 2-tuple of the Python representation and the index in *s* where the
+ document ended.
This can be used to decode a JSON document from a string that may have
- extraneous data at the end.
+ extraneous data at the end, or to decode a string that has a series of
+ JSON objects.
:exc:`JSONDecodeError` will be raised if the given JSON
document is not valid.
-.. class:: JSONEncoder([skipkeys[, ensure_ascii[, check_circular[, allow_nan[, sort_keys[, indent[, separators[, encoding[, default[, use_decimal[, namedtuple_as_object[, tuple_as_array[, iterable_as_array]]]]]]]]]]]]])
+.. class:: JSONEncoder(skipkeys=False, ensure_ascii=True, \
+ check_circular=True, allow_nan=True, sort_keys=False, \
+ indent=None, separators=None, encoding='utf-8', \
+ default=None, use_decimal=True, \
+ namedtuple_as_object=True, tuple_as_array=True, \
+ bigint_as_string=False, item_sort_key=None, \
+ for_json=False, ignore_nan=False, \
+ int_as_string_bitcount=None, iterable_as_array=False)
Extensible JSON encoder for Python data structures.
Supports the following objects and types by default:
+ .. _py-to-json-table:
+
+-------------------+---------------+
| Python | JSON |
+===================+===============+
@@ -446,6 +568,18 @@ Encoders and decoders
| None | null |
+-------------------+---------------+
+ .. note:: The JSON format only permits strings to be used as object
+ keys, thus any Python dicts to be encoded should only have string keys.
+ For backwards compatibility, several other types are automatically
+ coerced to strings: int, long, float, Decimal, bool, and None.
+ It is error-prone to rely on this behavior, so avoid it when possible.
+ Dictionaries with other types used as keys should be pre-processed or
+ wrapped in another type with an appropriate ``for_json`` method to
+ transform the keys during encoding.
+
+ It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as their
+ corresponding ``float`` values, which is outside the JSON spec.
+
.. versionchanged:: 2.2.0
Changed *namedtuple* encoding from JSON array to object.
@@ -454,29 +588,52 @@ Encoders and decoders
for ``o`` if possible, otherwise it should call the superclass implementation
(to raise :exc:`TypeError`).
+ .. note::
+
+ Subclassing is not recommended. You should use the *default*
+ or *for_json* kwarg. This is faster and more portable than subclassing.
+
If *skipkeys* is false (the default), then it is a :exc:`TypeError` to
- attempt encoding of keys that are not str, int, long, float or None. If
- *skipkeys* is true, such items are simply skipped.
+ attempt encoding of keys that are not str, int, long, float, Decimal, bool,
+ or None. If *skipkeys* is true, such items are simply skipped.
If *ensure_ascii* is true (the default), the output is guaranteed to be
:class:`str` objects with all incoming unicode characters escaped. If
*ensure_ascii* is false, the output will be a unicode object.
- If *check_circular* is false (the default), then lists, dicts, and custom
+ If *check_circular* is true (the default), then lists, dicts, and custom
encoded objects will be checked for circular references during encoding to
prevent an infinite recursion (which would cause an :exc:`OverflowError`).
Otherwise, no such check takes place.
If *allow_nan* is true (the default), then ``NaN``, ``Infinity``, and
- ``-Infinity`` will be encoded as such. This behavior is not JSON
+ ``-Infinity`` will be encoded as such. This behavior is not JSON
specification compliant, but is consistent with most JavaScript based
encoders and decoders. Otherwise, it will be a :exc:`ValueError` to encode
- such floats.
+ such floats. See also *ignore_nan* for ECMA-262 compliant behavior.
If *sort_keys* is true (not the default), then the output of dictionaries
will be sorted by key; this is useful for regression tests to ensure that
JSON serializations can be compared on a day-to-day basis.
+ .. versionchanged:: 3.0.0
+ Sorting now happens after the keys have been coerced to
+ strings, to avoid comparison of heterogeneously typed objects
+ (since this does not work in Python 3.3+)
+
+ If *item_sort_key* is a callable (not the default), then the output of
+ dictionaries will be sorted with it. The callable will be used like this:
+ ``sorted(dct.items(), key=item_sort_key)``. This option takes precedence
+ over *sort_keys*.
+
+ .. versionchanged:: 2.5.0
+ *item_sort_key* is new in 2.5.0.
+
+ .. versionchanged:: 3.0.0
+ Sorting now happens after the keys have been coerced to
+ strings, to avoid comparison of heterogeneously typed objects
+ (since this does not work in Python 3.3+)
+
If *indent* is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated
for each level of nesting. ``None`` (the default) selects the most compact
@@ -488,8 +645,12 @@ Encoders and decoders
Changed *indent* from an integer number of spaces to a string.
If specified, *separators* should be an ``(item_separator, key_separator)``
- tuple. By default, ``(', ', ': ')`` are used. To get the most compact JSON
- representation, you should specify ``(',', ':')`` to eliminate whitespace.
+ tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and
+ ``(',', ': ')`` otherwise. To get the most compact JSON representation,
+ you should specify ``(',', ':')`` to eliminate whitespace.
+
+ .. versionchanged:: 2.1.4
+ Use ``(',', ': ')`` as default if *indent* is not ``None``.
If specified, *default* should be a function that gets called for objects
that can't otherwise be serialized. It should return a JSON encodable
@@ -500,12 +661,16 @@ Encoders and decoders
``'utf-8'``.
If *namedtuple_as_object* is true (default: ``True``),
- :class:`tuple` subclasses with ``_asdict()`` methods will be encoded
+ objects with ``_asdict()`` methods will be encoded
as JSON objects.
.. versionchanged:: 2.2.0
*namedtuple_as_object* is new in 2.2.0.
+ .. versionchanged:: 2.3.0
+ *namedtuple_as_object* no longer requires that these objects be
+ subclasses of :class:`tuple`.
+
If *tuple_as_array* is true (default: ``True``),
:class:`tuple` (and subclasses) will be encoded as JSON arrays.
@@ -516,8 +681,31 @@ Encoders and decoders
any object not in the above table that implements ``__iter__()``
will be encoded as a JSON array.
- .. versionchanged:: 2.3.0
- *iterable_as_array* is new in 2.3.0.
+ .. versionchanged:: 3.7.4
+ *iterable_as_array* is new in 3.7.4.
+
+ If *bigint_as_string* is true (default: ``False``), :class:`int` ``2**53``
+ and higher or lower than ``-2**53`` will be encoded as strings. This is to
+ avoid the rounding that happens in JavaScript otherwise. Note that this
+ option loses type information, so use with extreme caution.
+
+ .. versionchanged:: 2.4.0
+ *bigint_as_string* is new in 2.4.0.
+
+ If *for_json* is true (default: ``False``), objects with a ``for_json()``
+ method will use the return value of that method for encoding as JSON instead
+ of the object.
+
+ .. versionchanged:: 3.2.0
+ *for_json* is new in 3.2.0.
+
+ If *ignore_nan* is true (default: ``False``), then out of range
+ :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
+ ``null`` in compliance with the ECMA-262 specification. If true, this will
+ override *allow_nan*.
+
+ .. versionchanged:: 3.2.0
+ *ignore_nan* is new in 3.2.0.
.. method:: default(o)
@@ -537,6 +725,13 @@ Encoders and decoders
return list(iterable)
return JSONEncoder.default(self, o)
+ .. note::
+
+ Subclassing is not recommended. You should implement this
+ as a function and pass it to the *default* kwarg of :func:`dumps`.
+ This is faster and more portable than subclassing. The
+ semantics are the same, but without the self argument or the
+ call to the super implementation.
.. method:: encode(o)
@@ -559,7 +754,16 @@ Encoders and decoders
Note that :meth:`encode` has much better performance than
:meth:`iterencode`.
-.. class:: JSONEncoderForHTML([skipkeys[, ensure_ascii[, check_circular[, allow_nan[, sort_keys[, indent[, separators[, encoding[, default]]]]]]]]])
+.. class:: JSONEncoderForHTML(skipkeys=False, ensure_ascii=True, \
+ check_circular=True, allow_nan=True, \
+ sort_keys=False, indent=None, separators=None, \
+ encoding='utf-8', \
+ default=None, use_decimal=True, \
+ namedtuple_as_object=True, \
+ tuple_as_array=True, \
+ bigint_as_string=False, item_sort_key=None, \
+ for_json=False, ignore_nan=False, \
+ int_as_string_bitcount=None)
Subclass of :class:`JSONEncoder` that escapes &, <, and > for embedding in HTML.
@@ -569,7 +773,7 @@ Encoders and decoders
Exceptions
----------
-.. exception:: JSONDecodeError(msg, doc, pos[, end])
+.. exception:: JSONDecodeError(msg, doc, pos, end=None)
Subclass of :exc:`ValueError` with the following additional attributes:
@@ -604,3 +808,183 @@ Exceptions
.. attribute:: endcolno
The column corresponding to end (may be ``None``)
+
+
+Standard Compliance and Interoperability
+----------------------------------------
+
+The JSON format is specified by :rfc:`7159` and by
+`ECMA-404 <http://www.ecma-international.org/publications/standards/Ecma-404.htm>`_.
+This section details this module's level of compliance with the RFC.
+For simplicity, :class:`JSONEncoder` and :class:`JSONDecoder` subclasses, and
+parameters other than those explicitly mentioned, are not considered.
+
+This module does not comply with the RFC in a strict fashion, implementing some
+extensions that are valid JavaScript but not valid JSON. In particular:
+
+- Infinite and NaN number values are accepted and output;
+- Repeated names within an object are accepted, and only the value of the last
+ name-value pair is used.
+
+Since the RFC permits RFC-compliant parsers to accept input texts that are not
+RFC-compliant, this module's deserializer is technically RFC-compliant under
+default settings.
+
+
+Character Encodings
+^^^^^^^^^^^^^^^^^^^
+
+The RFC recommends that JSON be represented using either UTF-8, UTF-16, or
+UTF-32, with UTF-8 being the recommended default for maximum interoperability.
+
+As permitted, though not required, by the RFC, this module's serializer sets
+*ensure_ascii=True* by default, thus escaping the output so that the resulting
+strings only contain ASCII characters.
+
+Other than the *ensure_ascii* parameter, this module is defined strictly in
+terms of conversion between Python objects and
+:class:`Unicode strings <str>`, and thus does not otherwise directly address
+the issue of character encodings.
+
+The RFC prohibits adding a byte order mark (BOM) to the start of a JSON text,
+and this module's serializer does not add a BOM to its output.
+The RFC permits, but does not require, JSON deserializers to ignore an initial
+BOM in their input. This module's deserializer will ignore an initial BOM, if
+present.
+
+.. versionchanged:: 3.6.0
+ Older versions would raise :exc:`ValueError` when an initial BOM is present.
+
+The RFC does not explicitly forbid JSON strings which contain byte sequences
+that don't correspond to valid Unicode characters (e.g. unpaired UTF-16
+surrogates), but it does note that they may cause interoperability problems.
+By default, this module accepts and outputs (when present in the original
+:class:`str`) codepoints for such sequences.
+
+Infinite and NaN Number Values
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The RFC does not permit the representation of infinite or NaN number values.
+Despite that, by default, this module accepts and outputs ``Infinity``,
+``-Infinity``, and ``NaN`` as if they were valid JSON number literal values::
+
+ >>> # Neither of these calls raises an exception, but the results are not valid JSON
+ >>> json.dumps(float('-inf'))
+ '-Infinity'
+ >>> json.dumps(float('nan'))
+ 'NaN'
+ >>> # Same when deserializing
+ >>> json.loads('-Infinity')
+ -inf
+ >>> json.loads('NaN')
+ nan
+
+In the serializer, the *allow_nan* parameter can be used to alter this
+behavior. In the deserializer, the *parse_constant* parameter can be used to
+alter this behavior.
+
+
+Repeated Names Within an Object
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The RFC specifies that the names within a JSON object should be unique, but
+does not mandate how repeated names in JSON objects should be handled. By
+default, this module does not raise an exception; instead, it ignores all but
+the last name-value pair for a given name::
+
+ >>> weird_json = '{"x": 1, "x": 2, "x": 3}'
+ >>> json.loads(weird_json) == {'x': 3}
+ True
+
+The *object_pairs_hook* parameter can be used to alter this behavior.
+
+
+Top-level Non-Object, Non-Array Values
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The old version of JSON specified by the obsolete :rfc:`4627` required that
+the top-level value of a JSON text must be either a JSON object or array
+(Python :class:`dict` or :class:`list`), and could not be a JSON null,
+boolean, number, or string value. :rfc:`7159` removed that restriction, and
+this module does not and has never implemented that restriction in either its
+serializer or its deserializer.
+
+Regardless, for maximum interoperability, you may wish to voluntarily adhere
+to the restriction yourself.
+
+
+Implementation Limitations
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Some JSON deserializer implementations may set limits on:
+
+* the size of accepted JSON texts
+* the maximum level of nesting of JSON objects and arrays
+* the range and precision of JSON numbers
+* the content and maximum length of JSON strings
+
+This module does not impose any such limits beyond those of the relevant
+Python datatypes themselves or the Python interpreter itself.
+
+When serializing to JSON, beware any such limitations in applications that may
+consume your JSON. In particular, it is common for JSON numbers to be
+deserialized into IEEE 754 double precision numbers and thus subject to that
+representation's range and precision limitations. This is especially relevant
+when serializing Python :class:`int` values of extremely large magnitude, or
+when serializing instances of "exotic" numerical types such as
+:class:`decimal.Decimal`.
+
+.. highlight:: bash
+
+.. _json-commandline:
+
+Command Line Interface
+----------------------
+
+The :mod:`simplejson.tool` module provides a simple command line interface to
+validate and pretty-print JSON.
+
+If the optional :option:`infile` and :option:`outfile` arguments are not
+specified, :attr:`sys.stdin` and :attr:`sys.stdout` will be used respectively::
+
+ $ echo '{"json": "obj"}' | python -m simplejson.tool
+ {
+ "json": "obj"
+ }
+ $ echo '{1.2:3.4}' | python -m simplejson.tool
+ Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
+
+
+Command line options
+^^^^^^^^^^^^^^^^^^^^
+
+.. cmdoption:: infile
+
+ The JSON file to be validated or pretty-printed::
+
+ $ python -m simplejson.tool mp_films.json
+ [
+ {
+ "title": "And Now for Something Completely Different",
+ "year": 1971
+ },
+ {
+ "title": "Monty Python and the Holy Grail",
+ "year": 1975
+ }
+ ]
+
+ If *infile* is not specified, read from :attr:`sys.stdin`.
+
+.. cmdoption:: outfile
+
+ Write the output of the *infile* to the given *outfile*. Otherwise, write it
+ to :attr:`sys.stdout`.
+
+.. rubric:: Footnotes
+
+.. [#rfc-errata] As noted in `the errata for RFC 7159
+ <http://www.rfc-editor.org/errata_search.php?rfc=7159>`_,
+ JSON permits literal U+2028 (LINE SEPARATOR) and
+ U+2029 (PARAGRAPH SEPARATOR) characters in strings, whereas JavaScript
+ (as of ECMAScript Edition 5.1) does not.
diff --git a/setup.py b/setup.py
index b2b0e5f..98982db 100644
--- a/setup.py
+++ b/setup.py
@@ -1,29 +1,47 @@
#!/usr/bin/env python
+from __future__ import with_statement
import sys
-from distutils.core import setup, Extension, Command
+try:
+ from setuptools import setup, Extension, Command
+except ImportError:
+ from distutils.core import setup, Extension, Command
from distutils.command.build_ext import build_ext
from distutils.errors import CCompilerError, DistutilsExecError, \
DistutilsPlatformError
IS_PYPY = hasattr(sys, 'pypy_translation_info')
-VERSION = '2.3.0'
+VERSION = '3.7.4'
DESCRIPTION = "Simple, fast, extensible JSON encoder/decoder for Python"
-LONG_DESCRIPTION = open('README.rst', 'r').read()
+
+with open('README.rst', 'r') as f:
+ LONG_DESCRIPTION = f.read()
CLASSIFIERS = filter(None, map(str.strip,
"""
+Development Status :: 5 - Production/Stable
Intended Audience :: Developers
License :: OSI Approved :: MIT License
+License :: OSI Approved :: Academic Free License (AFL)
Programming Language :: Python
+Programming Language :: Python :: 2
+Programming Language :: Python :: 2.5
+Programming Language :: Python :: 2.6
+Programming Language :: Python :: 2.7
+Programming Language :: Python :: 3
+Programming Language :: Python :: 3.3
+Programming Language :: Python :: 3.4
+Programming Language :: Python :: Implementation :: CPython
+Programming Language :: Python :: Implementation :: PyPy
Topic :: Software Development :: Libraries :: Python Modules
""".splitlines()))
if sys.platform == 'win32' and sys.version_info > (2, 6):
# 2.6's distutils.msvc9compiler can raise an IOError when failing to
# find the compiler
+ # It can also raise ValueError http://bugs.python.org/issue7511
ext_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError,
- IOError)
+ IOError, ValueError)
else:
ext_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError)
@@ -58,7 +76,10 @@ class TestCommand(Command):
def run(self):
import sys, subprocess
raise SystemExit(
- subprocess.call([sys.executable, 'simplejson/tests/__init__.py']))
+ subprocess.call([sys.executable,
+ # Turn on deprecation warnings
+ '-Wd',
+ 'simplejson/tests/__init__.py']))
def run_setup(with_binary):
cmdclass = dict(test=TestCommand)
@@ -89,16 +110,17 @@ def run_setup(with_binary):
try:
run_setup(not IS_PYPY)
except BuildFailed:
- BUILD_EXT_WARNING = "WARNING: The C extension could not be compiled, speedups are not enabled."
- print '*' * 75
- print BUILD_EXT_WARNING
- print "Failure information, if any, is above."
- print "I'm retrying the build without the C extension now."
- print '*' * 75
+ BUILD_EXT_WARNING = ("WARNING: The C extension could not be compiled, "
+ "speedups are not enabled.")
+ print('*' * 75)
+ print(BUILD_EXT_WARNING)
+ print("Failure information, if any, is above.")
+ print("I'm retrying the build without the C extension now.")
+ print('*' * 75)
run_setup(False)
- print '*' * 75
- print BUILD_EXT_WARNING
- print "Plain-Python installation succeeded."
- print '*' * 75
+ print('*' * 75)
+ print(BUILD_EXT_WARNING)
+ print("Plain-Python installation succeeded.")
+ print('*' * 75)
diff --git a/simplejson/__init__.py b/simplejson/__init__.py
index a1c0578..cac96ca 100644
--- a/simplejson/__init__.py
+++ b/simplejson/__init__.py
@@ -13,15 +13,15 @@ Encoding basic Python object hierarchies::
>>> import simplejson as json
>>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
'["foo", {"bar": ["baz", null, 1.0, 2]}]'
- >>> print json.dumps("\"foo\bar")
+ >>> print(json.dumps("\"foo\bar"))
"\"foo\bar"
- >>> print json.dumps(u'\u1234')
+ >>> print(json.dumps(u'\u1234'))
"\u1234"
- >>> print json.dumps('\\')
+ >>> print(json.dumps('\\'))
"\\"
- >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
+ >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True))
{"a": 0, "b": 0, "c": 0}
- >>> from StringIO import StringIO
+ >>> from simplejson.compat import StringIO
>>> io = StringIO()
>>> json.dump(['streaming API'], io)
>>> io.getvalue()
@@ -30,14 +30,14 @@ Encoding basic Python object hierarchies::
Compact encoding::
>>> import simplejson as json
- >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
+ >>> obj = [1,2,3,{'4': 5, '6': 7}]
+ >>> json.dumps(obj, separators=(',',':'), sort_keys=True)
'[1,2,3,{"4":5,"6":7}]'
Pretty printing::
>>> import simplejson as json
- >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ')
- >>> print '\n'.join([l.rstrip() for l in s.splitlines()])
+ >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' '))
{
"4": 5,
"6": 7
@@ -51,7 +51,7 @@ Decoding JSON::
True
>>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
True
- >>> from StringIO import StringIO
+ >>> from simplejson.compat import StringIO
>>> io = StringIO('["streaming API"]')
>>> json.load(io)[0] == 'streaming API'
True
@@ -94,33 +94,35 @@ Using simplejson.tool from the shell to validate and pretty-print::
"json": "obj"
}
$ echo '{ 1.2:3.4}' | python -m simplejson.tool
- Expecting property name: line 1 column 2 (char 2)
+ Expecting property name: line 1 column 3 (char 2)
"""
-__version__ = '2.3.0'
+from __future__ import absolute_import
+__version__ = '3.7.4'
__all__ = [
'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
- 'OrderedDict',
+ 'OrderedDict', 'simple_first',
]
__author__ = 'Bob Ippolito <bob@redivi.com>'
from decimal import Decimal
-from decoder import JSONDecoder, JSONDecodeError
-from encoder import JSONEncoder
+from .scanner import JSONDecodeError
+from .decoder import JSONDecoder
+from .encoder import JSONEncoder, JSONEncoderForHTML
def _import_OrderedDict():
import collections
try:
return collections.OrderedDict
except AttributeError:
- import ordered_dict
+ from . import ordered_dict
return ordered_dict.OrderedDict
OrderedDict = _import_OrderedDict()
def _import_c_make_encoder():
try:
- from simplejson._speedups import make_encoder
+ from ._speedups import make_encoder
return make_encoder
except ImportError:
return None
@@ -138,35 +140,42 @@ _default_encoder = JSONEncoder(
namedtuple_as_object=True,
tuple_as_array=True,
iterable_as_array=False,
+ bigint_as_string=False,
+ item_sort_key=None,
+ for_json=False,
+ ignore_nan=False,
+ int_as_string_bitcount=None,
)
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
- allow_nan=True, cls=None, indent=None, separators=None,
- encoding='utf-8', default=None, use_decimal=True,
- namedtuple_as_object=True, tuple_as_array=True,
- iterable_as_array=False,
- **kw):
+ allow_nan=True, cls=None, indent=None, separators=None,
+ encoding='utf-8', default=None, use_decimal=True,
+ namedtuple_as_object=True, tuple_as_array=True,
+ bigint_as_string=False, sort_keys=False, item_sort_key=None,
+ for_json=False, ignore_nan=False, int_as_string_bitcount=None,
+ iterable_as_array=False, **kw):
"""Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
``.write()``-supporting file-like object).
- If ``skipkeys`` is true then ``dict`` keys that are not basic types
+ If *skipkeys* is true then ``dict`` keys that are not basic types
(``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
will be skipped instead of raising a ``TypeError``.
- If ``ensure_ascii`` is false, then the some chunks written to ``fp``
+ If *ensure_ascii* is false, then some chunks written to ``fp``
may be ``unicode`` instances, subject to normal Python ``str`` to
``unicode`` coercion rules. Unless ``fp.write()`` explicitly
understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
to cause an error.
- If ``check_circular`` is false, then the circular reference check
+ If *check_circular* is false, then the circular reference check
for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse).
- If ``allow_nan`` is false, then it will be a ``ValueError`` to
+ If *allow_nan* is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
- in strict compliance of the JSON specification, instead of using the
- JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+ in strict compliance of the original JSON specification, instead of using
+ the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See
+ *ignore_nan* for ECMA-262 compliant behavior.
If *indent* is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated
@@ -175,14 +184,16 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
- If ``separators`` is an ``(item_separator, dict_separator)`` tuple
- then it will be used instead of the default ``(', ', ': ')`` separators.
- ``(',', ':')`` is the most compact JSON representation.
+ If specified, *separators* should be an
+ ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')``
+ if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most
+ compact JSON representation, you should specify ``(',', ':')`` to eliminate
+ whitespace.
- ``encoding`` is the character encoding for str instances, default is UTF-8.
+ *encoding* is the character encoding for str instances, default is UTF-8.
- ``default(obj)`` is a function that should return a serializable version
- of obj or raise TypeError. The default simply raises TypeError.
+ *default(obj)* is a function that should return a serializable version
+ of obj or raise ``TypeError``. The default simply raises ``TypeError``.
If *use_decimal* is true (default: ``True``) then decimal.Decimal
will be natively serialized to JSON with full precision.
@@ -198,18 +209,50 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
any object not in the above table that implements ``__iter__()``
will be encoded as a JSON array.
+ If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher
+ or lower than -2**53 will be encoded as strings. This is to avoid the
+ rounding that happens in Javascript otherwise. Note that this is still a
+ lossy operation that will not round-trip correctly and should be used
+ sparingly.
+
+ If *int_as_string_bitcount* is a positive number (n), then int of size
+ greater than or equal to 2**n or lower than or equal to -2**n will be
+ encoded as strings.
+
+ If specified, *item_sort_key* is a callable used to sort the items in
+ each dictionary. This is useful if you want to sort items other than
+ in alphabetical order by key. This option takes precedence over
+ *sort_keys*.
+
+ If *sort_keys* is true (default: ``False``), the output of dictionaries
+ will be sorted by item.
+
+ If *for_json* is true (default: ``False``), objects with a ``for_json()``
+ method will use the return value of that method for encoding as JSON
+ instead of the object.
+
+ If *ignore_nan* is true (default: ``False``), then out of range
+ :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
+ ``null`` in compliance with the ECMA-262 specification. If true, this will
+ override *allow_nan*.
+
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with
- the ``cls`` kwarg.
+ the ``cls`` kwarg. NOTE: You should use *default* or *for_json* instead
+ of subclassing whenever possible.
"""
# cached encoder
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
cls is None and indent is None and separators is None and
- encoding == 'utf-8' and default is None and use_decimal and
- namedtuple_as_object and tuple_as_array and
- not iterable_as_array and not kw):
+ encoding == 'utf-8' and default is None and use_decimal
+ and namedtuple_as_object and tuple_as_array and not iterable_as_array
+ and not bigint_as_string and not sort_keys
+ and not item_sort_key and not for_json
+ and not ignore_nan and int_as_string_bitcount is None
+ and not kw
+ ):
iterable = _default_encoder.iterencode(obj)
else:
if cls is None:
@@ -221,6 +264,12 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
namedtuple_as_object=namedtuple_as_object,
tuple_as_array=tuple_as_array,
iterable_as_array=iterable_as_array,
+ bigint_as_string=bigint_as_string,
+ sort_keys=sort_keys,
+ item_sort_key=item_sort_key,
+ for_json=for_json,
+ ignore_nan=ignore_nan,
+ int_as_string_bitcount=int_as_string_bitcount,
**kw).iterencode(obj)
# could accelerate with writelines in some versions of Python, at
# a debuggability cost
@@ -229,12 +278,12 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
- allow_nan=True, cls=None, indent=None, separators=None,
- encoding='utf-8', default=None, use_decimal=True,
- namedtuple_as_object=True,
- tuple_as_array=True,
- iterable_as_array=False,
- **kw):
+ allow_nan=True, cls=None, indent=None, separators=None,
+ encoding='utf-8', default=None, use_decimal=True,
+ namedtuple_as_object=True, tuple_as_array=True,
+ bigint_as_string=False, sort_keys=False, item_sort_key=None,
+ for_json=False, ignore_nan=False, int_as_string_bitcount=None,
+ iterable_as_array=False, **kw):
"""Serialize ``obj`` to a JSON formatted ``str``.
If ``skipkeys`` is false then ``dict`` keys that are not basic types
@@ -261,9 +310,11 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
- If ``separators`` is an ``(item_separator, dict_separator)`` tuple
- then it will be used instead of the default ``(', ', ': ')`` separators.
- ``(',', ':')`` is the most compact JSON representation.
+ If specified, ``separators`` should be an
+ ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')``
+ if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most
+ compact JSON representation, you should specify ``(',', ':')`` to eliminate
+ whitespace.
``encoding`` is the character encoding for str instances, default is UTF-8.
@@ -284,18 +335,48 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
any object not in the above table that implements ``__iter__()``
will be encoded as a JSON array.
+ If *bigint_as_string* is true (not the default), ints 2**53 and higher
+ or lower than -2**53 will be encoded as strings. This is to avoid the
+ rounding that happens in Javascript otherwise.
+
+ If *int_as_string_bitcount* is a positive number (n), then int of size
+ greater than or equal to 2**n or lower than or equal to -2**n will be
+ encoded as strings.
+
+ If specified, *item_sort_key* is a callable used to sort the items in
+ each dictionary. This is useful if you want to sort items other than
+ in alphabetical order by key. This option takes precedence over
+ *sort_keys*.
+
+ If *sort_keys* is true (default: ``False``), the output of dictionaries
+ will be sorted by item.
+
+ If *for_json* is true (default: ``False``), objects with a ``for_json()``
+ method will use the return value of that method for encoding as JSON
+ instead of the object.
+
+ If *ignore_nan* is true (default: ``False``), then out of range
+ :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
+ ``null`` in compliance with the ECMA-262 specification. If true, this will
+ override *allow_nan*.
+
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with
- the ``cls`` kwarg.
+ the ``cls`` kwarg. NOTE: You should use *default* instead of subclassing
+ whenever possible.
"""
# cached encoder
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
cls is None and indent is None and separators is None and
- encoding == 'utf-8' and default is None and use_decimal and
- namedtuple_as_object and tuple_as_array and
- not iterable_as_array and not kw):
+ encoding == 'utf-8' and default is None and use_decimal
+ and namedtuple_as_object and tuple_as_array and not iterable_as_array
+ and not bigint_as_string and not sort_keys
+ and not item_sort_key and not for_json
+ and not ignore_nan and int_as_string_bitcount is None
+ and not kw
+ ):
return _default_encoder.encode(obj)
if cls is None:
cls = JSONEncoder
@@ -307,6 +388,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
namedtuple_as_object=namedtuple_as_object,
tuple_as_array=tuple_as_array,
iterable_as_array=iterable_as_array,
+ bigint_as_string=bigint_as_string,
+ sort_keys=sort_keys,
+ item_sort_key=item_sort_key,
+ for_json=for_json,
+ ignore_nan=ignore_nan,
+ int_as_string_bitcount=int_as_string_bitcount,
**kw).encode(obj)
@@ -361,7 +448,8 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_float=decimal.Decimal for parity with ``dump``.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
- kwarg.
+ kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
+ of subclassing whenever possible.
"""
return loads(fp.read(),
@@ -417,7 +505,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_float=decimal.Decimal for parity with ``dump``.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
- kwarg.
+ kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
+ of subclassing whenever possible.
"""
if (cls is None and encoding is None and object_hook is None and
@@ -445,9 +534,9 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
def _toggle_speedups(enabled):
- import simplejson.decoder as dec
- import simplejson.encoder as enc
- import simplejson.scanner as scan
+ from . import decoder as dec
+ from . import encoder as enc
+ from . import scanner as scan
c_make_encoder = _import_c_make_encoder()
if enabled:
dec.scanstring = dec.c_scanstring or dec.py_scanstring
@@ -478,3 +567,9 @@ def _toggle_speedups(enabled):
encoding='utf-8',
default=None,
)
+
+def simple_first(kv):
+ """Helper function to pass to item_sort_key to sort simple
+ elements to the top, then container elements.
+ """
+ return (isinstance(kv[1], (list, dict, tuple)), kv[0])
diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c
index 783bac8..691c73c 100644
--- a/simplejson/_speedups.c
+++ b/simplejson/_speedups.c
@@ -1,11 +1,50 @@
+/* -*- mode: C; c-file-style: "python"; c-basic-offset: 4 -*- */
#include "Python.h"
#include "structmember.h"
-#if PY_VERSION_HEX < 0x02070000 && !defined(PyOS_string_to_double)
+
+#if PY_MAJOR_VERSION >= 3
+#define PyInt_FromSsize_t PyLong_FromSsize_t
+#define PyInt_AsSsize_t PyLong_AsSsize_t
+#define PyString_Check PyBytes_Check
+#define PyString_GET_SIZE PyBytes_GET_SIZE
+#define PyString_AS_STRING PyBytes_AS_STRING
+#define PyString_FromStringAndSize PyBytes_FromStringAndSize
+#define PyInt_Check(obj) 0
+#define PyInt_CheckExact(obj) 0
+#define JSON_UNICHR Py_UCS4
+#define JSON_InternFromString PyUnicode_InternFromString
+#define JSON_Intern_GET_SIZE PyUnicode_GET_SIZE
+#define JSON_ASCII_Check PyUnicode_Check
+#define JSON_ASCII_AS_STRING PyUnicode_AsUTF8
+#define PyInt_Type PyLong_Type
+#define PyInt_FromString PyLong_FromString
+#define PY2_UNUSED
+#define PY3_UNUSED UNUSED
+#define JSON_NewEmptyUnicode() PyUnicode_New(0, 127)
+#else /* PY_MAJOR_VERSION >= 3 */
+#define PY2_UNUSED UNUSED
+#define PY3_UNUSED
+#define PyUnicode_READY(obj) 0
+#define PyUnicode_KIND(obj) (sizeof(Py_UNICODE))
+#define PyUnicode_DATA(obj) ((void *)(PyUnicode_AS_UNICODE(obj)))
+#define PyUnicode_READ(kind, data, index) ((JSON_UNICHR)((const Py_UNICODE *)(data))[(index)])
+#define PyUnicode_GetLength PyUnicode_GET_SIZE
+#define JSON_UNICHR Py_UNICODE
+#define JSON_ASCII_Check PyString_Check
+#define JSON_ASCII_AS_STRING PyString_AS_STRING
+#define JSON_InternFromString PyString_InternFromString
+#define JSON_Intern_GET_SIZE PyString_GET_SIZE
+#define JSON_NewEmptyUnicode() PyUnicode_FromUnicode(NULL, 0)
+#endif /* PY_MAJOR_VERSION < 3 */
+
+#if PY_VERSION_HEX < 0x02070000
+#if !defined(PyOS_string_to_double)
#define PyOS_string_to_double json_PyOS_string_to_double
static double
json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception);
static double
-json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) {
+json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception)
+{
double x;
assert(endptr == NULL);
assert(overflow_exception == NULL);
@@ -15,22 +54,32 @@ json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exce
return x;
}
#endif
-#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
+#endif /* PY_VERSION_HEX < 0x02070000 */
+
+#if PY_VERSION_HEX < 0x02060000
+#if !defined(Py_TYPE)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif
-#if PY_VERSION_HEX < 0x02060000 && !defined(Py_SIZE)
+#if !defined(Py_SIZE)
#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
#endif
-#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
+#if !defined(PyVarObject_HEAD_INIT)
+#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
+#endif
+#endif /* PY_VERSION_HEX < 0x02060000 */
+
+#if PY_VERSION_HEX < 0x02050000
+#if !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif
-#ifndef Py_IS_FINITE
+#if !defined(Py_IS_FINITE)
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif
+#endif /* PY_VERSION_HEX < 0x02050000 */
#ifdef __GNUC__
#define UNUSED __attribute__((__unused__))
@@ -44,11 +93,38 @@ typedef int Py_ssize_t;
#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
-#define Decimal_Check(op) (PyObject_TypeCheck(op, DecimalTypePtr))
+
+#define JSON_ALLOW_NAN 1
+#define JSON_IGNORE_NAN 2
static PyTypeObject PyScannerType;
static PyTypeObject PyEncoderType;
-static PyTypeObject *DecimalTypePtr;
+
+typedef struct {
+ PyObject *large_strings; /* A list of previously accumulated large strings */
+ PyObject *small_strings; /* Pending small strings */
+} JSON_Accu;
+
+static int
+JSON_Accu_Init(JSON_Accu *acc);
+static int
+JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode);
+static PyObject *
+JSON_Accu_FinishAsList(JSON_Accu *acc);
+static void
+JSON_Accu_Destroy(JSON_Accu *acc);
+
+#define ERR_EXPECTING_VALUE "Expecting value"
+#define ERR_ARRAY_DELIMITER "Expecting ',' delimiter or ']'"
+#define ERR_ARRAY_VALUE_FIRST "Expecting value or ']'"
+#define ERR_OBJECT_DELIMITER "Expecting ',' delimiter or '}'"
+#define ERR_OBJECT_PROPERTY "Expecting property name enclosed in double quotes"
+#define ERR_OBJECT_PROPERTY_FIRST "Expecting property name enclosed in double quotes or '}'"
+#define ERR_OBJECT_PROPERTY_DELIMITER "Expecting ':' delimiter"
+#define ERR_STRING_UNTERMINATED "Unterminated string starting at"
+#define ERR_STRING_CONTROL "Invalid control character %r at"
+#define ERR_STRING_ESC1 "Invalid \\X escape sequence %r"
+#define ERR_STRING_ESC4 "Invalid \\uXXXX escape sequence"
typedef struct _PyScannerObject {
PyObject_HEAD
@@ -82,41 +158,74 @@ typedef struct _PyEncoderObject {
PyObject *key_separator;
PyObject *item_separator;
PyObject *sort_keys;
- PyObject *skipkeys;
PyObject *key_memo;
+ PyObject *encoding;
+ PyObject *Decimal;
+ PyObject *skipkeys_bool;
+ int skipkeys;
int fast_encode;
- int allow_nan;
+ /* 0, JSON_ALLOW_NAN, JSON_IGNORE_NAN */
+ int allow_or_ignore_nan;
int use_decimal;
int namedtuple_as_object;
int tuple_as_array;
int iterable_as_array;
+ PyObject *max_long_size;
+ PyObject *min_long_size;
+ PyObject *item_sort_key;
+ PyObject *item_sort_kw;
+ int for_json;
} PyEncoderObject;
static PyMemberDef encoder_members[] = {
{"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
{"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
{"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
+ {"encoding", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoding"},
{"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
{"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
{"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
{"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
- {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
+ /* Python 2.5 does not support T_BOOL */
+ {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys_bool), READONLY, "skipkeys"},
{"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
+ {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"},
+ {"max_long_size", T_OBJECT, offsetof(PyEncoderObject, max_long_size), READONLY, "max_long_size"},
+ {"min_long_size", T_OBJECT, offsetof(PyEncoderObject, min_long_size), READONLY, "min_long_size"},
{NULL}
};
+static PyObject *
+join_list_unicode(PyObject *lst);
+static PyObject *
+JSON_ParseEncoding(PyObject *encoding);
+static PyObject *
+JSON_UnicodeFromChar(JSON_UNICHR c);
+static PyObject *
+maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj);
+static Py_ssize_t
+ascii_char_size(JSON_UNICHR c);
static Py_ssize_t
-ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
+ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars);
static PyObject *
ascii_escape_unicode(PyObject *pystr);
static PyObject *
ascii_escape_str(PyObject *pystr);
static PyObject *
py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
-void init_speedups(void);
+#if PY_MAJOR_VERSION < 3
+static PyObject *
+join_list_string(PyObject *lst);
static PyObject *
scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
static PyObject *
+scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr);
+static PyObject *
+_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+#endif
+static PyObject *
+scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr);
+static PyObject *
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
static PyObject *
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
@@ -136,12 +245,14 @@ static void
encoder_dealloc(PyObject *self);
static int
encoder_clear(PyObject *self);
+static PyObject *
+encoder_stringify_key(PyEncoderObject *s, PyObject *key);
static int
-encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
+encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level);
static int
-encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
+encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level);
static int
-encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
+encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level);
static PyObject *
_encoded_const(PyObject *obj);
static void
@@ -156,21 +267,173 @@ static PyObject *
encoder_encode_float(PyEncoderObject *s, PyObject *obj);
static int
_is_namedtuple(PyObject *obj);
+static int
+_has_for_json_hook(PyObject *obj);
+static PyObject *
+moduleinit(void);
#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
#define MIN_EXPANSION 6
-#ifdef Py_UNICODE_WIDE
-#define MAX_EXPANSION (2 * MIN_EXPANSION)
+
+static int
+JSON_Accu_Init(JSON_Accu *acc)
+{
+ /* Lazily allocated */
+ acc->large_strings = NULL;
+ acc->small_strings = PyList_New(0);
+ if (acc->small_strings == NULL)
+ return -1;
+ return 0;
+}
+
+static int
+flush_accumulator(JSON_Accu *acc)
+{
+ Py_ssize_t nsmall = PyList_GET_SIZE(acc->small_strings);
+ if (nsmall) {
+ int ret;
+ PyObject *joined;
+ if (acc->large_strings == NULL) {
+ acc->large_strings = PyList_New(0);
+ if (acc->large_strings == NULL)
+ return -1;
+ }
+#if PY_MAJOR_VERSION >= 3
+ joined = join_list_unicode(acc->small_strings);
+#else /* PY_MAJOR_VERSION >= 3 */
+ joined = join_list_string(acc->small_strings);
+#endif /* PY_MAJOR_VERSION < 3 */
+ if (joined == NULL)
+ return -1;
+ if (PyList_SetSlice(acc->small_strings, 0, nsmall, NULL)) {
+ Py_DECREF(joined);
+ return -1;
+ }
+ ret = PyList_Append(acc->large_strings, joined);
+ Py_DECREF(joined);
+ return ret;
+ }
+ return 0;
+}
+
+static int
+JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode)
+{
+ Py_ssize_t nsmall;
+#if PY_MAJOR_VERSION >= 3
+ assert(PyUnicode_Check(unicode));
+#else /* PY_MAJOR_VERSION >= 3 */
+ assert(JSON_ASCII_Check(unicode) || PyUnicode_Check(unicode));
+#endif /* PY_MAJOR_VERSION < 3 */
+
+ if (PyList_Append(acc->small_strings, unicode))
+ return -1;
+ nsmall = PyList_GET_SIZE(acc->small_strings);
+ /* Each item in a list of unicode objects has an overhead (in 64-bit
+ * builds) of:
+ * - 8 bytes for the list slot
+ * - 56 bytes for the header of the unicode object
+ * that is, 64 bytes. 100000 such objects waste more than 6MB
+ * compared to a single concatenated string.
+ */
+ if (nsmall < 100000)
+ return 0;
+ return flush_accumulator(acc);
+}
+
+static PyObject *
+JSON_Accu_FinishAsList(JSON_Accu *acc)
+{
+ int ret;
+ PyObject *res;
+
+ ret = flush_accumulator(acc);
+ Py_CLEAR(acc->small_strings);
+ if (ret) {
+ Py_CLEAR(acc->large_strings);
+ return NULL;
+ }
+ res = acc->large_strings;
+ acc->large_strings = NULL;
+ if (res == NULL)
+ return PyList_New(0);
+ return res;
+}
+
+static void
+JSON_Accu_Destroy(JSON_Accu *acc)
+{
+ Py_CLEAR(acc->small_strings);
+ Py_CLEAR(acc->large_strings);
+}
+
+static int
+IS_DIGIT(JSON_UNICHR c)
+{
+ return c >= '0' && c <= '9';
+}
+
+static PyObject *
+JSON_UnicodeFromChar(JSON_UNICHR c)
+{
+#if PY_MAJOR_VERSION >= 3
+ PyObject *rval = PyUnicode_New(1, c);
+ if (rval)
+ PyUnicode_WRITE(PyUnicode_KIND(rval), PyUnicode_DATA(rval), 0, c);
+ return rval;
+#else /* PY_MAJOR_VERSION >= 3 */
+ return PyUnicode_FromUnicode(&c, 1);
+#endif /* PY_MAJOR_VERSION < 3 */
+}
+
+static PyObject *
+maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj)
+{
+ if (s->max_long_size != Py_None && s->min_long_size != Py_None) {
+ if (PyObject_RichCompareBool(obj, s->max_long_size, Py_GE) ||
+ PyObject_RichCompareBool(obj, s->min_long_size, Py_LE)) {
+#if PY_MAJOR_VERSION >= 3
+ PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded);
#else
-#define MAX_EXPANSION MIN_EXPANSION
+ PyObject* quoted = PyString_FromFormat("\"%s\"",
+ PyString_AsString(encoded));
#endif
+ Py_DECREF(encoded);
+ encoded = quoted;
+ }
+ }
+
+ return encoded;
+}
static int
_is_namedtuple(PyObject *obj)
{
- return PyTuple_Check(obj) && PyObject_HasAttrString(obj, "_asdict");
+ int rval = 0;
+ PyObject *_asdict = PyObject_GetAttrString(obj, "_asdict");
+ if (_asdict == NULL) {
+ PyErr_Clear();
+ return 0;
+ }
+ rval = PyCallable_Check(_asdict);
+ Py_DECREF(_asdict);
+ return rval;
+}
+
+static int
+_has_for_json_hook(PyObject *obj)
+{
+ int rval = 0;
+ PyObject *for_json = PyObject_GetAttrString(obj, "for_json");
+ if (for_json == NULL) {
+ PyErr_Clear();
+ return 0;
+ }
+ rval = PyCallable_Check(for_json);
+ Py_DECREF(for_json);
+ return rval;
}
static int
@@ -191,44 +454,74 @@ _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
}
static Py_ssize_t
-ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
+ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars)
{
/* Escape unicode code point c to ASCII escape sequences
in char *output. output must have at least 12 bytes unused to
accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
- output[chars++] = '\\';
- switch (c) {
- case '\\': output[chars++] = (char)c; break;
- case '"': output[chars++] = (char)c; break;
- case '\b': output[chars++] = 'b'; break;
- case '\f': output[chars++] = 'f'; break;
- case '\n': output[chars++] = 'n'; break;
- case '\r': output[chars++] = 'r'; break;
- case '\t': output[chars++] = 't'; break;
- default:
-#ifdef Py_UNICODE_WIDE
- if (c >= 0x10000) {
- /* UTF-16 surrogate pair */
- Py_UNICODE v = c - 0x10000;
- c = 0xd800 | ((v >> 10) & 0x3ff);
+ if (S_CHAR(c)) {
+ output[chars++] = (char)c;
+ }
+ else {
+ output[chars++] = '\\';
+ switch (c) {
+ case '\\': output[chars++] = (char)c; break;
+ case '"': output[chars++] = (char)c; break;
+ case '\b': output[chars++] = 'b'; break;
+ case '\f': output[chars++] = 'f'; break;
+ case '\n': output[chars++] = 'n'; break;
+ case '\r': output[chars++] = 'r'; break;
+ case '\t': output[chars++] = 't'; break;
+ default:
+#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3
+ if (c >= 0x10000) {
+ /* UTF-16 surrogate pair */
+ JSON_UNICHR v = c - 0x10000;
+ c = 0xd800 | ((v >> 10) & 0x3ff);
+ output[chars++] = 'u';
+ output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
+ output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
+ output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
+ output[chars++] = "0123456789abcdef"[(c ) & 0xf];
+ c = 0xdc00 | (v & 0x3ff);
+ output[chars++] = '\\';
+ }
+#endif
output[chars++] = 'u';
output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
output[chars++] = "0123456789abcdef"[(c ) & 0xf];
- c = 0xdc00 | (v & 0x3ff);
- output[chars++] = '\\';
- }
-#endif
- output[chars++] = 'u';
- output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
- output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
- output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
- output[chars++] = "0123456789abcdef"[(c ) & 0xf];
+ }
}
return chars;
}
+static Py_ssize_t
+ascii_char_size(JSON_UNICHR c)
+{
+ if (S_CHAR(c)) {
+ return 1;
+ }
+ else if (c == '\\' ||
+ c == '"' ||
+ c == '\b' ||
+ c == '\f' ||
+ c == '\n' ||
+ c == '\r' ||
+ c == '\t') {
+ return 2;
+ }
+#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3
+ else if (c >= 0x10000U) {
+ return 2 * MIN_EXPANSION;
+ }
+#endif
+ else {
+ return MIN_EXPANSION;
+ }
+}
+
static PyObject *
ascii_escape_unicode(PyObject *pystr)
{
@@ -236,57 +529,62 @@ ascii_escape_unicode(PyObject *pystr)
Py_ssize_t i;
Py_ssize_t input_chars;
Py_ssize_t output_size;
- Py_ssize_t max_output_size;
Py_ssize_t chars;
+ PY2_UNUSED int kind;
+ void *data;
PyObject *rval;
char *output;
- Py_UNICODE *input_unicode;
- input_chars = PyUnicode_GET_SIZE(pystr);
- input_unicode = PyUnicode_AS_UNICODE(pystr);
+ if (PyUnicode_READY(pystr))
+ return NULL;
- /* One char input can be up to 6 chars output, estimate 4 of these */
- output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
- max_output_size = 2 + (input_chars * MAX_EXPANSION);
+ kind = PyUnicode_KIND(pystr);
+ data = PyUnicode_DATA(pystr);
+ input_chars = PyUnicode_GetLength(pystr);
+ output_size = 2;
+ for (i = 0; i < input_chars; i++) {
+ output_size += ascii_char_size(PyUnicode_READ(kind, data, i));
+ }
+#if PY_MAJOR_VERSION >= 3
+ rval = PyUnicode_New(output_size, 127);
+ if (rval == NULL) {
+ return NULL;
+ }
+ assert(PyUnicode_KIND(rval) == PyUnicode_1BYTE_KIND);
+ output = (char *)PyUnicode_DATA(rval);
+#else
rval = PyString_FromStringAndSize(NULL, output_size);
if (rval == NULL) {
return NULL;
}
output = PyString_AS_STRING(rval);
+#endif
chars = 0;
output[chars++] = '"';
for (i = 0; i < input_chars; i++) {
- Py_UNICODE c = input_unicode[i];
- if (S_CHAR(c)) {
- output[chars++] = (char)c;
- }
- else {
- chars = ascii_escape_char(c, output, chars);
- }
- if (output_size - chars < (1 + MAX_EXPANSION)) {
- /* There's more than four, so let's resize by a lot */
- Py_ssize_t new_output_size = output_size * 2;
- /* This is an upper bound */
- if (new_output_size > max_output_size) {
- new_output_size = max_output_size;
- }
- /* Make sure that the output size changed before resizing */
- if (new_output_size != output_size) {
- output_size = new_output_size;
- if (_PyString_Resize(&rval, output_size) == -1) {
- return NULL;
- }
- output = PyString_AS_STRING(rval);
- }
- }
+ chars = ascii_escape_char(PyUnicode_READ(kind, data, i), output, chars);
}
output[chars++] = '"';
- if (_PyString_Resize(&rval, chars) == -1) {
+ assert(chars == output_size);
+ return rval;
+}
+
+#if PY_MAJOR_VERSION >= 3
+
+static PyObject *
+ascii_escape_str(PyObject *pystr)
+{
+ PyObject *rval;
+ PyObject *input = PyUnicode_DecodeUTF8(PyString_AS_STRING(pystr), PyString_GET_SIZE(pystr), NULL);
+ if (input == NULL)
return NULL;
- }
+ rval = ascii_escape_unicode(input);
+ Py_DECREF(input);
return rval;
}
+#else /* PY_MAJOR_VERSION >= 3 */
+
static PyObject *
ascii_escape_str(PyObject *pystr)
{
@@ -301,91 +599,201 @@ ascii_escape_str(PyObject *pystr)
input_chars = PyString_GET_SIZE(pystr);
input_str = PyString_AS_STRING(pystr);
+ output_size = 2;
/* Fast path for a string that's already ASCII */
for (i = 0; i < input_chars; i++) {
- Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
- if (!S_CHAR(c)) {
- /* If we have to escape something, scan the string for unicode */
- Py_ssize_t j;
- for (j = i; j < input_chars; j++) {
- c = (Py_UNICODE)(unsigned char)input_str[j];
- if (c > 0x7f) {
- /* We hit a non-ASCII character, bail to unicode mode */
- PyObject *uni;
- uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
- if (uni == NULL) {
- return NULL;
- }
- rval = ascii_escape_unicode(uni);
- Py_DECREF(uni);
- return rval;
- }
+ JSON_UNICHR c = (JSON_UNICHR)input_str[i];
+ if (c > 0x7f) {
+ /* We hit a non-ASCII character, bail to unicode mode */
+ PyObject *uni;
+ uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
+ if (uni == NULL) {
+ return NULL;
}
- break;
+ rval = ascii_escape_unicode(uni);
+ Py_DECREF(uni);
+ return rval;
}
+ output_size += ascii_char_size(c);
}
- if (i == input_chars) {
- /* Input is already ASCII */
- output_size = 2 + input_chars;
- }
- else {
- /* One char input can be up to 6 chars output, estimate 4 of these */
- output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
- }
rval = PyString_FromStringAndSize(NULL, output_size);
if (rval == NULL) {
return NULL;
}
+ chars = 0;
output = PyString_AS_STRING(rval);
- output[0] = '"';
-
- /* We know that everything up to i is ASCII already */
- chars = i + 1;
- memcpy(&output[1], input_str, i);
+ output[chars++] = '"';
+ for (i = 0; i < input_chars; i++) {
+ chars = ascii_escape_char((JSON_UNICHR)input_str[i], output, chars);
+ }
+ output[chars++] = '"';
+ assert(chars == output_size);
+ return rval;
+}
+#endif /* PY_MAJOR_VERSION < 3 */
- for (; i < input_chars; i++) {
- Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
- if (S_CHAR(c)) {
- output[chars++] = (char)c;
- }
- else {
- chars = ascii_escape_char(c, output, chars);
- }
- /* An ASCII char can't possibly expand to a surrogate! */
- if (output_size - chars < (1 + MIN_EXPANSION)) {
- /* There's more than four, so let's resize by a lot */
- output_size *= 2;
- if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
- output_size = 2 + (input_chars * MIN_EXPANSION);
- }
- if (_PyString_Resize(&rval, output_size) == -1) {
+static PyObject *
+encoder_stringify_key(PyEncoderObject *s, PyObject *key)
+{
+ if (PyUnicode_Check(key)) {
+ Py_INCREF(key);
+ return key;
+ }
+ else if (PyString_Check(key)) {
+#if PY_MAJOR_VERSION >= 3
+ return PyUnicode_Decode(
+ PyString_AS_STRING(key),
+ PyString_GET_SIZE(key),
+ JSON_ASCII_AS_STRING(s->encoding),
+ NULL);
+#else /* PY_MAJOR_VERSION >= 3 */
+ Py_INCREF(key);
+ return key;
+#endif /* PY_MAJOR_VERSION < 3 */
+ }
+ else if (PyFloat_Check(key)) {
+ return encoder_encode_float(s, key);
+ }
+ else if (key == Py_True || key == Py_False || key == Py_None) {
+ /* This must come before the PyInt_Check because
+ True and False are also 1 and 0.*/
+ return _encoded_const(key);
+ }
+ else if (PyInt_Check(key) || PyLong_Check(key)) {
+ if (!(PyInt_CheckExact(key) || PyLong_CheckExact(key))) {
+ /* See #118, do not trust custom str/repr */
+ PyObject *res;
+ PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyLong_Type, key, NULL);
+ if (tmp == NULL) {
return NULL;
}
- output = PyString_AS_STRING(rval);
+ res = PyObject_Str(tmp);
+ Py_DECREF(tmp);
+ return res;
+ }
+ else {
+ return PyObject_Str(key);
}
}
- output[chars++] = '"';
- if (_PyString_Resize(&rval, chars) == -1) {
+ else if (s->use_decimal && PyObject_TypeCheck(key, (PyTypeObject *)s->Decimal)) {
+ return PyObject_Str(key);
+ }
+ else if (s->skipkeys) {
+ Py_INCREF(Py_None);
+ return Py_None;
+ }
+ PyErr_SetString(PyExc_TypeError, "keys must be a string");
+ return NULL;
+}
+
+static PyObject *
+encoder_dict_iteritems(PyEncoderObject *s, PyObject *dct)
+{
+ PyObject *items;
+ PyObject *iter = NULL;
+ PyObject *lst = NULL;
+ PyObject *item = NULL;
+ PyObject *kstr = NULL;
+ static PyObject *sortfun = NULL;
+ static PyObject *sortargs = NULL;
+
+ if (sortargs == NULL) {
+ sortargs = PyTuple_New(0);
+ if (sortargs == NULL)
+ return NULL;
+ }
+
+ if (PyDict_CheckExact(dct))
+ items = PyDict_Items(dct);
+ else
+ items = PyMapping_Items(dct);
+ if (items == NULL)
+ return NULL;
+ iter = PyObject_GetIter(items);
+ Py_DECREF(items);
+ if (iter == NULL)
return NULL;
+ if (s->item_sort_kw == Py_None)
+ return iter;
+ lst = PyList_New(0);
+ if (lst == NULL)
+ goto bail;
+ while ((item = PyIter_Next(iter))) {
+ PyObject *key, *value;
+ if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
+ PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
+ goto bail;
+ }
+ key = PyTuple_GET_ITEM(item, 0);
+ if (key == NULL)
+ goto bail;
+#if PY_MAJOR_VERSION < 3
+ else if (PyString_Check(key)) {
+ /* item can be added as-is */
+ }
+#endif /* PY_MAJOR_VERSION < 3 */
+ else if (PyUnicode_Check(key)) {
+ /* item can be added as-is */
+ }
+ else {
+ PyObject *tpl;
+ kstr = encoder_stringify_key(s, key);
+ if (kstr == NULL)
+ goto bail;
+ else if (kstr == Py_None) {
+ /* skipkeys */
+ Py_DECREF(kstr);
+ continue;
+ }
+ value = PyTuple_GET_ITEM(item, 1);
+ if (value == NULL)
+ goto bail;
+ tpl = PyTuple_Pack(2, kstr, value);
+ if (tpl == NULL)
+ goto bail;
+ Py_CLEAR(kstr);
+ Py_DECREF(item);
+ item = tpl;
+ }
+ if (PyList_Append(lst, item))
+ goto bail;
+ Py_DECREF(item);
}
- return rval;
+ Py_CLEAR(iter);
+ if (PyErr_Occurred())
+ goto bail;
+ sortfun = PyObject_GetAttrString(lst, "sort");
+ if (sortfun == NULL)
+ goto bail;
+ if (!PyObject_Call(sortfun, sortargs, s->item_sort_kw))
+ goto bail;
+ Py_CLEAR(sortfun);
+ iter = PyObject_GetIter(lst);
+ Py_CLEAR(lst);
+ return iter;
+bail:
+ Py_XDECREF(sortfun);
+ Py_XDECREF(kstr);
+ Py_XDECREF(item);
+ Py_XDECREF(lst);
+ Py_XDECREF(iter);
+ return NULL;
}
static void
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
{
- /* Use the Python function simplejson.decoder.errmsg to raise a nice
- looking ValueError exception */
+ /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
static PyObject *JSONDecodeError = NULL;
PyObject *exc;
if (JSONDecodeError == NULL) {
- PyObject *decoder = PyImport_ImportModule("simplejson.decoder");
- if (decoder == NULL)
+ PyObject *scanner = PyImport_ImportModule("simplejson.scanner");
+ if (scanner == NULL)
return;
- JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
- Py_DECREF(decoder);
+ JSONDecodeError = PyObject_GetAttrString(scanner, "JSONDecodeError");
+ Py_DECREF(scanner);
if (JSONDecodeError == NULL)
return;
}
@@ -402,7 +810,7 @@ join_list_unicode(PyObject *lst)
/* return u''.join(lst) */
static PyObject *joinfn = NULL;
if (joinfn == NULL) {
- PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
+ PyObject *ustr = JSON_NewEmptyUnicode();
if (ustr == NULL)
return NULL;
@@ -414,6 +822,9 @@ join_list_unicode(PyObject *lst)
return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
}
+#if PY_MAJOR_VERSION >= 3
+#define join_list_string join_list_unicode
+#else /* PY_MAJOR_VERSION >= 3 */
static PyObject *
join_list_string(PyObject *lst)
{
@@ -431,9 +842,11 @@ join_list_string(PyObject *lst)
}
return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
}
+#endif /* PY_MAJOR_VERSION < 3 */
static PyObject *
-_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
+_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx)
+{
/* return (rval, idx) tuple, stealing reference to rval */
PyObject *tpl;
PyObject *pyidx;
@@ -441,6 +854,7 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
steal a reference to rval, returns (rval, idx)
*/
if (rval == NULL) {
+ assert(PyErr_Occurred());
return NULL;
}
pyidx = PyInt_FromSsize_t(idx);
@@ -473,6 +887,7 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
Py_CLEAR(chunk); \
}
+#if PY_MAJOR_VERSION < 3
static PyObject *
scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
{
@@ -493,9 +908,11 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
char *buf = PyString_AS_STRING(pystr);
PyObject *chunks = NULL;
PyObject *chunk = NULL;
+ PyObject *strchunk = NULL;
if (len == end) {
- raise_errmsg("Unterminated string starting at", pystr, begin);
+ raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
+ goto bail;
}
else if (end < 0 || len < end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
@@ -510,7 +927,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
break;
}
else if (strict && c <= 0x1f) {
- raise_errmsg("Invalid control character at", pystr, next);
+ raise_errmsg(ERR_STRING_CONTROL, pystr, next);
goto bail;
}
else if (c > 0x7f) {
@@ -518,13 +935,23 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
}
}
if (!(c == '"' || c == '\\')) {
- raise_errmsg("Unterminated string starting at", pystr, begin);
+ raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
goto bail;
}
/* Pick up this chunk if it's not zero length */
if (next != end) {
- PyObject *strchunk;
APPEND_OLD_CHUNK
+#if PY_MAJOR_VERSION >= 3
+ if (!has_unicode) {
+ chunk = PyUnicode_DecodeASCII(&buf[end], next - end, NULL);
+ }
+ else {
+ chunk = PyUnicode_Decode(&buf[end], next - end, encoding, NULL);
+ }
+ if (chunk == NULL) {
+ goto bail;
+ }
+#else /* PY_MAJOR_VERSION >= 3 */
strchunk = PyString_FromStringAndSize(&buf[end], next - end);
if (strchunk == NULL) {
goto bail;
@@ -539,6 +966,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
else {
chunk = strchunk;
}
+#endif /* PY_MAJOR_VERSION < 3 */
}
next++;
if (c == '"') {
@@ -546,7 +974,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
break;
}
if (next == len) {
- raise_errmsg("Unterminated string starting at", pystr, begin);
+ raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
goto bail;
}
c = buf[next];
@@ -565,7 +993,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
default: c = 0;
}
if (c == 0) {
- raise_errmsg("Invalid \\escape", pystr, end - 2);
+ raise_errmsg(ERR_STRING_ESC1, pystr, end - 2);
goto bail;
}
}
@@ -574,12 +1002,12 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
next++;
end = next + 4;
if (end >= len) {
- raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
+ raise_errmsg(ERR_STRING_ESC4, pystr, next - 1);
goto bail;
}
/* Decode 4 hex digits */
for (; next < end; next++) {
- Py_UNICODE digit = buf[next];
+ JSON_UNICHR digit = (JSON_UNICHR)buf[next];
c <<= 4;
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
@@ -592,28 +1020,21 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
case 'F':
c |= (digit - 'A' + 10); break;
default:
- raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+ raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
goto bail;
}
}
-#ifdef Py_UNICODE_WIDE
+#if (PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE))
/* Surrogate pair */
if ((c & 0xfc00) == 0xd800) {
- Py_UNICODE c2 = 0;
- if (end + 6 >= len) {
- raise_errmsg("Unpaired high surrogate", pystr, end - 5);
- goto bail;
- }
- if (buf[next++] != '\\' || buf[next++] != 'u') {
- raise_errmsg("Unpaired high surrogate", pystr, end - 5);
- goto bail;
- }
- end += 6;
- /* Decode 4 hex digits */
- for (; next < end; next++) {
- c2 <<= 4;
- Py_UNICODE digit = buf[next];
- switch (digit) {
+ if (end + 6 < len && buf[next] == '\\' && buf[next+1] == 'u') {
+ JSON_UNICHR c2 = 0;
+ end += 6;
+ /* Decode 4 hex digits */
+ for (next += 2; next < end; next++) {
+ c2 <<= 4;
+ JSON_UNICHR digit = buf[next];
+ switch (digit) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c2 |= (digit - '0'); break;
@@ -624,28 +1045,34 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
case 'F':
c2 |= (digit - 'A' + 10); break;
default:
- raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+ raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
goto bail;
+ }
+ }
+ if ((c2 & 0xfc00) != 0xdc00) {
+ /* not a low surrogate, rewind */
+ end -= 6;
+ next = end;
+ }
+ else {
+ c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
}
}
- if ((c2 & 0xfc00) != 0xdc00) {
- raise_errmsg("Unpaired high surrogate", pystr, end - 5);
- goto bail;
- }
- c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
}
- else if ((c & 0xfc00) == 0xdc00) {
- raise_errmsg("Unpaired low surrogate", pystr, end - 5);
- goto bail;
- }
-#endif
+#endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */
}
if (c > 0x7f) {
has_unicode = 1;
}
APPEND_OLD_CHUNK
+#if PY_MAJOR_VERSION >= 3
+ chunk = JSON_UnicodeFromChar(c);
+ if (chunk == NULL) {
+ goto bail;
+ }
+#else /* PY_MAJOR_VERSION >= 3 */
if (has_unicode) {
- chunk = PyUnicode_FromUnicode(&c, 1);
+ chunk = JSON_UnicodeFromChar(c);
if (chunk == NULL) {
goto bail;
}
@@ -657,13 +1084,14 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
goto bail;
}
}
+#endif
}
if (chunks == NULL) {
if (chunk != NULL)
rval = chunk;
else
- rval = PyString_FromStringAndSize("", 0);
+ rval = JSON_NewEmptyUnicode();
}
else {
APPEND_OLD_CHUNK
@@ -682,7 +1110,7 @@ bail:
Py_XDECREF(chunks);
return NULL;
}
-
+#endif /* PY_MAJOR_VERSION < 3 */
static PyObject *
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
@@ -696,15 +1124,17 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
Return value is a new PyUnicode
*/
PyObject *rval;
- Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
Py_ssize_t begin = end - 1;
Py_ssize_t next = begin;
- const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
+ PY2_UNUSED int kind = PyUnicode_KIND(pystr);
+ Py_ssize_t len = PyUnicode_GetLength(pystr);
+ void *buf = PyUnicode_DATA(pystr);
PyObject *chunks = NULL;
PyObject *chunk = NULL;
if (len == end) {
- raise_errmsg("Unterminated string starting at", pystr, begin);
+ raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
+ goto bail;
}
else if (end < 0 || len < end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
@@ -712,25 +1142,29 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
}
while (1) {
/* Find the end of the string or the next escape */
- Py_UNICODE c = 0;
+ JSON_UNICHR c = 0;
for (next = end; next < len; next++) {
- c = buf[next];
+ c = PyUnicode_READ(kind, buf, next);
if (c == '"' || c == '\\') {
break;
}
else if (strict && c <= 0x1f) {
- raise_errmsg("Invalid control character at", pystr, next);
+ raise_errmsg(ERR_STRING_CONTROL, pystr, next);
goto bail;
}
}
if (!(c == '"' || c == '\\')) {
- raise_errmsg("Unterminated string starting at", pystr, begin);
+ raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
goto bail;
}
/* Pick up this chunk if it's not zero length */
if (next != end) {
APPEND_OLD_CHUNK
- chunk = PyUnicode_FromUnicode(&buf[end], next - end);
+#if PY_MAJOR_VERSION < 3
+ chunk = PyUnicode_FromUnicode(&((const Py_UNICODE *)buf)[end], next - end);
+#else
+ chunk = PyUnicode_Substring(pystr, end, next);
+#endif
if (chunk == NULL) {
goto bail;
}
@@ -741,10 +1175,10 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
break;
}
if (next == len) {
- raise_errmsg("Unterminated string starting at", pystr, begin);
+ raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
goto bail;
}
- c = buf[next];
+ c = PyUnicode_READ(kind, buf, next);
if (c != 'u') {
/* Non-unicode backslash escapes */
end = next + 1;
@@ -760,7 +1194,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
default: c = 0;
}
if (c == 0) {
- raise_errmsg("Invalid \\escape", pystr, end - 2);
+ raise_errmsg(ERR_STRING_ESC1, pystr, end - 2);
goto bail;
}
}
@@ -769,12 +1203,12 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
next++;
end = next + 4;
if (end >= len) {
- raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
+ raise_errmsg(ERR_STRING_ESC4, pystr, next - 1);
goto bail;
}
/* Decode 4 hex digits */
for (; next < end; next++) {
- Py_UNICODE digit = buf[next];
+ JSON_UNICHR digit = PyUnicode_READ(kind, buf, next);
c <<= 4;
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
@@ -787,28 +1221,23 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
case 'F':
c |= (digit - 'A' + 10); break;
default:
- raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+ raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
goto bail;
}
}
-#ifdef Py_UNICODE_WIDE
+#if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)
/* Surrogate pair */
if ((c & 0xfc00) == 0xd800) {
- Py_UNICODE c2 = 0;
- if (end + 6 >= len) {
- raise_errmsg("Unpaired high surrogate", pystr, end - 5);
- goto bail;
- }
- if (buf[next++] != '\\' || buf[next++] != 'u') {
- raise_errmsg("Unpaired high surrogate", pystr, end - 5);
- goto bail;
- }
- end += 6;
- /* Decode 4 hex digits */
- for (; next < end; next++) {
- c2 <<= 4;
- Py_UNICODE digit = buf[next];
- switch (digit) {
+ JSON_UNICHR c2 = 0;
+ if (end + 6 < len &&
+ PyUnicode_READ(kind, buf, next) == '\\' &&
+ PyUnicode_READ(kind, buf, next + 1) == 'u') {
+ end += 6;
+ /* Decode 4 hex digits */
+ for (next += 2; next < end; next++) {
+ JSON_UNICHR digit = PyUnicode_READ(kind, buf, next);
+ c2 <<= 4;
+ switch (digit) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c2 |= (digit - '0'); break;
@@ -819,24 +1248,24 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
case 'F':
c2 |= (digit - 'A' + 10); break;
default:
- raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+ raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
goto bail;
+ }
+ }
+ if ((c2 & 0xfc00) != 0xdc00) {
+ /* not a low surrogate, rewind */
+ end -= 6;
+ next = end;
+ }
+ else {
+ c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
}
}
- if ((c2 & 0xfc00) != 0xdc00) {
- raise_errmsg("Unpaired high surrogate", pystr, end - 5);
- goto bail;
- }
- c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
- }
- else if ((c & 0xfc00) == 0xdc00) {
- raise_errmsg("Unpaired low surrogate", pystr, end - 5);
- goto bail;
}
#endif
}
APPEND_OLD_CHUNK
- chunk = PyUnicode_FromUnicode(&c, 1);
+ chunk = JSON_UnicodeFromChar(c);
if (chunk == NULL) {
goto bail;
}
@@ -846,7 +1275,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
if (chunk != NULL)
rval = chunk;
else
- rval = PyUnicode_FromUnicode(NULL, 0);
+ rval = JSON_NewEmptyUnicode();
}
else {
APPEND_OLD_CHUNK
@@ -893,12 +1322,16 @@ py_scanstring(PyObject* self UNUSED, PyObject *args)
if (encoding == NULL) {
encoding = DEFAULT_ENCODING;
}
- if (PyString_Check(pystr)) {
- rval = scanstring_str(pystr, end, encoding, strict, &next_end);
- }
- else if (PyUnicode_Check(pystr)) {
+ if (PyUnicode_Check(pystr)) {
rval = scanstring_unicode(pystr, end, strict, &next_end);
}
+#if PY_MAJOR_VERSION < 3
+ /* Using a bytes input is unsupported for scanning in Python 3.
+ It is coerced to str in the decoder before it gets here. */
+ else if (PyString_Check(pystr)) {
+ rval = scanstring_str(pystr, end, encoding, strict, &next_end);
+ }
+#endif
else {
PyErr_Format(PyExc_TypeError,
"first argument must be a string, not %.80s",
@@ -975,8 +1408,10 @@ scanner_clear(PyObject *self)
return 0;
}
+#if PY_MAJOR_VERSION < 3
static PyObject *
-_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
/* Read a JSON object from PyString pystr.
idx is the index of the first character after the opening curly brace.
*next_idx_ptr is a return-by-reference index to the first character after
@@ -992,9 +1427,10 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
PyObject *item;
PyObject *key = NULL;
PyObject *val = NULL;
- char *encoding = PyString_AS_STRING(s->encoding);
+ char *encoding = JSON_ASCII_AS_STRING(s->encoding);
int strict = PyObject_IsTrue(s->strict);
int has_pairs_hook = (s->pairs_hook != Py_None);
+ int did_parse = 0;
Py_ssize_t next_idx;
if (has_pairs_hook) {
pairs = PyList_New(0);
@@ -1012,12 +1448,14 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
/* only loop if the object is non-empty */
if (idx <= end_idx && str[idx] != '}') {
+ int trailing_delimiter = 0;
while (idx <= end_idx) {
PyObject *memokey;
+ trailing_delimiter = 0;
/* read key */
if (str[idx] != '"') {
- raise_errmsg("Expecting property name", pystr, idx);
+ raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
goto bail;
}
key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
@@ -1038,7 +1476,7 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
/* skip whitespace between key and : delimiter, read :, skip whitespace */
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
if (idx > end_idx || str[idx] != ':') {
- raise_errmsg("Expecting : delimiter", pystr, idx);
+ raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx);
goto bail;
}
idx++;
@@ -1073,23 +1511,33 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
/* bail if the object is closed or we didn't get the , delimiter */
+ did_parse = 1;
if (idx > end_idx) break;
if (str[idx] == '}') {
break;
}
else if (str[idx] != ',') {
- raise_errmsg("Expecting , delimiter", pystr, idx);
+ raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
goto bail;
}
idx++;
/* skip whitespace after , delimiter */
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ trailing_delimiter = 1;
+ }
+ if (trailing_delimiter) {
+ raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
+ goto bail;
}
}
/* verify that idx < end_idx, str[idx] should be '}' */
if (idx > end_idx || str[idx] != '}') {
- raise_errmsg("Expecting object", pystr, end_idx);
+ if (did_parse) {
+ raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
+ } else {
+ raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx);
+ }
goto bail;
}
@@ -1121,9 +1569,11 @@ bail:
Py_XDECREF(pairs);
return NULL;
}
+#endif /* PY_MAJOR_VERSION < 3 */
static PyObject *
-_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
/* Read a JSON object from PyUnicode pystr.
idx is the index of the first character after the opening curly brace.
*next_idx_ptr is a return-by-reference index to the first character after
@@ -1131,8 +1581,9 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
Returns a new PyObject (usually a dict, but object_hook can change that)
*/
- Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
- Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+ void *str = PyUnicode_DATA(pystr);
+ Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1;
+ PY2_UNUSED int kind = PyUnicode_KIND(pystr);
PyObject *rval = NULL;
PyObject *pairs = NULL;
PyObject *item;
@@ -1140,6 +1591,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
PyObject *val = NULL;
int strict = PyObject_IsTrue(s->strict);
int has_pairs_hook = (s->pairs_hook != Py_None);
+ int did_parse = 0;
Py_ssize_t next_idx;
if (has_pairs_hook) {
@@ -1152,18 +1604,20 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
if (rval == NULL)
return NULL;
}
-
+
/* skip whitespace after { */
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* only loop if the object is non-empty */
- if (idx <= end_idx && str[idx] != '}') {
+ if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') {
+ int trailing_delimiter = 0;
while (idx <= end_idx) {
PyObject *memokey;
+ trailing_delimiter = 0;
/* read key */
- if (str[idx] != '"') {
- raise_errmsg("Expecting property name", pystr, idx);
+ if (PyUnicode_READ(kind, str, idx) != '"') {
+ raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
goto bail;
}
key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
@@ -1181,14 +1635,15 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
}
idx = next_idx;
- /* skip whitespace between key and : delimiter, read :, skip whitespace */
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
- if (idx > end_idx || str[idx] != ':') {
- raise_errmsg("Expecting : delimiter", pystr, idx);
+ /* skip whitespace between key and : delimiter, read :, skip
+ whitespace */
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
+ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
+ raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx);
goto bail;
}
idx++;
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* read any JSON term */
val = scan_once_unicode(s, pystr, idx, &next_idx);
@@ -1216,27 +1671,38 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
idx = next_idx;
/* skip whitespace before } or , */
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
- /* bail if the object is closed or we didn't get the , delimiter */
+ /* bail if the object is closed or we didn't get the ,
+ delimiter */
+ did_parse = 1;
if (idx > end_idx) break;
- if (str[idx] == '}') {
+ if (PyUnicode_READ(kind, str, idx) == '}') {
break;
}
- else if (str[idx] != ',') {
- raise_errmsg("Expecting , delimiter", pystr, idx);
+ else if (PyUnicode_READ(kind, str, idx) != ',') {
+ raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
goto bail;
}
idx++;
/* skip whitespace after , delimiter */
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
+ trailing_delimiter = 1;
+ }
+ if (trailing_delimiter) {
+ raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
+ goto bail;
}
}
/* verify that idx < end_idx, str[idx] should be '}' */
- if (idx > end_idx || str[idx] != '}') {
- raise_errmsg("Expecting object", pystr, end_idx);
+ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
+ if (did_parse) {
+ raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
+ } else {
+ raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx);
+ }
goto bail;
}
@@ -1269,8 +1735,10 @@ bail:
return NULL;
}
+#if PY_MAJOR_VERSION < 3
static PyObject *
-_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
/* Read a JSON array from PyString pystr.
idx is the index of the first character after the opening brace.
*next_idx_ptr is a return-by-reference index to the first character after
@@ -1291,15 +1759,12 @@ _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t
/* only loop if the array is non-empty */
if (idx <= end_idx && str[idx] != ']') {
+ int trailing_delimiter = 0;
while (idx <= end_idx) {
-
+ trailing_delimiter = 0;
/* read any JSON term and de-tuplefy the (rval, idx) */
val = scan_once_str(s, pystr, idx, &next_idx);
if (val == NULL) {
- if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
- PyErr_Clear();
- raise_errmsg("Expecting object", pystr, idx);
- }
goto bail;
}
@@ -1318,19 +1783,28 @@ _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t
break;
}
else if (str[idx] != ',') {
- raise_errmsg("Expecting , delimiter", pystr, idx);
+ raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
goto bail;
}
idx++;
/* skip whitespace after , */
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ trailing_delimiter = 1;
+ }
+ if (trailing_delimiter) {
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+ goto bail;
}
}
/* verify that idx < end_idx, str[idx] should be ']' */
if (idx > end_idx || str[idx] != ']') {
- raise_errmsg("Expecting object", pystr, end_idx);
+ if (PyList_GET_SIZE(rval)) {
+ raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
+ } else {
+ raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx);
+ }
goto bail;
}
*next_idx_ptr = idx + 1;
@@ -1340,9 +1814,11 @@ bail:
Py_DECREF(rval);
return NULL;
}
+#endif /* PY_MAJOR_VERSION < 3 */
static PyObject *
-_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
/* Read a JSON array from PyString pystr.
idx is the index of the first character after the opening brace.
*next_idx_ptr is a return-by-reference index to the first character after
@@ -1350,8 +1826,9 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
Returns a new PyList
*/
- Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
- Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+ PY2_UNUSED int kind = PyUnicode_KIND(pystr);
+ void *str = PyUnicode_DATA(pystr);
+ Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1;
PyObject *val = NULL;
PyObject *rval = PyList_New(0);
Py_ssize_t next_idx;
@@ -1359,19 +1836,16 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
return NULL;
/* skip whitespace after [ */
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* only loop if the array is non-empty */
- if (idx <= end_idx && str[idx] != ']') {
+ if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') {
+ int trailing_delimiter = 0;
while (idx <= end_idx) {
-
+ trailing_delimiter = 0;
/* read any JSON term */
val = scan_once_unicode(s, pystr, idx, &next_idx);
if (val == NULL) {
- if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
- PyErr_Clear();
- raise_errmsg("Expecting object", pystr, idx);
- }
goto bail;
}
@@ -1382,27 +1856,36 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
idx = next_idx;
/* skip whitespace between term and , */
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* bail if the array is closed or we didn't get the , delimiter */
if (idx > end_idx) break;
- if (str[idx] == ']') {
+ if (PyUnicode_READ(kind, str, idx) == ']') {
break;
}
- else if (str[idx] != ',') {
- raise_errmsg("Expecting , delimiter", pystr, idx);
+ else if (PyUnicode_READ(kind, str, idx) != ',') {
+ raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
goto bail;
}
idx++;
/* skip whitespace after , */
- while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
+ trailing_delimiter = 1;
+ }
+ if (trailing_delimiter) {
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+ goto bail;
}
}
/* verify that idx < end_idx, str[idx] should be ']' */
- if (idx > end_idx || str[idx] != ']') {
- raise_errmsg("Expecting object", pystr, end_idx);
+ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
+ if (PyList_GET_SIZE(rval)) {
+ raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
+ } else {
+ raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx);
+ }
goto bail;
}
*next_idx_ptr = idx + 1;
@@ -1414,7 +1897,8 @@ bail:
}
static PyObject *
-_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
/* Read a JSON constant from PyString pystr.
constant is the constant string that was found
("NaN", "Infinity", "-Infinity").
@@ -1427,20 +1911,22 @@ _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *
PyObject *cstr;
PyObject *rval;
/* constant is "NaN", "Infinity", or "-Infinity" */
- cstr = PyString_InternFromString(constant);
+ cstr = JSON_InternFromString(constant);
if (cstr == NULL)
return NULL;
/* rval = parse_constant(constant) */
rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
- idx += PyString_GET_SIZE(cstr);
+ idx += JSON_Intern_GET_SIZE(cstr);
Py_DECREF(cstr);
*next_idx_ptr = idx;
return rval;
}
+#if PY_MAJOR_VERSION < 3
static PyObject *
-_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
+_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr)
+{
/* Read a JSON number from PyString pystr.
idx is the index of the first character of the number
*next_idx_ptr is a return-by-reference index to the first character after
@@ -1459,11 +1945,11 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz
/* read a sign if it's there, make sure it's not the end of the string */
if (str[idx] == '-') {
- idx++;
- if (idx > end_idx) {
- PyErr_SetNone(PyExc_StopIteration);
+ if (idx >= end_idx) {
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
return NULL;
}
+ idx++;
}
/* read as many integer digits as we find as long as it doesn't start with 0 */
@@ -1477,7 +1963,7 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz
}
/* no integer digits, error */
else {
- PyErr_SetNone(PyExc_StopIteration);
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
return NULL;
}
@@ -1541,9 +2027,11 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz
*next_idx_ptr = idx;
return rval;
}
+#endif /* PY_MAJOR_VERSION < 3 */
static PyObject *
-_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
+_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr)
+{
/* Read a JSON number from PyUnicode pystr.
idx is the index of the first character of the number
*next_idx_ptr is a return-by-reference index to the first character after
@@ -1553,57 +2041,68 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
PyInt, PyLong, or PyFloat.
May return other types if parse_int or parse_float are set
*/
- Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
- Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+ PY2_UNUSED int kind = PyUnicode_KIND(pystr);
+ void *str = PyUnicode_DATA(pystr);
+ Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1;
Py_ssize_t idx = start;
int is_float = 0;
+ JSON_UNICHR c;
PyObject *rval;
PyObject *numstr;
/* read a sign if it's there, make sure it's not the end of the string */
- if (str[idx] == '-') {
- idx++;
- if (idx > end_idx) {
- PyErr_SetNone(PyExc_StopIteration);
+ if (PyUnicode_READ(kind, str, idx) == '-') {
+ if (idx >= end_idx) {
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
return NULL;
}
+ idx++;
}
/* read as many integer digits as we find as long as it doesn't start with 0 */
- if (str[idx] >= '1' && str[idx] <= '9') {
+ c = PyUnicode_READ(kind, str, idx);
+ if (c == '0') {
+ /* if it starts with 0 we only expect one integer digit */
idx++;
- while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
}
- /* if it starts with 0 we only expect one integer digit */
- else if (str[idx] == '0') {
+ else if (IS_DIGIT(c)) {
idx++;
+ while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) {
+ idx++;
+ }
}
- /* no integer digits, error */
else {
- PyErr_SetNone(PyExc_StopIteration);
+ /* no integer digits, error */
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
return NULL;
}
/* if the next char is '.' followed by a digit then read all float digits */
- if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
+ if (idx < end_idx &&
+ PyUnicode_READ(kind, str, idx) == '.' &&
+ IS_DIGIT(PyUnicode_READ(kind, str, idx + 1))) {
is_float = 1;
idx += 2;
- while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+ while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++;
}
/* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
- if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
+ if (idx < end_idx &&
+ (PyUnicode_READ(kind, str, idx) == 'e' ||
+ PyUnicode_READ(kind, str, idx) == 'E')) {
Py_ssize_t e_start = idx;
idx++;
/* read an exponent sign if present */
- if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
+ if (idx < end_idx &&
+ (PyUnicode_READ(kind, str, idx) == '-' ||
+ PyUnicode_READ(kind, str, idx) == '+')) idx++;
/* read all digits */
- while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+ while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++;
/* if we got a digit, then parse as float. if not, backtrack */
- if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
+ if (IS_DIGIT(PyUnicode_READ(kind, str, idx - 1))) {
is_float = 1;
}
else {
@@ -1612,7 +2111,11 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
}
/* copy the section we determined to be a number */
- numstr = PyUnicode_FromUnicode(&str[start], idx - start);
+#if PY_MAJOR_VERSION >= 3
+ numstr = PyUnicode_Substring(pystr, start, idx);
+#else
+ numstr = PyUnicode_FromUnicode(&((Py_UNICODE *)str)[start], idx - start);
+#endif
if (numstr == NULL)
return NULL;
if (is_float) {
@@ -1621,7 +2124,11 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
}
else {
+#if PY_MAJOR_VERSION >= 3
+ rval = PyFloat_FromString(numstr);
+#else
rval = PyFloat_FromString(numstr, NULL);
+#endif
}
}
else {
@@ -1633,6 +2140,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
return rval;
}
+#if PY_MAJOR_VERSION < 3
static PyObject *
scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
{
@@ -1647,27 +2155,33 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n
Py_ssize_t length = PyString_GET_SIZE(pystr);
PyObject *rval = NULL;
int fallthrough = 0;
- if (idx >= length) {
- PyErr_SetNone(PyExc_StopIteration);
+ if (idx < 0 || idx >= length) {
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
return NULL;
}
- if (Py_EnterRecursiveCall(" while decoding a JSON document"))
- return NULL;
switch (str[idx]) {
case '"':
/* string */
rval = scanstring_str(pystr, idx + 1,
- PyString_AS_STRING(s->encoding),
+ JSON_ASCII_AS_STRING(s->encoding),
PyObject_IsTrue(s->strict),
next_idx_ptr);
break;
case '{':
/* object */
+ if (Py_EnterRecursiveCall(" while decoding a JSON object "
+ "from a string"))
+ return NULL;
rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
+ Py_LeaveRecursiveCall();
break;
case '[':
/* array */
+ if (Py_EnterRecursiveCall(" while decoding a JSON array "
+ "from a string"))
+ return NULL;
rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
+ Py_LeaveRecursiveCall();
break;
case 'n':
/* null */
@@ -1729,9 +2243,10 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n
/* Didn't find a string, object, array, or named constant. Look for a number. */
if (fallthrough)
rval = _match_number_str(s, pystr, idx, next_idx_ptr);
- Py_LeaveRecursiveCall();
return rval;
}
+#endif /* PY_MAJOR_VERSION < 3 */
+
static PyObject *
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
@@ -1743,17 +2258,16 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
Returns a new PyObject representation of the term.
*/
- Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
- Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
+ PY2_UNUSED int kind = PyUnicode_KIND(pystr);
+ void *str = PyUnicode_DATA(pystr);
+ Py_ssize_t length = PyUnicode_GetLength(pystr);
PyObject *rval = NULL;
int fallthrough = 0;
- if (idx >= length) {
- PyErr_SetNone(PyExc_StopIteration);
+ if (idx < 0 || idx >= length) {
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
return NULL;
}
- if (Py_EnterRecursiveCall(" while decoding a JSON document"))
- return NULL;
- switch (str[idx]) {
+ switch (PyUnicode_READ(kind, str, idx)) {
case '"':
/* string */
rval = scanstring_unicode(pystr, idx + 1,
@@ -1762,15 +2276,26 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
break;
case '{':
/* object */
+ if (Py_EnterRecursiveCall(" while decoding a JSON object "
+ "from a unicode string"))
+ return NULL;
rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
+ Py_LeaveRecursiveCall();
break;
case '[':
/* array */
+ if (Py_EnterRecursiveCall(" while decoding a JSON array "
+ "from a unicode string"))
+ return NULL;
rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
+ Py_LeaveRecursiveCall();
break;
case 'n':
/* null */
- if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
+ if ((idx + 3 < length) &&
+ PyUnicode_READ(kind, str, idx + 1) == 'u' &&
+ PyUnicode_READ(kind, str, idx + 2) == 'l' &&
+ PyUnicode_READ(kind, str, idx + 3) == 'l') {
Py_INCREF(Py_None);
*next_idx_ptr = idx + 4;
rval = Py_None;
@@ -1780,7 +2305,10 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
break;
case 't':
/* true */
- if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
+ if ((idx + 3 < length) &&
+ PyUnicode_READ(kind, str, idx + 1) == 'r' &&
+ PyUnicode_READ(kind, str, idx + 2) == 'u' &&
+ PyUnicode_READ(kind, str, idx + 3) == 'e') {
Py_INCREF(Py_True);
*next_idx_ptr = idx + 4;
rval = Py_True;
@@ -1790,7 +2318,11 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
break;
case 'f':
/* false */
- if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
+ if ((idx + 4 < length) &&
+ PyUnicode_READ(kind, str, idx + 1) == 'a' &&
+ PyUnicode_READ(kind, str, idx + 2) == 'l' &&
+ PyUnicode_READ(kind, str, idx + 3) == 's' &&
+ PyUnicode_READ(kind, str, idx + 4) == 'e') {
Py_INCREF(Py_False);
*next_idx_ptr = idx + 5;
rval = Py_False;
@@ -1800,7 +2332,9 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
break;
case 'N':
/* NaN */
- if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
+ if ((idx + 2 < length) &&
+ PyUnicode_READ(kind, str, idx + 1) == 'a' &&
+ PyUnicode_READ(kind, str, idx + 2) == 'N') {
rval = _parse_constant(s, "NaN", idx, next_idx_ptr);
}
else
@@ -1808,7 +2342,14 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
break;
case 'I':
/* Infinity */
- if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
+ if ((idx + 7 < length) &&
+ PyUnicode_READ(kind, str, idx + 1) == 'n' &&
+ PyUnicode_READ(kind, str, idx + 2) == 'f' &&
+ PyUnicode_READ(kind, str, idx + 3) == 'i' &&
+ PyUnicode_READ(kind, str, idx + 4) == 'n' &&
+ PyUnicode_READ(kind, str, idx + 5) == 'i' &&
+ PyUnicode_READ(kind, str, idx + 6) == 't' &&
+ PyUnicode_READ(kind, str, idx + 7) == 'y') {
rval = _parse_constant(s, "Infinity", idx, next_idx_ptr);
}
else
@@ -1816,7 +2357,15 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
break;
case '-':
/* -Infinity */
- if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
+ if ((idx + 8 < length) &&
+ PyUnicode_READ(kind, str, idx + 1) == 'I' &&
+ PyUnicode_READ(kind, str, idx + 2) == 'n' &&
+ PyUnicode_READ(kind, str, idx + 3) == 'f' &&
+ PyUnicode_READ(kind, str, idx + 4) == 'i' &&
+ PyUnicode_READ(kind, str, idx + 5) == 'n' &&
+ PyUnicode_READ(kind, str, idx + 6) == 'i' &&
+ PyUnicode_READ(kind, str, idx + 7) == 't' &&
+ PyUnicode_READ(kind, str, idx + 8) == 'y') {
rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr);
}
else
@@ -1828,7 +2377,6 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
/* Didn't find a string, object, array, or named constant. Look for a number. */
if (fallthrough)
rval = _match_number_unicode(s, pystr, idx, next_idx_ptr);
- Py_LeaveRecursiveCall();
return rval;
}
@@ -1847,12 +2395,14 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
return NULL;
- if (PyString_Check(pystr)) {
- rval = scan_once_str(s, pystr, idx, &next_idx);
- }
- else if (PyUnicode_Check(pystr)) {
+ if (PyUnicode_Check(pystr)) {
rval = scan_once_unicode(s, pystr, idx, &next_idx);
}
+#if PY_MAJOR_VERSION < 3
+ else if (PyString_Check(pystr)) {
+ rval = scan_once_str(s, pystr, idx, &next_idx);
+ }
+#endif /* PY_MAJOR_VERSION < 3 */
else {
PyErr_Format(PyExc_TypeError,
"first argument must be a string, not %.80s",
@@ -1880,6 +2430,25 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return (PyObject *)s;
}
+static PyObject *
+JSON_ParseEncoding(PyObject *encoding)
+{ /* Turn an `encoding` argument into the object stored as s->encoding; returns NULL on failure. */
+ if (encoding == NULL) /* NULL in, NULL out: no exception is set by this function on this path */
+ return NULL;
+ if (encoding == Py_None) /* None selects DEFAULT_ENCODING */
+ return JSON_InternFromString(DEFAULT_ENCODING); /* presumably a new reference, like the other success paths -- TODO confirm against the macro */
+#if PY_MAJOR_VERSION < 3
+ if (PyUnicode_Check(encoding)) /* Python 2 only: a unicode encoding name is encoded (encoding/errors left as NULL) */
+ return PyUnicode_AsEncodedString(encoding, NULL, NULL);
+#endif
+ if (JSON_ASCII_Check(encoding)) {
+ Py_INCREF(encoding); /* hand the same object back with an extra reference for the caller */
+ return encoding;
+ }
+ PyErr_SetString(PyExc_TypeError, "encoding must be a string"); /* any other type is rejected */
+ return NULL;
+}
+
static int
scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
{
@@ -1887,34 +2456,26 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
PyObject *ctx;
static char *kwlist[] = {"context", NULL};
PyScannerObject *s;
+ PyObject *encoding;
assert(PyScanner_Check(self));
s = (PyScannerObject *)self;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
return -1;
-
+
if (s->memo == NULL) {
s->memo = PyDict_New();
if (s->memo == NULL)
goto bail;
}
- /* PyString_AS_STRING is used on encoding */
- s->encoding = PyObject_GetAttrString(ctx, "encoding");
+ /* JSON_ASCII_AS_STRING is used on encoding */
+ encoding = PyObject_GetAttrString(ctx, "encoding");
+ s->encoding = JSON_ParseEncoding(encoding);
+ Py_XDECREF(encoding);
if (s->encoding == NULL)
goto bail;
- if (s->encoding == Py_None) {
- Py_DECREF(Py_None);
- s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
- }
- else if (PyUnicode_Check(s->encoding)) {
- PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
- Py_DECREF(s->encoding);
- s->encoding = tmp;
- }
- if (s->encoding == NULL || !PyString_Check(s->encoding))
- goto bail;
/* All of these will fail "gracefully" so we don't need to verify them */
s->strict = PyObject_GetAttrString(ctx, "strict");
@@ -1953,8 +2514,7 @@ PyDoc_STRVAR(scanner_doc, "JSON scanner object");
static
PyTypeObject PyScannerType = {
- PyObject_HEAD_INIT(NULL)
- 0, /* tp_internal */
+ PyVarObject_HEAD_INIT(NULL, 0)
"simplejson._speedups.Scanner", /* tp_name */
sizeof(PyScannerObject), /* tp_basicsize */
0, /* tp_itemsize */
@@ -2004,12 +2564,17 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
s->markers = NULL;
s->defaultfn = NULL;
s->encoder = NULL;
+ s->encoding = NULL;
s->indent = NULL;
s->key_separator = NULL;
s->item_separator = NULL;
- s->sort_keys = NULL;
- s->skipkeys = NULL;
s->key_memo = NULL;
+ s->sort_keys = NULL;
+ s->item_sort_key = NULL;
+ s->item_sort_kw = NULL;
+ s->Decimal = NULL;
+ s->max_long_size = NULL;
+ s->min_long_size = NULL;
}
return (PyObject *)s;
}
@@ -2017,47 +2582,140 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
static int
encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
{ /* Initialize an Encoder from the make_encoder arguments. Returns 0 on success, -1 on failure. */
- /* initialize Encoder object */
- static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", "iterable_as_array", NULL};
+ static char *kwlist[] = { /* one name per "O" in the format string, in the same order as the pointers passed to the parse call */
+ "markers",
+ "default",
+ "encoder",
+ "indent",
+ "key_separator",
+ "item_separator",
+ "sort_keys",
+ "skipkeys",
+ "allow_nan",
+ "key_memo",
+ "use_decimal",
+ "namedtuple_as_object",
+ "tuple_as_array",
+ "int_as_string_bitcount",
+ "item_sort_key",
+ "encoding",
+ "for_json",
+ "ignore_nan",
+ "Decimal",
+ "iterable_as_array", /* last, matching &iterable_as_array at the end of the parse call */
+ NULL};
PyEncoderObject *s;
PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
- PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo, *use_decimal, *namedtuple_as_object, *tuple_as_array, *iterable_as_array;
+ PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo;
+ PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array, *iterable_as_array;
+ PyObject *int_as_string_bitcount, *item_sort_key, *encoding, *for_json;
+ PyObject *ignore_nan, *Decimal;
assert(PyEncoder_Check(self));
s = (PyEncoderObject *)self;
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOO:make_encoder", kwlist,
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOOO:make_encoder", kwlist, /* 20 "O"s: one per kwlist name and per pointer */
&markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
&sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal,
- &namedtuple_as_object, &tuple_as_array, &iterable_as_array))
+ &namedtuple_as_object, &tuple_as_array,
+ &int_as_string_bitcount, &item_sort_key, &encoding, &for_json,
+ &ignore_nan, &Decimal, &iterable_as_array))
return -1;
+ Py_INCREF(markers);
s->markers = markers;
+ Py_INCREF(defaultfn);
s->defaultfn = defaultfn;
+ Py_INCREF(encoder);
s->encoder = encoder;
+ s->encoding = JSON_ParseEncoding(encoding);
+ if (s->encoding == NULL)
+ return -1;
+ Py_INCREF(indent);
s->indent = indent;
+ Py_INCREF(key_separator);
s->key_separator = key_separator;
+ Py_INCREF(item_separator);
s->item_separator = item_separator;
- s->sort_keys = sort_keys;
- s->skipkeys = skipkeys;
+ Py_INCREF(skipkeys);
+ s->skipkeys_bool = skipkeys;
+ s->skipkeys = PyObject_IsTrue(skipkeys);
+ Py_INCREF(key_memo);
s->key_memo = key_memo;
s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); /* true only when encoder is the C py_encode_basestring_ascii function */
- s->allow_nan = PyObject_IsTrue(allow_nan);
+ s->allow_or_ignore_nan = ( /* bit flags: JSON_IGNORE_NAN and/or JSON_ALLOW_NAN */
+ (PyObject_IsTrue(ignore_nan) ? JSON_IGNORE_NAN : 0) |
+ (PyObject_IsTrue(allow_nan) ? JSON_ALLOW_NAN : 0));
s->use_decimal = PyObject_IsTrue(use_decimal);
s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object);
s->tuple_as_array = PyObject_IsTrue(tuple_as_array);
s->iterable_as_array = PyObject_IsTrue(iterable_as_array);
+ if (PyInt_Check(int_as_string_bitcount) || PyLong_Check(int_as_string_bitcount)) {
+ static const unsigned int long_long_bitsize = SIZEOF_LONG_LONG * 8;
+ int int_as_string_bitcount_val = (int)PyLong_AsLong(int_as_string_bitcount);
+ if (int_as_string_bitcount_val > 0 && int_as_string_bitcount_val < long_long_bitsize) {
+ s->max_long_size = PyLong_FromUnsignedLongLong(1ULL << int_as_string_bitcount_val); /* 2**bitcount */
+ s->min_long_size = PyLong_FromLongLong(-1LL << int_as_string_bitcount_val); /* -(2**bitcount) */
+ if (s->min_long_size == NULL || s->max_long_size == NULL) {
+ return -1;
+ }
+ }
+ else {
+ PyErr_Format(PyExc_TypeError,
+ "int_as_string_bitcount (%d) must be greater than 0 and less than the number of bits of a `long long` type (%u bits)",
+ int_as_string_bitcount_val, long_long_bitsize);
+ return -1;
+ }
+ }
+ else if (int_as_string_bitcount == Py_None) { /* None: both thresholds are stored as None */
+ Py_INCREF(Py_None);
+ s->max_long_size = Py_None;
+ Py_INCREF(Py_None);
+ s->min_long_size = Py_None;
+ }
+ else {
+ PyErr_SetString(PyExc_TypeError, "int_as_string_bitcount must be None or an integer");
+ return -1;
+ }
+ if (item_sort_key != Py_None) {
+ if (!PyCallable_Check(item_sort_key)) {
+ PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable");
+ return -1;
+ }
+ }
+ else if (PyObject_IsTrue(sort_keys)) { /* sort_keys without item_sort_key: use operator.itemgetter(0) as the key */
+ static PyObject *itemgetter0 = NULL; /* created on first use and kept for later calls */
+ if (!itemgetter0) {
+ PyObject *operator = PyImport_ImportModule("operator");
+ if (!operator)
+ return -1;
+ itemgetter0 = PyObject_CallMethod(operator, "itemgetter", "i", 0);
+ Py_DECREF(operator);
+ }
+ item_sort_key = itemgetter0; /* no new reference taken here; Py_INCREF'd below before being stored */
+ if (!item_sort_key)
+ return -1;
+ }
+ if (item_sort_key == Py_None) {
+ Py_INCREF(Py_None);
+ s->item_sort_kw = Py_None;
+ }
+ else {
+ s->item_sort_kw = PyDict_New(); /* becomes {"key": item_sort_key} */
+ if (s->item_sort_kw == NULL)
+ return -1;
+ if (PyDict_SetItemString(s->item_sort_kw, "key", item_sort_key))
+ return -1;
+ }
+ Py_INCREF(sort_keys);
+ s->sort_keys = sort_keys;
+ Py_INCREF(item_sort_key);
+ s->item_sort_key = item_sort_key;
+ Py_INCREF(Decimal);
+ s->Decimal = Decimal;
+ s->for_json = PyObject_IsTrue(for_json);
- Py_INCREF(s->markers);
- Py_INCREF(s->defaultfn);
- Py_INCREF(s->encoder);
- Py_INCREF(s->indent);
- Py_INCREF(s->key_separator);
- Py_INCREF(s->item_separator);
- Py_INCREF(s->sort_keys);
- Py_INCREF(s->skipkeys);
- Py_INCREF(s->key_memo);
return 0;
}
@@ -2067,22 +2725,21 @@ encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
/* Python callable interface to encode_listencode_obj */
static char *kwlist[] = {"obj", "_current_indent_level", NULL};
PyObject *obj;
- PyObject *rval;
Py_ssize_t indent_level;
PyEncoderObject *s;
+ JSON_Accu rval;
assert(PyEncoder_Check(self));
s = (PyEncoderObject *)self;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
&obj, _convertPyInt_AsSsize_t, &indent_level))
return NULL;
- rval = PyList_New(0);
- if (rval == NULL)
+ if (JSON_Accu_Init(&rval))
return NULL;
- if (encoder_listencode_obj(s, rval, obj, indent_level)) {
- Py_DECREF(rval);
+ if (encoder_listencode_obj(s, &rval, obj, indent_level)) {
+ JSON_Accu_Destroy(&rval);
return NULL;
}
- return rval;
+ return JSON_Accu_FinishAsList(&rval);
}
static PyObject *
@@ -2092,7 +2749,7 @@ _encoded_const(PyObject *obj)
if (obj == Py_None) {
static PyObject *s_null = NULL;
if (s_null == NULL) {
- s_null = PyString_InternFromString("null");
+ s_null = JSON_InternFromString("null");
}
Py_INCREF(s_null);
return s_null;
@@ -2100,7 +2757,7 @@ _encoded_const(PyObject *obj)
else if (obj == Py_True) {
static PyObject *s_true = NULL;
if (s_true == NULL) {
- s_true = PyString_InternFromString("true");
+ s_true = JSON_InternFromString("true");
}
Py_INCREF(s_true);
return s_true;
@@ -2108,7 +2765,7 @@ _encoded_const(PyObject *obj)
else if (obj == Py_False) {
static PyObject *s_false = NULL;
if (s_false == NULL) {
- s_false = PyString_InternFromString("false");
+ s_false = JSON_InternFromString("false");
}
Py_INCREF(s_false);
return s_false;
@@ -2125,22 +2782,54 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj)
/* Return the JSON representation of a PyFloat */
double i = PyFloat_AS_DOUBLE(obj);
if (!Py_IS_FINITE(i)) {
- if (!s->allow_nan) {
+ if (!s->allow_or_ignore_nan) {
PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
return NULL;
}
- if (i > 0) {
- return PyString_FromString("Infinity");
+ if (s->allow_or_ignore_nan & JSON_IGNORE_NAN) {
+ return _encoded_const(Py_None);
+ }
+ /* JSON_ALLOW_NAN is set */
+ else if (i > 0) {
+ static PyObject *sInfinity = NULL;
+ if (sInfinity == NULL)
+ sInfinity = JSON_InternFromString("Infinity");
+ if (sInfinity)
+ Py_INCREF(sInfinity);
+ return sInfinity;
}
else if (i < 0) {
- return PyString_FromString("-Infinity");
+ static PyObject *sNegInfinity = NULL;
+ if (sNegInfinity == NULL)
+ sNegInfinity = JSON_InternFromString("-Infinity");
+ if (sNegInfinity)
+ Py_INCREF(sNegInfinity);
+ return sNegInfinity;
}
else {
- return PyString_FromString("NaN");
+ static PyObject *sNaN = NULL;
+ if (sNaN == NULL)
+ sNaN = JSON_InternFromString("NaN");
+ if (sNaN)
+ Py_INCREF(sNaN);
+ return sNaN;
}
}
/* Use a better float format here? */
- return PyObject_Repr(obj);
+ if (PyFloat_CheckExact(obj)) {
+ return PyObject_Repr(obj);
+ }
+ else {
+ /* See #118, do not trust custom str/repr */
+ PyObject *res;
+ PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyFloat_Type, obj, NULL);
+ if (tmp == NULL) {
+ return NULL;
+ }
+ res = PyObject_Repr(tmp);
+ Py_DECREF(tmp);
+ return res;
+ }
}
static PyObject *
@@ -2154,60 +2843,97 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj)
}
static int
-_steal_list_append(PyObject *lst, PyObject *stolen)
+_steal_accumulate(JSON_Accu *accu, PyObject *stolen)
{
/* Accumulate stolen into accu, then drop the caller's reference to stolen whether or not that succeeded; the accumulate call's result is returned unchanged */
- int rval = PyList_Append(lst, stolen);
+ int rval = JSON_Accu_Accumulate(accu, stolen);
Py_DECREF(stolen);
return rval;
}
static int
-encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
+encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level)
{
/* Encode Python object obj to a JSON term, rval is a PyList */
int rv = -1;
- if (Py_EnterRecursiveCall(" while encoding a JSON document"))
- return rv;
do {
if (obj == Py_None || obj == Py_True || obj == Py_False) {
PyObject *cstr = _encoded_const(obj);
if (cstr != NULL)
- rv = _steal_list_append(rval, cstr);
+ rv = _steal_accumulate(rval, cstr);
}
else if (PyString_Check(obj) || PyUnicode_Check(obj))
{
PyObject *encoded = encoder_encode_string(s, obj);
if (encoded != NULL)
- rv = _steal_list_append(rval, encoded);
+ rv = _steal_accumulate(rval, encoded);
}
else if (PyInt_Check(obj) || PyLong_Check(obj)) {
- PyObject *encoded = PyObject_Str(obj);
- if (encoded != NULL)
- rv = _steal_list_append(rval, encoded);
+ PyObject *encoded;
+ if (PyInt_CheckExact(obj) || PyLong_CheckExact(obj)) {
+ encoded = PyObject_Str(obj);
+ }
+ else {
+ /* See #118, do not trust custom str/repr */
+ PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyLong_Type, obj, NULL);
+ if (tmp == NULL) {
+ encoded = NULL;
+ }
+ else {
+ encoded = PyObject_Str(tmp);
+ Py_DECREF(tmp);
+ }
+ }
+ if (encoded != NULL) {
+ encoded = maybe_quote_bigint(s, encoded, obj);
+ if (encoded == NULL)
+ break;
+ rv = _steal_accumulate(rval, encoded);
+ }
}
else if (PyFloat_Check(obj)) {
PyObject *encoded = encoder_encode_float(s, obj);
if (encoded != NULL)
- rv = _steal_list_append(rval, encoded);
+ rv = _steal_accumulate(rval, encoded);
+ }
+ else if (s->for_json && _has_for_json_hook(obj)) {
+ PyObject *newobj;
+ if (Py_EnterRecursiveCall(" while encoding a JSON object"))
+ return rv;
+ newobj = PyObject_CallMethod(obj, "for_json", NULL);
+ if (newobj != NULL) {
+ rv = encoder_listencode_obj(s, rval, newobj, indent_level);
+ Py_DECREF(newobj);
+ }
+ Py_LeaveRecursiveCall();
}
else if (s->namedtuple_as_object && _is_namedtuple(obj)) {
- PyObject *newobj = PyObject_CallMethod(obj, "_asdict", NULL);
+ PyObject *newobj;
+ if (Py_EnterRecursiveCall(" while encoding a JSON object"))
+ return rv;
+ newobj = PyObject_CallMethod(obj, "_asdict", NULL);
if (newobj != NULL) {
rv = encoder_listencode_dict(s, rval, newobj, indent_level);
Py_DECREF(newobj);
}
+ Py_LeaveRecursiveCall();
}
else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) {
+ if (Py_EnterRecursiveCall(" while encoding a JSON object"))
+ return rv;
rv = encoder_listencode_list(s, rval, obj, indent_level);
+ Py_LeaveRecursiveCall();
}
else if (PyDict_Check(obj)) {
+ if (Py_EnterRecursiveCall(" while encoding a JSON object"))
+ return rv;
rv = encoder_listencode_dict(s, rval, obj, indent_level);
+ Py_LeaveRecursiveCall();
}
- else if (s->use_decimal && Decimal_Check(obj)) {
+ else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) {
PyObject *encoded = PyObject_Str(obj);
if (encoded != NULL)
- rv = _steal_list_append(rval, encoded);
+ rv = _steal_accumulate(rval, encoded);
}
else {
PyObject *ident = NULL;
@@ -2239,12 +2965,16 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi
break;
}
}
+ if (Py_EnterRecursiveCall(" while encoding a JSON object"))
+ return rv;
newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
if (newobj == NULL) {
Py_XDECREF(ident);
+ Py_LeaveRecursiveCall();
break;
}
rv = encoder_listencode_obj(s, rval, newobj, indent_level);
+ Py_LeaveRecursiveCall();
Py_DECREF(newobj);
if (rv) {
Py_XDECREF(ident);
@@ -2259,37 +2989,33 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi
}
}
} while (0);
- Py_LeaveRecursiveCall();
return rv;
}
static int
-encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
+encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level)
{
- /* Encode Python dict dct a JSON term, rval is a PyList */
+ /* Encode Python dict dct a JSON term */
static PyObject *open_dict = NULL;
static PyObject *close_dict = NULL;
static PyObject *empty_dict = NULL;
- static PyObject *iteritems = NULL;
PyObject *kstr = NULL;
PyObject *ident = NULL;
PyObject *iter = NULL;
PyObject *item = NULL;
PyObject *items = NULL;
PyObject *encoded = NULL;
- int skipkeys;
Py_ssize_t idx;
- if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) {
- open_dict = PyString_InternFromString("{");
- close_dict = PyString_InternFromString("}");
- empty_dict = PyString_InternFromString("{}");
- iteritems = PyString_InternFromString("iteritems");
- if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL)
+ if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
+ open_dict = JSON_InternFromString("{");
+ close_dict = JSON_InternFromString("}");
+ empty_dict = JSON_InternFromString("{}");
+ if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
return -1;
}
if (PyDict_Size(dct) == 0)
- return PyList_Append(rval, empty_dict);
+ return JSON_Accu_Accumulate(rval, empty_dict);
if (s->markers != Py_None) {
int has_key;
@@ -2307,7 +3033,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
}
}
- if (PyList_Append(rval, open_dict))
+ if (JSON_Accu_Accumulate(rval, open_dict))
goto bail;
if (s->indent != Py_None) {
@@ -2320,47 +3046,10 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
*/
}
- if (PyObject_IsTrue(s->sort_keys)) {
- /* First sort the keys then replace them with (key, value) tuples. */
- Py_ssize_t i, nitems;
- if (PyDict_CheckExact(dct))
- items = PyDict_Keys(dct);
- else
- items = PyMapping_Keys(dct);
- if (items == NULL)
- goto bail;
- if (!PyList_Check(items)) {
- PyErr_SetString(PyExc_ValueError, "keys must return list");
- goto bail;
- }
- if (PyList_Sort(items) < 0)
- goto bail;
- nitems = PyList_GET_SIZE(items);
- for (i = 0; i < nitems; i++) {
- PyObject *key, *value;
- key = PyList_GET_ITEM(items, i);
- value = PyDict_GetItem(dct, key);
- item = PyTuple_Pack(2, key, value);
- if (item == NULL)
- goto bail;
- PyList_SET_ITEM(items, i, item);
- Py_DECREF(key);
- }
- }
- else {
- if (PyDict_CheckExact(dct))
- items = PyDict_Items(dct);
- else
- items = PyMapping_Items(dct);
- }
- if (items == NULL)
- goto bail;
- iter = PyObject_GetIter(items);
- Py_DECREF(items);
+ iter = encoder_dict_iteritems(s, dct);
if (iter == NULL)
goto bail;
- skipkeys = PyObject_IsTrue(s->skipkeys);
idx = 0;
while ((item = PyIter_Next(iter))) {
PyObject *encoded, *key, *value;
@@ -2378,43 +3067,21 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
encoded = PyDict_GetItem(s->key_memo, key);
if (encoded != NULL) {
Py_INCREF(encoded);
- }
- else if (PyString_Check(key) || PyUnicode_Check(key)) {
- Py_INCREF(key);
- kstr = key;
- }
- else if (PyFloat_Check(key)) {
- kstr = encoder_encode_float(s, key);
- if (kstr == NULL)
- goto bail;
- }
- else if (key == Py_True || key == Py_False || key == Py_None) {
- /* This must come before the PyInt_Check because
- True and False are also 1 and 0.*/
- kstr = _encoded_const(key);
- if (kstr == NULL)
- goto bail;
- }
- else if (PyInt_Check(key) || PyLong_Check(key)) {
- kstr = PyObject_Str(key);
+ } else {
+ kstr = encoder_stringify_key(s, key);
if (kstr == NULL)
goto bail;
+ else if (kstr == Py_None) {
+ /* skipkeys */
+ Py_DECREF(item);
+ Py_DECREF(kstr);
+ continue;
+ }
}
- else if (skipkeys) {
- Py_DECREF(item);
- continue;
- }
- else {
- /* TODO: include repr of key */
- PyErr_SetString(PyExc_TypeError, "keys must be a string");
- goto bail;
- }
-
if (idx) {
- if (PyList_Append(rval, s->item_separator))
+ if (JSON_Accu_Accumulate(rval, s->item_separator))
goto bail;
}
-
if (encoded == NULL) {
encoded = encoder_encode_string(s, kstr);
Py_CLEAR(kstr);
@@ -2423,11 +3090,11 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
if (PyDict_SetItem(s->key_memo, key, encoded))
goto bail;
}
- if (PyList_Append(rval, encoded)) {
+ if (JSON_Accu_Accumulate(rval, encoded)) {
goto bail;
}
Py_CLEAR(encoded);
- if (PyList_Append(rval, s->key_separator))
+ if (JSON_Accu_Accumulate(rval, s->key_separator))
goto bail;
if (encoder_listencode_obj(s, rval, value, indent_level))
goto bail;
@@ -2449,13 +3116,14 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
yield '\n' + (_indent * _current_indent_level)
*/
}
- if (PyList_Append(rval, close_dict))
+ if (JSON_Accu_Accumulate(rval, close_dict))
goto bail;
return 0;
bail:
Py_XDECREF(encoded);
Py_XDECREF(items);
+ Py_XDECREF(item);
Py_XDECREF(iter);
Py_XDECREF(kstr);
Py_XDECREF(ident);
@@ -2464,9 +3132,9 @@ bail:
static int
-encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
+encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level)
{
- /* Encode Python list seq to a JSON term, rval is a PyList */
+ /* Encode Python list seq to a JSON term */
static PyObject *open_array = NULL;
static PyObject *close_array = NULL;
static PyObject *empty_array = NULL;
@@ -2477,9 +3145,9 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss
int i = 0;
if (open_array == NULL || close_array == NULL || empty_array == NULL) {
- open_array = PyString_InternFromString("[");
- close_array = PyString_InternFromString("]");
- empty_array = PyString_InternFromString("[]");
+ open_array = JSON_InternFromString("[");
+ close_array = JSON_InternFromString("]");
+ empty_array = JSON_InternFromString("[]");
if (open_array == NULL || close_array == NULL || empty_array == NULL)
return -1;
}
@@ -2488,7 +3156,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss
if (is_true == -1)
return -1;
else if (is_true == 0)
- return PyList_Append(rval, empty_array);
+ return JSON_Accu_Accumulate(rval, empty_array);
if (s->markers != Py_None) {
int has_key;
@@ -2510,7 +3178,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss
if (iter == NULL)
goto bail;
- if (PyList_Append(rval, open_array))
+ if (JSON_Accu_Accumulate(rval, open_array))
goto bail;
if (s->indent != Py_None) {
/* TODO: DOES NOT RUN */
@@ -2523,7 +3191,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss
}
while ((obj = PyIter_Next(iter))) {
if (i) {
- if (PyList_Append(rval, s->item_separator))
+ if (JSON_Accu_Accumulate(rval, s->item_separator))
goto bail;
}
if (encoder_listencode_obj(s, rval, obj, indent_level))
@@ -2546,7 +3214,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss
yield '\n' + (_indent * _current_indent_level)
*/
}
- if (PyList_Append(rval, close_array))
+ if (JSON_Accu_Accumulate(rval, close_array))
goto bail;
return 0;
@@ -2574,12 +3242,17 @@ encoder_traverse(PyObject *self, visitproc visit, void *arg)
Py_VISIT(s->markers);
Py_VISIT(s->defaultfn);
Py_VISIT(s->encoder);
+ Py_VISIT(s->encoding);
Py_VISIT(s->indent);
Py_VISIT(s->key_separator);
Py_VISIT(s->item_separator);
- Py_VISIT(s->sort_keys);
- Py_VISIT(s->skipkeys);
Py_VISIT(s->key_memo);
+ Py_VISIT(s->sort_keys);
+ Py_VISIT(s->item_sort_kw);
+ Py_VISIT(s->item_sort_key);
+ Py_VISIT(s->max_long_size);
+ Py_VISIT(s->min_long_size);
+ Py_VISIT(s->Decimal);
return 0;
}
@@ -2593,12 +3266,18 @@ encoder_clear(PyObject *self)
Py_CLEAR(s->markers);
Py_CLEAR(s->defaultfn);
Py_CLEAR(s->encoder);
+ Py_CLEAR(s->encoding);
Py_CLEAR(s->indent);
Py_CLEAR(s->key_separator);
Py_CLEAR(s->item_separator);
- Py_CLEAR(s->sort_keys);
- Py_CLEAR(s->skipkeys);
Py_CLEAR(s->key_memo);
+ Py_CLEAR(s->skipkeys_bool);
+ Py_CLEAR(s->sort_keys);
+ Py_CLEAR(s->item_sort_kw);
+ Py_CLEAR(s->item_sort_key);
+ Py_CLEAR(s->max_long_size);
+ Py_CLEAR(s->min_long_size);
+ Py_CLEAR(s->Decimal);
return 0;
}
@@ -2606,8 +3285,7 @@ PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable")
static
PyTypeObject PyEncoderType = {
- PyObject_HEAD_INIT(NULL)
- 0, /* tp_internal */
+ PyVarObject_HEAD_INIT(NULL, 0)
"simplejson._speedups.Encoder", /* tp_name */
sizeof(PyEncoderObject), /* tp_basicsize */
0, /* tp_itemsize */
@@ -2663,28 +3341,53 @@ static PyMethodDef speedups_methods[] = {
PyDoc_STRVAR(module_doc,
"simplejson speedups\n");
-void
-init_speedups(void)
+#if PY_MAJOR_VERSION >= 3
+static struct PyModuleDef moduledef = {
+ PyModuleDef_HEAD_INIT,
+ "_speedups", /* m_name */
+ module_doc, /* m_doc */
+ -1, /* m_size */
+ speedups_methods, /* m_methods */
+ NULL, /* m_reload */
+ NULL, /* m_traverse */
+ NULL, /* m_clear*/
+ NULL, /* m_free */
+};
+#endif
+
+static PyObject *
+moduleinit(void)
{
- PyObject *m, *decimal;
+ PyObject *m;
PyScannerType.tp_new = PyType_GenericNew;
if (PyType_Ready(&PyScannerType) < 0)
- return;
+ return NULL;
PyEncoderType.tp_new = PyType_GenericNew;
if (PyType_Ready(&PyEncoderType) < 0)
- return;
-
- decimal = PyImport_ImportModule("decimal");
- if (decimal == NULL)
- return;
- DecimalTypePtr = (PyTypeObject*)PyObject_GetAttrString(decimal, "Decimal");
- Py_DECREF(decimal);
- if (DecimalTypePtr == NULL)
- return;
+ return NULL;
+#if PY_MAJOR_VERSION >= 3
+ m = PyModule_Create(&moduledef);
+#else
m = Py_InitModule3("_speedups", speedups_methods, module_doc);
+#endif
Py_INCREF((PyObject*)&PyScannerType);
PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
Py_INCREF((PyObject*)&PyEncoderType);
PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
+ return m;
+}
+
+#if PY_MAJOR_VERSION >= 3
+PyMODINIT_FUNC
+PyInit__speedups(void)
+{
+ return moduleinit();
}
+#else
+void
+init_speedups(void)
+{
+ moduleinit();
+}
+#endif
diff --git a/simplejson/compat.py b/simplejson/compat.py
new file mode 100644
index 0000000..a0af4a1
--- /dev/null
+++ b/simplejson/compat.py
@@ -0,0 +1,46 @@
+"""Python 3 compatibility shims
+"""
+import sys
+if sys.version_info[0] < 3:
+ PY3 = False
+ def b(s):
+ return s
+ def u(s):
+ return unicode(s, 'unicode_escape')
+ import cStringIO as StringIO
+ StringIO = BytesIO = StringIO.StringIO
+ text_type = unicode
+ binary_type = str
+ string_types = (basestring,)
+ integer_types = (int, long)
+ unichr = unichr
+ reload_module = reload
+ def fromhex(s):
+ return s.decode('hex')
+
+else:
+ PY3 = True
+ if sys.version_info[:2] >= (3, 4):
+ from importlib import reload as reload_module
+ else:
+ from imp import reload as reload_module
+ import codecs
+ def b(s):
+ return codecs.latin_1_encode(s)[0]
+ def u(s):
+ return s
+ import io
+ StringIO = io.StringIO
+ BytesIO = io.BytesIO
+ text_type = str
+ binary_type = bytes
+ string_types = (str,)
+ integer_types = (int,)
+
+ def unichr(s):
+ return u(chr(s))
+
+ def fromhex(s):
+ return bytes.fromhex(s)
+
+long_type = integer_types[-1]
diff --git a/simplejson/decoder.py b/simplejson/decoder.py
index e5496d6..545e658 100644
--- a/simplejson/decoder.py
+++ b/simplejson/decoder.py
@@ -1,24 +1,28 @@
"""Implementation of JSONDecoder
"""
+from __future__ import absolute_import
import re
import sys
import struct
+from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr
+from .scanner import make_scanner, JSONDecodeError
-from simplejson.scanner import make_scanner
def _import_c_scanstring():
try:
- from simplejson._speedups import scanstring
+ from ._speedups import scanstring
return scanstring
except ImportError:
return None
c_scanstring = _import_c_scanstring()
+# NOTE (3.1.0): JSONDecodeError may still be imported from this module for
+# compatibility, but it was never in the __all__
__all__ = ['JSONDecoder']
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
def _floatconstants():
- _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
+ _BYTES = fromhex('7FF80000000000007FF0000000000000')
# The struct module in Python 2.4 would get frexp() out of range here
# when an endian is specified in the format string. Fixed in Python 2.5+
if sys.byteorder != 'big':
@@ -28,57 +32,6 @@ def _floatconstants():
NaN, PosInf, NegInf = _floatconstants()
-
-class JSONDecodeError(ValueError):
- """Subclass of ValueError with the following additional properties:
-
- msg: The unformatted error message
- doc: The JSON document being parsed
- pos: The start index of doc where parsing failed
- end: The end index of doc where parsing failed (may be None)
- lineno: The line corresponding to pos
- colno: The column corresponding to pos
- endlineno: The line corresponding to end (may be None)
- endcolno: The column corresponding to end (may be None)
-
- """
- def __init__(self, msg, doc, pos, end=None):
- ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
- self.msg = msg
- self.doc = doc
- self.pos = pos
- self.end = end
- self.lineno, self.colno = linecol(doc, pos)
- if end is not None:
- self.endlineno, self.endcolno = linecol(doc, end)
- else:
- self.endlineno, self.endcolno = None, None
-
-
-def linecol(doc, pos):
- lineno = doc.count('\n', 0, pos) + 1
- if lineno == 1:
- colno = pos
- else:
- colno = pos - doc.rindex('\n', 0, pos)
- return lineno, colno
-
-
-def errmsg(msg, doc, pos, end=None):
- # Note that this function is called from _speedups
- lineno, colno = linecol(doc, pos)
- if end is None:
- #fmt = '{0}: line {1} column {2} (char {3})'
- #return fmt.format(msg, lineno, colno, pos)
- fmt = '%s: line %d column %d (char %d)'
- return fmt % (msg, lineno, colno, pos)
- endlineno, endcolno = linecol(doc, end)
- #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
- #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
- fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
- return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
-
-
_CONSTANTS = {
'-Infinity': NegInf,
'Infinity': PosInf,
@@ -87,14 +40,15 @@ _CONSTANTS = {
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
BACKSLASH = {
- '"': u'"', '\\': u'\\', '/': u'/',
- 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
+ '"': u('"'), '\\': u('\u005c'), '/': u('/'),
+ 'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'),
}
DEFAULT_ENCODING = "utf-8"
def py_scanstring(s, end, encoding=None, strict=True,
- _b=BACKSLASH, _m=STRINGCHUNK.match):
+ _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join,
+ _PY3=PY3, _maxunicode=sys.maxunicode):
"""Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string.
Unescapes all valid JSON string escape sequences and raises ValueError
@@ -117,8 +71,8 @@ def py_scanstring(s, end, encoding=None, strict=True,
content, terminator = chunk.groups()
# Content is contains zero or more unescaped string characters
if content:
- if not isinstance(content, unicode):
- content = unicode(content, encoding)
+ if not _PY3 and not isinstance(content, text_type):
+ content = text_type(content, encoding)
_append(content)
# Terminator is the end of string, a literal control character,
# or a backslash denoting that an escape sequence follows
@@ -126,8 +80,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
break
elif terminator != '\\':
if strict:
- msg = "Invalid control character %r at" % (terminator,)
- #msg = "Invalid control character {0!r} at".format(terminator)
+ msg = "Invalid control character %r at"
raise JSONDecodeError(msg, s, end)
else:
_append(terminator)
@@ -142,33 +95,42 @@ def py_scanstring(s, end, encoding=None, strict=True,
try:
char = _b[esc]
except KeyError:
- msg = "Invalid \\escape: " + repr(esc)
+ msg = "Invalid \\X escape sequence %r"
raise JSONDecodeError(msg, s, end)
end += 1
else:
# Unicode escape sequence
+ msg = "Invalid \\uXXXX escape sequence"
esc = s[end + 1:end + 5]
- next_end = end + 5
- if len(esc) != 4:
- msg = "Invalid \\uXXXX escape"
- raise JSONDecodeError(msg, s, end)
- uni = int(esc, 16)
+ escX = esc[1:2]
+ if len(esc) != 4 or escX == 'x' or escX == 'X':
+ raise JSONDecodeError(msg, s, end - 1)
+ try:
+ uni = int(esc, 16)
+ except ValueError:
+ raise JSONDecodeError(msg, s, end - 1)
+ end += 5
# Check for surrogate pair on UCS-4 systems
- if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
- msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
- if not s[end + 5:end + 7] == '\\u':
- raise JSONDecodeError(msg, s, end)
- esc2 = s[end + 7:end + 11]
- if len(esc2) != 4:
- raise JSONDecodeError(msg, s, end)
- uni2 = int(esc2, 16)
- uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
- next_end += 6
+ # Note that this will join high/low surrogate pairs
+ # but will also pass unpaired surrogates through
+ if (_maxunicode > 65535 and
+ uni & 0xfc00 == 0xd800 and
+ s[end:end + 2] == '\\u'):
+ esc2 = s[end + 2:end + 6]
+ escX = esc2[1:2]
+ if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
+ try:
+ uni2 = int(esc2, 16)
+ except ValueError:
+ raise JSONDecodeError(msg, s, end)
+ if uni2 & 0xfc00 == 0xdc00:
+ uni = 0x10000 + (((uni - 0xd800) << 10) |
+ (uni2 - 0xdc00))
+ end += 6
char = unichr(uni)
- end = next_end
# Append the unescaped character
_append(char)
- return u''.join(chunks), end
+ return _join(chunks), end
# Use speedup if available
@@ -177,9 +139,10 @@ scanstring = c_scanstring or py_scanstring
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'
-def JSONObject((s, end), encoding, strict, scan_once, object_hook,
+def JSONObject(state, encoding, strict, scan_once, object_hook,
object_pairs_hook, memo=None,
_w=WHITESPACE.match, _ws=WHITESPACE_STR):
+ (s, end) = state
# Backwards compatibility
if memo is None:
memo = {}
@@ -203,7 +166,9 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook,
pairs = object_hook(pairs)
return pairs, end + 1
elif nextchar != '"':
- raise JSONDecodeError("Expecting property name", s, end)
+ raise JSONDecodeError(
+ "Expecting property name enclosed in double quotes",
+ s, end)
end += 1
while True:
key, end = scanstring(s, end, encoding, strict)
@@ -214,7 +179,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook,
if s[end:end + 1] != ':':
end = _w(s, end).end()
if s[end:end + 1] != ':':
- raise JSONDecodeError("Expecting : delimiter", s, end)
+ raise JSONDecodeError("Expecting ':' delimiter", s, end)
end += 1
@@ -226,10 +191,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook,
except IndexError:
pass
- try:
- value, end = scan_once(s, end)
- except StopIteration:
- raise JSONDecodeError("Expecting object", s, end)
+ value, end = scan_once(s, end)
pairs.append((key, value))
try:
@@ -244,7 +206,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook,
if nextchar == '}':
break
elif nextchar != ',':
- raise JSONDecodeError("Expecting , delimiter", s, end - 1)
+ raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)
try:
nextchar = s[end]
@@ -259,7 +221,9 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook,
end += 1
if nextchar != '"':
- raise JSONDecodeError("Expecting property name", s, end - 1)
+ raise JSONDecodeError(
+ "Expecting property name enclosed in double quotes",
+ s, end - 1)
if object_pairs_hook is not None:
result = object_pairs_hook(pairs)
@@ -269,7 +233,8 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook,
pairs = object_hook(pairs)
return pairs, end
-def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+ (s, end) = state
values = []
nextchar = s[end:end + 1]
if nextchar in _ws:
@@ -278,12 +243,11 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
# Look-ahead for trivial empty array
if nextchar == ']':
return values, end + 1
+ elif nextchar == '':
+ raise JSONDecodeError("Expecting value or ']'", s, end)
_append = values.append
while True:
- try:
- value, end = scan_once(s, end)
- except StopIteration:
- raise JSONDecodeError("Expecting object", s, end)
+ value, end = scan_once(s, end)
_append(value)
nextchar = s[end:end + 1]
if nextchar in _ws:
@@ -293,7 +257,7 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
if nextchar == ']':
break
elif nextchar != ',':
- raise JSONDecodeError("Expecting , delimiter", s, end)
+ raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1)
try:
if s[end] in _ws:
@@ -317,7 +281,7 @@ class JSONDecoder(object):
+---------------+-------------------+
| array | list |
+---------------+-------------------+
- | string | unicode |
+ | string | str, unicode |
+---------------+-------------------+
| number (int) | int, long |
+---------------+-------------------+
@@ -381,6 +345,8 @@ class JSONDecoder(object):
``False`` then control characters will be allowed in strings.
"""
+ if encoding is None:
+ encoding = DEFAULT_ENCODING
self.encoding = encoding
self.object_hook = object_hook
self.object_pairs_hook = object_pairs_hook
@@ -394,28 +360,41 @@ class JSONDecoder(object):
self.memo = {}
self.scan_once = make_scanner(self)
- def decode(self, s, _w=WHITESPACE.match):
+ def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
"""Return the Python representation of ``s`` (a ``str`` or ``unicode``
instance containing a JSON document)
"""
- obj, end = self.raw_decode(s, idx=_w(s, 0).end())
+ if _PY3 and isinstance(s, binary_type):
+ s = s.decode(self.encoding)
+ obj, end = self.raw_decode(s)
end = _w(s, end).end()
if end != len(s):
raise JSONDecodeError("Extra data", s, end, len(s))
return obj
- def raw_decode(self, s, idx=0):
+ def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
"""Decode a JSON document from ``s`` (a ``str`` or ``unicode``
beginning with a JSON document) and return a 2-tuple of the Python
representation and the index in ``s`` where the document ended.
+ Optionally, ``idx`` can be used to specify an offset in ``s`` where
+ the JSON document begins.
This can be used to decode a JSON document from a string that may
have extraneous data at the end.
"""
- try:
- obj, end = self.scan_once(s, idx)
- except StopIteration:
- raise JSONDecodeError("No JSON object could be decoded", s, idx)
- return obj, end
+ if idx < 0:
+ # Ensure that raw_decode bails on negative indexes, the regex
+ # would otherwise mask this behavior. #98
+ raise JSONDecodeError('Expecting value', s, idx)
+ if _PY3 and not isinstance(s, text_type):
+ raise TypeError("Input string must be text, not bytes")
+ # strip UTF-8 bom
+ if len(s) > idx:
+ ord0 = ord(s[idx])
+ if ord0 == 0xfeff:
+ idx += 1
+ elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
+ idx += 3
+ return self.scan_once(s, idx=_w(s, idx).end())
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
index 75ba993..d771bb4 100644
--- a/simplejson/encoder.py
+++ b/simplejson/encoder.py
@@ -1,11 +1,14 @@
"""Implementation of JSONEncoder
"""
+from __future__ import absolute_import
import re
-from decimal import Decimal
-
+from operator import itemgetter
+# Do not import Decimal directly to avoid reload issues
+import decimal
+from .compat import u, unichr, binary_type, string_types, integer_types, PY3
def _import_speedups():
try:
- from simplejson import _speedups
+ from . import _speedups
return _speedups.encode_basestring_ascii, _speedups.make_encoder
except ImportError:
return None, None
@@ -13,7 +16,10 @@ c_encode_basestring_ascii, c_make_encoder = _import_speedups()
from simplejson.decoder import PosInf
-ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
+#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
+# This is required because u() will mangle the string and ur'' isn't valid
+# python3 syntax
+ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(r'[\x80-\xff]')
ESCAPE_DCT = {
@@ -24,32 +30,40 @@ ESCAPE_DCT = {
'\n': '\\n',
'\r': '\\r',
'\t': '\\t',
- u'\u2028': '\\u2028',
- u'\u2029': '\\u2029',
}
for i in range(0x20):
#ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
+for i in [0x2028, 0x2029]:
+ ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,))
FLOAT_REPR = repr
-def encode_basestring(s):
+def encode_basestring(s, _PY3=PY3, _q=u('"')):
"""Return a JSON representation of a Python string
"""
- if isinstance(s, str) and HAS_UTF8.search(s) is not None:
- s = s.decode('utf-8')
+ if _PY3:
+ if isinstance(s, binary_type):
+ s = s.decode('utf-8')
+ else:
+ if isinstance(s, str) and HAS_UTF8.search(s) is not None:
+ s = s.decode('utf-8')
def replace(match):
return ESCAPE_DCT[match.group(0)]
- return u'"' + ESCAPE.sub(replace, s) + u'"'
+ return _q + ESCAPE.sub(replace, s) + _q
-def py_encode_basestring_ascii(s):
+def py_encode_basestring_ascii(s, _PY3=PY3):
"""Return an ASCII-only JSON representation of a Python string
"""
- if isinstance(s, str) and HAS_UTF8.search(s) is not None:
- s = s.decode('utf-8')
+ if _PY3:
+ if isinstance(s, binary_type):
+ s = s.decode('utf-8')
+ else:
+ if isinstance(s, str) and HAS_UTF8.search(s) is not None:
+ s = s.decode('utf-8')
def replace(match):
s = match.group(0)
try:
@@ -103,11 +117,14 @@ class JSONEncoder(object):
"""
item_separator = ', '
key_separator = ': '
+
def __init__(self, skipkeys=False, ensure_ascii=True,
- check_circular=True, allow_nan=True, sort_keys=False,
- indent=None, separators=None, encoding='utf-8', default=None,
- use_decimal=True, namedtuple_as_object=True,
- tuple_as_array=True, iterable_as_array=False):
+ check_circular=True, allow_nan=True, sort_keys=False,
+ indent=None, separators=None, encoding='utf-8', default=None,
+ use_decimal=True, namedtuple_as_object=True,
+ tuple_as_array=True, bigint_as_string=False,
+ item_sort_key=None, for_json=False, ignore_nan=False,
+ int_as_string_bitcount=None, iterable_as_array=False):
"""Constructor for JSONEncoder, with sensible defaults.
If skipkeys is false, then it is a TypeError to attempt
@@ -139,9 +156,10 @@ class JSONEncoder(object):
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
- If specified, separators should be a (item_separator, key_separator)
- tuple. The default is (', ', ': '). To get the most compact JSON
- representation you should specify (',', ':') to eliminate whitespace.
+ If specified, separators should be an (item_separator, key_separator)
+ tuple. The default is (', ', ': ') if *indent* is ``None`` and
+ (',', ': ') otherwise. To get the most compact JSON representation,
+ you should specify (',', ':') to eliminate whitespace.
If specified, default is a function that gets called for objects
that can't otherwise be serialized. It should return a JSON encodable
@@ -155,7 +173,7 @@ class JSONEncoder(object):
be supported directly by the encoder. For the inverse, decode JSON
with ``parse_float=decimal.Decimal``.
- If namedtuple_as_object is true (the default), tuple subclasses with
+ If namedtuple_as_object is true (the default), objects with
``_asdict()`` methods will be encoded as JSON objects.
If tuple_as_array is true (the default), tuple (and subclasses) will
@@ -165,6 +183,27 @@ class JSONEncoder(object):
any object not in the above table that implements ``__iter__()``
will be encoded as a JSON array.
+ If bigint_as_string is true (not the default), ints 2**53 and higher
+ or lower than -2**53 will be encoded as strings. This is to avoid the
+ rounding that happens in Javascript otherwise.
+
+ If int_as_string_bitcount is a positive number (n), then int of size
+ greater than or equal to 2**n or lower than or equal to -2**n will be
+ encoded as strings.
+
+ If specified, item_sort_key is a callable used to sort the items in
+ each dictionary. This is useful if you want to sort items other than
+ in alphabetical order by key.
+
+ If for_json is true (not the default), objects with a ``for_json()``
+ method will use the return value of that method for encoding as JSON
+ instead of the object.
+
+ If *ignore_nan* is true (default: ``False``), then out of range
+ :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized
+ as ``null`` in compliance with the ECMA-262 specification. If true,
+ this will override *allow_nan*.
+
"""
self.skipkeys = skipkeys
@@ -176,8 +215,13 @@ class JSONEncoder(object):
self.namedtuple_as_object = namedtuple_as_object
self.tuple_as_array = tuple_as_array
self.iterable_as_array = iterable_as_array
- if isinstance(indent, (int, long)):
- indent = ' ' * indent
+ self.bigint_as_string = bigint_as_string
+ self.item_sort_key = item_sort_key
+ self.for_json = for_json
+ self.ignore_nan = ignore_nan
+ self.int_as_string_bitcount = int_as_string_bitcount
+ if indent is not None and not isinstance(indent, string_types):
+ indent = indent * ' '
self.indent = indent
if separators is not None:
self.item_separator, self.key_separator = separators
@@ -216,12 +260,11 @@ class JSONEncoder(object):
"""
# This is for extremely simple cases and benchmarks.
- if isinstance(o, basestring):
- if isinstance(o, str):
- _encoding = self.encoding
- if (_encoding is not None
- and not (_encoding == 'utf-8')):
- o = o.decode(_encoding)
+ if isinstance(o, binary_type):
+ _encoding = self.encoding
+ if (_encoding is not None and not (_encoding == 'utf-8')):
+ o = o.decode(_encoding)
+ if isinstance(o, string_types):
if self.ensure_ascii:
return encode_basestring_ascii(o)
else:
@@ -257,11 +300,11 @@ class JSONEncoder(object):
_encoder = encode_basestring
if self.encoding != 'utf-8':
def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
- if isinstance(o, str):
+ if isinstance(o, binary_type):
o = o.decode(_encoding)
return _orig_encoder(o)
- def floatstr(o, allow_nan=self.allow_nan,
+ def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan,
_repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
# Check for specials. Note that this type of test is processor
# and/or platform-specific, so do tests which don't depend on
@@ -274,17 +317,23 @@ class JSONEncoder(object):
elif o == _neginf:
text = '-Infinity'
else:
+ if type(o) != float:
+ # See #118, do not trust custom str/repr
+ o = float(o)
return _repr(o)
- if not allow_nan:
+ if ignore_nan:
+ text = 'null'
+ elif not allow_nan:
raise ValueError(
"Out of range float values are not JSON compliant: " +
repr(o))
return text
-
key_memo = {}
+ int_as_string_bitcount = (
+ 53 if self.bigint_as_string else self.int_as_string_bitcount)
if (_one_shot and c_make_encoder is not None
and self.indent is None):
_iterencode = c_make_encoder(
@@ -292,14 +341,18 @@ class JSONEncoder(object):
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array,
- self.iterable_as_array)
+ int_as_string_bitcount,
+ self.item_sort_key, self.encoding, self.for_json,
+ self.ignore_nan, decimal.Decimal, self.iterable_as_array)
else:
_iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array,
- self.iterable_as_array)
+ int_as_string_bitcount,
+ self.item_sort_key, self.encoding, self.for_json,
+ self.iterable_as_array, Decimal=decimal.Decimal)
try:
return _iterencode(o, 0)
finally:
@@ -336,24 +389,53 @@ class JSONEncoderForHTML(JSONEncoder):
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
_use_decimal, _namedtuple_as_object, _tuple_as_array,
+ _int_as_string_bitcount, _item_sort_key,
+ _encoding,_for_json,
_iterable_as_array,
## HACK: hand-optimized bytecode; turn globals into locals
- False=False,
- True=True,
+ _PY3=PY3,
ValueError=ValueError,
- basestring=basestring,
- Decimal=Decimal,
+ string_types=string_types,
+ Decimal=None,
dict=dict,
float=float,
id=id,
- int=int,
+ integer_types=integer_types,
isinstance=isinstance,
list=list,
- long=long,
str=str,
tuple=tuple,
iter=iter,
):
+ if _use_decimal and Decimal is None:
+ Decimal = decimal.Decimal
+ if _item_sort_key and not callable(_item_sort_key):
+ raise TypeError("item_sort_key must be None or callable")
+ elif _sort_keys and not _item_sort_key:
+ _item_sort_key = itemgetter(0)
+
+ if (_int_as_string_bitcount is not None and
+ (_int_as_string_bitcount <= 0 or
+ not isinstance(_int_as_string_bitcount, integer_types))):
+ raise TypeError("int_as_string_bitcount must be a positive integer")
+
+ def _encode_int(value):
+ skip_quoting = (
+ _int_as_string_bitcount is None
+ or
+ _int_as_string_bitcount < 1
+ )
+ if type(value) not in integer_types:
+ # See #118, do not trust custom str/repr
+ value = int(value)
+ if (
+ skip_quoting or
+ (-1 << _int_as_string_bitcount)
+ < value <
+ (1 << _int_as_string_bitcount)
+ ):
+ return str(value)
+ return '"' + str(value) + '"'
def _iterencode_list(lst, _current_indent_level):
if not lst:
@@ -379,7 +461,8 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
first = False
else:
buf = separator
- if isinstance(value, basestring):
+ if (isinstance(value, string_types) or
+ (_PY3 and isinstance(value, binary_type))):
yield buf + _encoder(value)
elif value is None:
yield buf + 'null'
@@ -387,26 +470,30 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
yield buf + 'true'
elif value is False:
yield buf + 'false'
- elif isinstance(value, (int, long)):
- yield buf + str(value)
+ elif isinstance(value, integer_types):
+ yield buf + _encode_int(value)
elif isinstance(value, float):
yield buf + _floatstr(value)
elif _use_decimal and isinstance(value, Decimal):
yield buf + str(value)
else:
yield buf
- if isinstance(value, list):
- chunks = _iterencode_list(value, _current_indent_level)
- elif (_namedtuple_as_object and isinstance(value, tuple) and
- hasattr(value, '_asdict')):
- chunks = _iterencode_dict(value._asdict(),
- _current_indent_level)
- elif _tuple_as_array and isinstance(value, tuple):
+ for_json = _for_json and getattr(value, 'for_json', None)
+ if for_json and callable(for_json):
+ chunks = _iterencode(for_json(), _current_indent_level)
+ elif isinstance(value, list):
chunks = _iterencode_list(value, _current_indent_level)
- elif isinstance(value, dict):
- chunks = _iterencode_dict(value, _current_indent_level)
else:
- chunks = _iterencode(value, _current_indent_level)
+ _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
+ if _asdict and callable(_asdict):
+ chunks = _iterencode_dict(_asdict(),
+ _current_indent_level)
+ elif _tuple_as_array and isinstance(value, tuple):
+ chunks = _iterencode_list(value, _current_indent_level)
+ elif isinstance(value, dict):
+ chunks = _iterencode_dict(value, _current_indent_level)
+ else:
+ chunks = _iterencode(value, _current_indent_level)
for chunk in chunks:
yield chunk
if newline_indent is not None:
@@ -416,6 +503,32 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
if markers is not None:
del markers[markerid]
+ def _stringify_key(key):
+ if isinstance(key, string_types): # pragma: no cover
+ pass
+ elif isinstance(key, binary_type):
+ key = key.decode(_encoding)
+ elif isinstance(key, float):
+ key = _floatstr(key)
+ elif key is True:
+ key = 'true'
+ elif key is False:
+ key = 'false'
+ elif key is None:
+ key = 'null'
+ elif isinstance(key, integer_types):
+ if type(key) not in integer_types:
+ # See #118, do not trust custom str/repr
+ key = int(key)
+ key = str(key)
+ elif _use_decimal and isinstance(key, Decimal):
+ key = str(key)
+ elif _skipkeys:
+ key = None
+ else:
+ raise TypeError("key " + repr(key) + " is not a string")
+ return key
+
def _iterencode_dict(dct, _current_indent_level):
if not dct:
yield '{}'
@@ -435,37 +548,35 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
newline_indent = None
item_separator = _item_separator
first = True
- if _sort_keys:
- items = dct.items()
- items.sort(key=lambda kv: kv[0])
+ if _PY3:
+ iteritems = dct.items()
+ else:
+ iteritems = dct.iteritems()
+ if _item_sort_key:
+ items = []
+ for k, v in dct.items():
+ if not isinstance(k, string_types):
+ k = _stringify_key(k)
+ if k is None:
+ continue
+ items.append((k, v))
+ items.sort(key=_item_sort_key)
else:
- items = dct.iteritems()
+ items = iteritems
for key, value in items:
- if isinstance(key, basestring):
- pass
- # JavaScript is weakly typed for these, so it makes sense to
- # also allow them. Many encoders seem to do something like this.
- elif isinstance(key, float):
- key = _floatstr(key)
- elif key is True:
- key = 'true'
- elif key is False:
- key = 'false'
- elif key is None:
- key = 'null'
- elif isinstance(key, (int, long)):
- key = str(key)
- elif _skipkeys:
- continue
- else:
- raise TypeError("key " + repr(key) + " is not a string")
+ if not (_item_sort_key or isinstance(key, string_types)):
+ key = _stringify_key(key)
+ if key is None:
+ # _skipkeys must be True
+ continue
if first:
first = False
else:
yield item_separator
yield _encoder(key)
yield _key_separator
- if isinstance(value, basestring):
+ if (isinstance(value, string_types) or
+ (_PY3 and isinstance(value, binary_type))):
yield _encoder(value)
elif value is None:
yield 'null'
@@ -473,25 +584,29 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
yield 'true'
elif value is False:
yield 'false'
- elif isinstance(value, (int, long)):
- yield str(value)
+ elif isinstance(value, integer_types):
+ yield _encode_int(value)
elif isinstance(value, float):
yield _floatstr(value)
elif _use_decimal and isinstance(value, Decimal):
yield str(value)
else:
- if isinstance(value, list):
- chunks = _iterencode_list(value, _current_indent_level)
- elif (_namedtuple_as_object and isinstance(value, tuple) and
- hasattr(value, '_asdict')):
- chunks = _iterencode_dict(value._asdict(),
- _current_indent_level)
- elif _tuple_as_array and isinstance(value, tuple):
+ for_json = _for_json and getattr(value, 'for_json', None)
+ if for_json and callable(for_json):
+ chunks = _iterencode(for_json(), _current_indent_level)
+ elif isinstance(value, list):
chunks = _iterencode_list(value, _current_indent_level)
- elif isinstance(value, dict):
- chunks = _iterencode_dict(value, _current_indent_level)
else:
- chunks = _iterencode(value, _current_indent_level)
+ _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
+ if _asdict and callable(_asdict):
+ chunks = _iterencode_dict(_asdict(),
+ _current_indent_level)
+ elif _tuple_as_array and isinstance(value, tuple):
+ chunks = _iterencode_list(value, _current_indent_level)
+ elif isinstance(value, dict):
+ chunks = _iterencode_dict(value, _current_indent_level)
+ else:
+ chunks = _iterencode(value, _current_indent_level)
for chunk in chunks:
yield chunk
if newline_indent is not None:
@@ -502,7 +617,8 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
del markers[markerid]
def _iterencode(o, _current_indent_level):
- if isinstance(o, basestring):
+ if (isinstance(o, string_types) or
+ (_PY3 and isinstance(o, binary_type))):
yield _encoder(o)
elif o is None:
yield 'null'
@@ -510,45 +626,52 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
yield 'true'
elif o is False:
yield 'false'
- elif isinstance(o, (int, long)):
- yield str(o)
+ elif isinstance(o, integer_types):
+ yield _encode_int(o)
elif isinstance(o, float):
yield _floatstr(o)
- elif isinstance(o, list):
- for chunk in _iterencode_list(o, _current_indent_level):
- yield chunk
- elif (_namedtuple_as_object and isinstance(o, tuple) and
- hasattr(o, '_asdict')):
- for chunk in _iterencode_dict(o._asdict(), _current_indent_level):
- yield chunk
- elif (_tuple_as_array and isinstance(o, tuple)):
- for chunk in _iterencode_list(o, _current_indent_level):
- yield chunk
- elif isinstance(o, dict):
- for chunk in _iterencode_dict(o, _current_indent_level):
- yield chunk
- elif _use_decimal and isinstance(o, Decimal):
- yield str(o)
else:
- while _iterable_as_array:
- # Markers are not checked here because it is valid for an
- # iterable to return self.
- try:
- o = iter(o)
- except TypeError:
- break
+ for_json = _for_json and getattr(o, 'for_json', None)
+ if for_json and callable(for_json):
+ for chunk in _iterencode(for_json(), _current_indent_level):
+ yield chunk
+ elif isinstance(o, list):
for chunk in _iterencode_list(o, _current_indent_level):
yield chunk
- return
- if markers is not None:
- markerid = id(o)
- if markerid in markers:
- raise ValueError("Circular reference detected")
- markers[markerid] = o
- o = _default(o)
- for chunk in _iterencode(o, _current_indent_level):
- yield chunk
- if markers is not None:
- del markers[markerid]
+ else:
+ _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
+ if _asdict and callable(_asdict):
+ for chunk in _iterencode_dict(_asdict(),
+ _current_indent_level):
+ yield chunk
+ elif (_tuple_as_array and isinstance(o, tuple)):
+ for chunk in _iterencode_list(o, _current_indent_level):
+ yield chunk
+ elif isinstance(o, dict):
+ for chunk in _iterencode_dict(o, _current_indent_level):
+ yield chunk
+ elif _use_decimal and isinstance(o, Decimal):
+ yield str(o)
+ else:
+ while _iterable_as_array:
+ # Markers are not checked here because it is valid for
+ # an iterable to return self.
+ try:
+ o = iter(o)
+ except TypeError:
+ break
+ for chunk in _iterencode_list(o, _current_indent_level):
+ yield chunk
+ return
+ if markers is not None:
+ markerid = id(o)
+ if markerid in markers:
+ raise ValueError("Circular reference detected")
+ markers[markerid] = o
+ o = _default(o)
+ for chunk in _iterencode(o, _current_indent_level):
+ yield chunk
+ if markers is not None:
+ del markers[markerid]
return _iterencode
diff --git a/simplejson/scanner.py b/simplejson/scanner.py
index 54593a3..5abed35 100644
--- a/simplejson/scanner.py
+++ b/simplejson/scanner.py
@@ -9,12 +9,62 @@ def _import_c_make_scanner():
return None
c_make_scanner = _import_c_make_scanner()
-__all__ = ['make_scanner']
+__all__ = ['make_scanner', 'JSONDecodeError']
NUMBER_RE = re.compile(
r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
(re.VERBOSE | re.MULTILINE | re.DOTALL))
+class JSONDecodeError(ValueError):
+ """Subclass of ValueError with the following additional properties:
+
+ msg: The unformatted error message
+ doc: The JSON document being parsed
+ pos: The start index of doc where parsing failed
+ end: The end index of doc where parsing failed (may be None)
+ lineno: The line corresponding to pos
+ colno: The column corresponding to pos
+ endlineno: The line corresponding to end (may be None)
+ endcolno: The column corresponding to end (may be None)
+
+ """
+ # Note that this exception is used from _speedups
+ def __init__(self, msg, doc, pos, end=None):
+ ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
+ self.msg = msg
+ self.doc = doc
+ self.pos = pos
+ self.end = end
+ self.lineno, self.colno = linecol(doc, pos)
+ if end is not None:
+ self.endlineno, self.endcolno = linecol(doc, end)
+ else:
+ self.endlineno, self.endcolno = None, None
+
+ def __reduce__(self):
+ return self.__class__, (self.msg, self.doc, self.pos, self.end)
+
+
+def linecol(doc, pos):
+ lineno = doc.count('\n', 0, pos) + 1
+ if lineno == 1:
+ colno = pos + 1
+ else:
+ colno = pos - doc.rindex('\n', 0, pos)
+ return lineno, colno
+
+
+def errmsg(msg, doc, pos, end=None):
+ lineno, colno = linecol(doc, pos)
+ msg = msg.replace('%r', repr(doc[pos:pos + 1]))
+ if end is None:
+ fmt = '%s: line %d column %d (char %d)'
+ return fmt % (msg, lineno, colno, pos)
+ endlineno, endcolno = linecol(doc, end)
+ fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
+ return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
+
+
def py_make_scanner(context):
parse_object = context.parse_object
parse_array = context.parse_array
@@ -30,10 +80,11 @@ def py_make_scanner(context):
memo = context.memo
def _scan_once(string, idx):
+ errmsg = 'Expecting value'
try:
nextchar = string[idx]
except IndexError:
- raise StopIteration
+ raise JSONDecodeError(errmsg, string, idx)
if nextchar == '"':
return parse_string(string, idx + 1, encoding, strict)
@@ -64,9 +115,14 @@ def py_make_scanner(context):
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9
else:
- raise StopIteration
+ raise JSONDecodeError(errmsg, string, idx)
def scan_once(string, idx):
+ if idx < 0:
+ # Ensure the same behavior as the C speedup, otherwise
+ # this would work for *some* negative string indices due
+ # to the behavior of __getitem__ for strings. #98
+ raise JSONDecodeError('Expecting value', string, idx)
try:
return _scan_once(string, idx)
finally:
diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py
index 8d2e443..8c1a4f1 100644
--- a/simplejson/tests/__init__.py
+++ b/simplejson/tests/__init__.py
@@ -1,18 +1,26 @@
+from __future__ import absolute_import
import unittest
import doctest
+import sys
-class OptionalExtensionTestSuite(unittest.TestSuite):
+class NoExtensionTestSuite(unittest.TestSuite):
def run(self, result):
import simplejson
- run = unittest.TestSuite.run
- run(self, result)
simplejson._toggle_speedups(False)
- run(self, result)
+ result = unittest.TestSuite.run(self, result)
simplejson._toggle_speedups(True)
return result
+class TestMissingSpeedups(unittest.TestCase):
+ def runTest(self):
+ if hasattr(sys, 'pypy_translation_info'):
+ "PyPy doesn't need speedups! :)"
+ elif hasattr(self, 'skipTest'):
+ self.skipTest('_speedups.so is missing!')
+
+
def additional_tests(suite=None):
import simplejson
import simplejson.encoder
@@ -26,35 +34,50 @@ def additional_tests(suite=None):
def all_tests_suite():
- suite = unittest.TestLoader().loadTestsFromNames([
- 'simplejson.tests.test_check_circular',
- 'simplejson.tests.test_decode',
- 'simplejson.tests.test_default',
- 'simplejson.tests.test_dump',
- 'simplejson.tests.test_encode_basestring_ascii',
- 'simplejson.tests.test_encode_for_html',
- 'simplejson.tests.test_errors',
- 'simplejson.tests.test_fail',
- 'simplejson.tests.test_float',
- 'simplejson.tests.test_indent',
- 'simplejson.tests.test_pass1',
- 'simplejson.tests.test_pass2',
- 'simplejson.tests.test_pass3',
- 'simplejson.tests.test_recursion',
- 'simplejson.tests.test_scanstring',
- 'simplejson.tests.test_separators',
- 'simplejson.tests.test_speedups',
- 'simplejson.tests.test_unicode',
- 'simplejson.tests.test_decimal',
- 'simplejson.tests.test_tuple',
- 'simplejson.tests.test_namedtuple',
- ])
- suite = additional_tests(suite)
- return OptionalExtensionTestSuite([suite])
+ def get_suite():
+ return additional_tests(
+ unittest.TestLoader().loadTestsFromNames([
+ 'simplejson.tests.test_bitsize_int_as_string',
+ 'simplejson.tests.test_bigint_as_string',
+ 'simplejson.tests.test_check_circular',
+ 'simplejson.tests.test_decode',
+ 'simplejson.tests.test_default',
+ 'simplejson.tests.test_dump',
+ 'simplejson.tests.test_encode_basestring_ascii',
+ 'simplejson.tests.test_encode_for_html',
+ 'simplejson.tests.test_errors',
+ 'simplejson.tests.test_fail',
+ 'simplejson.tests.test_float',
+ 'simplejson.tests.test_indent',
+ 'simplejson.tests.test_pass1',
+ 'simplejson.tests.test_pass2',
+ 'simplejson.tests.test_pass3',
+ 'simplejson.tests.test_recursion',
+ 'simplejson.tests.test_scanstring',
+ 'simplejson.tests.test_separators',
+ 'simplejson.tests.test_speedups',
+ 'simplejson.tests.test_unicode',
+ 'simplejson.tests.test_decimal',
+ 'simplejson.tests.test_tuple',
+ 'simplejson.tests.test_namedtuple',
+ 'simplejson.tests.test_tool',
+ 'simplejson.tests.test_for_json',
+ 'simplejson.tests.test_subclass',
+ ]))
+ suite = get_suite()
+ import simplejson
+ if simplejson._import_c_make_encoder() is None:
+ suite.addTest(TestMissingSpeedups())
+ else:
+ suite = unittest.TestSuite([
+ suite,
+ NoExtensionTestSuite([get_suite()]),
+ ])
+ return suite
def main():
- runner = unittest.TextTestRunner()
+ runner = unittest.TextTestRunner(verbosity=1 + sys.argv.count('-v'))
suite = all_tests_suite()
raise SystemExit(not runner.run(suite).wasSuccessful())
diff --git a/simplejson/tests/test_bigint_as_string.py b/simplejson/tests/test_bigint_as_string.py
new file mode 100644
index 0000000..2cf2cc2
--- /dev/null
+++ b/simplejson/tests/test_bigint_as_string.py
@@ -0,0 +1,67 @@
+from unittest import TestCase
+
+import simplejson as json
+
+
+class TestBigintAsString(TestCase):
+ # Python 2.5, at least the one that ships on Mac OS X, calculates
+ # 2 ** 53 as 0! It manages to calculate 1 << 53 correctly.
+ values = [(200, 200),
+ ((1 << 53) - 1, 9007199254740991),
+ ((1 << 53), '9007199254740992'),
+ ((1 << 53) + 1, '9007199254740993'),
+ (-100, -100),
+ ((-1 << 53), '-9007199254740992'),
+ ((-1 << 53) - 1, '-9007199254740993'),
+ ((-1 << 53) + 1, -9007199254740991)]
+
+ options = (
+ {"bigint_as_string": True},
+ {"int_as_string_bitcount": 53}
+ )
+
+ def test_ints(self):
+ for opts in self.options:
+ for val, expect in self.values:
+ self.assertEqual(
+ val,
+ json.loads(json.dumps(val)))
+ self.assertEqual(
+ expect,
+ json.loads(json.dumps(val, **opts)))
+
+ def test_lists(self):
+ for opts in self.options:
+ for val, expect in self.values:
+ val = [val, val]
+ expect = [expect, expect]
+ self.assertEqual(
+ val,
+ json.loads(json.dumps(val)))
+ self.assertEqual(
+ expect,
+ json.loads(json.dumps(val, **opts)))
+
+ def test_dicts(self):
+ for opts in self.options:
+ for val, expect in self.values:
+ val = {'k': val}
+ expect = {'k': expect}
+ self.assertEqual(
+ val,
+ json.loads(json.dumps(val)))
+ self.assertEqual(
+ expect,
+ json.loads(json.dumps(val, **opts)))
+
+ def test_dict_keys(self):
+ for opts in self.options:
+ for val, _ in self.values:
+ expect = {str(val): 'value'}
+ val = {val: 'value'}
+ self.assertEqual(
+ expect,
+ json.loads(json.dumps(val)))
+ self.assertEqual(
+ expect,
+ json.loads(json.dumps(val, **opts)))
diff --git a/simplejson/tests/test_bitsize_int_as_string.py b/simplejson/tests/test_bitsize_int_as_string.py
new file mode 100644
index 0000000..fd7d103
--- /dev/null
+++ b/simplejson/tests/test_bitsize_int_as_string.py
@@ -0,0 +1,73 @@
+from unittest import TestCase
+
+import simplejson as json
+
+
+class TestBitSizeIntAsString(TestCase):
+ # Python 2.5, at least the one that ships on Mac OS X, calculates
+ # 2 ** 31 as 0! It manages to calculate 1 << 31 correctly.
+ values = [
+ (200, 200),
+ ((1 << 31) - 1, (1 << 31) - 1),
+ ((1 << 31), str(1 << 31)),
+ ((1 << 31) + 1, str((1 << 31) + 1)),
+ (-100, -100),
+ ((-1 << 31), str(-1 << 31)),
+ ((-1 << 31) - 1, str((-1 << 31) - 1)),
+ ((-1 << 31) + 1, (-1 << 31) + 1),
+ ]
+
+ def test_invalid_counts(self):
+ for n in ['foo', -1, 0, 1.0]:
+ self.assertRaises(
+ TypeError,
+ json.dumps, 0, int_as_string_bitcount=n)
+
+ def test_ints_outside_range_fails(self):
+ self.assertNotEqual(
+ str(1 << 15),
+ json.loads(json.dumps(1 << 15, int_as_string_bitcount=16)),
+ )
+
+ def test_ints(self):
+ for val, expect in self.values:
+ self.assertEqual(
+ val,
+ json.loads(json.dumps(val)))
+ self.assertEqual(
+ expect,
+ json.loads(json.dumps(val, int_as_string_bitcount=31)),
+ )
+
+ def test_lists(self):
+ for val, expect in self.values:
+ val = [val, val]
+ expect = [expect, expect]
+ self.assertEqual(
+ val,
+ json.loads(json.dumps(val)))
+ self.assertEqual(
+ expect,
+ json.loads(json.dumps(val, int_as_string_bitcount=31)))
+
+ def test_dicts(self):
+ for val, expect in self.values:
+ val = {'k': val}
+ expect = {'k': expect}
+ self.assertEqual(
+ val,
+ json.loads(json.dumps(val)))
+ self.assertEqual(
+ expect,
+ json.loads(json.dumps(val, int_as_string_bitcount=31)))
+
+ def test_dict_keys(self):
+ for val, _ in self.values:
+ expect = {str(val): 'value'}
+ val = {val: 'value'}
+ self.assertEqual(
+ expect,
+ json.loads(json.dumps(val)))
+ self.assertEqual(
+ expect,
+ json.loads(json.dumps(val, int_as_string_bitcount=31)))
diff --git a/simplejson/tests/test_decimal.py b/simplejson/tests/test_decimal.py
index 09f062e..2b0940b 100644
--- a/simplejson/tests/test_decimal.py
+++ b/simplejson/tests/test_decimal.py
@@ -1,6 +1,7 @@
+import decimal
from decimal import Decimal
from unittest import TestCase
-from StringIO import StringIO
+from simplejson.compat import StringIO, reload_module
import simplejson as json
@@ -10,35 +11,40 @@ class TestDecimal(TestCase):
sio = StringIO()
json.dump(obj, sio, **kw)
res = json.dumps(obj, **kw)
- self.assertEquals(res, sio.getvalue())
+ self.assertEqual(res, sio.getvalue())
return res
def loads(self, s, **kw):
sio = StringIO(s)
res = json.loads(s, **kw)
- self.assertEquals(res, json.load(sio, **kw))
+ self.assertEqual(res, json.load(sio, **kw))
return res
def test_decimal_encode(self):
for d in map(Decimal, self.NUMS):
- self.assertEquals(self.dumps(d, use_decimal=True), str(d))
-
+ self.assertEqual(self.dumps(d, use_decimal=True), str(d))
+
def test_decimal_decode(self):
for s in self.NUMS:
- self.assertEquals(self.loads(s, parse_float=Decimal), Decimal(s))
-
+ self.assertEqual(self.loads(s, parse_float=Decimal), Decimal(s))
+
+ def test_stringify_key(self):
+ for d in map(Decimal, self.NUMS):
+ v = {d: d}
+ self.assertEqual(
+ self.loads(
+ self.dumps(v, use_decimal=True), parse_float=Decimal),
+ {str(d): d})
+
def test_decimal_roundtrip(self):
for d in map(Decimal, self.NUMS):
# The type might not be the same (int and Decimal) but they
# should still compare equal.
- self.assertEquals(
- self.loads(
- self.dumps(d, use_decimal=True), parse_float=Decimal),
- d)
- self.assertEquals(
- self.loads(
- self.dumps([d], use_decimal=True), parse_float=Decimal),
- [d])
+ for v in [d, [d], {'': d}]:
+ self.assertEqual(
+ self.loads(
+ self.dumps(v, use_decimal=True), parse_float=Decimal),
+ v)
def test_decimal_defaults(self):
d = Decimal('1.1')
@@ -46,10 +52,20 @@ class TestDecimal(TestCase):
self.assertRaises(TypeError, json.dumps, d, use_decimal=False)
self.assertEqual('1.1', json.dumps(d))
self.assertEqual('1.1', json.dumps(d, use_decimal=True))
- self.assertRaises(TypeError, json.dump, d, StringIO(), use_decimal=False)
+ self.assertRaises(TypeError, json.dump, d, StringIO(),
+ use_decimal=False)
sio = StringIO()
json.dump(d, sio)
self.assertEqual('1.1', sio.getvalue())
sio = StringIO()
json.dump(d, sio, use_decimal=True)
self.assertEqual('1.1', sio.getvalue())
+
+ def test_decimal_reload(self):
+ # Simulate a subinterpreter that reloads the Python modules but not
+ # the C code https://github.com/simplejson/simplejson/issues/34
+ global Decimal
+ Decimal = reload_module(decimal).Decimal
+ import simplejson.encoder
+ simplejson.encoder.Decimal = Decimal
+ self.test_decimal_roundtrip()
diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py
index a140a13..30b692a 100644
--- a/simplejson/tests/test_decode.py
+++ b/simplejson/tests/test_decode.py
@@ -1,8 +1,9 @@
+from __future__ import absolute_import
import decimal
from unittest import TestCase
-from StringIO import StringIO
import simplejson as json
+from simplejson.compat import StringIO
from simplejson import OrderedDict
class TestDecode(TestCase):
@@ -13,19 +14,19 @@ class TestDecode(TestCase):
def test_decimal(self):
rval = json.loads('1.1', parse_float=decimal.Decimal)
self.assertTrue(isinstance(rval, decimal.Decimal))
- self.assertEquals(rval, decimal.Decimal('1.1'))
+ self.assertEqual(rval, decimal.Decimal('1.1'))
def test_float(self):
rval = json.loads('1', parse_int=float)
self.assertTrue(isinstance(rval, float))
- self.assertEquals(rval, 1.0)
+ self.assertEqual(rval, 1.0)
def test_decoder_optimizations(self):
# Several optimizations were made that skip over calls to
# the whitespace regex, so this test is designed to try and
# exercise the uncommon cases. The array cases are already covered.
rval = json.loads('{ "key" : "value" , "k":"v" }')
- self.assertEquals(rval, {"key":"value", "k":"v"})
+ self.assertEqual(rval, {"key":"value", "k":"v"})
def test_empty_objects(self):
s = '{}'
@@ -81,3 +82,18 @@ class TestDecode(TestCase):
self.assertEqual(
({'a': {}}, 9),
cls(object_pairs_hook=dict).raw_decode("{\"a\": {}}"))
+ # https://github.com/simplejson/simplejson/pull/38
+ self.assertEqual(
+ ({'a': {}}, 11),
+ cls().raw_decode(" \n{\"a\": {}}"))
+
+ def test_bounds_checking(self):
+ # https://github.com/simplejson/simplejson/issues/98
+ j = json.decoder.JSONDecoder()
+ for i in [4, 5, 6, -1, -2, -3, -4, -5, -6]:
+ self.assertRaises(ValueError, j.scan_once, '1234', i)
+ self.assertRaises(ValueError, j.raw_decode, '1234', i)
+ x, y = sorted(['128931233', '472389423'], key=id)
+ diff = id(x) - id(y)
+ self.assertRaises(ValueError, j.scan_once, y, diff)
+ self.assertRaises(ValueError, j.raw_decode, y, i)
diff --git a/simplejson/tests/test_default.py b/simplejson/tests/test_default.py
index 139e42b..d1eacb8 100644
--- a/simplejson/tests/test_default.py
+++ b/simplejson/tests/test_default.py
@@ -4,6 +4,6 @@ import simplejson as json
class TestDefault(TestCase):
def test_default(self):
- self.assertEquals(
+ self.assertEqual(
json.dumps(type, default=repr),
json.dumps(repr(type)))
diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py
index 8b34004..3661de0 100644
--- a/simplejson/tests/test_dump.py
+++ b/simplejson/tests/test_dump.py
@@ -1,27 +1,130 @@
from unittest import TestCase
-from cStringIO import StringIO
-
+from simplejson.compat import StringIO, long_type, b, binary_type, PY3
import simplejson as json
+def as_text_type(s):
+ if PY3 and isinstance(s, binary_type):
+ return s.decode('ascii')
+ return s
+
class TestDump(TestCase):
def test_dump(self):
sio = StringIO()
json.dump({}, sio)
- self.assertEquals(sio.getvalue(), '{}')
+ self.assertEqual(sio.getvalue(), '{}')
+
+ def test_constants(self):
+ for c in [None, True, False]:
+ self.assertTrue(json.loads(json.dumps(c)) is c)
+ self.assertTrue(json.loads(json.dumps([c]))[0] is c)
+ self.assertTrue(json.loads(json.dumps({'a': c}))['a'] is c)
+
+ def test_stringify_key(self):
+ items = [(b('bytes'), 'bytes'),
+ (1.0, '1.0'),
+ (10, '10'),
+ (True, 'true'),
+ (False, 'false'),
+ (None, 'null'),
+ (long_type(100), '100')]
+ for k, expect in items:
+ self.assertEqual(
+ json.loads(json.dumps({k: expect})),
+ {expect: expect})
+ self.assertEqual(
+ json.loads(json.dumps({k: expect}, sort_keys=True)),
+ {expect: expect})
+ self.assertRaises(TypeError, json.dumps, {json: 1})
+ for v in [{}, {'other': 1}, {b('derp'): 1, 'herp': 2}]:
+ for sort_keys in [False, True]:
+ v0 = dict(v)
+ v0[json] = 1
+ v1 = dict((as_text_type(key), val) for (key, val) in v.items())
+ self.assertEqual(
+ json.loads(json.dumps(v0, skipkeys=True, sort_keys=sort_keys)),
+ v1)
+ self.assertEqual(
+ json.loads(json.dumps({'': v0}, skipkeys=True, sort_keys=sort_keys)),
+ {'': v1})
+ self.assertEqual(
+ json.loads(json.dumps([v0], skipkeys=True, sort_keys=sort_keys)),
+ [v1])
def test_dumps(self):
- self.assertEquals(json.dumps({}), '{}')
+ self.assertEqual(json.dumps({}), '{}')
def test_encode_truefalse(self):
- self.assertEquals(json.dumps(
+ self.assertEqual(json.dumps(
{True: False, False: True}, sort_keys=True),
'{"false": true, "true": false}')
- self.assertEquals(json.dumps(
- {2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True),
- '{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}')
+ self.assertEqual(
+ json.dumps(
+ {2: 3.0,
+ 4.0: long_type(5),
+ False: 1,
+ long_type(6): True,
+ "7": 0},
+ sort_keys=True),
+ '{"2": 3.0, "4.0": 5, "6": true, "7": 0, "false": 1}')
def test_ordered_dict(self):
# http://bugs.python.org/issue6105
items = [('one', 1), ('two', 2), ('three', 3), ('four', 4), ('five', 5)]
s = json.dumps(json.OrderedDict(items))
- self.assertEqual(s, '{"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}') \ No newline at end of file
+ self.assertEqual(
+ s,
+ '{"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}')
+
+ def test_indent_unknown_type_acceptance(self):
+ """
+ A test against the regression mentioned at `github issue 29`_.
+
+ The indent parameter should accept any type which pretends to be
+ an instance of int or long when it comes to being multiplied by
+ strings, even if it is not actually an int or long, for
+ backwards compatibility.
+
+ .. _github issue 29:
+ http://github.com/simplejson/simplejson/issue/29
+ """
+
+ class AwesomeInt(object):
+ """An awesome reimplementation of integers"""
+
+ def __init__(self, *args, **kwargs):
+ if len(args) > 0:
+ # [construct from literals, objects, etc.]
+ # ...
+
+ # Finally, if args[0] is an integer, store it
+ if isinstance(args[0], int):
+ self._int = args[0]
+
+ # [various methods]
+
+ def __mul__(self, other):
+ # [various ways to multiply AwesomeInt objects]
+ # ... finally, if the right-hand operand is not awesome enough,
+ # try to do a normal integer multiplication
+ if hasattr(self, '_int'):
+ return self._int * other
+ else:
+ raise NotImplementedError("To do non-awesome things with"
+ " this object, please construct it from an integer!")
+
+ s = json.dumps([0, 1, 2], indent=AwesomeInt(3))
+ self.assertEqual(s, '[\n 0,\n 1,\n 2\n]')
+
+ def test_accumulator(self):
+ # the C API uses an accumulator that collects after 100,000 appends
+ lst = [0] * 100000
+ self.assertEqual(json.loads(json.dumps(lst)), lst)
+
+ def test_sort_keys(self):
+ # https://github.com/simplejson/simplejson/issues/106
+ for num_keys in range(2, 32):
+ p = dict((str(x), x) for x in range(num_keys))
+ sio = StringIO()
+ json.dump(p, sio, sort_keys=True)
+ self.assertEqual(sio.getvalue(), json.dumps(p, sort_keys=True))
+ self.assertEqual(json.loads(sio.getvalue()), p)
diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py
index 6c40961..49706bf 100644
--- a/simplejson/tests/test_encode_basestring_ascii.py
+++ b/simplejson/tests/test_encode_basestring_ascii.py
@@ -1,6 +1,7 @@
from unittest import TestCase
import simplejson.encoder
+from simplejson.compat import b
CASES = [
(u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'),
@@ -11,9 +12,9 @@ CASES = [
(u' s p a c e d ', '" s p a c e d "'),
(u'\U0001d120', '"\\ud834\\udd20"'),
(u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
- ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'),
+ (b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'),
(u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
- ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'),
+ (b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'),
(u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
(u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
(u"`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
@@ -34,10 +35,10 @@ class TestEncodeBaseStringAscii(TestCase):
fname = encode_basestring_ascii.__name__
for input_string, expect in CASES:
result = encode_basestring_ascii(input_string)
- #self.assertEquals(result, expect,
+ #self.assertEqual(result, expect,
# '{0!r} != {1!r} for {2}({3!r})'.format(
# result, expect, fname, input_string))
- self.assertEquals(result, expect,
+ self.assertEqual(result, expect,
'%r != %r for %s(%r)' % (result, expect, fname, input_string))
def test_sorted_dict(self):
diff --git a/simplejson/tests/test_encode_for_html.py b/simplejson/tests/test_encode_for_html.py
index c2d5f80..f995254 100644
--- a/simplejson/tests/test_encode_for_html.py
+++ b/simplejson/tests/test_encode_for_html.py
@@ -1,14 +1,12 @@
import unittest
-import simplejson.decoder
-import simplejson.encoder
-
+import simplejson as json
class TestEncodeForHTML(unittest.TestCase):
def setUp(self):
- self.decoder = simplejson.decoder.JSONDecoder()
- self.encoder = simplejson.encoder.JSONEncoderForHTML()
+ self.decoder = json.JSONDecoder()
+ self.encoder = json.JSONEncoderForHTML()
def test_basic_encode(self):
self.assertEqual(r'"\u0026"', self.encoder.encode('&'))
diff --git a/simplejson/tests/test_errors.py b/simplejson/tests/test_errors.py
index 620ccf3..8dede38 100644
--- a/simplejson/tests/test_errors.py
+++ b/simplejson/tests/test_errors.py
@@ -1,6 +1,8 @@
+import sys, pickle
from unittest import TestCase
import simplejson as json
+from simplejson.compat import u, b
class TestErrors(TestCase):
def test_string_keys_error(self):
@@ -11,24 +13,39 @@ class TestErrors(TestCase):
err = None
try:
json.loads('{}\na\nb')
- except json.JSONDecodeError, e:
- err = e
+ except json.JSONDecodeError:
+ err = sys.exc_info()[1]
else:
self.fail('Expected JSONDecodeError')
- self.assertEquals(err.lineno, 2)
- self.assertEquals(err.colno, 1)
- self.assertEquals(err.endlineno, 3)
- self.assertEquals(err.endcolno, 2)
+ self.assertEqual(err.lineno, 2)
+ self.assertEqual(err.colno, 1)
+ self.assertEqual(err.endlineno, 3)
+ self.assertEqual(err.endcolno, 2)
def test_scan_error(self):
err = None
- for t in (str, unicode):
+ for t in (u, b):
try:
json.loads(t('{"asdf": "'))
- except json.JSONDecodeError, e:
- err = e
+ except json.JSONDecodeError:
+ err = sys.exc_info()[1]
else:
self.fail('Expected JSONDecodeError')
- self.assertEquals(err.lineno, 1)
- self.assertEquals(err.colno, 9)
- \ No newline at end of file
+ self.assertEqual(err.lineno, 1)
+ self.assertEqual(err.colno, 10)
+
+ def test_error_is_pickable(self):
+ err = None
+ try:
+ json.loads('{}\na\nb')
+ except json.JSONDecodeError:
+ err = sys.exc_info()[1]
+ else:
+ self.fail('Expected JSONDecodeError')
+ s = pickle.dumps(err)
+ e = pickle.loads(s)
+
+ self.assertEqual(err.msg, e.msg)
+ self.assertEqual(err.doc, e.doc)
+ self.assertEqual(err.pos, e.pos)
+ self.assertEqual(err.end, e.end)
diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py
index 646c0f4..788f3a5 100644
--- a/simplejson/tests/test_fail.py
+++ b/simplejson/tests/test_fail.py
@@ -1,15 +1,16 @@
+import sys
from unittest import TestCase
import simplejson as json
-# Fri Dec 30 18:57:26 2005
+# 2007-10-05
JSONDOCS = [
# http://json.org/JSON_checker/test/fail1.json
'"A JSON payload should be an object or array, not a string."',
# http://json.org/JSON_checker/test/fail2.json
'["Unclosed array"',
# http://json.org/JSON_checker/test/fail3.json
- '{unquoted_key: "keys must be quoted}',
+ '{unquoted_key: "keys must be quoted"}',
# http://json.org/JSON_checker/test/fail4.json
'["extra comma",]',
# http://json.org/JSON_checker/test/fail5.json
@@ -35,7 +36,7 @@ JSONDOCS = [
# http://json.org/JSON_checker/test/fail15.json
'["Illegal backslash escape: \\x15"]',
# http://json.org/JSON_checker/test/fail16.json
- '["Illegal backslash escape: \\\'"]',
+ '[\\naked]',
# http://json.org/JSON_checker/test/fail17.json
'["Illegal backslash escape: \\017"]',
# http://json.org/JSON_checker/test/fail18.json
@@ -52,8 +53,51 @@ JSONDOCS = [
'["Bad value", truth]',
# http://json.org/JSON_checker/test/fail24.json
"['single quote']",
+ # http://json.org/JSON_checker/test/fail25.json
+ '["\ttab\tcharacter\tin\tstring\t"]',
+ # http://json.org/JSON_checker/test/fail26.json
+ '["tab\\ character\\ in\\ string\\ "]',
+ # http://json.org/JSON_checker/test/fail27.json
+ '["line\nbreak"]',
+ # http://json.org/JSON_checker/test/fail28.json
+ '["line\\\nbreak"]',
+ # http://json.org/JSON_checker/test/fail29.json
+ '[0e]',
+ # http://json.org/JSON_checker/test/fail30.json
+ '[0e+]',
+ # http://json.org/JSON_checker/test/fail31.json
+ '[0e+-1]',
+ # http://json.org/JSON_checker/test/fail32.json
+ '{"Comma instead if closing brace": true,',
+ # http://json.org/JSON_checker/test/fail33.json
+ '["mismatch"}',
# http://code.google.com/p/simplejson/issues/detail?id=3
u'["A\u001FZ control characters in string"]',
+ # misc based on coverage
+ '{',
+ '{]',
+ '{"foo": "bar"]',
+ '{"foo": "bar"',
+ 'nul',
+ 'nulx',
+ '-',
+ '-x',
+ '-e',
+ '-e0',
+ '-Infinite',
+ '-Inf',
+ 'Infinit',
+ 'Infinite',
+ 'NaM',
+ 'NuN',
+ 'falsy',
+ 'fal',
+ 'trug',
+ 'tru',
+ '1e',
+ '1ex',
+ '1e-',
+ '1e-x',
]
SKIPS = {
@@ -73,7 +117,6 @@ class TestFail(TestCase):
except json.JSONDecodeError:
pass
else:
- #self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc))
self.fail("Expected failure for fail%d.json: %r" % (idx, doc))
def test_array_decoder_issue46(self):
@@ -81,11 +124,53 @@ class TestFail(TestCase):
for doc in [u'[,]', '[,]']:
try:
json.loads(doc)
- except json.JSONDecodeError, e:
- self.assertEquals(e.pos, 1)
- self.assertEquals(e.lineno, 1)
- self.assertEquals(e.colno, 1)
- except Exception, e:
+ except json.JSONDecodeError:
+ e = sys.exc_info()[1]
+ self.assertEqual(e.pos, 1)
+ self.assertEqual(e.lineno, 1)
+ self.assertEqual(e.colno, 2)
+ except Exception:
+ e = sys.exc_info()[1]
+ self.fail("Unexpected exception raised %r %s" % (e, e))
+ else:
+ self.fail("Unexpected success parsing '[,]'")
+
+ def test_truncated_input(self):
+ test_cases = [
+ ('', 'Expecting value', 0),
+ ('[', "Expecting value or ']'", 1),
+ ('[42', "Expecting ',' delimiter", 3),
+ ('[42,', 'Expecting value', 4),
+ ('["', 'Unterminated string starting at', 1),
+ ('["spam', 'Unterminated string starting at', 1),
+ ('["spam"', "Expecting ',' delimiter", 7),
+ ('["spam",', 'Expecting value', 8),
+ ('{', 'Expecting property name enclosed in double quotes', 1),
+ ('{"', 'Unterminated string starting at', 1),
+ ('{"spam', 'Unterminated string starting at', 1),
+ ('{"spam"', "Expecting ':' delimiter", 7),
+ ('{"spam":', 'Expecting value', 8),
+ ('{"spam":42', "Expecting ',' delimiter", 10),
+ ('{"spam":42,', 'Expecting property name enclosed in double quotes',
+ 11),
+ ('"', 'Unterminated string starting at', 0),
+ ('"spam', 'Unterminated string starting at', 0),
+ ('[,', "Expecting value", 1),
+ ]
+ for data, msg, idx in test_cases:
+ try:
+ json.loads(data)
+ except json.JSONDecodeError:
+ e = sys.exc_info()[1]
+ self.assertEqual(
+ e.msg[:len(msg)],
+ msg,
+ "%r doesn't start with %r for %r" % (e.msg, msg, data))
+ self.assertEqual(
+ e.pos, idx,
+ "pos %r != %r for %r" % (e.pos, idx, data))
+ except Exception:
+ e = sys.exc_info()[1]
self.fail("Unexpected exception raised %r %s" % (e, e))
else:
- self.fail("Unexpected success parsing '[,]'") \ No newline at end of file
+ self.fail("Unexpected success parsing '%r'" % (data,))
diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py
index 94502c6..e382ec2 100644
--- a/simplejson/tests/test_float.py
+++ b/simplejson/tests/test_float.py
@@ -1,19 +1,35 @@
import math
from unittest import TestCase
-
+from simplejson.compat import long_type, text_type
import simplejson as json
+from simplejson.decoder import NaN, PosInf, NegInf
class TestFloat(TestCase):
+ def test_degenerates_allow(self):
+ for inf in (PosInf, NegInf):
+ self.assertEqual(json.loads(json.dumps(inf)), inf)
+ # Python 2.5 doesn't have math.isnan
+ nan = json.loads(json.dumps(NaN))
+ self.assertTrue((0 + nan) != nan)
+
+ def test_degenerates_ignore(self):
+ for f in (PosInf, NegInf, NaN):
+ self.assertEqual(json.loads(json.dumps(f, ignore_nan=True)), None)
+
+ def test_degenerates_deny(self):
+ for f in (PosInf, NegInf, NaN):
+ self.assertRaises(ValueError, json.dumps, f, allow_nan=False)
+
def test_floats(self):
for num in [1617161771.7650001, math.pi, math.pi**100,
math.pi**-100, 3.1]:
- self.assertEquals(float(json.dumps(num)), num)
- self.assertEquals(json.loads(json.dumps(num)), num)
- self.assertEquals(json.loads(unicode(json.dumps(num))), num)
+ self.assertEqual(float(json.dumps(num)), num)
+ self.assertEqual(json.loads(json.dumps(num)), num)
+ self.assertEqual(json.loads(text_type(json.dumps(num))), num)
def test_ints(self):
- for num in [1, 1L, 1<<32, 1<<64]:
- self.assertEquals(json.dumps(num), str(num))
- self.assertEquals(int(json.dumps(num)), num)
- self.assertEquals(json.loads(json.dumps(num)), num)
- self.assertEquals(json.loads(unicode(json.dumps(num))), num)
+ for num in [1, long_type(1), 1<<32, 1<<64]:
+ self.assertEqual(json.dumps(num), str(num))
+ self.assertEqual(int(json.dumps(num)), num)
+ self.assertEqual(json.loads(json.dumps(num)), num)
+ self.assertEqual(json.loads(text_type(json.dumps(num))), num)
diff --git a/simplejson/tests/test_for_json.py b/simplejson/tests/test_for_json.py
new file mode 100644
index 0000000..b791b88
--- /dev/null
+++ b/simplejson/tests/test_for_json.py
@@ -0,0 +1,97 @@
+import unittest
+import simplejson as json
+
+
+class ForJson(object):
+ def for_json(self):
+ return {'for_json': 1}
+
+
+class NestedForJson(object):
+ def for_json(self):
+ return {'nested': ForJson()}
+
+
+class ForJsonList(object):
+ def for_json(self):
+ return ['list']
+
+
+class DictForJson(dict):
+ def for_json(self):
+ return {'alpha': 1}
+
+
+class ListForJson(list):
+ def for_json(self):
+ return ['list']
+
+
+class TestForJson(unittest.TestCase):
+ def assertRoundTrip(self, obj, other, for_json=True):
+ if for_json is None:
+ # None will use the default
+ s = json.dumps(obj)
+ else:
+ s = json.dumps(obj, for_json=for_json)
+ self.assertEqual(
+ json.loads(s),
+ other)
+
+ def test_for_json_encodes_stand_alone_object(self):
+ self.assertRoundTrip(
+ ForJson(),
+ ForJson().for_json())
+
+ def test_for_json_encodes_object_nested_in_dict(self):
+ self.assertRoundTrip(
+ {'hooray': ForJson()},
+ {'hooray': ForJson().for_json()})
+
+ def test_for_json_encodes_object_nested_in_list_within_dict(self):
+ self.assertRoundTrip(
+ {'list': [0, ForJson(), 2, 3]},
+ {'list': [0, ForJson().for_json(), 2, 3]})
+
+ def test_for_json_encodes_object_nested_within_object(self):
+ self.assertRoundTrip(
+ NestedForJson(),
+ {'nested': {'for_json': 1}})
+
+ def test_for_json_encodes_list(self):
+ self.assertRoundTrip(
+ ForJsonList(),
+ ForJsonList().for_json())
+
+ def test_for_json_encodes_list_within_object(self):
+ self.assertRoundTrip(
+ {'nested': ForJsonList()},
+ {'nested': ForJsonList().for_json()})
+
+ def test_for_json_encodes_dict_subclass(self):
+ self.assertRoundTrip(
+ DictForJson(a=1),
+ DictForJson(a=1).for_json())
+
+ def test_for_json_encodes_list_subclass(self):
+ self.assertRoundTrip(
+ ListForJson(['l']),
+ ListForJson(['l']).for_json())
+
+ def test_for_json_ignored_if_not_true_with_dict_subclass(self):
+ for for_json in (None, False):
+ self.assertRoundTrip(
+ DictForJson(a=1),
+ {'a': 1},
+ for_json=for_json)
+
+ def test_for_json_ignored_if_not_true_with_list_subclass(self):
+ for for_json in (None, False):
+ self.assertRoundTrip(
+ ListForJson(['l']),
+ ['l'],
+ for_json=for_json)
+
+ def test_raises_typeerror_if_for_json_not_true_with_object(self):
+ self.assertRaises(TypeError, json.dumps, ForJson())
+ self.assertRaises(TypeError, json.dumps, ForJson(), for_json=False)
diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py
index 1e6bdb1..cea25a5 100644
--- a/simplejson/tests/test_indent.py
+++ b/simplejson/tests/test_indent.py
@@ -1,8 +1,8 @@
from unittest import TestCase
+import textwrap
import simplejson as json
-import textwrap
-from StringIO import StringIO
+from simplejson.compat import StringIO
class TestIndent(TestCase):
def test_indent(self):
@@ -42,26 +42,26 @@ class TestIndent(TestCase):
h3 = json.loads(d3)
h4 = json.loads(d4)
- self.assertEquals(h1, h)
- self.assertEquals(h2, h)
- self.assertEquals(h3, h)
- self.assertEquals(h4, h)
- self.assertEquals(d3, expect.replace('\t', ' '))
- self.assertEquals(d4, expect.replace('\t', ' '))
+ self.assertEqual(h1, h)
+ self.assertEqual(h2, h)
+ self.assertEqual(h3, h)
+ self.assertEqual(h4, h)
+ self.assertEqual(d3, expect.replace('\t', ' '))
+ self.assertEqual(d4, expect.replace('\t', ' '))
# NOTE: Python 2.4 textwrap.dedent converts tabs to spaces,
# so the following is expected to fail. Python 2.4 is not a
# supported platform in simplejson 2.1.0+.
- self.assertEquals(d2, expect)
+ self.assertEqual(d2, expect)
def test_indent0(self):
h = {3: 1}
def check(indent, expected):
d1 = json.dumps(h, indent=indent)
- self.assertEquals(d1, expected)
+ self.assertEqual(d1, expected)
sio = StringIO()
json.dump(h, sio, indent=indent)
- self.assertEquals(sio.getvalue(), expected)
+ self.assertEqual(sio.getvalue(), expected)
# indent=0 should emit newlines
check(0, '{\n"3": 1\n}')
@@ -73,14 +73,14 @@ class TestIndent(TestCase):
expect = '[\n1,\n2,\n3,\n4\n]'
expect_spaces = '[\n1, \n2, \n3, \n4\n]'
# Ensure that separators still works
- self.assertEquals(
+ self.assertEqual(
expect_spaces,
json.dumps(lst, indent=0, separators=(', ', ': ')))
# Force the new defaults
- self.assertEquals(
+ self.assertEqual(
expect,
json.dumps(lst, indent=0, separators=(',', ': ')))
# Added in 2.1.4
- self.assertEquals(
+ self.assertEqual(
expect,
- json.dumps(lst, indent=0)) \ No newline at end of file
+ json.dumps(lst, indent=0))
diff --git a/simplejson/tests/test_item_sort_key.py b/simplejson/tests/test_item_sort_key.py
new file mode 100644
index 0000000..b05bfc8
--- /dev/null
+++ b/simplejson/tests/test_item_sort_key.py
@@ -0,0 +1,20 @@
+from unittest import TestCase
+
+import simplejson as json
+from operator import itemgetter
+
+class TestItemSortKey(TestCase):
+ def test_simple_first(self):
+ a = {'a': 1, 'c': 5, 'jack': 'jill', 'pick': 'axe', 'array': [1, 5, 6, 9], 'tuple': (83, 12, 3), 'crate': 'dog', 'zeak': 'oh'}
+ self.assertEqual(
+ '{"a": 1, "c": 5, "crate": "dog", "jack": "jill", "pick": "axe", "zeak": "oh", "array": [1, 5, 6, 9], "tuple": [83, 12, 3]}',
+ json.dumps(a, item_sort_key=json.simple_first))
+
+ def test_case(self):
+ a = {'a': 1, 'c': 5, 'Jack': 'jill', 'pick': 'axe', 'Array': [1, 5, 6, 9], 'tuple': (83, 12, 3), 'crate': 'dog', 'zeak': 'oh'}
+ self.assertEqual(
+ '{"Array": [1, 5, 6, 9], "Jack": "jill", "a": 1, "c": 5, "crate": "dog", "pick": "axe", "tuple": [83, 12, 3], "zeak": "oh"}',
+ json.dumps(a, item_sort_key=itemgetter(0)))
+ self.assertEqual(
+ '{"a": 1, "Array": [1, 5, 6, 9], "c": 5, "crate": "dog", "Jack": "jill", "pick": "axe", "tuple": [83, 12, 3], "zeak": "oh"}',
+ json.dumps(a, item_sort_key=lambda kv: kv[0].lower()))
diff --git a/simplejson/tests/test_namedtuple.py b/simplejson/tests/test_namedtuple.py
index 18da218..4387894 100644
--- a/simplejson/tests/test_namedtuple.py
+++ b/simplejson/tests/test_namedtuple.py
@@ -1,6 +1,7 @@
+from __future__ import absolute_import
import unittest
import simplejson as json
-from StringIO import StringIO
+from simplejson.compat import StringIO
try:
from collections import namedtuple
@@ -21,11 +22,36 @@ else:
Value = namedtuple('Value', ['value'])
Point = namedtuple('Point', ['x', 'y'])
+class DuckValue(object):
+ def __init__(self, *args):
+ self.value = Value(*args)
+
+ def _asdict(self):
+ return self.value._asdict()
+
+class DuckPoint(object):
+ def __init__(self, *args):
+ self.point = Point(*args)
+
+ def _asdict(self):
+ return self.point._asdict()
+
+class DeadDuck(object):
+ _asdict = None
+
+class DeadDict(dict):
+ _asdict = None
+
+CONSTRUCTORS = [
+ lambda v: v,
+ lambda v: [v],
+ lambda v: [{'key': v}],
+]
+
class TestNamedTuple(unittest.TestCase):
def test_namedtuple_dumps(self):
- for v in [Value(1), Point(1, 2)]:
+ for v in [Value(1), Point(1, 2), DuckValue(1), DuckPoint(1, 2)]:
d = v._asdict()
- l = list(v)
self.assertEqual(d, json.loads(json.dumps(v)))
self.assertEqual(
d,
@@ -35,6 +61,10 @@ class TestNamedTuple(unittest.TestCase):
d,
json.loads(json.dumps(v, namedtuple_as_object=True,
tuple_as_array=False)))
+
+ def test_namedtuple_dumps_false(self):
+ for v in [Value(1), Point(1, 2)]:
+ l = list(v)
self.assertEqual(
l,
json.loads(json.dumps(v, namedtuple_as_object=False)))
@@ -42,9 +72,8 @@ class TestNamedTuple(unittest.TestCase):
tuple_as_array=False, namedtuple_as_object=False)
def test_namedtuple_dump(self):
- for v in [Value(1), Point(1, 2)]:
+ for v in [Value(1), Point(1, 2), DuckValue(1), DuckPoint(1, 2)]:
d = v._asdict()
- l = list(v)
sio = StringIO()
json.dump(v, sio)
self.assertEqual(d, json.loads(sio.getvalue()))
@@ -62,6 +91,10 @@ class TestNamedTuple(unittest.TestCase):
self.assertEqual(
d,
json.loads(sio.getvalue()))
+
+ def test_namedtuple_dump_false(self):
+ for v in [Value(1), Point(1, 2)]:
+ l = list(v)
sio = StringIO()
json.dump(v, sio, namedtuple_as_object=False)
self.assertEqual(
@@ -69,3 +102,21 @@ class TestNamedTuple(unittest.TestCase):
json.loads(sio.getvalue()))
self.assertRaises(TypeError, json.dump, v, StringIO(),
tuple_as_array=False, namedtuple_as_object=False)
+
+ def test_asdict_not_callable_dump(self):
+ for f in CONSTRUCTORS:
+ self.assertRaises(TypeError,
+ json.dump, f(DeadDuck()), StringIO(), namedtuple_as_object=True)
+ sio = StringIO()
+ json.dump(f(DeadDict()), sio, namedtuple_as_object=True)
+ self.assertEqual(
+ json.dumps(f({})),
+ sio.getvalue())
+
+ def test_asdict_not_callable_dumps(self):
+ for f in CONSTRUCTORS:
+ self.assertRaises(TypeError,
+ json.dumps, f(DeadDuck()), namedtuple_as_object=True)
+ self.assertEqual(
+ json.dumps(f({})),
+ json.dumps(f(DeadDict()), namedtuple_as_object=True))
diff --git a/simplejson/tests/test_pass1.py b/simplejson/tests/test_pass1.py
index c3d6302..f0b5b10 100644
--- a/simplejson/tests/test_pass1.py
+++ b/simplejson/tests/test_pass1.py
@@ -18,7 +18,7 @@ JSON = r'''
"real": -9876.543210,
"e": 0.123456789e-12,
"E": 1.234567890E+34,
- "": 23456789012E666,
+ "": 23456789012E66,
"zero": 0,
"one": 1,
"space": " ",
@@ -44,8 +44,7 @@ JSON = r'''
,
-4 , 5 , 6 ,7 ],
- "compact": [1,2,3,4,5,6,7],
+4 , 5 , 6 ,7 ],"compact": [1,2,3,4,5,6,7],
"jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
"quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
"\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
@@ -56,9 +55,11 @@ JSON = r'''
99.44
,
-1066
-
-
+1066,
+1e1,
+0.1e1,
+1e-1,
+1e00,2e+00,2e-00
,"rosebud"]
'''
@@ -67,10 +68,4 @@ class TestPass1(TestCase):
# test in/out equivalence and parsing
res = json.loads(JSON)
out = json.dumps(res)
- self.assertEquals(res, json.loads(out))
- try:
- json.dumps(res, allow_nan=False)
- except ValueError:
- pass
- else:
- self.fail("23456789012E666 should be out of range")
+ self.assertEqual(res, json.loads(out))
diff --git a/simplejson/tests/test_pass2.py b/simplejson/tests/test_pass2.py
index de4ee00..5d812b3 100644
--- a/simplejson/tests/test_pass2.py
+++ b/simplejson/tests/test_pass2.py
@@ -11,4 +11,4 @@ class TestPass2(TestCase):
# test in/out equivalence and parsing
res = json.loads(JSON)
out = json.dumps(res)
- self.assertEquals(res, json.loads(out))
+ self.assertEqual(res, json.loads(out))
diff --git a/simplejson/tests/test_pass3.py b/simplejson/tests/test_pass3.py
index f591aba..821d60b 100644
--- a/simplejson/tests/test_pass3.py
+++ b/simplejson/tests/test_pass3.py
@@ -17,4 +17,4 @@ class TestPass3(TestCase):
# test in/out equivalence and parsing
res = json.loads(JSON)
out = json.dumps(res)
- self.assertEquals(res, json.loads(out))
+ self.assertEqual(res, json.loads(out))
diff --git a/simplejson/tests/test_recursion.py b/simplejson/tests/test_recursion.py
index 83a1d88..662eb66 100644
--- a/simplejson/tests/test_recursion.py
+++ b/simplejson/tests/test_recursion.py
@@ -57,7 +57,7 @@ class TestRecursion(TestCase):
def test_defaultrecursion(self):
enc = RecursiveJSONEncoder()
- self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"')
+ self.assertEqual(enc.encode(JSONTestObject), '"JSONTestObject"')
enc.recurse = True
try:
enc.encode(JSONTestObject)
diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py
index a7fcd46..3d98f0d 100644
--- a/simplejson/tests/test_scanstring.py
+++ b/simplejson/tests/test_scanstring.py
@@ -3,8 +3,17 @@ from unittest import TestCase
import simplejson as json
import simplejson.decoder
+from simplejson.compat import b, PY3
class TestScanString(TestCase):
+ # The bytes type is intentionally not used in most of these tests
+ # under Python 3 because the decoder immediately coerces to str before
+ # calling scanstring. In Python 2 we are testing the code paths
+ # for both unicode and str.
+ #
+ # The reason this is done is because Python 3 would require
+ # entirely different code paths for parsing bytes and str.
+ #
def test_py_scanstring(self):
self._test_scanstring(simplejson.decoder.py_scanstring)
@@ -14,104 +23,172 @@ class TestScanString(TestCase):
self._test_scanstring(simplejson.decoder.c_scanstring)
def _test_scanstring(self, scanstring):
- self.assertEquals(
- scanstring('"z\\ud834\\udd20x"', 1, None, True),
- (u'z\U0001d120x', 16))
-
if sys.maxunicode == 65535:
- self.assertEquals(
+ self.assertEqual(
scanstring(u'"z\U0001d120x"', 1, None, True),
(u'z\U0001d120x', 6))
else:
- self.assertEquals(
+ self.assertEqual(
scanstring(u'"z\U0001d120x"', 1, None, True),
(u'z\U0001d120x', 5))
- self.assertEquals(
+ self.assertEqual(
scanstring('"\\u007b"', 1, None, True),
(u'{', 8))
- self.assertEquals(
+ self.assertEqual(
scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True),
(u'A JSON payload should be an object or array, not a string.', 60))
- self.assertEquals(
+ self.assertEqual(
scanstring('["Unclosed array"', 2, None, True),
(u'Unclosed array', 17))
- self.assertEquals(
+ self.assertEqual(
scanstring('["extra comma",]', 2, None, True),
(u'extra comma', 14))
- self.assertEquals(
+ self.assertEqual(
scanstring('["double extra comma",,]', 2, None, True),
(u'double extra comma', 21))
- self.assertEquals(
+ self.assertEqual(
scanstring('["Comma after the close"],', 2, None, True),
(u'Comma after the close', 24))
- self.assertEquals(
+ self.assertEqual(
scanstring('["Extra close"]]', 2, None, True),
(u'Extra close', 14))
- self.assertEquals(
+ self.assertEqual(
scanstring('{"Extra comma": true,}', 2, None, True),
(u'Extra comma', 14))
- self.assertEquals(
+ self.assertEqual(
scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True),
(u'Extra value after close', 26))
- self.assertEquals(
+ self.assertEqual(
scanstring('{"Illegal expression": 1 + 2}', 2, None, True),
(u'Illegal expression', 21))
- self.assertEquals(
+ self.assertEqual(
scanstring('{"Illegal invocation": alert()}', 2, None, True),
(u'Illegal invocation', 21))
- self.assertEquals(
+ self.assertEqual(
scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True),
(u'Numbers cannot have leading zeroes', 37))
- self.assertEquals(
+ self.assertEqual(
scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True),
(u'Numbers cannot be hex', 24))
- self.assertEquals(
+ self.assertEqual(
scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True),
(u'Too deep', 30))
- self.assertEquals(
+ self.assertEqual(
scanstring('{"Missing colon" null}', 2, None, True),
(u'Missing colon', 16))
- self.assertEquals(
+ self.assertEqual(
scanstring('{"Double colon":: null}', 2, None, True),
(u'Double colon', 15))
- self.assertEquals(
+ self.assertEqual(
scanstring('{"Comma instead of colon", null}', 2, None, True),
(u'Comma instead of colon', 25))
- self.assertEquals(
+ self.assertEqual(
scanstring('["Colon instead of comma": false]', 2, None, True),
(u'Colon instead of comma', 25))
- self.assertEquals(
+ self.assertEqual(
scanstring('["Bad value", truth]', 2, None, True),
(u'Bad value', 12))
+ for c in map(chr, range(0x00, 0x1f)):
+ self.assertEqual(
+ scanstring(c + '"', 0, None, False),
+ (c, 2))
+ self.assertRaises(
+ ValueError,
+ scanstring, c + '"', 0, None, True)
+
+ self.assertRaises(ValueError, scanstring, '', 0, None, True)
+ self.assertRaises(ValueError, scanstring, 'a', 0, None, True)
+ self.assertRaises(ValueError, scanstring, '\\', 0, None, True)
+ self.assertRaises(ValueError, scanstring, '\\u', 0, None, True)
+ self.assertRaises(ValueError, scanstring, '\\u0', 0, None, True)
+ self.assertRaises(ValueError, scanstring, '\\u01', 0, None, True)
+ self.assertRaises(ValueError, scanstring, '\\u012', 0, None, True)
+ self.assertRaises(ValueError, scanstring, '\\u0123', 0, None, True)
+ if sys.maxunicode > 65535:
+ self.assertRaises(ValueError,
+ scanstring, '\\ud834\\u"', 0, None, True)
+ self.assertRaises(ValueError,
+ scanstring, '\\ud834\\x0123"', 0, None, True)
+
def test_issue3623(self):
self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,
"xxx")
self.assertRaises(UnicodeDecodeError,
- json.encoder.encode_basestring_ascii, "xx\xff")
+ json.encoder.encode_basestring_ascii, b("xx\xff"))
def test_overflow(self):
- # Python 2.5 does not have maxsize
- maxsize = getattr(sys, 'maxsize', sys.maxint)
+ # Python 2.5 does not have maxsize, Python 3 does not have maxint
+ maxsize = getattr(sys, 'maxsize', getattr(sys, 'maxint', None))
+ assert maxsize is not None
self.assertRaises(OverflowError, json.decoder.scanstring, "xxx",
maxsize + 1)
+ def test_surrogates(self):
+ scanstring = json.decoder.scanstring
+
+ def assertScan(given, expect, test_utf8=True):
+ givens = [given]
+ if not PY3 and test_utf8:
+ givens.append(given.encode('utf8'))
+ for given in givens:
+ (res, count) = scanstring(given, 1, None, True)
+ self.assertEqual(len(given), count)
+ self.assertEqual(res, expect)
+
+ assertScan(
+ u'"z\\ud834\\u0079x"',
+ u'z\ud834yx')
+ assertScan(
+ u'"z\\ud834\\udd20x"',
+ u'z\U0001d120x')
+ assertScan(
+ u'"z\\ud834\\ud834\\udd20x"',
+ u'z\ud834\U0001d120x')
+ assertScan(
+ u'"z\\ud834x"',
+ u'z\ud834x')
+ assertScan(
+ u'"z\\udd20x"',
+ u'z\udd20x')
+ assertScan(
+ u'"z\ud834x"',
+ u'z\ud834x')
+ # It may look strange to join strings together, but Python is drunk.
+ # https://gist.github.com/etrepum/5538443
+ assertScan(
+ u'"z\\ud834\udd20x12345"',
+ u''.join([u'z\ud834', u'\udd20x12345']))
+ assertScan(
+ u'"z\ud834\\udd20x"',
+ u''.join([u'z\ud834', u'\udd20x']))
+ # these have different behavior given UTF8 input, because the surrogate
+ # pair may be joined (in maxunicode > 65535 builds)
+ assertScan(
+ u''.join([u'"z\ud834', u'\udd20x"']),
+ u''.join([u'z\ud834', u'\udd20x']),
+ test_utf8=False)
+
+ self.assertRaises(ValueError,
+ scanstring, u'"z\\ud83x"', 1, None, True)
+ self.assertRaises(ValueError,
+ scanstring, u'"z\\ud834\\udd2x"', 1, None, True)
diff --git a/simplejson/tests/test_separators.py b/simplejson/tests/test_separators.py
index cbda93c..91b4d4f 100644
--- a/simplejson/tests/test_separators.py
+++ b/simplejson/tests/test_separators.py
@@ -37,6 +37,6 @@ class TestSeparators(TestCase):
h1 = json.loads(d1)
h2 = json.loads(d2)
- self.assertEquals(h1, h)
- self.assertEquals(h2, h)
- self.assertEquals(d2, expect)
+ self.assertEqual(h1, h)
+ self.assertEqual(h2, h)
+ self.assertEqual(d2, expect)
diff --git a/simplejson/tests/test_speedups.py b/simplejson/tests/test_speedups.py
index 825ecf2..0a2b63b 100644
--- a/simplejson/tests/test_speedups.py
+++ b/simplejson/tests/test_speedups.py
@@ -1,20 +1,39 @@
+import sys
+import unittest
from unittest import TestCase
from simplejson import encoder, scanner
+
def has_speedups():
return encoder.c_make_encoder is not None
+
+def skip_if_speedups_missing(func):
+ def wrapper(*args, **kwargs):
+ if not has_speedups():
+ if hasattr(unittest, 'SkipTest'):
+ raise unittest.SkipTest("C Extension not available")
+ else:
+ sys.stdout.write("C Extension not available")
+ return
+ return func(*args, **kwargs)
+
+ return wrapper
+
+
class TestDecode(TestCase):
+ @skip_if_speedups_missing
def test_make_scanner(self):
- if not has_speedups():
- return
self.assertRaises(AttributeError, scanner.c_make_scanner, 1)
+ @skip_if_speedups_missing
def test_make_encoder(self):
- if not has_speedups():
- return
- self.assertRaises(TypeError, encoder.c_make_encoder,
+ self.assertRaises(
+ TypeError,
+ encoder.c_make_encoder,
None,
- "\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75",
- None)
+ ("\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7"
+ "\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75"),
+ None
+ )
diff --git a/simplejson/tests/test_subclass.py b/simplejson/tests/test_subclass.py
new file mode 100644
index 0000000..2bae3b6
--- /dev/null
+++ b/simplejson/tests/test_subclass.py
@@ -0,0 +1,37 @@
+from unittest import TestCase
+import simplejson as json
+
+from decimal import Decimal
+
+class AlternateInt(int):
+ def __repr__(self):
+ return 'invalid json'
+ __str__ = __repr__
+
+
+class AlternateFloat(float):
+ def __repr__(self):
+ return 'invalid json'
+ __str__ = __repr__
+
+
+# class AlternateDecimal(Decimal):
+# def __repr__(self):
+# return 'invalid json'
+
+
+class TestSubclass(TestCase):
+ def test_int(self):
+ self.assertEqual(json.dumps(AlternateInt(1)), '1')
+ self.assertEqual(json.dumps(AlternateInt(-1)), '-1')
+ self.assertEqual(json.loads(json.dumps({AlternateInt(1): 1})), {'1': 1})
+
+ def test_float(self):
+ self.assertEqual(json.dumps(AlternateFloat(1.0)), '1.0')
+ self.assertEqual(json.dumps(AlternateFloat(-1.0)), '-1.0')
+ self.assertEqual(json.loads(json.dumps({AlternateFloat(1.0): 1})), {'1.0': 1})
+
+ # NOTE: Decimal subclasses are not supported as-is
+ # def test_decimal(self):
+ # self.assertEqual(json.dumps(AlternateDecimal('1.0')), '1.0')
+ # self.assertEqual(json.dumps(AlternateDecimal('-1.0')), '-1.0')
diff --git a/simplejson/tests/test_tool.py b/simplejson/tests/test_tool.py
new file mode 100644
index 0000000..ac2a14c
--- /dev/null
+++ b/simplejson/tests/test_tool.py
@@ -0,0 +1,97 @@
+from __future__ import with_statement
+import os
+import sys
+import textwrap
+import unittest
+import subprocess
+import tempfile
+try:
+ # Python 3.x
+ from test.support import strip_python_stderr
+except ImportError:
+ # Python 2.6+
+ try:
+ from test.test_support import strip_python_stderr
+ except ImportError:
+ # Python 2.5
+ import re
+ def strip_python_stderr(stderr):
+ return re.sub(
+ r"\[\d+ refs\]\r?\n?$".encode(),
+ "".encode(),
+ stderr).strip()
+
+class TestTool(unittest.TestCase):
+ data = """
+
+ [["blorpie"],[ "whoops" ] , [
+ ],\t"d-shtaeou",\r"d-nthiouh",
+ "i-vhbjkhnth", {"nifty":87}, {"morefield" :\tfalse,"field"
+ :"yes"} ]
+ """
+
+ expect = textwrap.dedent("""\
+ [
+ [
+ "blorpie"
+ ],
+ [
+ "whoops"
+ ],
+ [],
+ "d-shtaeou",
+ "d-nthiouh",
+ "i-vhbjkhnth",
+ {
+ "nifty": 87
+ },
+ {
+ "field": "yes",
+ "morefield": false
+ }
+ ]
+ """)
+
+ def runTool(self, args=None, data=None):
+ argv = [sys.executable, '-m', 'simplejson.tool']
+ if args:
+ argv.extend(args)
+ proc = subprocess.Popen(argv,
+ stdin=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE)
+ out, err = proc.communicate(data)
+ self.assertEqual(strip_python_stderr(err), ''.encode())
+ self.assertEqual(proc.returncode, 0)
+ return out
+
+ def test_stdin_stdout(self):
+ self.assertEqual(
+ self.runTool(data=self.data.encode()),
+ self.expect.encode())
+
+ def test_infile_stdout(self):
+ with tempfile.NamedTemporaryFile() as infile:
+ infile.write(self.data.encode())
+ infile.flush()
+ self.assertEqual(
+ self.runTool(args=[infile.name]),
+ self.expect.encode())
+
+ def test_infile_outfile(self):
+ with tempfile.NamedTemporaryFile() as infile:
+ infile.write(self.data.encode())
+ infile.flush()
+ # outfile will get overwritten by tool, so the delete
+ # may not work on some platforms. Do it manually.
+ outfile = tempfile.NamedTemporaryFile()
+ try:
+ self.assertEqual(
+ self.runTool(args=[infile.name, outfile.name]),
+ ''.encode())
+ with open(outfile.name, 'rb') as f:
+ self.assertEqual(f.read(), self.expect.encode())
+ finally:
+ outfile.close()
+ if os.path.exists(outfile.name):
+ os.unlink(outfile.name)
diff --git a/simplejson/tests/test_tuple.py b/simplejson/tests/test_tuple.py
index cff9a75..4ad7b0e 100644
--- a/simplejson/tests/test_tuple.py
+++ b/simplejson/tests/test_tuple.py
@@ -1,6 +1,6 @@
import unittest
-from StringIO import StringIO
+from simplejson.compat import StringIO
import simplejson as json
class TestTuples(unittest.TestCase):
@@ -13,7 +13,8 @@ class TestTuples(unittest.TestCase):
self.assertRaises(TypeError, json.dumps, t, tuple_as_array=False)
# Ensure that the "default" does not get called
self.assertEqual(expect, json.dumps(t, default=repr))
- self.assertEqual(expect, json.dumps(t, tuple_as_array=True, default=repr))
+ self.assertEqual(expect, json.dumps(t, tuple_as_array=True,
+ default=repr))
# Ensure that the "default" gets called
self.assertEqual(
json.dumps(repr(t)),
@@ -29,7 +30,8 @@ class TestTuples(unittest.TestCase):
sio = StringIO()
json.dump(t, sio, tuple_as_array=True)
self.assertEqual(expect, sio.getvalue())
- self.assertRaises(TypeError, json.dump, t, StringIO(), tuple_as_array=False)
+ self.assertRaises(TypeError, json.dump, t, StringIO(),
+ tuple_as_array=False)
# Ensure that the "default" does not get called
sio = StringIO()
json.dump(t, sio, default=repr)
diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py
index 83fe65b..3b37f65 100644
--- a/simplejson/tests/test_unicode.py
+++ b/simplejson/tests/test_unicode.py
@@ -1,6 +1,9 @@
+import sys
+import codecs
from unittest import TestCase
import simplejson as json
+from simplejson.compat import unichr, text_type, b, u, BytesIO
class TestUnicode(TestCase):
def test_encoding1(self):
@@ -9,51 +12,51 @@ class TestUnicode(TestCase):
s = u.encode('utf-8')
ju = encoder.encode(u)
js = encoder.encode(s)
- self.assertEquals(ju, js)
+ self.assertEqual(ju, js)
def test_encoding2(self):
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
s = u.encode('utf-8')
ju = json.dumps(u, encoding='utf-8')
js = json.dumps(s, encoding='utf-8')
- self.assertEquals(ju, js)
+ self.assertEqual(ju, js)
def test_encoding3(self):
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = json.dumps(u)
- self.assertEquals(j, '"\\u03b1\\u03a9"')
+ self.assertEqual(j, '"\\u03b1\\u03a9"')
def test_encoding4(self):
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = json.dumps([u])
- self.assertEquals(j, '["\\u03b1\\u03a9"]')
+ self.assertEqual(j, '["\\u03b1\\u03a9"]')
def test_encoding5(self):
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = json.dumps(u, ensure_ascii=False)
- self.assertEquals(j, u'"' + u + u'"')
+ self.assertEqual(j, u'"' + u + u'"')
def test_encoding6(self):
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = json.dumps([u], ensure_ascii=False)
- self.assertEquals(j, u'["' + u + u'"]')
+ self.assertEqual(j, u'["' + u + u'"]')
def test_big_unicode_encode(self):
u = u'\U0001d120'
- self.assertEquals(json.dumps(u), '"\\ud834\\udd20"')
- self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"')
+ self.assertEqual(json.dumps(u), '"\\ud834\\udd20"')
+ self.assertEqual(json.dumps(u, ensure_ascii=False), u'"\U0001d120"')
def test_big_unicode_decode(self):
u = u'z\U0001d120x'
- self.assertEquals(json.loads('"' + u + '"'), u)
- self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u)
+ self.assertEqual(json.loads('"' + u + '"'), u)
+ self.assertEqual(json.loads('"z\\ud834\\udd20x"'), u)
def test_unicode_decode(self):
for i in range(0, 0xd7ff):
u = unichr(i)
#s = '"\\u{0:04x}"'.format(i)
s = '"\\u%04x"' % (i,)
- self.assertEquals(json.loads(s), u)
+ self.assertEqual(json.loads(s), u)
def test_object_pairs_hook_with_unicode(self):
s = u'{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
@@ -72,38 +75,79 @@ class TestUnicode(TestCase):
def test_default_encoding(self):
- self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')),
+ self.assertEqual(json.loads(u'{"a": "\xe9"}'.encode('utf-8')),
{'a': u'\xe9'})
def test_unicode_preservation(self):
- self.assertEquals(type(json.loads(u'""')), unicode)
- self.assertEquals(type(json.loads(u'"a"')), unicode)
- self.assertEquals(type(json.loads(u'["a"]')[0]), unicode)
+ self.assertEqual(type(json.loads(u'""')), text_type)
+ self.assertEqual(type(json.loads(u'"a"')), text_type)
+ self.assertEqual(type(json.loads(u'["a"]')[0]), text_type)
def test_ensure_ascii_false_returns_unicode(self):
# http://code.google.com/p/simplejson/issues/detail?id=48
- self.assertEquals(type(json.dumps([], ensure_ascii=False)), unicode)
- self.assertEquals(type(json.dumps(0, ensure_ascii=False)), unicode)
- self.assertEquals(type(json.dumps({}, ensure_ascii=False)), unicode)
- self.assertEquals(type(json.dumps("", ensure_ascii=False)), unicode)
+ self.assertEqual(type(json.dumps([], ensure_ascii=False)), text_type)
+ self.assertEqual(type(json.dumps(0, ensure_ascii=False)), text_type)
+ self.assertEqual(type(json.dumps({}, ensure_ascii=False)), text_type)
+ self.assertEqual(type(json.dumps("", ensure_ascii=False)), text_type)
def test_ensure_ascii_false_bytestring_encoding(self):
# http://code.google.com/p/simplejson/issues/detail?id=48
- doc1 = {u'quux': 'Arr\xc3\xaat sur images'}
- doc2 = {u'quux': u'Arr\xeat sur images'}
+ doc1 = {u'quux': b('Arr\xc3\xaat sur images')}
+ doc2 = {u'quux': u('Arr\xeat sur images')}
doc_ascii = '{"quux": "Arr\\u00eat sur images"}'
doc_unicode = u'{"quux": "Arr\xeat sur images"}'
- self.assertEquals(json.dumps(doc1), doc_ascii)
- self.assertEquals(json.dumps(doc2), doc_ascii)
- self.assertEquals(json.dumps(doc1, ensure_ascii=False), doc_unicode)
- self.assertEquals(json.dumps(doc2, ensure_ascii=False), doc_unicode)
+ self.assertEqual(json.dumps(doc1), doc_ascii)
+ self.assertEqual(json.dumps(doc2), doc_ascii)
+ self.assertEqual(json.dumps(doc1, ensure_ascii=False), doc_unicode)
+ self.assertEqual(json.dumps(doc2, ensure_ascii=False), doc_unicode)
def test_ensure_ascii_linebreak_encoding(self):
# http://timelessrepo.com/json-isnt-a-javascript-subset
s1 = u'\u2029\u2028'
s2 = s1.encode('utf8')
expect = '"\\u2029\\u2028"'
- self.assertEquals(json.dumps(s1), expect)
- self.assertEquals(json.dumps(s2), expect)
- self.assertEquals(json.dumps(s1, ensure_ascii=False), expect)
- self.assertEquals(json.dumps(s2, ensure_ascii=False), expect)
+ self.assertEqual(json.dumps(s1), expect)
+ self.assertEqual(json.dumps(s2), expect)
+ self.assertEqual(json.dumps(s1, ensure_ascii=False), expect)
+ self.assertEqual(json.dumps(s2, ensure_ascii=False), expect)
+
+ def test_invalid_escape_sequences(self):
+ # incomplete escape sequence: input ends inside or right after the \u escape, with no closing quote
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1234')
+ # invalid escape sequence: a non-hex character in each of the four digit positions
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123x"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12x4"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1x34"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ux234"')
+ if sys.maxunicode > 65535:
+ # invalid escape sequence for low surrogate: truncated or non-hex \u escape following \ud800
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000x"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00x0"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0x00"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ux000"')
+
+ def test_ensure_ascii_still_works(self):
+ # in the ascii range, ensure_ascii=False must give the same output as the default
+ for c in map(unichr, range(0, 127)):
+ self.assertEqual(
+ json.dumps(c, ensure_ascii=False),
+ json.dumps(c))
+ snowman = u'\N{SNOWMAN}'  # non-ASCII character: must be emitted unescaped
+ self.assertEqual(
+ json.dumps(snowman, ensure_ascii=False),
+ '"' + snowman + '"')
+
+ def test_strip_bom(self):
+ content = u"\u3053\u3093\u306b\u3061\u308f"
+ json_doc = codecs.BOM_UTF8 + b(json.dumps(content))  # document prefixed with the UTF-8 BOM
+ self.assertEqual(json.load(BytesIO(json_doc)), content)  # read through a file-like object
+ for doc in json_doc, json_doc.decode('utf8'):  # the BOM-prefixed document, then its utf8-decoded form
+ self.assertEqual(json.loads(doc), content)
diff --git a/simplejson/tool.py b/simplejson/tool.py
index 73370db..062e8e2 100644
--- a/simplejson/tool.py
+++ b/simplejson/tool.py
@@ -10,6 +10,7 @@ Usage::
Expecting property name: line 1 column 2 (char 2)
"""
+from __future__ import with_statement
import sys
import simplejson as json
@@ -18,21 +19,23 @@ def main():
infile = sys.stdin
outfile = sys.stdout
elif len(sys.argv) == 2:
- infile = open(sys.argv[1], 'rb')
+ infile = open(sys.argv[1], 'r')
outfile = sys.stdout
elif len(sys.argv) == 3:
- infile = open(sys.argv[1], 'rb')
- outfile = open(sys.argv[2], 'wb')
+ infile = open(sys.argv[1], 'r')
+ outfile = open(sys.argv[2], 'w')
else:
raise SystemExit(sys.argv[0] + " [infile [outfile]]")
- try:
- obj = json.load(infile,
- object_pairs_hook=json.OrderedDict,
- use_decimal=True)
- except ValueError, e:
- raise SystemExit(e)
- json.dump(obj, outfile, sort_keys=True, indent=' ', use_decimal=True)
- outfile.write('\n')
+ with infile:
+ try:
+ obj = json.load(infile,
+ object_pairs_hook=json.OrderedDict,
+ use_decimal=True)
+ except ValueError:
+ raise SystemExit(sys.exc_info()[1])
+ with outfile:
+ json.dump(obj, outfile, sort_keys=True, indent=' ', use_decimal=True)
+ outfile.write('\n')
if __name__ == '__main__':