diff options
author | Bob Ippolito <bob@redivi.com> | 2014-07-21 17:18:17 +0200 |
---|---|---|
committer | Bob Ippolito <bob@redivi.com> | 2014-07-21 17:18:17 +0200 |
commit | 78441805700f1adccfeb4e454fa025e637b9a16b (patch) | |
tree | 201cfe69b7fcceef6e9f92887c9b431beb5ba970 | |
parent | b7486b82233ed2ec1a614dcf8944d376d12d04bf (diff) | |
parent | 925cae72acd6a0d2a789cdc363736ed826f8ffa6 (diff) | |
download | simplejson-78441805700f1adccfeb4e454fa025e637b9a16b.tar.gz |
Merge branch 'sporty-master'
-rw-r--r-- | CHANGES.txt | 6 | ||||
-rw-r--r-- | conf.py | 4 | ||||
-rw-r--r-- | simplejson/__init__.py | 2 | ||||
-rw-r--r-- | simplejson/decoder.py | 7 | ||||
-rw-r--r-- | simplejson/tests/test_unicode.py | 10 |
5 files changed, 25 insertions, 4 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index 23c705b..8714c1a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,9 @@
+Version 3.6.0 released 2014-07-21
+
+* Automatically strip any UTF-8 BOM from input to more closely
+  follow the latest specs
+  https://github.com/simplejson/simplejson/pull/101
+
 Version 3.5.3 released 2014-06-24
 
 * Fix lower bound checking in scan_once / raw_decode API
diff --git a/conf.py b/conf.py
--- a/conf.py
+++ b/conf.py
@@ -42,9 +42,9 @@ copyright = '2014, Bob Ippolito'
 # other places throughout the built documents.
 #
 # The short X.Y version.
-version = '3.5'
+version = '3.6'
 # The full version, including alpha/beta/rc tags.
-release = '3.5.3'
+release = '3.6.0'
 
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
diff --git a/simplejson/__init__.py b/simplejson/__init__.py
index a5c0137..0a92914 100644
--- a/simplejson/__init__.py
+++ b/simplejson/__init__.py
@@ -98,7 +98,7 @@ Using simplejson.tool from the shell to validate and pretty-print::
     Expecting property name: line 1 column 3 (char 2)
 """
 from __future__ import absolute_import
-__version__ = '3.5.3'
+__version__ = '3.6.0'
 __all__ = [
     'dump', 'dumps', 'load', 'loads',
     'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
diff --git a/simplejson/decoder.py b/simplejson/decoder.py
index 1a6c5d9..545e658 100644
--- a/simplejson/decoder.py
+++ b/simplejson/decoder.py
@@ -390,4 +390,11 @@ class JSONDecoder(object):
             raise JSONDecodeError('Expecting value', s, idx)
         if _PY3 and not isinstance(s, text_type):
             raise TypeError("Input string must be text, not bytes")
+        # strip UTF-8 bom
+        if len(s) > idx:
+            ord0 = ord(s[idx])
+            if ord0 == 0xfeff:
+                idx += 1
+            elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
+                idx += 3
         return self.scan_once(s, idx=_w(s, idx).end())
diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py
index f04cc5c..3b37f65 100644
--- a/simplejson/tests/test_unicode.py
+++ b/simplejson/tests/test_unicode.py
@@ -1,8 +1,9 @@
 import sys
+import codecs
 from unittest import TestCase
 
 import simplejson as json
-from simplejson.compat import unichr, text_type, b, u
+from simplejson.compat import unichr, text_type, b, u, BytesIO
 
 class TestUnicode(TestCase):
     def test_encoding1(self):
@@ -143,3 +144,10 @@ class TestUnicode(TestCase):
             self.assertEqual(
                 json.dumps(c, ensure_ascii=False),
                 '"' + c + '"')
+
+    def test_strip_bom(self):
+        content = u"\u3053\u3093\u306b\u3061\u308f"
+        json_doc = codecs.BOM_UTF8 + b(json.dumps(content))
+        self.assertEqual(json.load(BytesIO(json_doc)), content)
+        for doc in json_doc, json_doc.decode('utf8'):
+            self.assertEqual(json.loads(doc), content)