diff options
author | Ryo Takahashi <rt.sporty@gmail.com> | 2014-07-21 16:33:51 +0900 |
---|---|---|
committer | Ryo Takahashi <rt.sporty@gmail.com> | 2014-07-21 16:33:51 +0900 |
commit | 3eaa8d54dfed8cd64c9f439451f5514f45cd4dd4 (patch) | |
tree | df387dcd8815dc632b07d862d693d1f2453af939 | |
parent | b7486b82233ed2ec1a614dcf8944d376d12d04bf (diff) | |
download | simplejson-3eaa8d54dfed8cd64c9f439451f5514f45cd4dd4.tar.gz |
strip utf-8 bom.
-rw-r--r-- | simplejson/__init__.py | 11 | ||||
-rw-r--r-- | simplejson/tests/test_unicode.py | 9 | ||||
-rw-r--r-- | simplejson/tests/utf-8-bom.json | 3 |
3 files changed, 22 insertions, 1 deletion
```diff
diff --git a/simplejson/__init__.py b/simplejson/__init__.py
index a5c0137..bc5c93a 100644
--- a/simplejson/__init__.py
+++ b/simplejson/__init__.py
@@ -437,7 +437,16 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
     of subclassing whenever possible.
 
     """
-    return loads(fp.read(),
+    # Strip the UTF-8 BOM
+    contents = fp.read()
+    ord0 = ord(contents[0])
+    if ord0 in (0xef, 0xfeff):
+        if ord0 == 0xfeff:
+            contents = contents[1:]
+        elif contents[:3] == '\xef\xbb\xbf':
+            contents = contents[3:]
+
+    return loads(contents,
         encoding=encoding, cls=cls, object_hook=object_hook,
         parse_float=parse_float, parse_int=parse_int,
         parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py
index f04cc5c..60492f7 100644
--- a/simplejson/tests/test_unicode.py
+++ b/simplejson/tests/test_unicode.py
@@ -1,4 +1,5 @@
 import sys
+import os.path
 from unittest import TestCase
 
 import simplejson as json
@@ -143,3 +144,11 @@ class TestUnicode(TestCase):
         self.assertEqual(
             json.dumps(c, ensure_ascii=False),
             '"' + c + '"')
+
+    def test_strip_bom(self):
+        thisdir = os.path.dirname(__file__)
+        json_file = os.path.join(thisdir, "utf-8-bom.json")
+        doc_ascii = {
+            u"content": u"\u3053\u3093\u306b\u3061\u308f"
+        }
+        self.assertEqual(json.load(open(json_file)), doc_ascii)
diff --git a/simplejson/tests/utf-8-bom.json b/simplejson/tests/utf-8-bom.json
new file mode 100644
index 0000000..1791beb
--- /dev/null
+++ b/simplejson/tests/utf-8-bom.json
@@ -0,0 +1,3 @@
+{
+ "content": "こんにちわ"
+}
```