summary refs log tree commit diff
diff options
context:
space:
mode:
author Ryo Takahashi <rt.sporty@gmail.com> 2014-07-21 16:33:51 +0900
committer Ryo Takahashi <rt.sporty@gmail.com> 2014-07-21 16:33:51 +0900
commit 3eaa8d54dfed8cd64c9f439451f5514f45cd4dd4 (patch)
tree df387dcd8815dc632b07d862d693d1f2453af939
parent b7486b82233ed2ec1a614dcf8944d376d12d04bf (diff)
download simplejson-3eaa8d54dfed8cd64c9f439451f5514f45cd4dd4.tar.gz
strip utf-8 bom.
-rw-r--r-- simplejson/__init__.py 11
-rw-r--r-- simplejson/tests/test_unicode.py 9
-rw-r--r-- simplejson/tests/utf-8-bom.json 3
3 files changed, 22 insertions, 1 deletions
diff --git a/simplejson/__init__.py b/simplejson/__init__.py
index a5c0137..bc5c93a 100644
--- a/simplejson/__init__.py
+++ b/simplejson/__init__.py
@@ -437,7 +437,16 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
of subclassing whenever possible.
"""
- return loads(fp.read(),
+ # Strip the UTF-8 BOM
+ contents = fp.read()
+ ord0 = ord(contents[0])
+ if ord0 in (0xef, 0xfeff):
+ if ord0 == 0xfeff:
+ contents = contents[1:]
+ elif contents[:3] == '\xef\xbb\xbf':
+ contents = contents[3:]
+
+ return loads(contents,
encoding=encoding, cls=cls, object_hook=object_hook,
parse_float=parse_float, parse_int=parse_int,
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py
index f04cc5c..60492f7 100644
--- a/simplejson/tests/test_unicode.py
+++ b/simplejson/tests/test_unicode.py
@@ -1,4 +1,5 @@
import sys
+import os.path
from unittest import TestCase
import simplejson as json
@@ -143,3 +144,11 @@ class TestUnicode(TestCase):
self.assertEqual(
json.dumps(c, ensure_ascii=False),
'"' + c + '"')
+
+ def test_strip_bom(self):
+ thisdir = os.path.dirname(__file__)
+ json_file = os.path.join(thisdir, "utf-8-bom.json")
+ doc_ascii = {
+ u"content": u"\u3053\u3093\u306b\u3061\u308f"
+ }
+ self.assertEqual(json.load(open(json_file)), doc_ascii)
diff --git a/simplejson/tests/utf-8-bom.json b/simplejson/tests/utf-8-bom.json
new file mode 100644
index 0000000..1791beb
--- /dev/null
+++ b/simplejson/tests/utf-8-bom.json
@@ -0,0 +1,3 @@
+{
+ "content": "こんにちわ"
+}