summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Bob Ippolito <bob@redivi.com>, 2012-12-30 19:38:52 -0800
committer: Bob Ippolito <bob@redivi.com>, 2012-12-30 19:38:52 -0800
commit: f8cdf39583e2400e5f461ab85459d4d21807a4a0 (patch)
tree: d4cf382894dda810df840a66edac90553062c4c1
parent: 5ed2285e1d88f8075d47b10f941e7581d78828ff (diff)
download: simplejson-f8cdf39583e2400e5f461ab85459d4d21807a4a0.tar.gz
better test coverage for invalid surrogates
-rw-r--r--  simplejson/decoder.py  35
-rw-r--r--  simplejson/tests/test_unicode.py  35
2 files changed, 60 insertions, 10 deletions
diff --git a/simplejson/decoder.py b/simplejson/decoder.py
index c844b3c..546a168 100644
--- a/simplejson/decoder.py
+++ b/simplejson/decoder.py
@@ -154,18 +154,33 @@ def py_scanstring(s, end, encoding=None, strict=True,
if len(esc) != 4:
msg = "Invalid \\uXXXX escape"
raise JSONDecodeError(msg, s, end)
- uni = int(esc, 16)
+ try:
+ uni = int(esc, 16)
+ except ValueError:
+ msg = "Invalid \\uXXXX escape"
+ raise JSONDecodeError(msg, s, end)
# Check for surrogate pair on UCS-4 systems
- if 0xd800 <= uni <= 0xdbff and _maxunicode > 65535:
- msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
- if not s[end + 5:end + 7] == '\\u':
- raise JSONDecodeError(msg, s, end)
- esc2 = s[end + 7:end + 11]
- if len(esc2) != 4:
+ if _maxunicode > 65535:
+ unimask = uni & 0xfc00
+ if unimask == 0xd800:
+ msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
+ if not s[end + 5:end + 7] == '\\u':
+ raise JSONDecodeError(msg, s, end)
+ esc2 = s[end + 7:end + 11]
+ if len(esc2) != 4:
+ raise JSONDecodeError(msg, s, end)
+ try:
+ uni2 = int(esc2, 16)
+ except ValueError:
+ raise JSONDecodeError(msg, s, end)
+ if uni2 & 0xfc00 != 0xdc00:
+ msg = "Unpaired high surrogate"
+ raise JSONDecodeError(msg, s, end)
+ uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
+ next_end += 6
+ elif unimask == 0xdc00:
+ msg = "Unpaired low surrogate"
raise JSONDecodeError(msg, s, end)
- uni2 = int(esc2, 16)
- uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
- next_end += 6
char = unichr(uni)
end = next_end
# Append the unescaped character
diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py
index 6558709..9afc3ac 100644
--- a/simplejson/tests/test_unicode.py
+++ b/simplejson/tests/test_unicode.py
@@ -1,3 +1,4 @@
+import sys
from unittest import TestCase
import simplejson as json
@@ -108,3 +109,37 @@ class TestUnicode(TestCase):
self.assertEquals(json.dumps(s2), expect)
self.assertEquals(json.dumps(s1, ensure_ascii=False), expect)
self.assertEquals(json.dumps(s2, ensure_ascii=False), expect)
+
+ def test_invalid_escape_sequences(self):
+ # truncated input: the document ends inside the \uXXXX escape (0-3 hex digits) or, in the last case, after all four digits but before the closing quote
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1234')
+ # invalid escape sequence: a non-hex character ('x') in each of the four digit positions in turn
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123x"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12x4"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1x34"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ux234"')
+ if sys.maxunicode > 65535:  # surrogate cases are only asserted on wide (UCS-4) builds
+ # unpaired low surrogate: a lone escape in the low-surrogate range with no high surrogate before it
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\udc00"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\udcff"')
+ # unpaired high surrogate: \ud800 followed by the closing quote, by plain characters, or by a second \u escape with fewer than four digits
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800x"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800xx"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800xxxxxx"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000"')
+ # invalid escape sequence for low surrogate: a non-hex character ('x') in each digit position of the second escape in turn
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000x"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00x0"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0x00"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ux000"')
+ # invalid value for low surrogate: the second escape is valid hex but lies outside the low-surrogate range
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0000"')
+ self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ufc00"') \ No newline at end of file