summaryrefslogtreecommitdiff
path: root/simplejson/tests/test_scanstring.py
diff options
context:
space:
mode:
Diffstat (limited to 'simplejson/tests/test_scanstring.py')
-rw-r--r--simplejson/tests/test_scanstring.py61
1 files changed, 54 insertions, 7 deletions
diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py
index 3b63d6b..3d98f0d 100644
--- a/simplejson/tests/test_scanstring.py
+++ b/simplejson/tests/test_scanstring.py
@@ -23,10 +23,6 @@ class TestScanString(TestCase):
self._test_scanstring(simplejson.decoder.c_scanstring)
def _test_scanstring(self, scanstring):
- self.assertEqual(
- scanstring('"z\\ud834\\udd20x"', 1, None, True),
- (u'z\U0001d120x', 16))
-
if sys.maxunicode == 65535:
self.assertEqual(
scanstring(u'"z\U0001d120x"', 1, None, True),
@@ -129,9 +125,10 @@ class TestScanString(TestCase):
self.assertRaises(ValueError, scanstring, '\\u012', 0, None, True)
self.assertRaises(ValueError, scanstring, '\\u0123', 0, None, True)
if sys.maxunicode > 65535:
- self.assertRaises(ValueError, scanstring, '\\ud834"', 0, None, True),
- self.assertRaises(ValueError, scanstring, '\\ud834\\u"', 0, None, True),
- self.assertRaises(ValueError, scanstring, '\\ud834\\x0123"', 0, None, True),
+ self.assertRaises(ValueError,
+ scanstring, '\\ud834\\u"', 0, None, True)
+ self.assertRaises(ValueError,
+ scanstring, '\\ud834\\x0123"', 0, None, True)
def test_issue3623(self):
self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,
@@ -145,3 +142,53 @@ class TestScanString(TestCase):
assert maxsize is not None
self.assertRaises(OverflowError, json.decoder.scanstring, "xxx",
maxsize + 1)
+
+ def test_surrogates(self):
+ scanstring = json.decoder.scanstring
+
+ def assertScan(given, expect, test_utf8=True):
+ givens = [given]
+ if not PY3 and test_utf8:
+ givens.append(given.encode('utf8'))
+ for given in givens:
+ (res, count) = scanstring(given, 1, None, True)
+ self.assertEqual(len(given), count)
+ self.assertEqual(res, expect)
+
+ assertScan(
+ u'"z\\ud834\\u0079x"',
+ u'z\ud834yx')
+ assertScan(
+ u'"z\\ud834\\udd20x"',
+ u'z\U0001d120x')
+ assertScan(
+ u'"z\\ud834\\ud834\\udd20x"',
+ u'z\ud834\U0001d120x')
+ assertScan(
+ u'"z\\ud834x"',
+ u'z\ud834x')
+ assertScan(
+ u'"z\\udd20x"',
+ u'z\udd20x')
+ assertScan(
+ u'"z\ud834x"',
+ u'z\ud834x')
+ # It may look strange to join strings together, but Python is drunk.
+ # https://gist.github.com/etrepum/5538443
+ assertScan(
+ u'"z\\ud834\udd20x12345"',
+ u''.join([u'z\ud834', u'\udd20x12345']))
+ assertScan(
+ u'"z\ud834\\udd20x"',
+ u''.join([u'z\ud834', u'\udd20x']))
+ # these have different behavior given UTF8 input, because the surrogate
+ # pair may be joined (in maxunicode > 65535 builds)
+ assertScan(
+ u''.join([u'"z\ud834', u'\udd20x"']),
+ u''.join([u'z\ud834', u'\udd20x']),
+ test_utf8=False)
+
+ self.assertRaises(ValueError,
+ scanstring, u'"z\\ud83x"', 1, None, True)
+ self.assertRaises(ValueError,
+ scanstring, u'"z\\ud834\\udd2x"', 1, None, True)