diff options
author | Bob Ippolito <bob@redivi.com> | 2011-05-15 19:23:52 -0700 |
---|---|---|
committer | Bob Ippolito <bob@redivi.com> | 2011-05-15 19:23:52 -0700 |
commit | 4989e693bab39b1ce5cf6fc0b21dbacd108c312c (patch) | |
tree | bea0b1a1500c460474daff76173107182a901bbb | |
parent | e27ec198ec6d28bd0d211939122523a781414318 (diff) | |
download | simplejson-4989e693bab39b1ce5cf6fc0b21dbacd108c312c.tar.gz |
Force unicode linebreak characters to be escaped (U+2028 and U+2029)
-rw-r--r-- | CHANGES.txt | 2 | ||||
-rw-r--r-- | simplejson/encoder.py | 4 | ||||
-rw-r--r-- | simplejson/tests/test_unicode.py | 10 |
3 files changed, 15 insertions, 1 deletion
diff --git a/CHANGES.txt b/CHANGES.txt
index 03d86ec..c391be5 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
 Version 2.1.7 released 2011-XX-XX
 
+* Force unicode linebreak characters to be escaped (U+2028 and U+2029)
+  http://timelessrepo.com/json-isnt-a-javascript-subset
 * Moved documentation from a git submodule to
   http://simplejson.readthedocs.org/
 
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
index f43f6f4..c72bd7f 100644
--- a/simplejson/encoder.py
+++ b/simplejson/encoder.py
@@ -13,7 +13,7 @@ c_encode_basestring_ascii, c_make_encoder = _import_speedups()
 
 from simplejson.decoder import PosInf
 
-ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
+ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
 HAS_UTF8 = re.compile(r'[\x80-\xff]')
 ESCAPE_DCT = {
@@ -24,6 +24,8 @@ ESCAPE_DCT = {
     '\n': '\\n',
     '\r': '\\r',
     '\t': '\\t',
+    u'\u2028': '\\u2028',
+    u'\u2029': '\\u2029',
 }
 for i in range(0x20):
     #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py
index f73e5bf..83fe65b 100644
--- a/simplejson/tests/test_unicode.py
+++ b/simplejson/tests/test_unicode.py
@@ -97,3 +97,13 @@ class TestUnicode(TestCase):
         self.assertEquals(json.dumps(doc2), doc_ascii)
         self.assertEquals(json.dumps(doc1, ensure_ascii=False), doc_unicode)
         self.assertEquals(json.dumps(doc2, ensure_ascii=False), doc_unicode)
+
+    def test_ensure_ascii_linebreak_encoding(self):
+        # http://timelessrepo.com/json-isnt-a-javascript-subset
+        s1 = u'\u2029\u2028'
+        s2 = s1.encode('utf8')
+        expect = '"\\u2029\\u2028"'
+        self.assertEquals(json.dumps(s1), expect)
+        self.assertEquals(json.dumps(s2), expect)
+        self.assertEquals(json.dumps(s1, ensure_ascii=False), expect)
+        self.assertEquals(json.dumps(s2, ensure_ascii=False), expect)