summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Lib/test/test_pep263.py67
-rw-r--r--Python/ast.c21
2 files changed, 49 insertions, 39 deletions
diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py
index a3a9ade0b6..e4faa9ff56 100644
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
@@ -1,30 +1,37 @@
-#! -*- coding: koi8-r -*-
-
-import unittest
-from test import test_support
-
-class PEP263Test(unittest.TestCase):
-
- def test_pep263(self):
- self.assertEqual(
- u"ðÉÔÏÎ".encode("utf-8"),
- '\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
- )
- self.assertEqual(
- u"\ð".encode("utf-8"),
- '\\\xd0\x9f'
- )
-
- def test_compilestring(self):
- # see #1882
- c = compile("\n# coding: utf-8\nu = u'\xc3\xb3'\n", "dummy", "exec")
- d = {}
- exec c in d
- self.assertEqual(d['u'], u'\xf3')
-
-
-def test_main():
- test_support.run_unittest(PEP263Test)
-
-if __name__=="__main__":
- test_main()
+# -*- coding: koi8-r -*-
+
+import unittest
+from test import test_support
+
+class PEP263Test(unittest.TestCase):
+
+ def test_pep263(self):
+ self.assertEqual(
+ u"ðÉÔÏÎ".encode("utf-8"),
+ '\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
+ )
+ self.assertEqual(
+ u"\ð".encode("utf-8"),
+ '\\\xd0\x9f'
+ )
+
+ def test_compilestring(self):
+ # see #1882
+ c = compile("\n# coding: utf-8\nu = u'\xc3\xb3'\n", "dummy", "exec")
+ d = {}
+ exec c in d
+ self.assertEqual(d['u'], u'\xf3')
+
+
+ def test_issue3297(self):
+ c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
+ d = {}
+ exec(c, d)
+ self.assertEqual(d['a'], d['b'])
+ self.assertEqual(len(d['a']), len(d['b']))
+
+def test_main():
+ test_support.run_unittest(PEP263Test)
+
+if __name__=="__main__":
+ test_main()
diff --git a/Python/ast.c b/Python/ast.c
index a3fdd8998d..b89e29c55c 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -3248,10 +3248,11 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
u = NULL;
} else {
/* check for integer overflow */
- if (len > PY_SIZE_MAX / 4)
+ if (len > PY_SIZE_MAX / 6)
return NULL;
- /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
- u = PyString_FromStringAndSize((char *)NULL, len * 4);
+ /* "<C3><A4>" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
+ "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
+ u = PyString_FromStringAndSize((char *)NULL, len * 6);
if (u == NULL)
return NULL;
p = buf = PyString_AsString(u);
@@ -3268,19 +3269,21 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
PyObject *w;
char *r;
Py_ssize_t rn, i;
- w = decode_utf8(c, &s, end, "utf-16-be");
+ w = decode_utf8(c, &s, end, "utf-32-be");
if (w == NULL) {
Py_DECREF(u);
return NULL;
}
r = PyString_AsString(w);
rn = PyString_Size(w);
- assert(rn % 2 == 0);
- for (i = 0; i < rn; i += 2) {
- sprintf(p, "\\u%02x%02x",
+ assert(rn % 4 == 0);
+ for (i = 0; i < rn; i += 4) {
+ sprintf(p, "\\U%02x%02x%02x%02x",
r[i + 0] & 0xFF,
- r[i + 1] & 0xFF);
- p += 6;
+ r[i + 1] & 0xFF,
+ r[i + 2] & 0xFF,
+ r[i + 3] & 0xFF);
+ p += 10;
}
Py_DECREF(w);
} else {