Merged revisions 75931 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk ........ r75931 | benjamin.peterson | 2009-10-28 20:49:07 -0500 (Wed, 28 Oct 2009) | 5 lines do a backport of r75928 The added test does not fail without the patch, but we still fix the issue of surrogates being used in wide builds where they should not be. ........
author: Benjamin Peterson <benjamin@python.org> 2009-10-29 02:02:47 +0000
committer: Benjamin Peterson <benjamin@python.org> 2009-10-29 02:02:47 +0000
commit: 5383bf8ff019de61dbb90792a24dc86feed9f7ed (patch)
tree: 6257de73415986453d2662e06708d5d558c92ea6
parent: 984cfc1e143eff3df9455fd105ab326b14ef4f29 (diff)
download: cpython-5383bf8ff019de61dbb90792a24dc86feed9f7ed.tar.gz
2 files changed, 49 insertions, 39 deletions
diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py
index a3a9ade0b6..e4faa9ff56 100644
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
@@ -1,30 +1,37 @@
-#! -*- coding: koi8-r -*-
-
-import unittest
-from test import test_support
-
-class PEP263Test(unittest.TestCase):
-
-    def test_pep263(self):
-        self.assertEqual(
-            u"Питон".encode("utf-8"),
-            '\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
-        )
-        self.assertEqual(
-            u"\П".encode("utf-8"),
-            '\\\xd0\x9f'
-        )
-
-    def test_compilestring(self):
-        # see #1882
-        c = compile("\n# coding: utf-8\nu = u'\xc3\xb3'\n", "dummy", "exec")
-        d = {}
-        exec c in d
-        self.assertEqual(d['u'], u'\xf3')
-
-
-def test_main():
-    test_support.run_unittest(PEP263Test)
-
-if __name__=="__main__":
-    test_main()
+# -*- coding: koi8-r -*-
+
+import unittest
+from test import test_support
+
+class PEP263Test(unittest.TestCase):
+
+    def test_pep263(self):
+        self.assertEqual(
+            u"Питон".encode("utf-8"),
+            '\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
+        )
+        self.assertEqual(
+            u"\П".encode("utf-8"),
+            '\\\xd0\x9f'
+        )
+
+    def test_compilestring(self):
+        # see #1882
+        c = compile("\n# coding: utf-8\nu = u'\xc3\xb3'\n", "dummy", "exec")
+        d = {}
+        exec c in d
+        self.assertEqual(d['u'], u'\xf3')
+
+
+    def test_issue3297(self):
+        c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
+        d = {}
+        exec(c, d)
+        self.assertEqual(d['a'], d['b'])
+        self.assertEqual(len(d['a']), len(d['b']))
+
+def test_main():
+    test_support.run_unittest(PEP263Test)
+
+if __name__=="__main__":
+    test_main()
diff --git a/Python/ast.c b/Python/ast.c
index a3fdd8998d..b89e29c55c 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -3248,10 +3248,11 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
                 u = NULL;
         } else {
                 /* check for integer overflow */
-                if (len > PY_SIZE_MAX / 4)
+                if (len > PY_SIZE_MAX / 6)
                         return NULL;
-                /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
-                u = PyString_FromStringAndSize((char *)NULL, len * 4);
+		/* "<C3><A4>" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
+		   "\цє" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
+                u = PyString_FromStringAndSize((char *)NULL, len * 6);
                 if (u == NULL)
                         return NULL;
                 p = buf = PyString_AsString(u);
@@ -3268,19 +3269,21 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
                                 PyObject *w;
                                 char *r;
                                 Py_ssize_t rn, i;
-                                w = decode_utf8(c, &s, end, "utf-16-be");
+                                w = decode_utf8(c, &s, end, "utf-32-be");
                                 if (w == NULL) {
                                         Py_DECREF(u);
                                         return NULL;
                                 }
                                 r = PyString_AsString(w);
                                 rn = PyString_Size(w);
-                                assert(rn % 2 == 0);
-                                for (i = 0; i < rn; i += 2) {
-                                        sprintf(p, "\\u%02x%02x",
+                                assert(rn % 4 == 0);
+                                for (i = 0; i < rn; i += 4) {
+                                        sprintf(p, "\\U%02x%02x%02x%02x",
                                                 r[i + 0] & 0xFF,
-                                                r[i + 1] & 0xFF);
-                                        p += 6;
+                                                r[i + 1] & 0xFF,
+						r[i + 2] & 0xFF,
+						r[i + 3] & 0xFF);
+                                        p += 10;
                                 }
                                 Py_DECREF(w);
                         } else {
author	Benjamin Peterson <benjamin@python.org>	2009-10-29 02:02:47 +0000
committer	Benjamin Peterson <benjamin@python.org>	2009-10-29 02:02:47 +0000
commit	5383bf8ff019de61dbb90792a24dc86feed9f7ed (patch)
tree	6257de73415986453d2662e06708d5d558c92ea6
parent	984cfc1e143eff3df9455fd105ab326b14ef4f29 (diff)
download	cpython-5383bf8ff019de61dbb90792a24dc86feed9f7ed.tar.gz