Encode surrogates in UTF-8 even for a wide Py_UNICODE.

Implement sys.maxunicode. Explicitly wrap the results of the upper/lower/title case computations around at 0x10000 for wide Py_UNICODE. When decoding large characters with UTF-8, represent expected test results using the \U notation.
author: Martin v. Löwis <martin@v.loewis.de> 2001-06-27 06:28:56 +0000
committer: Martin v. Löwis <martin@v.loewis.de> 2001-06-27 06:28:56 +0000
commit: ce9b5a55e164f1128756478b6a2bb548abec1980 (patch)
tree: 0b616e0fae5ec7204f723235d196ae2b7c124d78 /Objects/unicodectype.c
parent: 236d8b79748fec890d57ad0dd99ea3f1c3ba57df (diff)
download: cpython-git-ce9b5a55e164f1128756478b6a2bb548abec1980.tar.gz
1 file changed, 27 insertions, 8 deletions
diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c
index 3bc19b2d44..13fc6128c1 100644
--- a/Objects/unicodectype.c
+++ b/Objects/unicodectype.c
@@ -59,14 +59,21 @@ int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
 /* Returns the titlecase Unicode characters corresponding to ch or just
    ch if no titlecase mapping is known. */
 
-Py_UNICODE _PyUnicode_ToTitlecase(register const Py_UNICODE ch)
+Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
     if (ctype->title)
-        return ch + ctype->title;
-
-    return ch + ctype->upper;
+        ch += ctype->title;
+    else
+	ch += ctype->upper;
+
+#ifdef USE_UCS4_STORAGE
+    /* The database assumes that the values wrap around at 0x10000. */
+    if (ch > 0x10000)
+	ch -= 0x10000;
+#endif
+    return ch;
 }
 
 /* Returns 1 for Unicode characters having the category 'Lt', 0
@@ -348,21 +355,33 @@ int _PyUnicode_IsUppercase(register const Py_UNICODE ch)
 /* Returns the uppercase Unicode characters corresponding to ch or just
    ch if no uppercase mapping is known. */
 
-Py_UNICODE _PyUnicode_ToUppercase(register const Py_UNICODE ch)
+Py_UNICODE _PyUnicode_ToUppercase(register Py_UNICODE ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
-    return ch + ctype->upper;
+    ch += ctype->upper;
+#ifdef USE_UCS4_STORAGE
+    /* The database assumes that the values wrap around at 0x10000. */
+    if (ch > 0x10000)
+	ch -= 0x10000;
+#endif
+    return ch;
 }
 
 /* Returns the lowercase Unicode characters corresponding to ch or just
    ch if no lowercase mapping is known. */
 
-Py_UNICODE _PyUnicode_ToLowercase(register const Py_UNICODE ch)
+Py_UNICODE _PyUnicode_ToLowercase(register Py_UNICODE ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
-    return ch + ctype->lower;
+    ch += ctype->lower;
+#ifdef USE_UCS4_STORAGE
+    /* The database assumes that the values wrap around at 0x10000. */
+    if (ch > 0x10000)
+	ch -= 0x10000;
+#endif
+    return ch;
 }
 
 /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
author	Martin v. Löwis <martin@v.loewis.de>	2001-06-27 06:28:56 +0000
committer	Martin v. Löwis <martin@v.loewis.de>	2001-06-27 06:28:56 +0000
commit	ce9b5a55e164f1128756478b6a2bb548abec1980 (patch)
tree	0b616e0fae5ec7204f723235d196ae2b7c124d78 /Objects/unicodectype.c
parent	236d8b79748fec890d57ad0dd99ea3f1c3ba57df (diff)
download	cpython-git-ce9b5a55e164f1128756478b6a2bb548abec1980.tar.gz