summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Martin v. Löwis <martin@v.loewis.de>, 2002-10-18 16:40:36 +0000
committer: Martin v. Löwis <martin@v.loewis.de>, 2002-10-18 16:40:36 +0000
commit: edf368c351ea3f75a7da19d42f12a34d76887b35 (patch)
tree: 0689fc2877b4cc36fee9513403c281ac6b0028d9
parent: ddc369a7d242eca60ee4b4dd60ad766bdeb1354e (diff)
download: cpython-git-edf368c351ea3f75a7da19d42f12a34d76887b35.tar.gz
Make lower/upper/title work for non-BMP characters.
-rw-r--r-- Objects/unicodectype.c 39
1 file changed, 15 insertions, 24 deletions
diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c
index 2e67dc50ad..106726d7fc 100644
--- a/Objects/unicodectype.c
+++ b/Objects/unicodectype.c
@@ -62,18 +62,17 @@ int _PyUnicode_IsLinebreak(Py_UNICODE ch)
Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
{
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
+ int delta;
if (ctype->title)
- ch += ctype->title;
+ delta = ctype->title;
else
- ch += ctype->upper;
+ delta = ctype->upper;
-#ifdef Py_UNICODE_WIDE
- /* The database assumes that the values wrap around at 0x10000. */
- if (ch > 0x10000)
- ch -= 0x10000;
-#endif
- return ch;
+ if (delta >= 32768)
+ delta -= 65536;
+
+ return ch + delta;
}
/* Returns 1 for Unicode characters having the category 'Lt', 0
@@ -358,14 +357,10 @@ int _PyUnicode_IsUppercase(Py_UNICODE ch)
Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
{
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
-
- ch += ctype->upper;
-#ifdef Py_UNICODE_WIDE
- /* The database assumes that the values wrap around at 0x10000. */
- if (ch > 0x10000)
- ch -= 0x10000;
-#endif
- return ch;
+ int delta = ctype->upper;
+ if (delta >= 32768)
+ delta -= 65536;
+ return ch + delta;
}
/* Returns the lowercase Unicode characters corresponding to ch or just
@@ -374,14 +369,10 @@ Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
{
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
-
- ch += ctype->lower;
-#ifdef Py_UNICODE_WIDE
- /* The database assumes that the values wrap around at 0x10000. */
- if (ch > 0x10000)
- ch -= 0x10000;
-#endif
- return ch;
+ int delta = ctype->lower;
+ if (delta >= 32768)
+ delta -= 65536;
+ return ch + delta;
}
/* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',