diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2002-10-18 16:40:36 +0000 |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2002-10-18 16:40:36 +0000 |
commit | edf368c351ea3f75a7da19d42f12a34d76887b35 (patch) | |
tree | 0689fc2877b4cc36fee9513403c281ac6b0028d9 | |
parent | ddc369a7d242eca60ee4b4dd60ad766bdeb1354e (diff) | |
download | cpython-git-edf368c351ea3f75a7da19d42f12a34d76887b35.tar.gz |
Make lower/upper/title work for non-BMP characters.
-rw-r--r-- | Objects/unicodectype.c | 39 |
1 files changed, 15 insertions, 24 deletions
```diff
diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c
index 2e67dc50ad..106726d7fc 100644
--- a/Objects/unicodectype.c
+++ b/Objects/unicodectype.c
@@ -62,18 +62,17 @@ int _PyUnicode_IsLinebreak(Py_UNICODE ch)
 Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
+    int delta;
 
     if (ctype->title)
-        ch += ctype->title;
+        delta = ctype->title;
     else
-        ch += ctype->upper;
+        delta = ctype->upper;
 
-#ifdef Py_UNICODE_WIDE
-    /* The database assumes that the values wrap around at 0x10000. */
-    if (ch > 0x10000)
-        ch -= 0x10000;
-#endif
-    return ch;
+    if (delta >= 32768)
+        delta -= 65536;
+
+    return ch + delta;
 }
 
 /* Returns 1 for Unicode characters having the category 'Lt', 0
@@ -358,14 +357,10 @@ int _PyUnicode_IsUppercase(Py_UNICODE ch)
 Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
-
-    ch += ctype->upper;
-#ifdef Py_UNICODE_WIDE
-    /* The database assumes that the values wrap around at 0x10000. */
-    if (ch > 0x10000)
-        ch -= 0x10000;
-#endif
-    return ch;
+    int delta = ctype->upper;
+    if (delta >= 32768)
+        delta -= 65536;
+    return ch + delta;
 }
 
 /* Returns the lowercase Unicode characters corresponding to ch or just
@@ -374,14 +369,10 @@ Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
 Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
-
-    ch += ctype->lower;
-#ifdef Py_UNICODE_WIDE
-    /* The database assumes that the values wrap around at 0x10000. */
-    if (ch > 0x10000)
-        ch -= 0x10000;
-#endif
-    return ch;
+    int delta = ctype->lower;
+    if (delta >= 32768)
+        delta -= 65536;
+    return ch + delta;
 }
 
 /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
```