diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2001-06-27 06:28:56 +0000 |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2001-06-27 06:28:56 +0000 |
commit | ce9b5a55e164f1128756478b6a2bb548abec1980 (patch) | |
tree | 0b616e0fae5ec7204f723235d196ae2b7c124d78 /Objects/unicodectype.c | |
parent | 236d8b79748fec890d57ad0dd99ea3f1c3ba57df (diff) | |
download | cpython-git-ce9b5a55e164f1128756478b6a2bb548abec1980.tar.gz |
Encode surrogates in UTF-8 even for a wide Py_UNICODE.
Implement sys.maxunicode.
Explicitly wrap around upper/lower computations for wide Py_UNICODE.
When decoding large characters with UTF-8, represent expected test
results using the \U notation.
Diffstat (limited to 'Objects/unicodectype.c')
-rw-r--r-- | Objects/unicodectype.c | 35 |
1 files changed, 27 insertions, 8 deletions
```diff
diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c
index 3bc19b2d44..13fc6128c1 100644
--- a/Objects/unicodectype.c
+++ b/Objects/unicodectype.c
@@ -59,14 +59,21 @@ int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
 /* Returns the titlecase Unicode characters corresponding to ch or just
    ch if no titlecase mapping is known. */
 
-Py_UNICODE _PyUnicode_ToTitlecase(register const Py_UNICODE ch)
+Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
     if (ctype->title)
-        return ch + ctype->title;
-
-    return ch + ctype->upper;
+        ch += ctype->title;
+    else
+        ch += ctype->upper;
+
+#ifdef USE_UCS4_STORAGE
+    /* The database assumes that the values wrap around at 0x10000. */
+    if (ch > 0x10000)
+        ch -= 0x10000;
+#endif
+    return ch;
 }
 
 /* Returns 1 for Unicode characters having the category 'Lt', 0
@@ -348,21 +355,33 @@ int _PyUnicode_IsUppercase(register const Py_UNICODE ch)
 /* Returns the uppercase Unicode characters corresponding to ch or just
    ch if no uppercase mapping is known. */
 
-Py_UNICODE _PyUnicode_ToUppercase(register const Py_UNICODE ch)
+Py_UNICODE _PyUnicode_ToUppercase(register Py_UNICODE ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
-    return ch + ctype->upper;
+    ch += ctype->upper;
+#ifdef USE_UCS4_STORAGE
+    /* The database assumes that the values wrap around at 0x10000. */
+    if (ch > 0x10000)
+        ch -= 0x10000;
+#endif
+    return ch;
 }
 
 /* Returns the lowercase Unicode characters corresponding to ch or just
    ch if no lowercase mapping is known. */
 
-Py_UNICODE _PyUnicode_ToLowercase(register const Py_UNICODE ch)
+Py_UNICODE _PyUnicode_ToLowercase(register Py_UNICODE ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
-    return ch + ctype->lower;
+    ch += ctype->lower;
+#ifdef USE_UCS4_STORAGE
+    /* The database assumes that the values wrap around at 0x10000. */
+    if (ch > 0x10000)
+        ch -= 0x10000;
+#endif
+    return ch;
 }
 
 /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
```