summaryrefslogtreecommitdiff
path: root/Modules
diff options
context:
space:
mode:
author: Xiang Zhang <angwerzx@126.com> 2018-06-15 21:26:55 +0800
committer: GitHub <noreply@github.com> 2018-06-15 21:26:55 +0800
commit: 1889c4cbd62e200fa4cde3d6219e0aadf9bd8149 (patch)
tree: 884cddef4342e2ee2ed415b122e65eaea83f5789 /Modules
parent: fc8ea20c6f8571de96791bc5f7f2d693406024c7 (diff)
download: cpython-git-1889c4cbd62e200fa4cde3d6219e0aadf9bd8149.tar.gz
bpo-29456: Fix bugs in unicodedata.normalize: u1176, u11a7 and u11c3 (GH-1958) (GH-7704)
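The Hangul composition boundary checks were wrong: the second character (vowel) was accepted in [0x1161, 0x1176] when the correct range is [0x1161, 0x1176), and the third character (trailing consonant) was accepted in [0x11A7, 0x11C3] when the correct range is (0x11A7, 0x11C3). As a result, U+1176, U+11A7 and U+11C3 were incorrectly treated as composable jamo. (cherry picked from commit d134809cd3764c6a634eab7bb8995e3e2eff14d5) Co-authored-by: Wonsup Yoon <pusnow@me.com>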
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/unicodedata.c 8
1 file changed, 6 insertions, 2 deletions
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 6b01fc7616..df6ffe343c 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -664,14 +664,18 @@ nfc_nfkc(PyObject *self, PyObject *input, int k)
pairs, since we always have decomposed data. */
if (LBase <= *i && *i < (LBase+LCount) &&
i + 1 < end &&
- VBase <= i[1] && i[1] <= (VBase+VCount)) {
+ VBase <= i[1] && i[1] < (VBase+VCount)) {
+ /* check L character is a modern leading consonant (0x1100 ~ 0x1112)
+ and V character is a modern vowel (0x1161 ~ 0x1175). */
int LIndex, VIndex;
LIndex = i[0] - LBase;
VIndex = i[1] - VBase;
code = SBase + (LIndex*VCount+VIndex)*TCount;
i+=2;
if (i < end &&
- TBase <= *i && *i <= (TBase+TCount)) {
+ TBase < *i && *i < (TBase+TCount)) {
+ /* check T character is a modern trailing consonant
+ (0x11A8 ~ 0x11C2). */
code += *i-TBase;
i++;
}