diff options
author | Bruno Haible <bruno@clisp.org> | 2021-12-31 18:38:12 +0100 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2021-12-31 18:38:12 +0100 |
commit | 14db2b71b5bd05b94ec6126617fd32cd5f1016cd (patch) | |
tree | 3b05a875542f85aaec8171d063463e99f5cada6c /lib/uniwidth | |
parent | 5610f882b056c5cd528989f747561ee69c89045b (diff) | |
download | gnulib-14db2b71b5bd05b94ec6126617fd32cd5f1016cd.tar.gz |
Update to Unicode 14.0.0.
* lib/gen-uni-tables.c (UC_JOINING_GROUP_THIN_YEH,
UC_JOINING_GROUP_VERTICAL_TAIL): New enum values.
(fill_arabicshaping, joining_group_as_c_identifier): Recognize these
joining groups.
* lib/unictype.in.h (UC_JOINING_GROUP_THIN_YEH,
UC_JOINING_GROUP_VERTICAL_TAIL): New enum values.
* lib/unictype/joininggroup_name.h: Add the THIN_YEH, VERTICAL_TAIL
joining groups.
* lib/unictype/joininggroup_byname.gperf: Likewise.
* lib/gen-uni-tables.c (LBP_ID1, LBP_ID2): New enum values.
(LBP_ID): Assign artificial value.
(get_lbp): Use the extended_pictographic property to assign LBP_ID1,
LBP_ID2 instead of LBP_ID. Update such that unilbrk/lbrkprop.txt comes
out as expected.
(debug_output_lbp): Print either LBP_ID1 or LBP_ID2 as LBP_ID.
(lbp_value_to_string): Handle LBP_ID1, LBP_ID2 instead of LBP_ID.
(output_lbrk_rules_as_tables): Treat LBP_ID as macro that maps to two
table rows/columns. In rule LB30b, use LBP_ID2 in addition to LBP_EB.
Remove redundant part of rule LB27.
* lib/unilbrk/lbrktables.h (LBP_ID1, LBP_ID2): New enum values.
(LBP_ID): Remove enum value.
(unilbrk_table): Update declaration.
* lib/unilbrk/u8-possible-linebreaks.c (u8_possible_linebreaks_loop):
Use LBP_ID1 instead of LBP_ID.
* lib/unilbrk/u16-possible-linebreaks.c (u16_possible_linebreaks_loop):
Likewise.
* lib/unilbrk/u32-possible-linebreaks.c (u32_possible_linebreaks_loop):
Likewise.
* tests/unilbrk/test-u8-possible-linebreaks.c (test_function): Add a
test of potential future emoji.
* tests/unilbrk/test-u16-possible-linebreaks.c (test_function):
Likewise.
* tests/unilbrk/test-u32-possible-linebreaks.c (test_function):
Likewise.
* lib/uniwidth/width.c (nonspacing_table_data, nonspacing_table_ind):
Update.
(uc_width): Assign width 2 to the characters 0x1AFF0..0x1AFF3,
0x1AFF5..0x1AFFB, 0x1AFFD..0x1AFFE, 0x1B120..0x1B122, 0x1F6DD..0x1F6DF,
0x1F7F0, 0x1FA7B..0x1FA7C, 0x1FAA9..0x1FAAC, 0x1FAB7..0x1FABA,
0x1FAC3..0x1FAC5, 0x1FAD7..0x1FAD9, 0x1FAE0..0x1FAE7, 0x1FAF0..0x1FAF6.
* tests/uniwidth/test-uc_width2.sh: Expect width 0 for the characters
0x0890..0x0891, 0x0898..0x089F, 0x08CA..0x0902, 0x0C3C, 0x180F,
0x1AC1..0x1ACE, 0x1DFA, 0x10F82..0x10F85, 0x11070, 0x11073..0x11074,
0x110C2, 0x1CF00..0x1CF2D, 0x1CF30..0x1CF46, 0x1E2AE. Expect ambiguous
width for the character 0x1734. Expect width 2 for the characters
0x1AFF0..0x1AFF3, 0x1AFF5..0x1AFFB, 0x1AFFD..0x1AFFE, 0x1B120..0x1B122,
0x1F6DD..0x1F6DF, 0x1F7F0, 0x1FA7B..0x1FA7C, 0x1FAA9..0x1FAAC,
0x1FAB7..0x1FABA, 0x1FAC3..0x1FAC5, 0x1FAD7..0x1FAD9, 0x1FAE0..0x1FAE7,
0x1FAF0..0x1FAF6.
* All generated files under lib/uni* and tests/uni*: Regenerate.
* tests/uniname/NameAliases.txt: Update.
* tests/uniname/UnicodeData.txt: Update.
* tests/uninorm/NormalizationTest.txt: Update.
* tests/unigbrk/GraphemeBreakTest.txt: Update.
* tests/uniwbrk/WordBreakTest.txt: Update.
* All the affected modules: Bump required libunistring version.
Diffstat (limited to 'lib/uniwidth')
-rw-r--r-- | lib/uniwidth/width.c | 59 |
1 file changed, 37 insertions, 22 deletions
diff --git a/lib/uniwidth/width.c b/lib/uniwidth/width.c index da818f8f38..9c8d7fb68a 100644 --- a/lib/uniwidth/width.c +++ b/lib/uniwidth/width.c @@ -45,7 +45,7 @@ <https://sourceware.org/bugzilla/show_bug.cgi?id=21750> <https://sourceware.org/bugzilla/show_bug.cgi?id=26120> */ -static const unsigned char nonspacing_table_data[46*64] = { +static const unsigned char nonspacing_table_data[47*64] = { /* 0x0000-0x01ff */ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0000-0x003f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0x0040-0x007f */ @@ -85,8 +85,8 @@ static const unsigned char nonspacing_table_data[46*64] = { /* 0x0800-0x09ff */ 0x00, 0x00, 0xc0, 0xfb, 0xef, 0x3e, 0x00, 0x00, /* 0x0800-0x083f */ 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, /* 0x0840-0x087f */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0880-0x08bf */ - 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08c0-0x08ff */ + 0x00, 0x00, 0x03, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0880-0x08bf */ + 0x00, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08c0-0x08ff */ 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, /* 0x0900-0x093f */ 0xfe, 0x21, 0xfe, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x0940-0x097f */ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0980-0x09bf */ @@ -101,7 +101,7 @@ static const unsigned char nonspacing_table_data[46*64] = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0b80-0x0bbf */ 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0bc0-0x0bff */ /* 0x0c00-0x0dff */ - 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, /* 0x0c00-0x0c3f */ + 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd0, /* 0x0c00-0x0c3f */ 0xc1, 0x3d, 0x60, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x0c40-0x0c7f */ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0c80-0x0cbf */ 0x00, 0x30, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x0cc0-0x0cff */ @@ -141,12 +141,12 @@ static const unsigned char nonspacing_table_data[46*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 
0x1640-0x167f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1680-0x16bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x16c0-0x16ff */ - 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1c, 0x00, /* 0x1700-0x173f */ + 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x0c, 0x00, /* 0x1700-0x173f */ 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, /* 0x1740-0x177f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb0, 0x3f, /* 0x1780-0x17bf */ 0x40, 0xfe, 0x0f, 0x20, 0x00, 0x00, 0x00, 0x00, /* 0x17c0-0x17ff */ /* 0x1800-0x19ff */ - 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1800-0x183f */ + 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1800-0x183f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1840-0x187f */ 0x60, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* 0x1880-0x18bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18c0-0x18ff */ @@ -158,7 +158,7 @@ static const unsigned char nonspacing_table_data[46*64] = { 0x00, 0x00, 0x80, 0x09, 0x00, 0x00, 0x00, 0x00, /* 0x1a00-0x1a3f */ 0x00, 0x00, 0x40, 0x7f, 0xe5, 0x1f, 0xf8, 0x9f, /* 0x1a40-0x1a7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, /* 0x1a80-0x1abf */ - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1ac0-0x1aff */ + 0xff, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1ac0-0x1aff */ 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd0, 0x17, /* 0x1b00-0x1b3f */ 0x04, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x0f, 0x00, /* 0x1b40-0x1b7f */ 0x03, 0x00, 0x00, 0x00, 0x3c, 0x3b, 0x00, 0x00, /* 0x1b80-0x1bbf */ @@ -171,7 +171,7 @@ static const unsigned char nonspacing_table_data[46*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d00-0x1d3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d40-0x1d7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d80-0x1dbf */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfb, /* 0x1dc0-0x1dff */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x1dc0-0x1dff */ /* 0x2000-0x21ff */ 0x00, 0xf8, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, /* 0x2000-0x203f 
*/ 0x00, 0x00, 0x00, 0x00, 0xdf, 0xff, 0x00, 0x00, /* 0x2040-0x207f */ @@ -296,13 +296,13 @@ static const unsigned char nonspacing_table_data[46*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10ec0-0x10eff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10f00-0x10f3f */ 0xc0, 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10f40-0x10f7f */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10f80-0x10fbf */ + 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10f80-0x10fbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10fc0-0x10fff */ /* 0x11000-0x111ff */ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, /* 0x11000-0x1103f */ - 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0x11040-0x1107f */ + 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x80, /* 0x11040-0x1107f */ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x26, /* 0x11080-0x110bf */ - 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x110c0-0x110ff */ + 0x04, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x110c0-0x110ff */ 0x07, 0x00, 0x00, 0x00, 0x80, 0xef, 0x1f, 0x00, /* 0x11100-0x1113f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, /* 0x11140-0x1117f */ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x7f, /* 0x11180-0x111bf */ @@ -406,6 +406,15 @@ static const unsigned char nonspacing_table_data[46*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1bd40-0x1bd7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1bd80-0x1bdbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1bdc0-0x1bdff */ + /* 0x1ce00-0x1cfff */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1ce00-0x1ce3f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1ce40-0x1ce7f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1ce80-0x1cebf */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1cec0-0x1ceff */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, /* 0x1cf00-0x1cf3f */ + 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1cf40-0x1cf7f */ + 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1cf80-0x1cfbf */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1cfc0-0x1cfff */ /* 0x1d000-0x1d1ff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d000-0x1d03f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d040-0x1d07f */ @@ -445,7 +454,7 @@ static const unsigned char nonspacing_table_data[46*64] = { /* 0x1e200-0x1e3ff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e200-0x1e23f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e240-0x1e27f */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e280-0x1e2bf */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, /* 0x1e280-0x1e2bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, /* 0x1e2c0-0x1e2ff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e300-0x1e33f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e340-0x1e37f */ @@ -490,9 +499,9 @@ static const signed char nonspacing_table_ind[248] = { -1, -1, -1, -1, -1, -1, -1, -1, /* 0x19000-0x19fff */ -1, -1, -1, -1, -1, -1, -1, -1, /* 0x1a000-0x1afff */ -1, -1, -1, -1, -1, -1, 39, -1, /* 0x1b000-0x1bfff */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 0x1c000-0x1cfff */ - 40, 41, -1, -1, -1, 42, -1, -1, /* 0x1d000-0x1dfff */ - 43, 44, -1, -1, 45, -1, -1, -1 /* 0x1e000-0x1efff */ + -1, -1, -1, -1, -1, -1, -1, 40, /* 0x1c000-0x1cfff */ + 41, 42, -1, -1, -1, 43, -1, -1, /* 0x1d000-0x1dfff */ + 44, 45, -1, -1, 46, -1, -1, -1 /* 0x1e000-0x1efff */ }; /* Determine number of column positions required for UC. 
*/ @@ -585,7 +594,9 @@ uc_width (ucs4_t uc, const char *encoding) || (uc >= 0x17000 && uc < 0x187f8) /* Tangut */ || (uc >= 0x18800 && uc < 0x18cd6) /* Tangut components */ || (uc >= 0x18d00 && uc < 0x18d09) /* Tangul Ideograph Supplement */ - || (uc >= 0x1b000 && uc < 0x1b120) /* Kana supplement, Kana Extended-A */ + || ((uc >= 0x1aff0 && uc < 0x1afff) /* Katakana letter Minnan */ + && uc != 0x1aff4 && uc != 0x1affc) + || (uc >= 0x1b000 && uc < 0x1b123) /* Kana supplement, Kana Extended-A */ || (uc >= 0x1b150 && uc < 0x1b153) /* Small Hiragana */ || (uc >= 0x1b164 && uc < 0x1b168) /* Small Katakana */ || (uc >= 0x1b170 && uc < 0x1b2fc) /* Nushu */ @@ -621,19 +632,23 @@ uc_width (ucs4_t uc, const char *encoding) || uc == 0x1f6cc || (uc >= 0x1f6d0 && uc < 0x1f6d3) || (uc >= 0x1f6d5 && uc < 0x1f6d8) + || (uc >= 0x1f6dd && uc < 0x1f6e0) || (uc >= 0x1f6eb && uc < 0x1f6ed) || (uc >= 0x1f6f4 && uc < 0x1f6fd) - || (uc >= 0x1f7e0 && uc < 0x1f7ec))) + || (uc >= 0x1f7e0 && uc < 0x1f7ec) + || uc == 0x1f7f0)) || ((uc >= 0x1f90c && uc < 0x1fa00) && uc != 0x1f93b && uc != 0x1f946) || ((uc >> 9) == 0xfd && ((uc >= 0x1fa70 && uc < 0x1fa75) - || (uc >= 0x1fa78 && uc < 0x1fa7b) + || (uc >= 0x1fa78 && uc < 0x1fa7d) || (uc >= 0x1fa80 && uc < 0x1fa87) - || (uc >= 0x1fa90 && uc < 0x1faa9) - || (uc >= 0x1fab0 && uc < 0x1fab7) - || (uc >= 0x1fac0 && uc < 0x1fac3) - || (uc >= 0x1fad0 && uc < 0x1fad7))) + || (uc >= 0x1fa90 && uc < 0x1faad) + || (uc >= 0x1fab0 && uc < 0x1fabb) + || (uc >= 0x1fac0 && uc < 0x1fac6) + || (uc >= 0x1fad0 && uc < 0x1fada) + || (uc >= 0x1fae0 && uc < 0x1fae8) + || (uc >= 0x1faf0 && uc < 0x1faf7))) || (uc >= 0x20000 && uc <= 0x2ffff) /* Supplementary Ideographic Plane */ || (uc >= 0x30000 && uc <= 0x3ffff) /* Tertiary Ideographic Plane */ ) ) |