summary | refs | log | tree | commit | diff
path: root/lib/uniwidth
diff options
context:
space:
mode:
author Bruno Haible <bruno@clisp.org> 2021-12-31 18:38:12 +0100
committer Bruno Haible <bruno@clisp.org> 2021-12-31 18:38:12 +0100
commit 14db2b71b5bd05b94ec6126617fd32cd5f1016cd (patch)
tree 3b05a875542f85aaec8171d063463e99f5cada6c /lib/uniwidth
parent 5610f882b056c5cd528989f747561ee69c89045b (diff)
download gnulib-14db2b71b5bd05b94ec6126617fd32cd5f1016cd.tar.gz
Update to Unicode 14.0.0.
* lib/gen-uni-tables.c (UC_JOINING_GROUP_THIN_YEH, UC_JOINING_GROUP_VERTICAL_TAIL): New enum values.
(fill_arabicshaping, joining_group_as_c_identifier): Recognize these joining groups.
* lib/unictype.in.h (UC_JOINING_GROUP_THIN_YEH, UC_JOINING_GROUP_VERTICAL_TAIL): New enum values.
* lib/unictype/joininggroup_name.h: Add the THIN_YEH, VERTICAL_TAIL joining groups.
* lib/unictype/joininggroup_byname.gperf: Likewise.
* lib/gen-uni-tables.c (LBP_ID1, LBP_ID2): New enum values.
(LBP_ID): Assign artificial value.
(get_lbp): Use the extended_pictographic property to assign LBP_ID1, LBP_ID2 instead of LBP_ID. Update such that unilbrk/lbrkprop.txt comes out as expected.
(debug_output_lbp): Print either LBP_ID1 or LBP_ID2 as LBP_ID.
(lbp_value_to_string): Handle LBP_ID1, LBP_ID2 instead of LBP_ID.
(output_lbrk_rules_as_tables): Treat LBP_ID as macro that maps to two table rows/columns. In rule LB30b, use LBP_ID2 in addition to LBP_EB. Remove redundant part of rule LB27.
* lib/unilbrk/lbrktables.h (LBP_ID1, LBP_ID2): New enum values.
(LBP_ID): Remove enum value.
(unilbrk_table): Update declaration.
* lib/unilbrk/u8-possible-linebreaks.c (u8_possible_linebreaks_loop): Use LBP_ID1 instead of LBP_ID.
* lib/unilbrk/u16-possible-linebreaks.c (u16_possible_linebreaks_loop): Likewise.
* lib/unilbrk/u32-possible-linebreaks.c (u32_possible_linebreaks_loop): Likewise.
* tests/unilbrk/test-u8-possible-linebreaks.c (test_function): Add a test of potential future emoji.
* tests/unilbrk/test-u16-possible-linebreaks.c (test_function): Likewise.
* tests/unilbrk/test-u32-possible-linebreaks.c (test_function): Likewise.
* lib/uniwidth/width.c (nonspacing_table_data, nonspacing_table_ind): Update.
(uc_width): Assign width 2 to the characters 0x1AFF0..0x1AFF3, 0x1AFF5..0x1AFFB, 0x1AFFD..0x1AFFE, 0x1B120..0x1B122, 0x1F6DD..0x1F6DF, 0x1F7F0, 0x1FA7B..0x1FA7C, 0x1FAA9..0x1FAAC, 0x1FAB7..0x1FABA, 0x1FAC3..0x1FAC5, 0x1FAD7..0x1FAD9, 0x1FAE0..0x1FAE7, 0x1FAF0..0x1FAF6.
* tests/uniwidth/test-uc_width2.sh: Expect width 0 for the characters 0x0890..0x0891, 0x0898..0x089F, 0x08CA..0x0902, 0x0C3C, 0x180F, 0x1AC1..0x1ACE, 0x1DFA, 0x10F82..0x10F85, 0x11070, 0x11073..0x11074, 0x110C2, 0x1CF00..0x1CF2D, 0x1CF30..0x1CF46, 0x1E2AE. Expect ambiguous width for the character 0x1734. Expect width 2 for the characters 0x1AFF0..0x1AFF3, 0x1AFF5..0x1AFFB, 0x1AFFD..0x1AFFE, 0x1B120..0x1B122, 0x1F6DD..0x1F6DF, 0x1F7F0, 0x1FA7B..0x1FA7C, 0x1FAA9..0x1FAAC, 0x1FAB7..0x1FABA, 0x1FAC3..0x1FAC5, 0x1FAD7..0x1FAD9, 0x1FAE0..0x1FAE7, 0x1FAF0..0x1FAF6.
* All generated files under lib/uni* and tests/uni*: Regenerate.
* tests/uniname/NameAliases.txt: Update.
* tests/uniname/UnicodeData.txt: Update.
* tests/uninorm/NormalizationTest.txt: Update.
* tests/unigbrk/GraphemeBreakTest.txt: Update.
* tests/uniwbrk/WordBreakTest.txt: Update.
* All the affected modules: Bump required libunistring version.
Diffstat (limited to 'lib/uniwidth')
-rw-r--r-- lib/uniwidth/width.c 59
1 files changed, 37 insertions, 22 deletions
diff --git a/lib/uniwidth/width.c b/lib/uniwidth/width.c
index da818f8f38..9c8d7fb68a 100644
--- a/lib/uniwidth/width.c
+++ b/lib/uniwidth/width.c
@@ -45,7 +45,7 @@
<https://sourceware.org/bugzilla/show_bug.cgi?id=21750>
<https://sourceware.org/bugzilla/show_bug.cgi?id=26120>
*/
-static const unsigned char nonspacing_table_data[46*64] = {
+static const unsigned char nonspacing_table_data[47*64] = {
/* 0x0000-0x01ff */
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0000-0x003f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0x0040-0x007f */
@@ -85,8 +85,8 @@ static const unsigned char nonspacing_table_data[46*64] = {
/* 0x0800-0x09ff */
0x00, 0x00, 0xc0, 0xfb, 0xef, 0x3e, 0x00, 0x00, /* 0x0800-0x083f */
0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, /* 0x0840-0x087f */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0880-0x08bf */
- 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08c0-0x08ff */
+ 0x00, 0x00, 0x03, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0880-0x08bf */
+ 0x00, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08c0-0x08ff */
0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, /* 0x0900-0x093f */
0xfe, 0x21, 0xfe, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x0940-0x097f */
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0980-0x09bf */
@@ -101,7 +101,7 @@ static const unsigned char nonspacing_table_data[46*64] = {
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0b80-0x0bbf */
0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0bc0-0x0bff */
/* 0x0c00-0x0dff */
- 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, /* 0x0c00-0x0c3f */
+ 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd0, /* 0x0c00-0x0c3f */
0xc1, 0x3d, 0x60, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x0c40-0x0c7f */
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0c80-0x0cbf */
0x00, 0x30, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x0cc0-0x0cff */
@@ -141,12 +141,12 @@ static const unsigned char nonspacing_table_data[46*64] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1640-0x167f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1680-0x16bf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x16c0-0x16ff */
- 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1c, 0x00, /* 0x1700-0x173f */
+ 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x0c, 0x00, /* 0x1700-0x173f */
0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, /* 0x1740-0x177f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb0, 0x3f, /* 0x1780-0x17bf */
0x40, 0xfe, 0x0f, 0x20, 0x00, 0x00, 0x00, 0x00, /* 0x17c0-0x17ff */
/* 0x1800-0x19ff */
- 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1800-0x183f */
+ 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1800-0x183f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1840-0x187f */
0x60, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* 0x1880-0x18bf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18c0-0x18ff */
@@ -158,7 +158,7 @@ static const unsigned char nonspacing_table_data[46*64] = {
0x00, 0x00, 0x80, 0x09, 0x00, 0x00, 0x00, 0x00, /* 0x1a00-0x1a3f */
0x00, 0x00, 0x40, 0x7f, 0xe5, 0x1f, 0xf8, 0x9f, /* 0x1a40-0x1a7f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, /* 0x1a80-0x1abf */
- 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1ac0-0x1aff */
+ 0xff, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1ac0-0x1aff */
0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd0, 0x17, /* 0x1b00-0x1b3f */
0x04, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x0f, 0x00, /* 0x1b40-0x1b7f */
0x03, 0x00, 0x00, 0x00, 0x3c, 0x3b, 0x00, 0x00, /* 0x1b80-0x1bbf */
@@ -171,7 +171,7 @@ static const unsigned char nonspacing_table_data[46*64] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d00-0x1d3f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d40-0x1d7f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d80-0x1dbf */
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfb, /* 0x1dc0-0x1dff */
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x1dc0-0x1dff */
/* 0x2000-0x21ff */
0x00, 0xf8, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, /* 0x2000-0x203f */
0x00, 0x00, 0x00, 0x00, 0xdf, 0xff, 0x00, 0x00, /* 0x2040-0x207f */
@@ -296,13 +296,13 @@ static const unsigned char nonspacing_table_data[46*64] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10ec0-0x10eff */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10f00-0x10f3f */
0xc0, 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10f40-0x10f7f */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10f80-0x10fbf */
+ 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10f80-0x10fbf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10fc0-0x10fff */
/* 0x11000-0x111ff */
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, /* 0x11000-0x1103f */
- 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0x11040-0x1107f */
+ 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x80, /* 0x11040-0x1107f */
0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x26, /* 0x11080-0x110bf */
- 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x110c0-0x110ff */
+ 0x04, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x110c0-0x110ff */
0x07, 0x00, 0x00, 0x00, 0x80, 0xef, 0x1f, 0x00, /* 0x11100-0x1113f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, /* 0x11140-0x1117f */
0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x7f, /* 0x11180-0x111bf */
@@ -406,6 +406,15 @@ static const unsigned char nonspacing_table_data[46*64] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1bd40-0x1bd7f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1bd80-0x1bdbf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1bdc0-0x1bdff */
+ /* 0x1ce00-0x1cfff */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1ce00-0x1ce3f */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1ce40-0x1ce7f */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1ce80-0x1cebf */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1cec0-0x1ceff */
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, /* 0x1cf00-0x1cf3f */
+ 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1cf40-0x1cf7f */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1cf80-0x1cfbf */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1cfc0-0x1cfff */
/* 0x1d000-0x1d1ff */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d000-0x1d03f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d040-0x1d07f */
@@ -445,7 +454,7 @@ static const unsigned char nonspacing_table_data[46*64] = {
/* 0x1e200-0x1e3ff */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e200-0x1e23f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e240-0x1e27f */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e280-0x1e2bf */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, /* 0x1e280-0x1e2bf */
0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, /* 0x1e2c0-0x1e2ff */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e300-0x1e33f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e340-0x1e37f */
@@ -490,9 +499,9 @@ static const signed char nonspacing_table_ind[248] = {
-1, -1, -1, -1, -1, -1, -1, -1, /* 0x19000-0x19fff */
-1, -1, -1, -1, -1, -1, -1, -1, /* 0x1a000-0x1afff */
-1, -1, -1, -1, -1, -1, 39, -1, /* 0x1b000-0x1bfff */
- -1, -1, -1, -1, -1, -1, -1, -1, /* 0x1c000-0x1cfff */
- 40, 41, -1, -1, -1, 42, -1, -1, /* 0x1d000-0x1dfff */
- 43, 44, -1, -1, 45, -1, -1, -1 /* 0x1e000-0x1efff */
+ -1, -1, -1, -1, -1, -1, -1, 40, /* 0x1c000-0x1cfff */
+ 41, 42, -1, -1, -1, 43, -1, -1, /* 0x1d000-0x1dfff */
+ 44, 45, -1, -1, 46, -1, -1, -1 /* 0x1e000-0x1efff */
};
/* Determine number of column positions required for UC. */
@@ -585,7 +594,9 @@ uc_width (ucs4_t uc, const char *encoding)
|| (uc >= 0x17000 && uc < 0x187f8) /* Tangut */
|| (uc >= 0x18800 && uc < 0x18cd6) /* Tangut components */
|| (uc >= 0x18d00 && uc < 0x18d09) /* Tangul Ideograph Supplement */
- || (uc >= 0x1b000 && uc < 0x1b120) /* Kana supplement, Kana Extended-A */
+ || ((uc >= 0x1aff0 && uc < 0x1afff) /* Katakana letter Minnan */
+ && uc != 0x1aff4 && uc != 0x1affc)
+ || (uc >= 0x1b000 && uc < 0x1b123) /* Kana supplement, Kana Extended-A */
|| (uc >= 0x1b150 && uc < 0x1b153) /* Small Hiragana */
|| (uc >= 0x1b164 && uc < 0x1b168) /* Small Katakana */
|| (uc >= 0x1b170 && uc < 0x1b2fc) /* Nushu */
@@ -621,19 +632,23 @@ uc_width (ucs4_t uc, const char *encoding)
|| uc == 0x1f6cc
|| (uc >= 0x1f6d0 && uc < 0x1f6d3)
|| (uc >= 0x1f6d5 && uc < 0x1f6d8)
+ || (uc >= 0x1f6dd && uc < 0x1f6e0)
|| (uc >= 0x1f6eb && uc < 0x1f6ed)
|| (uc >= 0x1f6f4 && uc < 0x1f6fd)
- || (uc >= 0x1f7e0 && uc < 0x1f7ec)))
+ || (uc >= 0x1f7e0 && uc < 0x1f7ec)
+ || uc == 0x1f7f0))
|| ((uc >= 0x1f90c && uc < 0x1fa00)
&& uc != 0x1f93b && uc != 0x1f946)
|| ((uc >> 9) == 0xfd
&& ((uc >= 0x1fa70 && uc < 0x1fa75)
- || (uc >= 0x1fa78 && uc < 0x1fa7b)
+ || (uc >= 0x1fa78 && uc < 0x1fa7d)
|| (uc >= 0x1fa80 && uc < 0x1fa87)
- || (uc >= 0x1fa90 && uc < 0x1faa9)
- || (uc >= 0x1fab0 && uc < 0x1fab7)
- || (uc >= 0x1fac0 && uc < 0x1fac3)
- || (uc >= 0x1fad0 && uc < 0x1fad7)))
+ || (uc >= 0x1fa90 && uc < 0x1faad)
+ || (uc >= 0x1fab0 && uc < 0x1fabb)
+ || (uc >= 0x1fac0 && uc < 0x1fac6)
+ || (uc >= 0x1fad0 && uc < 0x1fada)
+ || (uc >= 0x1fae0 && uc < 0x1fae8)
+ || (uc >= 0x1faf0 && uc < 0x1faf7)))
|| (uc >= 0x20000 && uc <= 0x2ffff) /* Supplementary Ideographic Plane */
|| (uc >= 0x30000 && uc <= 0x3ffff) /* Tertiary Ideographic Plane */
) )