summaryrefslogtreecommitdiff
path: root/iconvdata
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2000-09-25 21:37:47 +0000
committerUlrich Drepper <drepper@redhat.com>2000-09-25 21:37:47 +0000
commita2aa7df3d6bf73cda977ee050a503a7f7a78a82d (patch)
treeafa23ee404ae7a1e3a73d7b5dcaa62e8fe83fc23 /iconvdata
parentfbb1f75f1eda28971ab9bc9bf3c58d6718841c80 (diff)
downloadglibc-a2aa7df3d6bf73cda977ee050a503a7f7a78a82d.tar.gz
Update.
* charmaps/GBK: Add commented mappings for GBK characters not yet in Unicode. 2000-09-23 Bruno Haible <haible@clisp.cons.org>
Diffstat (limited to 'iconvdata')
-rw-r--r--iconvdata/gbgbk.c16
-rw-r--r--iconvdata/gbk.c14
-rw-r--r--iconvdata/testdata/GBK..UTF84
3 files changed, 21 insertions, 13 deletions
diff --git a/iconvdata/gbgbk.c b/iconvdata/gbgbk.c
index 02e25f31e3..b433ae2cb0 100644
--- a/iconvdata/gbgbk.c
+++ b/iconvdata/gbgbk.c
@@ -65,9 +65,11 @@
All these characters are not defined in GB2312. Besides this \
there is an incompatibility in the mapping. The Unicode tables \
say that 0xA1A4 maps in GB2312 to U30FB while in GBK it maps to \
- U00B7. Since we are free to do whatever we want if a mapping \
- is not available we will not flag this as an error but instead \
- map the two positions. But this means that the mapping \
+ U00B7. Similarly, 0xA1AA maps in GB2312 to U2015 while in GBK \
+ it maps to U2014. Since we are free to do whatever we want if \
+ a mapping is not available we will not flag this as an error \
+ but instead map the two positions. But this means that the \
+ mapping \
\
UCS4 -> GB2312 -> GBK -> UCS4 \
\
@@ -89,6 +91,10 @@
\
ch = (ch << 8) | inptr[1]; \
\
+ /* Map 0xA844 (U2015 in GBK) to 0xA1AA (U2015 in GB2312). */ \
+ if (__builtin_expect (ch == 0xa844, 0)) \
+ ch = 0xa1aa; \
+ \
/* Now determine whether the character is valid. */ \
if (__builtin_expect (ch, 0xa1a1) < 0xa1a1 \
|| __builtin_expect (ch, 0xa1a1) > 0xf7fe \
@@ -123,8 +129,8 @@
#define BODY \
{ \
/* We don't have to care about characters we cannot map. The only \
- problem is the mapping of 0xA1A4 but as explained above we do not \
- do anything special here. */ \
+ problems are the mappings of 0xA1A4 and 0xA1AA but as explained above \
+ we do not do anything special here. */ \
unsigned char ch = *inptr++; \
\
if (ch > 0x7f) \
diff --git a/iconvdata/gbk.c b/iconvdata/gbk.c
index c3010f30d7..f9a53ff2df 100644
--- a/iconvdata/gbk.c
+++ b/iconvdata/gbk.c
@@ -1570,7 +1570,7 @@ static const uint16_t __gbk_to_ucs[] =
[0x17fb] = 0x72d6, [0x17fc] = 0x72d8, [0x17fd] = 0x72da, [0x17fe] = 0x72db,
[0x1861] = 0x3000, [0x1862] = 0x3001, [0x1863] = 0x3002, [0x1864] = 0x00b7,
[0x1865] = 0x02c9, [0x1866] = 0x02c7, [0x1867] = 0x00a8, [0x1868] = 0x3003,
- [0x1869] = 0x3005, [0x186a] = 0x2015, [0x186b] = 0xff5e, [0x186c] = 0x2016,
+ [0x1869] = 0x3005, [0x186a] = 0x2014, [0x186b] = 0xff5e, [0x186c] = 0x2016,
[0x186d] = 0x2026, [0x186e] = 0x2018, [0x186f] = 0x2019, [0x1870] = 0x201c,
[0x1871] = 0x201d, [0x1872] = 0x3014, [0x1873] = 0x3015, [0x1874] = 0x3008,
[0x1875] = 0x3009, [0x1876] = 0x300a, [0x1877] = 0x300b, [0x1878] = 0x300c,
@@ -1712,7 +1712,7 @@ static const uint16_t __gbk_to_ucs[] =
[0x1d2a] = 0x0448, [0x1d2b] = 0x0449, [0x1d2c] = 0x044a, [0x1d2d] = 0x044b,
[0x1d2e] = 0x044c, [0x1d2f] = 0x044d, [0x1d30] = 0x044e, [0x1d31] = 0x044f,
[0x1d40] = 0x02ca, [0x1d41] = 0x02cb, [0x1d42] = 0x02d9, [0x1d43] = 0x2013,
- [0x1d44] = 0x2014, [0x1d45] = 0x2025, [0x1d46] = 0x2035, [0x1d47] = 0x2105,
+ [0x1d44] = 0x2015, [0x1d45] = 0x2025, [0x1d46] = 0x2035, [0x1d47] = 0x2105,
[0x1d48] = 0x2109, [0x1d49] = 0x2196, [0x1d4a] = 0x2197, [0x1d4b] = 0x2198,
[0x1d4c] = 0x2199, [0x1d4d] = 0x2215, [0x1d4e] = 0x221f, [0x1d4f] = 0x2223,
[0x1d50] = 0x2252, [0x1d51] = 0x2266, [0x1d52] = 0x2267, [0x1d53] = 0x22bf,
@@ -5661,8 +5661,8 @@ static const char __gbk_from_ucs4_tab3[][2] =
*/
static const char __gbk_from_ucs4_tab4[][2] =
{
- [0x0000] = "\xa9\x5c", [0x0003] = "\xa8\x43", [0x0004] = "\xa8\x44",
- [0x0005] = "\xa1\xaa", [0x0006] = "\xa1\xac", [0x0008] = "\xa1\xae",
+ [0x0000] = "\xa9\x5c", [0x0003] = "\xa8\x43", [0x0004] = "\xa1\xaa",
+ [0x0005] = "\xa8\x44", [0x0006] = "\xa1\xac", [0x0008] = "\xa1\xae",
[0x0009] = "\xa1\xaf", [0x000c] = "\xa1\xb0", [0x000d] = "\xa1\xb1",
[0x0015] = "\xa8\x45", [0x0016] = "\xa1\xad", [0x0020] = "\xa1\xeb",
[0x0022] = "\xa1\xe4", [0x0023] = "\xa1\xe5", [0x0025] = "\xa8\x46",
@@ -13153,8 +13153,10 @@ static const char __gbk_from_ucs4_tab12[][2] =
\
ch2 = inptr[1]; \
\
- /* All second bytes of a multibyte character must be >= 0x40. */ \
- if (__builtin_expect (ch2, 0x41) < 0x40) \
+ /* All second bytes of a multibyte character must be >= 0x40, and \
+ the __gbk_to_ucs table only covers the range up to 0xfe 0xa0. */ \
+ if (__builtin_expect (ch2, 0x41) < 0x40 \
+ || (__builtin_expect (ch, 0x81) == 0xfe && ch2 > 0xa0)) \
{ \
/* This is an illegal character. */ \
if (! ignore_errors_p ()) \
diff --git a/iconvdata/testdata/GBK..UTF8 b/iconvdata/testdata/GBK..UTF8
index 39f3d0cb2d..cadf7239a6 100644
--- a/iconvdata/testdata/GBK..UTF8
+++ b/iconvdata/testdata/GBK..UTF8
@@ -389,7 +389,7 @@
犘 犙 犚 犛 犜 犝 犞 犠 犡 犢 犣 犤 犥 犦 犧 犨
犩 犪 犫 犮 犱 犲 犳 犵 犺 犻 犼 犽 犾 犿 狀 狅
狆 狇 狉 狊 狋 狌 狏 狑 狓 狔 狕 狖 狘 狚 狛
-   、 。 · ˉ ˇ ¨ 〃 々 ― ~ ‖ … ‘ ’
+   、 。 · ˉ ˇ ¨ 〃 々 — ~ ‖ … ‘ ’
“ ” 〔 〕 〈 〉 《 》 「 」 『 』 〖 〗 【 】
± × ÷ ∶ ∧ ∨ ∑ ∏ ∪ ∩ ∈ ∷ √ ⊥ ∥ ∠
⌒ ⊙ ∫ ∮ ≡ ≌ ≈ ∽ ∝ ≠ ≮ ≯ ≤ ≥ ∞ ∵
@@ -431,7 +431,7 @@
а б в г д е ё ж з и й к л м н
о п р с т у ф х ц ч ш щ ъ ы ь э
ю я
- ˊ ˋ ˙ – — ‥ ‵ ℅ ℉ ↖ ↗ ↘ ↙ ∕ ∟ ∣
+ ˊ ˋ ˙ – ― ‥ ‵ ℅ ℉ ↖ ↗ ↘ ↙ ∕ ∟ ∣
≒ ≦ ≧ ⊿ ═ ║ ╒ ╓ ╔ ╕ ╖ ╗ ╘ ╙ ╚ ╛
╜ ╝ ╞ ╟ ╠ ╡ ╢ ╣ ╤ ╥ ╦ ╧ ╨ ╩ ╪ ╫
╬ ╭ ╮ ╯ ╰ ╱ ╲ ╳ ▁ ▂ ▃ ▄ ▅ ▆ ▇