diff options
Diffstat (limited to 'ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c')
-rw-r--r-- | ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c | 749 |
1 files changed, 71 insertions, 678 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c index 3879f9eb5b..01cec63236 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c @@ -36,54 +36,40 @@ typedef struct _mbfl_filt_conv_wchar_cp50220_ctx { mbfl_convert_filter last; } mbfl_filt_conv_wchar_cp50220_ctx; -static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_cp50220(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_cp50221(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_cp50222(int c, mbfl_identify_filter *filter); static void mbfl_filt_conv_wchar_cp50220_ctor(mbfl_convert_filter *filt); static void mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt); static void mbfl_filt_conv_wchar_cp50220_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest); +static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter); -const mbfl_encoding mbfl_encoding_jis_ms = { - mbfl_no_encoding_jis_ms, - "JIS-ms", - "ISO-2022-JP", - NULL, - NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_jis_ms_wchar, - &vtbl_wchar_jis_ms -}; +/* Previously, a dubious 'encoding' called 'cp50220raw' was supported + * This was just CP50220, but the implementation was less strict regarding + * invalid characters; it would silently pass some through + * This 'encoding' only existed in mbstring. 
In case some poor, lost soul is + * still using it, retain minimal support by aliasing it to CP50220 + * + * Further, mbstring also had a made-up encoding called "JIS-ms" + * This was the same as CP5022{0,1,2}, but without their special ways of + * handling conversion of Unicode half-width katakana */ +static const char *cp50220_aliases[] = {"cp50220raw", "cp50220-raw", "JIS-ms", NULL}; const mbfl_encoding mbfl_encoding_cp50220 = { mbfl_no_encoding_cp50220, "CP50220", "ISO-2022-JP", - (const char *(*)[])NULL, + cp50220_aliases, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE, + MBFL_ENCTYPE_GL_UNSAFE, &vtbl_cp50220_wchar, &vtbl_wchar_cp50220 }; -const mbfl_encoding mbfl_encoding_cp50220raw = { - mbfl_no_encoding_cp50220raw, - "CP50220raw", - "ISO-2022-JP", - (const char *(*)[])NULL, - NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_cp50220raw_wchar, - &vtbl_wchar_cp50220raw -}; - const mbfl_encoding mbfl_encoding_cp50221 = { mbfl_no_encoding_cp50221, "CP50221", "ISO-2022-JP", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE, + MBFL_ENCTYPE_GL_UNSAFE, &vtbl_cp50221_wchar, &vtbl_wchar_cp50221 }; @@ -94,68 +80,18 @@ const mbfl_encoding mbfl_encoding_cp50222 = { "ISO-2022-JP", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE, + MBFL_ENCTYPE_GL_UNSAFE, &vtbl_cp50222_wchar, &vtbl_wchar_cp50222 }; -const struct mbfl_identify_vtbl vtbl_identify_jis_ms = { - mbfl_no_encoding_jis_ms, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_jis_ms -}; - -const struct mbfl_identify_vtbl vtbl_identify_cp50220 = { - mbfl_no_encoding_cp50220, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_cp50220 -}; - -const struct mbfl_identify_vtbl vtbl_identify_cp50220raw = { - mbfl_no_encoding_cp50220raw, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_cp50220 -}; - -const struct mbfl_identify_vtbl vtbl_identify_cp50221 = { - mbfl_no_encoding_cp50221, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_cp50221 -}; - -const struct mbfl_identify_vtbl 
vtbl_identify_cp50222 = { - mbfl_no_encoding_cp50222, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_cp50222 -}; - -const struct mbfl_convert_vtbl vtbl_jis_ms_wchar = { - mbfl_no_encoding_jis_ms, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_jis_ms_wchar, - mbfl_filt_conv_common_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_jis_ms = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_jis_ms, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_jis_ms, - mbfl_filt_conv_any_jis_flush, - NULL, -}; - const struct mbfl_convert_vtbl vtbl_cp50220_wchar = { mbfl_no_encoding_cp50220, mbfl_no_encoding_wchar, mbfl_filt_conv_common_ctor, NULL, - mbfl_filt_conv_jis_ms_wchar, - mbfl_filt_conv_common_flush, + mbfl_filt_conv_cp5022x_wchar, + mbfl_filt_conv_cp5022x_wchar_flush, NULL, }; @@ -169,33 +105,13 @@ const struct mbfl_convert_vtbl vtbl_wchar_cp50220 = { mbfl_filt_conv_wchar_cp50220_copy }; -const struct mbfl_convert_vtbl vtbl_cp50220raw_wchar = { - mbfl_no_encoding_cp50220raw, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_jis_ms_wchar, - mbfl_filt_conv_common_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp50220raw = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_cp50220raw, - mbfl_filt_conv_wchar_cp50220_ctor, - mbfl_filt_conv_wchar_cp50220_dtor, - mbfl_filt_conv_wchar_cp50220raw, - mbfl_filt_conv_any_jis_flush, - mbfl_filt_conv_wchar_cp50220_copy -}; - const struct mbfl_convert_vtbl vtbl_cp50221_wchar = { mbfl_no_encoding_cp50221, mbfl_no_encoding_wchar, mbfl_filt_conv_common_ctor, NULL, - mbfl_filt_conv_jis_ms_wchar, - mbfl_filt_conv_common_flush, + mbfl_filt_conv_cp5022x_wchar, + mbfl_filt_conv_cp5022x_wchar_flush, NULL, }; @@ -214,8 +130,8 @@ const struct mbfl_convert_vtbl vtbl_cp50222_wchar = { mbfl_no_encoding_wchar, mbfl_filt_conv_common_ctor, NULL, - mbfl_filt_conv_jis_ms_wchar, - mbfl_filt_conv_common_flush, + mbfl_filt_conv_cp5022x_wchar, + 
mbfl_filt_conv_cp5022x_wchar_flush, NULL, }; @@ -231,11 +147,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_cp50222 = { #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) -/* - * JIS-ms => wchar - */ -int -mbfl_filt_conv_jis_ms_wchar(int c, mbfl_convert_filter *filter) +int mbfl_filt_conv_cp5022x_wchar(int c, mbfl_convert_filter *filter) { int c1, s, w; @@ -281,10 +193,10 @@ retry: if (c > 0x20 && c < 0x7f) { s = (c1 - 0x21)*94 + c - 0x21; if (filter->status == 0x80) { - if (s >= 0 && s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { @@ -307,20 +219,12 @@ retry: w = 0; } if (w <= 0) { - w = (c1 << 8) | c; - w &= MBFL_WCSPLANE_MASK; - w |= MBFL_WCSPLANE_JIS0212; + w = (c1 << 8) | c | MBFL_WCSPLANE_JIS0212; } } CK((*filter->output_function)(w, filter->data)); - } else if (c == 0x1b) { - filter->status += 2; - } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ - CK((*filter->output_function)(c, filter->data)); } else { - w = (c1 << 8) | c; - w &= MBFL_WCSGROUP_MASK; - w |= MBFL_WCSGROUP_THROUGH; + w = (c1 << 8) | c | MBFL_WCSGROUP_THROUGH; CK((*filter->output_function)(w, filter->data)); } break; @@ -338,7 +242,7 @@ retry: filter->status += 3; } else { filter->status &= ~0xf; - CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data)); goto retry; } break; @@ -356,7 +260,7 @@ retry: filter->status++; } else { filter->status &= ~0xf; - CK((*filter->output_function)(0x1b, 
filter->data)); + CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data)); CK((*filter->output_function)(0x24, filter->data)); goto retry; } @@ -375,7 +279,7 @@ retry: filter->status = 0x90; } else { filter->status &= ~0xf; - CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data)); CK((*filter->output_function)(0x24, filter->data)); CK((*filter->output_function)(0x28, filter->data)); goto retry; @@ -397,7 +301,7 @@ retry: filter->status = 0x20; } else { filter->status &= ~0xf; - CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data)); CK((*filter->output_function)(0x28, filter->data)); goto retry; } @@ -411,161 +315,13 @@ retry: return c; } -/* - * wchar => JIS - */ -int -mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter) +static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter) { - int c1, s; - - s = 0; - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) { - /* PUE => Microsoft extended (pseudo 95ku - 114ku) */ - /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ - s = c - 0xe000; - s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21); - } else if (c >= (0xe000 + 10 * 94) && c <= (0xe000 + 20 * 94)) { - /* PUE => JISX0212 user-defined (G3 85ku - 94ku) */ - /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ - s = c - (0xe000 + 10 * 94); - s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1); - } - - /* do 
some transliteration */ - if (s <= 0) { - c1 = c & ~MBFL_WCSPLANE_MASK; - if (c1 == MBFL_WCSPLANE_JIS0208) { - s = c & MBFL_WCSPLANE_MASK; - } else if (c1 == MBFL_WCSPLANE_JIS0212) { - s = c & MBFL_WCSPLANE_MASK; - s |= 0x8080; - } else if (c == 0xa5) { /* YEN SIGN */ - s = 0x1005c; - } else if (c == 0x203e) { /* OVER LINE */ - s = 0x1007e; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s = 0x2141; - } else if (c == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s = 0x224c; - } - } - if (s <= 0 || (s >= 0x8080 && s < 0x10000)) { - int i; - s = -1; - - for (i = 0; - i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - const int oh = cp932ext1_ucs_table_min / 94; - - if (c == cp932ext1_ucs_table[i]) { - s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); - break; - } - } - - if (s < 0) { - const int oh = cp932ext2_ucs_table_min / 94; - const int cp932ext2_ucs_table_size = - cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; - for (i = 0; i < cp932ext2_ucs_table_size; i++) { - if (c == cp932ext2_ucs_table[i]) { - s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); - break; - } - } - } - - if (s < 0) { - const int cp932ext3_ucs_table_size = - cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; - const int limit = cp932ext3_ucs_table_size > - cp932ext3_eucjp_table_size ? 
- cp932ext3_eucjp_table_size: - cp932ext3_ucs_table_size; - for (i = 0; i < limit; i++) { - if (c == cp932ext3_ucs_table[i]) { - s = cp932ext3_eucjp_table[i]; - break; - } - } - } - - if (c == 0) { - s = 0; - } else if (s <= 0) { - s = -1; - } + if ((filter->status & 0xF) == 1) { + /* 2-byte (JIS X 0208 or 0212) character was truncated */ + CK((*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data)); } - - if (s >= 0) { - if (s < 0x80) { /* ASCII */ - if ((filter->status & 0xff00) != 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - } - filter->status = 0; - CK((*filter->output_function)(s, filter->data)); - } else if (s < 0x100) { /* kana */ - if ((filter->status & 0xff00) != 0x100) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ - } - filter->status = 0x100; - CK((*filter->output_function)(s & 0x7f, filter->data)); - } else if (s < 0x8080) { /* X 0208 */ - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x24, filter->data)); /* '$' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - } - filter->status = 0x200; - CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); - CK((*filter->output_function)(s & 0x7f, filter->data)); - } else if (s < 0x10000) { /* X 0212 */ - if ((filter->status & 0xff00) != 0x300) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x24, filter->data)); /* '$' */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x44, filter->data)); /* 'D' */ - } - filter->status = 0x300; - CK((*filter->output_function)((s >> 8) & 0x7f, 
filter->data)); - CK((*filter->output_function)(s & 0x7f, filter->data)); - } else { /* X 0201 latin */ - if ((filter->status & 0xff00) != 0x400) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ - } - filter->status = 0x400; - CK((*filter->output_function)(s & 0x7f, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return c; + return 0; } /* @@ -585,7 +341,7 @@ mbfl_filt_conv_wchar_cp50220_ctor(mbfl_convert_filter *filt) ctx->last.opaque = ctx; ctx->last.data = filt->data; filt->filter_function = vtbl_tl_jisx0201_jisx0208.filter_function; - filt->filter_flush = vtbl_tl_jisx0201_jisx0208.filter_flush; + filt->filter_flush = (filter_flush_t)vtbl_tl_jisx0201_jisx0208.filter_flush; filt->output_function = (output_function_t)ctx->last.filter_function; filt->flush_function = (flush_function_t)ctx->last.filter_flush; filt->data = &ctx->last; @@ -613,65 +369,33 @@ mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt) } /* - * wchar => cp50220raw - */ -int -mbfl_filt_conv_wchar_cp50220raw(int c, mbfl_convert_filter *filter) -{ - if (c & MBFL_WCSPLANE_JIS0208) { - const int s = c & MBFL_WCSPLANE_MASK; - - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x24, filter->data)); /* '$' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - filter->status = 0x200; - } - CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); - CK((*filter->output_function)(s & 0x7f, filter->data)); - return c; - } else { - return mbfl_filt_conv_wchar_cp50221(c, filter); - } -} - -/* * wchar => CP50221 */ -int -mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter) +int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter) { int s = 0; if (c >= ucs_a1_jis_table_min && c < 
ucs_a1_jis_table_max) { s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c == 0x203E) { /* OVERLINE */ + s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { s = ucs_i_jis_table[c - ucs_i_jis_table_min]; } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { s = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) { - /* PUE => Microsoft extended */ - /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ - s = c - 0xe000; - s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21); - } else if (c >= (0xe000 + 10 * 94) && c <= (0xe000 + 20 * 94)) { - /* PUE => JISX0212 user-defined (G3 85ku - 94ku) */ - /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ - s = c - (0xe000 + 10 * 94); - s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1); + } else if (c >= 0xE000 && c <= 0xE757) { + /* 'private'/'user' codepoints */ + s = c - 0xE000; + s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21); } if (s <= 0) { if (c == 0xa5) { /* YEN SIGN */ s = 0x1005c; - } else if (c == 0x203e) { /* OVER LINE */ - s = 0x1007e; } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s = 0x2140; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s = 0x2141; } else if (c == 0x2225) { /* PARALLEL TO */ s = 0x2142; } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ @@ -684,7 +408,16 @@ mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter) s = 0x224c; } } - if (s <= 0 || (s >= 0x8080 && s < 0x10000)) { + + /* Above, we do a series of lookups in `ucs_*_jis_table` to find a + * corresponding kuten code for this Unicode codepoint + * If we get zero, that means the codepoint is not in JIS X 0208 + * On the other hand, if we get a result with the high bits set on both + * upper and lower bytes, that is not a code in JIS X 0208 but rather + * in JIS 
X 0212 + * In either case, check if this codepoint is one of the extensions added + * to JIS X 0208 by Microsoft (to make CP932) */ + if (s == 0 || ((s & 0x8000) && (s & 0x80))) { int i; s = -1; @@ -750,15 +483,15 @@ mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter) filter->status = 0x500; } CK((*filter->output_function)(s - 0x80, filter->data)); - } else if (s < 0x8080) { /* X 0208 */ + } else if (s <= 0x927E) { /* X 0208 + extensions */ if ((filter->status & 0xff00) != 0x200) { CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ CK((*filter->output_function)(0x24, filter->data)); /* '$' */ CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ filter->status = 0x200; } - CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); - CK((*filter->output_function)(s & 0x7f, filter->data)); + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); } else if (s < 0x10000) { /* X0212 */ CK(mbfl_filt_conv_illegal_output(c, filter)); } else { /* X 0201 latin */ @@ -780,42 +513,31 @@ mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter) /* * wchar => CP50222 */ -int -mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter) +int mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter) { - int s; - - s = 0; + int s = 0; if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c == 0x203E) { /* OVERLINE */ + s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { s = ucs_i_jis_table[c - ucs_i_jis_table_min]; } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { s = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) { - /* PUE => Microsoft extended */ - /* See 
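http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ - s = c - 0xe000; - s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21); - } else if (c >= (0xe000 + 10 * 94) && c <= (0xe000 + 20 * 94)) { - /* PUE => JISX0212 user-defined (G3 85ku - 94ku) */ - /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */ - s = c - (0xe000 + 10 * 94); - s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1); + } else if (c >= 0xE000 && c <= 0xE757) { + /* 'private'/'user' codepoints */ + s = c - 0xE000; + s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21); } if (s <= 0) { if (c == 0xa5) { /* YEN SIGN */ s = 0x1005c; - } else if (c == 0x203e) { /* OVER LINE */ - s = 0x1007e; } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s = 0x2140; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s = 0x2141; } else if (c == 0x2225) { /* PARALLEL TO */ s = 0x2142; } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ @@ -828,7 +550,7 @@ mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter) s = 0x224c; } } - if (s <= 0 || (s >= 0x8080 && s < 0x10000)) { + if (s == 0 || ((s & 0x8000) && (s & 0x80))) { int i; s = -1; @@ -894,7 +616,7 @@ mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter) filter->status = 0x500; } CK((*filter->output_function)(s - 0x80, filter->data)); - } else if (s < 0x8080) { /* X 0208 */ + } else if (s <= 0x927E) { /* X 0208 */ if ((filter->status & 0xff00) == 0x500) { CK((*filter->output_function)(0x0f, filter->data)); /* SI */ filter->status = 0; @@ -905,8 +627,8 @@ mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter) CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ filter->status = 0x200; } - CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); - CK((*filter->output_function)(s & 0x7f, filter->data)); + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); } else if (s < 0x10000) { /* X0212 */ CK(mbfl_filt_conv_illegal_output(c, filter)); } else { /* X 0201 latin */ @@ -942,338 +664,9 @@ mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter) } filter->status &= 0xff; - if (filter->flush_function != NULL) { - return (*filter->flush_function)(filter->data); + if (filter->flush_function) { + (*filter->flush_function)(filter->data); } return 0; } - - -static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x20: X 0201 kana */ -/* case 0x80: X 0208 */ -/* case 0x90: X 0212 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (c == 0x0e) { /* "kana in" */ - filter->status = 0x20; - } else if (c == 0x0f) { /* "kana out" */ - filter->status = 0; - } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ -/* case 0x91: X 0212 second char */ - case 1: - filter->status &= ~0xf; - if (c == 0x1b) { - goto retry; - } else if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x28) { /* '(' */ - filter->status++; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ ( */ - case 4: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x44) { /* 'D' */ - filter->status = 0x90; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42 || c 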
0201 latin */ @@ -942,338 +664,9 @@ mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter) } filter->status &= 0xff; - if (filter->flush_function != NULL) { - return (*filter->flush_function)(filter->data); + if (filter->flush_function) { + (*filter->flush_function)(filter->data); } return 0; } - - -static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x20: X 0201 kana */ -/* case 0x80: X 0208 */ -/* case 0x90: X 0212 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (c == 0x0e) { /* "kana in" */ - filter->status = 0x20; - } else if (c == 0x0f) { /* "kana out" */ - filter->status = 0; - } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ -/* case 0x91: X 0212 second char */ - case 1: - filter->status &= ~0xf; - if (c == 0x1b) { - goto retry; - } else if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x28) { /* '(' */ - filter->status++; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ ( */ - case 4: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x44) { /* 'D' */ - filter->status = 0x90; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42 || c 
== 0x48) { /* 'B' or 'H' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else if (c == 0x49) { /* 'I' */ - filter->status = 0x20; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int mbfl_filt_ident_cp50220(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x80: X 0208 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ - case 1: - if (c == 0x1b) { - filter->status++; - } else { - filter->status &= ~0xf; - if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42) { /* 'B' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int mbfl_filt_ident_cp50221(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x80: X 0208 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if 
(filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ - case 1: - if (c == 0x1b) { - filter->status++; - } else { - filter->status &= ~0xf; - if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42) { /* 'B' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else if (c == 0x49) { /* 'I' */ - filter->status = 0x20; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int mbfl_filt_ident_cp50222(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x80: X 0208 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ - case 1: - if (c == 0x1b) { - filter->status++; - } else { - filter->status &= ~0xf; - if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } 
else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42) { /* 'B' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} |