summary refs log tree commit diff
path: root/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c')
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c 749
1 files changed, 71 insertions, 678 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c
index 3879f9eb5b..01cec63236 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c
@@ -36,54 +36,40 @@ typedef struct _mbfl_filt_conv_wchar_cp50220_ctx {
mbfl_convert_filter last;
} mbfl_filt_conv_wchar_cp50220_ctx;
-static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter);
-static int mbfl_filt_ident_cp50220(int c, mbfl_identify_filter *filter);
-static int mbfl_filt_ident_cp50221(int c, mbfl_identify_filter *filter);
-static int mbfl_filt_ident_cp50222(int c, mbfl_identify_filter *filter);
static void mbfl_filt_conv_wchar_cp50220_ctor(mbfl_convert_filter *filt);
static void mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt);
static void mbfl_filt_conv_wchar_cp50220_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest);
+static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter);
-const mbfl_encoding mbfl_encoding_jis_ms = {
- mbfl_no_encoding_jis_ms,
- "JIS-ms",
- "ISO-2022-JP",
- NULL,
- NULL,
- MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
- &vtbl_jis_ms_wchar,
- &vtbl_wchar_jis_ms
-};
+/* Previously, a dubious 'encoding' called 'cp50220raw' was supported.
+ * This was just CP50220, but the implementation was less strict regarding
+ * invalid characters; it would silently pass some through.
+ * This 'encoding' only existed in mbstring. In case some poor, lost soul is
+ * still using it, retain minimal support by aliasing it to CP50220.
+ *
+ * Further, mbstring also had a made-up encoding called "JIS-ms".
+ * This was the same as CP5022{0,1,2}, but without their special ways of
+ * handling conversion of Unicode half-width katakana. */
+static const char *cp50220_aliases[] = {"cp50220raw", "cp50220-raw", "JIS-ms", NULL};
const mbfl_encoding mbfl_encoding_cp50220 = {
mbfl_no_encoding_cp50220,
"CP50220",
"ISO-2022-JP",
- (const char *(*)[])NULL,
+ cp50220_aliases,
NULL,
- MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
+ MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_cp50220_wchar,
&vtbl_wchar_cp50220
};
-const mbfl_encoding mbfl_encoding_cp50220raw = {
- mbfl_no_encoding_cp50220raw,
- "CP50220raw",
- "ISO-2022-JP",
- (const char *(*)[])NULL,
- NULL,
- MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
- &vtbl_cp50220raw_wchar,
- &vtbl_wchar_cp50220raw
-};
-
const mbfl_encoding mbfl_encoding_cp50221 = {
mbfl_no_encoding_cp50221,
"CP50221",
"ISO-2022-JP",
NULL,
NULL,
- MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
+ MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_cp50221_wchar,
&vtbl_wchar_cp50221
};
@@ -94,68 +80,18 @@ const mbfl_encoding mbfl_encoding_cp50222 = {
"ISO-2022-JP",
NULL,
NULL,
- MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
+ MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_cp50222_wchar,
&vtbl_wchar_cp50222
};
-const struct mbfl_identify_vtbl vtbl_identify_jis_ms = {
- mbfl_no_encoding_jis_ms,
- mbfl_filt_ident_common_ctor,
- mbfl_filt_ident_jis_ms
-};
-
-const struct mbfl_identify_vtbl vtbl_identify_cp50220 = {
- mbfl_no_encoding_cp50220,
- mbfl_filt_ident_common_ctor,
- mbfl_filt_ident_cp50220
-};
-
-const struct mbfl_identify_vtbl vtbl_identify_cp50220raw = {
- mbfl_no_encoding_cp50220raw,
- mbfl_filt_ident_common_ctor,
- mbfl_filt_ident_cp50220
-};
-
-const struct mbfl_identify_vtbl vtbl_identify_cp50221 = {
- mbfl_no_encoding_cp50221,
- mbfl_filt_ident_common_ctor,
- mbfl_filt_ident_cp50221
-};
-
-const struct mbfl_identify_vtbl vtbl_identify_cp50222 = {
- mbfl_no_encoding_cp50222,
- mbfl_filt_ident_common_ctor,
- mbfl_filt_ident_cp50222
-};
-
-const struct mbfl_convert_vtbl vtbl_jis_ms_wchar = {
- mbfl_no_encoding_jis_ms,
- mbfl_no_encoding_wchar,
- mbfl_filt_conv_common_ctor,
- NULL,
- mbfl_filt_conv_jis_ms_wchar,
- mbfl_filt_conv_common_flush,
- NULL,
-};
-
-const struct mbfl_convert_vtbl vtbl_wchar_jis_ms = {
- mbfl_no_encoding_wchar,
- mbfl_no_encoding_jis_ms,
- mbfl_filt_conv_common_ctor,
- NULL,
- mbfl_filt_conv_wchar_jis_ms,
- mbfl_filt_conv_any_jis_flush,
- NULL,
-};
-
const struct mbfl_convert_vtbl vtbl_cp50220_wchar = {
mbfl_no_encoding_cp50220,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
- mbfl_filt_conv_jis_ms_wchar,
- mbfl_filt_conv_common_flush,
+ mbfl_filt_conv_cp5022x_wchar,
+ mbfl_filt_conv_cp5022x_wchar_flush,
NULL,
};
@@ -169,33 +105,13 @@ const struct mbfl_convert_vtbl vtbl_wchar_cp50220 = {
mbfl_filt_conv_wchar_cp50220_copy
};
-const struct mbfl_convert_vtbl vtbl_cp50220raw_wchar = {
- mbfl_no_encoding_cp50220raw,
- mbfl_no_encoding_wchar,
- mbfl_filt_conv_common_ctor,
- NULL,
- mbfl_filt_conv_jis_ms_wchar,
- mbfl_filt_conv_common_flush,
- NULL,
-};
-
-const struct mbfl_convert_vtbl vtbl_wchar_cp50220raw = {
- mbfl_no_encoding_wchar,
- mbfl_no_encoding_cp50220raw,
- mbfl_filt_conv_wchar_cp50220_ctor,
- mbfl_filt_conv_wchar_cp50220_dtor,
- mbfl_filt_conv_wchar_cp50220raw,
- mbfl_filt_conv_any_jis_flush,
- mbfl_filt_conv_wchar_cp50220_copy
-};
-
const struct mbfl_convert_vtbl vtbl_cp50221_wchar = {
mbfl_no_encoding_cp50221,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
- mbfl_filt_conv_jis_ms_wchar,
- mbfl_filt_conv_common_flush,
+ mbfl_filt_conv_cp5022x_wchar,
+ mbfl_filt_conv_cp5022x_wchar_flush,
NULL,
};
@@ -214,8 +130,8 @@ const struct mbfl_convert_vtbl vtbl_cp50222_wchar = {
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
- mbfl_filt_conv_jis_ms_wchar,
- mbfl_filt_conv_common_flush,
+ mbfl_filt_conv_cp5022x_wchar,
+ mbfl_filt_conv_cp5022x_wchar_flush,
NULL,
};
@@ -231,11 +147,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_cp50222 = {
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
-/*
- * JIS-ms => wchar
- */
-int
-mbfl_filt_conv_jis_ms_wchar(int c, mbfl_convert_filter *filter)
+int mbfl_filt_conv_cp5022x_wchar(int c, mbfl_convert_filter *filter)
{
int c1, s, w;
@@ -281,10 +193,10 @@ retry:
if (c > 0x20 && c < 0x7f) {
s = (c1 - 0x21)*94 + c - 0x21;
if (filter->status == 0x80) {
- if (s >= 0 && s < jisx0208_ucs_table_size) {
- w = jisx0208_ucs_table[s];
- } else if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
+ if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
+ } else if (s >= 0 && s < jisx0208_ucs_table_size) {
+ w = jisx0208_ucs_table[s];
} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
} else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
@@ -307,20 +219,12 @@ retry:
w = 0;
}
if (w <= 0) {
- w = (c1 << 8) | c;
- w &= MBFL_WCSPLANE_MASK;
- w |= MBFL_WCSPLANE_JIS0212;
+ w = (c1 << 8) | c | MBFL_WCSPLANE_JIS0212;
}
}
CK((*filter->output_function)(w, filter->data));
- } else if (c == 0x1b) {
- filter->status += 2;
- } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
- CK((*filter->output_function)(c, filter->data));
} else {
- w = (c1 << 8) | c;
- w &= MBFL_WCSGROUP_MASK;
- w |= MBFL_WCSGROUP_THROUGH;
+ w = (c1 << 8) | c | MBFL_WCSGROUP_THROUGH;
CK((*filter->output_function)(w, filter->data));
}
break;
@@ -338,7 +242,7 @@ retry:
filter->status += 3;
} else {
filter->status &= ~0xf;
- CK((*filter->output_function)(0x1b, filter->data));
+ CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data));
goto retry;
}
break;
@@ -356,7 +260,7 @@ retry:
filter->status++;
} else {
filter->status &= ~0xf;
- CK((*filter->output_function)(0x1b, filter->data));
+ CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data));
CK((*filter->output_function)(0x24, filter->data));
goto retry;
}
@@ -375,7 +279,7 @@ retry:
filter->status = 0x90;
} else {
filter->status &= ~0xf;
- CK((*filter->output_function)(0x1b, filter->data));
+ CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data));
CK((*filter->output_function)(0x24, filter->data));
CK((*filter->output_function)(0x28, filter->data));
goto retry;
@@ -397,7 +301,7 @@ retry:
filter->status = 0x20;
} else {
filter->status &= ~0xf;
- CK((*filter->output_function)(0x1b, filter->data));
+ CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data));
CK((*filter->output_function)(0x28, filter->data));
goto retry;
}
@@ -411,161 +315,13 @@ retry:
return c;
}
-/*
- * wchar => JIS
- */
-int
-mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter)
+static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter)
{
- int c1, s;
-
- s = 0;
- if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
- s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
- } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
- s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
- } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
- s = ucs_i_jis_table[c - ucs_i_jis_table_min];
- } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
- s = ucs_r_jis_table[c - ucs_r_jis_table_min];
- } else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) {
- /* PUE => Microsoft extended (pseudo 95ku - 114ku) */
- /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */
- s = c - 0xe000;
- s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21);
- } else if (c >= (0xe000 + 10 * 94) && c <= (0xe000 + 20 * 94)) {
- /* PUE => JISX0212 user-defined (G3 85ku - 94ku) */
- /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */
- s = c - (0xe000 + 10 * 94);
- s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1);
- }
-
- /* do some transliteration */
- if (s <= 0) {
- c1 = c & ~MBFL_WCSPLANE_MASK;
- if (c1 == MBFL_WCSPLANE_JIS0208) {
- s = c & MBFL_WCSPLANE_MASK;
- } else if (c1 == MBFL_WCSPLANE_JIS0212) {
- s = c & MBFL_WCSPLANE_MASK;
- s |= 0x8080;
- } else if (c == 0xa5) { /* YEN SIGN */
- s = 0x1005c;
- } else if (c == 0x203e) { /* OVER LINE */
- s = 0x1007e;
- } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
- s = 0x2140;
- } else if (c == 0xff5e) { /* FULLWIDTH TILDE */
- s = 0x2141;
- } else if (c == 0x2225) { /* PARALLEL TO */
- s = 0x2142;
- } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
- s = 0x215d;
- } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
- s = 0x2171;
- } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
- s = 0x2172;
- } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
- s = 0x224c;
- }
- }
- if (s <= 0 || (s >= 0x8080 && s < 0x10000)) {
- int i;
- s = -1;
-
- for (i = 0;
- i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
- const int oh = cp932ext1_ucs_table_min / 94;
-
- if (c == cp932ext1_ucs_table[i]) {
- s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
- break;
- }
- }
-
- if (s < 0) {
- const int oh = cp932ext2_ucs_table_min / 94;
- const int cp932ext2_ucs_table_size =
- cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
- for (i = 0; i < cp932ext2_ucs_table_size; i++) {
- if (c == cp932ext2_ucs_table[i]) {
- s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
- break;
- }
- }
- }
-
- if (s < 0) {
- const int cp932ext3_ucs_table_size =
- cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
- const int limit = cp932ext3_ucs_table_size >
- cp932ext3_eucjp_table_size ?
- cp932ext3_eucjp_table_size:
- cp932ext3_ucs_table_size;
- for (i = 0; i < limit; i++) {
- if (c == cp932ext3_ucs_table[i]) {
- s = cp932ext3_eucjp_table[i];
- break;
- }
- }
- }
-
- if (c == 0) {
- s = 0;
- } else if (s <= 0) {
- s = -1;
- }
+ if ((filter->status & 0xF) == 1) {
+ /* 2-byte (JIS X 0208 or 0212) character was truncated */
+ CK((*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data));
}
-
- if (s >= 0) {
- if (s < 0x80) { /* ASCII */
- if ((filter->status & 0xff00) != 0) {
- CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
- CK((*filter->output_function)(0x28, filter->data)); /* '(' */
- CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
- }
- filter->status = 0;
- CK((*filter->output_function)(s, filter->data));
- } else if (s < 0x100) { /* kana */
- if ((filter->status & 0xff00) != 0x100) {
- CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
- CK((*filter->output_function)(0x28, filter->data)); /* '(' */
- CK((*filter->output_function)(0x49, filter->data)); /* 'I' */
- }
- filter->status = 0x100;
- CK((*filter->output_function)(s & 0x7f, filter->data));
- } else if (s < 0x8080) { /* X 0208 */
- if ((filter->status & 0xff00) != 0x200) {
- CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
- CK((*filter->output_function)(0x24, filter->data)); /* '$' */
- CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
- }
- filter->status = 0x200;
- CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
- CK((*filter->output_function)(s & 0x7f, filter->data));
- } else if (s < 0x10000) { /* X 0212 */
- if ((filter->status & 0xff00) != 0x300) {
- CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
- CK((*filter->output_function)(0x24, filter->data)); /* '$' */
- CK((*filter->output_function)(0x28, filter->data)); /* '(' */
- CK((*filter->output_function)(0x44, filter->data)); /* 'D' */
- }
- filter->status = 0x300;
- CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
- CK((*filter->output_function)(s & 0x7f, filter->data));
- } else { /* X 0201 latin */
- if ((filter->status & 0xff00) != 0x400) {
- CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
- CK((*filter->output_function)(0x28, filter->data)); /* '(' */
- CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */
- }
- filter->status = 0x400;
- CK((*filter->output_function)(s & 0x7f, filter->data));
- }
- } else {
- CK(mbfl_filt_conv_illegal_output(c, filter));
- }
-
- return c;
+ return 0;
}
/*
@@ -585,7 +341,7 @@ mbfl_filt_conv_wchar_cp50220_ctor(mbfl_convert_filter *filt)
ctx->last.opaque = ctx;
ctx->last.data = filt->data;
filt->filter_function = vtbl_tl_jisx0201_jisx0208.filter_function;
- filt->filter_flush = vtbl_tl_jisx0201_jisx0208.filter_flush;
+ filt->filter_flush = (filter_flush_t)vtbl_tl_jisx0201_jisx0208.filter_flush;
filt->output_function = (output_function_t)ctx->last.filter_function;
filt->flush_function = (flush_function_t)ctx->last.filter_flush;
filt->data = &ctx->last;
@@ -613,65 +369,33 @@ mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt)
}
/*
- * wchar => cp50220raw
- */
-int
-mbfl_filt_conv_wchar_cp50220raw(int c, mbfl_convert_filter *filter)
-{
- if (c & MBFL_WCSPLANE_JIS0208) {
- const int s = c & MBFL_WCSPLANE_MASK;
-
- if ((filter->status & 0xff00) != 0x200) {
- CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
- CK((*filter->output_function)(0x24, filter->data)); /* '$' */
- CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
- filter->status = 0x200;
- }
- CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
- CK((*filter->output_function)(s & 0x7f, filter->data));
- return c;
- } else {
- return mbfl_filt_conv_wchar_cp50221(c, filter);
- }
-}
-
-/*
* wchar => CP50221
*/
-int
-mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter)
+int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter)
{
int s = 0;
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
+ } else if (c == 0x203E) { /* OVERLINE */
+ s = 0x1007E; /* Convert to JISX 0201 OVERLINE */
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
s = ucs_i_jis_table[c - ucs_i_jis_table_min];
} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
s = ucs_r_jis_table[c - ucs_r_jis_table_min];
- } else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) {
- /* PUE => Microsoft extended */
- /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */
- s = c - 0xe000;
- s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21);
- } else if (c >= (0xe000 + 10 * 94) && c <= (0xe000 + 20 * 94)) {
- /* PUE => JISX0212 user-defined (G3 85ku - 94ku) */
- /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */
- s = c - (0xe000 + 10 * 94);
- s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1);
+ } else if (c >= 0xE000 && c <= 0xE757) {
+ /* 'private'/'user' codepoints */
+ s = c - 0xE000;
+ s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21);
}
if (s <= 0) {
if (c == 0xa5) { /* YEN SIGN */
s = 0x1005c;
- } else if (c == 0x203e) { /* OVER LINE */
- s = 0x1007e;
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s = 0x2140;
- } else if (c == 0xff5e) { /* FULLWIDTH TILDE */
- s = 0x2141;
} else if (c == 0x2225) { /* PARALLEL TO */
s = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
@@ -684,7 +408,16 @@ mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter)
s = 0x224c;
}
}
- if (s <= 0 || (s >= 0x8080 && s < 0x10000)) {
+
+ /* Above, we do a series of lookups in `ucs_*_jis_table` to find a
+ * corresponding kuten code for this Unicode codepoint.
+ * If we get zero, that means the codepoint is not in JIS X 0208.
+ * On the other hand, if we get a result with the high bits set on both
+ * upper and lower bytes, that is not a code in JIS X 0208 but rather
+ * in JIS X 0212.
+ * In either case, check if this codepoint is one of the extensions added
+ * to JIS X 0208 by Microsoft (to make CP932). */
+ if (s == 0 || ((s & 0x8000) && (s & 0x80))) {
int i;
s = -1;
@@ -750,15 +483,15 @@ mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter)
filter->status = 0x500;
}
CK((*filter->output_function)(s - 0x80, filter->data));
- } else if (s < 0x8080) { /* X 0208 */
+ } else if (s <= 0x927E) { /* X 0208 + extensions */
if ((filter->status & 0xff00) != 0x200) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x24, filter->data)); /* '$' */
CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
filter->status = 0x200;
}
- CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
- CK((*filter->output_function)(s & 0x7f, filter->data));
+ CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
+ CK((*filter->output_function)(s & 0xff, filter->data));
} else if (s < 0x10000) { /* X0212 */
CK(mbfl_filt_conv_illegal_output(c, filter));
} else { /* X 0201 latin */
@@ -780,42 +513,31 @@ mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter)
/*
* wchar => CP50222
*/
-int
-mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter)
+int mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter)
{
- int s;
-
- s = 0;
+ int s = 0;
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
+ } else if (c == 0x203E) { /* OVERLINE */
+ s = 0x1007E; /* Convert to JISX 0201 OVERLINE */
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
s = ucs_i_jis_table[c - ucs_i_jis_table_min];
} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
s = ucs_r_jis_table[c - ucs_r_jis_table_min];
- } else if (c >= 0xe000 && c < (0xe000 + 10 * 94)) {
- /* PUE => Microsoft extended */
- /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */
- s = c - 0xe000;
- s = (s / 94 + 0x75) << 8 | (s % 94 + 0x21);
- } else if (c >= (0xe000 + 10 * 94) && c <= (0xe000 + 20 * 94)) {
- /* PUE => JISX0212 user-defined (G3 85ku - 94ku) */
- /* See http://www.opengroup.or.jp/jvc/cde/ucs-conv.html#ch4_2 */
- s = c - (0xe000 + 10 * 94);
- s = (s / 94 + 0xf5) << 8 | (s % 94 + 0xa1);
+ } else if (c >= 0xE000 && c <= 0xE757) {
+ /* 'private'/'user' codepoints */
+ s = c - 0xE000;
+ s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21);
}
if (s <= 0) {
if (c == 0xa5) { /* YEN SIGN */
s = 0x1005c;
- } else if (c == 0x203e) { /* OVER LINE */
- s = 0x1007e;
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s = 0x2140;
- } else if (c == 0xff5e) { /* FULLWIDTH TILDE */
- s = 0x2141;
} else if (c == 0x2225) { /* PARALLEL TO */
s = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
@@ -828,7 +550,7 @@ mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter)
s = 0x224c;
}
}
- if (s <= 0 || (s >= 0x8080 && s < 0x10000)) {
+ if (s == 0 || ((s & 0x8000) && (s & 0x80))) {
int i;
s = -1;
@@ -894,7 +616,7 @@ mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter)
filter->status = 0x500;
}
CK((*filter->output_function)(s - 0x80, filter->data));
- } else if (s < 0x8080) { /* X 0208 */
+ } else if (s <= 0x927E) { /* X 0208 + extensions */
if ((filter->status & 0xff00) == 0x500) {
CK((*filter->output_function)(0x0f, filter->data)); /* SI */
filter->status = 0;
@@ -905,8 +627,8 @@ mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter)
CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
filter->status = 0x200;
}
- CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
- CK((*filter->output_function)(s & 0x7f, filter->data));
+ CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
+ CK((*filter->output_function)(s & 0xff, filter->data));
} else if (s < 0x10000) { /* X0212 */
CK(mbfl_filt_conv_illegal_output(c, filter));
} else { /* X 0201 latin */
@@ -942,338 +664,9 @@ mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter)
}
filter->status &= 0xff;
- if (filter->flush_function != NULL) {
- return (*filter->flush_function)(filter->data);
+ if (filter->flush_function) {
+ (*filter->flush_function)(filter->data);
}
return 0;
}
-
-
-static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter)
-{
-retry:
- switch (filter->status & 0xf) {
-/* case 0x00: ASCII */
-/* case 0x10: X 0201 latin */
-/* case 0x20: X 0201 kana */
-/* case 0x80: X 0208 */
-/* case 0x90: X 0212 */
- case 0:
- if (c == 0x1b) {
- filter->status += 2;
- } else if (c == 0x0e) { /* "kana in" */
- filter->status = 0x20;
- } else if (c == 0x0f) { /* "kana out" */
- filter->status = 0;
- } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */
- filter->status += 1;
- } else if (c >= 0 && c < 0x80) { /* latin, CTLs */
- ;
- } else {
- filter->flag = 1; /* bad */
- }
- break;
-
-/* case 0x81: X 0208 second char */
-/* case 0x91: X 0212 second char */
- case 1:
- filter->status &= ~0xf;
- if (c == 0x1b) {
- goto retry;
- } else if (c < 0x21 || c > 0x7e) { /* bad */
- filter->flag = 1;
- }
- break;
-
- /* ESC */
- case 2:
- if (c == 0x24) { /* '$' */
- filter->status++;
- } else if (c == 0x28) { /* '(' */
- filter->status += 3;
- } else {
- filter->flag = 1; /* bad */
- filter->status &= ~0xf;
- goto retry;
- }
- break;
-
- /* ESC $ */
- case 3:
- if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
- filter->status = 0x80;
- } else if (c == 0x28) { /* '(' */
- filter->status++;
- } else {
- filter->flag = 1; /* bad */
- filter->status &= ~0xf;
- goto retry;
- }
- break;
-
- /* ESC $ ( */
- case 4:
- if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
- filter->status = 0x80;
- } else if (c == 0x44) { /* 'D' */
- filter->status = 0x90;
- } else {
- filter->flag = 1; /* bad */
- filter->status &= ~0xf;
- goto retry;
- }
- break;
-
- /* ESC ( */
- case 5:
- if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */
- filter->status = 0;
- } else if (c == 0x4a) { /* 'J' */
- filter->status = 0x10;
- } else if (c == 0x49) { /* 'I' */
- filter->status = 0x20;
- } else {
- filter->flag = 1; /* bad */
- filter->status &= ~0xf;
- goto retry;
- }
- break;
-
- default:
- filter->status = 0;
- break;
- }
-
- return c;
-}
-
-static int mbfl_filt_ident_cp50220(int c, mbfl_identify_filter *filter)
-{
-retry:
- switch (filter->status & 0xf) {
-/* case 0x00: ASCII */
-/* case 0x10: X 0201 latin */
-/* case 0x80: X 0208 */
- case 0:
- if (c == 0x1b) {
- filter->status += 2;
- } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */
- filter->status += 1;
- } else if (c >= 0 && c < 0x80) { /* latin, CTLs */
- ;
- } else {
- filter->flag = 1; /* bad */
- }
- break;
-
-/* case 0x81: X 0208 second char */
- case 1:
- if (c == 0x1b) {
- filter->status++;
- } else {
- filter->status &= ~0xf;
- if (c < 0x21 || c > 0x7e) { /* bad */
- filter->flag = 1;
- }
- }
- break;
-
- /* ESC */
- case 2:
- if (c == 0x24) { /* '$' */
- filter->status++;
- } else if (c == 0x28) { /* '(' */
- filter->status += 3;
- } else {
- filter->flag = 1; /* bad */
- filter->status &= ~0xf;
- goto retry;
- }
- break;
-
- /* ESC $ */
- case 3:
- if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
- filter->status = 0x80;
- } else {
- filter->flag = 1; /* bad */
- filter->status &= ~0xf;
- goto retry;
- }
- break;
-
- /* ESC ( */
- case 5:
- if (c == 0x42) { /* 'B' */
- filter->status = 0;
- } else if (c == 0x4a) { /* 'J' */
- filter->status = 0x10;
- } else {
- filter->flag = 1; /* bad */
- filter->status &= ~0xf;
- goto retry;
- }
- break;
-
- default:
- filter->status = 0;
- break;
- }
-
- return c;
-}
-
-static int mbfl_filt_ident_cp50221(int c, mbfl_identify_filter *filter)
-{
-retry:
- switch (filter->status & 0xf) {
-/* case 0x00: ASCII */
-/* case 0x10: X 0201 latin */
-/* case 0x80: X 0208 */
- case 0:
- if (c == 0x1b) {
- filter->status += 2;
- } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */
- filter->status += 1;
- } else if (c >= 0 && c < 0x80) { /* latin, CTLs */
- ;
- } else {
- filter->flag = 1; /* bad */
- }
- break;
-
-/* case 0x81: X 0208 second char */
- case 1:
- if (c == 0x1b) {
- filter->status++;
- } else {
- filter->status &= ~0xf;
- if (c < 0x21 || c > 0x7e) { /* bad */
- filter->flag = 1;
- }
- }
- break;
-
- /* ESC */
- case 2:
- if (c == 0x24) { /* '$' */
- filter->status++;
- } else if (c == 0x28) { /* '(' */
- filter->status += 3;
- } else {
- filter->flag = 1; /* bad */
- filter->status &= ~0xf;
- goto retry;
- }
- break;
-
- /* ESC $ */
- case 3:
- if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
- filter->status = 0x80;
- } else {
- filter->flag = 1; /* bad */
- filter->status &= ~0xf;
- goto retry;
- }
- break;
-
- /* ESC ( */
- case 5:
- if (c == 0x42) { /* 'B' */
- filter->status = 0;
- } else if (c == 0x4a) { /* 'J' */
- filter->status = 0x10;
- } else if (c == 0x49) { /* 'I' */
- filter->status = 0x20;
- } else {
- filter->flag = 1; /* bad */
- filter->status &= ~0xf;
- goto retry;
- }
- break;
-
- default:
- filter->status = 0;
- break;
- }
-
- return c;
-}
-
-static int mbfl_filt_ident_cp50222(int c, mbfl_identify_filter *filter)
-{
-retry:
- switch (filter->status & 0xf) {
-/* case 0x00: ASCII */
-/* case 0x10: X 0201 latin */
-/* case 0x80: X 0208 */
- case 0:
- if (c == 0x1b) {
- filter->status += 2;
- } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */
- filter->status += 1;
- } else if (c >= 0 && c < 0x80) { /* latin, CTLs */
- ;
- } else {
- filter->flag = 1; /* bad */
- }
- break;
-
-/* case 0x81: X 0208 second char */
- case 1:
- if (c == 0x1b) {
- filter->status++;
- } else {
- filter->status &= ~0xf;
- if (c < 0x21 || c > 0x7e) { /* bad */
- filter->flag = 1;
- }
- }
- break;
-
- /* ESC */
- case 2:
- if (c == 0x24) { /* '$' */
- filter->status++;
- } else if (c == 0x28) { /* '(' */
- filter->status += 3;
- } else {
- filter->flag = 1; /* bad */
- filter->status &= ~0xf;
- goto retry;
- }
- break;
-
- /* ESC $ */
- case 3:
- if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
- filter->status = 0x80;
- } else {
- filter->flag = 1; /* bad */
- filter->status &= ~0xf;
- goto retry;
- }
- break;
-
- /* ESC ( */
- case 5:
- if (c == 0x42) { /* 'B' */
- filter->status = 0;
- } else if (c == 0x4a) { /* 'J' */
- filter->status = 0x10;
- } else {
- filter->flag = 1; /* bad */
- filter->status &= ~0xf;
- goto retry;
- }
- break;
-
- default:
- filter->status = 0;
- break;
- }
-
- return c;
-}