summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jeffrey Stedfast <jestedfa@microsoft.com>, 2020-02-16 10:24:51 -0500
committer: Jeffrey Stedfast <jestedfa@microsoft.com>, 2020-02-16 10:30:48 -0500
commit: 45fe84f2d709ac968fd2f342dfd79d16a5fe2ee4 (patch)
tree: b42ff51a652a73b01aba17e2829183be0cce64e6
parent: 1b9cd7981b35c67f16bd966e88b4feec434a0302 (diff)
download: gmime-45fe84f2d709ac968fd2f342dfd79d16a5fe2ee4.tar.gz
Added some configure logic to auto-detect the system shift-jis charset alias
May fix issue #81
-rw-r--r-- gmime/gmime-charset.c | 55
-rw-r--r-- iconv-detect.c | 43
2 files changed, 76 insertions, 22 deletions
diff --git a/gmime/gmime-charset.c b/gmime/gmime-charset.c
index 567de1bf..938de4fd 100644
--- a/gmime/gmime-charset.c
+++ b/gmime/gmime-charset.c
@@ -60,6 +60,7 @@
#define ICONV_ISO_INT_FORMAT "iso-%u-%u"
#define ICONV_ISO_STR_FORMAT "iso-%u-%s"
#endif /* __aix__, __irix__, __sun__ */
+#define ICONV_SHIFT_JIS "shift-jis"
#define ICONV_10646 "iso-10646"
#endif /* USE_ICONV_DETECT */
@@ -133,12 +134,14 @@ static struct {
/* Japanese charsets */
{ "eucjp-0", "eucJP" }, /* should this map to "EUC-JP" instead? */
{ "ujis-0", "ujis" }, /* we might want to map this to EUC-JP */
- { "jisx0208.1983-0", "SJIS" },
- { "jisx0212.1990-0", "SJIS" },
- { "pck", "SJIS" },
{ NULL, NULL }
};
+static const char *shiftjis_aliases[] = { /* charset names that is_shift_jis() accepts as Shift-JIS */
+ "shift-jis", "shift_jis", "sjis", "shift_jis-2004", "shift_jisx0213", /* compared with strcmp(), so spelling and case must match exactly */
+ "jisx0208.1983-0", "jisx0212.1990-0", "pck", NULL /* trailing NULL is the end-of-list sentinel */
+};
+
/* map CJKR charsets to their language code */
/* NOTE: only support charset names that will be returned by
* g_mime_charset_iconv_name() so that we don't have to keep track of
@@ -147,21 +150,20 @@ static struct {
const char *charset;
const char *lang;
} cjkr_lang_map[] = {
- { "Big5", "zh" },
- { "BIG5HKSCS", "zh" },
- { "gb2312", "zh" },
- { "gb18030", "zh" },
- { "gbk", "zh" },
- { "euc-tw", "zh" },
- { "iso-2022-jp", "ja" },
- { "Shift-JIS", "ja" },
- { "sjis", "ja" },
- { "ujis", "ja" },
- { "eucJP", "ja" },
- { "euc-jp", "ja" },
- { "euc-kr", "ko" },
- { "koi8-r", "ru" },
- { "koi8-u", "uk" }
+ { "Big5", "zh" },
+ { "BIG5HKSCS", "zh" },
+ { "gb2312", "zh" },
+ { "gb18030", "zh" },
+ { "gbk", "zh" },
+ { "euc-tw", "zh" },
+ { "iso-2022-jp", "ja" },
+ { ICONV_SHIFT_JIS, "ja" },
+ { "ujis", "ja" },
+ { "eucJP", "ja" },
+ { "euc-jp", "ja" },
+ { "euc-kr", "ko" },
+ { "koi8-r", "ru" },
+ { "koi8-u", "uk" }
};
static GHashTable *iconv_charsets = NULL;
@@ -409,6 +411,18 @@ g_mime_charset_language (const char *charset)
return NULL;
}
+static gboolean /* TRUE when name exactly equals an entry of shiftjis_aliases[], FALSE otherwise */
+is_shift_jis (const char *name)
+{
+ int i;
+
+ for (i = 0; shiftjis_aliases[i] != NULL; i++) { /* walk the table until its NULL sentinel */
+ if (!strcmp (name, shiftjis_aliases[i])) /* case-sensitive; NOTE(review): presumably the caller passes an already lower-cased name - confirm */
+ return TRUE;
+ }
+
+ return FALSE;
+}
static const char *
strdown (char *str)
@@ -483,8 +497,7 @@ g_mime_charset_iconv_name (const char *charset)
iso, codepage);
} else {
/* codepage is a string - probably iso-2022-jp or something */
- iconv_name = g_strdup_printf (ICONV_ISO_STR_FORMAT,
- iso, p);
+ iconv_name = g_strdup_printf (ICONV_ISO_STR_FORMAT, iso, p);
}
} else {
/* p == buf, which probably means we've
@@ -503,6 +516,8 @@ g_mime_charset_iconv_name (const char *charset)
buf += 2;
iconv_name = g_strdup_printf ("CP%s", buf);
+ } else if (is_shift_jis (name)) {
+ iconv_name = g_strdup (ICONV_SHIFT_JIS);
} else {
/* assume charset name is ok as is? */
iconv_name = g_strdup (charset);
diff --git a/iconv-detect.c b/iconv-detect.c
index c033fd07..16333b8b 100644
--- a/iconv-detect.c
+++ b/iconv-detect.c
@@ -1,6 +1,6 @@
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* GMime
- * Copyright (C) 1999-2017 Jeffrey Stedfast
+ * Copyright (C) 1999-2020 Jeffrey Stedfast
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
@@ -47,6 +47,12 @@ enum {
ISO_DASH_UINT_DASH_STR_LOWER = (1 << 0),
ISO_DASH_UINT_DASH_STR = (1 << 1),
ISO_UINT_DASH_STR = (1 << 2),
+
+ /* shift-jis */
+ SHIFT_DASH_JIS = (1 << 0),
+ SHIFT_UNDERSCORE_JIS = (1 << 1),
+ SJIS = (1 << 2),
+ SHIFT_JISX0213 = (1 << 3),
};
@@ -76,6 +82,15 @@ static CharInfo iso2022_tests[] = {
static int num_iso2022_tests = sizeof (iso2022_tests) / sizeof (CharInfo);
+static CharInfo shiftjis_tests[] = { /* Shift-JIS aliases probed by main(), in the order they are tried */
+ { "shift-jis", "shift-jis", SHIFT_DASH_JIS }, /* fields are read as .charset, .format and .id; here charset and format are the same string */
+ { "shift_jis", "shift_jis", SHIFT_UNDERSCORE_JIS },
+ { "sjis", "sjis", SJIS },
+ { "shift_jisx0213", "shift_jisx0213", SHIFT_JISX0213 },
+};
+
+static int num_shiftjis_tests = sizeof (shiftjis_tests) / sizeof (CharInfo); /* number of entries in shiftjis_tests */
+
static CharInfo iso10646_tests[] = {
{ "iso-10646-1", "iso-%u-%u", ISO_DASH_UINT_DASH_UINT_LOWER },
{ "ISO-10646-1", "ISO-%u-%u", ISO_DASH_UINT_DASH_UINT },
@@ -91,7 +106,7 @@ static int num_iso10646_tests = sizeof (iso10646_tests) / sizeof (CharInfo);
int main (int argc, char **argv)
{
- unsigned int iso8859, iso2022, iso10646;
+ unsigned int iso8859, iso2022, iso10646, shiftjis;
CharInfo *info;
iconv_t cd;
FILE *fp;
@@ -150,6 +165,30 @@ int main (int argc, char **argv)
fprintf (fp, "#define ICONV_ISO_STR_FORMAT \"%s\"\n", info[i].format);
}
+ shiftjis = ISO_UNSUPPORTED; /* keeps this value unless one of the probes below succeeds */
+ info = shiftjis_tests;
+ /* try each Shift-JIS alias in table order; the first one iconv_open() accepts is used */
+ for (i = 0; i < num_shiftjis_tests; i++) {
+ cd = iconv_open (info[i].charset, "UTF-8"); /* ask for a UTF-8 -> alias converter */
+ if (cd != (iconv_t) -1) {
+ iconv_close (cd);
+ /* only whether the descriptor opened matters; nothing is converted */
+ fprintf (stderr, "System prefers %s\n", info[i].charset);
+ shiftjis = info[i].id;
+ break;
+ }
+ }
+
+ if (shiftjis == ISO_UNSUPPORTED) {
+ fprintf (stderr, "System doesn't support any SHIFT-JIS aliases\n");
+ fprintf (fp, "#define ICONV_SHIFT_JIS \"%s\"\n", info[0].format); /* no alias worked: emit the first table entry as a fallback */
+#ifdef CONFIGURE_IN
+ return EXIT_FAILURE;
+#endif
+ } else {
+ fprintf (fp, "#define ICONV_SHIFT_JIS \"%s\"\n", info[i].format); /* i still indexes the alias that opened successfully */
+ }
+
iso10646 = ISO_UNSUPPORTED;
info = iso10646_tests;
/*printf ("#define ISO_10646_FORMAT(iso,codepage)\t");*/