/* * Copyright © 2002 Red Hat, Inc. * Copyright © 2008 Christian Persch * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "config.h" #include #include #include #include #include "terminal-app.h" #include "terminal-debug.h" #include "terminal-encoding.h" #include "terminal-schemas.h" #include "terminal-util.h" /* Overview * * There's a list of character sets stored in gsettings, indicating * which encodings to display in the encoding menu. * * We have a pre-canned list of available encodings * (hardcoded in the table below) that can be added to * the encoding menu, and to give a human-readable name * to certain encodings. * * If the setting list contains an encoding not in the * predetermined table, then that encoding is * labeled "user defined" but still appears in the menu. */ static const struct { const char *charset; const char *name; } encodings[] = { { "ISO-8859-1", N_("Western") }, { "ISO-8859-2", N_("Central European") }, { "ISO-8859-3", N_("South European") }, { "ISO-8859-4", N_("Baltic") }, { "ISO-8859-5", N_("Cyrillic") }, { "ISO-8859-6", N_("Arabic") }, { "ISO-8859-7", N_("Greek") }, { "ISO-8859-8", N_("Hebrew Visual") }, { "ISO-8859-8-I", N_("Hebrew") }, { "ISO-8859-9", N_("Turkish") }, { "ISO-8859-10", N_("Nordic") }, { "ISO-8859-13", N_("Baltic") }, { "ISO-8859-14", N_("Celtic") }, { "ISO-8859-15", N_("Western") }, { "ISO-8859-16", N_("Romanian") }, { "UTF-8", N_("Unicode") }, { "ARMSCII-8", N_("Armenian") }, { "BIG5", N_("Chinese Traditional") }, { "BIG5-HKSCS", N_("Chinese Traditional") }, { "CP866", N_("Cyrillic/Russian") }, { "EUC-JP", N_("Japanese") }, { "EUC-KR", N_("Korean") }, { "EUC-TW", N_("Chinese Traditional") }, { "GB18030", N_("Chinese Simplified") }, { "GB2312", N_("Chinese Simplified") }, { "GBK", N_("Chinese Simplified") }, { "GEORGIAN-PS", N_("Georgian") }, { "IBM850", N_("Western") }, { "IBM852", N_("Central European") }, { "IBM855", N_("Cyrillic") }, { "IBM857", N_("Turkish") }, { "IBM862", N_("Hebrew") }, { "IBM864", N_("Arabic") }, { "ISO-2022-JP", N_("Japanese") }, { "ISO-2022-KR", N_("Korean") }, { "ISO-IR-111", N_("Cyrillic") }, { "KOI8-R", N_("Cyrillic") }, { "KOI8-U", N_("Cyrillic/Ukrainian") }, { "MAC_ARABIC", N_("Arabic") }, { "MAC_CE", N_("Central European") }, { "MAC_CROATIAN", N_("Croatian") }, { "MAC-CYRILLIC", N_("Cyrillic") }, { "MAC_DEVANAGARI", N_("Hindi") }, { "MAC_FARSI", N_("Persian") }, { "MAC_GREEK", N_("Greek") }, { "MAC_GUJARATI", N_("Gujarati") }, { "MAC_GURMUKHI", N_("Gurmukhi") }, { "MAC_HEBREW", N_("Hebrew") }, { "MAC_ICELANDIC", N_("Icelandic") }, { "MAC_ROMAN", N_("Western") }, { "MAC_ROMANIAN", N_("Romanian") }, { "MAC_TURKISH", N_("Turkish") }, { "MAC_UKRAINIAN", N_("Cyrillic/Ukrainian") }, { "SHIFT_JIS", N_("Japanese") }, { "TCVN", N_("Vietnamese") }, { "TIS-620", N_("Thai") }, { "UHC", N_("Korean") }, { "VISCII", N_("Vietnamese") }, { "WINDOWS-1250", N_("Central European") }, { "WINDOWS-1251", N_("Cyrillic") }, { "WINDOWS-1252", N_("Western") }, { "WINDOWS-1253", N_("Greek") }, { "WINDOWS-1254", N_("Turkish") }, { "WINDOWS-1255", N_("Hebrew") }, { "WINDOWS-1256", N_("Arabic") }, { "WINDOWS-1257", N_("Baltic") }, { "WINDOWS-1258", N_("Vietnamese") }, #if 0 /* These encodings do NOT pass-through ASCII, so are always rejected. * FIXME: why are they in this table; or rather why do we need * the ASCII pass-through requirement? */ { "UTF-7", N_("Unicode") }, { "UTF-16", N_("Unicode") }, { "UCS-2", N_("Unicode") }, { "UCS-4", N_("Unicode") }, { "JOHAB", N_("Korean") }, #endif }; TerminalEncoding * terminal_encoding_new (const char *charset, const char *display_name, gboolean is_custom, gboolean force_valid) { TerminalEncoding *encoding; encoding = g_slice_new (TerminalEncoding); encoding->refcount = 1; encoding->charset = g_intern_string (charset); encoding->name = g_strdup (display_name); encoding->valid = encoding->validity_checked = force_valid || g_str_equal (charset, "UTF-8"); encoding->is_custom = is_custom; encoding->is_active = FALSE; return encoding; } TerminalEncoding* terminal_encoding_ref (TerminalEncoding *encoding) { g_return_val_if_fail (encoding != NULL, NULL); encoding->refcount++; return encoding; } void terminal_encoding_unref (TerminalEncoding *encoding) { if (--encoding->refcount > 0) return; g_free (encoding->name); g_slice_free (TerminalEncoding, encoding); } const char * terminal_encoding_get_charset (TerminalEncoding *encoding) { g_return_val_if_fail (encoding != NULL, NULL); return encoding->charset; } gboolean terminal_encoding_is_valid (TerminalEncoding *encoding) { /* All of the printing ASCII characters from space (32) to the tilde (126) */ static const char ascii_sample[] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"; char *converted; gsize bytes_read = 0, bytes_written = 0; GError *error = NULL; if (encoding->validity_checked) return encoding->valid; /* Test that the encoding is a proper superset of ASCII (which naive * apps are going to use anyway) by attempting to validate the text * using the current encoding. This also flushes out any encodings * which the underlying GIConv implementation can't support. */ converted = g_convert (ascii_sample, sizeof (ascii_sample) - 1, terminal_encoding_get_charset (encoding), "UTF-8", &bytes_read, &bytes_written, &error); /* The encoding is only valid if ASCII passes through cleanly. */ encoding->valid = (bytes_read == (sizeof (ascii_sample) - 1)) && (converted != NULL) && (strcmp (converted, ascii_sample) == 0); #ifdef ENABLE_DEBUG _TERMINAL_DEBUG_IF (TERMINAL_DEBUG_ENCODINGS) { if (!encoding->valid) { _terminal_debug_print (TERMINAL_DEBUG_ENCODINGS, "Rejecting encoding %s as invalid:\n", terminal_encoding_get_charset (encoding)); _terminal_debug_print (TERMINAL_DEBUG_ENCODINGS, " input \"%s\"\n", ascii_sample); _terminal_debug_print (TERMINAL_DEBUG_ENCODINGS, " output \"%s\" bytes read %" G_GSIZE_FORMAT " written %" G_GSIZE_FORMAT "\n", converted ? converted : "(null)", bytes_read, bytes_written); if (error) _terminal_debug_print (TERMINAL_DEBUG_ENCODINGS, " Error: %s\n", error->message); } else _terminal_debug_print (TERMINAL_DEBUG_ENCODINGS, "Encoding %s is valid\n\n", terminal_encoding_get_charset (encoding)); } #endif g_clear_error (&error); g_free (converted); encoding->validity_checked = TRUE; return encoding->valid; } G_DEFINE_BOXED_TYPE (TerminalEncoding, terminal_encoding, terminal_encoding_ref, terminal_encoding_unref); GHashTable * terminal_encodings_get_builtins (void) { GHashTable *encodings_hashtable; guint i; TerminalEncoding *encoding; encodings_hashtable = g_hash_table_new_full (g_str_hash, g_str_equal, NULL, (GDestroyNotify) terminal_encoding_unref); for (i = 0; i < G_N_ELEMENTS (encodings); ++i) { encoding = terminal_encoding_new (encodings[i].charset, _(encodings[i].name), FALSE, FALSE); g_hash_table_insert (encodings_hashtable, (gpointer) terminal_encoding_get_charset (encoding), encoding); } return encodings_hashtable; }