diff options
Diffstat (limited to 'src/iso2022.c')
-rw-r--r-- | src/iso2022.c | 302 |
1 files changed, 165 insertions, 137 deletions
diff --git a/src/iso2022.c b/src/iso2022.c index 79a9e017..f493496c 100644 --- a/src/iso2022.c +++ b/src/iso2022.c @@ -1,19 +1,19 @@ /* * Copyright (C) 2002,2003 Red Hat, Inc. * - * This is free software; you can redistribute it and/or modify it under - * the terms of the GNU Library General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. + * Lesser General Public License for more details. * - * You should have received a copy of the GNU Library General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include <config.h> @@ -37,13 +37,6 @@ #include <gtk/gtk.h> -#if GTK_CHECK_VERSION (2, 90, 7) -#define GDK_KEY(symbol) GDK_KEY_##symbol -#else -#include <gdk/gdkkeysyms.h> -#define GDK_KEY(symbol) GDK_##symbol -#endif - /* Maps which jive with XTerm's ESC ()*+ ? sequences, RFC 1468. Add the * PC437 map because despite knowing that XTerm doesn't support it, certain * applications try to use it anyway. 
*/ @@ -80,10 +73,11 @@ struct _vte_iso2022_state { gunichar g[4]; const gchar *codeset, *native_codeset, *utf8_codeset, *target_codeset; gint ambiguous_width; + gint utf8_ambiguous_width; VteConv conv; _vte_iso2022_codeset_changed_cb_fn codeset_changed; gpointer codeset_changed_data; - VteBuffer *buffer; + VteByteArray *buffer; }; /* DEC Special Character and Line Drawing Set. VT100 and higher (per XTerm @@ -123,7 +117,7 @@ static const struct _vte_iso2022_map16 _vte_iso2022_map_0[] = { }; /* United Kingdom. VT100 and higher (per XTerm docs). */ static const struct _vte_iso2022_map16 _vte_iso2022_map_A[] = { - {'$', GDK_KEY (sterling)}, + {'$', GDK_KEY_sterling}, }; /* US-ASCII (no conversions). VT100 and higher (per XTerm docs). */ static const struct _vte_iso2022_map16 _vte_iso2022_map_B[] = { @@ -131,128 +125,128 @@ static const struct _vte_iso2022_map16 _vte_iso2022_map_B[] = { }; /* Dutch. VT220 and higher (per XTerm docs). */ static const struct _vte_iso2022_map16 _vte_iso2022_map_4[] = { - {'#', GDK_KEY (sterling)}, - {'@', GDK_KEY (threequarters)}, - {'[', GDK_KEY (ydiaeresis)}, - {'\\', GDK_KEY (onehalf)}, - {']', GDK_KEY (bar)}, /* FIXME? not in XTerm 170 */ - {'{', GDK_KEY (diaeresis)}, + {'#', GDK_KEY_sterling}, + {'@', GDK_KEY_threequarters}, + {'[', GDK_KEY_ydiaeresis}, + {'\\', GDK_KEY_onehalf}, + {']', GDK_KEY_bar}, /* FIXME? not in XTerm 170 */ + {'{', GDK_KEY_diaeresis}, {'|', 0x192}, /* f with hook (florin) */ /* FIXME? not in XTerm 170 */ - {'}', GDK_KEY (onequarter)}, - {'~', GDK_KEY (acute)} + {'}', GDK_KEY_onequarter}, + {'~', GDK_KEY_acute}, }; /* Finnish. VT220 and higher (per XTerm docs). 
*/ static const struct _vte_iso2022_map16 _vte_iso2022_map_C[] = { - {'[', GDK_KEY (Adiaeresis)}, - {'\\', GDK_KEY (Odiaeresis)}, - {']', GDK_KEY (Aring)}, - {'^', GDK_KEY (Udiaeresis)}, - {'`', GDK_KEY (eacute)}, - {'{', GDK_KEY (adiaeresis)}, - {'|', GDK_KEY (odiaeresis)}, - {'}', GDK_KEY (aring)}, - {'~', GDK_KEY (udiaeresis)}, + {'[', GDK_KEY_Adiaeresis}, + {'\\', GDK_KEY_Odiaeresis}, + {']', GDK_KEY_Aring}, + {'^', GDK_KEY_Udiaeresis}, + {'`', GDK_KEY_eacute}, + {'{', GDK_KEY_adiaeresis}, + {'|', GDK_KEY_odiaeresis}, + {'}', GDK_KEY_aring}, + {'~', GDK_KEY_udiaeresis}, }; /* French. VT220 and higher (per XTerm docs). */ static const struct _vte_iso2022_map16 _vte_iso2022_map_R[] = { - {'#', GDK_KEY (sterling)}, - {'@', GDK_KEY (agrave)}, - {'[', GDK_KEY (degree)}, - {'\\', GDK_KEY (ccedilla)}, - {']', GDK_KEY (section)}, - {'{', GDK_KEY (eacute)}, - {'|', GDK_KEY (ugrave)}, - {'}', GDK_KEY (egrave)}, - {'~', GDK_KEY (diaeresis)}, + {'#', GDK_KEY_sterling}, + {'@', GDK_KEY_agrave}, + {'[', GDK_KEY_degree}, + {'\\', GDK_KEY_ccedilla}, + {']', GDK_KEY_section}, + {'{', GDK_KEY_eacute}, + {'|', GDK_KEY_ugrave}, + {'}', GDK_KEY_egrave}, + {'~', GDK_KEY_diaeresis}, }; /* French Canadian. VT220 and higher (per XTerm docs). */ static const struct _vte_iso2022_map16 _vte_iso2022_map_Q[] = { - {'@', GDK_KEY (agrave)}, - {'[', GDK_KEY (acircumflex)}, - {'\\', GDK_KEY (ccedilla)}, - {']', GDK_KEY (ecircumflex)}, - {'^', GDK_KEY (icircumflex)}, - {'`', GDK_KEY (ocircumflex)}, - {'{', GDK_KEY (eacute)}, - {'|', GDK_KEY (ugrave)}, - {'}', GDK_KEY (egrave)}, - {'~', GDK_KEY (ucircumflex)}, + {'@', GDK_KEY_agrave}, + {'[', GDK_KEY_acircumflex}, + {'\\', GDK_KEY_ccedilla}, + {']', GDK_KEY_ecircumflex}, + {'^', GDK_KEY_icircumflex}, + {'`', GDK_KEY_ocircumflex}, + {'{', GDK_KEY_eacute}, + {'|', GDK_KEY_ugrave}, + {'}', GDK_KEY_egrave}, + {'~', GDK_KEY_ucircumflex}, }; /* German. VT220 and higher (per XTerm docs). 
*/ static const struct _vte_iso2022_map16 _vte_iso2022_map_K[] = { - {'@', GDK_KEY (section)}, - {'[', GDK_KEY (Adiaeresis)}, - {'\\', GDK_KEY (Odiaeresis)}, - {']', GDK_KEY (Udiaeresis)}, - {'{', GDK_KEY (adiaeresis)}, - {'|', GDK_KEY (odiaeresis)}, - {'}', GDK_KEY (udiaeresis)}, - {'~', GDK_KEY (ssharp)}, + {'@', GDK_KEY_section}, + {'[', GDK_KEY_Adiaeresis}, + {'\\', GDK_KEY_Odiaeresis}, + {']', GDK_KEY_Udiaeresis}, + {'{', GDK_KEY_adiaeresis}, + {'|', GDK_KEY_odiaeresis}, + {'}', GDK_KEY_udiaeresis}, + {'~', GDK_KEY_ssharp}, }; /* Italian. VT220 and higher (per XTerm docs). */ static const struct _vte_iso2022_map16 _vte_iso2022_map_Y[] = { - {'#', GDK_KEY (sterling)}, - {'@', GDK_KEY (section)}, - {'[', GDK_KEY (degree)}, - {'\\', GDK_KEY (ccedilla)}, - {']', GDK_KEY (eacute)}, - {'`', GDK_KEY (ugrave)}, - {'{', GDK_KEY (agrave)}, - {'|', GDK_KEY (ograve)}, - {'}', GDK_KEY (egrave)}, - {'~', GDK_KEY (igrave)}, + {'#', GDK_KEY_sterling}, + {'@', GDK_KEY_section}, + {'[', GDK_KEY_degree}, + {'\\', GDK_KEY_ccedilla}, + {']', GDK_KEY_eacute}, + {'`', GDK_KEY_ugrave}, + {'{', GDK_KEY_agrave}, + {'|', GDK_KEY_ograve}, + {'}', GDK_KEY_egrave}, + {'~', GDK_KEY_igrave}, }; /* Norwegian and Danish. VT220 and higher (per XTerm docs). */ static const struct _vte_iso2022_map16 _vte_iso2022_map_E[] = { - {'@', GDK_KEY (Adiaeresis)}, - {'[', GDK_KEY (AE)}, - {'\\', GDK_KEY (Ooblique)}, - {']', GDK_KEY (Aring)}, - {'^', GDK_KEY (Udiaeresis)}, - {'`', GDK_KEY (adiaeresis)}, - {'{', GDK_KEY (ae)}, - {'|', GDK_KEY (oslash)}, - {'}', GDK_KEY (aring)}, - {'~', GDK_KEY (udiaeresis)}, + {'@', GDK_KEY_Adiaeresis}, + {'[', GDK_KEY_AE}, + {'\\', GDK_KEY_Ooblique}, + {']', GDK_KEY_Aring}, + {'^', GDK_KEY_Udiaeresis}, + {'`', GDK_KEY_adiaeresis}, + {'{', GDK_KEY_ae}, + {'|', GDK_KEY_oslash}, + {'}', GDK_KEY_aring}, + {'~', GDK_KEY_udiaeresis}, }; /* Spanish. VT220 and higher (per XTerm docs). 
*/ static const struct _vte_iso2022_map16 _vte_iso2022_map_Z[] = { - {'#', GDK_KEY (sterling)}, - {'@', GDK_KEY (section)}, - {'[', GDK_KEY (exclamdown)}, - {'\\', GDK_KEY (Ntilde)}, - {']', GDK_KEY (questiondown)}, - {'{', GDK_KEY (degree)}, - {'|', GDK_KEY (ntilde)}, - {'}', GDK_KEY (ccedilla)}, + {'#', GDK_KEY_sterling}, + {'@', GDK_KEY_section}, + {'[', GDK_KEY_exclamdown}, + {'\\', GDK_KEY_Ntilde}, + {']', GDK_KEY_questiondown}, + {'{', GDK_KEY_degree}, + {'|', GDK_KEY_ntilde}, + {'}', GDK_KEY_ccedilla}, }; /* Swedish. VT220 and higher (per XTerm docs). */ static const struct _vte_iso2022_map16 _vte_iso2022_map_H[] = { - {'@', GDK_KEY (Eacute)}, - {'[', GDK_KEY (Adiaeresis)}, - {'\\', GDK_KEY (Odiaeresis)}, - {']', GDK_KEY (Aring)}, - {'^', GDK_KEY (Udiaeresis)}, - {'`', GDK_KEY (eacute)}, - {'{', GDK_KEY (adiaeresis)}, - {'|', GDK_KEY (odiaeresis)}, - {'}', GDK_KEY (aring)}, - {'~', GDK_KEY (udiaeresis)}, + {'@', GDK_KEY_Eacute}, + {'[', GDK_KEY_Adiaeresis}, + {'\\', GDK_KEY_Odiaeresis}, + {']', GDK_KEY_Aring}, + {'^', GDK_KEY_Udiaeresis}, + {'`', GDK_KEY_eacute}, + {'{', GDK_KEY_adiaeresis}, + {'|', GDK_KEY_odiaeresis}, + {'}', GDK_KEY_aring}, + {'~', GDK_KEY_udiaeresis}, }; /* Swiss. VT220 and higher (per XTerm docs). 
*/ static const struct _vte_iso2022_map16 _vte_iso2022_map_equal[] = { - {'#', GDK_KEY (ugrave)}, - {'@', GDK_KEY (agrave)}, - {'[', GDK_KEY (eacute)}, - {'\\', GDK_KEY (ccedilla)}, - {']', GDK_KEY (ecircumflex)}, - {'^', GDK_KEY (icircumflex)}, - {'_', GDK_KEY (egrave)}, - {'`', GDK_KEY (ocircumflex)}, - {'{', GDK_KEY (adiaeresis)}, - {'|', GDK_KEY (odiaeresis)}, - {'}', GDK_KEY (udiaeresis)}, - {'~', GDK_KEY (ucircumflex)}, + {'#', GDK_KEY_ugrave}, + {'@', GDK_KEY_agrave}, + {'[', GDK_KEY_eacute}, + {'\\', GDK_KEY_ccedilla}, + {']', GDK_KEY_ecircumflex}, + {'^', GDK_KEY_icircumflex}, + {'_', GDK_KEY_egrave}, + {'`', GDK_KEY_ocircumflex}, + {'{', GDK_KEY_adiaeresis}, + {'|', GDK_KEY_odiaeresis}, + {'}', GDK_KEY_udiaeresis}, + {'~', GDK_KEY_ucircumflex}, }; /* Codepage 437. */ static const struct _vte_iso2022_map16 _vte_iso2022_map_U[] = { @@ -345,15 +339,29 @@ _vte_iso2022_ambiguous_width(struct _vte_iso2022_state *state) * current locale is UTF-8. */ if (strcmp (codeset, "utf8") == 0) { - const char *env = g_getenv ("VTE_CJK_WIDTH"); - if (env && (g_ascii_strcasecmp (env, "wide")==0 || g_ascii_strcasecmp (env, "1")==0)) - return 2; + return state->utf8_ambiguous_width; } /* Not in the list => not wide. */ return 1; } +static gboolean +_vte_unichar_iswide_cjk(gunichar c) +{ + /* U+254C..U+254F and U+2574..U+257F have East Asian Width property + * N (neutral) while all the other line drawing characters are + * A (ambiguous). This makes those characters not match up with those + * other line drawing characters from the Box Drawing (U+2500..U+257F) + * block; so we treat them as ambiguous too. + * + * And we also do the same for the Terminal Graphic Characters + * (U+2596..U+259F) as well as U+2590 and U+2591 from the Block Elements + * (U+2580..U+259F) block. 
+ */ + return G_UNLIKELY(c >= 0x2500 && c <= 0x259f); +} + static inline gboolean _vte_iso2022_is_ambiguous(gunichar c) { @@ -361,7 +369,7 @@ _vte_iso2022_is_ambiguous(gunichar c) return FALSE; if (G_UNLIKELY (g_unichar_iszerowidth (c))) return FALSE; - return G_UNLIKELY (!g_unichar_iswide (c) && g_unichar_iswide_cjk (c)); + return G_UNLIKELY (!g_unichar_iswide (c) && (g_unichar_iswide_cjk (c) || _vte_unichar_iswide_cjk (c))); } int @@ -748,10 +756,14 @@ _vte_iso2022_set_encoded_width(gunichar c, int width) struct _vte_iso2022_state * _vte_iso2022_state_new(const char *native_codeset, + int utf8_ambiguous_width, _vte_iso2022_codeset_changed_cb_fn fn, gpointer data) { struct _vte_iso2022_state *state; + + g_return_val_if_fail(utf8_ambiguous_width == 1 || utf8_ambiguous_width == 2, NULL); + state = g_slice_new0(struct _vte_iso2022_state); state->nrc_enabled = TRUE; state->current = 0; @@ -760,13 +772,13 @@ _vte_iso2022_state_new(const char *native_codeset, state->g[1] = 'B'; state->g[2] = 'B'; state->g[3] = 'B'; - state->codeset = native_codeset; - state->native_codeset = state->codeset; + state->native_codeset = state->codeset = g_intern_string(native_codeset); if (native_codeset == NULL) { - g_get_charset(&state->codeset); - state->native_codeset = state->codeset; - } - state->utf8_codeset = "UTF-8"; + const char *codeset; + g_get_charset(&codeset); + state->native_codeset = state->codeset = g_intern_string(codeset); + } + state->utf8_codeset = g_intern_string("UTF-8"); state->target_codeset = VTE_CONV_GUNICHAR_TYPE; _vte_debug_print(VTE_DEBUG_SUBSTITUTION, "Native codeset \"%s\", currently %s\n", @@ -774,7 +786,7 @@ _vte_iso2022_state_new(const char *native_codeset, state->conv = _vte_conv_open(state->target_codeset, state->codeset); state->codeset_changed = fn; state->codeset_changed_data = data; - state->buffer = _vte_buffer_new(); + state->buffer = _vte_byte_array_new(); if (state->conv == VTE_INVALID_CONV) { g_warning(_("Unable to convert characters from %s to 
%s."), state->codeset, state->target_codeset); @@ -788,14 +800,14 @@ _vte_iso2022_state_new(const char *native_codeset, state->codeset, state->target_codeset); } } - state->ambiguous_width = _vte_iso2022_ambiguous_width(state); + _vte_iso2022_state_set_utf8_ambiguous_width(state, utf8_ambiguous_width); return state; } void _vte_iso2022_state_free(struct _vte_iso2022_state *state) { - _vte_buffer_free(state->buffer); + _vte_byte_array_free(state->buffer); if (state->conv != VTE_INVALID_CONV) { _vte_conv_close(state->conv); } @@ -825,6 +837,7 @@ _vte_iso2022_state_set_codeset(struct _vte_iso2022_state *state, state->codeset = g_intern_string (codeset); state->conv = conv; state->ambiguous_width = _vte_iso2022_ambiguous_width (state); + } const char * @@ -833,6 +846,14 @@ _vte_iso2022_state_get_codeset(struct _vte_iso2022_state *state) return state->codeset; } +void +_vte_iso2022_state_set_utf8_ambiguous_width(struct _vte_iso2022_state *state, + int utf8_ambiguous_width) +{ + state->utf8_ambiguous_width = utf8_ambiguous_width; + state->ambiguous_width = _vte_iso2022_ambiguous_width(state); +} + static const guchar * _vte_iso2022_find_nextctl(const guchar *p, const guchar * const q) { @@ -1140,7 +1161,7 @@ process_8_bit_sequence(struct _vte_iso2022_state *state, static glong process_cdata(struct _vte_iso2022_state *state, const guchar *cdata, gsize length, - GArray *gunichars) + gboolean incomplete_is_invalid, GArray *gunichars) { int ambiguous_width; glong processed = 0; @@ -1169,7 +1190,7 @@ process_cdata(struct _vte_iso2022_state *state, const guchar *cdata, gsize lengt if (!state->nrc_enabled || (state->g[current] == 'B')) { inbuf = cdata; inbytes = length; - _vte_buffer_set_minimum_size(state->buffer, + _vte_byte_array_set_minimum_size(state->buffer, sizeof(gunichar) * length * 2); buf = (gunichar *)state->buffer->data; outbuf = buf; @@ -1181,6 +1202,9 @@ process_cdata(struct _vte_iso2022_state *state, const guchar *cdata, gsize lengt stop = FALSE; switch 
(converted) { case ((gsize)-1): + if (errno == EINVAL && incomplete_is_invalid) { + errno = EILSEQ; + } switch (errno) { case EILSEQ: /* Check if it's an 8-bit sequence. */ @@ -1611,6 +1635,7 @@ process_block (struct _vte_iso2022_state *state, guchar *input, struct _vte_iso2022_block *block, gboolean last, + gboolean incomplete_is_invalid, GArray *gunichars) { guint preserve_last = -1; @@ -1644,6 +1669,7 @@ process_block (struct _vte_iso2022_state *state, block->end - block->start - initial, + incomplete_is_invalid, gunichars); if (j == 0) { break; @@ -1699,6 +1725,7 @@ _vte_iso2022_process(struct _vte_iso2022_state *state, preserve_last = process_block (state, input, &block, TRUE, + FALSE, gunichars); break; } @@ -1707,7 +1734,7 @@ _vte_iso2022_process(struct _vte_iso2022_state *state, block.type = _vte_iso2022_cdata; block.start = p - input; block.end = nextctl - input; - process_block (state, input, &block, FALSE, gunichars); + process_block (state, input, &block, FALSE, TRUE, gunichars); } /* Move on to the control data. 
*/ p = nextctl; @@ -1742,6 +1769,7 @@ _vte_iso2022_process(struct _vte_iso2022_state *state, preserve_last = process_block (state, input, &block, FALSE, + FALSE, gunichars); } while (p < q); if (preserve_last != (guint) -1) { @@ -1757,7 +1785,7 @@ _vte_iso2022_process(struct _vte_iso2022_state *state, int main(int argc, char **argv) { - VteBuffer *buffer; + VteByteArray *buffer; struct _vte_iso2022_state *state; GString *string; GArray *gunichars; @@ -1784,8 +1812,8 @@ main(int argc, char **argv) FILE *fp; guchar b; - state = _vte_iso2022_state_new(NULL, NULL, NULL); - buffer = _vte_buffer_new(); + state = _vte_iso2022_state_new(NULL, VTE_ISO2022_DEFAULT_UTF8_AMBIGUOUS_WIDTH, NULL, NULL); + buffer = _vte_byte_array_new(); gunichars = g_array_new(FALSE, FALSE, sizeof(gunichar)); if (argc > 1) { string = g_string_new(NULL); @@ -1802,20 +1830,20 @@ main(int argc, char **argv) fclose(fp); } } - _vte_buffer_append(buffer, string->str, string->len); - _vte_iso2022_process(state, buffer->data, _vte_buffer_length (buffer), gunichars); + _vte_byte_array_append(buffer, string->str, string->len); + _vte_iso2022_process(state, buffer->data, _vte_byte_array_length (buffer), gunichars); g_string_free(string, TRUE); } else { for (i = 0; i < G_N_ELEMENTS(strings); i++) { string = g_string_new(strings[i].s); - _vte_buffer_append(buffer, string->str, string->len); + _vte_byte_array_append(buffer, string->str, string->len); g_string_free(string, TRUE); if (strings[i].process) { - _vte_iso2022_process(state, buffer->data, _vte_buffer_length (buffer), gunichars); + _vte_iso2022_process(state, buffer->data, _vte_byte_array_length (buffer), gunichars); } } } - _vte_buffer_free(buffer); + _vte_byte_array_free(buffer); _vte_iso2022_state_free(state); string = g_string_new(NULL); |