diff options
author | Behdad Esfahbod <behdad@gnome.org> | 2008-12-12 06:57:09 +0000 |
---|---|---|
committer | Behdad Esfahbod <behdad@src.gnome.org> | 2008-12-12 06:57:09 +0000 |
commit | 646a71162cabe13f96627ea6cbeb91864e9fc625 (patch) | |
tree | 1ee118dae12bfddba6ec9e6d929efd1e4b0766e2 | |
parent | 2d0c1717a14e5c735877d45d79668d5519f0e838 (diff) | |
download | vte-646a71162cabe13f96627ea6cbeb91864e9fc625.tar.gz |
Bug 149631 – gnome-terminal doesn't combine combining chars in utf8
2008-12-12 Behdad Esfahbod <behdad@gnome.org>
Bug 149631 – gnome-terminal doesn't combine combining chars in utf8
* src/vteunistr.c:
* src/vteunistr.h:
An extended UTF-32 type that assigns numeric values to UTF-8 sequences
on demand. Can be used to efficiently store a string, instead of one
character, at each cell.
* src/vte-private.h:
* src/vte.c:
* src/vtedraw.c:
* src/vtedraw.h:
* src/vtepangocairo.c:
Port to vteunistr instead of gunichar for cell content.
* src/vte.c:
Handle zero-width insertions by attaching them to the previous cell.
* src/iso2022.h:
* src/iso2022.c:
Clean up ambiguous-width handling. Handle zero-width chars.
svn path=/trunk/; revision=2340
-rw-r--r-- | ChangeLog | 26 | ||||
-rw-r--r-- | src/Makefile.am | 2 | ||||
-rw-r--r-- | src/iso2022.c | 90 | ||||
-rw-r--r-- | src/iso2022.h | 5 | ||||
-rw-r--r-- | src/vte-private.h | 3 | ||||
-rw-r--r-- | src/vte.c | 103 | ||||
-rw-r--r-- | src/vtedraw.c | 6 | ||||
-rw-r--r-- | src/vtedraw.h | 11 | ||||
-rw-r--r-- | src/vtepangocairo.c | 127 | ||||
-rw-r--r-- | src/vteunistr.c | 136 | ||||
-rw-r--r-- | src/vteunistr.h | 47 |
11 files changed, 421 insertions, 135 deletions
@@ -1,3 +1,27 @@ +2008-12-12 Behdad Esfahbod <behdad@gnome.org> + + Bug 149631 – gnome-terminal doesn't combine combining chars in utf8 + + * src/vteunistr.c: + * src/vteunistr.h: + An extended UTF-32 type that assigns numeric values to UTF-8 sequences + on demand. Can be used to efficiently store a string, instead of one + character, at each cell. + + * src/vte-private.h: + * src/vte.c: + * src/vtedraw.c: + * src/vtedraw.h: + * src/vtepangocairo.c: + Port to vteunistr instead of gunichar for cell content. + + * src/vte.c: + Handle zerowidth insertions by sticking it on the previous cell. + + * src/iso2022.h: + * src/iso2022.c: + Cleanup ambiguous-width handling. Handle zero-width chars. + 2008-12-11 Christian Persch <chpe@gnome.org> Bug 564057 – src/pty.c does not compile with @@ -16,7 +40,7 @@ Bug 562695 - ship pkg-config file for python bindings * configure.in: - * python/Makefile.am: + * python/Makefile.am: * python/pyvte.pc.in: 2008-12-08 Behdad Esfahbod <behdad@gnome.org> diff --git a/src/Makefile.am b/src/Makefile.am index 46e953c7..cc766359 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -80,6 +80,8 @@ libvte_la_SOURCES = \ vtetree.h \ vtetypebuiltins.c \ vtetypebuiltins.h \ + vteunistr.c \ + vteunistr.h \ vteversion.h $(NULL) diff --git a/src/iso2022.c b/src/iso2022.c index d67307aa..94e7bb8e 100644 --- a/src/iso2022.c +++ b/src/iso2022.c @@ -284,39 +284,26 @@ _vte_direct_compare(gconstpointer a, gconstpointer b) return GPOINTER_TO_INT(a) - GPOINTER_TO_INT(b); } -static inline gboolean -_vte_iso2022_is_ambiguous(gunichar c) -{ - /* ASCII chars are not ambiguous */ - if (G_LIKELY (c < 0x80)) - return FALSE; - - return g_unichar_iswide (c) != g_unichar_iswide_cjk (c); -} - /* If we only have a codepoint, guess what the ambiguous width should be based * on the default region. Just hope we don't do this too often. 
*/ static int _vte_iso2022_ambiguous_width_guess(void) { static int guess; - if (guess == 0) { + if (G_UNLIKELY (guess == 0)) { const char *lang = NULL; guess = 1; - if ((lang == NULL) && (g_getenv("LC_ALL") != NULL)) { + if (lang == NULL) lang = g_getenv("LC_ALL"); - } - if ((lang == NULL) && (g_getenv("LC_CTYPE") != NULL)) { + if (lang == NULL) lang = g_getenv("LC_CTYPE"); - } - if ((lang == NULL) && (g_getenv("LANG") != NULL)) { + if (lang == NULL) lang = g_getenv("LANG"); - } - if (lang != NULL) { + if (lang) { if (g_ascii_strncasecmp(lang, "ja", 2) == 0 || - g_ascii_strncasecmp(lang, "ko", 2) == 0 || - g_ascii_strncasecmp(lang, "vi", 2) == 0 || - g_ascii_strncasecmp(lang, "zh", 2) == 0) { + g_ascii_strncasecmp(lang, "ko", 2) == 0 || + g_ascii_strncasecmp(lang, "vi", 2) == 0 || + g_ascii_strncasecmp(lang, "zh", 2) == 0) { guess = 2; } } @@ -357,12 +344,11 @@ _vte_iso2022_ambiguous_width(struct _vte_iso2022_state *state) /* Sort-of canonify the encoding name. */ i = j = 0; for (i = 0; state->codeset[i] != '\0'; i++) { - if (g_ascii_isalnum(state->codeset[i])) { + if (g_ascii_isalnum(state->codeset[i])) codeset[j++] = g_ascii_tolower(state->codeset[i]); - } - if (j >= sizeof(codeset) - 1) { + + if (j >= sizeof(codeset) - 1) break; - } } codeset[j] = '\0'; @@ -377,11 +363,11 @@ _vte_iso2022_ambiguous_width(struct _vte_iso2022_state *state) * Decide the ambiguous width according to the default region if * current locale is UTF-8. 
*/ - if (strcmp (codeset, "utf8") == 0 && g_getenv("VTE_CJK_WIDTH") != NULL) { + if (strcmp (codeset, "utf8") == 0) { const char *env = g_getenv ("VTE_CJK_WIDTH"); - if ((g_ascii_strcasecmp (env, "narrow")==0) || (g_ascii_strcasecmp (env, "0")==0)) + if (env && (g_ascii_strcasecmp (env, "narrow")==0 || g_ascii_strcasecmp (env, "0")==0)) return 1; - if ((g_ascii_strcasecmp (env, "wide")==0) || (g_ascii_strcasecmp (env, "1")==0)) + if (env && (g_ascii_strcasecmp (env, "wide")==0 || g_ascii_strcasecmp (env, "1")==0)) return 2; else return _vte_iso2022_ambiguous_width_guess (); @@ -391,6 +377,33 @@ _vte_iso2022_ambiguous_width(struct _vte_iso2022_state *state) return 1; } +static inline gboolean +_vte_iso2022_is_ambiguous(gunichar c) +{ + if (G_LIKELY (c < 0x80)) + return FALSE; + if (G_UNLIKELY (g_unichar_iszerowidth (c))) + return FALSE; + return G_UNLIKELY (!g_unichar_iswide (c) && g_unichar_iswide_cjk (c)); +} + +int +_vte_iso2022_unichar_width(struct _vte_iso2022_state *state, + gunichar c) +{ + if (G_LIKELY (c < 0x80)) + return 1; + if (G_UNLIKELY (g_unichar_iszerowidth (c))) + return 0; + if (G_UNLIKELY (g_unichar_iswide (c))) + return 2; + if (G_LIKELY (state->ambiguous_width == 1)) + return 1; + if (G_UNLIKELY (g_unichar_iswide_cjk (c))) + return 2; + return 1; +} + static GHashTable * _vte_iso2022_map_init(const struct _vte_iso2022_map *map, gssize length) { @@ -722,16 +735,16 @@ _vte_iso2022_map_get(gunichar mapname, } } -gssize +int _vte_iso2022_get_encoded_width(gunichar c) { - gssize width; + int width; width = (c & VTE_ISO2022_ENCODED_WIDTH_MASK) >> VTE_ISO2022_ENCODED_WIDTH_BIT_OFFSET; return CLAMP(width, 0, 2); } static gunichar -_vte_iso2022_set_encoded_width(gunichar c, gssize width) +_vte_iso2022_set_encoded_width(gunichar c, int width) { width = CLAMP(width, 0, 2); c &= ~(VTE_ISO2022_ENCODED_WIDTH_MASK); @@ -817,7 +830,7 @@ _vte_iso2022_state_set_codeset(struct _vte_iso2022_state *state, } state->codeset = g_intern_string (codeset); state->conv = 
conv; - state->ambiguous_width = _vte_iso2022_ambiguous_width(state); + state->ambiguous_width = _vte_iso2022_ambiguous_width (state); } const char * @@ -1739,19 +1752,6 @@ _vte_iso2022_process(struct _vte_iso2022_state *state, return length; } -gssize -_vte_iso2022_unichar_width(gunichar c) -{ - c = c & ~(VTE_ISO2022_ENCODED_WIDTH_MASK); /* just in case */ - if (G_UNLIKELY (_vte_iso2022_is_ambiguous(c))) { - return _vte_iso2022_ambiguous_width_guess(); - } - if (g_unichar_iswide(c)) { - return 2; - } - return 1; -} - #ifdef ISO2022_MAIN #include <stdio.h> int diff --git a/src/iso2022.h b/src/iso2022.h index aad2e75e..cb54f819 100644 --- a/src/iso2022.h +++ b/src/iso2022.h @@ -49,8 +49,9 @@ void _vte_iso2022_state_free(struct _vte_iso2022_state *); #define VTE_ISO2022_ENCODED_WIDTH_BIT_OFFSET 28 #define VTE_ISO2022_ENCODED_WIDTH_MASK (3 << VTE_ISO2022_ENCODED_WIDTH_BIT_OFFSET) #define VTE_ISO2022_HAS_ENCODED_WIDTH(__c) (((__c) & VTE_ISO2022_ENCODED_WIDTH_MASK) != 0) -gssize _vte_iso2022_get_encoded_width(gunichar c); -gssize _vte_iso2022_unichar_width(gunichar c); +int _vte_iso2022_get_encoded_width(gunichar c); +int _vte_iso2022_unichar_width(struct _vte_iso2022_state *state, + gunichar c); G_END_DECLS diff --git a/src/vte-private.h b/src/vte-private.h index de961f79..d43cba46 100644 --- a/src/vte-private.h +++ b/src/vte-private.h @@ -41,6 +41,7 @@ #include <unistd.h> #include <glib/gi18n-lib.h> +#include "vteunistr.h" #include "vte.h" #include "buffer.h" #include "debug.h" @@ -98,7 +99,7 @@ G_BEGIN_DECLS /* The structure we use to hold characters we're supposed to display -- this * includes any supported visible attributes. */ struct vte_charcell { - gunichar c; /* The Unicode character. */ + vteunistr c; /* The Unicode string for the cell. 
*/ struct vte_charcell_attr { guint32 columns: 4; /* Number of visible columns @@ -669,7 +669,7 @@ vte_terminal_preedit_width(VteTerminal *terminal, gboolean left_only) (!left_only || (i < terminal->pvt->im_preedit_cursor)); i++) { c = g_utf8_get_char(preedit); - ret += _vte_iso2022_unichar_width(c); + ret += _vte_iso2022_unichar_width(terminal->pvt->iso2022, c); preedit = g_utf8_next_char(preedit); } } @@ -3069,7 +3069,7 @@ _vte_terminal_insert_char(VteTerminal *terminal, gunichar c, if (G_UNLIKELY (screen->alternate_charset)) { _vte_debug_print(VTE_DEBUG_SUBSTITUTION, "Attempting charset substitution" - "for 0x%04x.\n", c); + "for U+%04X.\n", c); /* See if there's a mapping for it. */ c = _vte_iso2022_process_single(terminal->pvt->iso2022, c, '0'); } @@ -3086,12 +3086,13 @@ _vte_terminal_insert_char(VteTerminal *terminal, gunichar c, columns = _vte_iso2022_get_encoded_width(c); c &= ~VTE_ISO2022_ENCODED_WIDTH_MASK; } else { - columns = _vte_iso2022_unichar_width(c); + columns = _vte_iso2022_unichar_width(terminal->pvt->iso2022, c); } + /* If we're autowrapping here, do it. */ col = screen->cursor_current.col; - if (G_UNLIKELY (col + columns > terminal->column_count)) { + if (G_UNLIKELY (columns && col + columns > terminal->column_count)) { if (terminal->pvt->flags.am) { _vte_debug_print(VTE_DEBUG_ADJ, "Autowrapping before character\n"); @@ -3118,6 +3119,73 @@ _vte_terminal_insert_char(VteTerminal *terminal, gunichar c, col, columns, (long)screen->cursor_current.row, (long)screen->insert_delta); + + if (G_UNLIKELY (columns == 0)) { + + /* It's a combining mark */ + + long row_num; + struct vte_charcell *cell; + + _vte_debug_print(VTE_DEBUG_PARSE, "combining U+%04X", c); + + row_num = screen->cursor_current.row; + row = NULL; + if (col == 0) { + /* We are at first column. See if the previous line softwrapped. + * If it did, move there. Otherwise skip inserting. 
*/ + + if (row_num > 0) { + row_num--; + row = _vte_terminal_find_row_data (terminal, row_num); + + if (!row->soft_wrapped) + row = NULL; + else + col = row->cells->len; + } + } else { + row = _vte_terminal_find_row_data (terminal, row_num); + } + + if (G_UNLIKELY (!row || !col)) + goto not_inserted; + + /* Combine it on the previous cell */ + + col--; + cell = _vte_row_data_find_charcell(row, col); + + if (G_UNLIKELY (!cell)) + goto not_inserted; + + /* Find the previous cell */ + while (cell->attr.fragment && col > 0) { + cell = _vte_row_data_find_charcell(row, --col); + } + if (G_UNLIKELY (!cell || cell->c == '\t')) + goto not_inserted; + + /* Combine the new character on top of the cell string */ + c = _vte_unistr_append_unichar (cell->c, c); + + /* And set it */ + columns = cell->attr.columns; + for (i = 0; i < columns; i++) { + cell = _vte_row_data_find_charcell(row, col++); + cell->c = c; + } + + /* Always invalidate since we put the mark on the *previous* cell + * and the higher level code doesn't know this. */ + _vte_invalidate_cells(terminal, + col - columns, + columns, + row_num, 1); + + goto done; + } + /* Make sure we have enough rows to hold this data. */ row = vte_terminal_ensure_cursor (terminal); g_assert(row != NULL); @@ -3207,9 +3275,11 @@ _vte_terminal_insert_char(VteTerminal *terminal, gunichar c, } } +done: /* We added text, so make a note of it. 
*/ terminal->pvt->text_inserted_flag = TRUE; +not_inserted: _vte_debug_print(VTE_DEBUG_ADJ|VTE_DEBUG_PARSE, "insertion delta => %ld.\n", (long)screen->insert_delta); @@ -5193,7 +5263,7 @@ vte_same_class(VteTerminal *terminal, glong acol, glong arow, struct vte_charcell *pcell = NULL; gboolean word_char; if ((pcell = vte_terminal_find_charcell(terminal, acol, arow)) != NULL && pcell->c != 0) { - word_char = vte_terminal_is_word_char(terminal, pcell->c); + word_char = vte_terminal_is_word_char(terminal, _vte_unistr_get_base (pcell->c)); /* Lets not group non-wordchars together (bug #25290) */ if (!word_char) @@ -5204,7 +5274,7 @@ vte_same_class(VteTerminal *terminal, glong acol, glong arow, return FALSE; } if (word_char != vte_terminal_is_word_char(terminal, - pcell->c)) { + _vte_unistr_get_base (pcell->c))) { return FALSE; } return TRUE; @@ -5846,21 +5916,19 @@ vte_terminal_get_text_range_maybe_wrapped(VteTerminal *terminal, attr.underline = pcell->attr.underline; attr.strikethrough = pcell->attr.strikethrough; - /* Store the character. */ - string = g_string_append_unichar(string, - pcell->c ? - pcell->c : - ' '); + /* Store the cell string */ if (pcell->c == 0) { + g_string_append_c (string, ' '); last_empty = string->len; last_emptycol = col; } else { + _vte_unistr_append_to_string (pcell->c, string); last_nonempty = string->len; last_nonemptycol = col; } - /* If we added a character to the string, record its - * attributes, one per char. */ + /* If we added text to the string, record its + * attributes, one per byte. */ if (attributes) { vte_g_array_fill(attributes, &attr, string->len); @@ -8744,7 +8812,7 @@ vte_terminal_determine_colors(VteTerminal *terminal, /* Check if a unicode character is actually a graphic character we draw * ourselves to handle cases where fonts don't have glyphs for them. 
*/ static gboolean -vte_unichar_is_local_graphic(gunichar c) +vte_unichar_is_local_graphic(vteunistr c) { if ((c >= 0x2500) && (c <= 0x257f)) { return TRUE; @@ -8783,7 +8851,7 @@ vte_unichar_is_local_graphic(gunichar c) return FALSE; } static gboolean -vte_terminal_unichar_is_local_graphic(VteTerminal *terminal, gunichar c) +vte_terminal_unichar_is_local_graphic(VteTerminal *terminal, vteunistr c) { return vte_unichar_is_local_graphic (c) && !_vte_draw_has_char (terminal->pvt->draw, c); @@ -8870,7 +8938,7 @@ vte_terminal_draw_point(VteTerminal *terminal, /* Draw the graphic representation of a line-drawing or special graphics * character. */ static gboolean -vte_terminal_draw_graphic(VteTerminal *terminal, gunichar c, +vte_terminal_draw_graphic(VteTerminal *terminal, vteunistr c, gint fore, gint back, gboolean draw_default_bg, gint x, gint y, gint column_width, gint columns, gint row_height) @@ -10708,7 +10776,8 @@ vte_terminal_paint_im_preedit_string(VteTerminal *terminal) items = g_new(struct _vte_draw_text_request, len); for (i = columns = 0; i < len; i++) { items[i].c = g_utf8_get_char(preedit); - items[i].columns = _vte_iso2022_unichar_width(items[i].c); + items[i].columns = _vte_iso2022_unichar_width(terminal->pvt->iso2022, + items[i].c); items[i].x = (col + columns) * width; items[i].y = row * height; columns += items[i].columns; diff --git a/src/vtedraw.c b/src/vtedraw.c index 76628ace..494d38df 100644 --- a/src/vtedraw.c +++ b/src/vtedraw.c @@ -312,7 +312,7 @@ _vte_draw_get_text_metrics(struct _vte_draw *draw, } int -_vte_draw_get_char_width (struct _vte_draw *draw, gunichar c, int columns) +_vte_draw_get_char_width (struct _vte_draw *draw, vteunistr c, int columns) { int width = 0; @@ -370,11 +370,11 @@ _vte_draw_char (struct _vte_draw *draw, return has_char; } gboolean -_vte_draw_has_char (struct _vte_draw *draw, gunichar c) +_vte_draw_has_char (struct _vte_draw *draw, vteunistr c) { gboolean has_char = TRUE; - _vte_debug_print (VTE_DEBUG_DRAW, 
"draw_has_char ('%c')\n", c); + _vte_debug_print (VTE_DEBUG_DRAW, "draw_has_char ('0x%04X')\n", c); if (draw->impl->has_char) has_char = draw->impl->has_char (draw, c); diff --git a/src/vtedraw.h b/src/vtedraw.h index dbf29261..df27ce63 100644 --- a/src/vtedraw.h +++ b/src/vtedraw.h @@ -26,6 +26,7 @@ #include <gtk/gtk.h> #include "vtebg.h" #include "vte.h" +#include "vteunistr.h" G_BEGIN_DECLS @@ -55,7 +56,7 @@ struct _vte_draw; corner of the cell into which the character will be drawn instead of the left end of the baseline. */ struct _vte_draw_text_request { - gunichar c; + vteunistr c; gshort x, y, columns; }; @@ -86,11 +87,11 @@ struct _vte_draw_impl { const PangoFontDescription *, VteTerminalAntiAlias); void (*get_text_metrics)(struct _vte_draw *, gint *, gint *, gint *); - int (*get_char_width)(struct _vte_draw *, gunichar c, int columns); + int (*get_char_width)(struct _vte_draw *, vteunistr c, int columns); void (*draw_text)(struct _vte_draw *, struct _vte_draw_text_request *, gsize, GdkColor *, guchar); - gboolean (*has_char)(struct _vte_draw *, gunichar); + gboolean (*has_char)(struct _vte_draw *, vteunistr); void (*draw_rectangle)(struct _vte_draw *, gint, gint, gint, gint, GdkColor *, guchar); @@ -150,7 +151,7 @@ void _vte_draw_set_text_font(struct _vte_draw *draw, VteTerminalAntiAlias anti_alias); void _vte_draw_get_text_metrics(struct _vte_draw *draw, gint *width, gint *height, gint *ascent); -int _vte_draw_get_char_width(struct _vte_draw *draw, gunichar c, int columns); +int _vte_draw_get_char_width(struct _vte_draw *draw, vteunistr c, int columns); void _vte_draw_text(struct _vte_draw *draw, struct _vte_draw_text_request *requests, gsize n_requests, @@ -158,7 +159,7 @@ void _vte_draw_text(struct _vte_draw *draw, gboolean _vte_draw_char(struct _vte_draw *draw, struct _vte_draw_text_request *request, GdkColor *color, guchar alpha); -gboolean _vte_draw_has_char(struct _vte_draw *draw, gunichar c); +gboolean _vte_draw_has_char(struct _vte_draw *draw, 
vteunistr c); void _vte_draw_fill_rectangle(struct _vte_draw *draw, gint x, gint y, gint width, gint height, diff --git a/src/vtepangocairo.c b/src/vtepangocairo.c index 5e78fa16..6500cc3d 100644 --- a/src/vtepangocairo.c +++ b/src/vtepangocairo.c @@ -45,16 +45,16 @@ * - We attach a font_info to draw as our private data. A font_info has * all the information to quickly draw text. * - * - A font_info keeps uses unichar_font_info structs that represent all - * information needed to quickly draw a single gunichar. The font_info - * creates those unichar_font_info structs on demand and caches them + * - A font_info keeps uses unistr_font_info structs that represent all + * information needed to quickly draw a single vteunistr. The font_info + * creates those unistr_font_info structs on demand and caches them * indefinitely. It uses a direct array for the ASCII range and a hash * table for the rest. * * - * Fast rendering of unichars: + * Fast rendering of unistrs: * - * A unichar_font_info (uinfo) calls Pango to set text for the unichar upon + * A unistr_font_info (uinfo) calls Pango to set text for the unistr upon * initialization and then caches information needed to draw the results * later. It uses three different internal representations and respectively * three drawing paths: @@ -64,7 +64,7 @@ * fastest way to draw text as it bypasses Pango completely and allows * for stuffing multiple glyphs into a single cairo_show_glyphs() request * (if scaled-fonts match). This method is used if the glyphs used for - * the gunichar as determined by Pango consists of a single regular glyph + * the vteunistr as determined by Pango consists of a single regular glyph * positioned at 0,0 using a regular font. This method is used for more * than 99% of the cases. Only exceptional cases fall through to the * other two methods. @@ -79,7 +79,7 @@ * * - COVERAGE_USE_PANGO_LAYOUT_LINE: * Keeping a pango layout line. 
This method is used only in the very - * weird and exception case that a single gunichar uses more than one font + * weird and exception case that a single vteunistr uses more than one font * to be drawn. This is not expected to happen, but exists for * completeness, to make sure we can deal with any junk pango decides to * throw at us. @@ -112,7 +112,7 @@ * * When initializing a font info struct we measure a string consisting of all * ASCII letters and some other ASCII characters. Since we have a shaped pango - * layout at hand, we walk over it and cache unichar font info for the ASCII + * layout at hand, we walk over it and cache unistr font info for the ASCII * letters if we can do that easily using COVERAGE_USE_CAIRO_GLYPH. This * means that we precache all ASCII letters without any extra pango shaping * involved. @@ -139,7 +139,7 @@ #define MAX_RUN_LENGTH 100 -enum unichar_coverage { +enum unistr_coverage { /* in increasing order of speed */ COVERAGE_UNKNOWN = 0, /* we don't know about the character yet */ COVERAGE_USE_PANGO_LAYOUT_LINE, /* use a PangoLayoutLine for the character */ @@ -147,7 +147,7 @@ enum unichar_coverage { COVERAGE_USE_CAIRO_GLYPH /* use a cairo_glyph_t for the character */ }; -union unichar_font_info { +union unistr_font_info { /* COVERAGE_USE_PANGO_LAYOUT_LINE */ struct { PangoLayoutLine *line; @@ -164,23 +164,23 @@ union unichar_font_info { } using_cairo_glyph; }; -struct unichar_info { +struct unistr_info { guchar coverage; guchar has_unknown_chars; guint16 width; - union unichar_font_info ufi; + union unistr_font_info ufi; }; -static struct unichar_info * -unichar_info_create (void) +static struct unistr_info * +unistr_info_create (void) { - return g_slice_new0 (struct unichar_info); + return g_slice_new0 (struct unistr_info); } static void -unichar_info_finish (struct unichar_info *uinfo) +unistr_info_finish (struct unistr_info *uinfo) { - union unichar_font_info *ufi = &uinfo->ufi; + union unistr_font_info *ufi = &uinfo->ufi; switch 
(uinfo->coverage) { default: @@ -209,10 +209,10 @@ unichar_info_finish (struct unichar_info *uinfo) } static void -unichar_info_destroy (struct unichar_info *uinfo) +unistr_info_destroy (struct unistr_info *uinfo) { - unichar_info_finish (uinfo); - g_slice_free (struct unichar_info, uinfo); + unistr_info_finish (uinfo); + g_slice_free (struct unistr_info, uinfo); } struct font_info { @@ -220,16 +220,19 @@ struct font_info { int ref_count; guint destroy_timeout; /* only used when ref_count == 0 */ - /* reusable layout set with font and everything */ + /* reusable layout set with font and everything set */ PangoLayout *layout; /* cache of character info */ - struct unichar_info ascii_unichar_info[128]; - GHashTable *other_unichar_info; + struct unistr_info ascii_unistr_info[128]; + GHashTable *other_unistr_info; /* cell metrics */ gint width, height, ascent; + /* reusable string for UTF-8 conversion */ + GString *string; + #ifdef VTE_DEBUG /* profiling info */ int coverage_count[4]; @@ -237,24 +240,24 @@ struct font_info { }; -static struct unichar_info * -font_info_find_unichar_info (struct font_info *info, - gunichar c) +static struct unistr_info * +font_info_find_unistr_info (struct font_info *info, + vteunistr c) { - struct unichar_info *uinfo; + struct unistr_info *uinfo; - if (G_LIKELY (c < G_N_ELEMENTS (info->ascii_unichar_info))) - return &info->ascii_unichar_info[c]; + if (G_LIKELY (c < G_N_ELEMENTS (info->ascii_unistr_info))) + return &info->ascii_unistr_info[c]; - if (G_UNLIKELY (info->other_unichar_info == NULL)) - info->other_unichar_info = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify) unichar_info_destroy); + if (G_UNLIKELY (info->other_unistr_info == NULL)) + info->other_unistr_info = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify) unistr_info_destroy); - uinfo = g_hash_table_lookup (info->other_unichar_info, GINT_TO_POINTER (c)); + uinfo = g_hash_table_lookup (info->other_unistr_info, GINT_TO_POINTER (c)); if (G_LIKELY (uinfo)) 
return uinfo; - uinfo = unichar_info_create (); - g_hash_table_insert (info->other_unichar_info, GINT_TO_POINTER (c), uinfo); + uinfo = unistr_info_create (); + g_hash_table_insert (info->other_unistr_info, GINT_TO_POINTER (c), uinfo); return uinfo; } @@ -307,11 +310,11 @@ font_info_cache_ascii (struct font_info *info) more; more = pango_glyph_item_iter_next_cluster (&iter)) { - struct unichar_info *uinfo; - union unichar_font_info *ufi; + struct unistr_info *uinfo; + union unistr_font_info *ufi; PangoGlyphGeometry *geometry; PangoGlyph glyph; - gunichar c; + vteunistr c; /* Only cache simple clusters */ if (iter.start_char +1 != iter.end_char || @@ -334,7 +337,7 @@ font_info_cache_ascii (struct font_info *info) if (!(glyph <= 0xFFFF) || (geometry->x_offset | geometry->y_offset) != 0) continue; - uinfo = font_info_find_unichar_info (info, c); + uinfo = font_info_find_unistr_info (info, c); if (G_UNLIKELY (uinfo->coverage != COVERAGE_UNKNOWN)) continue; @@ -405,6 +408,7 @@ font_info_allocate (PangoContext *context) info); info->layout = pango_layout_new (context); + info->string = g_string_sized_new (VTE_UTF8_BPC+1); font_info_measure_font (info); @@ -414,7 +418,7 @@ font_info_allocate (PangoContext *context) static void font_info_free (struct font_info *info) { - gunichar i; + vteunistr i; #ifdef VTE_DEBUG _vte_debug_print (VTE_DEBUG_PANGOCAIRO, @@ -426,13 +430,14 @@ font_info_free (struct font_info *info) info->coverage_count[3]); #endif + g_string_free (info->string, TRUE); g_object_unref (info->layout); - for (i = 0; i < G_N_ELEMENTS (info->ascii_unichar_info); i++) - unichar_info_finish (&info->ascii_unichar_info[i]); + for (i = 0; i < G_N_ELEMENTS (info->ascii_unistr_info); i++) + unistr_info_finish (&info->ascii_unistr_info[i]); - if (info->other_unichar_info) { - g_hash_table_destroy (info->other_unichar_info); + if (info->other_unistr_info) { + g_hash_table_destroy (info->other_unistr_info); } g_slice_free (struct font_info, info); @@ -669,24 +674,24 @@ 
font_info_create_for_widget (GtkWidget *widget, return font_info_create_for_screen (screen, desc, antialias, language); } -static struct unichar_info * -font_info_get_unichar_info (struct font_info *info, - gunichar c) +static struct unistr_info * +font_info_get_unistr_info (struct font_info *info, + vteunistr c) { - struct unichar_info *uinfo; - union unichar_font_info *ufi; - char buf[VTE_UTF8_BPC+1]; + struct unistr_info *uinfo; + union unistr_font_info *ufi; PangoRectangle logical; PangoLayoutLine *line; - uinfo = font_info_find_unichar_info (info, c); + uinfo = font_info_find_unistr_info (info, c); if (G_LIKELY (uinfo->coverage != COVERAGE_UNKNOWN)) return uinfo; ufi = &uinfo->ufi; - buf[g_unichar_to_utf8 (c, buf)] = '\0'; - pango_layout_set_text (info->layout, buf, -1); + g_string_set_size (info->string, 0); + _vte_unistr_append_to_string (c, info->string); + pango_layout_set_text (info->layout, info->string->str, -1); pango_layout_get_extents (info->layout, NULL, &logical); uinfo->width = PANGO_PIXELS_CEIL (logical.width); @@ -929,14 +934,14 @@ _vte_pangocairo_get_text_metrics(struct _vte_draw *draw, static int -_vte_pangocairo_get_char_width (struct _vte_draw *draw, gunichar c, int columns) +_vte_pangocairo_get_char_width (struct _vte_draw *draw, vteunistr c, int columns) { struct _vte_pangocairo_data *data = draw->impl_data; - struct unichar_info *uinfo; + struct unistr_info *uinfo; g_return_val_if_fail (data->font != NULL, 0); - uinfo = font_info_get_unichar_info (data->font, c); + uinfo = font_info_get_unistr_info (data->font, c); return uinfo->width; } @@ -969,11 +974,11 @@ _vte_pangocairo_draw_text (struct _vte_draw *draw, cairo_set_operator (data->cr, CAIRO_OPERATOR_OVER); for (i = 0; i < n_requests; i++) { - gunichar c = requests[i].c; + vteunistr c = requests[i].c; int x = requests[i].x; int y = requests[i].y + data->font->ascent; - struct unichar_info *uinfo = font_info_get_unichar_info (data->font, c); - union unichar_font_info *ufi = &uinfo->ufi; 
+ struct unistr_info *uinfo = font_info_get_unistr_info (data->font, c); + union unistr_font_info *ufi = &uinfo->ufi; switch (uinfo->coverage) { default: @@ -1019,14 +1024,14 @@ _vte_pangocairo_draw_text (struct _vte_draw *draw, } static gboolean -_vte_pangocairo_draw_has_char (struct _vte_draw *draw, gunichar c) +_vte_pangocairo_draw_has_char (struct _vte_draw *draw, vteunistr c) { struct _vte_pangocairo_data *data = draw->impl_data; - struct unichar_info *uinfo; + struct unistr_info *uinfo; g_return_val_if_fail (data->font != NULL, FALSE); - uinfo = font_info_get_unichar_info (data->font, c); + uinfo = font_info_get_unistr_info (data->font, c); return !uinfo->has_unknown_chars; } diff --git a/src/vteunistr.c b/src/vteunistr.c new file mode 100644 index 00000000..d00bd3a1 --- /dev/null +++ b/src/vteunistr.c @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2008 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or modify it under + * the terms of the GNU Library General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Author(s): + * Behdad Esfahbod + */ + +#include <config.h> + +#include <string.h> + +#include "vteunistr.h" + +#define VTE_UNISTR_START 0x80000000 + + +static vteunistr unistr_next = VTE_UNISTR_START + 1; + +struct VteUnistrDecomp { + vteunistr prefix; + gunichar suffix; +}; + +GArray *unistr_decomp; +GHashTable *unistr_comp; + +static guint +unistr_comp_hash (gconstpointer key) +{ + struct VteUnistrDecomp *decomp; + decomp = &g_array_index (unistr_decomp, + struct VteUnistrDecomp, + GPOINTER_TO_UINT (key)); + return decomp->prefix ^ decomp->suffix; +} + +static gboolean +unistr_comp_equal (gconstpointer a, + gconstpointer b) +{ + return 0 == memcmp (&g_array_index (unistr_decomp, + struct VteUnistrDecomp, + GPOINTER_TO_UINT (a)), + &g_array_index (unistr_decomp, + struct VteUnistrDecomp, + GPOINTER_TO_UINT (b)), + sizeof (struct VteUnistrDecomp)); +} + +vteunistr +_vte_unistr_append_unichar (vteunistr s, gunichar c) +{ + struct VteUnistrDecomp decomp; + vteunistr ret = 0; + + decomp.prefix = s; + decomp.suffix = c; + + if (G_UNLIKELY (!unistr_decomp)) { + unistr_decomp = g_array_new (FALSE, TRUE, + sizeof (struct VteUnistrDecomp)); + g_array_set_size (unistr_decomp, 1); + unistr_comp = g_hash_table_new (unistr_comp_hash, + unistr_comp_equal); + } else { + g_array_index (unistr_decomp, + struct VteUnistrDecomp, + 0) = decomp; + ret = GPOINTER_TO_UINT (g_hash_table_lookup (unistr_comp, + GUINT_TO_POINTER (0))); + } + + if (G_UNLIKELY (!ret)) { + ret = unistr_next++; + g_array_append_val (unistr_decomp, decomp); + g_hash_table_insert (unistr_comp, + GUINT_TO_POINTER (ret - VTE_UNISTR_START), + GUINT_TO_POINTER (ret)); + } + + return ret; +} + +int +_vte_unistr_strlen (vteunistr s) +{ + int len = 1; + g_return_val_if_fail (s < unistr_next, len); + while (G_UNLIKELY (s >= VTE_UNISTR_START)) { + s = g_array_index (unistr_decomp, + struct VteUnistrDecomp, + s - VTE_UNISTR_START).prefix; + len++; + } + return len; +} + +gunichar +_vte_unistr_get_base (vteunistr s) 
+{ + g_return_val_if_fail (s < unistr_next, s); + while (G_UNLIKELY (s >= VTE_UNISTR_START)) + s = g_array_index (unistr_decomp, + struct VteUnistrDecomp, + s - VTE_UNISTR_START).prefix; + return (gunichar) s; +} + +void +_vte_unistr_append_to_string (vteunistr s, GString *gs) +{ + g_return_if_fail (s < unistr_next); + if (G_UNLIKELY (s >= VTE_UNISTR_START)) { + struct VteUnistrDecomp *decomp; + decomp = &g_array_index (unistr_decomp, + struct VteUnistrDecomp, + s - VTE_UNISTR_START); + _vte_unistr_append_to_string (decomp->prefix, gs); + s = decomp->suffix; + } + g_string_append_unichar (gs, (gunichar) s); +} diff --git a/src/vteunistr.h b/src/vteunistr.h new file mode 100644 index 00000000..faba4d67 --- /dev/null +++ b/src/vteunistr.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2008 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or modify it under + * the terms of the GNU Library General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Author(s): + * Behdad Esfahbod + */ + +#ifndef vte_vteunistr_h_included +#define vte_vteunistr_h_included + +#include <glib.h> + +G_BEGIN_DECLS + +typedef guint32 vteunistr; + +#define vte_unistr_from_unichar(c) ((vteunistr) c) + +vteunistr +_vte_unistr_append_unichar (vteunistr s, gunichar c); + +int +_vte_unistr_strlen (vteunistr s); + +gunichar +_vte_unistr_get_base (vteunistr s); + +void +_vte_unistr_append_to_string (vteunistr s, GString *gs); + +G_END_DECLS + +#endif |