summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBehdad Esfahbod <behdad@gnome.org>2008-12-12 06:57:09 +0000
committerBehdad Esfahbod <behdad@src.gnome.org>2008-12-12 06:57:09 +0000
commit646a71162cabe13f96627ea6cbeb91864e9fc625 (patch)
tree1ee118dae12bfddba6ec9e6d929efd1e4b0766e2
parent2d0c1717a14e5c735877d45d79668d5519f0e838 (diff)
downloadvte-646a71162cabe13f96627ea6cbeb91864e9fc625.tar.gz
Bug 149631 – gnome-terminal doesn't combine combining chars in utf8
2008-12-12 Behdad Esfahbod <behdad@gnome.org> Bug 149631 – gnome-terminal doesn't combine combining chars in utf8 * src/vteunistr.c: * src/vteunistr.h: An extended UTF-32 type that assigns numeric values to UTF-8 sequences on demand. Can be used to efficiently store a string, instead of one character, at each cell. * src/vte-private.h: * src/vte.c: * src/vtedraw.c: * src/vtedraw.h: * src/vtepangocairo.c: Port to vteunistr instead of gunichar for cell content. * src/vte.c: Handle zerowidth insertions by sticking it on the previous cell. * src/iso2022.h: * src/iso2022.c: Cleanup ambiguous-width handling. Handle zero-width chars. svn path=/trunk/; revision=2340
-rw-r--r--ChangeLog26
-rw-r--r--src/Makefile.am2
-rw-r--r--src/iso2022.c90
-rw-r--r--src/iso2022.h5
-rw-r--r--src/vte-private.h3
-rw-r--r--src/vte.c103
-rw-r--r--src/vtedraw.c6
-rw-r--r--src/vtedraw.h11
-rw-r--r--src/vtepangocairo.c127
-rw-r--r--src/vteunistr.c136
-rw-r--r--src/vteunistr.h47
11 files changed, 421 insertions, 135 deletions
diff --git a/ChangeLog b/ChangeLog
index cb349399..c5ca925a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,27 @@
+2008-12-12 Behdad Esfahbod <behdad@gnome.org>
+
+ Bug 149631 – gnome-terminal doesn't combine combining chars in utf8
+
+ * src/vteunistr.c:
+ * src/vteunistr.h:
+ An extended UTF-32 type that assigns numeric values to UTF-8 sequences
+ on demand. Can be used to efficiently store a string, instead of one
+ character, at each cell.
+
+ * src/vte-private.h:
+ * src/vte.c:
+ * src/vtedraw.c:
+ * src/vtedraw.h:
+ * src/vtepangocairo.c:
+ Port to vteunistr instead of gunichar for cell content.
+
+ * src/vte.c:
+ Handle zerowidth insertions by sticking it on the previous cell.
+
+ * src/iso2022.h:
+ * src/iso2022.c:
+ Cleanup ambiguous-width handling. Handle zero-width chars.
+
2008-12-11 Christian Persch <chpe@gnome.org>
Bug 564057 – src/pty.c does not compile with
@@ -16,7 +40,7 @@
Bug 562695 - ship pkg-config file for python bindings
* configure.in:
- * python/Makefile.am:
+ * python/Makefile.am:
* python/pyvte.pc.in:
2008-12-08 Behdad Esfahbod <behdad@gnome.org>
diff --git a/src/Makefile.am b/src/Makefile.am
index 46e953c7..cc766359 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -80,6 +80,8 @@ libvte_la_SOURCES = \
vtetree.h \
vtetypebuiltins.c \
vtetypebuiltins.h \
+ vteunistr.c \
+ vteunistr.h \
vteversion.h
$(NULL)
diff --git a/src/iso2022.c b/src/iso2022.c
index d67307aa..94e7bb8e 100644
--- a/src/iso2022.c
+++ b/src/iso2022.c
@@ -284,39 +284,26 @@ _vte_direct_compare(gconstpointer a, gconstpointer b)
return GPOINTER_TO_INT(a) - GPOINTER_TO_INT(b);
}
-static inline gboolean
-_vte_iso2022_is_ambiguous(gunichar c)
-{
- /* ASCII chars are not ambiguous */
- if (G_LIKELY (c < 0x80))
- return FALSE;
-
- return g_unichar_iswide (c) != g_unichar_iswide_cjk (c);
-}
-
/* If we only have a codepoint, guess what the ambiguous width should be based
* on the default region. Just hope we don't do this too often. */
static int
_vte_iso2022_ambiguous_width_guess(void)
{
static int guess;
- if (guess == 0) {
+ if (G_UNLIKELY (guess == 0)) {
const char *lang = NULL;
guess = 1;
- if ((lang == NULL) && (g_getenv("LC_ALL") != NULL)) {
+ if (lang == NULL)
lang = g_getenv("LC_ALL");
- }
- if ((lang == NULL) && (g_getenv("LC_CTYPE") != NULL)) {
+ if (lang == NULL)
lang = g_getenv("LC_CTYPE");
- }
- if ((lang == NULL) && (g_getenv("LANG") != NULL)) {
+ if (lang == NULL)
lang = g_getenv("LANG");
- }
- if (lang != NULL) {
+ if (lang) {
if (g_ascii_strncasecmp(lang, "ja", 2) == 0 ||
- g_ascii_strncasecmp(lang, "ko", 2) == 0 ||
- g_ascii_strncasecmp(lang, "vi", 2) == 0 ||
- g_ascii_strncasecmp(lang, "zh", 2) == 0) {
+ g_ascii_strncasecmp(lang, "ko", 2) == 0 ||
+ g_ascii_strncasecmp(lang, "vi", 2) == 0 ||
+ g_ascii_strncasecmp(lang, "zh", 2) == 0) {
guess = 2;
}
}
@@ -357,12 +344,11 @@ _vte_iso2022_ambiguous_width(struct _vte_iso2022_state *state)
/* Sort-of canonify the encoding name. */
i = j = 0;
for (i = 0; state->codeset[i] != '\0'; i++) {
- if (g_ascii_isalnum(state->codeset[i])) {
+ if (g_ascii_isalnum(state->codeset[i]))
codeset[j++] = g_ascii_tolower(state->codeset[i]);
- }
- if (j >= sizeof(codeset) - 1) {
+
+ if (j >= sizeof(codeset) - 1)
break;
- }
}
codeset[j] = '\0';
@@ -377,11 +363,11 @@ _vte_iso2022_ambiguous_width(struct _vte_iso2022_state *state)
* Decide the ambiguous width according to the default region if
* current locale is UTF-8.
*/
- if (strcmp (codeset, "utf8") == 0 && g_getenv("VTE_CJK_WIDTH") != NULL) {
+ if (strcmp (codeset, "utf8") == 0) {
const char *env = g_getenv ("VTE_CJK_WIDTH");
- if ((g_ascii_strcasecmp (env, "narrow")==0) || (g_ascii_strcasecmp (env, "0")==0))
+ if (env && (g_ascii_strcasecmp (env, "narrow")==0 || g_ascii_strcasecmp (env, "0")==0))
return 1;
- if ((g_ascii_strcasecmp (env, "wide")==0) || (g_ascii_strcasecmp (env, "1")==0))
+ if (env && (g_ascii_strcasecmp (env, "wide")==0 || g_ascii_strcasecmp (env, "1")==0))
return 2;
else
return _vte_iso2022_ambiguous_width_guess ();
@@ -391,6 +377,33 @@ _vte_iso2022_ambiguous_width(struct _vte_iso2022_state *state)
return 1;
}
+/* Tell whether c is an ambiguous-width character: one that
+ * g_unichar_iswide() reports as narrow but g_unichar_iswide_cjk() reports
+ * as wide.  Characters below 0x80 and zero-width characters are never
+ * ambiguous. */
+static inline gboolean
+_vte_iso2022_is_ambiguous(gunichar c)
+{
+	gboolean narrow_by_default;
+
+	if (G_LIKELY (c < 0x80))
+		return FALSE;
+	if (G_UNLIKELY (g_unichar_iszerowidth (c)))
+		return FALSE;
+
+	narrow_by_default = !g_unichar_iswide (c);
+	if (G_UNLIKELY (narrow_by_default && g_unichar_iswide_cjk (c)))
+		return TRUE;
+	return FALSE;
+}
+
+/* Return the number of columns c occupies.
+ *
+ * Characters below 0x80 are always 1 column, zero-width characters 0 and
+ * wide characters 2.  Anything else is 1 column, unless
+ * state->ambiguous_width is not 1 and g_unichar_iswide_cjk() reports the
+ * character as wide, in which case it is 2. */
+int
+_vte_iso2022_unichar_width(struct _vte_iso2022_state *state,
+			   gunichar c)
+{
+	int width = 1;
+
+	if (G_LIKELY (c < 0x80)) {
+		/* fast path: keep the default of one column */
+	} else if (G_UNLIKELY (g_unichar_iszerowidth (c))) {
+		width = 0;
+	} else if (G_UNLIKELY (g_unichar_iswide (c))) {
+		width = 2;
+	} else if (G_UNLIKELY (state->ambiguous_width != 1) &&
+		   G_UNLIKELY (g_unichar_iswide_cjk (c))) {
+		width = 2;
+	}
+
+	return width;
+}
+
static GHashTable *
_vte_iso2022_map_init(const struct _vte_iso2022_map *map, gssize length)
{
@@ -722,16 +735,16 @@ _vte_iso2022_map_get(gunichar mapname,
}
}
-gssize
+int
_vte_iso2022_get_encoded_width(gunichar c)
{
- gssize width;
+ int width;
width = (c & VTE_ISO2022_ENCODED_WIDTH_MASK) >> VTE_ISO2022_ENCODED_WIDTH_BIT_OFFSET;
return CLAMP(width, 0, 2);
}
static gunichar
-_vte_iso2022_set_encoded_width(gunichar c, gssize width)
+_vte_iso2022_set_encoded_width(gunichar c, int width)
{
width = CLAMP(width, 0, 2);
c &= ~(VTE_ISO2022_ENCODED_WIDTH_MASK);
@@ -817,7 +830,7 @@ _vte_iso2022_state_set_codeset(struct _vte_iso2022_state *state,
}
state->codeset = g_intern_string (codeset);
state->conv = conv;
- state->ambiguous_width = _vte_iso2022_ambiguous_width(state);
+ state->ambiguous_width = _vte_iso2022_ambiguous_width (state);
}
const char *
@@ -1739,19 +1752,6 @@ _vte_iso2022_process(struct _vte_iso2022_state *state,
return length;
}
-gssize
-_vte_iso2022_unichar_width(gunichar c)
-{
- c = c & ~(VTE_ISO2022_ENCODED_WIDTH_MASK); /* just in case */
- if (G_UNLIKELY (_vte_iso2022_is_ambiguous(c))) {
- return _vte_iso2022_ambiguous_width_guess();
- }
- if (g_unichar_iswide(c)) {
- return 2;
- }
- return 1;
-}
-
#ifdef ISO2022_MAIN
#include <stdio.h>
int
diff --git a/src/iso2022.h b/src/iso2022.h
index aad2e75e..cb54f819 100644
--- a/src/iso2022.h
+++ b/src/iso2022.h
@@ -49,8 +49,9 @@ void _vte_iso2022_state_free(struct _vte_iso2022_state *);
#define VTE_ISO2022_ENCODED_WIDTH_BIT_OFFSET 28
#define VTE_ISO2022_ENCODED_WIDTH_MASK (3 << VTE_ISO2022_ENCODED_WIDTH_BIT_OFFSET)
#define VTE_ISO2022_HAS_ENCODED_WIDTH(__c) (((__c) & VTE_ISO2022_ENCODED_WIDTH_MASK) != 0)
-gssize _vte_iso2022_get_encoded_width(gunichar c);
-gssize _vte_iso2022_unichar_width(gunichar c);
+int _vte_iso2022_get_encoded_width(gunichar c);
+int _vte_iso2022_unichar_width(struct _vte_iso2022_state *state,
+ gunichar c);
G_END_DECLS
diff --git a/src/vte-private.h b/src/vte-private.h
index de961f79..d43cba46 100644
--- a/src/vte-private.h
+++ b/src/vte-private.h
@@ -41,6 +41,7 @@
#include <unistd.h>
#include <glib/gi18n-lib.h>
+#include "vteunistr.h"
#include "vte.h"
#include "buffer.h"
#include "debug.h"
@@ -98,7 +99,7 @@ G_BEGIN_DECLS
/* The structure we use to hold characters we're supposed to display -- this
* includes any supported visible attributes. */
struct vte_charcell {
- gunichar c; /* The Unicode character. */
+ vteunistr c; /* The Unicode string for the cell. */
struct vte_charcell_attr {
guint32 columns: 4; /* Number of visible columns
diff --git a/src/vte.c b/src/vte.c
index bd8b906c..b5436d37 100644
--- a/src/vte.c
+++ b/src/vte.c
@@ -669,7 +669,7 @@ vte_terminal_preedit_width(VteTerminal *terminal, gboolean left_only)
(!left_only || (i < terminal->pvt->im_preedit_cursor));
i++) {
c = g_utf8_get_char(preedit);
- ret += _vte_iso2022_unichar_width(c);
+ ret += _vte_iso2022_unichar_width(terminal->pvt->iso2022, c);
preedit = g_utf8_next_char(preedit);
}
}
@@ -3069,7 +3069,7 @@ _vte_terminal_insert_char(VteTerminal *terminal, gunichar c,
if (G_UNLIKELY (screen->alternate_charset)) {
_vte_debug_print(VTE_DEBUG_SUBSTITUTION,
"Attempting charset substitution"
- "for 0x%04x.\n", c);
+ "for U+%04X.\n", c);
/* See if there's a mapping for it. */
c = _vte_iso2022_process_single(terminal->pvt->iso2022, c, '0');
}
@@ -3086,12 +3086,13 @@ _vte_terminal_insert_char(VteTerminal *terminal, gunichar c,
columns = _vte_iso2022_get_encoded_width(c);
c &= ~VTE_ISO2022_ENCODED_WIDTH_MASK;
} else {
- columns = _vte_iso2022_unichar_width(c);
+ columns = _vte_iso2022_unichar_width(terminal->pvt->iso2022, c);
}
+
/* If we're autowrapping here, do it. */
col = screen->cursor_current.col;
- if (G_UNLIKELY (col + columns > terminal->column_count)) {
+ if (G_UNLIKELY (columns && col + columns > terminal->column_count)) {
if (terminal->pvt->flags.am) {
_vte_debug_print(VTE_DEBUG_ADJ,
"Autowrapping before character\n");
@@ -3118,6 +3119,73 @@ _vte_terminal_insert_char(VteTerminal *terminal, gunichar c,
col, columns, (long)screen->cursor_current.row,
(long)screen->insert_delta);
+
+	if (G_UNLIKELY (columns == 0)) {
+
+		/* It's a combining mark: it gets no cell of its own.  Instead it
+		 * is appended to the string stored in the cell before the cursor. */
+
+		long row_num;
+		struct vte_charcell *cell;
+
+		_vte_debug_print(VTE_DEBUG_PARSE, "combining U+%04X\n", c);
+
+		row_num = screen->cursor_current.row;
+		row = NULL;
+		if (col == 0) {
+			/* We are at first column.  See if the previous line softwrapped.
+			 * If it did, move there.  Otherwise skip inserting. */
+
+			if (row_num > 0) {
+				row_num--;
+				row = _vte_terminal_find_row_data (terminal, row_num);
+
+				/* The row lookup may yield NULL (it is NULL-checked
+				 * just below), so test it before dereferencing. */
+				if (row) {
+					if (!row->soft_wrapped)
+						row = NULL;
+					else
+						col = row->cells->len;
+				}
+			}
+		} else {
+			row = _vte_terminal_find_row_data (terminal, row_num);
+		}
+
+		/* No row to attach to, or nothing to the left of the cursor. */
+		if (G_UNLIKELY (!row || !col))
+			goto not_inserted;
+
+		/* Combine it on the previous cell */
+
+		col--;
+		cell = _vte_row_data_find_charcell(row, col);
+
+		if (G_UNLIKELY (!cell))
+			goto not_inserted;
+
+		/* Find the previous cell: step left past cells flagged as
+		 * fragments (presumably the trailing columns of a multi-column
+		 * character -- confirm against struct vte_charcell_attr).  The
+		 * lookup can fail, so re-check cell on every iteration. */
+		while (cell && cell->attr.fragment && col > 0) {
+			cell = _vte_row_data_find_charcell(row, --col);
+		}
+		/* Tab cells are left untouched. */
+		if (G_UNLIKELY (!cell || cell->c == '\t'))
+			goto not_inserted;
+
+		/* Combine the new character on top of the cell string */
+		c = _vte_unistr_append_unichar (cell->c, c);
+
+		/* And set it on every column the base cell claims.  col is
+		 * advanced even if a lookup fails so that the invalidation
+		 * below still covers the intended range. */
+		columns = cell->attr.columns;
+		for (i = 0; i < columns; i++) {
+			cell = _vte_row_data_find_charcell(row, col++);
+			if (G_LIKELY (cell))
+				cell->c = c;
+		}
+
+		/* Always invalidate since we put the mark on the *previous* cell
+		 * and the higher level code doesn't know this. */
+		_vte_invalidate_cells(terminal,
+				      col - columns,
+				      columns,
+				      row_num, 1);
+
+		goto done;
+	}
+
/* Make sure we have enough rows to hold this data. */
row = vte_terminal_ensure_cursor (terminal);
g_assert(row != NULL);
@@ -3207,9 +3275,11 @@ _vte_terminal_insert_char(VteTerminal *terminal, gunichar c,
}
}
+done:
/* We added text, so make a note of it. */
terminal->pvt->text_inserted_flag = TRUE;
+not_inserted:
_vte_debug_print(VTE_DEBUG_ADJ|VTE_DEBUG_PARSE,
"insertion delta => %ld.\n",
(long)screen->insert_delta);
@@ -5193,7 +5263,7 @@ vte_same_class(VteTerminal *terminal, glong acol, glong arow,
struct vte_charcell *pcell = NULL;
gboolean word_char;
if ((pcell = vte_terminal_find_charcell(terminal, acol, arow)) != NULL && pcell->c != 0) {
- word_char = vte_terminal_is_word_char(terminal, pcell->c);
+ word_char = vte_terminal_is_word_char(terminal, _vte_unistr_get_base (pcell->c));
/* Lets not group non-wordchars together (bug #25290) */
if (!word_char)
@@ -5204,7 +5274,7 @@ vte_same_class(VteTerminal *terminal, glong acol, glong arow,
return FALSE;
}
if (word_char != vte_terminal_is_word_char(terminal,
- pcell->c)) {
+ _vte_unistr_get_base (pcell->c))) {
return FALSE;
}
return TRUE;
@@ -5846,21 +5916,19 @@ vte_terminal_get_text_range_maybe_wrapped(VteTerminal *terminal,
attr.underline = pcell->attr.underline;
attr.strikethrough = pcell->attr.strikethrough;
- /* Store the character. */
- string = g_string_append_unichar(string,
- pcell->c ?
- pcell->c :
- ' ');
+ /* Store the cell string */
if (pcell->c == 0) {
+ g_string_append_c (string, ' ');
last_empty = string->len;
last_emptycol = col;
} else {
+ _vte_unistr_append_to_string (pcell->c, string);
last_nonempty = string->len;
last_nonemptycol = col;
}
- /* If we added a character to the string, record its
- * attributes, one per char. */
+ /* If we added text to the string, record its
+ * attributes, one per byte. */
if (attributes) {
vte_g_array_fill(attributes,
&attr, string->len);
@@ -8744,7 +8812,7 @@ vte_terminal_determine_colors(VteTerminal *terminal,
/* Check if a unicode character is actually a graphic character we draw
* ourselves to handle cases where fonts don't have glyphs for them. */
static gboolean
-vte_unichar_is_local_graphic(gunichar c)
+vte_unichar_is_local_graphic(vteunistr c)
{
if ((c >= 0x2500) && (c <= 0x257f)) {
return TRUE;
@@ -8783,7 +8851,7 @@ vte_unichar_is_local_graphic(gunichar c)
return FALSE;
}
static gboolean
-vte_terminal_unichar_is_local_graphic(VteTerminal *terminal, gunichar c)
+vte_terminal_unichar_is_local_graphic(VteTerminal *terminal, vteunistr c)
{
return vte_unichar_is_local_graphic (c) &&
!_vte_draw_has_char (terminal->pvt->draw, c);
@@ -8870,7 +8938,7 @@ vte_terminal_draw_point(VteTerminal *terminal,
/* Draw the graphic representation of a line-drawing or special graphics
* character. */
static gboolean
-vte_terminal_draw_graphic(VteTerminal *terminal, gunichar c,
+vte_terminal_draw_graphic(VteTerminal *terminal, vteunistr c,
gint fore, gint back, gboolean draw_default_bg,
gint x, gint y,
gint column_width, gint columns, gint row_height)
@@ -10708,7 +10776,8 @@ vte_terminal_paint_im_preedit_string(VteTerminal *terminal)
items = g_new(struct _vte_draw_text_request, len);
for (i = columns = 0; i < len; i++) {
items[i].c = g_utf8_get_char(preedit);
- items[i].columns = _vte_iso2022_unichar_width(items[i].c);
+ items[i].columns = _vte_iso2022_unichar_width(terminal->pvt->iso2022,
+ items[i].c);
items[i].x = (col + columns) * width;
items[i].y = row * height;
columns += items[i].columns;
diff --git a/src/vtedraw.c b/src/vtedraw.c
index 76628ace..494d38df 100644
--- a/src/vtedraw.c
+++ b/src/vtedraw.c
@@ -312,7 +312,7 @@ _vte_draw_get_text_metrics(struct _vte_draw *draw,
}
int
-_vte_draw_get_char_width (struct _vte_draw *draw, gunichar c, int columns)
+_vte_draw_get_char_width (struct _vte_draw *draw, vteunistr c, int columns)
{
int width = 0;
@@ -370,11 +370,11 @@ _vte_draw_char (struct _vte_draw *draw,
return has_char;
}
gboolean
-_vte_draw_has_char (struct _vte_draw *draw, gunichar c)
+_vte_draw_has_char (struct _vte_draw *draw, vteunistr c)
{
gboolean has_char = TRUE;
- _vte_debug_print (VTE_DEBUG_DRAW, "draw_has_char ('%c')\n", c);
+ _vte_debug_print (VTE_DEBUG_DRAW, "draw_has_char ('0x%04X')\n", c);
if (draw->impl->has_char)
has_char = draw->impl->has_char (draw, c);
diff --git a/src/vtedraw.h b/src/vtedraw.h
index dbf29261..df27ce63 100644
--- a/src/vtedraw.h
+++ b/src/vtedraw.h
@@ -26,6 +26,7 @@
#include <gtk/gtk.h>
#include "vtebg.h"
#include "vte.h"
+#include "vteunistr.h"
G_BEGIN_DECLS
@@ -55,7 +56,7 @@ struct _vte_draw;
corner of the cell into which the character will be drawn instead of the
left end of the baseline. */
struct _vte_draw_text_request {
- gunichar c;
+ vteunistr c;
gshort x, y, columns;
};
@@ -86,11 +87,11 @@ struct _vte_draw_impl {
const PangoFontDescription *,
VteTerminalAntiAlias);
void (*get_text_metrics)(struct _vte_draw *, gint *, gint *, gint *);
- int (*get_char_width)(struct _vte_draw *, gunichar c, int columns);
+ int (*get_char_width)(struct _vte_draw *, vteunistr c, int columns);
void (*draw_text)(struct _vte_draw *,
struct _vte_draw_text_request *, gsize,
GdkColor *, guchar);
- gboolean (*has_char)(struct _vte_draw *, gunichar);
+ gboolean (*has_char)(struct _vte_draw *, vteunistr);
void (*draw_rectangle)(struct _vte_draw *,
gint, gint, gint, gint,
GdkColor *, guchar);
@@ -150,7 +151,7 @@ void _vte_draw_set_text_font(struct _vte_draw *draw,
VteTerminalAntiAlias anti_alias);
void _vte_draw_get_text_metrics(struct _vte_draw *draw,
gint *width, gint *height, gint *ascent);
-int _vte_draw_get_char_width(struct _vte_draw *draw, gunichar c, int columns);
+int _vte_draw_get_char_width(struct _vte_draw *draw, vteunistr c, int columns);
void _vte_draw_text(struct _vte_draw *draw,
struct _vte_draw_text_request *requests, gsize n_requests,
@@ -158,7 +159,7 @@ void _vte_draw_text(struct _vte_draw *draw,
gboolean _vte_draw_char(struct _vte_draw *draw,
struct _vte_draw_text_request *request,
GdkColor *color, guchar alpha);
-gboolean _vte_draw_has_char(struct _vte_draw *draw, gunichar c);
+gboolean _vte_draw_has_char(struct _vte_draw *draw, vteunistr c);
void _vte_draw_fill_rectangle(struct _vte_draw *draw,
gint x, gint y, gint width, gint height,
diff --git a/src/vtepangocairo.c b/src/vtepangocairo.c
index 5e78fa16..6500cc3d 100644
--- a/src/vtepangocairo.c
+++ b/src/vtepangocairo.c
@@ -45,16 +45,16 @@
* - We attach a font_info to draw as our private data. A font_info has
* all the information to quickly draw text.
*
- * - A font_info keeps uses unichar_font_info structs that represent all
- * information needed to quickly draw a single gunichar. The font_info
- * creates those unichar_font_info structs on demand and caches them
+ * - A font_info uses unistr_font_info structs that represent all
+ * information needed to quickly draw a single vteunistr. The font_info
+ * creates those unistr_font_info structs on demand and caches them
* indefinitely. It uses a direct array for the ASCII range and a hash
* table for the rest.
*
*
- * Fast rendering of unichars:
+ * Fast rendering of unistrs:
*
- * A unichar_font_info (uinfo) calls Pango to set text for the unichar upon
+ * A unistr_font_info (uinfo) calls Pango to set text for the unistr upon
* initialization and then caches information needed to draw the results
* later. It uses three different internal representations and respectively
* three drawing paths:
@@ -64,7 +64,7 @@
* fastest way to draw text as it bypasses Pango completely and allows
* for stuffing multiple glyphs into a single cairo_show_glyphs() request
* (if scaled-fonts match). This method is used if the glyphs used for
- * the gunichar as determined by Pango consists of a single regular glyph
+ * the vteunistr as determined by Pango consists of a single regular glyph
* positioned at 0,0 using a regular font. This method is used for more
* than 99% of the cases. Only exceptional cases fall through to the
* other two methods.
@@ -79,7 +79,7 @@
*
* - COVERAGE_USE_PANGO_LAYOUT_LINE:
* Keeping a pango layout line. This method is used only in the very
- * weird and exception case that a single gunichar uses more than one font
+ * weird and exceptional case that a single vteunistr uses more than one font
* to be drawn. This is not expected to happen, but exists for
* completeness, to make sure we can deal with any junk pango decides to
* throw at us.
@@ -112,7 +112,7 @@
*
* When initializing a font info struct we measure a string consisting of all
* ASCII letters and some other ASCII characters. Since we have a shaped pango
- * layout at hand, we walk over it and cache unichar font info for the ASCII
+ * layout at hand, we walk over it and cache unistr font info for the ASCII
* letters if we can do that easily using COVERAGE_USE_CAIRO_GLYPH. This
* means that we precache all ASCII letters without any extra pango shaping
* involved.
@@ -139,7 +139,7 @@
#define MAX_RUN_LENGTH 100
-enum unichar_coverage {
+enum unistr_coverage {
/* in increasing order of speed */
COVERAGE_UNKNOWN = 0, /* we don't know about the character yet */
COVERAGE_USE_PANGO_LAYOUT_LINE, /* use a PangoLayoutLine for the character */
@@ -147,7 +147,7 @@ enum unichar_coverage {
COVERAGE_USE_CAIRO_GLYPH /* use a cairo_glyph_t for the character */
};
-union unichar_font_info {
+union unistr_font_info {
/* COVERAGE_USE_PANGO_LAYOUT_LINE */
struct {
PangoLayoutLine *line;
@@ -164,23 +164,23 @@ union unichar_font_info {
} using_cairo_glyph;
};
-struct unichar_info {
+struct unistr_info {
guchar coverage;
guchar has_unknown_chars;
guint16 width;
- union unichar_font_info ufi;
+ union unistr_font_info ufi;
};
-static struct unichar_info *
-unichar_info_create (void)
+static struct unistr_info *
+unistr_info_create (void)
{
- return g_slice_new0 (struct unichar_info);
+ return g_slice_new0 (struct unistr_info);
}
static void
-unichar_info_finish (struct unichar_info *uinfo)
+unistr_info_finish (struct unistr_info *uinfo)
{
- union unichar_font_info *ufi = &uinfo->ufi;
+ union unistr_font_info *ufi = &uinfo->ufi;
switch (uinfo->coverage) {
default:
@@ -209,10 +209,10 @@ unichar_info_finish (struct unichar_info *uinfo)
}
static void
-unichar_info_destroy (struct unichar_info *uinfo)
+unistr_info_destroy (struct unistr_info *uinfo)
{
- unichar_info_finish (uinfo);
- g_slice_free (struct unichar_info, uinfo);
+ unistr_info_finish (uinfo);
+ g_slice_free (struct unistr_info, uinfo);
}
struct font_info {
@@ -220,16 +220,19 @@ struct font_info {
int ref_count;
guint destroy_timeout; /* only used when ref_count == 0 */
- /* reusable layout set with font and everything */
+ /* reusable layout set with font and everything set */
PangoLayout *layout;
/* cache of character info */
- struct unichar_info ascii_unichar_info[128];
- GHashTable *other_unichar_info;
+ struct unistr_info ascii_unistr_info[128];
+ GHashTable *other_unistr_info;
/* cell metrics */
gint width, height, ascent;
+ /* reusable string for UTF-8 conversion */
+ GString *string;
+
#ifdef VTE_DEBUG
/* profiling info */
int coverage_count[4];
@@ -237,24 +240,24 @@ struct font_info {
};
-static struct unichar_info *
-font_info_find_unichar_info (struct font_info *info,
- gunichar c)
+static struct unistr_info *
+font_info_find_unistr_info (struct font_info *info,
+ vteunistr c)
{
- struct unichar_info *uinfo;
+ struct unistr_info *uinfo;
- if (G_LIKELY (c < G_N_ELEMENTS (info->ascii_unichar_info)))
- return &info->ascii_unichar_info[c];
+ if (G_LIKELY (c < G_N_ELEMENTS (info->ascii_unistr_info)))
+ return &info->ascii_unistr_info[c];
- if (G_UNLIKELY (info->other_unichar_info == NULL))
- info->other_unichar_info = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify) unichar_info_destroy);
+ if (G_UNLIKELY (info->other_unistr_info == NULL))
+ info->other_unistr_info = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify) unistr_info_destroy);
- uinfo = g_hash_table_lookup (info->other_unichar_info, GINT_TO_POINTER (c));
+ uinfo = g_hash_table_lookup (info->other_unistr_info, GINT_TO_POINTER (c));
if (G_LIKELY (uinfo))
return uinfo;
- uinfo = unichar_info_create ();
- g_hash_table_insert (info->other_unichar_info, GINT_TO_POINTER (c), uinfo);
+ uinfo = unistr_info_create ();
+ g_hash_table_insert (info->other_unistr_info, GINT_TO_POINTER (c), uinfo);
return uinfo;
}
@@ -307,11 +310,11 @@ font_info_cache_ascii (struct font_info *info)
more;
more = pango_glyph_item_iter_next_cluster (&iter))
{
- struct unichar_info *uinfo;
- union unichar_font_info *ufi;
+ struct unistr_info *uinfo;
+ union unistr_font_info *ufi;
PangoGlyphGeometry *geometry;
PangoGlyph glyph;
- gunichar c;
+ vteunistr c;
/* Only cache simple clusters */
if (iter.start_char +1 != iter.end_char ||
@@ -334,7 +337,7 @@ font_info_cache_ascii (struct font_info *info)
if (!(glyph <= 0xFFFF) || (geometry->x_offset | geometry->y_offset) != 0)
continue;
- uinfo = font_info_find_unichar_info (info, c);
+ uinfo = font_info_find_unistr_info (info, c);
if (G_UNLIKELY (uinfo->coverage != COVERAGE_UNKNOWN))
continue;
@@ -405,6 +408,7 @@ font_info_allocate (PangoContext *context)
info);
info->layout = pango_layout_new (context);
+ info->string = g_string_sized_new (VTE_UTF8_BPC+1);
font_info_measure_font (info);
@@ -414,7 +418,7 @@ font_info_allocate (PangoContext *context)
static void
font_info_free (struct font_info *info)
{
- gunichar i;
+ vteunistr i;
#ifdef VTE_DEBUG
_vte_debug_print (VTE_DEBUG_PANGOCAIRO,
@@ -426,13 +430,14 @@ font_info_free (struct font_info *info)
info->coverage_count[3]);
#endif
+ g_string_free (info->string, TRUE);
g_object_unref (info->layout);
- for (i = 0; i < G_N_ELEMENTS (info->ascii_unichar_info); i++)
- unichar_info_finish (&info->ascii_unichar_info[i]);
+ for (i = 0; i < G_N_ELEMENTS (info->ascii_unistr_info); i++)
+ unistr_info_finish (&info->ascii_unistr_info[i]);
- if (info->other_unichar_info) {
- g_hash_table_destroy (info->other_unichar_info);
+ if (info->other_unistr_info) {
+ g_hash_table_destroy (info->other_unistr_info);
}
g_slice_free (struct font_info, info);
@@ -669,24 +674,24 @@ font_info_create_for_widget (GtkWidget *widget,
return font_info_create_for_screen (screen, desc, antialias, language);
}
-static struct unichar_info *
-font_info_get_unichar_info (struct font_info *info,
- gunichar c)
+static struct unistr_info *
+font_info_get_unistr_info (struct font_info *info,
+ vteunistr c)
{
- struct unichar_info *uinfo;
- union unichar_font_info *ufi;
- char buf[VTE_UTF8_BPC+1];
+ struct unistr_info *uinfo;
+ union unistr_font_info *ufi;
PangoRectangle logical;
PangoLayoutLine *line;
- uinfo = font_info_find_unichar_info (info, c);
+ uinfo = font_info_find_unistr_info (info, c);
if (G_LIKELY (uinfo->coverage != COVERAGE_UNKNOWN))
return uinfo;
ufi = &uinfo->ufi;
- buf[g_unichar_to_utf8 (c, buf)] = '\0';
- pango_layout_set_text (info->layout, buf, -1);
+ g_string_set_size (info->string, 0);
+ _vte_unistr_append_to_string (c, info->string);
+ pango_layout_set_text (info->layout, info->string->str, -1);
pango_layout_get_extents (info->layout, NULL, &logical);
uinfo->width = PANGO_PIXELS_CEIL (logical.width);
@@ -929,14 +934,14 @@ _vte_pangocairo_get_text_metrics(struct _vte_draw *draw,
static int
-_vte_pangocairo_get_char_width (struct _vte_draw *draw, gunichar c, int columns)
+_vte_pangocairo_get_char_width (struct _vte_draw *draw, vteunistr c, int columns)
{
struct _vte_pangocairo_data *data = draw->impl_data;
- struct unichar_info *uinfo;
+ struct unistr_info *uinfo;
g_return_val_if_fail (data->font != NULL, 0);
- uinfo = font_info_get_unichar_info (data->font, c);
+ uinfo = font_info_get_unistr_info (data->font, c);
return uinfo->width;
}
@@ -969,11 +974,11 @@ _vte_pangocairo_draw_text (struct _vte_draw *draw,
cairo_set_operator (data->cr, CAIRO_OPERATOR_OVER);
for (i = 0; i < n_requests; i++) {
- gunichar c = requests[i].c;
+ vteunistr c = requests[i].c;
int x = requests[i].x;
int y = requests[i].y + data->font->ascent;
- struct unichar_info *uinfo = font_info_get_unichar_info (data->font, c);
- union unichar_font_info *ufi = &uinfo->ufi;
+ struct unistr_info *uinfo = font_info_get_unistr_info (data->font, c);
+ union unistr_font_info *ufi = &uinfo->ufi;
switch (uinfo->coverage) {
default:
@@ -1019,14 +1024,14 @@ _vte_pangocairo_draw_text (struct _vte_draw *draw,
}
static gboolean
-_vte_pangocairo_draw_has_char (struct _vte_draw *draw, gunichar c)
+_vte_pangocairo_draw_has_char (struct _vte_draw *draw, vteunistr c)
{
struct _vte_pangocairo_data *data = draw->impl_data;
- struct unichar_info *uinfo;
+ struct unistr_info *uinfo;
g_return_val_if_fail (data->font != NULL, FALSE);
- uinfo = font_info_get_unichar_info (data->font, c);
+ uinfo = font_info_get_unistr_info (data->font, c);
return !uinfo->has_unknown_chars;
}
diff --git a/src/vteunistr.c b/src/vteunistr.c
new file mode 100644
index 00000000..d00bd3a1
--- /dev/null
+++ b/src/vteunistr.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2008 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author(s):
+ * Behdad Esfahbod
+ */
+
+#include <config.h>
+
+#include <string.h>
+
+#include "vteunistr.h"
+
+#define VTE_UNISTR_START 0x80000000
+
+
+/* Next composed-string value to hand out.  Composed values start just
+ * above VTE_UNISTR_START; (value - VTE_UNISTR_START) is the entry's index
+ * in unistr_decomp. */
+static vteunistr unistr_next = VTE_UNISTR_START + 1;
+
+/* One composed string, described as an existing string plus one appended
+ * character. */
+struct VteUnistrDecomp {
+	vteunistr prefix;
+	gunichar suffix;
+};
+
+/* File-private lookup tables, created on first use.  They are static so
+ * that these generic names are not exported from the library.
+ *  - unistr_decomp: array of struct VteUnistrDecomp, indexed as above.
+ *  - unistr_comp: hash keyed by array index, mapping a (prefix, suffix)
+ *    pair back to its composed value. */
+static GArray *unistr_decomp;
+static GHashTable *unistr_comp;
+
+/* GHashFunc for unistr_comp.  The key is an index into unistr_decomp; the
+ * hash is the XOR of that entry's prefix and suffix. */
+static guint
+unistr_comp_hash (gconstpointer key)
+{
+	guint idx = GPOINTER_TO_UINT (key);
+	const struct VteUnistrDecomp *entry =
+		&g_array_index (unistr_decomp, struct VteUnistrDecomp, idx);
+
+	return entry->prefix ^ entry->suffix;
+}
+
+/* GEqualFunc for unistr_comp.  Both keys are indices into unistr_decomp;
+ * they are equal when the two entries compare byte-for-byte identical. */
+static gboolean
+unistr_comp_equal (gconstpointer a,
+		   gconstpointer b)
+{
+	const struct VteUnistrDecomp *lhs;
+	const struct VteUnistrDecomp *rhs;
+
+	lhs = &g_array_index (unistr_decomp,
+			      struct VteUnistrDecomp,
+			      GPOINTER_TO_UINT (a));
+	rhs = &g_array_index (unistr_decomp,
+			      struct VteUnistrDecomp,
+			      GPOINTER_TO_UINT (b));
+
+	if (memcmp (lhs, rhs, sizeof (struct VteUnistrDecomp)) != 0)
+		return FALSE;
+	return TRUE;
+}
+
+/* Limits on table growth.  Entries are never freed and every new
+ * (prefix, suffix) pair costs one, so without limits a stream of
+ * combining characters can grow the table, and the cost of every
+ * length-proportional walk over a string, without bound. */
+enum {
+	UNISTR_MAX_LENGTH = 10,		/* characters in one composed string */
+	UNISTR_MAX_ENTRIES = 100000	/* distinct composed strings */
+};
+
+/* Return the vteunistr for string s with character c appended.
+ *
+ * The same (s, c) pair always yields the same value: existing pairs are
+ * found through unistr_comp, new ones get the next free value and are
+ * recorded in both tables.  If s is already UNISTR_MAX_LENGTH characters
+ * long, or the table is full, nothing is recorded and s is returned
+ * unchanged (c is dropped). */
+vteunistr
+_vte_unistr_append_unichar (vteunistr s, gunichar c)
+{
+	struct VteUnistrDecomp decomp;
+	vteunistr ret = 0;
+
+	decomp.prefix = s;
+	decomp.suffix = c;
+
+	if (G_UNLIKELY (!unistr_decomp)) {
+		/* First use: create the tables.  Slot 0 of the array is
+		 * reserved as scratch space for lookups (see below). */
+		unistr_decomp = g_array_new (FALSE, TRUE,
+					     sizeof (struct VteUnistrDecomp));
+		g_array_set_size (unistr_decomp, 1);
+		unistr_comp = g_hash_table_new (unistr_comp_hash,
+						unistr_comp_equal);
+	} else {
+		/* Hash keys are array indices, so to look a pair up we park
+		 * it in slot 0 and search for key 0. */
+		g_array_index (unistr_decomp,
+			       struct VteUnistrDecomp,
+			       0) = decomp;
+		ret = GPOINTER_TO_UINT (g_hash_table_lookup (unistr_comp,
+							     GUINT_TO_POINTER (0)));
+	}
+
+	if (G_UNLIKELY (!ret)) {
+		/* Not seen before.  Enforce the growth limits before
+		 * allocating a new value. */
+		if (G_UNLIKELY (_vte_unistr_strlen (s) >= UNISTR_MAX_LENGTH ||
+				unistr_next - VTE_UNISTR_START > UNISTR_MAX_ENTRIES))
+			return s;
+
+		ret = unistr_next++;
+		g_array_append_val (unistr_decomp, decomp);
+		g_hash_table_insert (unistr_comp,
+				     GUINT_TO_POINTER (ret - VTE_UNISTR_START),
+				     GUINT_TO_POINTER (ret));
+	}
+
+	return ret;
+}
+
+/* Return the number of characters in s: 1 for a plain character, plus one
+ * for every suffix that was appended to it.  An s that was never handed
+ * out triggers a g_return warning and yields 1. */
+int
+_vte_unistr_strlen (vteunistr s)
+{
+	int count;
+
+	g_return_val_if_fail (s < unistr_next, 1);
+
+	/* Peel off one suffix per iteration until only the base is left. */
+	for (count = 1; G_UNLIKELY (s >= VTE_UNISTR_START); count++) {
+		const struct VteUnistrDecomp *entry =
+			&g_array_index (unistr_decomp,
+					struct VteUnistrDecomp,
+					s - VTE_UNISTR_START);
+		s = entry->prefix;
+	}
+
+	return count;
+}
+
+/* Return the first character of s, i.e. what is left after following the
+ * prefix links down to a value below VTE_UNISTR_START.  An s that was
+ * never handed out triggers a g_return warning and is returned as is. */
+gunichar
+_vte_unistr_get_base (vteunistr s)
+{
+	vteunistr cur = s;
+
+	g_return_val_if_fail (s < unistr_next, s);
+
+	for (;;) {
+		const struct VteUnistrDecomp *entry;
+
+		if (G_LIKELY (cur < VTE_UNISTR_START))
+			break;
+		entry = &g_array_index (unistr_decomp,
+					struct VteUnistrDecomp,
+					cur - VTE_UNISTR_START);
+		cur = entry->prefix;
+	}
+
+	return (gunichar) cur;
+}
+
+/* Append the UTF-8 encoding of every character of s, in order, to gs.
+ *
+ * The table stores each string as (prefix, last character), so the
+ * characters come out last-to-first when following the prefix links.
+ * Instead of recursing once per character (depth would follow the number
+ * of characters combined into the cell), each character is inserted at the
+ * position where gs originally ended, which pushes the ones already
+ * written to its right and restores the correct order. */
+void
+_vte_unistr_append_to_string (vteunistr s, GString *gs)
+{
+	gssize start;
+
+	g_return_if_fail (s < unistr_next);
+
+	start = gs->len;
+	while (G_UNLIKELY (s >= VTE_UNISTR_START)) {
+		const struct VteUnistrDecomp *decomp;
+		decomp = &g_array_index (unistr_decomp,
+					 struct VteUnistrDecomp,
+					 s - VTE_UNISTR_START);
+		g_string_insert_unichar (gs, start, decomp->suffix);
+		s = decomp->prefix;
+	}
+	/* What remains is the base character; it goes in front. */
+	g_string_insert_unichar (gs, start, (gunichar) s);
+}
diff --git a/src/vteunistr.h b/src/vteunistr.h
new file mode 100644
index 00000000..faba4d67
--- /dev/null
+++ b/src/vteunistr.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2008 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author(s):
+ * Behdad Esfahbod
+ */
+
+#ifndef vte_vteunistr_h_included
+#define vte_vteunistr_h_included
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+/* A 32-bit value naming a short string.  Values below 0x80000000 are
+ * plain characters; larger values are handles assigned on demand by
+ * _vte_unistr_append_unichar() to a string plus one appended character. */
+typedef guint32 vteunistr;
+
+/* Convert a single character to the vteunistr holding just that
+ * character.  The argument is parenthesized so that expressions such as
+ * (a + b) are cast as a whole. */
+#define vte_unistr_from_unichar(c) ((vteunistr) (c))
+
+/* Return the vteunistr for s with c appended.  The same (s, c) pair
+ * always yields the same value. */
+vteunistr
+_vte_unistr_append_unichar (vteunistr s, gunichar c);
+
+/* Return the number of characters in s (1 for a plain character). */
+int
+_vte_unistr_strlen (vteunistr s);
+
+/* Return the first character of s. */
+gunichar
+_vte_unistr_get_base (vteunistr s);
+
+/* Append the UTF-8 encoding of all characters of s to gs. */
+void
+_vte_unistr_append_to_string (vteunistr s, GString *gs);
+
+G_END_DECLS
+
+#endif