diff options
author | Behdad Esfahbod <behdad@gnome.org> | 2008-12-12 11:20:31 +0000 |
---|---|---|
committer | Behdad Esfahbod <behdad@src.gnome.org> | 2008-12-12 11:20:31 +0000 |
commit | d453dce12191a1ee19fa70d83bab013999006385 (patch) | |
tree | 3840c3511ddcda498e1b113de39cd2cfd530942a | |
parent | beb61da0f7d5d1730f532cc2eb37df507a267a87 (diff) | |
download | vte-d453dce12191a1ee19fa70d83bab013999006385.tar.gz |
Document vteunistr.
2008-12-12 Behdad Esfahbod <behdad@gnome.org>
* doc/reference/Makefile.am:
* src/vteunistr.c (unistr_comp_hash), (unistr_comp_equal),
(_vte_unistr_append_unichar), (_vte_unistr_get_base),
(_vte_unistr_append_to_string), (_vte_unistr_strlen):
* src/vteunistr.h:
Document vteunistr.
svn path=/trunk/; revision=2344
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | doc/reference/Makefile.am | 1 | ||||
-rw-r--r-- | src/vteunistr.c | 120 | ||||
-rw-r--r-- | src/vteunistr.h | 52 |
4 files changed, 134 insertions, 48 deletions
@@ -1,5 +1,14 @@ 2008-12-12 Behdad Esfahbod <behdad@gnome.org> + * doc/reference/Makefile.am: + * src/vteunistr.c (unistr_comp_hash), (unistr_comp_equal), + (_vte_unistr_append_unichar), (_vte_unistr_get_base), + (_vte_unistr_append_to_string), (_vte_unistr_strlen): + * src/vteunistr.h: + Document vteunistr. + +2008-12-12 Behdad Esfahbod <behdad@gnome.org> + * src/vteunistr.h: Minor cleanup. 2008-12-12 Behdad Esfahbod <behdad@gnome.org> diff --git a/doc/reference/Makefile.am b/doc/reference/Makefile.am index 385969c8..b2d07a07 100644 --- a/doc/reference/Makefile.am +++ b/doc/reference/Makefile.am @@ -47,6 +47,7 @@ IGNORE_HFILES= \ vteskel.h \ vtetc.h \ vtetree.h \ + vteunistr.h \ vtexft.h diff --git a/src/vteunistr.c b/src/vteunistr.c index 534d15b2..f6f7bc31 100644 --- a/src/vteunistr.c +++ b/src/vteunistr.c @@ -25,8 +25,57 @@ #include "vteunistr.h" -#define VTE_UNISTR_START 0x80000000 +/* Overview: + * + * The way vteunistr is implemented is very simple: Unicode only defines + * codepoints less than 0x10FFFF. That leaves plenty of room in a guint32 to + * use for other things. So, whenever our "string" contains only one Unicode + * character, we use its code as our vteunistr. Otherwise, we register the + * string in a central registry and use assign a unique number to it. That + * number can be thought as "our own private non-unicode code for this + * sequence of characters". + * + * The rest of the problem would be how to efficiently implement this + * registry. It does *NOT* really have to be efficient at all, as it will + * only be access in case of combining marks. And the strings are pretty + * short most of the time. But our implementation is quite efficient and + * nifty anyway. + * + * The access pattern of using vteunistr's is that we have a vteunistr in a + * terminal cell, a new gunichar comes in and we decide to combine with it, + * and we combine them and get a new vteunistr. So, that is exactly how we + * encode vteunistr's: all we need to know about a vteunistr to be able to + * reconstruct its string is the vteunistr and the gunichar that joined to + * form it. That's what VteUnistrDecomp is. That is the decomposition. + * + * We start giving new vteunistr's unique numbers starting at + * VTE_UNISTR_START+1 and going up. We keep the decompositions in a GArray, + * called unistr_decomp. The first entry of the array is unused (that's why + * we start from VTE_UNISTR_START plus one). The decomposition table provides + * enough information to efficiently answer questions like "what's the first + * gunichar in this vteunistr?", "what's the sequence of gunichar's in this + * vteunistr?", and "how many gunichar's are there in this vteunistr?". + * + * We still do not have any efficient way to construct new vteunistr's though. + * Given a vteunistr and a gunichar, we have to walk over the entire + * decomposition table to see if we have already registered (encoded) this + * combination. To make that operation fast, we use a reverse map, that is, + * a GHashTable named unistr_comp. The hash table maps a decomposition to its + * encoded vteunistr value. The value obivously fits in a pointer and does + * not need memory allocation. We also want to avoid allocating memory for + * the keys of the hash table entries, as we already have those decompositions + * in the memory in the unistr_decomp array. We cannot use direct pointers + * though as when growing the GArray may resize and move to a new memory + * buffer, rendering all our pointers invalid. For this reason, we keep the + * index into the array as our hash table key. When we want to perform a + * lookup in the hash table, we insert the decomposition that we are searching + * for as item zero in the unistr_decomp table, then lookup for an equal entry + * of it in the hash table. Finally, if the hash lookup fails, we add the new + * decomposition to the lookup array and the hash table and return the newly + * encoded vteunistr value. + */ +#define VTE_UNISTR_START 0x80000000 static vteunistr unistr_next = VTE_UNISTR_START + 1; @@ -38,26 +87,22 @@ struct VteUnistrDecomp { GArray *unistr_decomp; GHashTable *unistr_comp; +#define DECOMP_FROM_INDEX(i) g_array_index (unistr_decomp, struct VteUnistrDecomp, (i)) +#define DECOMP_FROM_UNISTR(s) DECOMP_FROM_INDEX ((s) - VTE_UNISTR_START) + static guint unistr_comp_hash (gconstpointer key) { struct VteUnistrDecomp *decomp; - decomp = &g_array_index (unistr_decomp, - struct VteUnistrDecomp, - GPOINTER_TO_UINT (key)); + decomp = &DECOMP_FROM_INDEX (GPOINTER_TO_UINT (key)); return decomp->prefix ^ decomp->suffix; } static gboolean -unistr_comp_equal (gconstpointer a, - gconstpointer b) +unistr_comp_equal (gconstpointer a, gconstpointer b) { - return 0 == memcmp (&g_array_index (unistr_decomp, - struct VteUnistrDecomp, - GPOINTER_TO_UINT (a)), - &g_array_index (unistr_decomp, - struct VteUnistrDecomp, - GPOINTER_TO_UINT (b)), + return 0 == memcmp (&DECOMP_FROM_INDEX (GPOINTER_TO_UINT (a)), + &DECOMP_FROM_INDEX (GPOINTER_TO_UINT (b)), sizeof (struct VteUnistrDecomp)); } @@ -71,17 +116,12 @@ _vte_unistr_append_unichar (vteunistr s, gunichar c) decomp.suffix = c; if (G_UNLIKELY (!unistr_decomp)) { - unistr_decomp = g_array_new (FALSE, TRUE, - sizeof (struct VteUnistrDecomp)); + unistr_decomp = g_array_new (FALSE, TRUE, sizeof (struct VteUnistrDecomp)); g_array_set_size (unistr_decomp, 1); - unistr_comp = g_hash_table_new (unistr_comp_hash, - unistr_comp_equal); + unistr_comp = g_hash_table_new (unistr_comp_hash, unistr_comp_equal); } else { - g_array_index (unistr_decomp, - struct VteUnistrDecomp, - 0) = decomp; - ret = GPOINTER_TO_UINT (g_hash_table_lookup (unistr_comp, - GUINT_TO_POINTER (0))); + DECOMP_FROM_INDEX (0) = decomp; + ret = GPOINTER_TO_UINT (g_hash_table_lookup (unistr_comp, GUINT_TO_POINTER (0))); } if (G_UNLIKELY (!ret)) { @@ -95,30 +135,12 @@ _vte_unistr_append_unichar (vteunistr s, gunichar c) return ret; } -/* Unused -int -_vte_unistr_strlen (vteunistr s) -{ - int len = 1; - g_return_val_if_fail (s < unistr_next, len); - while (G_UNLIKELY (s >= VTE_UNISTR_START)) { - s = g_array_index (unistr_decomp, - struct VteUnistrDecomp, - s - VTE_UNISTR_START).prefix; - len++; - } - return len; -} -*/ - gunichar _vte_unistr_get_base (vteunistr s) { g_return_val_if_fail (s < unistr_next, s); while (G_UNLIKELY (s >= VTE_UNISTR_START)) - s = g_array_index (unistr_decomp, - struct VteUnistrDecomp, - s - VTE_UNISTR_START).prefix; + s = DECOMP_FROM_UNISTR (s).prefix; return (gunichar) s; } @@ -128,11 +150,23 @@ _vte_unistr_append_to_string (vteunistr s, GString *gs) g_return_if_fail (s < unistr_next); if (G_UNLIKELY (s >= VTE_UNISTR_START)) { struct VteUnistrDecomp *decomp; - decomp = &g_array_index (unistr_decomp, - struct VteUnistrDecomp, - s - VTE_UNISTR_START); + decomp = &DECOMP_FROM_UNISTR (s); _vte_unistr_append_to_string (decomp->prefix, gs); s = decomp->suffix; } g_string_append_unichar (gs, (gunichar) s); } + +#if 0 /* unused */ +int +_vte_unistr_strlen (vteunistr s) +{ + int len = 1; + g_return_val_if_fail (s < unistr_next, len); + while (G_UNLIKELY (s >= VTE_UNISTR_START)) { + s = DECOMP_FROM_UNISTR (s).prefix; + len++; + } + return len; +} +#endif diff --git a/src/vteunistr.h b/src/vteunistr.h index 730e3239..f8b7bf78 100644 --- a/src/vteunistr.h +++ b/src/vteunistr.h @@ -26,22 +26,64 @@ G_BEGIN_DECLS +/** + * vteunistr: + * + * vteunistr is a gunichar-compatible way to store strings. A string + * consisting of a single unichar c is represented as the same value + * as c itself. In that sense, gunichars can be readily used as + * vteunistrs. Longer strings can be built by appending a unichar + * to an already existing string. + * + * vteunistr is essentially just a gunicode-compatible quark value. + * It can be used to store strings (of a base followed by combining + * characters) where the code was designed to only allow one character. + * + * Strings are internalized efficiently and never freed. No memory + * management of vteunistr values is needed. + **/ typedef guint32 vteunistr; +/** + * _vte_unistr_append_unichar: + * @s: a #vteunistr + * @c: Unicode character to append to @s + * + * Creates a vteunistr value for the string @s followed by the + * character @c. + * + * Returns: the new #vteunistr value + **/ vteunistr _vte_unistr_append_unichar (vteunistr s, gunichar c); -/* Unused -int -_vte_unistr_strlen (vteunistr s); -*/ - gunichar _vte_unistr_get_base (vteunistr s); +/** + * _vte_unistr_append_to_string: + * @s: a #vteunistr + * @gs: a #GString to append @s to + * + * Appends @s to @gs. This is how one converts a #vteunistr to a + * traditional string. + **/ void _vte_unistr_append_to_string (vteunistr s, GString *gs); +#if 0 /* unused */ +/** + * _vte_unistr_strlen: + * @s: a #vteunistr + * + * Counts the number of character in @s. + * + * Returns: length of @s in characters. + **/ +int +_vte_unistr_strlen (vteunistr s); +#endif + G_END_DECLS #endif |