diff options
author | Ran Benita <ran@unusedvar.com> | 2021-03-29 16:05:14 +0300 |
---|---|---|
committer | Ran Benita <ran@unusedvar.com> | 2021-03-31 12:10:52 +0300 |
commit | 02b9cabf9827dd004912fb0b002c2a37a1aa90e0 (patch) | |
tree | 68366cff22aea14237138d1f38ac88ff61ee4121 /src/compose | |
parent | 3a6c3b2c48614d6ce94cc89a349c3f422e429931 (diff) | |
download | xorg-lib-libxkbcommon-02b9cabf9827dd004912fb0b002c2a37a1aa90e0.tar.gz |
compose: use a ternary tree instead of a regular trie
Previously we used a simple trie with a linked list for each chain.
Unfortunately most compose files have very long chains which means the
constructions performs an almost quadratic number of comparisons.
Switch to using a ternary search tree instead. This is very similar to a
trie, only the linked list is essentially replaced with a binary tree.
On the en_US/Compose file, the perf diff is the following (the modified
function is `parse`):
Event 'cycles:u'
Baseline Delta Abs Shared Object Symbol
........ ......... ................ .................................
39.91% -17.62% bench-compose [.] parse.constprop.0
20.54% +6.47% bench-compose [.] lex
17.28% +5.55% libc-2.33.so [.] __strcmp_avx2
12.78% +4.01% bench-compose [.] xkb_keysym_from_name
2.30% +0.83% libc-2.33.so [.] __GI_____strtoull_l_internal
3.36% +0.78% bench-compose [.] strcmp@plt
Thanks to some careful packing, the memory usage is pretty much the
same.
Signed-off-by: Ran Benita <ran@unusedvar.com>
Diffstat (limited to 'src/compose')
-rw-r--r-- | src/compose/parser.c | 165 | ||||
-rw-r--r-- | src/compose/state.c | 17 | ||||
-rw-r--r-- | src/compose/table.c | 15 | ||||
-rw-r--r-- | src/compose/table.h | 70 |
4 files changed, 139 insertions, 128 deletions
diff --git a/src/compose/parser.c b/src/compose/parser.c index a8458ac..a47ae4b 100644 --- a/src/compose/parser.c +++ b/src/compose/parser.c @@ -327,114 +327,107 @@ struct production { xkb_mod_mask_t mods; }; -static uint16_t -add_node(struct xkb_compose_table *table, xkb_keysym_t keysym) -{ - struct compose_node new = { - .keysym = keysym, - .next = 0, - .is_leaf = true, - }; - darray_append(table->nodes, new); - return darray_size(table->nodes) - 1; -} - static void add_production(struct xkb_compose_table *table, struct scanner *s, const struct production *production) { - unsigned lhs_pos; - uint16_t curr; - struct compose_node *node; + unsigned lhs_pos = 0; + uint16_t curr = darray_size(table->nodes) == 1 ? 0 : 1; + uint16_t *pptr = NULL; + struct compose_node *node = NULL; if (darray_size(table->nodes) + 1 == MAX_COMPOSE_NODES) scanner_warn(s, "too many sequences for one Compose file; will ignore further lines"); if (darray_size(table->nodes) >= MAX_COMPOSE_NODES) return; - curr = 0; - node = &darray_item(table->nodes, curr); - /* - * Insert the sequence to the trie, creating new nodes as needed. + * Insert the sequence to the ternary search tree, creating new nodes as + * needed. * - * TODO: This can be sped up a bit by first trying the path that the - * previous production took, and only then doing the linear search - * through the trie levels. This will work because sequences in the - * Compose files are often clustered by a common prefix; especially - * in the 1st and 2nd keysyms, which is where the largest variation - * (thus, longest search) is. + * TODO: We insert in the order given, this means some inputs can create + * long O(n) chains, which results in total O(n^2) parsing time. We should + * ensure the tree is reasonably balanced somehow. */ - for (lhs_pos = 0; lhs_pos < production->len; lhs_pos++) { - while (production->lhs[lhs_pos] != node->keysym) { - if (node->next == 0) { - uint16_t next = add_node(table, production->lhs[lhs_pos]); - /* Refetch since add_node could have realloc()ed. */ - node = &darray_item(table->nodes, curr); - node->next = next; + while (true) { + const xkb_keysym_t keysym = production->lhs[lhs_pos]; + const bool last = lhs_pos + 1 == production->len; + + if (curr == 0) { + /* + * Create a new node and update the parent pointer to it. + * Update the pointer first because the append invalidates it. + */ + struct compose_node new = { + .keysym = keysym, + .lokid = 0, + .hikid = 0, + .internal = { + .eqkid = 0, + .is_leaf = false, + }, + }; + curr = darray_size(table->nodes); + if (pptr != NULL) { + *pptr = curr; + pptr = NULL; } - - curr = node->next; - node = &darray_item(table->nodes, curr); + darray_append(table->nodes, new); } - if (lhs_pos + 1 == production->len) - break; + node = &darray_item(table->nodes, curr); - if (node->is_leaf) { - if (node->leaf.utf8 != 0 || - node->leaf.keysym != XKB_KEY_NoSymbol) { + if (keysym < node->keysym) { + pptr = &node->lokid; + curr = node->lokid; + } else if (keysym > node->keysym) { + pptr = &node->hikid; + curr = node->hikid; + } else if (!last) { + if (node->is_leaf) { scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding"); - node->leaf.utf8 = 0; - node->leaf.keysym = XKB_KEY_NoSymbol; + node->internal.eqkid = node->lokid = node->hikid = 0; + node->internal.is_leaf = false; } - - { - uint16_t successor = add_node(table, production->lhs[lhs_pos + 1]); - /* Refetch since add_node could have realloc()ed. */ - node = &darray_item(table->nodes, curr); - node->is_leaf = false; - node->internal.successor = successor; + lhs_pos++; + pptr = &node->internal.eqkid; + curr = node->internal.eqkid; + } else { + if (node->is_leaf) { + bool same_string = + (node->leaf.utf8 == 0 && !production->has_string) || + ( + node->leaf.utf8 != 0 && production->has_string && + streq(&darray_item(table->utf8, node->leaf.utf8), + production->string) + ); + bool same_keysym = + (node->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) || + ( + node->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym && + node->leaf.keysym == production->keysym + ); + if (same_string && same_keysym) { + scanner_warn(s, "this compose sequence is a duplicate of another; skipping line"); + return; + } else { + scanner_warn(s, "this compose sequence already exists; overriding"); + } + } else if (node->internal.eqkid != 0) { + scanner_warn(s, "this compose sequence is a prefix of another; skipping line"); + return; + } + node->is_leaf = true; + if (production->has_string) { + node->leaf.utf8 = darray_size(table->utf8); + darray_append_items(table->utf8, production->string, + strlen(production->string) + 1); + } + if (production->has_keysym) { + node->leaf.keysym = production->keysym; } - } - - curr = node->internal.successor; - node = &darray_item(table->nodes, curr); - } - - if (!node->is_leaf) { - scanner_warn(s, "this compose sequence is a prefix of another; skipping line"); - return; - } - - if (node->leaf.utf8 != 0 || node->leaf.keysym != XKB_KEY_NoSymbol) { - bool same_string = - (node->leaf.utf8 == 0 && !production->has_string) || - ( - node->leaf.utf8 != 0 && production->has_string && - streq(&darray_item(table->utf8, node->leaf.utf8), - production->string) - ); - bool same_keysym = - (node->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) || - ( - node->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym && - node->leaf.keysym == production->keysym - ); - if (same_string && same_keysym) { - scanner_warn(s, "this compose sequence is a duplicate of another; skipping line"); return; } - scanner_warn(s, "this compose sequence already exists; overriding"); - } - - if (production->has_string) { - node->leaf.utf8 = darray_size(table->utf8); - darray_append_items(table->utf8, production->string, - strlen(production->string) + 1); - } - if (production->has_keysym) { - node->leaf.keysym = production->keysym; } } diff --git a/src/compose/state.c b/src/compose/state.c index de08a90..6ba0344 100644 --- a/src/compose/state.c +++ b/src/compose/state.c @@ -109,17 +109,20 @@ xkb_compose_state_feed(struct xkb_compose_state *state, xkb_keysym_t keysym) node = &darray_item(state->table->nodes, state->context); - context = (node->is_leaf ? 0 : node->internal.successor); - node = &darray_item(state->table->nodes, context); + context = (node->is_leaf ? 1 : node->internal.eqkid); + if (context == 1 && darray_size(state->table->nodes) == 1) + context = 0; - while (node->keysym != keysym && node->next != 0) { - context = node->next; + while (context != 0) { node = &darray_item(state->table->nodes, context); + if (keysym < node->keysym) + context = node->lokid; + else if (keysym > node->keysym) + context = node->hikid; + else + break; } - if (node->keysym != keysym) - context = 0; - state->prev_context = state->context; state->context = context; return XKB_COMPOSE_FEED_ACCEPTED; diff --git a/src/compose/table.c b/src/compose/table.c index dbd255e..1691555 100644 --- a/src/compose/table.c +++ b/src/compose/table.c @@ -1,5 +1,5 @@ /* - * Copyright © 2013 Ran Benita <ran234@gmail.com> + * Copyright © 2013,2021 Ran Benita <ran234@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -36,7 +36,7 @@ xkb_compose_table_new(struct xkb_context *ctx, { char *resolved_locale; struct xkb_compose_table *table; - struct compose_node root; + struct compose_node dummy; resolved_locale = resolve_locale(locale); if (!resolved_locale) @@ -58,12 +58,11 @@ xkb_compose_table_new(struct xkb_context *ctx, darray_init(table->nodes); darray_init(table->utf8); - root.keysym = XKB_KEY_NoSymbol; - root.next = 0; - root.is_leaf = true; - root.leaf.utf8 = 0; - root.leaf.keysym = XKB_KEY_NoSymbol; - darray_append(table->nodes, root); + dummy.keysym = XKB_KEY_NoSymbol; + dummy.leaf.is_leaf = true; + dummy.leaf.utf8 = 0; + dummy.leaf.keysym = XKB_KEY_NoSymbol; + darray_append(table->nodes, dummy); darray_append(table->utf8, '\0'); diff --git a/src/compose/table.h b/src/compose/table.h index 467ccf7..6be4348 100644 --- a/src/compose/table.h +++ b/src/compose/table.h @@ -1,5 +1,5 @@ /* - * Copyright © 2013 Ran Benita <ran234@gmail.com> + * Copyright © 2013,2021 Ran Benita <ran234@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -29,36 +29,43 @@ #include "context.h" /* - * The compose table data structure is a simple trie. An example will - * help. Given these sequences: + * The compose table data structure is a ternary search tree. * - * <A> <B> : "first" dead_a - * <A> <C> <D> : "second" dead_b - * <E> <F> : "third" dead_c + * Reference: https://www.drdobbs.com/database/ternary-search-trees/184410528 + * Visualization: https://www.cs.usfca.edu/~galles/visualization/TST.html * - * the trie would look like: + * Short example. Given these sequences: + * + * <B> <C> : "first" dead_a + * <B> <D> <E> : "second" dead_b + * <A> <F> : "third" dead_c + * + * the tree would look like: + * + * -------- [<B>]--------- + * | | # + * v V + * -- [<A>] -- [<C>] -------- + * # | # | | + * v # -- [<D>] -- + * -- [<F>] -- # | # + * # | # v + * # -- [<E>] -- + * # | # + * # * - * [root] ---> [<A>] -----------------> [<E>] -# - * | | | - * # v v - * [<B>] ---> [<C>] -# [<F>] -# - * | | - - * # v # - * [<D>] -# - * | - * # * where: - * - [root] is a special empty root node. * - [<X>] is a node for a sequence keysym <X>. - * - right arrows are `next` pointers. - * - down arrows are `successor` pointers. + * - right arrows are `hikid` pointers. + * - left arrows are `lokid` pointers. + * - down arrows are `eqkid` pointers. * - # is a nil pointer. * * The nodes are all kept in a contiguous array. Pointers are represented * as integer offsets into this array. A nil pointer is represented as 0 - * (which, helpfully, is the offset of the empty root node). + * (which, helpfully, is the offset of an empty dummy node). * - * Nodes without a successor are leaf nodes. Since a sequence cannot be a + * Nodes without an eqkid are leaf nodes. Since a sequence cannot be a * prefix of another, these are exactly the nodes which terminate the * sequences (in a bijective manner). * @@ -73,18 +80,27 @@ struct compose_node { xkb_keysym_t keysym; - /* Offset into xkb_compose_table::nodes. */ - uint16_t next; - bool is_leaf; + + /* Offset into xkb_compose_table::nodes or 0. */ + uint16_t lokid; + /* Offset into xkb_compose_table::nodes or 0. */ + uint16_t hikid; union { struct { - /* Offset into xkb_compose_table::nodes. */ - uint16_t successor; + uint32_t _pad:31; + bool is_leaf:1; + }; + struct { + uint32_t _pad:31; + bool is_leaf:1; + /* Offset into xkb_compose_table::nodes or 0. */ + uint16_t eqkid; } internal; struct { /* Offset into xkb_compose_table::utf8. */ - uint32_t utf8; + uint32_t utf8:31; + bool is_leaf:1; xkb_keysym_t keysym; } leaf; }; |