diff options
author | Peng Wu <alexepico@gmail.com> | 2022-09-14 17:45:36 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2022-09-14 17:47:25 +0800 |
commit | baedddb15b41d7d6dbe1030195c0b343159a3b6c (patch) | |
tree | b7c8f13947709194d8fc366b0b1448db57be753f | |
parent | 95d7370ba98e9a669206e1c4709dc62fa9357f72 (diff) | |
download | libpinyin-baedddb15b41d7d6dbe1030195c0b343159a3b6c.tar.gz |
Write search_suggestion_with_matrix function
-rw-r--r-- | src/pinyin.cpp | 5 | ||||
-rw-r--r-- | src/storage/phonetic_key_matrix.cpp | 94 | ||||
-rw-r--r-- | src/storage/phonetic_key_matrix.h | 5 |
3 files changed, 104 insertions, 0 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp index 6bef386..59e424b 100644 --- a/src/pinyin.cpp +++ b/src/pinyin.cpp @@ -79,6 +79,7 @@ struct _pinyin_instance_t{ /* cached parsed pinyin keys. */ PhoneticKeyMatrix m_matrix; size_t m_parsed_len; + size_t m_parsed_key_len; /* cached pinyin lookup variables. */ ForwardPhoneticConstraints * m_constraints; @@ -1123,6 +1124,7 @@ pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context){ instance->m_prefixes = g_array_new(FALSE, FALSE, sizeof(phrase_token_t)); instance->m_parsed_len = 0; + instance->m_parsed_key_len = 0; instance->m_constraints = new ForwardPhoneticConstraints (context->m_phrase_index); @@ -1312,6 +1314,7 @@ size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance, key_rests, pinyins, strlen(pinyins)); instance->m_parsed_len = parsed_len; + instance->m_parsed_key_len = keys->len; fill_matrix(&matrix, keys, key_rests, parsed_len); @@ -1354,6 +1357,7 @@ size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance, key_rests, pinyins, strlen(pinyins)); instance->m_parsed_len = parsed_len; + instance->m_parsed_key_len = keys->len; fill_matrix(&matrix, keys, key_rests, parsed_len); @@ -1398,6 +1402,7 @@ size_t pinyin_parse_more_chewings(pinyin_instance_t * instance, key_rests, chewings, strlen(chewings)); instance->m_parsed_len = parsed_len; + instance->m_parsed_key_len = keys->len; fill_matrix(&matrix, keys, key_rests, parsed_len); diff --git a/src/storage/phonetic_key_matrix.cpp b/src/storage/phonetic_key_matrix.cpp index 058c2e4..ab7e879 100644 --- a/src/storage/phonetic_key_matrix.cpp +++ b/src/storage/phonetic_key_matrix.cpp @@ -437,6 +437,100 @@ int search_matrix(const FacadeChewingTable2 * table, return result; } +int search_suggestion_with_matrix_recur(GArray * cached_keys, + const FacadeChewingTable2 * table, + const PhoneticKeyMatrix * matrix, + size_t prefix_len, + size_t start, size_t end, + PhraseTokens tokens) { + if (start > end) + return SEARCH_NONE; + + /* only do 
chewing table search with 'start' and 'end'. */ + if (start == end) { + /* exceeds the maximum phrase length. */ + if (cached_keys->len > MAX_PHRASE_LENGTH) + return SEARCH_NONE; + + /* skip phrases longer than prefix_len * 2 + 1; + the m_parsed_key_len variable is used as prefix_len. */ + if (cached_keys->len > prefix_len * 2) + return SEARCH_NONE; + + /* only "'" here. */ + if (0 == cached_keys->len) + return SEARCH_NONE; + +#if 0 + printf("search table for suggestion candidate:%d\n", cached_keys->len); +#endif + return table->search_suggestion + (cached_keys->len, (ChewingKey *)cached_keys->data, tokens); + } + + int result = SEARCH_NONE; + + const size_t size = matrix->get_column_size(start); + /* assume pinyin parsers will filter invalid keys. */ + assert(size > 0); + + for (size_t i = 0; i < size; ++i) { + ChewingKey key; ChewingKeyRest key_rest; + matrix->get_item(start, i, key, key_rest); + + const size_t newstart = key_rest.m_raw_end; + + const ChewingKey zero_key; + if (zero_key == key) { + /* assume only one key here for "'" or the last key. */ + assert(1 == size); + return search_suggestion_with_matrix_recur + (cached_keys, table, matrix, prefix_len, newstart, end, tokens); + } + + /* push value */ + g_array_append_val(cached_keys, key); + + result |= search_suggestion_with_matrix_recur + (cached_keys, table, matrix, prefix_len, newstart, end, tokens); + + /* pop value */ + g_array_set_size(cached_keys, cached_keys->len - 1); + } + + return result; +} + +int search_suggestion_with_matrix(const FacadeChewingTable2 * table, + const PhoneticKeyMatrix * matrix, + size_t prefix_len, + PhraseTokens tokens) { + int result = SEARCH_NONE; + + /* skip when the prefix phrase is equal to or longer than MAX_PHRASE_LENGTH, + as the prefix phrase candidate will always be longer than prefix_len. 
*/ + if (prefix_len >= MAX_PHRASE_LENGTH) + return result; + + size_t start = 0, end = matrix->size() - 1; + + const size_t start_len = matrix->get_column_size(start); + if (0 == start_len) + return result; + + const size_t end_len = matrix->get_column_size(end); + if (0 == end_len) + return result; + + GArray * cached_keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey)); + + result = search_suggestion_with_matrix_recur + (cached_keys, table, matrix, prefix_len, start, end, tokens); + + g_array_free(cached_keys, TRUE); + return result; +} + gfloat compute_pronunciation_possibility_recur(const PhoneticKeyMatrix * matrix, size_t start, size_t end, GArray * cached_keys, diff --git a/src/storage/phonetic_key_matrix.h b/src/storage/phonetic_key_matrix.h index ca7aa84..0b80a96 100644 --- a/src/storage/phonetic_key_matrix.h +++ b/src/storage/phonetic_key_matrix.h @@ -212,6 +212,11 @@ int search_matrix(const FacadeChewingTable2 * table, size_t start, size_t end, PhraseIndexRanges ranges); +int search_suggestion_with_matrix(const FacadeChewingTable2 * table, + const PhoneticKeyMatrix * matrix, + size_t prefix_len, + PhraseTokens tokens); + gfloat compute_pronunciation_possibility(const PhoneticKeyMatrix * matrix, size_t start, size_t end, GArray * cached_keys, |