summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2022-09-14 17:45:36 +0800
committer Peng Wu <alexepico@gmail.com> 2022-09-14 17:47:25 +0800
commit baedddb15b41d7d6dbe1030195c0b343159a3b6c (patch)
tree b7c8f13947709194d8fc366b0b1448db57be753f
parent 95d7370ba98e9a669206e1c4709dc62fa9357f72 (diff)
download libpinyin-baedddb15b41d7d6dbe1030195c0b343159a3b6c.tar.gz
Write search_suggestion_with_matrix function
-rw-r--r-- src/pinyin.cpp 5
-rw-r--r-- src/storage/phonetic_key_matrix.cpp 94
-rw-r--r-- src/storage/phonetic_key_matrix.h 5
3 files changed, 104 insertions, 0 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 6bef386..59e424b 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -79,6 +79,7 @@ struct _pinyin_instance_t{
/* cached parsed pinyin keys. */
PhoneticKeyMatrix m_matrix;
size_t m_parsed_len;
+ size_t m_parsed_key_len;
/* cached pinyin lookup variables. */
ForwardPhoneticConstraints * m_constraints;
@@ -1123,6 +1124,7 @@ pinyin_instance_t * pinyin_alloc_instance(pinyin_context_t * context){
instance->m_prefixes = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
instance->m_parsed_len = 0;
+ instance->m_parsed_key_len = 0;
instance->m_constraints = new ForwardPhoneticConstraints
(context->m_phrase_index);
@@ -1312,6 +1314,7 @@ size_t pinyin_parse_more_full_pinyins(pinyin_instance_t * instance,
key_rests, pinyins, strlen(pinyins));
instance->m_parsed_len = parsed_len;
+ instance->m_parsed_key_len = keys->len;
fill_matrix(&matrix, keys, key_rests, parsed_len);
@@ -1354,6 +1357,7 @@ size_t pinyin_parse_more_double_pinyins(pinyin_instance_t * instance,
key_rests, pinyins, strlen(pinyins));
instance->m_parsed_len = parsed_len;
+ instance->m_parsed_key_len = keys->len;
fill_matrix(&matrix, keys, key_rests, parsed_len);
@@ -1398,6 +1402,7 @@ size_t pinyin_parse_more_chewings(pinyin_instance_t * instance,
key_rests, chewings, strlen(chewings));
instance->m_parsed_len = parsed_len;
+ instance->m_parsed_key_len = keys->len;
fill_matrix(&matrix, keys, key_rests, parsed_len);
diff --git a/src/storage/phonetic_key_matrix.cpp b/src/storage/phonetic_key_matrix.cpp
index 058c2e4..ab7e879 100644
--- a/src/storage/phonetic_key_matrix.cpp
+++ b/src/storage/phonetic_key_matrix.cpp
@@ -437,6 +437,100 @@ int search_matrix(const FacadeChewingTable2 * table,
return result;
}
+int search_suggestion_with_matrix_recur(GArray * cached_keys,
+ const FacadeChewingTable2 * table,
+ const PhoneticKeyMatrix * matrix,
+ size_t prefix_len,
+ size_t start, size_t end,
+ PhraseTokens tokens) { /* walks the matrix from 'start' to 'end', collecting one key per step in cached_keys, and ORs together the table->search_suggestion results of every key sequence that reaches 'end'. */
+ if (start > end)
+ return SEARCH_NONE;
+
+ /* base case: 'start' has reached 'end', so look up the collected keys in the chewing table. */
+ if (start == end) {
+ /* more keys were collected than MAX_PHRASE_LENGTH allows: nothing to search. */
+ if (cached_keys->len > MAX_PHRASE_LENGTH)
+ return SEARCH_NONE;
+
+ /* skip when more than prefix_len * 2 keys were collected; prefix_len is meant to be the instance's m_parsed_key_len (caller not shown here).
+ NOTE(review): this comment used to say "prefix_len * 2 + 1", but the test below is "> prefix_len * 2" - confirm the intended bound. */
+ if (cached_keys->len > prefix_len * 2)
+ return SEARCH_NONE;
+
+ /* no keys were collected (e.g. the input held only "'" separators). */
+ if (0 == cached_keys->len)
+ return SEARCH_NONE;
+
+#if 0
+ printf("search table for suggestion candidate:%d\n", cached_keys->len);
+#endif
+ return table->search_suggestion
+ (cached_keys->len, (ChewingKey *)cached_keys->data, tokens);
+ }
+
+ int result = SEARCH_NONE;
+
+ const size_t size = matrix->get_column_size(start);
+ /* a column visited before 'end' is expected to be non-empty; this assumes the pinyin parsers filter invalid keys. */
+ assert(size > 0);
+
+ for (size_t i = 0; i < size; ++i) {
+ ChewingKey key; ChewingKeyRest key_rest;
+ matrix->get_item(start, i, key, key_rest);
+
+ const size_t newstart = key_rest.m_raw_end;
+
+ const ChewingKey zero_key;
+ if (zero_key == key) {
+ /* a zero key is assumed to be the only entry in its column (the "'" separator or the last key); step over it without caching it. */
+ assert(1 == size);
+ return search_suggestion_with_matrix_recur
+ (cached_keys, table, matrix, prefix_len, newstart, end, tokens);
+ }
+
+ /* push this key, then continue the search from the position where it ends (m_raw_end). */
+ g_array_append_val(cached_keys, key);
+
+ result |= search_suggestion_with_matrix_recur
+ (cached_keys, table, matrix, prefix_len, newstart, end, tokens);
+
+ /* pop the key so the next alternative in this column starts from the same cached sequence. */
+ g_array_set_size(cached_keys, cached_keys->len - 1);
+ }
+
+ return result;
+}
+
+int search_suggestion_with_matrix(const FacadeChewingTable2 * table,
+ const PhoneticKeyMatrix * matrix,
+ size_t prefix_len,
+ PhraseTokens tokens) { /* runs search_suggestion_with_matrix_recur over the whole matrix (column 0 to column size() - 1) with a temporary key array, and returns its result; returns SEARCH_NONE early when a guard below fails. */
+ int result = SEARCH_NONE;
+
+ /* skip when the prefix phrase is equal to or longer than MAX_PHRASE_LENGTH,
+ as a prefix phrase candidate will always be longer than prefix_len. */
+ if (prefix_len >= MAX_PHRASE_LENGTH)
+ return result;
+ /* NOTE(review): if matrix->size() can be 0, 'size() - 1' makes 'end' wrap to the largest size_t value - confirm callers never pass an empty matrix. */
+ size_t start = 0, end = matrix->size() - 1;
+
+ const size_t start_len = matrix->get_column_size(start);
+ if (0 == start_len)
+ return result;
+
+ const size_t end_len = matrix->get_column_size(end);
+ if (0 == end_len)
+ return result;
+
+ GArray * cached_keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey));
+
+ result = search_suggestion_with_matrix_recur
+ (cached_keys, table, matrix, prefix_len, start, end, tokens);
+
+ g_array_free(cached_keys, TRUE);
+ return result;
+}
+
gfloat compute_pronunciation_possibility_recur(const PhoneticKeyMatrix * matrix,
size_t start, size_t end,
GArray * cached_keys,
diff --git a/src/storage/phonetic_key_matrix.h b/src/storage/phonetic_key_matrix.h
index ca7aa84..0b80a96 100644
--- a/src/storage/phonetic_key_matrix.h
+++ b/src/storage/phonetic_key_matrix.h
@@ -212,6 +212,11 @@ int search_matrix(const FacadeChewingTable2 * table,
size_t start, size_t end,
PhraseIndexRanges ranges);
+int search_suggestion_with_matrix(const FacadeChewingTable2 * table,
+ const PhoneticKeyMatrix * matrix,
+ size_t prefix_len,
+ PhraseTokens tokens);
+
gfloat compute_pronunciation_possibility(const PhoneticKeyMatrix * matrix,
size_t start, size_t end,
GArray * cached_keys,