summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2022-09-23 16:18:27 +0800
committer: Peng Wu <alexepico@gmail.com> 2022-09-23 16:18:27 +0800
commit: c694fbb4ebbaf923658a2e2c0336c711ca70f34f (patch)
tree: d58e871c4e956bad8e1efe9d31a3b59b822be4a3
parent: 0d94acd57cefd09991d6df7887a2d68d1366a6c6 (diff)
download: libpinyin-c694fbb4ebbaf923658a2e2c0336c711ca70f34f.tar.gz
Update pinyin_choose_candidate function
-rw-r--r-- src/pinyin.cpp | 17
1 file changed, 16 insertions, 1 deletion
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index e3304db..4cc357c 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -2240,6 +2240,20 @@ int pinyin_choose_candidate(pinyin_instance_t * instance,
return matrix.size() - 1;
}
+ if (LONGER_CANDIDATE == candidate->m_candidate_type) {
+ /* only train uni-gram for longer candidate. */
+ const guint32 initial_seed = 23 * 3;
+ const guint32 unigram_factor = 7;
+
+ phrase_token_t token = candidate->m_token;
+ int error = context->m_phrase_index->add_unigram_frequency
+ (token, initial_seed * unigram_factor);
+ if (ERROR_INTEGER_OVERFLOW == error)
+ return false;
+
+ return true;
+ }
+
if (ADDON_CANDIDATE == candidate->m_candidate_type) {
PhraseItem item;
context->m_addon_phrase_index->get_phrase_item
@@ -3402,7 +3416,8 @@ bool pinyin_remember_user_input(pinyin_instance_t * instance,
bool pinyin_is_user_candidate(pinyin_instance_t * instance,
lookup_candidate_t * candidate) {
- if (NORMAL_CANDIDATE != candidate->m_candidate_type)
+ if (NORMAL_CANDIDATE != candidate->m_candidate_type &&
+ LONGER_CANDIDATE != candidate->m_candidate_type)
return false;
phrase_token_t token = candidate->m_token;