diff options
author | Peng Wu <alexepico@gmail.com> | 2022-09-23 16:18:27 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2022-09-23 16:18:27 +0800 |
commit | c694fbb4ebbaf923658a2e2c0336c711ca70f34f (patch) | |
tree | d58e871c4e956bad8e1efe9d31a3b59b822be4a3 | |
parent | 0d94acd57cefd09991d6df7887a2d68d1366a6c6 (diff) | |
download | libpinyin-c694fbb4ebbaf923658a2e2c0336c711ca70f34f.tar.gz |
Update pinyin_choose_candidate function
-rw-r--r-- | src/pinyin.cpp | 17 |
1 file changed, 16 insertions, 1 deletion
```diff
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index e3304db..4cc357c 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -2240,6 +2240,20 @@ int pinyin_choose_candidate(pinyin_instance_t * instance,
         return matrix.size() - 1;
     }
 
+    if (LONGER_CANDIDATE == candidate->m_candidate_type) {
+        /* only train uni-gram for longer candidate. */
+        const guint32 initial_seed = 23 * 3;
+        const guint32 unigram_factor = 7;
+
+        phrase_token_t token = candidate->m_token;
+        int error = context->m_phrase_index->add_unigram_frequency
+            (token, initial_seed * unigram_factor);
+        if (ERROR_INTEGER_OVERFLOW == error)
+            return false;
+
+        return true;
+    }
+
     if (ADDON_CANDIDATE == candidate->m_candidate_type) {
         PhraseItem item;
         context->m_addon_phrase_index->get_phrase_item
@@ -3402,7 +3416,8 @@ bool pinyin_remember_user_input(pinyin_instance_t * instance,
 
 bool pinyin_is_user_candidate(pinyin_instance_t * instance,
                               lookup_candidate_t * candidate) {
-    if (NORMAL_CANDIDATE != candidate->m_candidate_type)
+    if (NORMAL_CANDIDATE != candidate->m_candidate_type &&
+        LONGER_CANDIDATE != candidate->m_candidate_type)
         return false;
 
     phrase_token_t token = candidate->m_token;
```