summary refs log tree commit diff
diff options
context:
space:
mode:
author Peng Wu <alexepico@gmail.com> 2022-09-22 11:33:16 +0800
committer Peng Wu <alexepico@gmail.com> 2022-09-22 11:48:23 +0800
commit c1302b9a5bfc111508ae0c373af78017a3723101 (patch)
tree d30c372bf11e13b25092c6cc3fdc73d04588bcc7
parent 698c01539cc8636dd79bd12a49e2d9a960209f98 (diff)
download libpinyin-c1302b9a5bfc111508ae0c373af78017a3723101.tar.gz
Refactor sort_option_t enum
-rw-r--r-- src/pinyin.cpp 65
-rw-r--r-- src/pinyin.h 10
-rw-r--r-- tests/test_pinyin.cpp 4
3 files changed, 46 insertions, 33 deletions
diff --git a/src/pinyin.cpp b/src/pinyin.cpp
index 68e237c..e3304db 100644
--- a/src/pinyin.cpp
+++ b/src/pinyin.cpp
@@ -1461,7 +1461,6 @@ static gint compare_item_with_token(gconstpointer lhs,
return (token_lhs - token_rhs);
}
-#endif
static gint compare_item_with_phrase_length_and_frequency(gconstpointer lhs,
gconstpointer rhs) {
@@ -1479,28 +1478,39 @@ static gint compare_item_with_phrase_length_and_frequency(gconstpointer lhs,
return -(freq_lhs - freq_rhs); /* in descendant order */
}
+#endif
-static gint compare_item_with_phrase_length_and_pinyin_length_and_frequency
-(gconstpointer lhs, gconstpointer rhs) {
+static gint compare_item_with_sort_option
+(gconstpointer lhs, gconstpointer rhs, gpointer user_data) {
lookup_candidate_t * item_lhs = (lookup_candidate_t *)lhs;
lookup_candidate_t * item_rhs = (lookup_candidate_t *)rhs;
+ guint sort_option = GPOINTER_TO_UINT(user_data);
- guint8 len_lhs = item_lhs->m_phrase_length;
- guint8 len_rhs = item_rhs->m_phrase_length;
+ if (sort_option & SORT_BY_PHRASE_LENGTH) {
+ guint8 len_lhs = item_lhs->m_phrase_length;
+ guint8 len_rhs = item_rhs->m_phrase_length;
- if (len_lhs != len_rhs)
- return -(len_lhs - len_rhs); /* in descendant order */
+ if (len_lhs != len_rhs)
+ return -(len_lhs - len_rhs); /* in descendant order */
+ }
- len_lhs = item_lhs->m_end - item_lhs->m_begin;
- len_rhs = item_rhs->m_end - item_rhs->m_begin;
+ if (sort_option & SORT_BY_PINYIN_LENGTH) {
+ guint8 len_lhs = item_lhs->m_end - item_lhs->m_begin;
+ guint8 len_rhs = item_rhs->m_end - item_rhs->m_begin;
- if (len_lhs != len_rhs)
- return -(len_lhs - len_rhs); /* in descendant order */
+ if (len_lhs != len_rhs)
+ return -(len_lhs - len_rhs); /* in descendant order */
+ }
- guint32 freq_lhs = item_lhs->m_freq;
- guint32 freq_rhs = item_rhs->m_freq;
+ if (sort_option & SORT_BY_FREQUENCY) {
+ guint32 freq_lhs = item_lhs->m_freq;
+ guint32 freq_rhs = item_rhs->m_freq;
- return -(freq_lhs - freq_rhs); /* in descendant order */
+ if (freq_lhs != freq_rhs)
+ return -(freq_lhs - freq_rhs); /* in descendant order */
+ }
+
+ return 0;
}
static phrase_token_t _get_previous_token(pinyin_instance_t * instance,
@@ -1971,7 +1981,7 @@ static bool _check_offset(PhoneticKeyMatrix & matrix, size_t offset) {
bool pinyin_guess_candidates(pinyin_instance_t * instance,
size_t offset,
- sort_option_t sort_option) {
+ guint sort_option) {
pinyin_context_t * & context = instance->m_context;
pinyin_option_t & options = context->m_options;
@@ -2069,22 +2079,17 @@ bool pinyin_guess_candidates(pinyin_instance_t * instance,
_compute_frequency_of_items(context, prev_token, &merged_gram, candidates);
/* sort the candidates. */
- switch (sort_option) {
- case SORT_BY_PHRASE_LENGTH_AND_FREQUENCY:
- g_array_sort(candidates,
- compare_item_with_phrase_length_and_frequency);
- break;
- case SORT_BY_PHRASE_LENGTH_AND_PINYIN_LENGTH_AND_FREQUENCY:
- g_array_sort(candidates,
- compare_item_with_phrase_length_and_pinyin_length_and_frequency);
- break;
- }
+ g_array_sort_with_data
+ (candidates, compare_item_with_sort_option,
+ GUINT_TO_POINTER(sort_option));
/* post process to remove duplicated candidates */
- _prepend_longer_candidates(instance, instance->m_candidates);
+ if (!(sort_option & SORT_WITHOUT_LONGER_CANDIDATE))
+ _prepend_longer_candidates(instance, instance->m_candidates);
- _prepend_sentence_candidates(instance, instance->m_candidates);
+ if (!(sort_option & SORT_WITHOUT_SENTENCE_CANDIDATE))
+ _prepend_sentence_candidates(instance, instance->m_candidates);
_compute_phrase_strings_of_items(instance, instance->m_candidates);
@@ -2201,7 +2206,11 @@ bool pinyin_guess_predicted_candidates(pinyin_instance_t * instance,
_compute_frequency_of_items(context, prev_token, &merged_gram, candidates);
/* sort the candidates by phrase length and frequency. */
- g_array_sort(candidates, compare_item_with_phrase_length_and_frequency);
+ guint sort_option = SORT_BY_PHRASE_LENGTH | SORT_BY_FREQUENCY;
+
+ g_array_sort_with_data
+ (candidates, compare_item_with_sort_option,
+ GUINT_TO_POINTER(sort_option));
/* post process to remove duplicated candidates */
diff --git a/src/pinyin.h b/src/pinyin.h
index cc1a5cb..480103a 100644
--- a/src/pinyin.h
+++ b/src/pinyin.h
@@ -50,8 +50,12 @@ typedef enum _lookup_candidate_type_t{
} lookup_candidate_type_t;
typedef enum _sort_option_t{
- SORT_BY_PHRASE_LENGTH_AND_FREQUENCY = 1,
- SORT_BY_PHRASE_LENGTH_AND_PINYIN_LENGTH_AND_FREQUENCY,
+ /* The sort order is phrase length, pinyin length, frequency. */
+ SORT_WITHOUT_SENTENCE_CANDIDATE = 0x1,
+ SORT_WITHOUT_LONGER_CANDIDATE = 0x2,
+ SORT_BY_PHRASE_LENGTH = 0x4,
+ SORT_BY_PINYIN_LENGTH = 0x8,
+ SORT_BY_FREQUENCY = 0x10,
} sort_option_t;
/**
@@ -485,7 +489,7 @@ bool pinyin_in_chewing_keyboard(pinyin_instance_t * instance,
*/
bool pinyin_guess_candidates(pinyin_instance_t * instance,
size_t offset,
- sort_option_t sort_option);
+ guint sort_option);
/**
* pinyin_choose_candidate:
diff --git a/tests/test_pinyin.cpp b/tests/test_pinyin.cpp
index 8eadf89..316cf1e 100644
--- a/tests/test_pinyin.cpp
+++ b/tests/test_pinyin.cpp
@@ -69,8 +69,8 @@ int main(int argc, char * argv[]){
size_t len = pinyin_parse_more_full_pinyins(instance, linebuf);
pinyin_guess_sentence_with_prefix(instance, prefixbuf);
- pinyin_guess_candidates(instance, 0,
- SORT_BY_PHRASE_LENGTH_AND_FREQUENCY);
+ guint sort_option = SORT_BY_PHRASE_LENGTH | SORT_BY_FREQUENCY;
+ pinyin_guess_candidates(instance, 0, sort_option);
size_t i = 0;
for (i = 0; i <= len; ++i) {