diff options
author | Peng Wu <alexepico@gmail.com> | 2022-07-26 16:18:39 +0800 |
---|---|---|
committer | Peng Wu <alexepico@gmail.com> | 2022-07-26 16:18:39 +0800 |
commit | ef20489350ad3c1ba8f7805f27c19a3917bd6976 (patch) | |
tree | 6fa63e2d48d606af74bf47a135ce180c7d2bc86d | |
parent | bd0414a0eb1775c9e8831191f2474721b0e4d802 (diff) | |
download | libpinyin-ef20489350ad3c1ba8f7805f27c19a3917bd6976.tar.gz |
Write search_suggestion method
-rw-r--r-- | src/storage/chewing_large_table2_kyotodb.cpp | 1 | ||||
-rw-r--r-- | src/storage/phrase_large_table3_bdb.cpp | 61 | ||||
-rw-r--r-- | src/storage/phrase_large_table3_bdb.h | 2 | ||||
-rw-r--r-- | src/storage/phrase_large_table3_kyotodb.cpp | 58 | ||||
-rw-r--r-- | src/storage/phrase_large_table3_kyotodb.h | 2 |
5 files changed, 119 insertions, 5 deletions
diff --git a/src/storage/chewing_large_table2_kyotodb.cpp b/src/storage/chewing_large_table2_kyotodb.cpp index c606982..dbcb934 100644 --- a/src/storage/chewing_large_table2_kyotodb.cpp +++ b/src/storage/chewing_large_table2_kyotodb.cpp @@ -21,6 +21,7 @@ #include "chewing_large_table2.h" #include <kchashdb.h> #include <kccachedb.h> +#include "pinyin_utils.h" #include "kyotodb_utils.h" using namespace kyotocabinet; diff --git a/src/storage/phrase_large_table3_bdb.cpp b/src/storage/phrase_large_table3_bdb.cpp index 1576ca2..da2e6f2 100644 --- a/src/storage/phrase_large_table3_bdb.cpp +++ b/src/storage/phrase_large_table3_bdb.cpp @@ -41,8 +41,7 @@ inline int compare_phrase(ucs4_t * lhs, ucs4_t * rhs, int phrase_length) { /* keep dbm key compare function inside the corresponding dbm file to get more flexibility. */ -static bool bdb_phrase_continue_search(DB *db, - const DBT *dbt1, +static bool bdb_phrase_continue_search(const DBT *dbt1, const DBT *dbt2) { ucs4_t * lhs_phrase = (ucs4_t *) dbt1->data; int lhs_phrase_length = dbt1->size / sizeof(ucs4_t); @@ -208,6 +207,64 @@ int PhraseLargeTable3::search(int phrase_length, return result; } +int PhraseLargeTable3::search_suggestion(int phrase_length, + /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const { + int result = SEARCH_NONE; + + if (NULL == m_db) + return result; + assert(NULL != m_entry); + + DBC * cursorp = NULL; + /* Get a cursor */ + int ret = m_db->cursor(m_db, NULL, &cursorp, 0); + if (ret != 0) + return result; + + DBT db_key1; + memset(&db_key1, 0, sizeof(DBT)); + db_key1.data = (void *) phrase; + db_key1.size = phrase_length * sizeof(ucs4_t); + + DBT db_data; + memset(&db_data, 0, sizeof(DBT)); + /* Get the prefix entry */ + ret = cursorp->c_get(cursorp, &db_key1, &db_data, 0); + if (ret != 0) { + cursorp->c_close(cursorp); + return result; + } + + /* Get the next entry */ + DBT db_key2; + memset(&db_key2, 0, sizeof(DBT)); + memset(&db_data, 0, sizeof(DBT)); + ret = cursorp->c_get(cursorp, &db_key2, &db_data, DB_NEXT); + if (ret != 0) { + cursorp->c_close(cursorp); + return result; + } + + while(bdb_phrase_continue_search(&db_key1, &db_key2)) { + + m_entry->m_chunk.set_chunk(db_data.data, db_data.size, NULL); + result = m_entry->search(tokens) | result; + m_entry->m_chunk.set_size(0); + + memset(&db_key2, 0, sizeof(DBT)); + memset(&db_data, 0, sizeof(DBT)); + ret = cursorp->c_get(cursorp, &db_key2, &db_data, DB_NEXT); + if (ret != 0) { + cursorp->c_close(cursorp); + return result; + } + } + + cursorp->c_close(cursorp); + return result; +} + /* add_index/remove_index method */ int PhraseLargeTable3::add_index(int phrase_length, /* in */ const ucs4_t phrase[], diff --git a/src/storage/phrase_large_table3_bdb.h b/src/storage/phrase_large_table3_bdb.h index 73f7625..da8f199 100644 --- a/src/storage/phrase_large_table3_bdb.h +++ b/src/storage/phrase_large_table3_bdb.h @@ -58,6 +58,8 @@ public: /* search method */ int search(int phrase_length, /* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const; + int search_suggestion(int phrase_length, /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const; /* add_index/remove_index method */ int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); diff --git a/src/storage/phrase_large_table3_kyotodb.cpp b/src/storage/phrase_large_table3_kyotodb.cpp index c8ce32c..7b605d5 100644 --- a/src/storage/phrase_large_table3_kyotodb.cpp +++ b/src/storage/phrase_large_table3_kyotodb.cpp @@ -46,10 +46,10 @@ inline int compare_phrase(ucs4_t * lhs, ucs4_t * rhs, int phrase_length) { bool kyotodb_phrase_continue_search(const char* akbuf, size_t aksiz, const char* bkbuf, size_t bksiz) { - ucs4_t * lhs_phrase = akbuf; + ucs4_t * lhs_phrase = (ucs4_t *) akbuf; int lhs_phrase_length = aksiz / sizeof(ucs4_t); - ucs4_t * rhs_phrase = bkbuf; - int rhs_phrase_length = bkbuf / sizeof(ucs4_t); + ucs4_t * rhs_phrase = (ucs4_t *) bkbuf; + int rhs_phrase_length = bksiz / sizeof(ucs4_t); /* The key in dbm is longer than the key in application. */ if (lhs_phrase_length >= rhs_phrase_length) @@ -192,6 +192,58 @@ int PhraseLargeTable3::search(int phrase_length, return result; } +int PhraseLargeTable3::search_suggestion(int phrase_length, + /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const { + int result = SEARCH_NONE; + + if (NULL == m_db) + return result; + assert(NULL != m_entry); + + const char * akbuf = (char *) phrase; + const size_t aksiz = phrase_length * sizeof(ucs4_t); + const int32_t vsiz = m_db->check(akbuf, aksiz); + /* -1 on failure. */ + if (-1 == vsiz) + return result; + + kyotocabinet::BasicDB::Cursor * cursor = m_db->cursor(); + bool retval = cursor->jump(akbuf, aksiz); + if (!retval) { + delete cursor; + return result; + } + + /* Get the next entry */ + retval = cursor->step(); + if (!retval) { + delete cursor; + return result; + } + + size_t bksiz = 0; + const char * bkbuf = cursor->get_key(&bksiz); + while(kyotodb_phrase_continue_search(akbuf, aksiz, bkbuf, bksiz)) { + + size_t bvsiz = 0; + char * bvbuf = cursor->get_value(&bvsiz); + m_entry->m_chunk.set_chunk(bvbuf, bvsiz, NULL); + result = m_entry->search(tokens) | result; + m_entry->m_chunk.set_size(0); + delete [] bvbuf; + + retval = cursor->step(); + if (!retval) { + delete cursor; + return result; + } + } + + delete cursor; + return result; +} + /* add_index/remove_index method */ int PhraseLargeTable3::add_index(int phrase_length, /* in */ const ucs4_t phrase[], diff --git a/src/storage/phrase_large_table3_kyotodb.h b/src/storage/phrase_large_table3_kyotodb.h index d122de0..c7f3b87 100644 --- a/src/storage/phrase_large_table3_kyotodb.h +++ b/src/storage/phrase_large_table3_kyotodb.h @@ -60,6 +60,8 @@ public: /* search method */ int search(int phrase_length, /* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const; + int search_suggestion(int phrase_length, /* in */ const ucs4_t phrase[], + /* out */ PhraseTokens tokens) const; /* add_index/remove_index method */ int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); |