summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2022-07-26 16:18:39 +0800
committerPeng Wu <alexepico@gmail.com>2022-07-26 16:18:39 +0800
commitef20489350ad3c1ba8f7805f27c19a3917bd6976 (patch)
tree6fa63e2d48d606af74bf47a135ce180c7d2bc86d
parentbd0414a0eb1775c9e8831191f2474721b0e4d802 (diff)
downloadlibpinyin-ef20489350ad3c1ba8f7805f27c19a3917bd6976.tar.gz
Write search_suggestion method
-rw-r--r--src/storage/chewing_large_table2_kyotodb.cpp1
-rw-r--r--src/storage/phrase_large_table3_bdb.cpp61
-rw-r--r--src/storage/phrase_large_table3_bdb.h2
-rw-r--r--src/storage/phrase_large_table3_kyotodb.cpp58
-rw-r--r--src/storage/phrase_large_table3_kyotodb.h2
5 files changed, 119 insertions, 5 deletions
diff --git a/src/storage/chewing_large_table2_kyotodb.cpp b/src/storage/chewing_large_table2_kyotodb.cpp
index c606982..dbcb934 100644
--- a/src/storage/chewing_large_table2_kyotodb.cpp
+++ b/src/storage/chewing_large_table2_kyotodb.cpp
@@ -21,6 +21,7 @@
#include "chewing_large_table2.h"
#include <kchashdb.h>
#include <kccachedb.h>
+#include "pinyin_utils.h"
#include "kyotodb_utils.h"
using namespace kyotocabinet;
diff --git a/src/storage/phrase_large_table3_bdb.cpp b/src/storage/phrase_large_table3_bdb.cpp
index 1576ca2..da2e6f2 100644
--- a/src/storage/phrase_large_table3_bdb.cpp
+++ b/src/storage/phrase_large_table3_bdb.cpp
@@ -41,8 +41,7 @@ inline int compare_phrase(ucs4_t * lhs, ucs4_t * rhs, int phrase_length) {
/* keep dbm key compare function inside the corresponding dbm file
to get more flexibility. */
-static bool bdb_phrase_continue_search(DB *db,
- const DBT *dbt1,
+static bool bdb_phrase_continue_search(const DBT *dbt1,
const DBT *dbt2) {
ucs4_t * lhs_phrase = (ucs4_t *) dbt1->data;
int lhs_phrase_length = dbt1->size / sizeof(ucs4_t);
@@ -208,6 +207,64 @@ int PhraseLargeTable3::search(int phrase_length,
return result;
}
+/* search_suggestion method (Berkeley DB backend)
+ *
+ * phrase_length: number of ucs4_t characters in phrase.
+ * phrase:        the prefix phrase, used verbatim as the lookup key.
+ * tokens:        passed to m_entry->search() for every visited record.
+ *
+ * Positions a cursor on the record whose key equals phrase, then walks
+ * the records that follow it for as long as bdb_phrase_continue_search()
+ * accepts their keys.  The per-record search results are OR-ed together.
+ *
+ * Returns SEARCH_NONE when the database is not open, the cursor cannot
+ * be created, or the prefix key is not present; otherwise the combined
+ * search result.
+ */
+int PhraseLargeTable3::search_suggestion(int phrase_length,
+                                         /* in */ const ucs4_t phrase[],
+                                         /* out */ PhraseTokens tokens) const {
+    int result = SEARCH_NONE;
+
+    if (NULL == m_db)
+        return result;
+    assert(NULL != m_entry);
+
+    DBC * cursorp = NULL;
+    /* Get a cursor */
+    int ret = m_db->cursor(m_db, NULL, &cursorp, 0);
+    if (ret != 0)
+        return result;
+
+    DBT db_key1;
+    memset(&db_key1, 0, sizeof(DBT));
+    db_key1.data = (void *) phrase;
+    db_key1.size = phrase_length * sizeof(ucs4_t);
+
+    DBT db_data;
+    memset(&db_data, 0, sizeof(DBT));
+    /* Get the prefix entry.  A cursor get needs an explicit positioning
+       operation; DB_SET moves the cursor to the record with this key.
+       Passing 0 is rejected, which made the lookup always fail. */
+    ret = cursorp->c_get(cursorp, &db_key1, &db_data, DB_SET);
+    if (ret != 0) {
+        cursorp->c_close(cursorp);
+        return result;
+    }
+
+    /* Walk the entries that follow the prefix entry. */
+    DBT db_key2;
+    for (;;) {
+        memset(&db_key2, 0, sizeof(DBT));
+        memset(&db_data, 0, sizeof(DBT));
+        ret = cursorp->c_get(cursorp, &db_key2, &db_data, DB_NEXT);
+        /* No further record (or a cursor error): stop walking. */
+        if (ret != 0)
+            break;
+
+        if (!bdb_phrase_continue_search(&db_key1, &db_key2))
+            break;
+
+        /* Borrow the record data for the search, then detach the chunk
+           again so m_entry does not keep pointing at cursor memory. */
+        m_entry->m_chunk.set_chunk(db_data.data, db_data.size, NULL);
+        result = m_entry->search(tokens) | result;
+        m_entry->m_chunk.set_size(0);
+    }
+
+    cursorp->c_close(cursorp);
+    return result;
+}
+
/* add_index/remove_index method */
int PhraseLargeTable3::add_index(int phrase_length,
/* in */ const ucs4_t phrase[],
diff --git a/src/storage/phrase_large_table3_bdb.h b/src/storage/phrase_large_table3_bdb.h
index 73f7625..da8f199 100644
--- a/src/storage/phrase_large_table3_bdb.h
+++ b/src/storage/phrase_large_table3_bdb.h
@@ -58,6 +58,8 @@ public:
/* search method */
int search(int phrase_length, /* in */ const ucs4_t phrase[],
/* out */ PhraseTokens tokens) const;
+ int search_suggestion(int phrase_length, /* in */ const ucs4_t phrase[],
+ /* out */ PhraseTokens tokens) const; /* walks the entries following the phrase key and ORs their search results into the return value */
/* add_index/remove_index method */
int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
diff --git a/src/storage/phrase_large_table3_kyotodb.cpp b/src/storage/phrase_large_table3_kyotodb.cpp
index c8ce32c..7b605d5 100644
--- a/src/storage/phrase_large_table3_kyotodb.cpp
+++ b/src/storage/phrase_large_table3_kyotodb.cpp
@@ -46,10 +46,10 @@ inline int compare_phrase(ucs4_t * lhs, ucs4_t * rhs, int phrase_length) {
bool kyotodb_phrase_continue_search(const char* akbuf, size_t aksiz,
const char* bkbuf, size_t bksiz) {
- ucs4_t * lhs_phrase = akbuf;
+ ucs4_t * lhs_phrase = (ucs4_t *) akbuf;
int lhs_phrase_length = aksiz / sizeof(ucs4_t);
- ucs4_t * rhs_phrase = bkbuf;
- int rhs_phrase_length = bkbuf / sizeof(ucs4_t);
+ ucs4_t * rhs_phrase = (ucs4_t *) bkbuf;
+ int rhs_phrase_length = bksiz / sizeof(ucs4_t);
/* The key in dbm is longer than the key in application. */
if (lhs_phrase_length >= rhs_phrase_length)
@@ -192,6 +192,58 @@ int PhraseLargeTable3::search(int phrase_length,
return result;
}
+/* search_suggestion method (Kyoto Cabinet backend)
+ *
+ * phrase_length: number of ucs4_t characters in phrase.
+ * phrase:        the prefix phrase, used verbatim as the lookup key.
+ * tokens:        passed to m_entry->search() for every visited record.
+ *
+ * Jumps a cursor to the record whose key equals phrase, then steps
+ * through the following records for as long as
+ * kyotodb_phrase_continue_search() accepts their keys.  The per-record
+ * search results are OR-ed together.
+ *
+ * Returns SEARCH_NONE when the database is not open or the prefix key
+ * is not present; otherwise the combined search result.
+ *
+ * NOTE(review): this relies on the cursor visiting the keys that extend
+ * the prefix right after the prefix record -- confirm that the database
+ * type behind m_db iterates in key order.
+ */
+int PhraseLargeTable3::search_suggestion(int phrase_length,
+                                         /* in */ const ucs4_t phrase[],
+                                         /* out */ PhraseTokens tokens) const {
+    int result = SEARCH_NONE;
+
+    if (NULL == m_db)
+        return result;
+    assert(NULL != m_entry);
+
+    const char * akbuf = (char *) phrase;
+    const size_t aksiz = phrase_length * sizeof(ucs4_t);
+    const int32_t vsiz = m_db->check(akbuf, aksiz);
+    /* -1 on failure. */
+    if (-1 == vsiz)
+        return result;
+
+    kyotocabinet::BasicDB::Cursor * cursor = m_db->cursor();
+    if (!cursor->jump(akbuf, aksiz)) {
+        delete cursor;
+        return result;
+    }
+
+    /* Each step() moves to the next entry; the first one skips the
+       prefix record itself. */
+    while (cursor->step()) {
+        /* Fetch the key of the *current* record on every iteration.
+           get_key() hands back a new[] buffer that we must release. */
+        size_t bksiz = 0;
+        char * bkbuf = cursor->get_key(&bksiz);
+        if (NULL == bkbuf)
+            break;
+        const bool keep_going =
+            kyotodb_phrase_continue_search(akbuf, aksiz, bkbuf, bksiz);
+        delete [] bkbuf;
+        if (!keep_going)
+            break;
+
+        size_t bvsiz = 0;
+        char * bvbuf = cursor->get_value(&bvsiz);
+        if (NULL == bvbuf)
+            break;
+
+        /* Borrow the value buffer for the search, then detach the chunk
+           before the buffer is freed. */
+        m_entry->m_chunk.set_chunk(bvbuf, bvsiz, NULL);
+        result = m_entry->search(tokens) | result;
+        m_entry->m_chunk.set_size(0);
+        delete [] bvbuf;
+    }
+
+    delete cursor;
+    return result;
+}
+
/* add_index/remove_index method */
int PhraseLargeTable3::add_index(int phrase_length,
/* in */ const ucs4_t phrase[],
diff --git a/src/storage/phrase_large_table3_kyotodb.h b/src/storage/phrase_large_table3_kyotodb.h
index d122de0..c7f3b87 100644
--- a/src/storage/phrase_large_table3_kyotodb.h
+++ b/src/storage/phrase_large_table3_kyotodb.h
@@ -60,6 +60,8 @@ public:
/* search method */
int search(int phrase_length, /* in */ const ucs4_t phrase[],
/* out */ PhraseTokens tokens) const;
+ int search_suggestion(int phrase_length, /* in */ const ucs4_t phrase[],
+ /* out */ PhraseTokens tokens) const; /* steps through the entries following the phrase key and ORs their search results into the return value */
/* add_index/remove_index method */
int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);