diff options
author | Carlos Garnacho <carlosg@gnome.org> | 2023-03-20 11:19:54 +0100 |
---|---|---|
committer | Carlos Garnacho <carlosg@gnome.org> | 2023-03-20 16:33:43 +0100 |
commit | 4e0c7c752728943223671677e2667cdbad0adbde (patch) | |
tree | b75846b627e7819396faf0be0a2bf658ae14c151 | |
parent | 00e9909ada1a79ac775a99f1c089854f0cf7c074 (diff) | |
download | tracker-4e0c7c752728943223671677e2667cdbad0adbde.tar.gz |
core: Add helper SQLite function to tokenize FTS search terms
This function splits the search terms, and surrounds each of them with
double quotes so they are taken as individual literal strings to be
matched. Quotes explicitly present in the search string are observed
and handled as a single search term, and are implicitly closed if left unfinished.
-rw-r--r-- | src/libtracker-sparql/core/tracker-db-interface-sqlite.c | 62 |
1 files changed, 62 insertions, 0 deletions
diff --git a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c index 24c863616..2aa493ca9 100644 --- a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c +++ b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c @@ -1681,6 +1681,66 @@ function_sparql_print_value (sqlite3_context *context, } } /* function_sparql_fts_tokenize: SQLite function callback that rewrites an FTS search string so that every space-separated term is wrapped in double quotes. The input (argv[0]) is copied and stripped with g_strstrip; only U+0020 is treated as a term separator. Spans already enclosed in double quotes are copied through without being split, and a closing quote is appended when the number of quotes seen or emitted is odd. The result is passed to sqlite3_result_text with g_free as its destructor; when argc > 1 an Invalid argument count error is reported through result_context_function_error instead. NOTE(review): the argc > 1 guard still lets argc == 0 reach argv[0]; the registration in this same diff declares 1 argument, so this is presumably unreachable - confirm. NOTE(review): the sqlite3_value_text result is used without a NULL check - confirm how SQL NULL arguments are expected to behave here. NOTE(review): a quoted term followed by a space and a bare term looks like it gets an extra quote emitted at the space (in_quote is FALSE and started is TRUE there), leaving the bare term unterminated - verify against the intended FTS query syntax. */ +static void +function_sparql_fts_tokenize (sqlite3_context *context, + int argc, + sqlite3_value *argv[]) +{ + const gchar *fn = "SparqlFtsTokenizer helper"; + gchar *text; + const gchar *p; + gboolean in_quote = FALSE; + gboolean in_space = FALSE; + gboolean started = FALSE; + int n_output_quotes = 0; + gunichar ch; + GString *str; + int len; + gchar *result; + + if (argc > 1) { + result_context_function_error (context, fn, "Invalid argument count"); + return; + } + + text = g_strstrip (g_strdup (sqlite3_value_text (argv[0]))); + str = g_string_new (NULL); + p = text; + + while ((ch = g_utf8_get_char (p)) != 0) { + if (ch == '\"') { + n_output_quotes++; + in_quote = !in_quote; + } else if ((ch == ' ') != !!in_space) { + /* Ensure terms get independently quoted, unless + * they are within a explicitly quoted part of the text. + */ + if (!in_quote && started) { + g_string_append_c (str, '"'); + n_output_quotes++; + } + + in_space = ch == ' '; + } else if (!started) { + /* Not a quote, nor a space at the first char. 
Add the starting quote */ + g_string_append_c (str, '"'); + n_output_quotes++; + } + + g_string_append_unichar (str, ch); + started = TRUE; + p = g_utf8_next_char (p); + } + + if (n_output_quotes % 2 != 0) + g_string_append_c (str, '"'); + + len = str->len; + result = g_string_free (str, FALSE); + sqlite3_result_text (context, result, len, g_free); + g_free (text); +} + static int check_interrupt (void *user_data) { @@ -1757,6 +1817,8 @@ initialize_functions (TrackerDBInterface *db_interface) function_sparql_strlang }, { "SparqlPrintValue", 2, SQLITE_ANY | SQLITE_DETERMINISTIC, function_sparql_print_value }, + { "SparqlFtsTokenize", 1, SQLITE_ANY | SQLITE_DETERMINISTIC, + function_sparql_fts_tokenize }, /* Numbers */ { "SparqlCeil", 1, SQLITE_ANY | SQLITE_DETERMINISTIC, function_sparql_ceil }, |