summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarlos Garnacho <carlosg@gnome.org>2023-03-20 11:19:54 +0100
committerCarlos Garnacho <carlosg@gnome.org>2023-03-20 16:33:43 +0100
commit4e0c7c752728943223671677e2667cdbad0adbde (patch)
treeb75846b627e7819396faf0be0a2bf658ae14c151
parent00e9909ada1a79ac775a99f1c089854f0cf7c074 (diff)
downloadtracker-4e0c7c752728943223671677e2667cdbad0adbde.tar.gz
core: Add helper SQLite function to tokenize FTS search terms
This function splits the search terms and surrounds each of them with double quotes, so that they are taken as individual literal strings to be matched. Quotes explicitly present in the search string are respected and their contents handled as a single search term; an explicit quote that is left unfinished is implicitly closed.
-rw-r--r--src/libtracker-sparql/core/tracker-db-interface-sqlite.c62
1 files changed, 62 insertions, 0 deletions
diff --git a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
index 24c863616..2aa493ca9 100644
--- a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
+++ b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
@@ -1681,6 +1681,66 @@ function_sparql_print_value (sqlite3_context *context,
}
}
+/*
+ * SQLite function registered as "SparqlFtsTokenize".
+ *
+ * Takes exactly one text argument, strips leading/trailing whitespace
+ * from a copy of it and returns a new string in which:
+ *  - a double quote is emitted before the first character, unless that
+ *    character is itself a double quote;
+ *  - a double quote is emitted at every transition between a run of
+ *    spaces and a run of non-space characters, unless the scan is
+ *    currently inside a quote explicitly present in the input;
+ *  - a closing double quote is appended if the number of quotes seen or
+ *    emitted so far is odd.
+ *
+ * A NULL argument produces a NULL result. A wrong argument count is
+ * reported through result_context_function_error().
+ */
+static void
+function_sparql_fts_tokenize (sqlite3_context *context,
+                              int              argc,
+                              sqlite3_value   *argv[])
+{
+	const gchar *fn = "SparqlFtsTokenizer helper";
+	const gchar *input;
+	gchar *text;
+	const gchar *p;
+	gboolean in_quote = FALSE;
+	gboolean in_space = FALSE;
+	gboolean started = FALSE;
+	int n_output_quotes = 0;
+	gunichar ch;
+	GString *str;
+	int len;
+	gchar *result;
+
+	/* argv[0] is read below, so zero arguments must be rejected as well */
+	if (argc != 1) {
+		result_context_function_error (context, fn, "Invalid argument count");
+		return;
+	}
+
+	/* sqlite3_value_text() returns NULL for SQL NULL values; walking a
+	 * NULL string with g_utf8_get_char() would crash, so propagate NULL.
+	 */
+	input = (const gchar *) sqlite3_value_text (argv[0]);
+	if (input == NULL) {
+		sqlite3_result_null (context);
+		return;
+	}
+
+	text = g_strstrip (g_strdup (input));
+	str = g_string_new (NULL);
+	p = text;
+
+	while ((ch = g_utf8_get_char (p)) != 0) {
+		if (ch == '\"') {
+			/* Explicit quote: copied through below, toggles quoted state */
+			n_output_quotes++;
+			in_quote = !in_quote;
+		} else if ((ch == ' ') != !!in_space) {
+			/* Ensure terms get independently quoted, unless
+			 * they are within an explicitly quoted part of the text.
+			 */
+			if (!in_quote && started) {
+				g_string_append_c (str, '"');
+				n_output_quotes++;
+			}
+
+			in_space = ch == ' ';
+		} else if (!started) {
+			/* Not a quote, nor a space at the first char. Add the starting quote */
+			g_string_append_c (str, '"');
+			n_output_quotes++;
+		}
+
+		g_string_append_unichar (str, ch);
+		started = TRUE;
+		p = g_utf8_next_char (p);
+	}
+
+	g_free (text);
+
+	/* An odd count means the last quote (explicit or added) is still open */
+	if (n_output_quotes % 2 != 0)
+		g_string_append_c (str, '"');
+
+	/* The length must be read before g_string_free() releases the GString */
+	len = (int) str->len;
+	result = g_string_free (str, FALSE);
+	/* SQLite takes ownership of the buffer and releases it with g_free() */
+	sqlite3_result_text (context, result, len, g_free);
+}
+
static int
check_interrupt (void *user_data)
{
@@ -1757,6 +1817,8 @@ initialize_functions (TrackerDBInterface *db_interface)
function_sparql_strlang },
{ "SparqlPrintValue", 2, SQLITE_ANY | SQLITE_DETERMINISTIC,
function_sparql_print_value },
+ { "SparqlFtsTokenize", 1, SQLITE_ANY | SQLITE_DETERMINISTIC,
+ function_sparql_fts_tokenize },
/* Numbers */
{ "SparqlCeil", 1, SQLITE_ANY | SQLITE_DETERMINISTIC,
function_sparql_ceil },