summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam Thursfield <sam@afuera.me.uk>2023-03-23 18:14:24 +0000
committerSam Thursfield <sam@afuera.me.uk>2023-03-23 18:14:24 +0000
commitba6a9608f3b82d7708eae5b6477719668830e7e6 (patch)
treee90447c58310bfe6f6e135c4026e7f5c56503a7f
parent6409812d31697ba430ebb843133304b46991d637 (diff)
parent827ba585eec44cf3710a82406783f47bcb64f9c3 (diff)
downloadtracker-ba6a9608f3b82d7708eae5b6477719668830e7e6.tar.gz
Merge branch 'wip/carlosg/orderless-fts-terms' into 'master'
core: handle FTS search terms individually See merge request https://gitlab.gnome.org/GNOME/tracker/-/merge_requests/585
-rw-r--r--src/libtracker-sparql/core/tracker-db-interface-sqlite.c62
-rw-r--r--src/libtracker-sparql/core/tracker-sparql.c12
-rw-r--r--tests/fts/fts3aa-4.out8
-rw-r--r--tests/fts/fts3aa-4.rq1
-rw-r--r--tests/fts/fts3aa-5.out4
-rw-r--r--tests/fts/fts3aa-5.rq1
-rw-r--r--tests/fts/fts3aa-6.out2
-rw-r--r--tests/fts/fts3aa-6.rq1
-rw-r--r--tests/fts/fts3aa-7.out8
-rw-r--r--tests/fts/fts3aa-7.rq2
-rw-r--r--tests/fts/fts3aa-8.out4
-rw-r--r--tests/fts/fts3aa-8.rq2
-rw-r--r--tests/fts/fts3aa-9.out1
-rw-r--r--tests/fts/fts3aa-9.rq1
-rw-r--r--tests/fts/tracker-fts-test.c2
15 files changed, 104 insertions, 7 deletions
diff --git a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
index 24c863616..2aa493ca9 100644
--- a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
+++ b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
@@ -1681,6 +1681,66 @@ function_sparql_print_value (sqlite3_context *context,
}
}
+/*
+ * SparqlFtsTokenize(text)
+ *
+ * Rewrites a user-supplied full-text search string so that every
+ * whitespace-separated term is wrapped in its own pair of double quotes,
+ * while parts that the user already quoted explicitly are passed through
+ * as a single phrase. For example:
+ *
+ *   three five            ->  "three" "five"
+ *   "two three" four one  ->  "two three" "four" "one"
+ *   four "two             ->  "four" "two"
+ *
+ * Leading and trailing whitespace is stripped first, and an unterminated
+ * quote is closed at the end of the string.
+ *
+ * The single argument is read as text. An SQL NULL argument yields an SQL
+ * NULL result. Any other argument count reports an error on the context.
+ * The result string is handed over to SQLite, which releases it with
+ * g_free().
+ */
+static void
+function_sparql_fts_tokenize (sqlite3_context *context,
+                              int              argc,
+                              sqlite3_value   *argv[])
+{
+	const gchar *fn = "SparqlFtsTokenizer helper";
+	const gchar *input;
+	gchar *text;
+	const gchar *p;
+	gboolean in_quote = FALSE;
+	gboolean in_space = FALSE;
+	gboolean started = FALSE;
+	int n_output_quotes = 0;
+	gunichar ch;
+	GString *str;
+	int len;
+	gchar *result;
+
+	/* argv[0] is read below, so exactly one argument is required */
+	if (argc != 1) {
+		result_context_function_error (context, fn, "Invalid argument count");
+		return;
+	}
+
+	/* sqlite3_value_text() returns NULL for SQL NULL values; there is
+	 * nothing to tokenize in that case, so propagate the NULL instead
+	 * of walking a null pointer.
+	 */
+	input = (const gchar *) sqlite3_value_text (argv[0]);
+	if (!input) {
+		sqlite3_result_null (context);
+		return;
+	}
+
+	text = g_strstrip (g_strdup (input));
+	str = g_string_new (NULL);
+	p = text;
+
+	while ((ch = g_utf8_get_char (p)) != 0) {
+		if (ch == '\"') {
+			/* Quote typed by the user: copied to the output below,
+			 * and toggles the "explicitly quoted" state.
+			 */
+			n_output_quotes++;
+			in_quote = !in_quote;
+		} else if ((ch == ' ') != !!in_space) {
+			/* Transition between a term and a space (either way).
+			 *
+			 * Ensure terms get independently quoted, unless
+			 * they are within an explicitly quoted part of the text.
+			 */
+			if (!in_quote && started) {
+				/* Entering a term always opens a quote. Leaving
+				 * a term only closes a quote if one is actually
+				 * open: after an explicitly quoted phrase the
+				 * quote is already closed, and emitting another
+				 * one would produce `""`, which FTS5 reads as an
+				 * escaped quote character.
+				 */
+				if (ch != ' ' || n_output_quotes % 2 != 0) {
+					g_string_append_c (str, '"');
+					n_output_quotes++;
+				}
+			}
+
+			in_space = ch == ' ';
+		} else if (!started) {
+			/* Not a quote, nor a space at the first char. Add the starting quote */
+			g_string_append_c (str, '"');
+			n_output_quotes++;
+		}
+
+		g_string_append_unichar (str, ch);
+		started = TRUE;
+		p = g_utf8_next_char (p);
+	}
+
+	/* Close whatever quote is still open (last term, or an unfinished
+	 * explicit quote), so the output always has balanced quotes.
+	 */
+	if (n_output_quotes % 2 != 0)
+		g_string_append_c (str, '"');
+
+	len = str->len;
+	result = g_string_free (str, FALSE);
+	sqlite3_result_text (context, result, len, g_free);
+	g_free (text);
+}
+
static int
check_interrupt (void *user_data)
{
@@ -1757,6 +1817,8 @@ initialize_functions (TrackerDBInterface *db_interface)
function_sparql_strlang },
{ "SparqlPrintValue", 2, SQLITE_ANY | SQLITE_DETERMINISTIC,
function_sparql_print_value },
+ { "SparqlFtsTokenize", 1, SQLITE_ANY | SQLITE_DETERMINISTIC,
+ function_sparql_fts_tokenize },
/* Numbers */
{ "SparqlCeil", 1, SQLITE_ANY | SQLITE_DETERMINISTIC,
function_sparql_ceil },
diff --git a/src/libtracker-sparql/core/tracker-sparql.c b/src/libtracker-sparql/core/tracker-sparql.c
index 500a9af44..745ed46b3 100644
--- a/src/libtracker-sparql/core/tracker-sparql.c
+++ b/src/libtracker-sparql/core/tracker-sparql.c
@@ -1711,11 +1711,11 @@ tracker_sparql_add_fts_subquery (TrackerSparql *sparql,
if (tracker_sparql_find_graph (sparql, tracker_token_get_idstring (graph))) {
_append_string_printf (sparql,
"%s FROM \"%s\".\"fts5\" "
- "WHERE fts5 = '\"' || REPLACE (",
+ "WHERE fts5 = SparqlFtsTokenize(",
select_items->str,
tracker_token_get_idstring (graph));
_append_literal_sql (sparql, binding);
- _append_string (sparql, ", '\"', ' ') || '\"*'");
+ _append_string (sparql, ") || '*' ");
} else {
_append_empty_select (sparql, n_properties);
}
@@ -1727,10 +1727,10 @@ tracker_sparql_add_fts_subquery (TrackerSparql *sparql,
if (!sparql->policy.filter_unnamed_graph) {
_append_string_printf (sparql,
"%s, 0 FROM \"main\".\"fts5\" "
- "WHERE fts5 = '\"' || REPLACE (",
+ "WHERE fts5 = SparqlFtsTokenize(",
select_items->str);
_append_literal_sql (sparql, binding);
- _append_string (sparql, ", '\"', ' ') || '\"*'");
+ _append_string (sparql, ") || '*' ");
} else {
_append_empty_select (sparql, n_properties);
}
@@ -1744,12 +1744,12 @@ tracker_sparql_add_fts_subquery (TrackerSparql *sparql,
_append_string_printf (sparql,
"UNION ALL %s, %" G_GINT64_FORMAT " AS graph "
"FROM \"%s\".\"fts5\" "
- "WHERE fts5 = '\"' || REPLACE (",
+ "WHERE fts5 = SparqlFtsTokenize(",
select_items->str,
*graph_id,
(gchar *) graph_name);
_append_literal_sql (sparql, binding);
- _append_string (sparql, ", '\"', ' ') || '\"*'");
+ _append_string (sparql, ") || '*' ");
}
g_hash_table_unref (graphs);
diff --git a/tests/fts/fts3aa-4.out b/tests/fts/fts3aa-4.out
new file mode 100644
index 000000000..4bdf28b05
--- /dev/null
+++ b/tests/fts/fts3aa-4.out
@@ -0,0 +1,8 @@
+"http://www.example.org/test#20"
+"http://www.example.org/test#21"
+"http://www.example.org/test#22"
+"http://www.example.org/test#23"
+"http://www.example.org/test#28"
+"http://www.example.org/test#29"
+"http://www.example.org/test#30"
+"http://www.example.org/test#31"
diff --git a/tests/fts/fts3aa-4.rq b/tests/fts/fts3aa-4.rq
new file mode 100644
index 000000000..76f97168c
--- /dev/null
+++ b/tests/fts/fts3aa-4.rq
@@ -0,0 +1 @@
+SELECT ?o WHERE { ?o fts:match "three five" } order by asc ?o
diff --git a/tests/fts/fts3aa-5.out b/tests/fts/fts3aa-5.out
new file mode 100644
index 000000000..a2f64826a
--- /dev/null
+++ b/tests/fts/fts3aa-5.out
@@ -0,0 +1,4 @@
+"http://www.example.org/test#20"
+"http://www.example.org/test#21"
+"http://www.example.org/test#22"
+"http://www.example.org/test#23"
diff --git a/tests/fts/fts3aa-5.rq b/tests/fts/fts3aa-5.rq
new file mode 100644
index 000000000..9dbb5f174
--- /dev/null
+++ b/tests/fts/fts3aa-5.rq
@@ -0,0 +1 @@
+SELECT ?o WHERE { ?o fts:match "\"three five\"" } order by ?o
diff --git a/tests/fts/fts3aa-6.out b/tests/fts/fts3aa-6.out
new file mode 100644
index 000000000..2e57c7ed9
--- /dev/null
+++ b/tests/fts/fts3aa-6.out
@@ -0,0 +1,2 @@
+"http://www.example.org/test#15"
+"http://www.example.org/test#31"
diff --git a/tests/fts/fts3aa-6.rq b/tests/fts/fts3aa-6.rq
new file mode 100644
index 000000000..3482fe180
--- /dev/null
+++ b/tests/fts/fts3aa-6.rq
@@ -0,0 +1 @@
+SELECT ?o WHERE { ?o fts:match " \"two three\" four one" } order by ?o
diff --git a/tests/fts/fts3aa-7.out b/tests/fts/fts3aa-7.out
new file mode 100644
index 000000000..a082546ee
--- /dev/null
+++ b/tests/fts/fts3aa-7.out
@@ -0,0 +1,8 @@
+"http://www.example.org/test#10"
+"http://www.example.org/test#11"
+"http://www.example.org/test#14"
+"http://www.example.org/test#15"
+"http://www.example.org/test#26"
+"http://www.example.org/test#27"
+"http://www.example.org/test#30"
+"http://www.example.org/test#31"
diff --git a/tests/fts/fts3aa-7.rq b/tests/fts/fts3aa-7.rq
new file mode 100644
index 000000000..46aab7bda
--- /dev/null
+++ b/tests/fts/fts3aa-7.rq
@@ -0,0 +1,2 @@
+# Match contains an unfinished quote
+SELECT ?o WHERE { ?o fts:match "four \"two" } order by ?o
diff --git a/tests/fts/fts3aa-8.out b/tests/fts/fts3aa-8.out
new file mode 100644
index 000000000..9de74cce2
--- /dev/null
+++ b/tests/fts/fts3aa-8.out
@@ -0,0 +1,4 @@
+"http://www.example.org/test#10"
+"http://www.example.org/test#11"
+"http://www.example.org/test#26"
+"http://www.example.org/test#27"
diff --git a/tests/fts/fts3aa-8.rq b/tests/fts/fts3aa-8.rq
new file mode 100644
index 000000000..0874c737c
--- /dev/null
+++ b/tests/fts/fts3aa-8.rq
@@ -0,0 +1,2 @@
+# Match contains an unfinished quote
+SELECT ?o WHERE { ?o fts:match " \"two four " } order by ?o
diff --git a/tests/fts/fts3aa-9.out b/tests/fts/fts3aa-9.out
new file mode 100644
index 000000000..feaf7fc17
--- /dev/null
+++ b/tests/fts/fts3aa-9.out
@@ -0,0 +1 @@
+"http://www.example.org/test#31"
diff --git a/tests/fts/fts3aa-9.rq b/tests/fts/fts3aa-9.rq
new file mode 100644
index 000000000..72e1c98ed
--- /dev/null
+++ b/tests/fts/fts3aa-9.rq
@@ -0,0 +1 @@
+SELECT ?o WHERE { ?o fts:match "\"four five\" three \"one two\"" } order by ?o
diff --git a/tests/fts/tracker-fts-test.c b/tests/fts/tracker-fts-test.c
index b05da1aee..fd2308c4b 100644
--- a/tests/fts/tracker-fts-test.c
+++ b/tests/fts/tracker-fts-test.c
@@ -33,7 +33,7 @@ struct _TestInfo {
};
const TestInfo tests[] = {
- { "fts3aa", 3 },
+ { "fts3aa", 9 },
{ "fts3ae", 1 },
{ "consistency/partial-update", 2 },
{ "consistency/insert-or-replace", 2 },