diff options
author | Kevin Haller <kevin.haller@outofbits.com> | 2015-09-13 20:26:56 +0200 |
---|---|---|
committer | Carlos Garnacho <carlosg@gnome.org> | 2016-05-08 19:20:05 +0200 |
commit | 60d0f54fc6bc7d41502a588298a5040019149b2a (patch) | |
tree | c28fde2d57f69771fb886f33e7c4e526ad57550d | |
parent | 369fdc9c1c1c1c8bb74b6dcb3a095392c40e0526 (diff) | |
download | tracker-60d0f54fc6bc7d41502a588298a5040019149b2a.tar.gz |
libtracker-data: Support regular expressions for fn:replace().
Extends the sqlite database by a new function (with the name
SparqlReplace). The function makes use of the g_regex_replace() function
of glib.
To fullfill the XPath 2.0 standard some constraints must be checked for
fn:replace(input, pattern, replacement, flags). The given pattern must
not match a zero-length string. The given replacement string have to use
$ followed by a number for backreferences. If the dollar sign shall be used
"as is", it must be escaped (\$).
For checking and interpreting the given replacement string of fn:replace()
some regular expressions are needed. This expressions are precompiled and
saved in the function_regex hashset of the TrackerDBInterface. The
pre-compilation and initialization of the hashset are done by the
prepare_database() method.
The glib method g_regex_replace() make use of the backslash followed by a
number to inidcate backreferences. So the dollar signs must be interpreted
- the backslashes can be still used for this purpose.
In the sparql expression class the corresponding section is adapted, so
that the new SparqlReplace function is used for fn:replace(..) statements.
https://bugzilla.gnome.org/show_bug.cgi?id=754961
-rw-r--r-- | src/libtracker-data/tracker-db-interface-sqlite.c | 193 | ||||
-rw-r--r-- | src/libtracker-data/tracker-sparql-expression.vala | 8 |
2 files changed, 198 insertions, 3 deletions
diff --git a/src/libtracker-data/tracker-db-interface-sqlite.c b/src/libtracker-data/tracker-db-interface-sqlite.c index 1bd455548..88cbd904d 100644 --- a/src/libtracker-data/tracker-db-interface-sqlite.c +++ b/src/libtracker-data/tracker-db-interface-sqlite.c @@ -63,6 +63,12 @@ typedef struct { guint max; } TrackerDBStatementLru; +typedef struct { + GRegex *syntax_check; + GRegex *replacement; + GRegex *unescape; +} TrackerDBReplaceFuncChecks; + struct TrackerDBInterface { GObject parent_instance; @@ -71,6 +77,9 @@ struct TrackerDBInterface { GHashTable *dynamic_statements; + /* Compiled regular expressions */ + TrackerDBReplaceFuncChecks replace_func_checks; + GSList *function_data; /* Collation and locale change */ @@ -569,6 +578,179 @@ function_sparql_regex (sqlite3_context *context, sqlite3_result_int (context, ret); } +static void +ensure_replace_checks (TrackerDBInterface *db_interface) +{ + if (db_interface->replace_func_checks.syntax_check != NULL) + return; + + db_interface->replace_func_checks.syntax_check = + g_regex_new ("(?<!\\\\)\\$\\D", G_REGEX_OPTIMIZE, 0, NULL); + db_interface->replace_func_checks.replacement = + g_regex_new("(?<!\\\\)\\$(\\d)", G_REGEX_OPTIMIZE, 0, NULL); + db_interface->replace_func_checks.unescape = + g_regex_new("\\\\\\$", G_REGEX_OPTIMIZE, 0, NULL); +} + +static void +function_sparql_replace (sqlite3_context *context, + int argc, + sqlite3_value *argv[]) +{ + TrackerDBInterface *db_interface = sqlite3_user_data (context); + TrackerDBReplaceFuncChecks *checks = &db_interface->replace_func_checks; + gboolean store_regex = FALSE, store_replace_regex = FALSE; + const gchar *input, *pattern, *replacement, *flags; + gchar *err_str, *output, *replaced = NULL, *unescaped = NULL; + GError *error = NULL; + GRegexCompileFlags regex_flags = 0; + GRegex *regex, *replace_regex; + gint capture_count, i; + + ensure_replace_checks (db_interface); + + if (argc == 3) { + flags = ""; + } else if (argc == 4) { + flags = sqlite3_value_text (argv[3]); + } else { + sqlite3_result_error (context, "Invalid argument count", -1); + return; + } + + input = sqlite3_value_text (argv[0]); + regex = sqlite3_get_auxdata (context, 1); + replacement = sqlite3_value_text (argv[2]); + + if (regex == NULL) { + pattern = sqlite3_value_text (argv[1]); + + for (i = 0; flags[i]; i++) { + switch (flags[i]) { + case 's': + regex_flags |= G_REGEX_DOTALL; + break; + case 'm': + regex_flags |= G_REGEX_MULTILINE; + break; + case 'i': + regex_flags |= G_REGEX_CASELESS; + break; + case 'x': + regex_flags |= G_REGEX_EXTENDED; + break; + default: + err_str = g_strdup_printf ("Invalid SPARQL regex flag '%c'", flags[i]); + sqlite3_result_error (context, err_str, -1); + g_free (err_str); + return; + } + } + + regex = g_regex_new (pattern, regex_flags, 0, &error); + + if (error) { + sqlite3_result_error (context, error->message, -1); + g_clear_error (&error); + return; + } + + /* According to the XPath 2.0 standard, an error shall be raised, if the given + * pattern matches a zero-length string. + */ + if (g_regex_match (regex, "", 0, NULL)) { + err_str = g_strdup_printf ("The given pattern '%s' matches a zero-length string.", + pattern); + sqlite3_result_error (context, err_str, -1); + g_regex_unref (regex); + g_free (err_str); + return; + } + + store_regex = TRUE; + } + + /* According to the XPath 2.0 standard, an error shall be raised, if all dollar + * signs ($) of the given replacement string are not immediately followed by + * a digit 0-9 or not immediately preceded by a \. + */ + if (g_regex_match (checks->syntax_check, replacement, 0, NULL)) { + err_str = g_strdup_printf ("The replacement string '%s' contains a \"$\" character " + "that is not immediately followed by a digit 0-9 and " + "not immediately preceded by a \"\\\".", + replacement); + sqlite3_result_error (context, err_str, -1); + g_free (err_str); + return; + } + + /* According to the XPath 2.0 standard, the dollar sign ($) followed by a number + * indicates backreferences. GRegex uses the backslash (\) for this purpose. + * So the ($) backreferences in the given replacement string are replaced by (\) + * backreferences to support the standard. + */ + capture_count = g_regex_get_capture_count (regex); + replace_regex = sqlite3_get_auxdata (context, 2); + + if (capture_count > 9 && !replace_regex) { + gint i; + GString *backref_range; + gchar *regex_interpret; + + /* S ... capture_count, N ... the given decimal number. + * If N>S and N>9, The last digit of N is taken to be a literal character + * to be included "as is" in the replacement string, and the rules are + * reapplied using the number N formed by stripping off this last digit. + */ + backref_range = g_string_new ("("); + for (i = 10; i <= capture_count; i++) { + g_string_append_printf (backref_range, "%d|", i); + } + + g_string_append (backref_range, "\\d)"); + regex_interpret = g_strdup_printf ("(?<!\\\\)\\$%s", + backref_range->str); + + replace_regex = g_regex_new (regex_interpret, 0, 0, NULL); + + g_string_free (backref_range, TRUE); + g_free (regex_interpret); + + store_replace_regex = TRUE; + } else if (capture_count <= 9) { + replace_regex = checks->replacement; + } + + replaced = g_regex_replace (replace_regex, + replacement, -1, 0, "\\\\g<\\1>", 0, &error); + + if (replaced) { + /* All '\$' pairs are replaced by '$' */ + unescaped = g_regex_replace (checks->unescape, + replaced, -1, 0, "$", 0, &error); + } + + if (unescaped) { + output = g_regex_replace (regex, input, -1, 0, unescaped, 0, &error); + } + + if (error) { + sqlite3_result_error (context, error->message, -1); + g_clear_error (&error); + return; + } + + sqlite3_result_text (context, output, -1, g_free); + + if (store_replace_regex) + sqlite3_set_auxdata (context, 2, replace_regex, (GDestroyNotify) g_regex_unref); + if (store_regex) + sqlite3_set_auxdata (context, 1, regex, (GDestroyNotify) g_regex_unref); + + g_free (replaced); + g_free (unescaped); +} + #ifdef HAVE_LIBUNISTRING static void @@ -1215,6 +1397,10 @@ open_database (TrackerDBInterface *db_interface, db_interface, &function_sparql_checksum, NULL, NULL); + sqlite3_create_function (db_interface->db, "SparqlReplace", -1, SQLITE_ANY, + db_interface, &function_sparql_replace, + NULL, NULL); + sqlite3_extended_result_codes (db_interface->db, 0); sqlite3_busy_timeout (db_interface->db, 100000); } @@ -1299,6 +1485,13 @@ close_database (TrackerDBInterface *db_interface) db_interface->dynamic_statements = NULL; } + if (db_interface->replace_func_checks.syntax_check) + g_regex_unref (db_interface->replace_func_checks.syntax_check); + if (db_interface->replace_func_checks.replacement) + g_regex_unref (db_interface->replace_func_checks.replacement); + if (db_interface->replace_func_checks.unescape) + g_regex_unref (db_interface->replace_func_checks.unescape); + if (db_interface->function_data) { g_slist_foreach (db_interface->function_data, (GFunc) g_free, NULL); g_slist_free (db_interface->function_data); diff --git a/src/libtracker-data/tracker-sparql-expression.vala b/src/libtracker-data/tracker-sparql-expression.vala index 91c6c10a0..85dd59468 100644 --- a/src/libtracker-data/tracker-sparql-expression.vala +++ b/src/libtracker-data/tracker-sparql-expression.vala @@ -682,7 +682,7 @@ class Tracker.Sparql.Expression : Object { return PropertyType.INTEGER; } else if (uri == FN_NS + "replace") { - sql.append ("replace("); + sql.append ("SparqlReplace("); translate_expression_as_string (sql); sql.append (", "); @@ -693,9 +693,11 @@ class Tracker.Sparql.Expression : Object { expect (SparqlTokenType.COMMA); translate_expression_as_string (sql); - // FIXME: No regex (nor its modifier flags) support + if (accept (SparqlTokenType.COMMA)) { + sql.append (", "); + sql.append (escape_sql_string_literal (parse_string_literal ())); + } sql.append (")"); - return PropertyType.STRING; } else if (uri == FTS_NS + "rank") { bool is_var; |