diff options
author | Adrian Perez de Castro <aperez@igalia.com> | 2017-02-03 00:54:51 +0200 |
---|---|---|
committer | Michael Catanzaro <mcatanzaro@gnome.org> | 2017-02-08 22:12:35 -0600 |
commit | d1d3db843c2c73e5fef732839c9a3c25147a294d (patch) | |
tree | 559a1dc69656fd800623182fefd67723fa951a4b | |
parent | 2578255dd1971b1d70321f428d33de810dea495f (diff) | |
download | epiphany-d1d3db843c2c73e5fef732839c9a3c25147a294d.tar.gz |
uri-tester: Ensure regexps are properly constructed
This adds a few more cases to the escaping done when converting an AdBlock
non-regexp "simple pattern" from a rule into a GRegex. This patch does the
following:
- Adds escaping to some of the regexp metacharacters which were not being
handled: (){}+.|\
- Adds support for using a vertical bar at the end of a pattern to anchor the
match at the end.
- Adds support for using ^ to match a "separator character" (a non-letter,
non-number, or one of _-.%).
This also adds as many comment lines as code, which in this particular case
is probably a good thing, so that reading the code in the future does not
require checking each case against the GRegex documentation.
https://bugzilla.gnome.org/show_bug.cgi?id=777714
-rw-r--r-- | embed/web-extension/uri-tester.c | 32 |
1 files changed, 16 insertions, 16 deletions
diff --git a/embed/web-extension/uri-tester.c b/embed/web-extension/uri-tester.c index 271ae80a9..aa3a6399a 100644 --- a/embed/web-extension/uri-tester.c +++ b/embed/web-extension/uri-tester.c @@ -421,7 +421,6 @@ static GString * uri_tester_fixup_regexp (const char *prefix, char *src) { GString *str; - int len = 0; if (!src) return NULL; @@ -441,21 +440,27 @@ uri_tester_fixup_regexp (const char *prefix, char *src) case '*': g_string_append (str, ".*"); break; - /*case '.': - g_string_append (str, "\\."); - break;*/ + case '^': + g_string_append (str, "([^a-zA-Z\\d]|[_\\-\\.%])"); + break; + case '|': + if (src[1] == '\0') + g_string_append (str, "$"); + else + g_string_append (str, "\\|"); + break; + case '.': + case '+': case '?': case '[': case ']': + case '{': + case '}': + case '(': + case ')': + case '\\': g_string_append_printf (str, "\\%c", *src); break; - case '|': - /* FIXME: We actually need to match :[0-9]+ or '/'. Sign means - "here could be port number or nothing". So bla.com^ will match - bla.com/ or bla.com:8080/ but not bla.com.au/ */ - case '^': - case '+': - break; default: g_string_append_printf (str,"%c", *src); break; @@ -464,11 +469,6 @@ uri_tester_fixup_regexp (const char *prefix, char *src) } while (*src); - len = str->len; - /* We dont need .* in the end of url. Thats stupid */ - if (str->str && str->str[len-1] == '*' && str->str[len-2] == '.') - g_string_erase (str, len-2, 2); - return str; } |