summaryrefslogtreecommitdiff
path: root/embed/ephy-uri-tester.c
diff options
context:
space:
mode:
Diffstat (limited to 'embed/ephy-uri-tester.c')
-rw-r--r--embed/ephy-uri-tester.c1066
1 files changed, 1066 insertions, 0 deletions
diff --git a/embed/ephy-uri-tester.c b/embed/ephy-uri-tester.c
new file mode 100644
index 000000000..860ff3233
--- /dev/null
+++ b/embed/ephy-uri-tester.c
@@ -0,0 +1,1066 @@
+/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * Copyright © 2011 Igalia S.L.
+ *
+ * This file is part of Epiphany.
+ *
+ * Epiphany is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Epiphany is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Epiphany. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Some parts of this file based on the Midori's 'adblock' extension,
+ * licensed with the GNU Lesser General Public License 2.1, Copyright
+ * (C) 2009-2010 Christian Dywan <christian@twotoasts.de> and 2009
+ * Alexander Butenko <a.butenka@gmail.com>. Check Midori's web site
+ * at http://www.twotoasts.de
+ */
+
+#include "config.h"
+#include "ephy-uri-tester.h"
+
+#include "ephy-dbus-names.h"
+#include "ephy-debug.h"
+#include "ephy-file-helpers.h"
+#include "ephy-settings.h"
+#include "ephy-uri-helpers.h"
+#include "ephy-uri-tester-interface.h"
+
+#include <gio/gio.h>
+#include <glib/gstdio.h>
+#include <libsoup/soup.h>
+#include <string.h>
+
+#define DEFAULT_FILTER_URL "https://easylist-downloads.adblockplus.org/easylist.txt"
+#define FILTERS_LIST_FILENAME "filters.list"
+#define SIGNATURE_SIZE 8
+#define UPDATE_FREQUENCY 24 * 60 * 60 /* In seconds */
+
+struct _EphyUriTester {
+ GObject parent_instance;
+
+ GSList *filters;
+ char *data_dir;
+
+ GHashTable *pattern;
+ GHashTable *keys;
+ GHashTable *optslist;
+ GHashTable *urlcache;
+
+ GHashTable *whitelisted_pattern;
+ GHashTable *whitelisted_keys;
+ GHashTable *whitelisted_optslist;
+ GHashTable *whitelisted_urlcache;
+
+ GString *blockcss;
+ GString *blockcssprivate;
+
+ GRegex *regex_third_party;
+ GRegex *regex_pattern;
+ GRegex *regex_subdocument;
+ GRegex *regex_frame_add;
+};
+
+enum {
+ PROP_0,
+ PROP_FILTERS,
+ PROP_BASE_DATA_DIR,
+ LAST_PROP
+};
+
+static GParamSpec *obj_properties[LAST_PROP];
+
+G_DEFINE_TYPE (EphyUriTester, ephy_uri_tester, G_TYPE_OBJECT)
+
+/* Private functions. */
+
+static GString *
+ephy_uri_tester_fixup_regexp (const char *prefix, char *src);
+
+static void
+ephy_uri_tester_parse_file_at_uri (EphyUriTester *tester, const char *fileuri);
+
+static char *
+ephy_uri_tester_ensure_data_dir (const char *base_data_dir)
+{
+ char *folder;
+
+ /* Ensure adblock's dir is there. */
+ folder = g_build_filename (base_data_dir, "adblock", NULL);
+ g_mkdir_with_parents (folder, 0700);
+
+ return folder;
+}
+
+static char *
+ephy_uri_tester_get_fileuri_for_url (EphyUriTester *tester,
+ const char *url)
+{
+ char *filename = NULL;
+ char *path = NULL;
+ char *uri = NULL;
+
+ if (!strncmp (url, "file", 4))
+ return g_strndup (url + 7, strlen (url) - 7);
+
+ filename = g_compute_checksum_for_string (G_CHECKSUM_MD5, url, -1);
+
+ path = g_build_filename (tester->data_dir, filename, NULL);
+ uri = g_filename_to_uri (path, NULL, NULL);
+
+ g_free (filename);
+ g_free (path);
+
+ return uri;
+}
+
+typedef struct {
+ EphyUriTester *tester;
+ char *dest_uri;
+} RetrieveFilterAsyncData;
+
+static void
+ephy_uri_tester_retrieve_filter_finished (GFile *src,
+ GAsyncResult *result,
+ RetrieveFilterAsyncData *data)
+{
+ GError *error = NULL;
+
+ if (!g_file_copy_finish (src, result, &error)) {
+ LOG ("Error retrieving filter: %s\n", error->message);
+ g_error_free (error);
+ } else
+ ephy_uri_tester_parse_file_at_uri (data->tester, data->dest_uri);
+
+ g_object_unref (data->tester);
+ g_free (data->dest_uri);
+ g_slice_free (RetrieveFilterAsyncData, data);
+}
+
+static void
+ephy_uri_tester_retrieve_filter (EphyUriTester *tester,
+ const char *url,
+ const char *fileuri)
+{
+ GFile *src;
+ GFile *dest;
+ RetrieveFilterAsyncData *data;
+
+ g_return_if_fail (EPHY_IS_URI_TESTER (tester));
+ g_return_if_fail (url != NULL);
+ g_return_if_fail (fileuri != NULL);
+
+ src = g_file_new_for_uri (url);
+ dest = g_file_new_for_uri (fileuri);
+
+ data = g_slice_new (RetrieveFilterAsyncData);
+ data->tester = g_object_ref (tester);
+ data->dest_uri = g_file_get_uri (dest);
+
+ g_file_copy_async (src, dest,
+ G_FILE_COPY_OVERWRITE,
+ G_PRIORITY_DEFAULT,
+ NULL, NULL, NULL,
+ (GAsyncReadyCallback)ephy_uri_tester_retrieve_filter_finished,
+ data);
+
+ g_object_unref (src);
+ g_object_unref (dest);
+}
+
+static gboolean
+ephy_uri_tester_filter_is_valid (const char *fileuri)
+{
+ GFile *file = NULL;
+ GFileInfo *file_info = NULL;
+ gboolean result;
+
+ /* Now check if the local file is too old. */
+ file = g_file_new_for_uri (fileuri);
+ file_info = g_file_query_info (file,
+ G_FILE_ATTRIBUTE_TIME_MODIFIED,
+ G_FILE_QUERY_INFO_NONE,
+ NULL,
+ NULL);
+ result = FALSE;
+ if (file_info) {
+ GTimeVal current_time;
+ GTimeVal mod_time;
+
+ g_get_current_time (&current_time);
+ g_file_info_get_modification_time (file_info, &mod_time);
+
+ if (current_time.tv_sec > mod_time.tv_sec) {
+ gint64 expire_time = mod_time.tv_sec + UPDATE_FREQUENCY;
+ result = current_time.tv_sec < expire_time;
+ }
+ g_object_unref (file_info);
+ }
+
+ g_object_unref (file);
+
+ return result;
+}
+
+static void
+ephy_uri_tester_load_patterns (EphyUriTester *tester)
+{
+ GSList *filter = NULL;
+ char *url = NULL;
+ char *fileuri = NULL;
+
+ /* Load patterns from the list of filters. */
+ for (filter = tester->filters; filter; filter = g_slist_next (filter)) {
+ url = (char *)filter->data;
+ fileuri = ephy_uri_tester_get_fileuri_for_url (tester, url);
+
+ if (!ephy_uri_tester_filter_is_valid (fileuri))
+ ephy_uri_tester_retrieve_filter (tester, url, fileuri);
+ else
+ ephy_uri_tester_parse_file_at_uri (tester, fileuri);
+
+ g_free (fileuri);
+ }
+}
+
+static void
+ephy_uri_tester_set_filters (EphyUriTester *tester, GSList *filters)
+{
+ if (tester->filters)
+ g_slist_free_full (tester->filters, g_free);
+
+ tester->filters = filters;
+}
+
+static void
+ephy_uri_tester_load_filters (EphyUriTester *tester)
+{
+ GSList *list = NULL;
+ char *filepath = NULL;
+
+ filepath = g_build_filename (tester->data_dir, FILTERS_LIST_FILENAME, NULL);
+
+ if (g_file_test (filepath, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) {
+ GFile *file = NULL;
+ char *contents = NULL;
+ gsize length = 0;
+ GError *error = NULL;
+
+ file = g_file_new_for_path (filepath);
+ if (g_file_load_contents (file, NULL, &contents, &length, NULL, &error)) {
+ char **urls_array = NULL;
+ char *url = NULL;
+ int i = 0;
+
+ urls_array = g_strsplit (contents, ";", -1);
+ for (i = 0; urls_array [i]; i++) {
+ url = g_strstrip (g_strdup (urls_array[i]));
+ if (!g_str_equal (url, ""))
+ list = g_slist_prepend (list, url);
+ }
+ g_strfreev (urls_array);
+
+ g_free (contents);
+ }
+
+ if (error) {
+ LOG ("Error loading filters from %s: %s", filepath, error->message);
+ g_error_free (error);
+ }
+
+ g_object_unref (file);
+ } else {
+ /* No file exists yet, so use the default filter and save it. */
+ list = g_slist_prepend (list, g_strdup (DEFAULT_FILTER_URL));
+ }
+
+ g_free (filepath);
+
+ ephy_uri_tester_set_filters (tester, g_slist_reverse (list));
+}
+
+#if 0
+ TODO: Use this to create a filters dialog, or something.
+
+static void
+ephy_uri_tester_save_filters (EphyUriTester *tester)
+{
+ FILE *file = NULL;
+ char *filepath = NULL;
+
+ filepath = g_build_filename (tester->data_dir, FILTERS_LIST_FILENAME, NULL);
+
+ if ((file = g_fopen (filepath, "w"))) {
+ GSList *item = NULL;
+ char *filter = NULL;
+
+ for (item = tester->filters; item; item = g_slist_next (item)) {
+ filter = g_strdup_printf ("%s;", (char *)item->data);
+ fputs (filter, file);
+ g_free (filter);
+ }
+ fclose (file);
+ }
+ g_free (filepath);
+}
+#endif
+
+static inline int
+ephy_uri_tester_check_rule (EphyUriTester *tester,
+ GRegex *regex,
+ const char *patt,
+ const char *req_uri,
+ const char *page_uri,
+ gboolean whitelist)
+{
+ char *opts;
+ GHashTable *optslist = tester->optslist;
+ if (whitelist)
+ optslist = tester->whitelisted_optslist;
+
+ if (!g_regex_match_full (regex, req_uri, -1, 0, 0, NULL, NULL))
+ return FALSE;
+
+ opts = g_hash_table_lookup (optslist, patt);
+ if (opts && g_regex_match (tester->regex_third_party, opts, 0, NULL)) {
+ if (page_uri && g_regex_match_full (regex, page_uri, -1, 0, 0, NULL, NULL))
+ return FALSE;
+ }
+ /* TODO: Domain and document opt check */
+ if (whitelist)
+ LOG ("whitelisted by pattern regexp=%s -- %s", g_regex_get_pattern (regex), req_uri);
+ else
+ LOG ("blocked by pattern regexp=%s -- %s", g_regex_get_pattern (regex), req_uri);
+ return TRUE;
+}
+
+static inline gboolean
+ephy_uri_tester_is_matched_by_pattern (EphyUriTester *tester,
+ const char *req_uri,
+ const char *page_uri,
+ gboolean whitelist)
+{
+ GHashTableIter iter;
+ gpointer patt, regex;
+ GHashTable *pattern = tester->pattern;
+ if (whitelist)
+ pattern = tester->whitelisted_pattern;
+
+ g_hash_table_iter_init (&iter, pattern);
+ while (g_hash_table_iter_next (&iter, &patt, &regex)) {
+ if (ephy_uri_tester_check_rule (tester, regex, patt, req_uri, page_uri, whitelist))
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static inline gboolean
+ephy_uri_tester_is_matched_by_key (EphyUriTester *tester,
+ const char *opts,
+ const char *req_uri,
+ const char *page_uri,
+ gboolean whitelist)
+{
+ char *uri;
+ int len;
+ int pos = 0;
+ GList *regex_bl = NULL;
+ GString *guri;
+ gboolean ret = FALSE;
+ char sig[SIGNATURE_SIZE + 1];
+ GHashTable *keys = tester->keys;
+ if (whitelist)
+ keys = tester->whitelisted_keys;
+
+ memset (&sig[0], 0, sizeof (sig));
+ /* Signatures are made on pattern, so we need to convert url to a pattern as well */
+ guri = ephy_uri_tester_fixup_regexp ("", (char *)req_uri);
+ uri = guri->str;
+ len = guri->len;
+
+ for (pos = len - SIGNATURE_SIZE; pos >= 0; pos--) {
+ GRegex *regex;
+ strncpy (sig, uri + pos, SIGNATURE_SIZE);
+ regex = g_hash_table_lookup (keys, sig);
+
+ /* Dont check if regex is already blacklisted */
+ if (!regex || g_list_find (regex_bl, regex))
+ continue;
+ ret = ephy_uri_tester_check_rule (tester, regex, sig, req_uri, page_uri, whitelist);
+ if (ret)
+ break;
+ regex_bl = g_list_prepend (regex_bl, regex);
+ }
+ g_string_free (guri, TRUE);
+ g_list_free (regex_bl);
+ return ret;
+}
+
+static gboolean
+ephy_uri_tester_is_matched (EphyUriTester *tester,
+ const char *opts,
+ const char *req_uri,
+ const char *page_uri,
+ gboolean whitelist)
+{
+ char *value;
+ GHashTable *urlcache = tester->urlcache;
+ if (whitelist)
+ urlcache = tester->whitelisted_urlcache;
+
+ /* Check cached URLs first. */
+ if ((value = g_hash_table_lookup (urlcache, req_uri)))
+ return GPOINTER_TO_INT (value);
+
+ /* Look for a match either by key or by pattern. */
+ if (ephy_uri_tester_is_matched_by_key (tester, opts, req_uri, page_uri, whitelist)) {
+ g_hash_table_insert (urlcache, g_strdup (req_uri), g_strdup ("1"));
+ return TRUE;
+ }
+
+ /* Matching by pattern is pretty expensive, so do it if needed only. */
+ if (ephy_uri_tester_is_matched_by_pattern (tester, req_uri, page_uri, whitelist)) {
+ g_hash_table_insert (urlcache, g_strdup (req_uri), GINT_TO_POINTER (TRUE));
+ return TRUE;
+ }
+
+ g_hash_table_insert (urlcache, g_strdup (req_uri), GINT_TO_POINTER (FALSE));
+ return FALSE;
+}
+
+static GString *
+ephy_uri_tester_fixup_regexp (const char *prefix, char *src)
+{
+ GString *str;
+ int len = 0;
+
+ if (!src)
+ return NULL;
+
+ str = g_string_new (prefix);
+
+ /* lets strip first .* */
+ if (src[0] == '*') {
+ (void)*src++;
+ }
+
+ do {
+ switch (*src) {
+ case '*':
+ g_string_append (str, ".*");
+ break;
+ /*case '.':
+ g_string_append (str, "\\.");
+ break;*/
+ case '?':
+ case '[':
+ case ']':
+ g_string_append_printf (str, "\\%c", *src);
+ break;
+ case '|':
+ /* FIXME: We actually need to match :[0-9]+ or '/'. Sign means
+ "here could be port number or nothing". So bla.com^ will match
+ bla.com/ or bla.com:8080/ but not bla.com.au/ */
+ case '^':
+ case '+':
+ break;
+ default:
+ g_string_append_printf (str, "%c", *src);
+ break;
+ }
+ src++;
+ } while (*src);
+
+ len = str->len;
+ /* We dont need .* in the end of url. Thats stupid */
+ if (str->str && str->str[len - 1] == '*' && str->str[len - 2] == '.')
+ g_string_erase (str, len - 2, 2);
+
+ return str;
+}
+
+static void
+ephy_uri_tester_compile_regexp (EphyUriTester *tester,
+ GString *gpatt,
+ const char *opts,
+ gboolean whitelist)
+{
+ GHashTable *pattern;
+ GHashTable *keys;
+ GHashTable *optslist;
+ GRegex *regex;
+ GError *error = NULL;
+ char *patt;
+ int len;
+
+ if (!gpatt)
+ return;
+
+ patt = gpatt->str;
+ len = gpatt->len;
+
+ /* TODO: Play with optimization flags */
+ regex = g_regex_new (patt, G_REGEX_OPTIMIZE | G_REGEX_JAVASCRIPT_COMPAT,
+ G_REGEX_MATCH_NOTEMPTY, &error);
+ if (error) {
+ g_warning ("%s: %s", G_STRFUNC, error->message);
+ g_error_free (error);
+ return;
+ }
+
+ pattern = tester->pattern;
+ keys = tester->keys;
+ optslist = tester->optslist;
+ if (whitelist) {
+ pattern = tester->whitelisted_pattern;
+ keys = tester->whitelisted_keys;
+ optslist = tester->whitelisted_optslist;
+ }
+
+ if (!g_regex_match (tester->regex_pattern, patt, 0, NULL)) {
+ int signature_count = 0;
+ int pos = 0;
+ char *sig;
+
+ for (pos = len - SIGNATURE_SIZE; pos >= 0; pos--) {
+ sig = g_strndup (patt + pos, SIGNATURE_SIZE);
+ if (!strchr (sig, '*') &&
+ !g_hash_table_lookup (keys, sig)) {
+ LOG ("sig: %s %s", sig, patt);
+ g_hash_table_insert (keys, g_strdup (sig), g_regex_ref (regex));
+ g_hash_table_insert (optslist, g_strdup (sig), g_strdup (opts));
+ signature_count++;
+ } else {
+ if (sig[0] == '*' &&
+ !g_hash_table_lookup (pattern, patt)) {
+ LOG ("patt2: %s %s", sig, patt);
+ g_hash_table_insert (pattern, g_strdup (patt), g_regex_ref (regex));
+ g_hash_table_insert (optslist, g_strdup (patt), g_strdup (opts));
+ }
+ }
+ g_free (sig);
+ }
+ g_regex_unref (regex);
+
+ if (signature_count > 1 && g_hash_table_lookup (pattern, patt))
+ g_hash_table_remove (pattern, patt);
+ } else {
+ LOG ("patt: %s%s", patt, "");
+ /* Pattern is a regexp chars */
+ g_hash_table_insert (pattern, g_strdup (patt), regex);
+ g_hash_table_insert (optslist, g_strdup (patt), g_strdup (opts));
+ }
+}
+
+static void
+ephy_uri_tester_add_url_pattern (EphyUriTester *tester,
+ const char *prefix,
+ const char *type,
+ char *line,
+ gboolean whitelist)
+{
+ char **data;
+ char *patt;
+ GString *format_patt;
+ const char *opts;
+
+ data = g_strsplit (line, "$", -1);
+ if (!data || !data[0]) {
+ g_strfreev (data);
+ return;
+ }
+
+ if (data[1] && data[2]) {
+ patt = g_strconcat (data[0], data[1], NULL);
+ opts = g_strconcat (type, ",", data[2], NULL);
+ } else if (data[1]) {
+ patt = data[0];
+ opts = g_strconcat (type, ",", data[1], NULL);
+ } else {
+ patt = data[0];
+ opts = type;
+ }
+
+ if (g_regex_match (tester->regex_subdocument, opts, 0, NULL)) {
+ if (data[1] && data[2])
+ g_free (patt);
+ if (data[1])
+ g_free ((char *)opts);
+ g_strfreev (data);
+ return;
+ }
+
+ format_patt = ephy_uri_tester_fixup_regexp (prefix, patt);
+
+ if (whitelist)
+ LOG ("whitelist: %s opts %s", format_patt->str, opts);
+ else
+ LOG ("blacklist: %s opts %s", format_patt->str, opts);
+
+ ephy_uri_tester_compile_regexp (tester, format_patt, opts, whitelist);
+
+ if (data[1] && data[2])
+ g_free (patt);
+ if (data[1])
+ g_free ((char *)opts);
+ g_strfreev (data);
+
+ g_string_free (format_patt, TRUE);
+}
+
+static inline void
+ephy_uri_tester_frame_add (EphyUriTester *tester, char *line)
+{
+ const char *separator = " , ";
+
+ (void)*line++;
+ (void)*line++;
+ if (strchr (line, '\'')
+ || (strchr (line, ':')
+ && !g_regex_match (tester->regex_frame_add, line, 0, NULL))) {
+ return;
+ }
+ g_string_append (tester->blockcss, separator);
+ g_string_append (tester->blockcss, line);
+}
+
+static inline void
+ephy_uri_tester_frame_add_private (EphyUriTester *tester,
+ const char *line,
+ const char *sep)
+{
+ char **data;
+ data = g_strsplit (line, sep, 2);
+
+ if (!(data[1] && *data[1])
+ || strchr (data[1], '\'')
+ || (strchr (data[1], ':')
+ && !g_regex_match (tester->regex_frame_add, data[1], 0, NULL))) {
+ g_strfreev (data);
+ return;
+ }
+
+ if (strchr (data[0], ',')) {
+ char **domains;
+ int i;
+
+ domains = g_strsplit (data[0], ",", -1);
+ for (i = 0; domains[i]; i++) {
+ g_string_append_printf (tester->blockcssprivate, ";sites['%s']+=',%s'",
+ g_strstrip (domains[i]), data[1]);
+ }
+ g_strfreev (domains);
+ } else {
+ g_string_append_printf (tester->blockcssprivate, ";sites['%s']+=',%s'",
+ data[0], data[1]);
+ }
+ g_strfreev (data);
+}
+
+static void
+ephy_uri_tester_parse_line (EphyUriTester *tester,
+ char *line,
+ gboolean whitelist)
+{
+ if (!line)
+ return;
+
+ g_strchomp (line);
+ /* Ignore comments and new lines */
+ if (line[0] == '!')
+ return;
+ /* FIXME: No support for [include] and [exclude] tags */
+ if (line[0] == '[')
+ return;
+
+ /* Whitelisted exception rules */
+ if (g_str_has_prefix (line, "@@")) {
+ ephy_uri_tester_parse_line (tester, line + 2, TRUE);
+ return;
+ }
+
+ /* FIXME: No support for domain= */
+ if (strstr (line, "domain="))
+ return;
+
+ /* Skip garbage */
+ if (line[0] == ' ' || !line[0])
+ return;
+
+ /* Got CSS block hider */
+ if (line[0] == '#' && line[1] == '#') {
+ ephy_uri_tester_frame_add (tester, line);
+ return;
+ }
+ /* Got CSS block hider. Workaround */
+ if (line[0] == '#')
+ return;
+
+ /* Got per domain CSS hider rule */
+ if (strstr (line, "##")) {
+ ephy_uri_tester_frame_add_private (tester, line, "##");
+ return;
+ }
+
+ /* Got per domain CSS hider rule. Workaround */
+ if (strchr (line, '#')) {
+ ephy_uri_tester_frame_add_private (tester, line, "#");
+ return;
+ }
+ /* Got URL blocker rule */
+ if (line[0] == '|' && line[1] == '|') {
+ (void)*line++;
+ (void)*line++;
+ /* set a regex prefix to ensure that '||' patterns are anchored at the
+ * start and that any characters (if any) preceding the domain specified
+ * by the rule is separated from it by a dot '.' */
+ ephy_uri_tester_add_url_pattern (tester, "^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?", "fulluri", line, whitelist);
+ return;
+ }
+ if (line[0] == '|') {
+ (void)*line++;
+ ephy_uri_tester_add_url_pattern (tester, "^", "fulluri", line, whitelist);
+ return;
+ }
+ ephy_uri_tester_add_url_pattern (tester, "", "uri", line, whitelist);
+}
+
+static void
+file_parse_cb (GDataInputStream *stream, GAsyncResult *result, EphyUriTester *tester)
+{
+ char *line;
+ GError *error = NULL;
+
+ line = g_data_input_stream_read_line_finish (stream, result, NULL, &error);
+ if (!line) {
+ if (error) {
+ LOG ("Error parsing file: %s\n", error->message);
+ g_error_free (error);
+ }
+
+ return;
+ }
+
+ ephy_uri_tester_parse_line (tester, line, FALSE);
+ g_free (line);
+
+ g_data_input_stream_read_line_async (stream, G_PRIORITY_DEFAULT_IDLE, NULL,
+ (GAsyncReadyCallback)file_parse_cb, tester);
+}
+
+static void
+file_read_cb (GFile *file, GAsyncResult *result, EphyUriTester *tester)
+{
+ GFileInputStream *stream;
+ GDataInputStream *data_stream;
+ GError *error = NULL;
+
+ stream = g_file_read_finish (file, result, &error);
+ if (!stream) {
+ char *path;
+
+ path = g_file_get_path (file);
+ LOG ("Error opening file %s for parsing: %s\n", path, error->message);
+ g_free (path);
+ g_error_free (error);
+
+ return;
+ }
+
+ data_stream = g_data_input_stream_new (G_INPUT_STREAM (stream));
+ g_object_unref (stream);
+
+ g_data_input_stream_read_line_async (data_stream, G_PRIORITY_DEFAULT_IDLE, NULL,
+ (GAsyncReadyCallback)file_parse_cb, tester);
+ g_object_unref (data_stream);
+}
+
+static void
+ephy_uri_tester_parse_file_at_uri (EphyUriTester *tester, const char *fileuri)
+{
+ GFile *file;
+
+ file = g_file_new_for_uri (fileuri);
+ g_file_read_async (file, G_PRIORITY_DEFAULT_IDLE, NULL, (GAsyncReadyCallback)file_read_cb, tester);
+ g_object_unref (file);
+}
+
+static void
+ephy_uri_tester_init (EphyUriTester *tester)
+{
+ LOG ("EphyUriTester initializing %p", tester);
+
+ tester->filters = NULL;
+ tester->pattern = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify)g_free,
+ (GDestroyNotify)g_regex_unref);
+ tester->keys = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify)g_free,
+ (GDestroyNotify)g_regex_unref);
+ tester->optslist = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify)g_free,
+ (GDestroyNotify)g_free);
+ tester->urlcache = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify)g_free,
+ NULL);
+
+ tester->whitelisted_pattern = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify)g_free,
+ (GDestroyNotify)g_regex_unref);
+ tester->whitelisted_keys = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify)g_free,
+ (GDestroyNotify)g_regex_unref);
+ tester->whitelisted_optslist = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify)g_free,
+ (GDestroyNotify)g_free);
+ tester->whitelisted_urlcache = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify)g_free,
+ NULL);
+
+ tester->blockcss = g_string_new ("z-non-exist");
+ tester->blockcssprivate = g_string_new ("");
+
+ tester->regex_third_party = g_regex_new (",third-party",
+ G_REGEX_CASELESS | G_REGEX_OPTIMIZE,
+ G_REGEX_MATCH_NOTEMPTY,
+ NULL);
+ tester->regex_pattern = g_regex_new ("^/.*[\\^\\$\\*].*/$",
+ G_REGEX_UNGREEDY | G_REGEX_OPTIMIZE,
+ G_REGEX_MATCH_NOTEMPTY,
+ NULL);
+ tester->regex_subdocument = g_regex_new ("subdocument",
+ G_REGEX_CASELESS | G_REGEX_OPTIMIZE,
+ G_REGEX_MATCH_NOTEMPTY,
+ NULL);
+ tester->regex_frame_add = g_regex_new (".*\\[.*:.*\\].*",
+ G_REGEX_CASELESS | G_REGEX_OPTIMIZE,
+ G_REGEX_MATCH_NOTEMPTY,
+ NULL);
+}
+
+static void
+ephy_uri_tester_constructed (GObject *object)
+{
+ EphyUriTester *tester = EPHY_URI_TESTER (object);
+
+ G_OBJECT_CLASS (ephy_uri_tester_parent_class)->constructed (object);
+
+ ephy_uri_tester_load_filters (tester);
+ ephy_uri_tester_load_patterns (tester);
+}
+
+static void
+ephy_uri_tester_set_property (GObject *object,
+ guint prop_id,
+ const GValue *value,
+ GParamSpec *pspec)
+{
+ EphyUriTester *tester = EPHY_URI_TESTER (object);
+
+ switch (prop_id) {
+ case PROP_FILTERS:
+ ephy_uri_tester_set_filters (tester, (GSList *)g_value_get_pointer (value));
+ break;
+ case PROP_BASE_DATA_DIR:
+ tester->data_dir = ephy_uri_tester_ensure_data_dir (g_value_get_string (value));
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ break;
+ }
+}
+
+static void
+ephy_uri_tester_finalize (GObject *object)
+{
+ EphyUriTester *tester = EPHY_URI_TESTER (object);
+
+ LOG ("EphyUriTester finalizing %p", object);
+
+ g_slist_free_full (tester->filters, g_free);
+ g_free (tester->data_dir);
+
+ g_hash_table_destroy (tester->pattern);
+ g_hash_table_destroy (tester->keys);
+ g_hash_table_destroy (tester->optslist);
+ g_hash_table_destroy (tester->urlcache);
+
+ g_hash_table_destroy (tester->whitelisted_pattern);
+ g_hash_table_destroy (tester->whitelisted_keys);
+ g_hash_table_destroy (tester->whitelisted_optslist);
+ g_hash_table_destroy (tester->whitelisted_urlcache);
+
+ g_string_free (tester->blockcss, TRUE);
+ g_string_free (tester->blockcssprivate, TRUE);
+
+ g_regex_unref (tester->regex_third_party);
+ g_regex_unref (tester->regex_pattern);
+ g_regex_unref (tester->regex_subdocument);
+ g_regex_unref (tester->regex_frame_add);
+
+ G_OBJECT_CLASS (ephy_uri_tester_parent_class)->finalize (object);
+}
+
+static void
+ephy_uri_tester_class_init (EphyUriTesterClass *klass)
+{
+ GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+ object_class->set_property = ephy_uri_tester_set_property;
+ object_class->constructed = ephy_uri_tester_constructed;
+ object_class->finalize = ephy_uri_tester_finalize;
+
+ obj_properties[PROP_FILTERS] =
+ g_param_spec_pointer ("filters",
+ "filters",
+ "filters",
+ G_PARAM_WRITABLE | G_PARAM_STATIC_STRINGS);
+
+ obj_properties[PROP_BASE_DATA_DIR] =
+ g_param_spec_string ("base-data-dir",
+ "Base data dir",
+ "The base dir where to create the adblock data dir",
+ NULL,
+ G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS);
+
+ g_object_class_install_properties (object_class, LAST_PROP, obj_properties);
+}
+
+EphyUriTester *
+ephy_uri_tester_new (void)
+{
+ return g_object_new (EPHY_TYPE_URI_TESTER, "base-data-dir", ephy_dot_dir (), NULL);
+}
+
+static gboolean
+ephy_uri_tester_test_uri (EphyUriTester *tester,
+ const char *req_uri,
+ const char *page_uri)
+{
+ /* Always load the main resource. */
+ if (g_strcmp0 (req_uri, page_uri) == 0)
+ return FALSE;
+
+ /* Always load data requests, as uri_tester won't do any good here. */
+ if (g_str_has_prefix (req_uri, SOUP_URI_SCHEME_DATA))
+ return FALSE;
+
+ /* check whitelisting rules before the normal ones */
+ if (ephy_uri_tester_is_matched (tester, NULL, req_uri, page_uri, TRUE))
+ return FALSE;
+ return ephy_uri_tester_is_matched (tester, NULL, req_uri, page_uri, FALSE);
+}
+
+static char *
+ephy_uri_tester_rewrite_uri (EphyUriTester *uri_tester,
+ const char *request_uri,
+ const char *page_uri)
+{
+ char *modified_uri;
+
+ /* Should we block the URL outright? */
+ if (g_settings_get_boolean (EPHY_SETTINGS_WEB, EPHY_PREFS_WEB_ENABLE_ADBLOCK) &&
+ ephy_uri_tester_test_uri (uri_tester, request_uri, page_uri)) {
+ g_debug ("Request '%s' blocked (page: '%s')", request_uri, page_uri);
+ return g_strdup ("");
+ }
+
+ if (g_settings_get_boolean (EPHY_SETTINGS_WEB, EPHY_PREFS_WEB_DO_NOT_TRACK)) {
+ /* Remove analytics from URL. Note that this function is a bit annoying to
+ * use: it returns NULL if it doesn't remove any query parameters. */
+ modified_uri = ephy_remove_tracking_from_uri (request_uri);
+ }
+
+ if (!modified_uri)
+ modified_uri = g_strdup (request_uri);
+
+ /* FIXME: Rewrite URL to use HTTPS if directed by HTTPS Everywhere */
+
+ return modified_uri;
+}
+
+static void
+handle_method_call (GDBusConnection *connection,
+ const char *sender,
+ const char *object_path,
+ const char *interface_name,
+ const char *method_name,
+ GVariant *parameters,
+ GDBusMethodInvocation *invocation,
+ gpointer user_data)
+{
+ EphyUriTester *uri_tester = EPHY_URI_TESTER (user_data);
+
+ if (g_strcmp0 (interface_name, EPHY_URI_TESTER_INTERFACE) != 0)
+ return;
+
+ if (g_strcmp0 (method_name, "MaybeRewriteUri") == 0) {
+ const char *request_uri;
+ const char *page_uri;
+ char *rewritten_uri;
+
+ g_variant_get (parameters, "(&s&s)", &request_uri, &page_uri);
+
+ if (request_uri == NULL || request_uri == '\0') {
+ g_dbus_method_invocation_return_error (invocation, G_DBUS_ERROR, G_DBUS_ERROR_INVALID_ARGS,
+ "Request URI cannot be NULL or empty");
+ return;
+ }
+
+ if (page_uri == NULL || page_uri == '\0') {
+ g_dbus_method_invocation_return_error (invocation, G_DBUS_ERROR, G_DBUS_ERROR_INVALID_ARGS,
+ "Page URI cannot be NULL or empty");
+ return;
+ }
+
+ rewritten_uri = ephy_uri_tester_rewrite_uri (uri_tester, request_uri, page_uri);
+
+ if (!rewritten_uri)
+ rewritten_uri = g_strdup ("");
+
+ g_dbus_method_invocation_return_value (invocation,
+ g_variant_new ("(s)", rewritten_uri));
+
+ g_free (rewritten_uri);
+ }
+}
+
+static const GDBusInterfaceVTable interface_vtable = {
+ handle_method_call,
+ NULL,
+ NULL
+};
+
+void
+ephy_uri_tester_register_dbus_object (EphyUriTester *tester,
+ GDBusConnection *connection)
+{
+ static GDBusNodeInfo *introspection_data = NULL;
+ guint registration_id;
+ GError *error = NULL;
+
+ if (!introspection_data)
+ introspection_data = g_dbus_node_info_new_for_xml (ephy_uri_tester_introspection_xml, NULL);
+
+ registration_id =
+ g_dbus_connection_register_object (connection,
+ EPHY_URI_TESTER_OBJECT_PATH,
+ introspection_data->interfaces[0],
+ &interface_vtable,
+ g_object_ref (tester),
+ g_object_unref,
+ &error);
+ if (!registration_id) {
+ g_warning ("Failed to register URI tester object: %s\n", error->message);
+ g_error_free (error);
+ return;
+ }
+}