summary refs log tree commit diff
path: root/lib/safe-browsing/ephy-gsb-utils.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/safe-browsing/ephy-gsb-utils.c')
-rw-r--r-- lib/safe-browsing/ephy-gsb-utils.c 919
1 files changed, 0 insertions, 919 deletions
diff --git a/lib/safe-browsing/ephy-gsb-utils.c b/lib/safe-browsing/ephy-gsb-utils.c
deleted file mode 100644
index d3cc81605..000000000
--- a/lib/safe-browsing/ephy-gsb-utils.c
+++ /dev/null
@@ -1,919 +0,0 @@
-/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/*
- * Copyright © 2017 Gabriel Ivascu <gabrielivascu@gnome.org>
- *
- * This file is part of Epiphany.
- *
- * Epiphany is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Epiphany is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Epiphany. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "config.h"
-#include "ephy-gsb-utils.h"
-
-#include "ephy-debug.h"
-#include "ephy-string.h"
-
-#include <arpa/inet.h>
-#include <libsoup/soup.h>
-#include <stdio.h>
-#include <string.h>
-
-#define MAX_HOST_SUFFIXES 5
-#define MAX_PATH_PREFIXES 6
-#define MAX_UNESCAPE_STEP 1024
-
-/* Cursor over a byte buffer that is consumed one bit at a time. */
-typedef struct {
-  /* The bit stream, stored as an array of bytes. */
-  guint8 *data;
-  /* Number of bytes in @data. */
-  gsize data_len;
-  /* Byte of the bit stream that is currently being consumed. */
-  guint8 *curr;
-  /* Single-bit mask selecting the next bit to read within *curr. */
-  guint8 mask;
-  /* Number of bits read so far. */
-  gsize num_read;
-} EphyGSBBitReader;
-
-/* Golomb-Rice decoder state layered on top of a bit reader. */
-typedef struct {
-  /* Bit-level cursor over the encoded data. */
-  EphyGSBBitReader *reader;
-  /* Golomb-Rice parameter, between 2 and 28. */
-  guint parameter;
-} EphyGSBRiceDecoder;
-
-/*
- * Allocate a bit reader over a private copy of the @data_len bytes at @data.
- * Reading starts with the least-significant bit of the first byte.
- * Release the reader with ephy_gsb_bit_reader_free().
- */
-static inline EphyGSBBitReader *
-ephy_gsb_bit_reader_new (const guint8 *data,
-                         gsize         data_len)
-{
-  EphyGSBBitReader *self;
-  guint8 *copy;
-
-  g_assert (data);
-  g_assert (data_len > 0);
-
-  /* Work on an own copy so that the caller's buffer can be freed at any time. */
-  copy = g_malloc (data_len);
-  memcpy (copy, data, data_len);
-
-  self = g_new (EphyGSBBitReader, 1);
-  self->data = copy;
-  self->data_len = data_len;
-  self->curr = copy;
-  self->mask = 0x01;
-  self->num_read = 0;
-
-  return self;
-}
-
-/* Release @reader together with its private copy of the bit stream. */
-static inline void
-ephy_gsb_bit_reader_free (EphyGSBBitReader *reader)
-{
-  g_assert (reader);
-
-  g_clear_pointer (&reader->data, g_free);
-  g_free (reader);
-}
-
-/*
- * Read the next @num_bits bits (at most 32) from @reader and return them
- * packed into a guint32: the first bit read lands in bit 0 of the result,
- * the second in bit 1, and so on. Asserts if fewer than @num_bits bits are
- * left in the stream.
- *
- * https://developers.google.com/safe-browsing/v4/compression#bit-encoderdecoder
- */
-static guint32
-ephy_gsb_bit_reader_read (EphyGSBBitReader *reader,
-                          guint             num_bits)
-{
-  guint32 retval = 0;
-
-  /* Cannot read more than 4 bytes at once. */
-  g_assert (num_bits <= 32);
-  /* Cannot read more bits than the buffer has left. */
-  g_assert (reader->num_read + num_bits <= reader->data_len * 8);
-
-  /* Within a byte, the least-significant bits come before the most-significant
-   * bits in the bit stream. */
-  for (guint i = 0; i < num_bits; i++) {
-    /* Shift an unsigned 32-bit one: shifting the int constant 1 left by 31
-     * overflows a signed int, which is undefined behavior. */
-    if (*reader->curr & reader->mask)
-      retval |= (guint32)1 << i;
-
-    /* The mask is 8 bits wide, so it becomes 0 once the current byte has been
-     * fully consumed; move on to the next byte then. */
-    reader->mask <<= 1;
-    if (reader->mask == 0) {
-      reader->curr++;
-      reader->mask = 0x01;
-    }
-  }
-
-  reader->num_read += num_bits;
-
-  return retval;
-}
-
-/*
- * Create a Golomb-Rice decoder for the @data_len bytes at @data, using
- * @parameter as the Rice parameter. The bytes are handed to a newly created
- * bit reader. Release the decoder with ephy_gsb_rice_decoder_free().
- */
-static inline EphyGSBRiceDecoder *
-ephy_gsb_rice_decoder_new (const guint8 *data,
-                           gsize         data_len,
-                           guint         parameter)
-{
-  EphyGSBRiceDecoder *self;
-
-  g_assert (data);
-  g_assert (data_len > 0);
-
-  self = g_new (EphyGSBRiceDecoder, 1);
-  self->parameter = parameter;
-  self->reader = ephy_gsb_bit_reader_new (data, data_len);
-
-  return self;
-}
-
-/* Release @decoder and the bit reader it owns. */
-static inline void
-ephy_gsb_rice_decoder_free (EphyGSBRiceDecoder *decoder)
-{
-  g_assert (decoder);
-
-  g_clear_pointer (&decoder->reader, ephy_gsb_bit_reader_free);
-  g_free (decoder);
-}
-
-/*
- * Decode the next value from @decoder: a unary-coded quotient (a run of 1
- * bits closed by a 0 bit) followed by a remainder that is
- * decoder->parameter bits wide.
- */
-static guint32
-ephy_gsb_rice_decoder_next (EphyGSBRiceDecoder *decoder)
-{
-  guint32 quotient = 0;
-  guint32 remainder;
-
-  g_assert (decoder);
-
-  /* Count the 1 bits; the terminating 0 bit is consumed as well. */
-  while (ephy_gsb_bit_reader_read (decoder->reader, 1) != 0)
-    quotient++;
-
-  remainder = ephy_gsb_bit_reader_read (decoder->reader, decoder->parameter);
-
-  return (quotient << decoder->parameter) + remainder;
-}
-
-/*
- * Allocate an #EphyGSBThreatList holding copies of the given strings.
- * Unlike the other arguments, @client_state is not asserted to be non-NULL.
- * Release the result with ephy_gsb_threat_list_free().
- */
-EphyGSBThreatList *
-ephy_gsb_threat_list_new (const char *threat_type,
-                          const char *platform_type,
-                          const char *threat_entry_type,
-                          const char *client_state)
-{
-  EphyGSBThreatList *self;
-
-  g_assert (threat_type);
-  g_assert (platform_type);
-  g_assert (threat_entry_type);
-
-  self = g_new (EphyGSBThreatList, 1);
-  self->client_state = g_strdup (client_state);
-  self->threat_entry_type = g_strdup (threat_entry_type);
-  self->platform_type = g_strdup (platform_type);
-  self->threat_type = g_strdup (threat_type);
-
-  return self;
-}
-
-/* Release @list and every string it owns. */
-void
-ephy_gsb_threat_list_free (EphyGSBThreatList *list)
-{
-  g_assert (list);
-
-  g_clear_pointer (&list->threat_type, g_free);
-  g_clear_pointer (&list->platform_type, g_free);
-  g_clear_pointer (&list->threat_entry_type, g_free);
-  g_clear_pointer (&list->client_state, g_free);
-  g_free (list);
-}
-
-/*
- * Tell whether @l1 and @l2 describe the same threat list, i.e. whether their
- * threat type, platform type and threat entry type all match. The client
- * state is not part of the comparison.
- */
-gboolean
-ephy_gsb_threat_list_equal (EphyGSBThreatList *l1,
-                            EphyGSBThreatList *l2)
-{
-  g_assert (l1);
-  g_assert (l2);
-
-  return g_strcmp0 (l1->threat_type, l2->threat_type) == 0 &&
-         g_strcmp0 (l1->platform_type, l2->platform_type) == 0 &&
-         g_strcmp0 (l1->threat_entry_type, l2->threat_entry_type) == 0;
-}
-
-/*
- * Allocate an #EphyGSBHashPrefixLookup that stores a copy of the @length
- * bytes at @prefix as a #GBytes, along with the @negative_expired flag.
- * Release the result with ephy_gsb_hash_prefix_lookup_free().
- */
-EphyGSBHashPrefixLookup *
-ephy_gsb_hash_prefix_lookup_new (const guint8 *prefix,
-                                 gsize         length,
-                                 gboolean      negative_expired)
-{
-  EphyGSBHashPrefixLookup *self;
-
-  g_assert (prefix);
-
-  self = g_new (EphyGSBHashPrefixLookup, 1);
-  self->negative_expired = negative_expired;
-  self->prefix = g_bytes_new (prefix, length);
-
-  return self;
-}
-
-/* Release @lookup and drop its reference on the prefix bytes. */
-void
-ephy_gsb_hash_prefix_lookup_free (EphyGSBHashPrefixLookup *lookup)
-{
-  g_assert (lookup);
-
-  g_clear_pointer (&lookup->prefix, g_bytes_unref);
-  g_free (lookup);
-}
-
-/*
- * Allocate an #EphyGSBHashFullLookup. GSB_HASH_SIZE bytes are copied from
- * @hash into a #GBytes and the three type strings are duplicated.
- * Release the result with ephy_gsb_hash_full_lookup_free().
- */
-EphyGSBHashFullLookup *
-ephy_gsb_hash_full_lookup_new (const guint8 *hash,
-                               const char   *threat_type,
-                               const char   *platform_type,
-                               const char   *threat_entry_type,
-                               gboolean      expired)
-{
-  EphyGSBHashFullLookup *self;
-
-  g_assert (hash);
-  g_assert (threat_type);
-  g_assert (platform_type);
-  g_assert (threat_entry_type);
-
-  self = g_new (EphyGSBHashFullLookup, 1);
-  self->expired = expired;
-  self->threat_entry_type = g_strdup (threat_entry_type);
-  self->platform_type = g_strdup (platform_type);
-  self->threat_type = g_strdup (threat_type);
-  self->hash = g_bytes_new (hash, GSB_HASH_SIZE);
-
-  return self;
-}
-
-/* Release @lookup, its hash bytes and its type strings. */
-void
-ephy_gsb_hash_full_lookup_free (EphyGSBHashFullLookup *lookup)
-{
-  g_assert (lookup);
-
-  g_clear_pointer (&lookup->hash, g_bytes_unref);
-  g_clear_pointer (&lookup->threat_type, g_free);
-  g_clear_pointer (&lookup->platform_type, g_free);
-  g_clear_pointer (&lookup->threat_entry_type, g_free);
-  g_free (lookup);
-}
-
-/*
- * Build the "client" JSON object sent with every request: the client id
- * "Epiphany" and the build's VERSION string. The caller owns the result.
- */
-static JsonObject *
-ephy_gsb_utils_make_client_info (void)
-{
-  JsonObject *info = json_object_new ();
-
-  json_object_set_string_member (info, "clientId", "Epiphany");
-  json_object_set_string_member (info, "clientVersion", VERSION);
-
-  return info;
-}
-
-/*
- * Build the "constraints" JSON object of a list update request. It imposes
- * no limits on the server and announces support for both the raw and the
- * Rice compression types. The caller owns the result.
- */
-static JsonObject *
-ephy_gsb_utils_make_contraints (void)
-{
-  JsonArray *supported = json_array_new ();
-  JsonObject *result;
-
-  json_array_add_string_element (supported, GSB_COMPRESSION_TYPE_RAW);
-  json_array_add_string_element (supported, GSB_COMPRESSION_TYPE_RICE);
-
-  result = json_object_new ();
-  /* 0 means that the number of update entries is not restricted. */
-  json_object_set_int_member (result, "maxUpdateEntries", 0);
-  /* 0 means that the number of database entries is not restricted. */
-  json_object_set_int_member (result, "maxDatabaseEntries", 0);
-  /* A null region lets the server pick the geographic region by itself. */
-  json_object_set_null_member (result, "region");
-  json_object_set_array_member (result, "supportedCompressions", supported);
-
-  return result;
-}
-
-/**
- * ephy_gsb_utils_make_list_updates_request:
- * @threat_lists: a #GList of #EphyGSBThreatList
- *
- * Build the body of a threatListUpdates:fetch request. Every threat list
- * contributes one entry to "listUpdateRequests"; iteration stops at the end
- * of the list or at the first element without data.
- *
- * https://developers.google.com/safe-browsing/v4/reference/rest/v4/threatListUpdates/fetch#request-body
- *
- * Return value: (transfer full): the string representation of the request body
- **/
-char *
-ephy_gsb_utils_make_list_updates_request (GList *threat_lists)
-{
-  JsonArray *update_requests;
-  JsonObject *root;
-  JsonNode *root_node;
-  GList *iter;
-  char *serialized;
-
-  g_assert (threat_lists);
-
-  update_requests = json_array_new ();
-  iter = threat_lists;
-  while (iter && iter->data) {
-    EphyGSBThreatList *threat_list = iter->data;
-    JsonObject *entry = json_object_new ();
-
-    json_object_set_string_member (entry, "threatType", threat_list->threat_type);
-    json_object_set_string_member (entry, "platformType", threat_list->platform_type);
-    json_object_set_string_member (entry, "threatEntryType", threat_list->threat_entry_type);
-    json_object_set_string_member (entry, "state", threat_list->client_state);
-    json_object_set_object_member (entry, "constraints", ephy_gsb_utils_make_contraints ());
-    json_array_add_object_element (update_requests, entry);
-
-    iter = iter->next;
-  }
-
-  root = json_object_new ();
-  json_object_set_object_member (root, "client", ephy_gsb_utils_make_client_info ());
-  json_object_set_array_member (root, "listUpdateRequests", update_requests);
-
-  /* Wrap the root object in a node so that it can be serialized (compact form). */
-  root_node = json_node_new (JSON_NODE_OBJECT);
-  json_node_set_object (root_node, root);
-  serialized = json_to_string (root_node, FALSE);
-
-  json_object_unref (root);
-  json_node_unref (root_node);
-
-  return serialized;
-}
-
-/*
- * Turn the keys of the string set @set into a newly created #JsonArray of
- * strings. Iteration stops at the first key list element without data.
- */
-static JsonArray *
-ephy_gsb_utils_string_set_to_json_array (GHashTable *set)
-{
-  JsonArray *array = json_array_new ();
-  GList *keys = g_hash_table_get_keys (set);
-
-  for (GList *k = keys; k && k->data; k = k->next)
-    json_array_add_string_element (array, (const char *)k->data);
-
-  g_list_free (keys);
-
-  return array;
-}
-
-/**
- * ephy_gsb_utils_make_full_hashes_request:
- * @threat_lists: a #GList of #EphyGSBThreatList
- * @hash_prefixes: a #GList of #GBytes
- *
- * Build the body of a fullHashes:find request. The threat, platform and
- * threat entry types of @threat_lists are each sent without duplicates, the
- * client states are sent one per threat list, and every hash prefix is sent
- * base64-encoded.
- *
- * https://developers.google.com/safe-browsing/v4/reference/rest/v4/fullHashes/find#request-body
- *
- * Return value: (transfer full): the string representation of the request body
- **/
-char *
-ephy_gsb_utils_make_full_hashes_request (GList *threat_lists,
-                                         GList *hash_prefixes)
-{
-  GHashTable *seen_threat_types;
-  GHashTable *seen_platform_types;
-  GHashTable *seen_entry_types;
-  JsonArray *states;
-  JsonArray *entries;
-  JsonObject *info;
-  JsonObject *root;
-  JsonNode *root_node;
-  char *serialized;
-
-  g_assert (threat_lists);
-  g_assert (hash_prefixes);
-
-  /* The sets only borrow the strings owned by the threat lists. */
-  seen_threat_types = g_hash_table_new (g_str_hash, g_str_equal);
-  seen_platform_types = g_hash_table_new (g_str_hash, g_str_equal);
-  seen_entry_types = g_hash_table_new (g_str_hash, g_str_equal);
-  states = json_array_new ();
-
-  for (GList *iter = threat_lists; iter && iter->data; iter = iter->next) {
-    EphyGSBThreatList *threat_list = iter->data;
-
-    if (!g_hash_table_contains (seen_threat_types, threat_list->threat_type))
-      g_hash_table_add (seen_threat_types, threat_list->threat_type);
-    if (!g_hash_table_contains (seen_platform_types, threat_list->platform_type))
-      g_hash_table_add (seen_platform_types, threat_list->platform_type);
-    if (!g_hash_table_contains (seen_entry_types, threat_list->threat_entry_type))
-      g_hash_table_add (seen_entry_types, threat_list->threat_entry_type);
-
-    json_array_add_string_element (states, threat_list->client_state);
-  }
-
-  entries = json_array_new ();
-  for (GList *iter = hash_prefixes; iter && iter->data; iter = iter->next) {
-    GBytes *prefix = iter->data;
-    char *encoded = g_base64_encode (g_bytes_get_data (prefix, NULL),
-                                     g_bytes_get_size (prefix));
-    JsonObject *entry = json_object_new ();
-
-    json_object_set_string_member (entry, "hash", encoded);
-    json_array_add_object_element (entries, entry);
-
-    g_free (encoded);
-  }
-
-  info = json_object_new ();
-  json_object_set_array_member (info, "threatTypes",
-                                ephy_gsb_utils_string_set_to_json_array (seen_threat_types));
-  json_object_set_array_member (info, "platformTypes",
-                                ephy_gsb_utils_string_set_to_json_array (seen_platform_types));
-  json_object_set_array_member (info, "threatEntryTypes",
-                                ephy_gsb_utils_string_set_to_json_array (seen_entry_types));
-  json_object_set_array_member (info, "threatEntries", entries);
-
-  root = json_object_new ();
-  json_object_set_object_member (root, "client", ephy_gsb_utils_make_client_info ());
-  json_object_set_array_member (root, "clientStates", states);
-  json_object_set_object_member (root, "threatInfo", info);
-  json_object_set_null_member (root, "apiClient");
-
-  /* Wrap the root object in a node so that it can be serialized (pretty form). */
-  root_node = json_node_new (JSON_NODE_OBJECT);
-  json_node_set_object (root_node, root);
-  serialized = json_to_string (root_node, TRUE);
-
-  g_hash_table_unref (seen_threat_types);
-  g_hash_table_unref (seen_platform_types);
-  g_hash_table_unref (seen_entry_types);
-  json_object_unref (root);
-  json_node_unref (root_node);
-
-  return serialized;
-}
-
-/**
- * ephy_gsb_utils_rice_delta_decode:
- * @rde: a RiceDeltaEncoding object as a #JsonObject
- * @num_items: out parameter for the length of the returned array. This will be
- *             equal to 1 + RiceDeltaEncoding.numEntries
- *
- * Decompress the Rice-encoded data of a ThreatEntrySet received from a
- * threatListUpdates:fetch response.
- *
- * The object comes from the network, so its members are validated: a
- * negative or unrepresentable numEntries is treated as 0, and when the Rice
- * parameter is outside of [2, 28] or the encoded data is missing or empty,
- * nothing is decoded. The returned array is zero-initialized, so entries that
- * were not decoded read as 0 instead of being uninitialized memory.
- *
- * Note that the underlying bit reader still asserts if the encoded data is
- * too short for the announced number of entries.
- *
- * https://developers.google.com/safe-browsing/v4/compression#rice-compression
- * https://developers.google.com/safe-browsing/v4/reference/rest/v4/threatListUpdates/fetch#ricedeltaencoding
- *
- * Return value: (transfer full): the decompressed values as an array of guint32s
- **/
-guint32 *
-ephy_gsb_utils_rice_delta_decode (JsonObject *rde,
-                                  gsize      *num_items)
-{
-  EphyGSBRiceDecoder *decoder;
-  const char *data_b64 = NULL;
-  const char *first_value_str = NULL;
-  guint32 *items;
-  guint8 *data;
-  gsize data_len = 0;
-  gsize num_entries = 0;
-  gint64 parameter = 0;
-
-  g_assert (rde);
-  g_assert (num_items);
-
-  if (json_object_has_member (rde, "firstValue"))
-    first_value_str = json_object_get_string_member (rde, "firstValue");
-  /* Keep the full 64-bit value so that the range check below cannot be fooled
-   * by a truncating conversion. */
-  if (json_object_has_member (rde, "riceParameter"))
-    parameter = json_object_get_int_member (rde, "riceParameter");
-  if (json_object_has_member (rde, "numEntries")) {
-    gint64 value = json_object_get_int_member (rde, "numEntries");
-
-    /* Reject negative counts and counts for which 1 + count would not fit in
-     * a gsize; both would otherwise wrap around. */
-    if (value > 0 && (guint64)value < G_MAXSIZE)
-      num_entries = (gsize)value;
-  }
-  if (json_object_has_member (rde, "encodedData"))
-    data_b64 = json_object_get_string_member (rde, "encodedData");
-
-  *num_items = 1 + num_entries;
-  /* g_new0() checks the size multiplication for overflow and zeroes the array. */
-  items = g_new0 (guint32, *num_items);
-  items[0] = first_value_str ? g_ascii_strtoull (first_value_str, NULL, 10) : 0;
-
-  if (num_entries == 0)
-    return items;
-
-  /* Sanity check. */
-  if (parameter < 2 || parameter > 28 || data_b64 == NULL)
-    return items;
-
-  data = g_base64_decode (data_b64, &data_len);
-  /* The decoder asserts on an empty buffer, so bail out before creating it. */
-  if (data == NULL || data_len == 0) {
-    g_free (data);
-    return items;
-  }
-
-  decoder = ephy_gsb_rice_decoder_new (data, data_len, (guint)parameter);
-
-  /* Each decoded value is the difference to the previous item. */
-  for (gsize i = 1; i <= num_entries; i++)
-    items[i] = items[i - 1] + ephy_gsb_rice_decoder_next (decoder);
-
-  g_free (data);
-  ephy_gsb_rice_decoder_free (decoder);
-
-  return items;
-}
-
-/*
- * Percent-decode @part over and over until decoding no longer changes the
- * string, which undoes multiple levels of escaping. The number of extra
- * rounds is capped by MAX_UNESCAPE_STEP. The caller owns the result.
- */
-static char *
-ephy_gsb_utils_full_unescape (const char *part)
-{
-  char *previous;
-  char *current;
-  int rounds = 0;
-
-  g_assert (part);
-
-  previous = g_strdup (part);
-  current = soup_uri_decode (part);
-
-  for (;;) {
-    char *next;
-
-    /* A fixed point has been reached: nothing is left to unescape. */
-    if (g_strcmp0 (previous, current) == 0)
-      break;
-    if (rounds++ >= MAX_UNESCAPE_STEP)
-      break;
-
-    next = soup_uri_decode (current);
-    g_free (previous);
-    previous = current;
-    current = next;
-  }
-
-  g_free (previous);
-
-  return current;
-}
-
-/*
- * Percent-escape @part: bytes up to and including the space character
- * (0x20), bytes from 0x7f upwards, '#' and '%' are written as %XX with
- * uppercase hex digits; every other byte is copied verbatim. The caller owns
- * the result.
- */
-static char *
-ephy_gsb_utils_escape (const char *part)
-{
-  GString *out;
-
-  g_assert (part);
-
-  out = g_string_new (NULL);
-
-  /* soup_uri_encode() is not used here because it also escapes characters
-   * that must stay unescaped.
-   */
-  for (const guchar *p = (const guchar *)part; *p != '\0'; p++) {
-    guchar c = *p;
-    gboolean needs_escape = c <= 0x20 || c >= 0x7f || c == '#' || c == '%';
-
-    if (needs_escape)
-      g_string_append_printf (out, "%%%02X", c);
-    else
-      g_string_append_c (out, c);
-  }
-
-  return g_string_free (out, FALSE);
-}
-
-/*
- * Bring the escaping of @part into a normal form: unescape it completely,
- * then escape the outcome exactly once. The caller owns the result.
- */
-static char *
-ephy_gsb_utils_normalize_escape (const char *part)
-{
-  char *unescaped;
-  char *escaped;
-
-  g_assert (part);
-
-  unescaped = ephy_gsb_utils_full_unescape (part);
-  escaped = ephy_gsb_utils_escape (unescaped);
-  g_free (unescaped);
-
-  return escaped;
-}
-
-/*
- * Canonicalize the host name @host: strip leading and trailing dots, replace
- * ".." with ".", then either normalize an IP address or lowercase the name.
- * The caller owns the result.
- */
-static char *
-ephy_gsb_utils_canonicalize_host (const char *host)
-{
-  struct in_addr ip;
-  char *stripped;
-  char *collapsed;
-  char *canonical;
-
-  g_assert (host);
-
-  stripped = g_strdup (host);
-  ephy_string_remove_leading (stripped, '.');
-  ephy_string_remove_trailing (stripped, '.');
-
-  /* Per the original author's note, this actually replaces groups of
-   * consecutive dots with a single dot. */
-  collapsed = ephy_string_find_and_replace (stripped, "..", ".");
-  g_free (stripped);
-
-  /* If host is an IP address, normalize it to 4 dot-separated decimal values.
-   * If host is not an IP address, then it's a string and needs to be lowercased.
-   *
-   * inet_aton() handles octal, hex and fewer than 4 components addresses.
-   * See https://linux.die.net/man/3/inet_network
-   */
-  if (inet_aton (collapsed, &ip) == 0)
-    canonical = g_ascii_strdown (collapsed, -1);
-  else
-    canonical = g_strdup (inet_ntoa (ip));
-
-  g_free (collapsed);
-
-  return canonical;
-}
-
-/**
- * ephy_gsb_utils_canonicalize:
- * @url: the URL to canonicalize
- * @host_out: out parameter for the host value of the canonicalized URL or %NULL
- * @path_out: out parameter for the path value of the canonicalized URL or %NULL
- * @query_out: out parameter for the query value of the canonicalized URL or %NULL
- *
- * Canonicalize @url according to Google Safe Browsing API v4 specification.
- * The out parameters are only written when the canonicalization succeeds;
- * the strings stored in them are owned by the caller.
- *
- * https://developers.google.com/safe-browsing/v4/urls-hashing#canonicalization
- *
- * Return value: (transfer full): the canonical form of @url or %NULL if @url
- *               is not a valid URL
- **/
-char *
-ephy_gsb_utils_canonicalize (const char  *url,
-                             char       **host_out,
-                             char       **path_out,
-                             char       **query_out)
-{
-  SoupURI *uri;
-  char *with_scheme;
-  char *host_escaped;
-  char *path_escaped;
-  char *host_canonical;
-  char *path_canonical;
-  char *canonical;
-  const char *query;
-
-  g_assert (url);
-
-  /* Supply the http scheme when the URL comes without one. */
-  if (g_str_has_prefix (url, "//"))
-    with_scheme = g_strconcat ("http:", url, NULL);
-  else if (g_str_has_prefix (url, "://"))
-    with_scheme = g_strconcat ("http", url, NULL);
-  else if (strstr (url, "://") == NULL)
-    with_scheme = g_strconcat ("http://", url, NULL);
-  else
-    with_scheme = g_strdup (url);
-
-  /* soup_uri_new() prepares the URL for us:
-   *  1. Strips trailing and leading whitespaces.
-   *  2. Includes the path component if missing.
-   *  3. Removes tab (0x09), CR (0x0d), LF (0x0a) characters.
-   */
-  uri = soup_uri_new (with_scheme);
-  g_free (with_scheme);
-  if (uri == NULL) {
-    LOG ("Cannot make SoupURI from URL %s", url);
-    return NULL;
-  }
-
-  /* URIs without a host, e.g. blob or data URIs, cannot be canonicalized. */
-  if (uri->host == NULL) {
-    soup_uri_free (uri);
-    return NULL;
-  }
-
-  /* The fragment is not part of the canonical form. */
-  soup_uri_set_fragment (uri, NULL);
-
-  /* Host: normalize the escaping first, then canonicalize. */
-  host_escaped = ephy_gsb_utils_normalize_escape (soup_uri_get_host (uri));
-  host_canonical = ephy_gsb_utils_canonicalize_host (host_escaped);
-  g_free (host_escaped);
-
-  /* Path: "/../" and "/./" have already been resolved by soup_uri_new(). */
-  path_escaped = ephy_gsb_utils_normalize_escape (soup_uri_get_path (uri));
-  path_canonical = ephy_string_find_and_replace (path_escaped, "//", "/");
-  g_free (path_escaped);
-
-  /* Put the parts back together; the query is appended only if present. */
-  query = soup_uri_get_query (uri);
-  if (query == NULL) {
-    canonical = g_strdup_printf ("%s://%s%s",
-                                 soup_uri_get_scheme (uri),
-                                 host_canonical, path_canonical);
-  } else {
-    canonical = g_strdup_printf ("%s://%s%s?%s",
-                                 soup_uri_get_scheme (uri),
-                                 host_canonical, path_canonical,
-                                 query);
-  }
-
-  if (host_out != NULL)
-    *host_out = g_strdup (host_canonical);
-  if (path_out != NULL)
-    *path_out = g_strdup (path_canonical);
-  if (query_out != NULL)
-    *query_out = g_strdup (query);
-
-  g_free (host_canonical);
-  g_free (path_canonical);
-  soup_uri_free (uri);
-
-  return canonical;
-}
-
-/*
- * Compute the host suffixes of @host to be used for hashing: the full host
- * first, followed (unless @host is an IP address) by the suffixes obtained
- * from its last MAX_HOST_SUFFIXES components by successively dropping the
- * leading one. The last component alone is never used. The caller owns the
- * list and its strings.
- *
- * https://developers.google.com/safe-browsing/v4/urls-hashing#suffixprefix-expressions
- */
-static GList *
-ephy_gsb_utils_compute_host_suffixes (const char *host)
-{
-  GList *suffixes;
-  struct in_addr ip;
-  char **parts;
-  int count;
-  int first;
-  int steps;
-  int end;
-
-  g_assert (host);
-
-  suffixes = g_list_prepend (NULL, g_strdup (host));
-
-  /* An IP address has no suffixes besides itself. */
-  if (inet_aton (host, &ip) != 0)
-    return suffixes;
-
-  parts = g_strsplit (host, ".", -1);
-  count = g_strv_length (parts);
-  first = MAX (count - MAX_HOST_SUFFIXES, 1);
-  steps = MIN (count - 1 - first, MAX_HOST_SUFFIXES - 1);
-  end = first + steps;
-
-  for (int idx = first; idx < end; idx++) {
-    char *suffix = g_strjoinv (".", parts + idx);
-
-    suffixes = g_list_prepend (suffixes, suffix);
-  }
-
-  g_strfreev (parts);
-
-  /* Items were prepended, so reverse to get the full host first. */
-  return g_list_reverse (suffixes);
-}
-
-/*
- * Compute the path prefixes of @path to be used for hashing: the path with
- * its query (if @query is not NULL), the path alone, and the directory
- * prefixes "/", "/a/", "/a/b/", ... built from the leading path components,
- * limited to MAX_PATH_PREFIXES - 2 of them and stopping before a prefix that
- * would duplicate the path itself.
- *
- * An empty @path or "/" has no directory prefixes, so the list is returned
- * right away in that case. The caller owns the list and its strings.
- *
- * https://developers.google.com/safe-browsing/v4/urls-hashing#suffixprefix-expressions
- */
-static GList *
-ephy_gsb_utils_compute_path_prefixes (const char *path,
-                                      const char *query)
-{
-  GList *retval = NULL;
-  char *no_trailing;
-  char **tokens;
-  int steps;
-  int num_tokens;
-  int no_trailing_len;
-  gsize path_len;
-  gboolean has_trailing;
-
-  g_assert (path);
-
-  if (query)
-    retval = g_list_prepend (retval, g_strjoin ("?", path, query, NULL));
-  retval = g_list_prepend (retval, g_strdup (path));
-
-  /* Guard against the empty path too: path[path_len - 1] below would
-   * otherwise read before the start of the string. */
-  path_len = strlen (path);
-  if (path_len == 0 || !g_strcmp0 (path, "/"))
-    return retval;
-
-  has_trailing = path[path_len - 1] == '/';
-  no_trailing = ephy_string_remove_trailing (g_strdup (path), '/');
-  no_trailing_len = strlen (no_trailing);
-
-  tokens = g_strsplit (no_trailing, "/", -1);
-  num_tokens = g_strv_length (tokens);
-  steps = MIN (num_tokens, MAX_PATH_PREFIXES - 2);
-
-  for (int i = 0; i < steps; i++) {
-    /* Each prefix extends the one added in the previous iteration, which sits
-     * at the head of the list because items are prepended. */
-    char *value = g_strconcat (i > 0 ? retval->data : "", tokens[i], "/", NULL);
-
-    /* Stop once the prefix covers the whole path. */
-    if ((has_trailing && !g_strcmp0 (value, path)) ||
-        (!has_trailing && !strncmp (value, no_trailing, no_trailing_len))) {
-      g_free (value);
-      break;
-    }
-
-    retval = g_list_prepend (retval, value);
-  }
-
-  g_free (no_trailing);
-  g_strfreev (tokens);
-
-  return g_list_reverse (retval);
-}
-
-/**
- * ephy_gsb_utils_compute_hashes:
- * @url: the URL whose hashes to be computed
- *
- * Compute the SHA256 hashes of @url: the URL is canonicalized and one hash
- * is computed for each combination of a host suffix and a path prefix.
- *
- * https://developers.google.com/safe-browsing/v4/urls-hashing#hash-computations
- *
- * Return value: (element-type #GBytes) (transfer full): a #GList containing the
- *               full hashes of @url, or %NULL if @url cannot be
- *               canonicalized. The caller takes ownership of the list and
- *               its content. Use g_list_free_full() with g_bytes_unref() as
- *               free_func when done using the list.
- **/
-GList *
-ephy_gsb_utils_compute_hashes (const char *url)
-{
-  GChecksum *sha256;
-  GList *hashes = NULL;
-  GList *hosts;
-  GList *paths;
-  guint8 *digest;
-  char *canonical;
-  char *host = NULL;
-  char *path = NULL;
-  char *query = NULL;
-  gsize digest_len = GSB_HASH_SIZE;
-
-  g_assert (url);
-
-  canonical = ephy_gsb_utils_canonicalize (url, &host, &path, &query);
-  if (canonical == NULL)
-    return NULL;
-
-  hosts = ephy_gsb_utils_compute_host_suffixes (host);
-  paths = ephy_gsb_utils_compute_path_prefixes (path, query);
-  sha256 = g_checksum_new (G_CHECKSUM_SHA256);
-  /* One scratch buffer serves all digests; g_bytes_new() copies out of it. */
-  digest = g_malloc (GSB_HASH_SIZE);
-
-  /* Hash every host-path combination.
-   * There are at most MAX_HOST_SUFFIXES * MAX_PATH_PREFIXES of them.
-   */
-  for (GList *h = hosts; h && h->data; h = h->next) {
-    for (GList *p = paths; p && p->data; p = p->next) {
-      char *expression = g_strconcat (h->data, p->data, NULL);
-
-      g_checksum_reset (sha256);
-      g_checksum_update (sha256, (const guint8 *)expression, strlen (expression));
-      g_checksum_get_digest (sha256, digest, &digest_len);
-      hashes = g_list_prepend (hashes, g_bytes_new (digest, digest_len));
-
-      g_free (expression);
-    }
-  }
-
-  g_free (digest);
-  g_checksum_free (sha256);
-  g_list_free_full (paths, g_free);
-  g_list_free_full (hosts, g_free);
-  g_free (canonical);
-  g_free (query);
-  g_free (path);
-  g_free (host);
-
-  return g_list_reverse (hashes);
-}
-
-/**
- * ephy_gsb_utils_get_hash_cues:
- * @hashes: a #GList of #GBytes
- *
- * Get the hash cues from a list of full hashes. A cue consists of the first
- * GSB_HASH_CUE_LEN bytes of a hash. The cues are returned in the order of
- * @hashes.
- *
- * Return value: (element-type #GBytes) (transfer full): a #GList containing
- *               the cues of each hash in @hashes. The caller takes ownership
- *               of the list and its content. Use g_list_free_full() with
- *               g_bytes_unref() as free_func when done using the list.
- **/
-GList *
-ephy_gsb_utils_get_hash_cues (GList *hashes)
-{
-  GList *cues = NULL;
-  GList *node;
-
-  g_assert (hashes);
-
-  node = hashes;
-  while (node && node->data) {
-    const char *bytes = g_bytes_get_data (node->data, NULL);
-    GBytes *cue = g_bytes_new (bytes, GSB_HASH_CUE_LEN);
-
-    cues = g_list_prepend (cues, cue);
-    node = node->next;
-  }
-
-  /* Items were prepended, so reverse to restore the input order. */
-  return g_list_reverse (cues);
-}
-
-/**
- * ephy_gsb_utils_hash_has_prefix:
- * @hash: the full hash to verify
- * @prefix: the hash prefix to verify
- *
- * Verify whether @hash begins with the prefix @prefix. An empty @prefix
- * matches every hash; a @prefix that is longer than @hash never matches.
- *
- * Return value: %TRUE if @hash begins with @prefix
- **/
-gboolean
-ephy_gsb_utils_hash_has_prefix (GBytes *hash,
-                                GBytes *prefix)
-{
-  const guint8 *hash_data;
-  const guint8 *prefix_data;
-  gsize hash_len;
-  gsize prefix_len;
-
-  g_assert (hash);
-  g_assert (prefix);
-
-  hash_data = g_bytes_get_data (hash, &hash_len);
-  prefix_data = g_bytes_get_data (prefix, &prefix_len);
-
-  /* Return before touching the data pointers, which may be NULL for empty
-   * #GBytes. */
-  if (prefix_len == 0)
-    return TRUE;
-
-  /* Never compare beyond the end of the hash. */
-  if (prefix_len > hash_len)
-    return FALSE;
-
-  return memcmp (hash_data, prefix_data, prefix_len) == 0;
-}