diff options
author | Gabriel Ivascu <gabrielivascu@gnome.org> | 2017-09-22 00:32:44 +0300 |
---|---|---|
committer | Gabriel Ivascu <gabrielivascu@gnome.org> | 2017-10-03 18:29:53 +0200 |
commit | 57f22713c0361d6a05fe82b4a5f8be3673029cb7 (patch) | |
tree | 314789f153b76affdc0a72494b3d18c89d6c82bf | |
parent | 171d4ffb7fa97c0e39f32a83614713959c791bed (diff) | |
download | epiphany-57f22713c0361d6a05fe82b4a5f8be3673029cb7.tar.gz |
safe-browsing: Add support for Rice compression scheme
https://developers.google.com/safe-browsing/v4/compression
-rw-r--r-- | lib/safe-browsing/ephy-gsb-service.c | 9 | ||||
-rw-r--r-- | lib/safe-browsing/ephy-gsb-storage.c | 183 | ||||
-rw-r--r-- | lib/safe-browsing/ephy-gsb-storage.h | 6 | ||||
-rw-r--r-- | lib/safe-browsing/ephy-gsb-utils.c | 175 | ||||
-rw-r--r-- | lib/safe-browsing/ephy-gsb-utils.h | 12 |
5 files changed, 316 insertions, 69 deletions
diff --git a/lib/safe-browsing/ephy-gsb-service.c b/lib/safe-browsing/ephy-gsb-service.c index 340297f37..adc25e7e9 100644 --- a/lib/safe-browsing/ephy-gsb-service.c +++ b/lib/safe-browsing/ephy-gsb-service.c @@ -263,9 +263,7 @@ ephy_gsb_service_update_thread (GTask *task, JsonArray *removals = json_object_get_array_member (lur, "removals"); for (guint k = 0; k < json_array_get_length (removals); k++) { JsonObject *tes = json_array_get_object_element (removals, k); - JsonObject *raw_indices = json_object_get_object_member (tes, "rawIndices"); - JsonArray *indices = json_object_get_array_member (raw_indices, "indices"); - ephy_gsb_storage_delete_hash_prefixes (self->storage, list, indices); + ephy_gsb_storage_delete_hash_prefixes (self->storage, list, tes); } } @@ -274,10 +272,7 @@ ephy_gsb_service_update_thread (GTask *task, JsonArray *additions = json_object_get_array_member (lur, "additions"); for (guint k = 0; k < json_array_get_length (additions); k++) { JsonObject *tes = json_array_get_object_element (additions, k); - JsonObject *raw_hashes = json_object_get_object_member (tes, "rawHashes"); - gint64 prefix_size = json_object_get_int_member (raw_hashes, "prefixSize"); - const char *hashes = json_object_get_string_member (raw_hashes, "rawHashes"); - ephy_gsb_storage_insert_hash_prefixes (self->storage, list, prefix_size, hashes); + ephy_gsb_storage_insert_hash_prefixes (self->storage, list, tes); } } diff --git a/lib/safe-browsing/ephy-gsb-storage.c b/lib/safe-browsing/ephy-gsb-storage.c index ec0028138..01002bf88 100644 --- a/lib/safe-browsing/ephy-gsb-storage.c +++ b/lib/safe-browsing/ephy-gsb-storage.c @@ -841,7 +841,7 @@ ephy_gsb_storage_get_hash_prefixes_to_delete (EphyGSBStorage *self, } while (ephy_sqlite_statement_step (statement, &error)) { - if (g_hash_table_contains (indices, GINT_TO_POINTER (index))) { + if (g_hash_table_contains (indices, GUINT_TO_POINTER (index))) { const guint8 *blob = ephy_sqlite_statement_get_column_as_blob (statement, 0); gsize size = ephy_sqlite_statement_get_column_size (statement, 0); prefixes = g_list_prepend (prefixes, g_bytes_new (blob, size)); @@ -891,11 +891,11 @@ ephy_gsb_storage_make_delete_hash_prefix_statement (EphyGSBStorage *self, } static GList * -ephy_gsb_storage_delete_hash_prefix_batch (EphyGSBStorage *self, - EphyGSBThreatList *list, - GList *prefixes, - gsize num_prefixes, - EphySQLiteStatement *stmt) +ephy_gsb_storage_delete_hash_prefixes_batch (EphyGSBStorage *self, + EphyGSBThreatList *list, + GList *prefixes, + gsize num_prefixes, + EphySQLiteStatement *stmt) { EphySQLiteStatement *statement = NULL; GError *error = NULL; @@ -945,10 +945,11 @@ out: return prefixes; } -void -ephy_gsb_storage_delete_hash_prefixes (EphyGSBStorage *self, - EphyGSBThreatList *list, - JsonArray *indices) +static void +ephy_gsb_storage_delete_hash_prefixes_internal (EphyGSBStorage *self, + EphyGSBThreatList *list, + guint32 *indices, + gsize num_indices) { EphySQLiteStatement *statement = NULL; GList *prefixes = NULL; @@ -961,12 +962,12 @@ ephy_gsb_storage_delete_hash_prefixes (EphyGSBStorage *self, g_assert (list); g_assert (indices); - LOG ("Deleting %u hash prefixes...", json_array_get_length (indices)); + LOG ("Deleting %lu hash prefixes...", num_indices); - /* Move indices from the JSON array to a hash table set. */ + /* Move indices from the array to a hash table set. */ set = g_hash_table_new (g_direct_hash, g_direct_equal); - for (guint i = 0; i < json_array_get_length (indices); i++) - g_hash_table_add (set, GINT_TO_POINTER (json_array_get_int_element (indices, i))); + for (gsize i = 0; i < num_indices; i++) + g_hash_table_add (set, GUINT_TO_POINTER (indices[i])); prefixes = ephy_gsb_storage_get_hash_prefixes_to_delete (self, list, set, &num_prefixes); head = prefixes; @@ -978,16 +979,16 @@ ephy_gsb_storage_delete_hash_prefixes (EphyGSBStorage *self, statement = ephy_gsb_storage_make_delete_hash_prefix_statement (self, BATCH_SIZE); for (gsize i = 0; i < num_prefixes / BATCH_SIZE; i++) { - head = ephy_gsb_storage_delete_hash_prefix_batch (self, list, - head, BATCH_SIZE, - statement); + head = ephy_gsb_storage_delete_hash_prefixes_batch (self, list, + head, BATCH_SIZE, + statement); } } if (num_prefixes % BATCH_SIZE != 0) { - ephy_gsb_storage_delete_hash_prefix_batch (self, list, - head, num_prefixes % BATCH_SIZE, - NULL); + ephy_gsb_storage_delete_hash_prefixes_batch (self, list, + head, num_prefixes % BATCH_SIZE, + NULL); } ephy_gsb_storage_end_transaction (self); @@ -998,6 +999,42 @@ ephy_gsb_storage_delete_hash_prefixes (EphyGSBStorage *self, g_object_unref (statement); } +void +ephy_gsb_storage_delete_hash_prefixes (EphyGSBStorage *self, + EphyGSBThreatList *list, + JsonObject *tes) +{ + JsonObject *raw_indices; + JsonObject *rice_indices; + JsonArray *indices_arr; + const char *compression; + guint32 *indices; + gsize num_indices; + + g_assert (EPHY_IS_GSB_STORAGE (self)); + g_assert (self->is_operable); + g_assert (list); + g_assert (tes); + + compression = json_object_get_string_member (tes, "compressionType"); + if (!g_strcmp0 (compression, GSB_COMPRESSION_TYPE_RICE)) { + rice_indices = json_object_get_object_member (tes, "riceIndices"); + indices = ephy_gsb_utils_rice_delta_decode (rice_indices, &num_indices); + } else { + raw_indices = json_object_get_object_member (tes, "rawIndices"); + indices_arr = json_object_get_array_member (raw_indices, "indices"); + num_indices = json_array_get_length (indices_arr); + + indices = g_malloc (num_indices * sizeof (guint32)); + for (gsize i = 0; i < num_indices; i++) + indices[i] = json_array_get_int_element (indices_arr, i); + } + + ephy_gsb_storage_delete_hash_prefixes_internal (self, list, indices, num_indices); + + g_free (indices); +} + static EphySQLiteStatement * ephy_gsb_storage_make_insert_hash_prefix_statement (EphyGSBStorage *self, gsize num_prefixes) @@ -1028,13 +1065,13 @@ ephy_gsb_storage_make_insert_hash_prefix_statement (EphyGSBStorage *self, } static void -ephy_gsb_storage_insert_hash_prefix_batch (EphyGSBStorage *self, - EphyGSBThreatList *list, - const guint8 *prefixes, - gsize start, - gsize end, - gsize len, - EphySQLiteStatement *stmt) +ephy_gsb_storage_insert_hash_prefixes_batch (EphyGSBStorage *self, + EphyGSBThreatList *list, + const guint8 *prefixes, + gsize start, + gsize end, + gsize len, + EphySQLiteStatement *stmt) { EphySQLiteStatement *statement = NULL; GError *error = NULL; @@ -1057,7 +1094,7 @@ ephy_gsb_storage_insert_hash_prefix_batch (EphyGSBStorage *self, } for (gsize k = start; k < end; k += len) { - if (!ephy_sqlite_statement_bind_blob (statement, id++, prefixes + k, GSB_CUE_LEN, NULL) || + if (!ephy_sqlite_statement_bind_blob (statement, id++, prefixes + k, GSB_HASH_CUE_LEN, NULL) || !ephy_sqlite_statement_bind_blob (statement, id++, prefixes + k, len, NULL) || !bind_threat_list_params (statement, list, id, id + 1, id + 2, -1)) { g_warning ("Failed to bind values in hash prefix statement"); @@ -1077,60 +1114,98 @@ out: g_object_unref (statement); } -void -ephy_gsb_storage_insert_hash_prefixes (EphyGSBStorage *self, - EphyGSBThreatList *list, - gsize prefix_len, - const char *prefixes_b64) +static void +ephy_gsb_storage_insert_hash_prefixes_internal (EphyGSBStorage *self, + EphyGSBThreatList *list, + const guint8 *prefixes, + gsize num_prefixes, + gsize prefix_len) { EphySQLiteStatement *statement = NULL; - guint8 *prefixes; - gsize prefixes_len; - gsize num_prefixes; gsize num_batches; g_assert (EPHY_IS_GSB_STORAGE (self)); g_assert (self->is_operable); g_assert (list); - g_assert (prefix_len > 0); - g_assert (prefixes_b64); - - prefixes = g_base64_decode (prefixes_b64, &prefixes_len); - num_prefixes = prefixes_len / prefix_len; - num_batches = num_prefixes / BATCH_SIZE; + g_assert (prefixes); LOG ("Inserting %lu hash prefixes of size %ld...", num_prefixes, prefix_len); ephy_gsb_storage_start_transaction (self); + num_batches = num_prefixes / BATCH_SIZE; if (num_batches > 0) { /* Reuse statement to increase performance. */ statement = ephy_gsb_storage_make_insert_hash_prefix_statement (self, BATCH_SIZE); for (gsize i = 0; i < num_batches; i++) { - ephy_gsb_storage_insert_hash_prefix_batch (self, list, prefixes, - i * prefix_len * BATCH_SIZE, - (i + 1) * prefix_len * BATCH_SIZE, - prefix_len, - statement); + ephy_gsb_storage_insert_hash_prefixes_batch (self, list, prefixes, + i * prefix_len * BATCH_SIZE, + (i + 1) * prefix_len * BATCH_SIZE, + prefix_len, + statement); } } if (num_prefixes % BATCH_SIZE != 0) { - ephy_gsb_storage_insert_hash_prefix_batch (self, list, prefixes, - num_batches * prefix_len * BATCH_SIZE, - prefixes_len - 1, - prefix_len, - NULL); + ephy_gsb_storage_insert_hash_prefixes_batch (self, list, prefixes, + num_batches * prefix_len * BATCH_SIZE, + num_prefixes * prefix_len - 1, + prefix_len, + NULL); } ephy_gsb_storage_end_transaction (self); - g_free (prefixes); if (statement) g_object_unref (statement); } +void +ephy_gsb_storage_insert_hash_prefixes (EphyGSBStorage *self, + EphyGSBThreatList *list, + JsonObject *tes) +{ + JsonObject *raw_hashes; + JsonObject *rice_hashes; + const char *compression; + const char *prefixes_b64; + guint32 *items = NULL; + guint8 *prefixes; + gsize prefixes_len; + gsize prefix_len; + gsize num_prefixes; + + g_assert (EPHY_IS_GSB_STORAGE (self)); + g_assert (self->is_operable); + g_assert (list); + g_assert (tes); + + compression = json_object_get_string_member (tes, "compressionType"); + if (!g_strcmp0 (compression, GSB_COMPRESSION_TYPE_RICE)) { + rice_hashes = json_object_get_object_member (tes, "riceHashes"); + items = ephy_gsb_utils_rice_delta_decode (rice_hashes, &num_prefixes); + + prefixes = g_malloc (num_prefixes * GSB_RICE_PREFIX_LEN); + for (gsize i = 0; i < num_prefixes; i++) + memcpy (prefixes + i * GSB_RICE_PREFIX_LEN, &items[i], GSB_RICE_PREFIX_LEN); + + prefix_len = GSB_RICE_PREFIX_LEN; + } else { + raw_hashes = json_object_get_object_member (tes, "rawHashes"); + prefix_len = json_object_get_int_member (raw_hashes, "prefixSize"); + prefixes_b64 = json_object_get_string_member (raw_hashes, "rawHashes"); + + prefixes = g_base64_decode (prefixes_b64, &prefixes_len); + num_prefixes = prefixes_len / prefix_len; + } + + ephy_gsb_storage_insert_hash_prefixes_internal (self, list, prefixes, num_prefixes, prefix_len); + + g_free (items); + g_free (prefixes); +} + GList * ephy_gsb_storage_lookup_hash_prefixes (EphyGSBStorage *self, GList *cues) @@ -1164,7 +1239,7 @@ ephy_gsb_storage_lookup_hash_prefixes (EphyGSBStorage *self, for (GList *l = cues; l && l->data; l = l->next) { ephy_sqlite_statement_bind_blob (statement, id++, - g_bytes_get_data (l->data, NULL), GSB_CUE_LEN, + g_bytes_get_data (l->data, NULL), GSB_HASH_CUE_LEN, &error); if (error) { g_warning ("Failed to bind cue value as blob: %s", error->message); diff --git a/lib/safe-browsing/ephy-gsb-storage.h b/lib/safe-browsing/ephy-gsb-storage.h index 2d6854f11..5fbd685e9 100644 --- a/lib/safe-browsing/ephy-gsb-storage.h +++ b/lib/safe-browsing/ephy-gsb-storage.h @@ -23,7 +23,6 @@ #include "ephy-gsb-utils.h" #include <glib-object.h> -#include <json-glib/json-glib.h> G_BEGIN_DECLS @@ -46,11 +45,10 @@ void ephy_gsb_storage_clear_hash_prefixes (EphyGSBStorage EphyGSBThreatList *list); void ephy_gsb_storage_delete_hash_prefixes (EphyGSBStorage *self, EphyGSBThreatList *list, - JsonArray *indices); + JsonObject *tes); void ephy_gsb_storage_insert_hash_prefixes (EphyGSBStorage *self, EphyGSBThreatList *list, - gsize prefix_len, - const char *prefixes_b64); + JsonObject *tes); GList *ephy_gsb_storage_lookup_hash_prefixes (EphyGSBStorage *self, GList *cues); GList *ephy_gsb_storage_lookup_full_hashes (EphyGSBStorage *self, diff --git a/lib/safe-browsing/ephy-gsb-utils.c b/lib/safe-browsing/ephy-gsb-utils.c index cf8c51d4b..271784e31 100644 --- a/lib/safe-browsing/ephy-gsb-utils.c +++ b/lib/safe-browsing/ephy-gsb-utils.c @@ -25,14 +25,130 @@ #include "ephy-string.h" #include <arpa/inet.h> -#include <json-glib/json-glib.h> #include <libsoup/soup.h> +#include <stdio.h> #include <string.h> #define MAX_HOST_SUFFIXES 5 #define MAX_PATH_PREFIXES 6 #define MAX_UNESCAPE_STEP 1024 +typedef struct { + guint8 *data; /* The bit stream as an array of bytes */ + gsize data_len; /* The number of bytes in the array */ + guint8 *curr; /* The current byte in the bit stream */ + guint8 mask; /* Bit mask to read a bit within a byte */ + gsize num_read; /* The number of bits read so far */ +} EphyGSBBitReader; + +typedef struct { + EphyGSBBitReader *reader; + guint parameter; /* Golomb-Rice parameter, between 2 and 28 */ +} EphyGSBRiceDecoder; + +static inline EphyGSBBitReader * +ephy_gsb_bit_reader_new (const guint8 *data, + gsize data_len) +{ + EphyGSBBitReader *reader; + + g_assert (data); + g_assert (data_len > 0); + + reader = g_slice_new (EphyGSBBitReader); + reader->curr = reader->data = g_malloc (data_len); + memcpy (reader->data, data, data_len); + reader->data_len = data_len; + reader->mask = 0x01; + reader->num_read = 0; + + return reader; +} + +static inline void +ephy_gsb_bit_reader_free (EphyGSBBitReader *reader) +{ + g_assert (reader); + + g_free (reader->data); + g_slice_free (EphyGSBBitReader, reader); +} + +/* + * https://developers.google.com/safe-browsing/v4/compression#bit-encoderdecoder + */ +static guint32 +ephy_gsb_bit_reader_read (EphyGSBBitReader *reader, + guint num_bits) +{ + guint32 retval = 0; + + /* Cannot read more than 4 bytes at once. */ + g_assert (num_bits <= 32); + /* Cannot read more bits than the buffer has left. */ + g_assert (reader->num_read + num_bits <= reader->data_len * 8); + + /* Within a byte, the least-significant bits come before the most-significant + * bits in the bit stream. */ + for (guint i = 0; i < num_bits; i++) { + if (*reader->curr & reader->mask) + retval |= 1 << i; + + reader->mask <<= 1; + if (reader->mask == 0) { + reader->curr++; + reader->mask = 0x01; + } + } + + reader->num_read += num_bits; + + return retval; +} + +static inline EphyGSBRiceDecoder * +ephy_gsb_rice_decoder_new (const guint8 *data, + gsize data_len, + guint parameter) +{ + EphyGSBRiceDecoder *decoder; + + g_assert (data); + g_assert (data_len > 0); + + decoder = g_slice_new (EphyGSBRiceDecoder); + decoder->reader = ephy_gsb_bit_reader_new (data, data_len); + decoder->parameter = parameter; + + return decoder; +} + +static inline void +ephy_gsb_rice_decoder_free (EphyGSBRiceDecoder *decoder) +{ + g_assert (decoder); + + ephy_gsb_bit_reader_free (decoder->reader); + g_slice_free (EphyGSBRiceDecoder, decoder); +} + +static guint32 +ephy_gsb_rice_decoder_next (EphyGSBRiceDecoder *decoder) +{ + guint32 quotient = 0; + guint32 remainder; + guint32 bit; + + g_assert (decoder); + + while ((bit = ephy_gsb_bit_reader_read (decoder->reader, 1)) != 0) + quotient += bit; + + remainder = ephy_gsb_bit_reader_read (decoder->reader, decoder->parameter); + + return (quotient << decoder->parameter) + remainder; +} + EphyGSBThreatList * ephy_gsb_threat_list_new (const char *threat_type, const char *platform_type, @@ -176,7 +292,8 @@ ephy_gsb_utils_make_contraints (void) JsonArray *compressions; compressions = json_array_new (); - json_array_add_string_element (compressions, "RAW"); + json_array_add_string_element (compressions, GSB_COMPRESSION_TYPE_RAW); + json_array_add_string_element (compressions, GSB_COMPRESSION_TYPE_RICE); constraints = json_object_new (); /* No restriction for the number of update entries. */ @@ -323,6 +440,58 @@ ephy_gsb_utils_make_full_hashes_request (GList *threat_lists, return body; } +/* + * https://developers.google.com/safe-browsing/v4/compression#rice-compression + */ +guint32 * +ephy_gsb_utils_rice_delta_decode (JsonObject *rde, + gsize *num_items) +{ + EphyGSBRiceDecoder *decoder; + const char *data_b64 = NULL; + const char *first_value_str; + guint32 *items; + guint8 *data; + gsize data_len; + gsize num_entries = 0; + guint parameter = 0; + + g_assert (rde); + g_assert (num_items); + + /* This field is never missing. */ + first_value_str = json_object_get_string_member (rde, "firstValue"); + + if (json_object_has_member (rde, "riceParameter")) + parameter = json_object_get_int_member (rde, "riceParameter"); + if (json_object_has_member (rde, "numEntries")) + num_entries = json_object_get_int_member (rde, "numEntries"); + if (json_object_has_member (rde, "encodedData")) + data_b64 = json_object_get_string_member (rde, "encodedData"); + + *num_items = 1 + num_entries; + items = g_malloc (*num_items * sizeof (guint32)); + sscanf (first_value_str, "%u", &items[0]); + + if (num_entries == 0) + return items; + + /* Sanity check. */ + if (parameter < 2 || parameter > 28 || data_b64 == NULL) + return items; + + data = g_base64_decode (data_b64, &data_len); + decoder = ephy_gsb_rice_decoder_new (data, data_len, parameter); + + for (gsize i = 1; i <= num_entries; i++) + items[i] = items[i - 1] + ephy_gsb_rice_decoder_next (decoder); + + g_free (data); + ephy_gsb_rice_decoder_free (decoder); + + return items; +} + static char * ephy_gsb_utils_full_unescape (const char *part) { @@ -653,7 +822,7 @@ ephy_gsb_utils_get_hash_cues (GList *hashes) for (GList *l = hashes; l && l->data; l = l->next) { const char *hash = g_bytes_get_data (l->data, NULL); - retval = g_list_prepend (retval, g_bytes_new (hash, GSB_CUE_LEN)); + retval = g_list_prepend (retval, g_bytes_new (hash, GSB_HASH_CUE_LEN)); } return g_list_reverse (retval); diff --git a/lib/safe-browsing/ephy-gsb-utils.h b/lib/safe-browsing/ephy-gsb-utils.h index 341649a6b..940dd3189 100644 --- a/lib/safe-browsing/ephy-gsb-utils.h +++ b/lib/safe-browsing/ephy-gsb-utils.h @@ -21,13 +21,20 @@ #pragma once #include <glib.h> +#include <json-glib/json-glib.h> G_BEGIN_DECLS -#define GSB_CUE_LEN 4 +#define GSB_HASH_CUE_LEN 4 +#define GSB_RICE_PREFIX_LEN 4 + #define GSB_HASH_TYPE G_CHECKSUM_SHA256 #define GSB_HASH_SIZE (g_checksum_type_get_length (GSB_HASH_TYPE)) +#define GSB_COMPRESSION_TYPE_RAW "RAW" +#define GSB_COMPRESSION_TYPE_RICE "RICE" +#define GSB_COMPRESSION_TYPE_UNSPECIFIED "COMPRESSION_TYPE_UNSPECIFIED" + #define GSB_THREAT_TYPE_MALWARE "MALWARE" #define GSB_THREAT_TYPE_SOCIAL_ENGINEERING "SOCIAL_ENGINEERING" #define GSB_THREAT_TYPE_UNWANTED_SOFTWARE "UNWANTED_SOFTWARE" @@ -84,6 +91,9 @@ char *ephy_gsb_utils_make_list_updates_request (GList *threat char *ephy_gsb_utils_make_full_hashes_request (GList *threat_lists, GList *hash_prefixes); +guint32 *ephy_gsb_utils_rice_delta_decode (JsonObject *rde, + gsize *num_items); + char *ephy_gsb_utils_canonicalize (const char *url, char **host_out, char **path_out, |