diff options
author | Gustavo Noronha Silva <gns@gnome.org> | 2013-12-10 17:17:40 +0100 |
---|---|---|
committer | Gustavo Noronha Silva <gns@gnome.org> | 2013-12-10 17:30:58 +0100 |
commit | 4271c6282cda93725945cd140a056562f9596699 (patch) | |
tree | 2efee499596cf5b693475b81109ad60d5d66bd14 | |
parent | 6fcb86b308d2402cb8da84548c3b0f3bcc5276c6 (diff) | |
download | libsoup-content-sniffing-update.tar.gz |
Bring image sniffing up-to-date with the MIMESNIFF speccontent-sniffing-update
-rw-r--r-- | libsoup/soup-content-sniffer.c | 198 | ||||
-rw-r--r-- | tests/resources/home.jpg | bin | 0 -> 1074 bytes | |||
-rw-r--r-- | tests/resources/home.png | bin | 0 -> 313 bytes | |||
-rw-r--r-- | tests/resources/tux.webp | bin | 0 -> 17128 bytes | |||
-rw-r--r-- | tests/sniffing-test.c | 3 |
5 files changed, 100 insertions, 101 deletions
diff --git a/libsoup/soup-content-sniffer.c b/libsoup/soup-content-sniffer.c index 55ac5e8d..ee2c29be 100644 --- a/libsoup/soup-content-sniffer.c +++ b/libsoup/soup-content-sniffer.c @@ -40,7 +40,6 @@ static void soup_content_sniffer_session_feature_init (SoupSessionFeatureInterfa static SoupContentProcessorInterface *soup_content_sniffer_default_content_processor_interface; static void soup_content_sniffer_content_processor_init (SoupContentProcessorInterface *interface, gpointer interface_data); - G_DEFINE_TYPE_WITH_CODE (SoupContentSniffer, soup_content_sniffer, G_TYPE_OBJECT, G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE, soup_content_sniffer_session_feature_init) @@ -77,14 +76,99 @@ soup_content_sniffer_init (SoupContentSniffer *content_sniffer) { } +typedef struct { + const guchar *mask; + const guchar *pattern; + guint pattern_length; + const char *sniffed_type; +} SoupContentSnifferMediaPattern; + +static SoupContentSnifferMediaPattern image_types_table[] = { + + /* Windows icon signature. */ + { (const guchar *)"\xFF\xFF\xFF\xFF", + (const guchar *)"\x00\x00\x01\x00", + 4, + "image/x-icon" }, + + /* Windows cursor signature. */ + { (const guchar *)"\xFF\xFF\xFF\xFF", + (const guchar *)"\x00\x00\x02\x00", + 4, + "image/x-icon" }, + + /* BMP. */ + { (const guchar *)"\xFF\xFF", + (const guchar *)"BM", + 2, + "image/bmp" }, + + /* GIFs. */ + { (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"GIF87a", + 6, + "image/gif" }, + + { (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"GIF89a", + 6, + "image/gif" }, + + /* WEBP. */ + { (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"RIFF\x00\x00\x00\x00WEBPVP", + 14, + "image/webp" }, + + /* PNG. */ + { (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x89PNG\x0D\x0A\x1A\x0A", + 8, + "image/png" }, + + /* JPEG. */ + { (const guchar *)"\xFF\xFF\xFF", + (const guchar *)"\xFF\xD8\xFF", + 3, + "image/jpeg" }, +}; + +static char* +sniff_images (SoupContentSniffer *sniffer, SoupBuffer *buffer, + const char *content_type) +{ + const guchar *resource = (const guchar *)buffer->data; + int resource_length = MIN (512, buffer->length); + int i; + + for (i = 0; i < G_N_ELEMENTS (image_types_table); i++) { + SoupContentSnifferMediaPattern *type_row = &(image_types_table[i]); + int j; + + if (resource_length < type_row->pattern_length) + continue; + + for (j = 0; j < type_row->pattern_length; j++) { + if ((type_row->mask[j] & resource[j]) != type_row->pattern[j]) + break; + } + + /* This means our comparison above matched completely */ + if (j == type_row->pattern_length) + return g_strdup (type_row->sniffed_type); + } + + return g_strdup (content_type); +} + /* This table is based on the MIMESNIFF spec; - * See 7.1 Identifying a resource with an unknown MIME type - */ + * See 7.1 Identifying a resource with an unknown MIME type + */ typedef struct { /* @has_ws is TRUE if @pattern contains "generic" whitespace */ gboolean has_ws; /* @has_tag_termination is TRUE if we should check for a tag-terminating - * byte (0x20 " " or 0x3E ">") after the pattern match. */ + * byte (0x20 " " or 0x3E ">") after the pattern match. */ gboolean has_tag_termination; const guchar *mask; const guchar *pattern; @@ -93,7 +177,6 @@ typedef struct { gboolean scriptable; } SoupContentSnifferPattern; - /* When has_ws is TRUE, spaces in the pattern will indicate where insignificant space * is allowed. Those spaces are marked with \x00 on the mask. */ @@ -261,66 +344,6 @@ static SoupContentSnifferPattern types_table[] = { 4, "text/plain", FALSE }, - - /* Images. */ - - { FALSE, FALSE, /* Windows icon signature. */ - (const guchar *)"\xFF\xFF\xFF\xFF", - (const guchar *)"\x00\x00\x01\x00", - 4, - "image/x-icon", - FALSE }, - - { FALSE, FALSE, /* Windows cursor signature. */ - (const guchar *)"\xFF\xFF\xFF\xFF", - (const guchar *)"\x00\x00\x02\x00", - 4, - "image/x-icon", - FALSE }, - - { FALSE, FALSE, /* BMP. */ - (const guchar *)"\xFF\xFF", - (const guchar *)"BM", - 2, - "image/bmp", - FALSE }, - - { FALSE, FALSE, /* GIF. */ - (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF", - (const guchar *)"GIF87a", - 6, - "image/gif", - FALSE }, - - { FALSE, FALSE, /* GIF. */ - (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF", - (const guchar *)"GIF89a", - 6, - "image/gif", - FALSE }, - - { FALSE, FALSE, /* WEBP. */ - (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF", - (const guchar *)"RIFF\x00\x00\x00\x00WEBPVP", - 14, - "image/webp", - FALSE }, - - { FALSE, FALSE, /* PNG. */ - (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", - (const guchar *)"\x89PNG\x0D\x0A\x1A\x0A", - 8, - "image/png", - FALSE }, - - { FALSE, FALSE, /* JPEG. */ - (const guchar *)"\xFF\xFF\xFF", - (const guchar *)"\xFF\xD8\xFF", - 3, - "image/jpeg", - FALSE }, - - /* TODO: audio/video, archive type. */ }; /* Whether a given byte looks like it might be part of binary content. @@ -351,6 +374,7 @@ static char* sniff_unknown (SoupContentSniffer *sniffer, SoupBuffer *buffer, gboolean sniff_scriptable) { + char *sniffed_type = NULL; const guchar *resource = (const guchar *)buffer->data; int resource_length = MIN (512, buffer->length); int i; @@ -416,6 +440,13 @@ sniff_unknown (SoupContentSniffer *sniffer, SoupBuffer *buffer, } } + sniffed_type = sniff_images (sniffer, buffer, "unknown/unknown"); + + if (!g_str_equal (sniffed_type, "unknown/unknown")) + return sniffed_type; + + g_free (sniffed_type); + for (i = 0; i < resource_length; i++) { if (byte_looks_binary[resource[i]]) return g_strdup ("application/octet-stream"); @@ -463,33 +494,6 @@ sniff_text_or_binary (SoupContentSniffer *sniffer, SoupBuffer *buffer) return sniff_unknown (sniffer, buffer, TRUE); } -static char* -sniff_images (SoupContentSniffer *sniffer, SoupBuffer *buffer, - const char *content_type) -{ - const guchar *resource = (const guchar *)buffer->data; - int resource_length = MIN (512, buffer->length); - int i; - - for (i = 0; i < G_N_ELEMENTS (types_table); i++) { - SoupContentSnifferPattern *type_row = &(types_table[i]); - - if (resource_length < type_row->pattern_length) - continue; - - if (!g_str_has_prefix (type_row->sniffed_type, "image/")) - continue; - - /* All of the image types use all-\xFF for the mask, - * so we can just memcmp. - */ - if (memcmp (type_row->pattern, resource, type_row->pattern_length) == 0) - return g_strdup (type_row->sniffed_type); - } - - return g_strdup (content_type); -} - static gboolean skip_insignificant_space (const gchar *resource, int *pos, int resource_length) { while ((resource[*pos] == '\x09') || @@ -684,15 +688,7 @@ soup_content_sniffer_real_sniff (SoupContentSniffer *sniffer, SoupMessage *msg, if (!g_ascii_strcasecmp (content_type, "text/html")) return sniff_feed_or_html (sniffer, buffer); - /* 2.7.5 Content-Type sniffing: image - * The spec says: - * - * If the resource's official type is "image/svg+xml", then - * the sniffed type of the resource is its official type (an - * XML type) - * - * The XML case is handled by the if above; if you refactor - * this code, keep this in mind. + /* 6. Image types. */ if (!g_ascii_strncasecmp (content_type, "image/", 6)) return sniff_images (sniffer, buffer, content_type); diff --git a/tests/resources/home.jpg b/tests/resources/home.jpg Binary files differnew file mode 100644 index 00000000..ac1f3bbc --- /dev/null +++ b/tests/resources/home.jpg diff --git a/tests/resources/home.png b/tests/resources/home.png Binary files differnew file mode 100644 index 00000000..0bb82bac --- /dev/null +++ b/tests/resources/home.png diff --git a/tests/resources/tux.webp b/tests/resources/tux.webp Binary files differnew file mode 100644 index 00000000..8764f066 --- /dev/null +++ b/tests/resources/tux.webp diff --git a/tests/sniffing-test.c b/tests/sniffing-test.c index c1fe56b9..d53d06f8 100644 --- a/tests/sniffing-test.c +++ b/tests/sniffing-test.c @@ -596,6 +596,9 @@ main (int argc, char **argv) /* Test the image sniffing path */ test_sniffing ("/type/image_png/home.gif", "image/gif"); + test_sniffing ("/type/image_gif/home.png", "image/png"); + test_sniffing ("/type/image_png/home.jpg", "image/jpeg"); + test_sniffing ("/type/image_png/tux.webp", "image/webp"); /* The spec tells us to only use the last Content-Type header */ |