summary refs log tree commit diff
diff options
context:
space:
mode:
author Gustavo Noronha Silva <gns@gnome.org> 2013-12-10 17:17:40 +0100
committer Dan Winship <danw@gnome.org> 2014-02-17 12:22:22 -0500
commit 26a65181db0b1fc3eb97748a5e3d9ceeecdc62e3 (patch)
tree be860eeb83a20700f6290ea3866264871aaed61f
parent cd4f6a94f9275670091326a5aec8a07bce7f8d79 (diff)
download libsoup-26a65181db0b1fc3eb97748a5e3d9ceeecdc62e3.tar.gz
sniffing: Bring image sniffing up-to-date with the MIMESNIFF spec
-rw-r--r-- libsoup/soup-content-sniffer.c 212
-rw-r--r-- tests/resources/home.jpg bin 0 -> 1074 bytes
-rw-r--r-- tests/resources/home.png bin 0 -> 313 bytes
-rw-r--r-- tests/resources/tux.webp bin 0 -> 17128 bytes
-rw-r--r-- tests/sniffing-test.c 11
-rw-r--r-- tests/soup-tests.gresource.xml 3
6 files changed, 127 insertions, 99 deletions
diff --git a/libsoup/soup-content-sniffer.c b/libsoup/soup-content-sniffer.c
index 5b768bb2..5e0b2a70 100644
--- a/libsoup/soup-content-sniffer.c
+++ b/libsoup/soup-content-sniffer.c
@@ -77,6 +77,105 @@ soup_content_sniffer_init (SoupContentSniffer *content_sniffer)
{
}
+typedef struct { /* One row of a media-signature table, as consumed by sniff_media(). */
+ const guchar *mask; /* ANDed byte-by-byte with the resource before comparing */
+ const guchar *pattern; /* bytes the masked resource must equal */
+ guint pattern_length; /* number of bytes of mask/pattern that are compared */
+ const char *sniffed_type; /* MIME type string that is g_strdup()ed on a match */
+} SoupContentSnifferMediaPattern;
+
+/* Tries each row of @table, in order, against the start of @buffer.
+ *
+ * A row matches when, for every offset below its pattern_length, the
+ * resource byte ANDed with the row's mask byte equals the pattern byte.
+ * Rows whose pattern is longer than the sniffable prefix (at most 512
+ * bytes of @buffer) are skipped.
+ *
+ * Returns a g_strdup()ed copy of the first matching row's sniffed_type,
+ * or NULL when no row matches. @sniffer is not used here.
+ */
+static char*
+sniff_media (SoupContentSniffer *sniffer,
+	     SoupBuffer *buffer,
+	     SoupContentSnifferMediaPattern table[],
+	     int table_length)
+{
+	const guchar *data = (const guchar *)buffer->data;
+	int data_length = MIN (512, buffer->length);
+	int row;
+
+	for (row = 0; row < table_length; row++) {
+		SoupContentSnifferMediaPattern *candidate = table + row;
+		int pos = 0;
+
+		/* Not enough data to hold this signature. */
+		if (candidate->pattern_length > data_length)
+			continue;
+
+		/* Advance while the masked bytes keep agreeing. */
+		while (pos < candidate->pattern_length &&
+		       (data[pos] & candidate->mask[pos]) == candidate->pattern[pos])
+			pos++;
+
+		/* Reaching the end means every byte of the signature matched. */
+		if (pos == candidate->pattern_length)
+			return g_strdup (candidate->sniffed_type);
+	}
+
+	return NULL;
+}
+
+/* Image signatures tried, in order, by sniff_images(). This table is
+ * based on the MIMESNIFF spec; see "6.1 Matching an image type pattern".
+ *
+ * Each row is { mask, pattern, pattern_length, sniffed_type }: a resource
+ * matches a row when (mask[i] & resource[i]) == pattern[i] for every i
+ * below pattern_length (see sniff_media()). The first matching row wins.
+ */
+static SoupContentSnifferMediaPattern image_types_table[] = {
+
+ /* Windows icon signature. */
+ { (const guchar *)"\xFF\xFF\xFF\xFF",
+ (const guchar *)"\x00\x00\x01\x00",
+ 4,
+ "image/x-icon" },
+
+ /* Windows cursor signature; reported with the same type as icons. */
+ { (const guchar *)"\xFF\xFF\xFF\xFF",
+ (const guchar *)"\x00\x00\x02\x00",
+ 4,
+ "image/x-icon" },
+
+ /* BMP. */
+ { (const guchar *)"\xFF\xFF",
+ (const guchar *)"BM",
+ 2,
+ "image/bmp" },
+
+ /* GIFs: one row per version string ("GIF87a" and "GIF89a"). */
+ { (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF",
+ (const guchar *)"GIF87a",
+ 6,
+ "image/gif" },
+
+ { (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF",
+ (const guchar *)"GIF89a",
+ 6,
+ "image/gif" },
+
+ /* WEBP: the four zero mask bytes make offsets 4-7 (right after
+ * "RIFF") match any value; only "RIFF" and "WEBPVP" are checked.
+ */
+ { (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF",
+ (const guchar *)"RIFF\x00\x00\x00\x00WEBPVP",
+ 14,
+ "image/webp" },
+
+ /* PNG: the full 8-byte signature. */
+ { (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
+ (const guchar *)"\x89PNG\x0D\x0A\x1A\x0A",
+ 8,
+ "image/png" },
+
+ /* JPEG: only the first three bytes are checked. */
+ { (const guchar *)"\xFF\xFF\xFF",
+ (const guchar *)"\xFF\xD8\xFF",
+ 3,
+ "image/jpeg" },
+};
+
+/* Matches @buffer against image_types_table via sniff_media().
+ *
+ * Returns whatever sniff_media() returns: a g_strdup()ed image MIME type
+ * on a match, or NULL when no image signature matches.
+ */
+static char*
+sniff_images (SoupContentSniffer *sniffer, SoupBuffer *buffer)
+{
+	int n_image_types = G_N_ELEMENTS (image_types_table);
+
+	return sniff_media (sniffer, buffer, image_types_table, n_image_types);
+}
+
/* This table is based on the MIMESNIFF spec;
* See 7.1 Identifying a resource with an unknown MIME type
*/
@@ -262,66 +361,6 @@ static SoupContentSnifferPattern types_table[] = {
4,
"text/plain",
FALSE },
-
- /* Images. */
-
- { FALSE, FALSE, /* Windows icon signature. */
- (const guchar *)"\xFF\xFF\xFF\xFF",
- (const guchar *)"\x00\x00\x01\x00",
- 4,
- "image/x-icon",
- FALSE },
-
- { FALSE, FALSE, /* Windows cursor signature. */
- (const guchar *)"\xFF\xFF\xFF\xFF",
- (const guchar *)"\x00\x00\x02\x00",
- 4,
- "image/x-icon",
- FALSE },
-
- { FALSE, FALSE, /* BMP. */
- (const guchar *)"\xFF\xFF",
- (const guchar *)"BM",
- 2,
- "image/bmp",
- FALSE },
-
- { FALSE, FALSE, /* GIF. */
- (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF",
- (const guchar *)"GIF87a",
- 6,
- "image/gif",
- FALSE },
-
- { FALSE, FALSE, /* GIF. */
- (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF",
- (const guchar *)"GIF89a",
- 6,
- "image/gif",
- FALSE },
-
- { FALSE, FALSE, /* WEBP. */
- (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF",
- (const guchar *)"RIFF\x00\x00\x00\x00WEBPVP",
- 14,
- "image/webp",
- FALSE },
-
- { FALSE, FALSE, /* PNG. */
- (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
- (const guchar *)"\x89PNG\x0D\x0A\x1A\x0A",
- 8,
- "image/png",
- FALSE },
-
- { FALSE, FALSE, /* JPEG. */
- (const guchar *)"\xFF\xFF\xFF",
- (const guchar *)"\xFF\xD8\xFF",
- 3,
- "image/jpeg",
- FALSE },
-
- /* TODO: audio/video, archive type. */
};
/* Whether a given byte looks like it might be part of binary content.
@@ -352,6 +391,7 @@ static char*
sniff_unknown (SoupContentSniffer *sniffer, SoupBuffer *buffer,
gboolean sniff_scriptable)
{
+ char *sniffed_type = NULL;
const guchar *resource = (const guchar *)buffer->data;
int resource_length = MIN (512, buffer->length);
int i;
@@ -417,6 +457,12 @@ sniff_unknown (SoupContentSniffer *sniffer, SoupBuffer *buffer,
}
}
+ sniffed_type = sniff_images (sniffer, buffer);
+
+ if (sniffed_type != NULL)
+ return sniffed_type;
+
+
for (i = 0; i < resource_length; i++) {
if (byte_looks_binary[resource[i]])
return g_strdup ("application/octet-stream");
@@ -464,33 +510,6 @@ sniff_text_or_binary (SoupContentSniffer *sniffer, SoupBuffer *buffer)
return sniff_unknown (sniffer, buffer, TRUE);
}
-static char*
-sniff_images (SoupContentSniffer *sniffer, SoupBuffer *buffer,
- const char *content_type)
-{
- const guchar *resource = (const guchar *)buffer->data;
- int resource_length = MIN (512, buffer->length);
- int i;
-
- for (i = 0; i < G_N_ELEMENTS (types_table); i++) {
- SoupContentSnifferPattern *type_row = &(types_table[i]);
-
- if (resource_length < type_row->pattern_length)
- continue;
-
- if (!g_str_has_prefix (type_row->sniffed_type, "image/"))
- continue;
-
- /* All of the image types use all-\xFF for the mask,
- * so we can just memcmp.
- */
- if (memcmp (type_row->pattern, resource, type_row->pattern_length) == 0)
- return g_strdup (type_row->sniffed_type);
- }
-
- return g_strdup (content_type);
-}
-
static gboolean
skip_insignificant_space (const char *resource, int *pos, int resource_length)
{
@@ -646,6 +665,7 @@ soup_content_sniffer_real_sniff (SoupContentSniffer *sniffer, SoupMessage *msg,
{
const char *content_type;
const char *x_content_type_options;
+ char *sniffed_type = NULL;
gboolean no_sniff = FALSE;
content_type = soup_message_headers_get_content_type (msg->response_headers, params);
@@ -685,18 +705,14 @@ soup_content_sniffer_real_sniff (SoupContentSniffer *sniffer, SoupMessage *msg,
if (!g_ascii_strcasecmp (content_type, "text/html"))
return sniff_feed_or_html (sniffer, buffer);
- /* 2.7.5 Content-Type sniffing: image
- * The spec says:
- *
- * If the resource's official type is "image/svg+xml", then
- * the sniffed type of the resource is its official type (an
- * XML type)
- *
- * The XML case is handled by the if above; if you refactor
- * this code, keep this in mind.
+ /* 6. Image types.
*/
- if (!g_ascii_strncasecmp (content_type, "image/", 6))
- return sniff_images (sniffer, buffer, content_type);
+ if (!g_ascii_strncasecmp (content_type, "image/", 6)) {
+ sniffed_type = sniff_images (sniffer, buffer);
+ if (sniffed_type != NULL)
+ return sniffed_type;
+ return g_strdup (content_type);
+ }
/* If we got text/plain, use text_or_binary */
if (g_str_equal (content_type, "text/plain")) {
diff --git a/tests/resources/home.jpg b/tests/resources/home.jpg
new file mode 100644
index 00000000..ac1f3bbc
--- /dev/null
+++ b/tests/resources/home.jpg
Binary files differ
diff --git a/tests/resources/home.png b/tests/resources/home.png
new file mode 100644
index 00000000..0bb82bac
--- /dev/null
+++ b/tests/resources/home.png
Binary files differ
diff --git a/tests/resources/tux.webp b/tests/resources/tux.webp
new file mode 100644
index 00000000..8764f066
--- /dev/null
+++ b/tests/resources/tux.webp
Binary files differ
diff --git a/tests/sniffing-test.c b/tests/sniffing-test.c
index 498df976..868b7c5f 100644
--- a/tests/sniffing-test.c
+++ b/tests/sniffing-test.c
@@ -554,9 +554,18 @@ main (int argc, char **argv)
do_sniffing_test);
/* Test the image sniffing path */
- g_test_add_data_func ("/sniffing/type/image",
+ g_test_add_data_func ("/sniffing/type/image/gif",
"type/image_png/home.gif => image/gif",
do_sniffing_test);
+ g_test_add_data_func ("/sniffing/type/image/png",
+ "type/image_gif/home.png => image/png",
+ do_sniffing_test);
+ g_test_add_data_func ("/sniffing/type/image/jpeg",
+ "type/image_png/home.jpg => image/jpeg",
+ do_sniffing_test);
+ g_test_add_data_func ("/sniffing/type/image/webp",
+ "type/image_png/tux.webp => image/webp",
+ do_sniffing_test);
/* The spec tells us to only use the last Content-Type header */
g_test_add_data_func ("/sniffing/multiple-headers",
diff --git a/tests/soup-tests.gresource.xml b/tests/soup-tests.gresource.xml
index 320cd63d..444ed588 100644
--- a/tests/soup-tests.gresource.xml
+++ b/tests/soup-tests.gresource.xml
@@ -5,6 +5,8 @@
<file>resources/atom.xml</file>
<file>resources/feed.rdf</file>
<file>resources/home.gif</file>
+ <file>resources/home.jpg</file>
+ <file>resources/home.png</file>
<file>resources/html_binary.html</file>
<file>resources/leading_space.html</file>
<file>resources/mbox</file>
@@ -16,5 +18,6 @@
<file>resources/test.html</file>
<file>resources/text.txt</file>
<file>resources/text_binary.txt</file>
+ <file>resources/tux.webp</file>
</gresource>
</gresources>