From 9530c7b58197d7d45e21aa60a9986735ccf515b1 Mon Sep 17 00:00:00 2001
From: Gustavo Noronha Silva
Date: Tue, 10 Dec 2013 19:45:55 +0100
Subject: sniffing: Add audio/video sniffing

---
 libsoup/soup-content-sniffer.c | 145 +++++++++++++++++++++++++++++++++++++++++
 tests/resources/test.aiff      | Bin 0 -> 384088 bytes
 tests/resources/test.mp4       | Bin 0 -> 192844 bytes
 tests/resources/test.ogg       | Bin 0 -> 16994 bytes
 tests/resources/test.wav       | Bin 0 -> 384080 bytes
 tests/resources/test.webm      | Bin 0 -> 149879 bytes
 tests/sniffing-test.c          |  19 +++++++
 tests/soup-tests.gresource.xml |   5 ++
 8 files changed, 169 insertions(+)
 create mode 100644 tests/resources/test.aiff
 create mode 100644 tests/resources/test.mp4
 create mode 100644 tests/resources/test.ogg
 create mode 100644 tests/resources/test.wav
 create mode 100644 tests/resources/test.webm

diff --git a/libsoup/soup-content-sniffer.c b/libsoup/soup-content-sniffer.c
index 5e0b2a70..5659af9a 100644
--- a/libsoup/soup-content-sniffer.c
+++ b/libsoup/soup-content-sniffer.c
@@ -176,6 +176,137 @@ sniff_images (SoupContentSniffer *sniffer, SoupBuffer *buffer)
 			    G_N_ELEMENTS (image_types_table));
 }
 
+/* This table is based on the MIMESNIFF spec;
+ * See 6.2 Matching an audio or video type pattern
+ */
+static SoupContentSnifferMediaPattern audio_video_types_table[] = {
+	{ (const guchar *)"\xFF\xFF\xFF\xFF",
+	  (const guchar *)"\x1A\x45\xDF\xA3",
+	  4,
+	  "video/webm" },
+
+	{ (const guchar *)"\xFF\xFF\xFF\xFF",
+	  (const guchar *)".snd",
+	  4,
+	  "audio/basic" },
+
+
+	{ (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
+	  (const guchar *)"FORM\0\0\0\0AIFF",
+	  12,
+	  "audio/aiff" },
+
+	{ (const guchar *)"\xFF\xFF\xFF",
+	  (const guchar *)"ID3",
+	  3,
+	  "audio/mpeg" },
+
+	{ (const guchar *)"\xFF\xFF\xFF\xFF\xFF",
+	  (const guchar *)"OggS\0",
+	  5,
+	  "application/ogg" },
+
+	{ (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
+	  (const guchar *)"MThd\x00\x00\x00\x06",
+	  8,
+	  "audio/midi" },
+
+	{ (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
+	  (const guchar *)"RIFF\x00\x00\x00\x00AVI ",
+	  12,
+	  "video/avi" },
+
+	{ (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF",
+	  (const guchar *)"RIFF\x00\x00\x00\x00WAVE",
+	  12,
+	  "audio/wave" },
+};
+
+/* Returns TRUE if the three bytes at @data are "mp4".  The caller must
+ * guarantee that three bytes are readable at @data.
+ */
+static gboolean
+has_mp4_brand (const guchar *data)
+{
+	return data[0] == 'm' && data[1] == 'p' && data[2] == '4';
+}
+
+/* Implements "matching a signature for MP4" from the MIMESNIFF spec.
+ *
+ * @sniffer is unused.  The data in @buffer is not assumed to be
+ * NUL-terminated or aligned, so it is only read byte by byte and never
+ * beyond the first MIN (512, buffer->length) bytes.
+ *
+ * Returns TRUE if the data starts with an "ftyp" box whose major brand,
+ * or any of whose compatible brands, begins with "mp4".
+ */
+static gboolean
+sniff_mp4 (SoupContentSniffer *sniffer, SoupBuffer *buffer)
+{
+	const guchar *resource = (const guchar *)buffer->data;
+	gsize resource_length = MIN (512, buffer->length);
+	gsize box_size;
+	gsize i;
+
+	/* Box size (4) + box type (4) + major brand (4). */
+	if (resource_length < 12)
+		return FALSE;
+
+	/* The box size is a 32-bit big-endian integer.  Assemble it from
+	 * single bytes so that nothing is read before the length check and
+	 * the result does not depend on host byte order or alignment.
+	 */
+	box_size = ((gsize)resource[0] << 24) |
+		   ((gsize)resource[1] << 16) |
+		   ((gsize)resource[2] << 8) |
+		   (gsize)resource[3];
+
+	if (resource_length < box_size || box_size % 4 != 0)
+		return FALSE;
+
+	if (resource[4] != 'f' || resource[5] != 't' ||
+	    resource[6] != 'y' || resource[7] != 'p')
+		return FALSE;
+
+	/* Major brand. */
+	if (has_mp4_brand (resource + 8))
+		return TRUE;
+
+	/* Compatible brands start after the 4-byte minor version.  box_size
+	 * is a multiple of 4 and at most resource_length, so the three
+	 * bytes at i always lie inside the buffer.
+	 */
+	for (i = 16; i < box_size; i += 4) {
+		if (has_mp4_brand (resource + i))
+			return TRUE;
+	}
+
+	return FALSE;
+}
+
+/* Sniffs @buffer for an audio or video type: first against
+ * audio_video_types_table via sniff_media(), then with sniff_mp4().
+ * Returns the sniff_media() result, a new "video/mp4" string, or NULL.
+ */
+static char*
+sniff_audio_video (SoupContentSniffer *sniffer, SoupBuffer *buffer)
+{
+	char *sniffed_type;
+
+	sniffed_type = sniff_media (sniffer,
+				    buffer,
+				    audio_video_types_table,
+				    G_N_ELEMENTS (audio_video_types_table));
+
+	if (sniffed_type != NULL)
+		return sniffed_type;
+
+	if (sniff_mp4 (sniffer, buffer))
+		return g_strdup ("video/mp4");
+
+	return NULL;
+}
+
 /* This table is based on the MIMESNIFF spec;
  * See 7.1 Identifying a resource with an unknown MIME type
  */
@@ -462,6 +593,10 @@ sniff_unknown (SoupContentSniffer *sniffer, SoupBuffer *buffer,
 	if (sniffed_type != NULL)
 		return sniffed_type;
 
+	sniffed_type = sniff_audio_video (sniffer, buffer);
+
+	if (sniffed_type != NULL)
+		return sniffed_type;
 
 	for (i = 0; i < resource_length; i++) {
 		if (byte_looks_binary[resource[i]])
@@ -714,6 +849,16 @@ soup_content_sniffer_real_sniff (SoupContentSniffer *sniffer, SoupMessage *msg,
 		return g_strdup
(content_type); } + /* 7. Audio and video types. */ + if (!g_ascii_strncasecmp (content_type, "audio/", 6) || + !g_ascii_strncasecmp (content_type, "video/", 6) || + !g_ascii_strcasecmp (content_type, "application/ogg")) { + sniffed_type = sniff_audio_video (sniffer, buffer); + if (sniffed_type != NULL) + return sniffed_type; + return g_strdup (content_type); + } + /* If we got text/plain, use text_or_binary */ if (g_str_equal (content_type, "text/plain")) { return sniff_text_or_binary (sniffer, buffer); diff --git a/tests/resources/test.aiff b/tests/resources/test.aiff new file mode 100644 index 00000000..9a1ecbb2 Binary files /dev/null and b/tests/resources/test.aiff differ diff --git a/tests/resources/test.mp4 b/tests/resources/test.mp4 new file mode 100644 index 00000000..d278c8ad Binary files /dev/null and b/tests/resources/test.mp4 differ diff --git a/tests/resources/test.ogg b/tests/resources/test.ogg new file mode 100644 index 00000000..e8f49ac3 Binary files /dev/null and b/tests/resources/test.ogg differ diff --git a/tests/resources/test.wav b/tests/resources/test.wav new file mode 100644 index 00000000..11660b29 Binary files /dev/null and b/tests/resources/test.wav differ diff --git a/tests/resources/test.webm b/tests/resources/test.webm new file mode 100644 index 00000000..7e53d0b4 Binary files /dev/null and b/tests/resources/test.webm differ diff --git a/tests/sniffing-test.c b/tests/sniffing-test.c index 868b7c5f..5b0e6ee7 100644 --- a/tests/sniffing-test.c +++ b/tests/sniffing-test.c @@ -567,6 +567,25 @@ main (int argc, char **argv) "type/image_png/tux.webp => image/webp", do_sniffing_test); + /* Test audio and video sniffing path */ + g_test_add_data_func ("/sniffing/type/audio/wav", + "type/audio_mpeg/test.wav => audio/wave", + do_sniffing_test); + g_test_add_data_func ("/sniffing/type/audio/aiff", + "type/audio_mpeg/test.aiff => audio/aiff", + do_sniffing_test); + g_test_add_data_func ("/sniffing/type/audio/ogg", + "type/audio_mpeg/test.ogg => 
application/ogg", + do_sniffing_test); + g_test_add_data_func ("/sniffing/type/video/webm", + "type/video_theora/test.webm => video/webm", + do_sniffing_test); + + /* Test the MP4 sniffing path */ + g_test_add_data_func ("/sniffing/type/video/mp4", + "unknown/test.mp4 => video/mp4", + do_sniffing_test); + /* The spec tells us to only use the last Content-Type header */ g_test_add_data_func ("/sniffing/multiple-headers", "multiple_headers/home.gif => image/gif", diff --git a/tests/soup-tests.gresource.xml b/tests/soup-tests.gresource.xml index 444ed588..b24a7297 100644 --- a/tests/soup-tests.gresource.xml +++ b/tests/soup-tests.gresource.xml @@ -15,7 +15,12 @@ resources/mbox.zlib resources/ps_binary.ps resources/rss20.xml + resources/test.aiff resources/test.html + resources/test.mp4 + resources/test.ogg + resources/test.wav + resources/test.webm resources/text.txt resources/text_binary.txt resources/tux.webp -- cgit v1.2.1