From d1efe3dad63edf97a2bc7b06cd7c1c9c9603ac31 Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Mon, 23 Mar 2009 21:39:40 +0000 Subject: Bug 566530 - Handle (illegal) unencoded spaces in URIs * libsoup/soup-uri.c (uri_normalized_copy): optionally fix up parts with unencoded spaces in them. (soup_uri_new_with_base): tell uri_normalized_copy() to fix up spaces in the path and query components * tests/uri-parsing.c (abs_tests): test parsing and unparsing a URI with an unencoded space in it. * tests/redirect-test.c (tests, server_callback): add a test of redirecting to a URI with an unencoded space in it. svn path=/trunk/; revision=1252 --- ChangeLog | 15 +++++++++++++++ libsoup/soup-uri.c | 33 ++++++++++++++++++++++++++------- tests/redirect-test.c | 19 +++++++++++++++++++ tests/uri-parsing.c | 6 +++++- 4 files changed, 65 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5581c492..3477375e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2009-03-23 Dan Winship + + Bug 566530 - Handle (illegal) unencoded spaces in URIs + + * libsoup/soup-uri.c (uri_normalized_copy): optionally fix up + parts with unencoded spaces in them. + (soup_uri_new_with_base): tell uri_normalized_copy() to fix up + spaces in the path and query components + + * tests/uri-parsing.c (abs_tests): test parsing and unparsing a + URI with an unencoded space in it. + + * tests/redirect-test.c (tests, server_callback): add a test of + redirecting to a URI with an unencoded space in it. + 2009-03-15 Dan Winship * configure.in: 2.26.0 diff --git a/libsoup/soup-uri.c b/libsoup/soup-uri.c index ff923adb..2743576c 100644 --- a/libsoup/soup-uri.c +++ b/libsoup/soup-uri.c @@ -93,7 +93,7 @@ static void append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars); static char *uri_decoded_copy (const char *str, int length); -static char *uri_normalized_copy (const char *str, int length, const char *unescape_extra); +static char *uri_normalized_copy (const char *str, int length, const char *unescape_extra, gboolean fixup); const char *_SOUP_URI_SCHEME_HTTP, *_SOUP_URI_SCHEME_HTTPS; @@ -152,7 +152,7 @@ soup_uri_new_with_base (SoupURI *base, const char *uri_string) end = hash = strchr (uri_string, '#'); if (hash && hash[1]) { uri->fragment = uri_normalized_copy (hash + 1, strlen (hash + 1), - NULL); + NULL, FALSE); if (!uri->fragment) { soup_uri_free (uri); return NULL; @@ -249,7 +249,7 @@ soup_uri_new_with_base (SoupURI *base, const char *uri_string) if (question[1]) { uri->query = uri_normalized_copy (question + 1, end - (question + 1), - NULL); + NULL, TRUE); if (!uri->query) { soup_uri_free (uri); return NULL; @@ -260,7 +260,7 @@ soup_uri_new_with_base (SoupURI *base, const char *uri_string) if (end != uri_string) { uri->path = uri_normalized_copy (uri_string, end - uri_string, - NULL); + NULL, TRUE); if (!uri->path) { soup_uri_free (uri); return NULL; @@ -654,10 +654,12 @@ soup_uri_decode (const char *part) } static char * -uri_normalized_copy (const char *part, int length, const char *unescape_extra) +uri_normalized_copy (const char *part, int length, + const char *unescape_extra, gboolean fixup) { unsigned char *s, *d, c; char *normalized = g_strndup (part, length); + gboolean need_fixup = FALSE; s = d = (unsigned char *)normalized; do { @@ -678,10 +680,27 @@ uri_normalized_copy (const char *part, int length, const char *unescape_extra) *d++ = g_ascii_toupper (*s++); *d++ = g_ascii_toupper (*s); } - } else + } else { + if (*s == ' ') + need_fixup = TRUE; *d++ = *s; + } } while (*s++); + if (fixup && need_fixup) { + char *tmp, *sp; + /* This code is lame, but so are people who put + * unencoded spaces in URLs! + */ + while ((sp = strchr (normalized, ' '))) { + tmp = g_strdup_printf ("%.*s%%20%s", + (int)(sp - normalized), + normalized, sp + 1); + g_free (normalized); + normalized = tmp; + }; + } + return normalized; } @@ -709,7 +728,7 @@ uri_normalized_copy (const char *part, int length, const char *unescape_extra) char * soup_uri_normalize (const char *part, const char *unescape_extra) { - return uri_normalized_copy (part, strlen (part), unescape_extra); + return uri_normalized_copy (part, strlen (part), unescape_extra, FALSE); } diff --git a/tests/redirect-test.c b/tests/redirect-test.c index 5bb37e95..e863068c 100644 --- a/tests/redirect-test.c +++ b/tests/redirect-test.c @@ -93,6 +93,12 @@ static struct { { "GET", "/", 200 }, { NULL } } }, { { { "POST", "/307", 307 }, + { NULL } } }, + + /* Test behavior with recoverably-bad Location header + */ + { { { "GET", "/bad", 302 }, + { "GET", "/bad%20with%20spaces", 200 }, { NULL } } } }; static const int n_tests = G_N_ELEMENTS (tests); @@ -206,6 +212,19 @@ server_callback (SoupServer *server, SoupMessage *msg, char *remainder; guint status_code; + if (g_str_has_prefix (path, "/bad")) { + if (!strcmp (path, "/bad")) { + soup_message_set_status (msg, SOUP_STATUS_FOUND); + soup_message_headers_replace (msg->response_headers, + "Location", + "/bad with spaces"); + } else if (!strcmp (path, "/bad with spaces")) + soup_message_set_status (msg, SOUP_STATUS_OK); + else + soup_message_set_status (msg, SOUP_STATUS_NOT_FOUND); + return; + } + if (!strcmp (path, "/")) { if (msg->method != SOUP_METHOD_GET && msg->method != SOUP_METHOD_HEAD) { diff --git a/tests/uri-parsing.c b/tests/uri-parsing.c index 52d52717..407ddfd7 100644 --- a/tests/uri-parsing.c +++ b/tests/uri-parsing.c @@ -57,7 +57,11 @@ static struct { { "http://[::FFFF:129.144.52.38]:80/index.html", "http://[::FFFF:129.144.52.38]/index.html" }, { "http://[2010:836B:4179::836B:4179]", - "http://[2010:836B:4179::836B:4179]/" } + "http://[2010:836B:4179::836B:4179]/" }, + + /* Try to recover certain kinds of invalid URIs */ + { "http://host/path with spaces", + "http://host/path%20with%20spaces" } }; static int num_abs_tests = G_N_ELEMENTS(abs_tests); -- cgit v1.2.1