summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dan Winship <danw@src.gnome.org> 2009-03-23 21:39:40 +0000
committer Dan Winship <danw@src.gnome.org> 2009-03-23 21:39:40 +0000
commit d1efe3dad63edf97a2bc7b06cd7c1c9c9603ac31 (patch)
tree fcee73b46dbbc7710969f2679a03cc0a18e23800
parent 035ecf09bfe1f671ca88f41d59879928b614f39a (diff)
download libsoup-d1efe3dad63edf97a2bc7b06cd7c1c9c9603ac31.tar.gz
Bug 566530 - Handle (illegal) unencoded spaces in URIs
* libsoup/soup-uri.c (uri_normalized_copy): optionally fix up parts with unencoded spaces in them.
(soup_uri_new_with_base): tell uri_normalized_copy() to fix up spaces in the path and query components

* tests/uri-parsing.c (abs_tests): test parsing and unparsing a URI with an unencoded space in it.

* tests/redirect-test.c (tests, server_callback): add a test of redirecting to a URI with an unencoded space in it.

svn path=/trunk/; revision=1252
-rw-r--r-- ChangeLog 15
-rw-r--r-- libsoup/soup-uri.c 33
-rw-r--r-- tests/redirect-test.c 19
-rw-r--r-- tests/uri-parsing.c 6
4 files changed, 65 insertions, 8 deletions
diff --git a/ChangeLog b/ChangeLog
index 5581c492..3477375e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2009-03-23 Dan Winship <danw@gnome.org>
+
+ Bug 566530 - Handle (illegal) unencoded spaces in URIs
+
+ * libsoup/soup-uri.c (uri_normalized_copy): optionally fix up
+ parts with unencoded spaces in them.
+ (soup_uri_new_with_base): tell uri_normalized_copy() to fix up
+ spaces in the path and query components
+
+ * tests/uri-parsing.c (abs_tests): test parsing and unparsing a
+ URI with an unencoded space in it.
+
+ * tests/redirect-test.c (tests, server_callback): add a test of
+ redirecting to a URI with an unencoded space in it.
+
2009-03-15 Dan Winship <danw@gnome.org>
* configure.in: 2.26.0
diff --git a/libsoup/soup-uri.c b/libsoup/soup-uri.c
index ff923adb..2743576c 100644
--- a/libsoup/soup-uri.c
+++ b/libsoup/soup-uri.c
@@ -93,7 +93,7 @@
static void append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars);
static char *uri_decoded_copy (const char *str, int length);
-static char *uri_normalized_copy (const char *str, int length, const char *unescape_extra);
+static char *uri_normalized_copy (const char *str, int length, const char *unescape_extra, gboolean fixup);
const char *_SOUP_URI_SCHEME_HTTP, *_SOUP_URI_SCHEME_HTTPS;
@@ -152,7 +152,7 @@ soup_uri_new_with_base (SoupURI *base, const char *uri_string)
end = hash = strchr (uri_string, '#');
if (hash && hash[1]) {
uri->fragment = uri_normalized_copy (hash + 1, strlen (hash + 1),
- NULL);
+ NULL, FALSE);
if (!uri->fragment) {
soup_uri_free (uri);
return NULL;
@@ -249,7 +249,7 @@ soup_uri_new_with_base (SoupURI *base, const char *uri_string)
if (question[1]) {
uri->query = uri_normalized_copy (question + 1,
end - (question + 1),
- NULL);
+ NULL, TRUE);
if (!uri->query) {
soup_uri_free (uri);
return NULL;
@@ -260,7 +260,7 @@ soup_uri_new_with_base (SoupURI *base, const char *uri_string)
if (end != uri_string) {
uri->path = uri_normalized_copy (uri_string, end - uri_string,
- NULL);
+ NULL, TRUE);
if (!uri->path) {
soup_uri_free (uri);
return NULL;
@@ -654,10 +654,12 @@ soup_uri_decode (const char *part)
}
static char *
-uri_normalized_copy (const char *part, int length, const char *unescape_extra)
+uri_normalized_copy (const char *part, int length,
+ const char *unescape_extra, gboolean fixup)
{
unsigned char *s, *d, c;
char *normalized = g_strndup (part, length);
+ gboolean need_fixup = FALSE;
s = d = (unsigned char *)normalized;
do {
@@ -678,10 +680,27 @@ uri_normalized_copy (const char *part, int length, const char *unescape_extra)
*d++ = g_ascii_toupper (*s++);
*d++ = g_ascii_toupper (*s);
}
- } else
+ } else {
+ if (*s == ' ')
+ need_fixup = TRUE;
*d++ = *s;
+ }
} while (*s++);
+ if (fixup && need_fixup) {
+ char *tmp, *sp;
+ /* This code is lame, but so are people who put
+ * unencoded spaces in URLs!
+ */
+ while ((sp = strchr (normalized, ' '))) {
+ tmp = g_strdup_printf ("%.*s%%20%s",
+ (int)(sp - normalized),
+ normalized, sp + 1);
+ g_free (normalized);
+ normalized = tmp;
+ };
+ }
+
return normalized;
}
@@ -709,7 +728,7 @@ uri_normalized_copy (const char *part, int length, const char *unescape_extra)
char *
soup_uri_normalize (const char *part, const char *unescape_extra)
{
- return uri_normalized_copy (part, strlen (part), unescape_extra);
+ return uri_normalized_copy (part, strlen (part), unescape_extra, FALSE);
}
diff --git a/tests/redirect-test.c b/tests/redirect-test.c
index 5bb37e95..e863068c 100644
--- a/tests/redirect-test.c
+++ b/tests/redirect-test.c
@@ -93,6 +93,12 @@ static struct {
{ "GET", "/", 200 },
{ NULL } } },
{ { { "POST", "/307", 307 },
+ { NULL } } },
+
+ /* Test behavior with recoverably-bad Location header
+ */
+ { { { "GET", "/bad", 302 },
+ { "GET", "/bad%20with%20spaces", 200 },
{ NULL } } }
};
static const int n_tests = G_N_ELEMENTS (tests);
@@ -206,6 +212,19 @@ server_callback (SoupServer *server, SoupMessage *msg,
char *remainder;
guint status_code;
+ if (g_str_has_prefix (path, "/bad")) {
+ if (!strcmp (path, "/bad")) {
+ soup_message_set_status (msg, SOUP_STATUS_FOUND);
+ soup_message_headers_replace (msg->response_headers,
+ "Location",
+ "/bad with spaces");
+ } else if (!strcmp (path, "/bad with spaces"))
+ soup_message_set_status (msg, SOUP_STATUS_OK);
+ else
+ soup_message_set_status (msg, SOUP_STATUS_NOT_FOUND);
+ return;
+ }
+
if (!strcmp (path, "/")) {
if (msg->method != SOUP_METHOD_GET &&
msg->method != SOUP_METHOD_HEAD) {
diff --git a/tests/uri-parsing.c b/tests/uri-parsing.c
index 52d52717..407ddfd7 100644
--- a/tests/uri-parsing.c
+++ b/tests/uri-parsing.c
@@ -57,7 +57,11 @@ static struct {
{ "http://[::FFFF:129.144.52.38]:80/index.html",
"http://[::FFFF:129.144.52.38]/index.html" },
{ "http://[2010:836B:4179::836B:4179]",
- "http://[2010:836B:4179::836B:4179]/" }
+ "http://[2010:836B:4179::836B:4179]/" },
+
+ /* Try to recover certain kinds of invalid URIs */
+ { "http://host/path with spaces",
+ "http://host/path%20with%20spaces" }
};
static int num_abs_tests = G_N_ELEMENTS(abs_tests);