diff options
author | Salah-Eddin Shaban <salshaaban@gmail.com> | 2017-08-13 00:02:49 +0200 |
---|---|---|
committer | Daniel Stenberg <daniel@haxx.se> | 2017-08-13 00:03:54 +0200 |
commit | d6ecb2c851dd05f6dbd5072f761221acbc06ec05 (patch) | |
tree | adf3835245c16bf6226179d0458264eac1c11f1d /lib/transfer.c | |
parent | 47e4a6fa1ce32f2237f646e8d40ec39dcee6f38c (diff) | |
download | curl-d6ecb2c851dd05f6dbd5072f761221acbc06ec05.tar.gz |
redirect: skip URL encoding for host names
This fixes redirects to IDN URLs
Fixes #1441
Closes #1762
Reported by: David Lord
Diffstat (limited to 'lib/transfer.c')
-rw-r--r-- | lib/transfer.c | 68 |
1 file changed, 62 insertions, 6 deletions
diff --git a/lib/transfer.c b/lib/transfer.c index 81c056e0e..3537b58c6 100644 --- a/lib/transfer.c +++ b/lib/transfer.c @@ -1391,16 +1391,56 @@ CURLcode Curl_posttransfer(struct Curl_easy *data) #ifndef CURL_DISABLE_HTTP /* + * Find the separator at the end of the host name, or the '?' in cases like + * http://www.url.com?id=2380 + */ +static const char *find_host_sep(const char *url) +{ + const char *sep; + const char *query; + + /* Find the start of the hostname */ + sep = strstr(url, "//"); + if(!sep) + sep = url; + else + sep += 2; + + query = strchr(sep, '?'); + sep = strchr(sep, '/'); + + if(!sep) + sep = url + strlen(url); + + if(!query) + query = url + strlen(url); + + return sep < query ? sep : query; +} + +/* * strlen_url() returns the length of the given URL if the spaces within the * URL were properly URL encoded. + * URL encoding should be skipped for host names, otherwise IDN resolution + * will fail. */ -static size_t strlen_url(const char *url) +static size_t strlen_url(const char *url, bool relative) { const unsigned char *ptr; size_t newlen=0; bool left=TRUE; /* left side of the ? */ + const unsigned char *host_sep = (const unsigned char *) url; + + if(!relative) + host_sep = (const unsigned char *) find_host_sep(url); for(ptr=(unsigned char *)url; *ptr; ptr++) { + + if(ptr < host_sep) { + ++newlen; + continue; + } + switch(*ptr) { case '?': left=FALSE; @@ -1423,16 +1463,29 @@ static size_t strlen_url(const char *url) /* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in * the source URL accordingly. + * URL encoding should be skipped for host names, otherwise IDN resolution + * will fail. 
*/ -static void strcpy_url(char *output, const char *url) +static void strcpy_url(char *output, const char *url, bool relative) { /* we must add this with whitespace-replacing */ bool left=TRUE; const unsigned char *iptr; char *optr = output; + const unsigned char *host_sep = (const unsigned char *) url; + + if(!relative) + host_sep = (const unsigned char *) find_host_sep(url); + for(iptr = (unsigned char *)url; /* read from here */ *iptr; /* until zero byte */ iptr++) { + + if(iptr < host_sep) { + *optr++ = *iptr; + continue; + } + switch(*iptr) { case '?': left=FALSE; @@ -1488,6 +1541,7 @@ static char *concat_url(const char *base, const char *relurl) char *protsep; char *pathsep; size_t newlen; + bool host_changed = FALSE; const char *useurl = relurl; size_t urllen; @@ -1568,6 +1622,7 @@ static char *concat_url(const char *base, const char *relurl) *protsep=0; useurl = &relurl[2]; /* we keep the slashes from the original, so we skip the new ones */ + host_changed = TRUE; } else { /* cut off the original URL from the first slash, or deal with URLs @@ -1599,7 +1654,7 @@ static char *concat_url(const char *base, const char *relurl) letter we replace each space with %20 while it is replaced with '+' on the right side of the '?' letter. */ - newlen = strlen_url(useurl); + newlen = strlen_url(useurl, !host_changed); urllen = strlen(url_clone); @@ -1621,7 +1676,7 @@ static char *concat_url(const char *base, const char *relurl) newest[urllen++]='/'; /* then append the new piece on the right side */ - strcpy_url(&newest[urllen], useurl); + strcpy_url(&newest[urllen], useurl, !host_changed); free(url_clone); @@ -1694,7 +1749,7 @@ CURLcode Curl_follow(struct Curl_easy *data, /* The new URL MAY contain space or high byte values, that means a mighty stupid redirect URL but we still make an effort to do "right". 
*/ char *newest; - size_t newlen = strlen_url(newurl); + size_t newlen = strlen_url(newurl, FALSE); /* This is an absolute URL, don't allow the custom port number */ disallowport = TRUE; @@ -1702,7 +1757,8 @@ CURLcode Curl_follow(struct Curl_easy *data, newest = malloc(newlen+1); /* get memory for this */ if(!newest) return CURLE_OUT_OF_MEMORY; - strcpy_url(newest, newurl); /* create a space-free URL */ + + strcpy_url(newest, newurl, FALSE); /* create a space-free URL */ newurl = newest; /* use this instead now */ } |