diff options
-rw-r--r-- | docs/libcurl/curl_url_get.3 | 10 | ||||
-rw-r--r-- | lib/urlapi.c | 109 | ||||
-rw-r--r-- | tests/libtest/lib1560.c | 63 |
3 files changed, 143 insertions, 39 deletions
diff --git a/docs/libcurl/curl_url_get.3 b/docs/libcurl/curl_url_get.3 index 7fbdff0ed..51cc37cc2 100644 --- a/docs/libcurl/curl_url_get.3 +++ b/docs/libcurl/curl_url_get.3 @@ -56,7 +56,7 @@ default port for the scheme. .IP CURLU_URLDECODE Asks \fIcurl_url_get(3)\fP to URL decode the contents before returning it. It will not attempt to decode the scheme, the port number or the full URL. - +´ The query component will also get plus-to-space conversion as a bonus when this bit is set. @@ -66,6 +66,14 @@ encoding. If there's any byte values lower than 32 in the decoded string, the get operation will return an error instead. +.IP CURLU_URLENCODE +If set, will make \fIcurl_url_get(3)\fP URL encode the host name part when a +full URL is retrieved. If not set (default), libcurl returns the URL with the +host name "raw" to support IDN names to appear as-is. IDN host names are +typically using non-ASCII bytes that otherwise will be percent-encoded. + +Note that even when not asking for URL encoding, the '%' (byte 37) will be URL +encoded to make sure the host name remains valid. 
.SH PARTS .IP CURLUPART_URL When asked to return the full URL, \fIcurl_url_get(3)\fP will return a diff --git a/lib/urlapi.c b/lib/urlapi.c index 367ddca79..5c943c52c 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -654,7 +654,7 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname) } else { /* letters from the second string is not ok */ - len = strcspn(hostname, " "); + len = strcspn(hostname, " \r\n"); if(hlen != len) /* hostname with bad content */ return CURLUE_MALFORMED_INPUT; @@ -754,6 +754,30 @@ static bool ipv4_normalize(const char *hostname, char *outp, size_t olen) return TRUE; } +/* return strdup'ed version in 'outp', possibly percent decoded */ +static CURLUcode decode_host(char *hostname, char **outp) +{ + char *per = NULL; + if(hostname[0] != '[') + /* only decode if not an ipv6 numerical */ + per = strchr(hostname, '%'); + if(!per) { + *outp = strdup(hostname); + if(!*outp) + return CURLUE_OUT_OF_MEMORY; + } + else { + /* might be encoded */ + size_t dlen; + CURLcode result = Curl_urldecode(NULL, hostname, 0, + outp, &dlen, REJECT_CTRL); + if(result) + return CURLUE_MALFORMED_INPUT; + } + + return CURLUE_OK; +} + static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) { char *path; @@ -1029,20 +1053,22 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) { /* Skip hostname check, it's allowed to be empty. 
*/ + u->host = strdup(""); } else { - result = hostname_check(u, hostname); - if(result) - return result; + if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4))) + u->host = strdup(normalized_ipv4); + else { + result = decode_host(hostname, &u->host); + if(result) + return result; + result = hostname_check(u, u->host); + if(result) + return result; + } } - - if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4))) - u->host = strdup(normalized_ipv4); - else - u->host = strdup(hostname); if(!u->host) return CURLUE_OUT_OF_MEMORY; - if((flags & CURLU_GUESS_SCHEME) && !schemep) { /* legacy curl-style guess based on host name */ if(checkprefix("ftp.", hostname)) @@ -1137,6 +1163,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, CURLUcode ifmissing = CURLUE_UNKNOWN_PART; char portbuf[7]; bool urldecode = (flags & CURLU_URLDECODE)?1:0; + bool urlencode = (flags & CURLU_URLENCODE)?1:0; bool plusdecode = FALSE; (void)flags; if(!u) @@ -1254,16 +1281,55 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, if(h && !(h->flags & PROTOPT_URLOPTIONS)) options = NULL; - if((u->host[0] == '[') && u->zoneid) { - /* make it '[ host %25 zoneid ]' */ - size_t hostlen = strlen(u->host); - size_t alen = hostlen + 3 + strlen(u->zoneid) + 1; - allochost = malloc(alen); + if(u->host[0] == '[') { + if(u->zoneid) { + /* make it '[ host %25 zoneid ]' */ + size_t hostlen = strlen(u->host); + size_t alen = hostlen + 3 + strlen(u->zoneid) + 1; + allochost = malloc(alen); + if(!allochost) + return CURLUE_OUT_OF_MEMORY; + memcpy(allochost, u->host, hostlen - 1); + msnprintf(&allochost[hostlen - 1], alen - hostlen + 1, + "%%25%s]", u->zoneid); + } + } + else if(urlencode) { + int hostlen = (int)strlen(u->host); + allochost = curl_easy_escape(NULL, u->host, hostlen); if(!allochost) return CURLUE_OUT_OF_MEMORY; - memcpy(allochost, u->host, hostlen - 1); - msnprintf(&allochost[hostlen - 1], alen - hostlen + 1, - "%%25%s]", u->zoneid); + } + else { + /* only encode '%' 
in output host name */ + char *host = u->host; + size_t pcount = 0; + /* first, count number of percents present in the name */ + while(*host) { + if(*host == '%') + pcount++; + host++; + } + /* if there were percents, encode the host name */ + if(pcount) { + size_t hostlen = strlen(u->host); + size_t alen = hostlen + 2 * pcount + 1; + char *o = allochost = malloc(alen); + if(!allochost) + return CURLUE_OUT_OF_MEMORY; + + host = u->host; + while(*host) { + if(*host == '%') { + memcpy(o, "%25", 3); + o += 3; + host++; + continue; + } + *o++ = *host++; + } + *o = '\0'; + } } url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", @@ -1405,10 +1471,15 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, case CURLUPART_OPTIONS: storep = &u->options; break; - case CURLUPART_HOST: + case CURLUPART_HOST: { + size_t len = strcspn(part, " \r\n"); + if(strlen(part) != len) + /* hostname with bad content */ + return CURLUE_MALFORMED_INPUT; storep = &u->host; Curl_safefree(u->zoneid); break; + } case CURLUPART_ZONEID: storep = &u->zoneid; break; diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c index f7529592c..de3e3109d 100644 --- a/tests/libtest/lib1560.c +++ b/tests/libtest/lib1560.c @@ -243,14 +243,14 @@ static const struct testcase get_parts_list[] ={ {"https://127.0.0.1:443", "https | [11] | [12] | [13] | 127.0.0.1 | [15] | / | [16] | [17]", 0, CURLU_NO_DEFAULT_PORT, CURLUE_OK}, - {"http://%3a:%3a@ex%0ample/%3f+?+%3f+%23#+%23%3f%g7", - "http | : | : | [13] | [6] | [15] | /?+ | ? # | +#?%g7", + {"http://%3a:%3a@ex4mple/%3f+?+%3f+%23#+%23%3f%g7", + "http | : | : | [13] | ex4mple | [15] | /?+ | ? 
# | +#?%g7", 0, CURLU_URLDECODE, CURLUE_OK}, - {"http://%3a:%3a@ex%0ample/%3f?%3f%35#%35%3f%g7", - "http | %3a | %3a | [13] | ex%0ample | [15] | /%3f | %3f%35 | %35%3f%g7", + {"http://%3a:%3a@ex4mple/%3f?%3f%35#%35%3f%g7", + "http | %3a | %3a | [13] | ex4mple | [15] | /%3f | %3f%35 | %35%3f%g7", 0, 0, CURLUE_OK}, {"http://HO0_-st%41/", - "http | [11] | [12] | [13] | HO0_-st%41 | [15] | / | [16] | [17]", + "http | [11] | [12] | [13] | HO0_-stA | [15] | / | [16] | [17]", 0, 0, CURLUE_OK}, {"file://hello.html", "", @@ -356,6 +356,17 @@ static const struct testcase get_parts_list[] ={ }; static const struct urltestcase get_url_list[] = { + /* percent encoded host names */ + {"https://%this", "https://%25this/", 0, 0, CURLUE_OK}, + {"https://h%c", "https://h%25c/", 0, 0, CURLUE_OK}, + {"https://%%%%%%", "https://%25%25%25%25%25%25/", 0, 0, CURLUE_OK}, + {"https://%41", "https://A/", 0, 0, CURLUE_OK}, + {"https://%20", "", 0, 0, CURLUE_MALFORMED_INPUT}, + {"https://%41%0d", "", 0, 0, CURLUE_MALFORMED_INPUT}, + {"https://%25", "https://%25/", 0, 0, CURLUE_OK}, + {"https://_%c0_", "https://_\xC0_/", 0, 0, CURLUE_OK}, + {"https://_%c0_", "https://_%C0_/", 0, CURLU_URLENCODE, CURLUE_OK}, + /* IPv4 trickeries */ {"https://16843009", "https://1.1.1.1/", 0, 0, CURLUE_OK}, {"https://0x7f.1", "https://127.0.0.1/", 0, 0, CURLUE_OK}, @@ -365,6 +376,8 @@ static const struct urltestcase get_url_list[] = { {"https://1.0xffffff", "https://1.255.255.255/", 0, 0, CURLUE_OK}, /* IPv4 numerical overflows or syntax errors will not normalize */ {"https://+127.0.0.1", "https://+127.0.0.1/", 0, 0, CURLUE_OK}, + {"https://+127.0.0.1", "https://%2B127.0.0.1/", 0, CURLU_URLENCODE, + CURLUE_OK}, {"https://127.-0.0.1", "https://127.-0.0.1/", 0, 0, CURLUE_OK}, {"https://127.0. 1", "https://127.0.0.1/", 0, 0, CURLUE_MALFORMED_INPUT}, {"https://1.0x1000000", "https://1.0x1000000/", 0, 0, CURLUE_OK}, @@ -528,6 +541,14 @@ static int checkurl(const char *url, const char *out) /* !checksrc! 
disable SPACEBEFORECOMMA 1 */ static const struct setcase set_parts_list[] = { {"https://example.com/", + "host=++,", /* '++' there's no automatic URL decode when settin this + part */ + "https://++/", + 0, /* get */ + 0, /* set */ + CURLUE_OK, CURLUE_OK}, + + {"https://example.com/", "query=Al2cO3tDkcDZ3EWE5Lh+LX8TPHs,", /* contains '+' */ "https://example.com/?Al2cO3tDkcDZ3EWE5Lh%2bLX8TPHs", CURLU_URLDECODE, /* decode on get */ @@ -588,10 +609,11 @@ static const struct setcase set_parts_list[] = { "scheme=https,user= @:,host=foobar,", "https://%20%20%20%40%3a@foobar/", 0, CURLU_URLENCODE, CURLUE_OK, CURLUE_OK}, + /* Setting a host name with spaces is not OK: */ {NULL, "scheme=https,host= ,path= ,user= ,password= ,query= ,fragment= ,", - "https://%20:%20@%20%20/%20?+#%20", - 0, CURLU_URLENCODE, CURLUE_OK, CURLUE_OK}, + "[nothing]", + 0, CURLU_URLENCODE, CURLUE_OK, CURLUE_MALFORMED_INPUT}, {NULL, "scheme=https,host=foobar,path=/this /path /is /here,", "https://foobar/this%20/path%20/is%20/here", @@ -719,7 +741,7 @@ static CURLUcode updateurl(CURLU *u, const char *cmd, unsigned int setflags) CURLUPart what = part2id(part); #if 0 /* for debugging this */ - fprintf(stderr, "%s = %s [%d]\n", part, value, (int)what); + fprintf(stderr, "%s = \"%s\" [%d]\n", part, value, (int)what); #endif if(what > CURLUPART_ZONEID) fprintf(stderr, "UNKNOWN part '%s'\n", part); @@ -847,16 +869,18 @@ static int set_parts(void) set_parts_list[i].set, (int)uc, set_parts_list[i].pcode); error++; } + if(!uc) { + /* only do this if it worked */ + rc = curl_url_get(urlp, CURLUPART_URL, &url, 0); - rc = curl_url_get(urlp, CURLUPART_URL, &url, 0); - - if(rc) { - fprintf(stderr, "%s:%d Get URL returned %d (%s)\n", - __FILE__, __LINE__, (int)rc, curl_url_strerror(rc)); - error++; - } - else if(checkurl(url, set_parts_list[i].out)) { - error++; + if(rc) { + fprintf(stderr, "%s:%d Get URL returned %d (%s)\n", + __FILE__, __LINE__, (int)rc, curl_url_strerror(rc)); + error++; + } + else 
if(checkurl(url, set_parts_list[i].out)) { + error++; + } } curl_free(url); } @@ -888,8 +912,9 @@ static int get_url(void) rc = curl_url_get(urlp, CURLUPART_URL, &url, get_url_list[i].getflags); if(rc) { - fprintf(stderr, "%s:%d returned %d (%s)\n", - __FILE__, __LINE__, (int)rc, curl_url_strerror(rc)); + fprintf(stderr, "%s:%d returned %d (%s). URL: '%s'\n", + __FILE__, __LINE__, (int)rc, curl_url_strerror(rc), + get_url_list[i].in); error++; } else { |