summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/libcurl/curl_url_get.310
-rw-r--r--lib/urlapi.c109
-rw-r--r--tests/libtest/lib1560.c63
3 files changed, 143 insertions, 39 deletions
diff --git a/docs/libcurl/curl_url_get.3 b/docs/libcurl/curl_url_get.3
index 7fbdff0ed..51cc37cc2 100644
--- a/docs/libcurl/curl_url_get.3
+++ b/docs/libcurl/curl_url_get.3
@@ -56,7 +56,7 @@ default port for the scheme.
.IP CURLU_URLDECODE
Asks \fIcurl_url_get(3)\fP to URL decode the contents before returning it. It
will not attempt to decode the scheme, the port number or the full URL.
-
+ยด
The query component will also get plus-to-space conversion as a bonus when
this bit is set.
@@ -66,6 +66,14 @@ encoding.
If there's any byte values lower than 32 in the decoded string, the get
operation will return an error instead.
+.IP CURLU_URLENCODE
+If set, will make \fIcurl_url_get(3)\fP URL encode the host name part when a
+full URL is retrieved. If not set (default), libcurl returns the URL with the
+host name "raw" to support IDN names to appear as-is. IDN host names are
+typically using non-ASCII bytes that otherwise will be percent-encoded.
+
+Note that even when not asking for URL encoding, the '%' (byte 37) will be URL
+encoded to make sure the host name remains valid.
.SH PARTS
.IP CURLUPART_URL
When asked to return the full URL, \fIcurl_url_get(3)\fP will return a
diff --git a/lib/urlapi.c b/lib/urlapi.c
index 367ddca79..5c943c52c 100644
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -654,7 +654,7 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
}
else {
/* letters from the second string is not ok */
- len = strcspn(hostname, " ");
+ len = strcspn(hostname, " \r\n");
if(hlen != len)
/* hostname with bad content */
return CURLUE_MALFORMED_INPUT;
@@ -754,6 +754,30 @@ static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
return TRUE;
}
+/* return strdup'ed version in 'outp', possibly percent decoded */
+static CURLUcode decode_host(char *hostname, char **outp)
+{
+ char *per = NULL;
+ if(hostname[0] != '[')
+ /* only decode if not an ipv6 numerical */
+ per = strchr(hostname, '%');
+ if(!per) {
+ *outp = strdup(hostname);
+ if(!*outp)
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ else {
+ /* might be encoded */
+ size_t dlen;
+ CURLcode result = Curl_urldecode(NULL, hostname, 0,
+ outp, &dlen, REJECT_CTRL);
+ if(result)
+ return CURLUE_MALFORMED_INPUT;
+ }
+
+ return CURLUE_OK;
+}
+
static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
{
char *path;
@@ -1029,20 +1053,22 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
/* Skip hostname check, it's allowed to be empty. */
+ u->host = strdup("");
}
else {
- result = hostname_check(u, hostname);
- if(result)
- return result;
+ if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
+ u->host = strdup(normalized_ipv4);
+ else {
+ result = decode_host(hostname, &u->host);
+ if(result)
+ return result;
+ result = hostname_check(u, u->host);
+ if(result)
+ return result;
+ }
}
-
- if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
- u->host = strdup(normalized_ipv4);
- else
- u->host = strdup(hostname);
if(!u->host)
return CURLUE_OUT_OF_MEMORY;
-
if((flags & CURLU_GUESS_SCHEME) && !schemep) {
/* legacy curl-style guess based on host name */
if(checkprefix("ftp.", hostname))
@@ -1137,6 +1163,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
char portbuf[7];
bool urldecode = (flags & CURLU_URLDECODE)?1:0;
+ bool urlencode = (flags & CURLU_URLENCODE)?1:0;
bool plusdecode = FALSE;
(void)flags;
if(!u)
@@ -1254,16 +1281,55 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
if(h && !(h->flags & PROTOPT_URLOPTIONS))
options = NULL;
- if((u->host[0] == '[') && u->zoneid) {
- /* make it '[ host %25 zoneid ]' */
- size_t hostlen = strlen(u->host);
- size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
- allochost = malloc(alen);
+ if(u->host[0] == '[') {
+ if(u->zoneid) {
+ /* make it '[ host %25 zoneid ]' */
+ size_t hostlen = strlen(u->host);
+ size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
+ allochost = malloc(alen);
+ if(!allochost)
+ return CURLUE_OUT_OF_MEMORY;
+ memcpy(allochost, u->host, hostlen - 1);
+ msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
+ "%%25%s]", u->zoneid);
+ }
+ }
+ else if(urlencode) {
+ int hostlen = (int)strlen(u->host);
+ allochost = curl_easy_escape(NULL, u->host, hostlen);
if(!allochost)
return CURLUE_OUT_OF_MEMORY;
- memcpy(allochost, u->host, hostlen - 1);
- msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
- "%%25%s]", u->zoneid);
+ }
+ else {
+ /* only encode '%' in output host name */
+ char *host = u->host;
+ size_t pcount = 0;
+ /* first, count number of percents present in the name */
+ while(*host) {
+ if(*host == '%')
+ pcount++;
+ host++;
+ }
+ /* if there were percents, encode the host name */
+ if(pcount) {
+ size_t hostlen = strlen(u->host);
+ size_t alen = hostlen + 2 * pcount + 1;
+ char *o = allochost = malloc(alen);
+ if(!allochost)
+ return CURLUE_OUT_OF_MEMORY;
+
+ host = u->host;
+ while(*host) {
+ if(*host == '%') {
+ memcpy(o, "%25", 3);
+ o += 3;
+ host++;
+ continue;
+ }
+ *o++ = *host++;
+ }
+ *o = '\0';
+ }
}
url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
@@ -1405,10 +1471,15 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
case CURLUPART_OPTIONS:
storep = &u->options;
break;
- case CURLUPART_HOST:
+ case CURLUPART_HOST: {
+ size_t len = strcspn(part, " \r\n");
+ if(strlen(part) != len)
+ /* hostname with bad content */
+ return CURLUE_MALFORMED_INPUT;
storep = &u->host;
Curl_safefree(u->zoneid);
break;
+ }
case CURLUPART_ZONEID:
storep = &u->zoneid;
break;
diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c
index f7529592c..de3e3109d 100644
--- a/tests/libtest/lib1560.c
+++ b/tests/libtest/lib1560.c
@@ -243,14 +243,14 @@ static const struct testcase get_parts_list[] ={
{"https://127.0.0.1:443",
"https | [11] | [12] | [13] | 127.0.0.1 | [15] | / | [16] | [17]",
0, CURLU_NO_DEFAULT_PORT, CURLUE_OK},
- {"http://%3a:%3a@ex%0ample/%3f+?+%3f+%23#+%23%3f%g7",
- "http | : | : | [13] | [6] | [15] | /?+ | ? # | +#?%g7",
+ {"http://%3a:%3a@ex4mple/%3f+?+%3f+%23#+%23%3f%g7",
+ "http | : | : | [13] | ex4mple | [15] | /?+ | ? # | +#?%g7",
0, CURLU_URLDECODE, CURLUE_OK},
- {"http://%3a:%3a@ex%0ample/%3f?%3f%35#%35%3f%g7",
- "http | %3a | %3a | [13] | ex%0ample | [15] | /%3f | %3f%35 | %35%3f%g7",
+ {"http://%3a:%3a@ex4mple/%3f?%3f%35#%35%3f%g7",
+ "http | %3a | %3a | [13] | ex4mple | [15] | /%3f | %3f%35 | %35%3f%g7",
0, 0, CURLUE_OK},
{"http://HO0_-st%41/",
- "http | [11] | [12] | [13] | HO0_-st%41 | [15] | / | [16] | [17]",
+ "http | [11] | [12] | [13] | HO0_-stA | [15] | / | [16] | [17]",
0, 0, CURLUE_OK},
{"file://hello.html",
"",
@@ -356,6 +356,17 @@ static const struct testcase get_parts_list[] ={
};
static const struct urltestcase get_url_list[] = {
+ /* percent encoded host names */
+ {"https://%this", "https://%25this/", 0, 0, CURLUE_OK},
+ {"https://h%c", "https://h%25c/", 0, 0, CURLUE_OK},
+ {"https://%%%%%%", "https://%25%25%25%25%25%25/", 0, 0, CURLUE_OK},
+ {"https://%41", "https://A/", 0, 0, CURLUE_OK},
+ {"https://%20", "", 0, 0, CURLUE_MALFORMED_INPUT},
+ {"https://%41%0d", "", 0, 0, CURLUE_MALFORMED_INPUT},
+ {"https://%25", "https://%25/", 0, 0, CURLUE_OK},
+ {"https://_%c0_", "https://_\xC0_/", 0, 0, CURLUE_OK},
+ {"https://_%c0_", "https://_%C0_/", 0, CURLU_URLENCODE, CURLUE_OK},
+
/* IPv4 trickeries */
{"https://16843009", "https://1.1.1.1/", 0, 0, CURLUE_OK},
{"https://0x7f.1", "https://127.0.0.1/", 0, 0, CURLUE_OK},
@@ -365,6 +376,8 @@ static const struct urltestcase get_url_list[] = {
{"https://1.0xffffff", "https://1.255.255.255/", 0, 0, CURLUE_OK},
/* IPv4 numerical overflows or syntax errors will not normalize */
{"https://+127.0.0.1", "https://+127.0.0.1/", 0, 0, CURLUE_OK},
+ {"https://+127.0.0.1", "https://%2B127.0.0.1/", 0, CURLU_URLENCODE,
+ CURLUE_OK},
{"https://127.-0.0.1", "https://127.-0.0.1/", 0, 0, CURLUE_OK},
{"https://127.0. 1", "https://127.0.0.1/", 0, 0, CURLUE_MALFORMED_INPUT},
{"https://1.0x1000000", "https://1.0x1000000/", 0, 0, CURLUE_OK},
@@ -528,6 +541,14 @@ static int checkurl(const char *url, const char *out)
/* !checksrc! disable SPACEBEFORECOMMA 1 */
static const struct setcase set_parts_list[] = {
{"https://example.com/",
+ "host=++,", /* '++' there's no automatic URL decode when settin this
+ part */
+ "https://++/",
+ 0, /* get */
+ 0, /* set */
+ CURLUE_OK, CURLUE_OK},
+
+ {"https://example.com/",
"query=Al2cO3tDkcDZ3EWE5Lh+LX8TPHs,", /* contains '+' */
"https://example.com/?Al2cO3tDkcDZ3EWE5Lh%2bLX8TPHs",
CURLU_URLDECODE, /* decode on get */
@@ -588,10 +609,11 @@ static const struct setcase set_parts_list[] = {
"scheme=https,user= @:,host=foobar,",
"https://%20%20%20%40%3a@foobar/",
0, CURLU_URLENCODE, CURLUE_OK, CURLUE_OK},
+ /* Setting a host name with spaces is not OK: */
{NULL,
"scheme=https,host= ,path= ,user= ,password= ,query= ,fragment= ,",
- "https://%20:%20@%20%20/%20?+#%20",
- 0, CURLU_URLENCODE, CURLUE_OK, CURLUE_OK},
+ "[nothing]",
+ 0, CURLU_URLENCODE, CURLUE_OK, CURLUE_MALFORMED_INPUT},
{NULL,
"scheme=https,host=foobar,path=/this /path /is /here,",
"https://foobar/this%20/path%20/is%20/here",
@@ -719,7 +741,7 @@ static CURLUcode updateurl(CURLU *u, const char *cmd, unsigned int setflags)
CURLUPart what = part2id(part);
#if 0
/* for debugging this */
- fprintf(stderr, "%s = %s [%d]\n", part, value, (int)what);
+ fprintf(stderr, "%s = \"%s\" [%d]\n", part, value, (int)what);
#endif
if(what > CURLUPART_ZONEID)
fprintf(stderr, "UNKNOWN part '%s'\n", part);
@@ -847,16 +869,18 @@ static int set_parts(void)
set_parts_list[i].set, (int)uc, set_parts_list[i].pcode);
error++;
}
+ if(!uc) {
+ /* only do this if it worked */
+ rc = curl_url_get(urlp, CURLUPART_URL, &url, 0);
- rc = curl_url_get(urlp, CURLUPART_URL, &url, 0);
-
- if(rc) {
- fprintf(stderr, "%s:%d Get URL returned %d (%s)\n",
- __FILE__, __LINE__, (int)rc, curl_url_strerror(rc));
- error++;
- }
- else if(checkurl(url, set_parts_list[i].out)) {
- error++;
+ if(rc) {
+ fprintf(stderr, "%s:%d Get URL returned %d (%s)\n",
+ __FILE__, __LINE__, (int)rc, curl_url_strerror(rc));
+ error++;
+ }
+ else if(checkurl(url, set_parts_list[i].out)) {
+ error++;
+ }
}
curl_free(url);
}
@@ -888,8 +912,9 @@ static int get_url(void)
rc = curl_url_get(urlp, CURLUPART_URL, &url, get_url_list[i].getflags);
if(rc) {
- fprintf(stderr, "%s:%d returned %d (%s)\n",
- __FILE__, __LINE__, (int)rc, curl_url_strerror(rc));
+ fprintf(stderr, "%s:%d returned %d (%s). URL: '%s'\n",
+ __FILE__, __LINE__, (int)rc, curl_url_strerror(rc),
+ get_url_list[i].in);
error++;
}
else {