summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Stenberg <daniel@haxx.se>2021-04-08 10:34:06 +0200
committerDaniel Stenberg <daniel@haxx.se>2021-04-08 17:35:22 +0200
commit6de63f53ab187b592e45d80ac604fa4e02b0554e (patch)
tree8da74de33ed059603125fd61c6986b3a97109f47
parentc502b47f1f2cf4668dd627203c62b978868df632 (diff)
downloadcurl-bagder/urlapi-normalize-ipv4.tar.gz
urlapi: "normalize" numerical IPv4 host namesbagder/urlapi-normalize-ipv4
When the host name in a URL is given as an IPv4 numerical address, the address can be specified with dotted numericals in four different ways: a32, a.b24, a.b.c16 or a.b.c.d and each part can be specified in decimal, octal (0-prefixed) or hexadecimal (0x-prefixed). Instead of passing on the name as-is and leaving the handling to the underlying name functions, which made them not work with c-ares but work with getaddrinfo, this change now makes the curl URL API itself detect and "normalize" host names specified as IPv4 numericals. The WHATWG URL Spec says this is an okay way to specify a host name in a URL. RFC 3896 does not allow them, but curl didn't prevent them before and it seems other RFC 3896-using tools have not either. Host names used like this are widely supported by other tools as well due to the handling being done by getaddrinfo and friends. I decided to add the functionality into the URL API itself so that all users of these functions get the benefits, when for example wanting to compare two URLs. Also, it makes curl built to use c-ares now support them as well and make curl builds more consistent. The normalization makes HTTPS and virtual hosted HTTP work fine even when curl gets the address specified using one of the "obscure" formats. Test 1560 is extended to verify. Fixes #6863 Closes #6871
-rw-r--r--lib/urlapi.c92
-rw-r--r--tests/libtest/lib1560.c15
2 files changed, 104 insertions, 3 deletions
diff --git a/lib/urlapi.c b/lib/urlapi.c
index e3a788221..340dc33df 100644
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
- * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) 1998 - 2021, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@@ -667,6 +667,90 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
+/*
+ * Handle partial IPv4 numerical addresses and different bases, like
+ * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
+ *
+ * If the given input string is syntactically wrong or any part for example is
+ * too big, this function returns FALSE and doesn't create any output.
+ *
+ * Output the "normalized" version of that input string in plain quad decimal
+ * integers and return TRUE.
+ */
+static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
+{
+ bool done = FALSE;
+ int n = 0;
+ const char *c = hostname;
+ unsigned long parts[4] = {0, 0, 0, 0};
+
+ while(!done) {
+ char *endp;
+ unsigned long l = strtoul(c, &endp, 0);
+
+ /* overflow or nothing parsed at all */
+ if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
+ return FALSE;
+
+#if SIZEOF_LONG > 4
+ /* a value larger than 32 bits */
+ if(l > UINT_MAX)
+ return FALSE;
+#endif
+
+ parts[n] = l;
+ c = endp;
+
+ switch (*c) {
+ case '.' :
+ if(n == 3)
+ return FALSE;
+ n++;
+ c++;
+ break;
+
+ case '\0':
+ done = TRUE;
+ break;
+
+ default:
+ return FALSE;
+ }
+ }
+
+ /* this is deemed a valid IPv4 numerical address */
+
+ switch(n) {
+ case 0: /* a -- 32 bits */
+ msnprintf(outp, olen, "%u.%u.%u.%u",
+ parts[0] >> 24, (parts[0] >> 16) & 0xff,
+ (parts[0] >> 8) & 0xff, parts[0] & 0xff);
+ break;
+ case 1: /* a.b -- 8.24 bits */
+ if((parts[0] > 0xff) || (parts[1] > 0xffffff))
+ return FALSE;
+ msnprintf(outp, olen, "%u.%u.%u.%u",
+ parts[0], (parts[1] >> 16) & 0xff,
+ (parts[1] >> 8) & 0xff, parts[1] & 0xff);
+ break;
+ case 2: /* a.b.c -- 8.8.16 bits */
+ if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
+ return FALSE;
+ msnprintf(outp, olen, "%u.%u.%u.%u",
+ parts[0], parts[1], (parts[2] >> 8) & 0xff,
+ parts[2] & 0xff);
+ break;
+ case 3: /* a.b.c.d -- 8.8.8.8 bits */
+ if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
+ (parts[3] > 0xff))
+ return FALSE;
+ msnprintf(outp, olen, "%u.%u.%u.%u",
+ parts[0], parts[1], parts[2], parts[3]);
+ break;
+ }
+ return TRUE;
+}
+
static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
{
char *path;
@@ -899,6 +983,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
}
if(hostname) {
+ char normalized_ipv4[sizeof("255.255.255.255") + 1];
/*
* Parse the login details and strip them out of the host name.
*/
@@ -922,7 +1007,10 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
return result;
}
- u->host = strdup(hostname);
+ if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
+ u->host = strdup(normalized_ipv4);
+ else
+ u->host = strdup(hostname);
if(!u->host)
return CURLUE_OUT_OF_MEMORY;
diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c
index c81400242..a469b7a0c 100644
--- a/tests/libtest/lib1560.c
+++ b/tests/libtest/lib1560.c
@@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
- * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) 1998 - 2021, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@@ -323,6 +323,19 @@ static struct testcase get_parts_list[] ={
};
static struct urltestcase get_url_list[] = {
+ /* IPv4 trickeries */
+ {"https://16843009", "https://1.1.1.1/", 0, 0, CURLUE_OK},
+ {"https://0x7f.1", "https://127.0.0.1/", 0, 0, CURLUE_OK},
+ {"https://0177.1", "https://127.0.0.1/", 0, 0, CURLUE_OK},
+ {"https://0111.02.0x3", "https://73.2.0.3/", 0, 0, CURLUE_OK},
+ {"https://0xff.0xff.0377.255", "https://255.255.255.255/", 0, 0, CURLUE_OK},
+ {"https://1.0xffffff", "https://1.255.255.255/", 0, 0, CURLUE_OK},
+ /* IPv4 numerical overflows or syntax errors will not normalize */
+ {"https://1.0x1000000", "https://1.0x1000000/", 0, 0, CURLUE_OK},
+ {"https://1.2.3.256", "https://1.2.3.256/", 0, 0, CURLUE_OK},
+ {"https://1.2.3.4.5", "https://1.2.3.4.5/", 0, 0, CURLUE_OK},
+ {"https://1.2.0x100.3", "https://1.2.0x100.3/", 0, 0, CURLUE_OK},
+ {"https://4294967296", "https://4294967296/", 0, 0, CURLUE_OK},
/* 40 bytes scheme is the max allowed */
{"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA://hostname/path",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa://hostname/path",