summaryrefslogtreecommitdiff
path: root/lib/urlapi.c
diff options
context:
space:
mode:
authorDaniel Stenberg <daniel@haxx.se>2021-04-08 10:34:06 +0200
committerDaniel Stenberg <daniel@haxx.se>2021-04-08 17:35:22 +0200
commit6de63f53ab187b592e45d80ac604fa4e02b0554e (patch)
tree8da74de33ed059603125fd61c6986b3a97109f47 /lib/urlapi.c
parentc502b47f1f2cf4668dd627203c62b978868df632 (diff)
downloadcurl-6de63f53ab187b592e45d80ac604fa4e02b0554e.tar.gz
urlapi: "normalize" numerical IPv4 host names (branch: bagder/urlapi-normalize-ipv4)
When the host name in a URL is given as an IPv4 numerical address, the address can be specified with dotted numericals in four different ways: a (32 bits), a.b (8.24 bits), a.b.c (8.8.16 bits) or a.b.c.d (8.8.8.8 bits), and each part can be specified in decimal, octal (0-prefixed) or hexadecimal (0x-prefixed). Instead of passing on the name as-is and leaving the handling to the underlying name functions, which made them not work with c-ares but work with getaddrinfo, this change now makes the curl URL API itself detect and "normalize" host names specified as IPv4 numericals. The WHATWG URL Spec says this is an okay way to specify a host name in a URL. RFC 3986 does not allow them, but curl didn't prevent them before and it seems other RFC 3986-using tools have not either. Host names used like this are widely supported by other tools as well due to the handling being done by getaddrinfo and friends. I decided to add the functionality into the URL API itself so that all users of these functions get the benefits, for example when wanting to compare two URLs. Also, it makes curl built to use c-ares now support them as well and makes curl builds more consistent. The normalization makes HTTPS and virtual hosted HTTP work fine even when curl gets the address specified using one of the "obscure" formats. Test 1560 is extended to verify. Fixes #6863 Closes #6871
Diffstat (limited to 'lib/urlapi.c')
-rw-r--r--lib/urlapi.c92
1 files changed, 90 insertions, 2 deletions
diff --git a/lib/urlapi.c b/lib/urlapi.c
index e3a788221..340dc33df 100644
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
- * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) 1998 - 2021, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@@ -667,6 +667,90 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
+/*
+ * Handle partial IPv4 numerical addresses and different bases, like
+ * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
+ *
+ * Each part must begin with a decimal digit. strtoul() on its own would skip
+ * leading white space and accept a '+' or '-' sign, none of which belong in
+ * a numerical IPv4 address.
+ *
+ * If the given input string is syntactically wrong or any part for example is
+ * too big, this function returns FALSE and doesn't create any output.
+ *
+ * Output the "normalized" version of that input string in plain quad decimal
+ * integers and return TRUE.
+ */
+static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
+{
+  bool done = FALSE;
+  int n = 0;
+  const char *c = hostname;
+  /* unsigned int so that every value matches the "%u" conversions below */
+  unsigned int parts[4] = {0, 0, 0, 0};
+
+  while(!done) {
+    char *endp;
+    unsigned long l;
+
+    /* most importantly this doesn't allow a leading plus or minus */
+    if((*c < '0') || (*c > '9'))
+      return FALSE;
+
+    /* strtoul() never clears errno, so a stale ERANGE must not be mistaken
+       for an overflow in this call */
+    errno = 0;
+    l = strtoul(c, &endp, 0);
+
+    /* overflow or nothing parsed at all */
+    if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
+      return FALSE;
+
+#if SIZEOF_LONG > 4
+    /* a value larger than 32 bits */
+    if(l > UINT_MAX)
+      return FALSE;
+#endif
+
+    /* range-checked above, the narrowing cannot lose bits */
+    parts[n] = (unsigned int)l;
+    c = endp;
+
+    switch(*c) {
+    case '.':
+      /* a fifth part is one too many */
+      if(n == 3)
+        return FALSE;
+      n++;
+      c++;
+      break;
+
+    case '\0':
+      done = TRUE;
+      break;
+
+    default:
+      /* trailing junk after the number: not a numerical address */
+      return FALSE;
+    }
+  }
+
+  /* this is deemed a valid IPv4 numerical address */
+
+  switch(n) {
+  case 0: /* a -- 32 bits */
+    msnprintf(outp, olen, "%u.%u.%u.%u",
+              parts[0] >> 24, (parts[0] >> 16) & 0xff,
+              (parts[0] >> 8) & 0xff, parts[0] & 0xff);
+    break;
+  case 1: /* a.b -- 8.24 bits */
+    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
+      return FALSE;
+    msnprintf(outp, olen, "%u.%u.%u.%u",
+              parts[0], (parts[1] >> 16) & 0xff,
+              (parts[1] >> 8) & 0xff, parts[1] & 0xff);
+    break;
+  case 2: /* a.b.c -- 8.8.16 bits */
+    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
+      return FALSE;
+    msnprintf(outp, olen, "%u.%u.%u.%u",
+              parts[0], parts[1], (parts[2] >> 8) & 0xff,
+              parts[2] & 0xff);
+    break;
+  case 3: /* a.b.c.d -- 8.8.8.8 bits */
+    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
+       (parts[3] > 0xff))
+      return FALSE;
+    msnprintf(outp, olen, "%u.%u.%u.%u",
+              parts[0], parts[1], parts[2], parts[3]);
+    break;
+  }
+  return TRUE;
+}
+
static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
{
char *path;
@@ -899,6 +983,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
}
if(hostname) {
+ char normalized_ipv4[sizeof("255.255.255.255") + 1];
/*
* Parse the login details and strip them out of the host name.
*/
@@ -922,7 +1007,10 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
return result;
}
- u->host = strdup(hostname);
+ if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
+ u->host = strdup(normalized_ipv4);
+ else
+ u->host = strdup(hostname);
if(!u->host)
return CURLUE_OUT_OF_MEMORY;