summaryrefslogtreecommitdiff
path: root/lib/urlapi.c
diff options
context:
space:
mode:
authorDaniel Stenberg <daniel@haxx.se>2022-09-01 10:16:24 +0200
committerDaniel Stenberg <daniel@haxx.se>2022-09-07 10:21:45 +0200
commitf703cf971ccdb93fea05f223cabbb99ca06cdc75 (patch)
treefae9b603ebdc5cb79dc6aaed3b49e8744cd57969 /lib/urlapi.c
parent2ae81e680b8d6f8b9eb36fd1a2e223783526dbdd (diff)
downloadcurl-f703cf971ccdb93fea05f223cabbb99ca06cdc75.tar.gz
urlapi: leaner with fewer allocs
Slightly faster with more robust code. Uses fewer and smaller mallocs. - remove two fields from the URL handle struct - reduce copies and allocs - use dynbuf buffers more instead of custom malloc + copies - uses dynbuf to build the host name in reduces serial alloc+free within the same function. - move dedotdotify into urlapi.c and make it static, not strdup the input and optimize it by checking for . and / before using strncmp - remove a few strlen() calls - add Curl_dyn_setlen() that can "trim" an existing dynbuf Closes #9408
Diffstat (limited to 'lib/urlapi.c')
-rw-r--r--lib/urlapi.c832
1 files changed, 487 insertions, 345 deletions
diff --git a/lib/urlapi.c b/lib/urlapi.c
index 0e07d0ba3..baecb84b9 100644
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -27,12 +27,12 @@
#include "urldata.h"
#include "urlapi-int.h"
#include "strcase.h"
-#include "dotdot.h"
#include "url.h"
#include "escape.h"
#include "curl_ctype.h"
#include "inet_pton.h"
#include "inet_ntop.h"
+#include "strdup.h"
/* The last 3 #include files should be in this order */
#include "curl_printf.h"
@@ -68,9 +68,6 @@ struct Curl_URL {
char *path;
char *query;
char *fragment;
-
- char *scratch; /* temporary scratch area */
- char *temppath; /* temporary path pointer */
long portnum; /* the numerical version */
};
@@ -88,8 +85,6 @@ static void free_urlhandle(struct Curl_URL *u)
free(u->path);
free(u->query);
free(u->fragment);
- free(u->scratch);
- free(u->temppath);
}
/*
@@ -130,64 +125,19 @@ static bool urlchar_needs_escaping(int c)
return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
}
-/*
- * strlen_url() returns the length of the given URL if the spaces within the
- * URL were properly URL encoded.
- * URL encoding should be skipped for host names, otherwise IDN resolution
- * will fail.
- */
-static size_t strlen_url(const char *url, bool relative)
-{
- const unsigned char *ptr;
- size_t newlen = 0;
- bool left = TRUE; /* left side of the ? */
- const unsigned char *host_sep = (const unsigned char *) url;
-
- if(!relative)
- host_sep = (const unsigned char *) find_host_sep(url);
-
- for(ptr = (unsigned char *)url; *ptr; ptr++) {
-
- if(ptr < host_sep) {
- ++newlen;
- continue;
- }
-
- if(*ptr == ' ') {
- if(left)
- newlen += 3;
- else
- newlen++;
- continue;
- }
-
- if (*ptr == '?')
- left = FALSE;
-
- if(urlchar_needs_escaping(*ptr))
- newlen += 2;
-
- newlen++;
- }
-
- return newlen;
-}
-
-/* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
- * the source URL accordingly.
+/* strcpy_url() creates a url in an output dynbuf and URL-encodes the spaces
+ * in the source URL accordingly.
+ *
* URL encoding should be skipped for host names, otherwise IDN resolution
* will fail.
*
- * Returns TRUE if something was updated.
*/
-static bool strcpy_url(char *output, const char *url, bool relative)
+static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
{
/* we must add this with whitespace-replacing */
bool left = TRUE;
const unsigned char *iptr;
- char *optr = output;
const unsigned char *host_sep = (const unsigned char *) url;
- bool changed = FALSE;
if(!relative)
host_sep = (const unsigned char *) find_host_sep(url);
@@ -197,19 +147,20 @@ static bool strcpy_url(char *output, const char *url, bool relative)
iptr++) {
if(iptr < host_sep) {
- *optr++ = *iptr;
+ if(Curl_dyn_addn(o, iptr, 1))
+ return CURLUE_OUT_OF_MEMORY;
continue;
}
if(*iptr == ' ') {
if(left) {
- *optr++='%'; /* add a '%' */
- *optr++='2'; /* add a '2' */
- *optr++='0'; /* add a '0' */
+ if(Curl_dyn_addn(o, "%20", 3))
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ else {
+ if(Curl_dyn_addn(o, "+", 1))
+ return CURLUE_OUT_OF_MEMORY;
}
- else
- *optr++='+'; /* add a '+' here */
- changed = TRUE;
continue;
}
@@ -217,24 +168,24 @@ static bool strcpy_url(char *output, const char *url, bool relative)
left = FALSE;
if(urlchar_needs_escaping(*iptr)) {
- msnprintf(optr, 4, "%%%02x", *iptr);
- changed = TRUE;
- optr += 3;
+ if(Curl_dyn_addf(o, "%%%02x", *iptr))
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ else {
+ if(Curl_dyn_addn(o, iptr, 1))
+ return CURLUE_OUT_OF_MEMORY;
}
- else
- *optr++ = *iptr;
}
- *optr = 0; /* null-terminate output buffer */
- return changed;
+ return CURLUE_OK;
}
/*
- * Returns true if the given URL is absolute (as opposed to relative). Returns
- * the scheme in the buffer if TRUE and 'buf' is non-NULL. The buflen must
- * be larger than MAX_SCHEME_LEN if buf is set.
+ * Returns the length of the scheme if the given URL is absolute (as opposed
+ * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
+ * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
*/
-bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
+size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
{
int i;
DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
@@ -243,7 +194,7 @@ bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
buf[0] = 0; /* always leave a defined value in buf */
#ifdef WIN32
if(STARTS_WITH_DRIVE_PREFIX(url))
- return FALSE;
+ return 0;
#endif
for(i = 0; i < MAX_SCHEME_LEN; ++i) {
char s = url[i];
@@ -257,15 +208,17 @@ bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
}
}
if(i && (url[i] == ':') && (url[i + 1] == '/')) {
+ /* the length of the scheme is the name part only */
+ size_t len = i;
if(buf) {
buf[i] = 0;
while(i--) {
buf[i] = Curl_raw_tolower(url[i]);
}
}
- return TRUE;
+ return len;
}
- return FALSE;
+ return 0;
}
/*
@@ -273,34 +226,26 @@ bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
* URL-encodes any spaces.
* The returned pointer must be freed by the caller unless NULL
* (returns NULL on out of memory).
+ *
+ * Note that this function destroys the 'base' string.
*/
-static char *concat_url(const char *base, const char *relurl)
+static char *concat_url(char *base, const char *relurl)
{
/***
TRY to append this new path to the old URL
to the right of the host part. Oh crap, this is doomed to cause
problems in the future...
*/
- char *newest;
+ struct dynbuf newest;
char *protsep;
char *pathsep;
- size_t newlen;
bool host_changed = FALSE;
-
const char *useurl = relurl;
- size_t urllen;
-
- /* we must make our own copy of the URL to play with, as it may
- point to read-only data */
- char *url_clone = strdup(base);
-
- if(!url_clone)
- return NULL; /* skip out of this NOW */
/* protsep points to the start of the host name */
- protsep = strstr(url_clone, "//");
+ protsep = strstr(base, "//");
if(!protsep)
- protsep = url_clone;
+ protsep = base;
else
protsep += 2; /* pass the slashes */
@@ -393,38 +338,24 @@ static char *concat_url(const char *base, const char *relurl)
}
}
- /* If the new part contains a space, this is a mighty stupid redirect
- but we still make an effort to do "right". To the left of a '?'
- letter we replace each space with %20 while it is replaced with '+'
- on the right side of the '?' letter.
- */
- newlen = strlen_url(useurl, !host_changed);
-
- urllen = strlen(url_clone);
-
- newest = malloc(urllen + 1 + /* possible slash */
- newlen + 1 /* zero byte */);
-
- if(!newest) {
- free(url_clone); /* don't leak this */
- return NULL;
- }
+ Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
/* copy over the root url part */
- memcpy(newest, url_clone, urllen);
+ if(Curl_dyn_add(&newest, base))
+ return NULL;
/* check if we need to append a slash */
if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
;
- else
- newest[urllen++]='/';
+ else {
+ if(Curl_dyn_addn(&newest, "/", 1))
+ return NULL;
+ }
/* then append the new piece on the right side */
- strcpy_url(&newest[urllen], useurl, !host_changed);
+ strcpy_url(&newest, useurl, !host_changed);
- free(url_clone);
-
- return newest;
+ return Curl_dyn_ptr(&newest);
}
/* scan for byte values < 31 or 127 */
@@ -458,7 +389,7 @@ static bool junkscan(const char *part, unsigned int flags)
*
*/
static CURLUcode parse_hostname_login(struct Curl_URL *u,
- char **hostname,
+ struct dynbuf *host,
unsigned int flags)
{
CURLUcode result = CURLUE_OK;
@@ -468,23 +399,27 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
char *optionsp = NULL;
const struct Curl_handler *h = NULL;
- /* At this point, we're hoping all the other special cases have
- * been taken care of, so conn->host.name is at most
- * [user[:password][;options]]@]hostname
+ /* At this point, we assume all the other special cases have been taken
+ * care of, so the host is at most
+ *
+ * [user[:password][;options]]@]hostname
*
* We need somewhere to put the embedded details, so do that first.
*/
- char *ptr = strchr(*hostname, '@');
- char *login = *hostname;
+ char *login = Curl_dyn_ptr(host);
+ char *ptr;
+
+ DEBUGASSERT(login);
+ ptr = strchr(login, '@');
if(!ptr)
goto out;
/* We will now try to extract the
* possible login information in a string like:
* ftp://user:password@ftp.my.site:8021/README */
- *hostname = ++ptr;
+ ptr++;
/* if this is a known scheme, get some details */
if(u->scheme)
@@ -530,6 +465,10 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
u->options = optionsp;
}
+ /* move the name to the start of the host buffer */
+ if(Curl_dyn_tail(host, strlen(ptr)))
+ return CURLUE_OUT_OF_MEMORY;
+
return CURLUE_OK;
out:
@@ -543,13 +482,13 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
return result;
}
-UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
+UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
bool has_scheme)
{
char *portptr = NULL;
char endbracket;
int len;
-
+ char *hostname = Curl_dyn_ptr(host);
/*
* Find the end of an IPv6 address, either on the ']' ending bracket or
* a percent-encoded zone index.
@@ -586,6 +525,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
char *rest;
long port;
char portbuf[7];
+ size_t keep = portptr - hostname;
/* Browser behavior adaptation. If there's a colon with no digits after,
just cut off the name there which makes us ignore the colon and just
@@ -594,15 +534,15 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
Don't do it if the URL has no scheme, to make something that looks like
a scheme not work!
*/
- if(!portptr[1]) {
- *portptr = '\0';
+ Curl_dyn_setlen(host, keep);
+ portptr++;
+ if(!*portptr)
return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
- }
- if(!ISDIGIT(portptr[1]))
+ if(!ISDIGIT(*portptr))
return CURLUE_BAD_PORT_NUMBER;
- port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */
+ port = strtol(portptr, &rest, 10); /* Port number must be decimal */
if(port > 0xffff)
return CURLUE_BAD_PORT_NUMBER;
@@ -610,7 +550,6 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
if(rest[0])
return CURLUE_BAD_PORT_NUMBER;
- *portptr++ = '\0'; /* cut off the name there */
*rest = 0;
/* generate a new port number string to get rid of leading zeroes etc */
msnprintf(portbuf, sizeof(portbuf), "%ld", port);
@@ -623,12 +562,15 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
return CURLUE_OK;
}
-static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
+static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
+ size_t hlen) /* length of hostname */
{
size_t len;
- size_t hlen = strlen(hostname);
+ DEBUGASSERT(hostname);
- if(hostname[0] == '[') {
+ if(!hostname[0])
+ return CURLUE_NO_HOST;
+ else if(hostname[0] == '[') {
const char *l = "0123456789abcdefABCDEF:.";
if(hlen < 4) /* '[::]' is the shortest possible valid string */
return CURLUE_BAD_IPV6;
@@ -692,8 +634,6 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
/* hostname with bad content */
return CURLUE_BAD_HOSTNAME;
}
- if(!hostname[0])
- return CURLUE_NO_HOST;
return CURLUE_OK;
}
@@ -787,79 +727,231 @@ static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
return TRUE;
}
-/* return strdup'ed version in 'outp', possibly percent decoded */
-static CURLUcode decode_host(char *hostname, char **outp)
+/* if necessary, replace the host content with a URL decoded version */
+static CURLUcode decode_host(struct dynbuf *host)
{
char *per = NULL;
- if(hostname[0] != '[')
+ const char *hostname = Curl_dyn_ptr(host);
+ if(hostname[0] == '[')
/* only decode if not an ipv6 numerical */
- per = strchr(hostname, '%');
- if(!per) {
- *outp = strdup(hostname);
- if(!*outp)
- return CURLUE_OUT_OF_MEMORY;
- }
+ return CURLUE_OK;
+ per = strchr(hostname, '%');
+ if(!per)
+ /* nothing to decode */
+ return CURLUE_OK;
else {
- /* might be encoded */
+ /* encoded */
size_t dlen;
- CURLcode result = Curl_urldecode(hostname, 0, outp, &dlen, REJECT_CTRL);
+ char *decoded;
+ CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
+ REJECT_CTRL);
if(result)
return CURLUE_BAD_HOSTNAME;
+ Curl_dyn_reset(host);
+ result = Curl_dyn_addn(host, decoded, dlen);
+ free(decoded);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
}
return CURLUE_OK;
}
-static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
+/*
+ * "Remove Dot Segments"
+ * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
+ */
+
+/*
+ * dedotdotify()
+ * @unittest: 1395
+ *
+ * This function gets a null-terminated path with dot and dotdot sequences
+ * passed in and strips them off according to the rules in RFC 3986 section
+ * 5.2.4.
+ *
+ * The function handles a query part ('?' + stuff) appended but it expects
+ * that fragments ('#' + stuff) have already been cut off.
+ *
+ * Note that this funciton *writes* a byte into the source buffer during its
+ * operation.
+ *
+ * RETURNS
+ *
+ * an allocated dedotdotified output string
+ */
+UNITTEST char *dedotdotify(const char *input, size_t clen);
+UNITTEST char *dedotdotify(const char *input, size_t clen)
{
- char *path;
- bool path_alloced = FALSE;
+ char *out = malloc(clen + 1);
+ char *outptr;
+ const char *orginput = input;
+ char *queryp;
+ if(!out)
+ return NULL; /* out of memory */
+
+ *out = 0; /* null-terminates, for inputs like "./" */
+ outptr = out;
+
+ if(!*input)
+ /* zero length input string, return that */
+ return out;
+
+ /*
+ * To handle query-parts properly, we must find it and remove it during the
+ * dotdot-operation and then append it again at the end to the output
+ * string.
+ */
+ queryp = strchr(input, '?');
+
+ do {
+ bool dotdot = TRUE;
+ if(*input == '.') {
+ /* A. If the input buffer begins with a prefix of "../" or "./", then
+ remove that prefix from the input buffer; otherwise, */
+
+ if(!strncmp("./", input, 2)) {
+ input += 2;
+ clen -= 2;
+ }
+ else if(!strncmp("../", input, 3)) {
+ input += 3;
+ clen -= 3;
+ }
+ /* D. if the input buffer consists only of "." or "..", then remove
+ that from the input buffer; otherwise, */
+
+ else if(!strcmp(".", input) || !strcmp("..", input) ||
+ !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
+ *out = 0;
+ break;
+ }
+ else
+ dotdot = FALSE;
+ }
+ else if(*input == '/') {
+ /* B. if the input buffer begins with a prefix of "/./" or "/.", where
+ "." is a complete path segment, then replace that prefix with "/" in
+ the input buffer; otherwise, */
+ if(!strncmp("/./", input, 3)) {
+ input += 2;
+ clen -= 2;
+ }
+ else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
+ *outptr++ = '/';
+ *outptr = 0;
+ break;
+ }
+
+ /* C. if the input buffer begins with a prefix of "/../" or "/..",
+ where ".." is a complete path segment, then replace that prefix with
+ "/" in the input buffer and remove the last segment and its
+ preceding "/" (if any) from the output buffer; otherwise, */
+
+ else if(!strncmp("/../", input, 4)) {
+ input += 3;
+ clen -= 3;
+ /* remove the last segment from the output buffer */
+ while(outptr > out) {
+ outptr--;
+ if(*outptr == '/')
+ break;
+ }
+ *outptr = 0; /* null-terminate where it stops */
+ }
+ else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
+ /* remove the last segment from the output buffer */
+ while(outptr > out) {
+ outptr--;
+ if(*outptr == '/')
+ break;
+ }
+ *outptr++ = '/';
+ *outptr = 0; /* null-terminate where it stops */
+ break;
+ }
+ else
+ dotdot = FALSE;
+ }
+ else
+ dotdot = FALSE;
+
+ if(!dotdot) {
+ /* E. move the first path segment in the input buffer to the end of
+ the output buffer, including the initial "/" character (if any) and
+ any subsequent characters up to, but not including, the next "/"
+ character or the end of the input buffer. */
+
+ do {
+ *outptr++ = *input++;
+ clen--;
+ } while(*input && (*input != '/') && (*input != '?'));
+ *outptr = 0;
+ }
+
+ /* continue until end of input string OR, if there is a terminating
+ query part, stop there */
+ } while(*input && (!queryp || (input < queryp)));
+
+ if(queryp) {
+ size_t qlen;
+ /* There was a query part, append that to the output. */
+ size_t oindex = queryp - orginput;
+ qlen = strlen(&orginput[oindex]);
+ memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */
+ }
+
+ return out;
+}
+
+static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
+{
+ const char *path;
+ size_t pathlen;
bool uncpath = FALSE;
- char *hostname;
char *query = NULL;
char *fragment = NULL;
- CURLUcode result;
- bool url_has_scheme = FALSE;
char schemebuf[MAX_SCHEME_LEN + 1];
const char *schemep = NULL;
size_t schemelen = 0;
size_t urllen;
+ CURLUcode result = CURLUE_OK;
+ size_t fraglen = 0;
+ struct dynbuf host;
DEBUGASSERT(url);
+ Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
+
/*************************************************************
* Parse the URL.
************************************************************/
/* allocate scratch area */
urllen = strlen(url);
- if(urllen > CURL_MAX_INPUT_LENGTH)
+ if(urllen > CURL_MAX_INPUT_LENGTH) {
/* excessive input length */
- return CURLUE_MALFORMED_INPUT;
-
- path = u->scratch = malloc(urllen * 2 + 2);
- if(!path)
- return CURLUE_OUT_OF_MEMORY;
-
- hostname = &path[urllen + 1];
- hostname[0] = 0;
-
- if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
- url_has_scheme = TRUE;
- schemelen = strlen(schemebuf);
+ result = CURLUE_MALFORMED_INPUT;
+ goto fail;
}
+ schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf));
+
/* handle the file: scheme */
- if(url_has_scheme && !strcmp(schemebuf, "file")) {
- if(urllen <= 6)
+ if(schemelen && !strcmp(schemebuf, "file")) {
+ if(urllen <= 6) {
/* file:/ is not enough to actually be a complete file: URL */
- return CURLUE_BAD_FILE_URL;
+ result = CURLUE_BAD_FILE_URL;
+ goto fail;
+ }
/* path has been allocated large enough to hold this */
- strcpy(path, &url[5]);
+ path = (char *)&url[5];
- u->scheme = strdup("file");
- if(!u->scheme)
- return CURLUE_OUT_OF_MEMORY;
+ schemep = u->scheme = strdup("file");
+ if(!u->scheme) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
/* Extra handling URLs with an authority component (i.e. that start with
* "file://")
@@ -869,7 +961,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
*/
if(path[0] == '/' && path[1] == '/') {
/* swallow the two slashes */
- char *ptr = &path[2];
+ const char *ptr = &path[2];
/*
* According to RFC 8089, a file: URL can be reliably dereferenced if:
@@ -905,13 +997,17 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
chars, and the delimiting slash character must be appended to the
host name */
path = strpbrk(ptr, "/\\:*?\"<>|");
- if(!path || *path != '/')
- return CURLUE_BAD_FILE_URL;
+ if(!path || *path != '/') {
+ result = CURLUE_BAD_FILE_URL;
+ goto fail;
+ }
len = path - ptr;
if(len) {
- memcpy(hostname, ptr, len);
- hostname[len] = 0;
+ if(Curl_dyn_addn(&host, ptr, len)) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
uncpath = TRUE;
}
@@ -919,7 +1015,8 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
#else
/* Invalid file://hostname/, expected localhost or 127.0.0.1 or
none */
- return CURLUE_BAD_FILE_URL;
+ result = CURLUE_BAD_FILE_URL;
+ goto fail;
#endif
}
}
@@ -928,7 +1025,8 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
}
if(!uncpath)
- hostname = NULL; /* no host for file: URLs by default */
+ /* no host for file: URLs by default */
+ Curl_dyn_reset(&host);
#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
/* Don't allow Windows drive letters when not in Windows.
@@ -936,13 +1034,14 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
STARTS_WITH_URL_DRIVE_PREFIX(path)) {
/* File drive letters are only accepted in MSDOS/Windows */
- return CURLUE_BAD_FILE_URL;
+ result = CURLUE_BAD_FILE_URL;
+ goto fail;
}
#else
/* If the path starts with a slash and a drive letter, ditch the slash */
if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
/* This cannot be done with strcpy, as the memory chunks overlap! */
- memmove(path, &path[1], strlen(&path[1]) + 1);
+ path++;
}
#endif
@@ -952,32 +1051,39 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
const char *p;
const char *hostp;
size_t len;
- path[0] = 0;
- if(url_has_scheme) {
+ if(schemelen) {
int i = 0;
p = &url[schemelen + 1];
while(p && (*p == '/') && (i < 4)) {
p++;
i++;
}
- if((i < 1) || (i>3))
+ if((i < 1) || (i>3)) {
/* less than one or more than three slashes */
- return CURLUE_BAD_SLASHES;
+ result = CURLUE_BAD_SLASHES;
+ goto fail;
+ }
schemep = schemebuf;
if(!Curl_builtin_scheme(schemep) &&
- !(flags & CURLU_NON_SUPPORT_SCHEME))
- return CURLUE_UNSUPPORTED_SCHEME;
+ !(flags & CURLU_NON_SUPPORT_SCHEME)) {
+ result = CURLUE_UNSUPPORTED_SCHEME;
+ goto fail;
+ }
- if(junkscan(schemep, flags))
- return CURLUE_BAD_SCHEME;
+ if(junkscan(schemep, flags)) {
+ result = CURLUE_BAD_SCHEME;
+ goto fail;
+ }
}
else {
/* no scheme! */
- if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
- return CURLUE_BAD_SCHEME;
+ if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
+ result = CURLUE_BAD_SCHEME;
+ goto fail;
+ }
if(flags & CURLU_DEFAULT_SCHEME)
schemep = DEFAULT_SCHEME;
@@ -994,122 +1100,156 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
len = p - hostp;
if(len) {
- memcpy(hostname, hostp, len);
- hostname[len] = 0;
+ if(Curl_dyn_addn(&host, hostp, len)) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
}
else {
- if(!(flags & CURLU_NO_AUTHORITY))
- return CURLUE_NO_HOST;
+ if(!(flags & CURLU_NO_AUTHORITY)) {
+ result = CURLUE_NO_HOST;
+ goto fail;
+ }
}
- strcpy(path, p);
+ path = (char *)p;
if(schemep) {
u->scheme = strdup(schemep);
- if(!u->scheme)
- return CURLUE_OUT_OF_MEMORY;
+ if(!u->scheme) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
}
}
- if((flags & CURLU_URLENCODE) && path[0]) {
- /* worst case output length is 3x the original! */
- char *newp = malloc(strlen(path) * 3);
- if(!newp)
- return CURLUE_OUT_OF_MEMORY;
- path_alloced = TRUE;
- strcpy_url(newp, path, TRUE); /* consider it relative */
- u->temppath = path = newp;
+ if(*path && (flags & CURLU_URLENCODE)) {
+ struct dynbuf enc;
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+ if(strcpy_url(&enc, path, TRUE)) { /* consider it relative */
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ path = u->path = Curl_dyn_ptr(&enc);
}
fragment = strchr(path, '#');
if(fragment) {
- *fragment++ = 0;
- if(junkscan(fragment, flags))
- return CURLUE_BAD_FRAGMENT;
- if(fragment[0]) {
- u->fragment = strdup(fragment);
- if(!u->fragment)
- return CURLUE_OUT_OF_MEMORY;
+ fraglen = strlen(fragment);
+ if(fraglen > 1) {
+ /* skip the leading '#' in the copy but include the terminating null */
+ u->fragment = Curl_memdup(fragment + 1, fraglen);
+ if(!u->fragment) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+
+ if(junkscan(u->fragment, flags)) {
+ result = CURLUE_BAD_FRAGMENT;
+ goto fail;
+ }
}
}
query = strchr(path, '?');
- if(query) {
- *query++ = 0;
- if(junkscan(query, flags))
- return CURLUE_BAD_QUERY;
- /* done even if the query part is a blank string */
- u->query = strdup(query);
- if(!u->query)
- return CURLUE_OUT_OF_MEMORY;
- }
+ if(query && (!fragment || (query < fragment))) {
+ size_t qlen = strlen(query) - fraglen; /* includes '?' */
+ pathlen = strlen(path) - qlen - fraglen;
+ if(qlen > 1) {
+ u->query = Curl_memdup(query + 1, qlen);
+ if(!u->query) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ u->query[qlen - 1] = 0;
- if(junkscan(path, flags))
- return CURLUE_BAD_PATH;
+ if(junkscan(u->query, flags)) {
+ result = CURLUE_BAD_QUERY;
+ goto fail;
+ }
+ }
+ else {
+ /* single byte query */
+ u->query = strdup("");
+ if(!u->query) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ }
+ }
+ else
+ pathlen = strlen(path) - fraglen;
- if(!path[0])
- /* if there's no path left set, unset */
+ if(!pathlen) {
+ /* there is no path left, unset */
path = NULL;
+ }
else {
+ if(!u->path) {
+ u->path = Curl_memdup(path, pathlen + 1);
+ if(!u->path) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ u->path[pathlen] = 0;
+ path = u->path;
+ }
+ else if(flags & CURLU_URLENCODE)
+ /* it might have encoded more than just the path so cut it */
+ u->path[pathlen] = 0;
+
+ if(junkscan(u->path, flags)) {
+ result = CURLUE_BAD_PATH;
+ goto fail;
+ }
+
if(!(flags & CURLU_PATH_AS_IS)) {
/* remove ../ and ./ sequences according to RFC3986 */
- char *newp = Curl_dedotdotify(path);
- if(!newp)
- return CURLUE_OUT_OF_MEMORY;
-
- if(strcmp(newp, path)) {
- /* if we got a new version */
- if(path_alloced)
- Curl_safefree(u->temppath);
- u->temppath = path = newp;
- path_alloced = TRUE;
+ char *newp = dedotdotify((char *)path, pathlen);
+ if(!newp) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
}
- else
- free(newp);
+ free(u->path);
+ u->path = newp;
}
-
- u->path = path_alloced?path:strdup(path);
- if(!u->path)
- return CURLUE_OUT_OF_MEMORY;
- u->temppath = NULL; /* used now */
}
- if(hostname) {
+ if(Curl_dyn_len(&host)) {
char normalized_ipv4[sizeof("255.255.255.255") + 1];
/*
* Parse the login details and strip them out of the host name.
*/
- result = parse_hostname_login(u, &hostname, flags);
- if(result)
- return result;
-
- result = Curl_parse_port(u, hostname, url_has_scheme);
+ result = parse_hostname_login(u, &host, flags);
+ if(!result)
+ result = Curl_parse_port(u, &host, schemelen);
if(result)
- return result;
+ goto fail;
- if(junkscan(hostname, flags))
- return CURLUE_BAD_HOSTNAME;
+ if(junkscan(Curl_dyn_ptr(&host), flags)) {
+ result = CURLUE_BAD_HOSTNAME;
+ goto fail;
+ }
- if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
- /* Skip hostname check, it's allowed to be empty. */
- u->host = strdup("");
+ if(ipv4_normalize(Curl_dyn_ptr(&host),
+ normalized_ipv4, sizeof(normalized_ipv4))) {
+ Curl_dyn_reset(&host);
+ if(Curl_dyn_add(&host, normalized_ipv4)) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
}
else {
- if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
- u->host = strdup(normalized_ipv4);
- else {
- result = decode_host(hostname, &u->host);
- if(result)
- return result;
- result = hostname_check(u, u->host);
- if(result)
- return result;
- }
+ result = decode_host(&host);
+ if(!result)
+ result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host));
+ if(result)
+ goto fail;
}
- if(!u->host)
- return CURLUE_OUT_OF_MEMORY;
+
if((flags & CURLU_GUESS_SCHEME) && !schemep) {
+ const char *hostname = Curl_dyn_ptr(&host);
/* legacy curl-style guess based on host name */
if(checkprefix("ftp.", hostname))
schemep = "ftp";
@@ -1127,27 +1267,26 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
schemep = "http";
u->scheme = strdup(schemep);
- if(!u->scheme)
- return CURLUE_OUT_OF_MEMORY;
+ if(!u->scheme) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ }
+ }
+ else if(flags & CURLU_NO_AUTHORITY) {
+ /* allowed to be empty. */
+ if(Curl_dyn_add(&host, "")) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
}
}
- Curl_safefree(u->scratch);
- Curl_safefree(u->temppath);
+ u->host = Curl_dyn_ptr(&host);
- return CURLUE_OK;
-}
-
-/*
- * Parse the URL and set the relevant members of the Curl_URL struct.
- */
-static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
-{
- CURLUcode result = seturl(url, u, flags);
- if(result) {
- free_urlhandle(u);
- memset(u, 0, sizeof(struct Curl_URL));
- }
+ return result;
+ fail:
+ Curl_dyn_free(&host);
+ free_urlhandle(u);
return result;
}
@@ -1165,8 +1304,6 @@ static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
free_urlhandle(u);
*u = tmpurl;
}
- else
- free_urlhandle(&tmpurl);
return result;
}
@@ -1344,14 +1481,13 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
if(u->host[0] == '[') {
if(u->zoneid) {
/* make it '[ host %25 zoneid ]' */
+ struct dynbuf enc;
size_t hostlen = strlen(u->host);
- size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
- allochost = malloc(alen);
- if(!allochost)
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+ if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
+ u->zoneid))
return CURLUE_OUT_OF_MEMORY;
- memcpy(allochost, u->host, hostlen - 1);
- msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
- "%%25%s]", u->zoneid);
+ allochost = Curl_dyn_ptr(&enc);
}
}
else if(urlencode) {
@@ -1362,32 +1498,32 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
else {
/* only encode '%' in output host name */
char *host = u->host;
- size_t pcount = 0;
+ bool percent = FALSE;
/* first, count number of percents present in the name */
while(*host) {
- if(*host == '%')
- pcount++;
+ if(*host == '%') {
+ percent = TRUE;
+ break;
+ }
host++;
}
- /* if there were percents, encode the host name */
- if(pcount) {
- size_t hostlen = strlen(u->host);
- size_t alen = hostlen + 2 * pcount + 1;
- char *o = allochost = malloc(alen);
- if(!allochost)
- return CURLUE_OUT_OF_MEMORY;
-
+ /* if there were percent(s), encode the host name */
+ if(percent) {
+ struct dynbuf enc;
+ CURLcode result;
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
host = u->host;
while(*host) {
- if(*host == '%') {
- memcpy(o, "%25", 3);
- o += 3;
- host++;
- continue;
- }
- *o++ = *host++;
+ if(*host == '%')
+ result = Curl_dyn_addn(&enc, "%25", 3);
+ else
+ result = Curl_dyn_addn(&enc, host, 1);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
+ host++;
}
- *o = '\0';
+ free(u->host);
+ u->host = Curl_dyn_ptr(&enc);
}
}
@@ -1445,16 +1581,12 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
*part = decoded;
}
if(urlencode) {
- /* worst case output length is 3x the original! */
- char *newp = malloc(strlen(*part) * 3);
- if(!newp)
+ struct dynbuf enc;
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+ if(strcpy_url(&enc, *part, TRUE)) /* consider it relative */
return CURLUE_OUT_OF_MEMORY;
- if(strcpy_url(newp, *part, TRUE)) { /* consider it relative */
- free(*part);
- *part = newp;
- }
- else
- free(newp);
+ free(*part);
+ *part = Curl_dyn_ptr(&enc);
}
return CURLUE_OK;
@@ -1628,14 +1760,16 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
if(urlencode) {
const unsigned char *i;
- char *o;
- char *enc = malloc(nalloc * 3 + 1); /* for worst case! */
- if(!enc)
- return CURLUE_OUT_OF_MEMORY;
- for(i = (const unsigned char *)part, o = enc; *i; i++) {
+ struct dynbuf enc;
+
+ Curl_dyn_init(&enc, nalloc * 3 + 1);
+
+ for(i = (const unsigned char *)part; *i; i++) {
+ CURLcode result;
if((*i == ' ') && plusencode) {
- *o = '+';
- o++;
+ result = Curl_dyn_addn(&enc, "+", 1);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
}
else if(Curl_isunreserved(*i) ||
((*i == '/') && urlskipslash) ||
@@ -1643,16 +1777,17 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
if((*i == '=') && equalsencode)
/* only skip the first equals sign */
equalsencode = FALSE;
- *o = *i;
- o++;
+ result = Curl_dyn_addn(&enc, i, 1);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
}
else {
- msnprintf(o, 4, "%%%02x", *i);
- o += 3;
+ result = Curl_dyn_addf(&enc, "%%%02x", *i);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
}
}
- *o = 0; /* null-terminate */
- newp = enc;
+ newp = Curl_dyn_ptr(&enc);
}
else {
char *p;
@@ -1674,34 +1809,41 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
}
if(appendquery) {
- /* Append the string onto the old query. Add a '&' separator if none is
- present at the end of the existing query already */
+ /* Append the 'newp' string onto the old query. Add a '&' separator if
+ none is present at the end of the existing query already */
+
size_t querylen = u->query ? strlen(u->query) : 0;
bool addamperand = querylen && (u->query[querylen -1] != '&');
if(querylen) {
- size_t newplen = strlen(newp);
- char *p = malloc(querylen + addamperand + newplen + 1);
- if(!p) {
- free((char *)newp);
- return CURLUE_OUT_OF_MEMORY;
+ struct dynbuf enc;
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+
+ if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
+ goto nomem;
+
+ if(addamperand) {
+ if(Curl_dyn_addn(&enc, "&", 1))
+ goto nomem;
}
- strcpy(p, u->query); /* original query */
- if(addamperand)
- p[querylen] = '&'; /* ampersand */
- strcpy(&p[querylen + addamperand], newp); /* new suffix */
+ if(Curl_dyn_add(&enc, newp))
+ goto nomem;
free((char *)newp);
free(*storep);
- *storep = p;
+ *storep = Curl_dyn_ptr(&enc);
return CURLUE_OK;
+ nomem:
+ free((char *)newp);
+ return CURLUE_OUT_OF_MEMORY;
}
}
if(what == CURLUPART_HOST) {
- if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
+ size_t n = strlen(newp);
+ if(!n && (flags & CURLU_NO_AUTHORITY)) {
/* Skip hostname check, it's allowed to be empty. */
}
else {
- if(hostname_check(u, (char *)newp)) {
+ if(hostname_check(u, (char *)newp, n)) {
free((char *)newp);
return CURLUE_BAD_HOSTNAME;
}