summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Daniel Stenberg <daniel@haxx.se>, 2018-09-14 23:33:28 +0200
committer: Daniel Stenberg <daniel@haxx.se>, 2018-09-20 09:04:11 +0200
commit: b4016899dc29f5a1fa8764f958cb27d8a400e842 (patch)
tree: 1f565b4f78dc3117b6cc30fd3841fed4a15e9054
parent: 5c73093edb3bd527db9c8abdee53d0f18e6a4cc1 (diff)
download: curl-bagder/urlapi-internal.tar.gz
url: use the URL API internally as well (branch: bagder/urlapi-internal)
... to make it a truly unified URL parser. Closes #3017
-rw-r--r-- lib/curl_path.c | 4
-rw-r--r-- lib/dict.c | 2
-rw-r--r-- lib/easy.c | 4
-rw-r--r-- lib/file.c | 4
-rw-r--r-- lib/ftp.c | 35
-rw-r--r-- lib/ftp.h | 2
-rw-r--r-- lib/gopher.c | 2
-rw-r--r-- lib/http.c | 117
-rw-r--r-- lib/imap.c | 20
-rw-r--r-- lib/ldap.c | 10
-rw-r--r-- lib/multi.c | 9
-rw-r--r-- lib/pop3.c | 5
-rw-r--r-- lib/smb.c | 2
-rw-r--r-- lib/smtp.c | 5
-rw-r--r-- lib/tftp.c | 4
-rw-r--r-- lib/transfer.c | 45
-rw-r--r-- lib/url.c | 968
-rw-r--r-- lib/url.h | 2
-rw-r--r-- lib/urldata.h | 17
-rw-r--r-- tests/data/test325 | 4
-rw-r--r-- tests/data/test523 | 4
-rw-r--r-- tests/data/test563 | 2
22 files changed, 348 insertions, 919 deletions
diff --git a/lib/curl_path.c b/lib/curl_path.c
index e843deac7..68f3e44ba 100644
--- a/lib/curl_path.c
+++ b/lib/curl_path.c
@@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
- * Copyright (C) 1998 - 2017, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@@ -39,7 +39,7 @@ CURLcode Curl_getworkingpath(struct connectdata *conn,
char *working_path;
size_t working_path_len;
CURLcode result =
- Curl_urldecode(data, data->state.path, 0, &working_path,
+ Curl_urldecode(data, data->state.up.path, 0, &working_path,
&working_path_len, FALSE);
if(result)
return result;
diff --git a/lib/dict.c b/lib/dict.c
index 408d57b92..78ef046d4 100644
--- a/lib/dict.c
+++ b/lib/dict.c
@@ -136,7 +136,7 @@ static CURLcode dict_do(struct connectdata *conn, bool *done)
struct Curl_easy *data = conn->data;
curl_socket_t sockfd = conn->sock[FIRSTSOCKET];
- char *path = data->state.path;
+ char *path = data->state.up.path;
curl_off_t *bytecount = &data->req.bytecount;
*done = TRUE; /* unconditionally */
diff --git a/lib/easy.c b/lib/easy.c
index 88fc4f460..fb9105a1c 100644
--- a/lib/easy.c
+++ b/lib/easy.c
@@ -1002,10 +1002,6 @@ struct Curl_easy *curl_easy_duphandle(struct Curl_easy *data)
*/
void curl_easy_reset(struct Curl_easy *data)
{
- Curl_safefree(data->state.pathbuffer);
-
- data->state.path = NULL;
-
Curl_free_request_state(data);
/* zero out UserDefined data: */
diff --git a/lib/file.c b/lib/file.c
index 542f34a45..3cfa0e703 100644
--- a/lib/file.c
+++ b/lib/file.c
@@ -143,7 +143,7 @@ static CURLcode file_connect(struct connectdata *conn, bool *done)
#endif
size_t real_path_len;
- CURLcode result = Curl_urldecode(data, data->state.path, 0, &real_path,
+ CURLcode result = Curl_urldecode(data, data->state.up.path, 0, &real_path,
&real_path_len, FALSE);
if(result)
return result;
@@ -197,7 +197,7 @@ static CURLcode file_connect(struct connectdata *conn, bool *done)
file->fd = fd;
if(!data->set.upload && (fd == -1)) {
- failf(data, "Couldn't open file %s", data->state.path);
+ failf(data, "Couldn't open file %s", data->state.up.path);
file_done(conn, CURLE_FILE_COULDNT_READ_FILE, FALSE);
return CURLE_FILE_COULDNT_READ_FILE;
}
diff --git a/lib/ftp.c b/lib/ftp.c
index 429708fc5..270b1e5f0 100644
--- a/lib/ftp.c
+++ b/lib/ftp.c
@@ -1444,6 +1444,7 @@ static CURLcode ftp_state_list(struct connectdata *conn)
{
CURLcode result = CURLE_OK;
struct Curl_easy *data = conn->data;
+ struct FTP *ftp = data->req.protop;
/* If this output is to be machine-parsed, the NLST command might be better
to use, since the LIST command output is not specified or standard in any
@@ -1460,7 +1461,7 @@ static CURLcode ftp_state_list(struct connectdata *conn)
then just do LIST (in that case: nothing to do here)
*/
char *cmd, *lstArg, *slashPos;
- const char *inpath = data->state.path;
+ const char *inpath = ftp->path;
lstArg = NULL;
if((data->set.ftp_filemethod == FTPFILE_NOCWD) &&
@@ -3141,7 +3142,7 @@ static CURLcode ftp_done(struct connectdata *conn, CURLcode status,
int ftpcode;
CURLcode result = CURLE_OK;
char *path = NULL;
- const char *path_to_use = data->state.path;
+ const char *path_to_use = ftp->path;
if(!ftp)
return CURLE_OK;
@@ -3346,7 +3347,7 @@ static CURLcode ftp_done(struct connectdata *conn, CURLcode status,
/* Send any post-transfer QUOTE strings? */
if(!status && !result && !premature && data->set.postquote)
result = ftp_sendquote(conn, data->set.postquote);
-
+ Curl_safefree(ftp->pathalloc);
return result;
}
@@ -3695,12 +3696,13 @@ static void wc_data_dtor(void *ptr)
static CURLcode init_wc_data(struct connectdata *conn)
{
char *last_slash;
- char *path = conn->data->state.path;
+ struct FTP *ftp = conn->data->req.protop;
+ char *path = ftp->path;
struct WildcardData *wildcard = &(conn->data->wildcard);
CURLcode result = CURLE_OK;
struct ftp_wc *ftpwc = NULL;
- last_slash = strrchr(conn->data->state.path, '/');
+ last_slash = strrchr(ftp->path, '/');
if(last_slash) {
last_slash++;
if(last_slash[0] == '\0') {
@@ -3757,7 +3759,7 @@ static CURLcode init_wc_data(struct connectdata *conn)
goto fail;
}
- wildcard->path = strdup(conn->data->state.path);
+ wildcard->path = strdup(ftp->path);
if(!wildcard->path) {
result = CURLE_OUT_OF_MEMORY;
goto fail;
@@ -3828,16 +3830,15 @@ static CURLcode wc_statemach(struct connectdata *conn)
/* filelist has at least one file, lets get first one */
struct ftp_conn *ftpc = &conn->proto.ftpc;
struct curl_fileinfo *finfo = wildcard->filelist.head->ptr;
+ struct FTP *ftp = conn->data->req.protop;
char *tmp_path = aprintf("%s%s", wildcard->path, finfo->filename);
if(!tmp_path)
return CURLE_OUT_OF_MEMORY;
- /* switch default "state.pathbuffer" and tmp_path, good to see
- ftp_parse_url_path function to understand this trick */
- Curl_safefree(conn->data->state.pathbuffer);
- conn->data->state.pathbuffer = tmp_path;
- conn->data->state.path = tmp_path;
+ /* switch default ftp->path and tmp_path */
+ free(ftp->pathalloc);
+ ftp->pathalloc = ftp->path = tmp_path;
infof(conn->data, "Wildcard - START of \"%s\"\n", finfo->filename);
if(conn->data->set.chunk_bgn) {
@@ -4105,7 +4106,7 @@ CURLcode ftp_parse_url_path(struct connectdata *conn)
struct FTP *ftp = data->req.protop;
struct ftp_conn *ftpc = &conn->proto.ftpc;
const char *slash_pos; /* position of the first '/' char in curpos */
- const char *path_to_use = data->state.path;
+ const char *path_to_use = ftp->path;
const char *cur_pos;
const char *filename = NULL;
@@ -4191,7 +4192,7 @@ CURLcode ftp_parse_url_path(struct connectdata *conn)
/* parse the URL path into separate path components */
while((slash_pos = strchr(cur_pos, '/')) != NULL) {
/* 1 or 0 pointer offset to indicate absolute directory */
- ssize_t absolute_dir = ((cur_pos - data->state.path > 0) &&
+ ssize_t absolute_dir = ((cur_pos - ftp->path > 0) &&
(ftpc->dirdepth == 0))?1:0;
/* seek out the next path component */
@@ -4268,7 +4269,7 @@ CURLcode ftp_parse_url_path(struct connectdata *conn)
size_t dlen;
char *path;
CURLcode result =
- Curl_urldecode(conn->data, data->state.path, 0, &path, &dlen, TRUE);
+ Curl_urldecode(conn->data, ftp->path, 0, &path, &dlen, TRUE);
if(result) {
freedirs(ftpc);
return result;
@@ -4388,16 +4389,16 @@ static CURLcode ftp_setup_connection(struct connectdata *conn)
char *type;
struct FTP *ftp;
- conn->data->req.protop = ftp = malloc(sizeof(struct FTP));
+ conn->data->req.protop = ftp = calloc(sizeof(struct FTP), 1);
if(NULL == ftp)
return CURLE_OUT_OF_MEMORY;
- data->state.path++; /* don't include the initial slash */
+ ftp->path = &data->state.up.path[1]; /* don't include the initial slash */
data->state.slash_removed = TRUE; /* we've skipped the slash */
/* FTP URLs support an extension like ";type=<typecode>" that
* we'll try to get now! */
- type = strstr(data->state.path, ";type=");
+ type = strstr(ftp->path, ";type=");
if(!type)
type = strstr(conn->host.rawalloc, ";type=");
diff --git a/lib/ftp.h b/lib/ftp.h
index 7ec339118..38d03223c 100644
--- a/lib/ftp.h
+++ b/lib/ftp.h
@@ -105,6 +105,8 @@ struct FTP {
curl_off_t *bytecountp;
char *user; /* user name string */
char *passwd; /* password string */
+ char *path; /* points to the urlpieces struct field */
+ char *pathalloc; /* if non-NULL a pointer to an allocated path */
/* transfer a file/body or not, done as a typedefed enum just to make
debuggers display the full symbol and not just the numerical value */
diff --git a/lib/gopher.c b/lib/gopher.c
index 3ecee9bdc..b441a641d 100644
--- a/lib/gopher.c
+++ b/lib/gopher.c
@@ -78,7 +78,7 @@ static CURLcode gopher_do(struct connectdata *conn, bool *done)
curl_socket_t sockfd = conn->sock[FIRSTSOCKET];
curl_off_t *bytecount = &data->req.bytecount;
- char *path = data->state.path;
+ char *path = data->state.up.path;
char *sel = NULL;
char *sel_org = NULL;
ssize_t amount, k;
diff --git a/lib/http.c b/lib/http.c
index c1d0d68cd..0f2465919 100644
--- a/lib/http.c
+++ b/lib/http.c
@@ -1877,7 +1877,8 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
struct Curl_easy *data = conn->data;
CURLcode result = CURLE_OK;
struct HTTP *http;
- const char *ppath = data->state.path;
+ const char *path = data->state.up.path;
+ const char *query = data->state.up.query;
bool paste_ftp_userpwd = FALSE;
char ftp_typecode[sizeof("/;type=?")] = "";
const char *host = conn->host.name;
@@ -1995,7 +1996,7 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
}
/* setup the authentication headers */
- result = Curl_http_output_auth(conn, request, ppath, FALSE);
+ result = Curl_http_output_auth(conn, request, path, FALSE);
if(result)
return result;
@@ -2223,47 +2224,49 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
/* The path sent to the proxy is in fact the entire URL. But if the remote
host is a IDN-name, we must make sure that the request we produce only
uses the encoded host name! */
+
+ /* and no fragment part */
+ CURLUcode uc;
+ char *url;
+ CURLU *h = curl_url_dup(data->state.uh);
+ if(!h)
+ return CURLE_OUT_OF_MEMORY;
+
if(conn->host.dispname != conn->host.name) {
- char *url = data->change.url;
- ptr = strstr(url, conn->host.dispname);
- if(ptr) {
- /* This is where the display name starts in the URL, now replace this
- part with the encoded name. TODO: This method of replacing the host
- name is rather crude as I believe there's a slight risk that the
- user has entered a user name or password that contain the host name
- string. */
- size_t currlen = strlen(conn->host.dispname);
- size_t newlen = strlen(conn->host.name);
- size_t urllen = strlen(url);
-
- char *newurl;
-
- newurl = malloc(urllen + newlen - currlen + 1);
- if(newurl) {
- /* copy the part before the host name */
- memcpy(newurl, url, ptr - url);
- /* append the new host name instead of the old */
- memcpy(newurl + (ptr - url), conn->host.name, newlen);
- /* append the piece after the host name */
- memcpy(newurl + newlen + (ptr - url),
- ptr + currlen, /* copy the trailing zero byte too */
- urllen - (ptr-url) - currlen + 1);
- if(data->change.url_alloc) {
- Curl_safefree(data->change.url);
- data->change.url_alloc = FALSE;
- }
- data->change.url = newurl;
- data->change.url_alloc = TRUE;
- }
- else
- return CURLE_OUT_OF_MEMORY;
- }
+ uc = curl_url_set(h, CURLUPART_HOST, conn->host.name, 0);
+ if(uc)
+ return CURLE_OUT_OF_MEMORY;
}
- ppath = data->change.url;
- if(checkprefix("ftp://", ppath)) {
+ uc = curl_url_set(h, CURLUPART_FRAGMENT, NULL, 0);
+ if(uc)
+ return CURLE_OUT_OF_MEMORY;
+
+ if(strcasecompare("http", data->state.up.scheme)) {
+ /* when getting HTTP, we don't want the userinfo in the URL */
+ uc = curl_url_set(h, CURLUPART_USER, NULL, 0);
+ if(uc)
+ return CURLE_OUT_OF_MEMORY;
+ uc = curl_url_set(h, CURLUPART_PASSWORD, NULL, 0);
+ if(uc)
+ return CURLE_OUT_OF_MEMORY;
+ }
+ /* now extract the new version of the URL */
+ uc = curl_url_get(h, CURLUPART_URL, &url, 0);
+ if(uc)
+ return CURLE_OUT_OF_MEMORY;
+
+ if(data->change.url_alloc)
+ free(data->change.url);
+
+ data->change.url = url;
+ data->change.url_alloc = TRUE;
+
+ curl_url_cleanup(h);
+
+ if(strcasecompare("ftp", data->state.up.scheme)) {
if(data->set.proxy_transfer_mode) {
/* when doing ftp, append ;type=<a|i> if not present */
- char *type = strstr(ppath, ";type=");
+ char *type = strstr(path, ";type=");
if(type && type[6] && type[7] == 0) {
switch(Curl_raw_toupper(type[6])) {
case 'A':
@@ -2278,7 +2281,7 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
char *p = ftp_typecode;
/* avoid sending invalid URLs like ftp://example.com;type=i if the
* user specified ftp://example.com without the slash */
- if(!*data->state.path && ppath[strlen(ppath) - 1] != '/') {
+ if(!*data->state.up.path && path[strlen(path) - 1] != '/') {
*p++ = '/';
}
snprintf(p, sizeof(ftp_typecode) - 1, ";type=%c",
@@ -2431,18 +2434,32 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
if(result)
return result;
- if(data->set.str[STRING_TARGET])
- ppath = data->set.str[STRING_TARGET];
+ if(data->set.str[STRING_TARGET]) {
+ path = data->set.str[STRING_TARGET];
+ query = NULL;
+ }
/* url */
- if(paste_ftp_userpwd)
+ if(conn->bits.httpproxy && !conn->bits.tunnel_proxy) {
+ char *url = data->change.url;
+ result = Curl_add_buffer(&req_buffer, url, strlen(url));
+ if(result)
+ return result;
+ }
+ else if(paste_ftp_userpwd)
result = Curl_add_bufferf(&req_buffer, "ftp://%s:%s@%s",
conn->user, conn->passwd,
- ppath + sizeof("ftp://") - 1);
- else
- result = Curl_add_buffer(&req_buffer, ppath, strlen(ppath));
- if(result)
- return result;
+ path + sizeof("ftp://") - 1);
+ else {
+ result = Curl_add_buffer(&req_buffer, path, strlen(path));
+ if(result)
+ return result;
+ if(query) {
+ result = Curl_add_bufferf(&req_buffer, "?%s", query);
+ if(result)
+ return result;
+ }
+ }
result =
Curl_add_bufferf(&req_buffer,
@@ -2515,7 +2532,7 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
co = Curl_cookie_getlist(data->cookies,
conn->allocptr.cookiehost?
conn->allocptr.cookiehost:host,
- data->state.path,
+ data->state.up.path,
(conn->handler->protocol&CURLPROTO_HTTPS)?
TRUE:FALSE);
Curl_share_unlock(data, CURL_LOCK_DATA_COOKIE);
@@ -3836,7 +3853,7 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data,
here, or else use real peer host name. */
conn->allocptr.cookiehost?
conn->allocptr.cookiehost:conn->host.name,
- data->state.path);
+ data->state.up.path);
Curl_share_unlock(data, CURL_LOCK_DATA_COOKIE);
}
#endif
diff --git a/lib/imap.c b/lib/imap.c
index 63fcb4d41..3ef89097f 100644
--- a/lib/imap.c
+++ b/lib/imap.c
@@ -1717,8 +1717,6 @@ static CURLcode imap_regular_transfer(struct connectdata *conn,
static CURLcode imap_setup_connection(struct connectdata *conn)
{
- struct Curl_easy *data = conn->data;
-
/* Initialise the IMAP layer */
CURLcode result = imap_init(conn);
if(result)
@@ -1726,7 +1724,6 @@ static CURLcode imap_setup_connection(struct connectdata *conn)
/* Clear the TLS upgraded flag */
conn->tls_upgraded = FALSE;
- data->state.path++; /* don't include the initial slash */
return CURLE_OK;
}
@@ -1959,7 +1956,7 @@ static CURLcode imap_parse_url_path(struct connectdata *conn)
CURLcode result = CURLE_OK;
struct Curl_easy *data = conn->data;
struct IMAP *imap = data->req.protop;
- const char *begin = data->state.path;
+ const char *begin = &data->state.up.path[1]; /* skip leading slash */
const char *ptr = begin;
/* See how much of the URL is a valid path and decode it */
@@ -2065,17 +2062,10 @@ static CURLcode imap_parse_url_path(struct connectdata *conn)
/* Does the URL contain a query parameter? Only valid when we have a mailbox
and no UID as per RFC-5092 */
- if(imap->mailbox && !imap->uid && !imap->mindex && *ptr == '?') {
- /* Find the length of the query parameter */
- begin = ++ptr;
- while(imap_is_bchar(*ptr))
- ptr++;
-
- /* Decode the query parameter */
- result = Curl_urldecode(data, begin, ptr - begin, &imap->query, NULL,
- TRUE);
- if(result)
- return result;
+ if(imap->mailbox && !imap->uid && !imap->mindex) {
+ /* Get the query parameter, URL decoded */
+ (void)curl_url_get(data->state.uh, CURLUPART_QUERY, &imap->query,
+ CURLU_URLDECODE);
}
/* Any extra stuff at the end of the URL is an error */
diff --git a/lib/ldap.c b/lib/ldap.c
index 4d8f4fa28..e42d1fbbc 100644
--- a/lib/ldap.c
+++ b/lib/ldap.c
@@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
- * Copyright (C) 1998 - 2017, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@@ -838,9 +838,9 @@ static int _ldap_url_parse2(const struct connectdata *conn, LDAPURLDesc *ludp)
size_t i;
if(!conn->data ||
- !conn->data->state.path ||
- conn->data->state.path[0] != '/' ||
- !checkprefix("LDAP", conn->data->change.url))
+ !conn->data->state.up.path ||
+ conn->data->state.up.path[0] != '/' ||
+ !strcasecompare("LDAP", conn->data->state.up.scheme))
return LDAP_INVALID_SYNTAX;
ludp->lud_scope = LDAP_SCOPE_BASE;
@@ -848,7 +848,7 @@ static int _ldap_url_parse2(const struct connectdata *conn, LDAPURLDesc *ludp)
ludp->lud_host = conn->host.name;
/* Duplicate the path */
- p = path = strdup(conn->data->state.path + 1);
+ p = path = strdup(conn->data->state.up.path + 1);
if(!path)
return LDAP_NO_MEMORY;
diff --git a/lib/multi.c b/lib/multi.c
index 2faeaa74f..d5e09aab4 100644
--- a/lib/multi.c
+++ b/lib/multi.c
@@ -542,10 +542,8 @@ static CURLcode multi_done(struct connectdata **connp,
Curl_getoff_all_pipelines(data, conn);
/* Cleanup possible redirect junk */
- free(data->req.newurl);
- data->req.newurl = NULL;
- free(data->req.location);
- data->req.location = NULL;
+ Curl_safefree(data->req.newurl);
+ Curl_safefree(data->req.location);
switch(status) {
case CURLE_ABORTED_BY_CALLBACK:
@@ -657,7 +655,6 @@ static CURLcode multi_done(struct connectdata **connp,
cache here, and therefore cannot be used from this point on
*/
Curl_free_request_state(data);
-
return result;
}
@@ -2015,8 +2012,6 @@ static CURLMcode multi_runsingle(struct Curl_multi *multi,
}
else if(comeback)
rc = CURLM_CALL_MULTI_PERFORM;
-
- free(newurl);
break;
}
diff --git a/lib/pop3.c b/lib/pop3.c
index cd994f63d..5e0fd2299 100644
--- a/lib/pop3.c
+++ b/lib/pop3.c
@@ -1303,8 +1303,6 @@ static CURLcode pop3_regular_transfer(struct connectdata *conn,
static CURLcode pop3_setup_connection(struct connectdata *conn)
{
- struct Curl_easy *data = conn->data;
-
/* Initialise the POP3 layer */
CURLcode result = pop3_init(conn);
if(result)
@@ -1312,7 +1310,6 @@ static CURLcode pop3_setup_connection(struct connectdata *conn)
/* Clear the TLS upgraded flag */
conn->tls_upgraded = FALSE;
- data->state.path++; /* don't include the initial slash */
return CURLE_OK;
}
@@ -1387,7 +1384,7 @@ static CURLcode pop3_parse_url_path(struct connectdata *conn)
/* The POP3 struct is already initialised in pop3_connect() */
struct Curl_easy *data = conn->data;
struct POP3 *pop3 = data->req.protop;
- const char *path = data->state.path;
+ const char *path = &data->state.up.path[1]; /* skip leading slash */
/* URL decode the path for the message ID */
return Curl_urldecode(data, path, 0, &pop3->id, NULL, TRUE);
diff --git a/lib/smb.c b/lib/smb.c
index e1209e099..e4f266e19 100644
--- a/lib/smb.c
+++ b/lib/smb.c
@@ -969,7 +969,7 @@ static CURLcode smb_parse_url_path(struct connectdata *conn)
char *slash;
/* URL decode the path */
- result = Curl_urldecode(data, data->state.path, 0, &path, NULL, TRUE);
+ result = Curl_urldecode(data, data->state.up.path, 0, &path, NULL, TRUE);
if(result)
return result;
diff --git a/lib/smtp.c b/lib/smtp.c
index 50c0b3477..587562306 100644
--- a/lib/smtp.c
+++ b/lib/smtp.c
@@ -1441,7 +1441,6 @@ static CURLcode smtp_regular_transfer(struct connectdata *conn,
static CURLcode smtp_setup_connection(struct connectdata *conn)
{
- struct Curl_easy *data = conn->data;
CURLcode result;
/* Clear the TLS upgraded flag */
@@ -1452,8 +1451,6 @@ static CURLcode smtp_setup_connection(struct connectdata *conn)
if(result)
return result;
- data->state.path++; /* don't include the initial slash */
-
return CURLE_OK;
}
@@ -1507,7 +1504,7 @@ static CURLcode smtp_parse_url_path(struct connectdata *conn)
/* The SMTP struct is already initialised in smtp_connect() */
struct Curl_easy *data = conn->data;
struct smtp_conn *smtpc = &conn->proto.smtpc;
- const char *path = data->state.path;
+ const char *path = &data->state.up.path[1]; /* skip leading slash */
char localhost[HOSTNAME_MAX + 1];
/* Calculate the path if necessary */
diff --git a/lib/tftp.c b/lib/tftp.c
index e5bc80b02..5b74e8e08 100644
--- a/lib/tftp.c
+++ b/lib/tftp.c
@@ -485,7 +485,7 @@ static CURLcode tftp_send_first(tftp_state_data_t *state, tftp_event_t event)
/* As RFC3617 describes the separator slash is not actually part of the
file name so we skip the always-present first letter of the path
string. */
- result = Curl_urldecode(data, &state->conn->data->state.path[1], 0,
+ result = Curl_urldecode(data, &state->conn->data->state.up.path[1], 0,
&filename, NULL, FALSE);
if(result)
return result;
@@ -1374,7 +1374,7 @@ static CURLcode tftp_setup_connection(struct connectdata * conn)
/* TFTP URLs support an extension like ";mode=<typecode>" that
* we'll try to get now! */
- type = strstr(data->state.path, ";mode=");
+ type = strstr(data->state.up.path, ";mode=");
if(!type)
type = strstr(conn->host.rawalloc, ";mode=");
diff --git a/lib/transfer.c b/lib/transfer.c
index 3d8089ee6..c76af2c78 100644
--- a/lib/transfer.c
+++ b/lib/transfer.c
@@ -567,7 +567,7 @@ static CURLcode readwrite_data(struct Curl_easy *data,
infof(data,
"Rewinding stream by : %zd"
" bytes on url %s (zero-length body)\n",
- nread, data->state.path);
+ nread, data->state.up.path);
read_rewind(conn, (size_t)nread);
}
else {
@@ -575,7 +575,7 @@ static CURLcode readwrite_data(struct Curl_easy *data,
"Excess found in a non pipelined read:"
" excess = %zd"
" url = %s (zero-length body)\n",
- nread, data->state.path);
+ nread, data->state.up.path);
}
}
@@ -744,7 +744,7 @@ static CURLcode readwrite_data(struct Curl_easy *data,
" bytes on url %s (size = %" CURL_FORMAT_CURL_OFF_T
", maxdownload = %" CURL_FORMAT_CURL_OFF_T
", bytecount = %" CURL_FORMAT_CURL_OFF_T ", nread = %zd)\n",
- excess, data->state.path,
+ excess, data->state.up.path,
k->size, k->maxdownload, k->bytecount, nread);
read_rewind(conn, excess);
}
@@ -1474,6 +1474,7 @@ CURLcode Curl_follow(struct Curl_easy *data,
/* Location: redirect */
bool disallowport = FALSE;
bool reachedmax = FALSE;
+ CURLUcode uc;
if(type == FOLLOW_REDIR) {
if((data->set.maxredirs != -1) &&
@@ -1506,33 +1507,21 @@ CURLcode Curl_follow(struct Curl_easy *data,
}
}
- if(!Curl_is_absolute_url(newurl, NULL, 8)) {
- /***
- *DANG* this is an RFC 2068 violation. The URL is supposed
- to be absolute and this doesn't seem to be that!
- */
- char *absolute = Curl_concat_url(data->change.url, newurl);
- if(!absolute)
- return CURLE_OUT_OF_MEMORY;
- newurl = absolute;
- }
- else {
- /* The new URL MAY contain space or high byte values, that means a mighty
- stupid redirect URL but we still make an effort to do "right". */
- char *newest;
- size_t newlen = Curl_strlen_url(newurl, FALSE);
-
+ if(Curl_is_absolute_url(newurl, NULL, 8))
/* This is an absolute URL, don't allow the custom port number */
disallowport = TRUE;
- newest = malloc(newlen + 1); /* get memory for this */
- if(!newest)
- return CURLE_OUT_OF_MEMORY;
-
- Curl_strcpy_url(newest, newurl, FALSE); /* create a space-free URL */
- newurl = newest; /* use this instead now */
+ DEBUGASSERT(data->state.uh);
+ uc = curl_url_set(data->state.uh, CURLUPART_URL, newurl, 0);
+ if(uc)
+ /* TODO: consider an error code remap here */
+ return CURLE_URL_MALFORMAT;
- }
+ free(newurl);
+ uc = curl_url_get(data->state.uh, CURLUPART_URL, &newurl, 0);
+ if(uc)
+ /* TODO: consider an error code remap here */
+ return CURLE_OUT_OF_MEMORY;
if(type == FOLLOW_FAKE) {
/* we're only figuring out the new url if we would've followed locations
@@ -1549,10 +1538,8 @@ CURLcode Curl_follow(struct Curl_easy *data,
if(disallowport)
data->state.allow_port = FALSE;
- if(data->change.url_alloc) {
+ if(data->change.url_alloc)
Curl_safefree(data->change.url);
- data->change.url_alloc = FALSE;
- }
data->change.url = newurl;
data->change.url_alloc = TRUE;
diff --git a/lib/url.c b/lib/url.c
index 249d1237d..5c3a5f74c 100644
--- a/lib/url.c
+++ b/lib/url.c
@@ -127,10 +127,6 @@ bool curl_win32_idn_to_ascii(const char *in, char **out);
static void conn_free(struct connectdata *conn);
static void free_fixed_hostname(struct hostname *host);
-static CURLcode parse_url_login(struct Curl_easy *data,
- struct connectdata *conn,
- char **userptr, char **passwdptr,
- char **optionsptr);
static unsigned int get_protocol_family(unsigned int protocol);
/* Some parts of the code (e.g. chunked encoding) assume this buffer has at
@@ -294,6 +290,22 @@ void Curl_freeset(struct Curl_easy *data)
Curl_mime_cleanpart(&data->set.mimepost);
}
+/* free the URL pieces */
+void Curl_up_free(struct Curl_easy *data)
+{
+ struct urlpieces *up = &data->state.up;
+ Curl_safefree(up->scheme);
+ Curl_safefree(up->hostname);
+ Curl_safefree(up->port);
+ Curl_safefree(up->user);
+ Curl_safefree(up->password);
+ Curl_safefree(up->options);
+ Curl_safefree(up->path);
+ Curl_safefree(up->query);
+ curl_url_cleanup(data->state.uh);
+ data->state.uh = NULL;
+}
+
/*
* This is the internal function curl_easy_cleanup() calls. This should
* cleanup and free all resources associated with this sessionhandle.
@@ -313,7 +325,6 @@ CURLcode Curl_close(struct Curl_easy *data)
Curl_expire_clear(data); /* shut off timers */
m = data->multi;
-
if(m)
/* This handle is still part of a multi handle, take care of this first
and detach this handle from there. */
@@ -336,10 +347,6 @@ CURLcode Curl_close(struct Curl_easy *data)
if(data->state.rangestringalloc)
free(data->state.range);
- /* Free the pathbuffer */
- Curl_safefree(data->state.pathbuffer);
- data->state.path = NULL;
-
/* freed here just in case DONE wasn't called */
Curl_free_request_state(data);
@@ -359,12 +366,7 @@ CURLcode Curl_close(struct Curl_easy *data)
}
data->change.referer = NULL;
- if(data->change.url_alloc) {
- Curl_safefree(data->change.url);
- data->change.url_alloc = FALSE;
- }
- data->change.url = NULL;
-
+ Curl_up_free(data);
Curl_safefree(data->state.buffer);
Curl_safefree(data->state.headerbuff);
Curl_safefree(data->state.ulbuf);
@@ -1992,379 +1994,113 @@ static CURLcode findprotocol(struct Curl_easy *data,
return CURLE_UNSUPPORTED_PROTOCOL;
}
+
+static CURLcode uc_to_curlcode(CURLUcode uc)
+{
+ switch(uc) {
+ default:
+ return CURLE_URL_MALFORMAT;
+ case CURLUE_UNSUPPORTED_SCHEME:
+ return CURLE_UNSUPPORTED_PROTOCOL;
+ case CURLUE_OUT_OF_MEMORY:
+ return CURLE_OUT_OF_MEMORY;
+ }
+}
+
/*
* Parse URL and fill in the relevant members of the connection struct.
*/
static CURLcode parseurlandfillconn(struct Curl_easy *data,
- struct connectdata *conn,
- bool *prot_missing,
- char **userp, char **passwdp,
- char **optionsp)
+ struct connectdata *conn)
{
- char *at;
- char *fragment;
- char *path = data->state.path;
- char *query;
- int rc;
- const char *protop = "";
CURLcode result;
- bool rebuild_url = FALSE;
- bool url_has_scheme = FALSE;
- char protobuf[16];
-
- *prot_missing = FALSE;
-
- /* We might pass the entire URL into the request so we need to make sure
- * there are no bad characters in there.*/
- if(strpbrk(data->change.url, "\r\n")) {
- failf(data, "Illegal characters found in URL");
- return CURLE_URL_MALFORMAT;
- }
-
- /*************************************************************
- * Parse the URL.
- *
- * We need to parse the url even when using the proxy, because we will need
- * the hostname and port in case we are trying to SSL connect through the
- * proxy -- and we don't know if we will need to use SSL until we parse the
- * url ...
- ************************************************************/
- if(data->change.url[0] == ':') {
- failf(data, "Bad URL, colon is first character");
- return CURLE_URL_MALFORMAT;
- }
-
- /* MSDOS/Windows style drive prefix, eg c: in c:foo */
-#define STARTS_WITH_DRIVE_PREFIX(str) \
- ((('a' <= str[0] && str[0] <= 'z') || \
- ('A' <= str[0] && str[0] <= 'Z')) && \
- (str[1] == ':'))
-
- /* MSDOS/Windows style drive prefix, optionally with
- * a '|' instead of ':', followed by a slash or NUL */
-#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
- ((('a' <= (str)[0] && (str)[0] <= 'z') || \
- ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
- ((str)[1] == ':' || (str)[1] == '|') && \
- ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
-
- /* Don't mistake a drive letter for a scheme if the default protocol is file.
- curld --proto-default file c:/foo/bar.txt */
- if(STARTS_WITH_DRIVE_PREFIX(data->change.url) &&
- data->set.str[STRING_DEFAULT_PROTOCOL] &&
- strcasecompare(data->set.str[STRING_DEFAULT_PROTOCOL], "file")) {
- ; /* do nothing */
- }
- else { /* check for a scheme */
- int i;
- for(i = 0; i < 16 && data->change.url[i]; ++i) {
- if(data->change.url[i] == '/')
- break;
- if(data->change.url[i] == ':') {
- url_has_scheme = TRUE;
- break;
- }
- }
- }
-
- /* handle the file: scheme */
- if((url_has_scheme && strncasecompare(data->change.url, "file:", 5)) ||
- (!url_has_scheme && data->set.str[STRING_DEFAULT_PROTOCOL] &&
- strcasecompare(data->set.str[STRING_DEFAULT_PROTOCOL], "file"))) {
- if(url_has_scheme)
- rc = sscanf(data->change.url, "%*15[^\n/:]:%[^\n]", path);
- else
- rc = sscanf(data->change.url, "%[^\n]", path);
-
- if(rc != 1) {
- failf(data, "Bad URL");
- return CURLE_URL_MALFORMAT;
- }
-
- /* Extra handling URLs with an authority component (i.e. that start with
- * "file://")
- *
- * We allow omitted hostname (e.g. file:/<path>) -- valid according to
- * RFC 8089, but not the (current) WHAT-WG URL spec.
- */
- if(url_has_scheme && path[0] == '/' && path[1] == '/') {
- /* swallow the two slashes */
- char *ptr = &path[2];
-
- /*
- * According to RFC 8089, a file: URL can be reliably dereferenced if:
- *
- * o it has no/blank hostname, or
- *
- * o the hostname matches "localhost" (case-insensitively), or
- *
- * o the hostname is a FQDN that resolves to this machine.
- *
- * For brevity, we only consider URLs with empty, "localhost", or
- * "127.0.0.1" hostnames as local.
- *
- * Additionally, there is an exception for URLs with a Windows drive
- * letter in the authority (which was accidentally omitted from RFC 8089
- * Appendix E, but believe me, it was meant to be there. --MK)
- */
- if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
- /* the URL includes a host name, it must match "localhost" or
- "127.0.0.1" to be valid */
- if(!checkprefix("localhost/", ptr) &&
- !checkprefix("127.0.0.1/", ptr)) {
- failf(data, "Invalid file://hostname/, "
- "expected localhost or 127.0.0.1 or none");
- return CURLE_URL_MALFORMAT;
- }
- ptr += 9; /* now points to the slash after the host */
- }
-
- /* This cannot be done with strcpy, as the memory chunks overlap! */
- memmove(path, ptr, strlen(ptr) + 1);
- }
+ CURLU *uh;
+ CURLUcode uc;
+ char *hostname;
-#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
- /* Don't allow Windows drive letters when not in Windows.
- * This catches both "file:/c:" and "file:c:" */
- if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
- STARTS_WITH_URL_DRIVE_PREFIX(path)) {
- failf(data, "File drive letters are only accepted in MSDOS/Windows.");
- return CURLE_URL_MALFORMAT;
- }
-#else
- /* If the path starts with a slash and a drive letter, ditch the slash */
- if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
- /* This cannot be done with strcpy, as the memory chunks overlap! */
- memmove(path, &path[1], strlen(&path[1]) + 1);
- }
-#endif
+ Curl_up_free(data); /* cleanup previous leftovers first */
- protop = "file"; /* protocol string */
- *prot_missing = !url_has_scheme;
+ /* parse the URL */
+ uh = data->state.uh = curl_url();
+ if(!uh)
+ return CURLE_OUT_OF_MEMORY;
+ uc = curl_url_set(uh, CURLUPART_URL, data->change.url, CURLU_GUESS_SCHEME);
+ if(uc)
+ return uc_to_curlcode(uc);
+
+ uc = curl_url_get(uh, CURLUPART_SCHEME, &data->state.up.scheme, 0);
+ if(uc)
+ return uc_to_curlcode(uc);
+
+ uc = curl_url_get(uh, CURLUPART_USER, &data->state.up.user,
+ CURLU_URLDECODE);
+ if(!uc) {
+ conn->user = strdup(data->state.up.user);
+ if(!conn->user)
+ return CURLE_OUT_OF_MEMORY;
+ conn->bits.user_passwd = TRUE;
}
- else {
- /* clear path */
- char slashbuf[4];
- path[0] = 0;
-
- rc = sscanf(data->change.url,
- "%15[^\n/:]:%3[/]%[^\n/?#]%[^\n]",
- protobuf, slashbuf, conn->host.name, path);
- if(2 == rc) {
- failf(data, "Bad URL");
- return CURLE_URL_MALFORMAT;
- }
- if(3 > rc) {
-
- /*
- * The URL was badly formatted, let's try the browser-style _without_
- * protocol specified like 'http://'.
- */
- rc = sscanf(data->change.url, "%[^\n/?#]%[^\n]", conn->host.name, path);
- if(1 > rc) {
- /*
- * We couldn't even get this format.
- * djgpp 2.04 has a sscanf() bug where 'conn->host.name' is
- * assigned, but the return value is EOF!
- */
-#if defined(__DJGPP__) && (DJGPP_MINOR == 4)
- if(!(rc == -1 && *conn->host.name))
-#endif
- {
- failf(data, "<url> malformed");
- return CURLE_URL_MALFORMAT;
- }
- }
-
- /*
- * Since there was no protocol part specified in the URL use the
- * user-specified default protocol. If we weren't given a default make a
- * guess by matching some protocols against the host's outermost
- * sub-domain name. Finally if there was no match use HTTP.
- */
-
- protop = data->set.str[STRING_DEFAULT_PROTOCOL];
- if(!protop) {
- /* Note: if you add a new protocol, please update the list in
- * lib/version.c too! */
- if(checkprefix("FTP.", conn->host.name))
- protop = "ftp";
- else if(checkprefix("DICT.", conn->host.name))
- protop = "DICT";
- else if(checkprefix("LDAP.", conn->host.name))
- protop = "LDAP";
- else if(checkprefix("IMAP.", conn->host.name))
- protop = "IMAP";
- else if(checkprefix("SMTP.", conn->host.name))
- protop = "smtp";
- else if(checkprefix("POP3.", conn->host.name))
- protop = "pop3";
- else
- protop = "http";
- }
+ else if(uc != CURLUE_NO_USER)
+ return uc_to_curlcode(uc);
- *prot_missing = TRUE; /* not given in URL */
- }
- else {
- size_t s = strlen(slashbuf);
- protop = protobuf;
- if(s != 2) {
- infof(data, "Unwillingly accepted illegal URL using %zu slash%s!\n",
- s, s>1?"es":"");
-
- if(data->change.url_alloc)
- free(data->change.url);
- /* repair the URL to use two slashes */
- data->change.url = aprintf("%s://%s%s",
- protobuf, conn->host.name, path);
- if(!data->change.url)
- return CURLE_OUT_OF_MEMORY;
- data->change.url_alloc = TRUE;
- }
- }
+ uc = curl_url_get(uh, CURLUPART_PASSWORD, &data->state.up.password,
+ CURLU_URLDECODE);
+ if(!uc) {
+ conn->passwd = strdup(data->state.up.password);
+ if(!conn->passwd)
+ return CURLE_OUT_OF_MEMORY;
+ conn->bits.user_passwd = TRUE;
}
+ else if(uc != CURLUE_NO_PASSWORD)
+ return uc_to_curlcode(uc);
- /* We search for '?' in the host name (but only on the right side of a
- * @-letter to allow ?-letters in username and password) to handle things
- * like http://example.com?param= (notice the missing '/').
- */
- at = strchr(conn->host.name, '@');
- if(at)
- query = strchr(at + 1, '?');
- else
- query = strchr(conn->host.name, '?');
-
- if(query) {
- /* We must insert a slash before the '?'-letter in the URL. If the URL had
- a slash after the '?', that is where the path currently begins and the
- '?string' is still part of the host name.
-
- We must move the trailing part from the host name and put it first in
- the path. And have it all prefixed with a slash.
- */
-
- size_t hostlen = strlen(query);
- size_t pathlen = strlen(path);
-
- /* move the existing path plus the zero byte forward, to make room for
- the host-name part */
- memmove(path + hostlen + 1, path, pathlen + 1);
-
- /* now copy the trailing host part in front of the existing path */
- memcpy(path + 1, query, hostlen);
-
- path[0]='/'; /* prepend the missing slash */
- rebuild_url = TRUE;
-
- *query = 0; /* now cut off the hostname at the ? */
- }
- else if(!path[0]) {
- /* if there's no path set, use a single slash */
- strcpy(path, "/");
- rebuild_url = TRUE;
+ uc = curl_url_get(uh, CURLUPART_OPTIONS, &data->state.up.options,
+ CURLU_URLDECODE);
+ if(!uc) {
+ conn->options = strdup(data->state.up.options);
+ if(!conn->options)
+ return CURLE_OUT_OF_MEMORY;
}
+ else if(uc != CURLUE_NO_OPTIONS)
+ return uc_to_curlcode(uc);
- /* If the URL is malformatted (missing a '/' after hostname before path) we
- * insert a slash here. The only letters except '/' that can start a path is
- * '?' and '#' - as controlled by the two sscanf() patterns above.
- */
- if(path[0] != '/') {
- /* We need this function to deal with overlapping memory areas. We know
- that the memory area 'path' points to is 'urllen' bytes big and that
- is bigger than the path. Use +1 to move the zero byte too. */
- memmove(&path[1], path, strlen(path) + 1);
- path[0] = '/';
- rebuild_url = TRUE;
- }
- else if(!data->set.path_as_is) {
- /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */
- char *newp = Curl_dedotdotify(path);
- if(!newp)
+ uc = curl_url_get(uh, CURLUPART_HOST, &data->state.up.hostname, 0);
+ if(uc) {
+ if(!strcasecompare("file", data->state.up.scheme))
return CURLE_OUT_OF_MEMORY;
-
- if(strcmp(newp, path)) {
- rebuild_url = TRUE;
- free(data->state.pathbuffer);
- data->state.pathbuffer = newp;
- data->state.path = newp;
- path = newp;
- }
- else
- free(newp);
}
- /*
- * "rebuild_url" means that one or more URL components have been modified so
- * we need to generate an updated full version. We need the corrected URL
- * when communicating over HTTP proxy and we don't know at this point if
- * we're using a proxy or not.
- */
- if(rebuild_url) {
- char *reurl;
-
- size_t plen = strlen(path); /* new path, should be 1 byte longer than
- the original */
- size_t prefixlen = strlen(conn->host.name);
-
- if(!*prot_missing) {
- size_t protolen = strlen(protop);
-
- if(curl_strnequal(protop, data->change.url, protolen))
- prefixlen += protolen;
- else {
- failf(data, "<url> malformed");
- return CURLE_URL_MALFORMAT;
- }
-
- if(curl_strnequal("://", &data->change.url[protolen], 3))
- prefixlen += 3;
- /* only file: is allowed to omit one or both slashes */
- else if(curl_strnequal("file:", data->change.url, 5))
- prefixlen += 1 + (data->change.url[5] == '/');
- else {
- failf(data, "<url> malformed");
- return CURLE_URL_MALFORMAT;
- }
- }
+ uc = curl_url_get(uh, CURLUPART_PATH, &data->state.up.path, 0);
+ if(uc)
+ return uc_to_curlcode(uc);
- reurl = malloc(prefixlen + plen + 1);
- if(!reurl)
+ uc = curl_url_get(uh, CURLUPART_PORT, &data->state.up.port,
+ CURLU_DEFAULT_PORT);
+ if(uc) {
+ if(!strcasecompare("file", data->state.up.scheme))
return CURLE_OUT_OF_MEMORY;
-
- /* copy the prefix */
- memcpy(reurl, data->change.url, prefixlen);
-
- /* append the trailing piece + zerobyte */
- memcpy(&reurl[prefixlen], path, plen + 1);
-
- /* possible free the old one */
- if(data->change.url_alloc) {
- Curl_safefree(data->change.url);
- data->change.url_alloc = FALSE;
- }
-
- infof(data, "Rebuilt URL to: %s\n", reurl);
-
- data->change.url = reurl;
- data->change.url_alloc = TRUE; /* free this later */
+ }
+ else {
+ unsigned long port = strtoul(data->state.up.port, NULL, 10);
+ conn->remote_port = curlx_ultous(port);
}
- result = findprotocol(data, conn, protop);
- if(result)
- return result;
+ (void)curl_url_get(uh, CURLUPART_QUERY, &data->state.up.query, 0);
- /*
- * Parse the login details from the URL and strip them out of
- * the host name
- */
- result = parse_url_login(data, conn, userp, passwdp, optionsp);
+ result = findprotocol(data, conn, data->state.up.scheme);
if(result)
return result;
- if(conn->host.name[0] == '[') {
+ hostname = data->state.up.hostname;
+ if(!hostname)
+ /* this is for file:// transfers, get a dummy made */
+ hostname = (char *)"";
+
+ if(hostname[0] == '[') {
/* This looks like an IPv6 address literal. See if there is an address
- scope if there is no location header */
- char *percent = strchr(conn->host.name, '%');
+ scope. */
+ char *percent = strchr(++hostname, '%');
+ conn->bits.ipv6_ip = TRUE;
if(percent) {
unsigned int identifier_offset = 3;
char *endp;
@@ -2412,33 +2148,22 @@ static CURLcode parseurlandfillconn(struct Curl_easy *data,
infof(data, "Invalid IPv6 address format\n");
}
}
+ percent = strchr(hostname, ']');
+ if(percent)
+ /* terminate IPv6 numerical at end bracket */
+ *percent = 0;
}
+ /* make sure the connect struct gets its own copy of the host name */
+ conn->host.rawalloc = strdup(hostname);
+ if(!conn->host.rawalloc)
+ return CURLE_OUT_OF_MEMORY;
+ conn->host.name = conn->host.rawalloc;
+
if(data->set.scope_id)
/* Override any scope that was set above. */
conn->scope_id = data->set.scope_id;
- /* Remove the fragment part of the path. Per RFC 2396, this is always the
- last part of the URI. We are looking for the first '#' so that we deal
- gracefully with non conformant URI such as http://example.com#foo#bar. */
- fragment = strchr(path, '#');
- if(fragment) {
- *fragment = 0;
-
- /* we know the path part ended with a fragment, so we know the full URL
- string does too and we need to cut it off from there so it isn't used
- over proxy */
- fragment = strchr(data->change.url, '#');
- if(fragment)
- *fragment = 0;
- }
-
- /*
- * So if the URL was A://B/C#D,
- * protop is A
- * conn->host.name is B
- * data->state.path is /C
- */
return CURLE_OK;
}
@@ -3090,131 +2815,6 @@ out:
#endif /* CURL_DISABLE_PROXY */
/*
- * parse_url_login()
- *
- * Parse the login details (user name, password and options) from the URL and
- * strip them out of the host name
- *
- * Inputs: data->set.use_netrc (CURLOPT_NETRC)
- * conn->host.name
- *
- * Outputs: (almost :- all currently undefined)
- * conn->bits.user_passwd - non-zero if non-default passwords exist
- * user - non-zero length if defined
- * passwd - non-zero length if defined
- * options - non-zero length if defined
- * conn->host.name - remove user name and password
- */
-static CURLcode parse_url_login(struct Curl_easy *data,
- struct connectdata *conn,
- char **user, char **passwd, char **options)
-{
- CURLcode result = CURLE_OK;
- char *userp = NULL;
- char *passwdp = NULL;
- char *optionsp = NULL;
-
- /* At this point, we're hoping all the other special cases have
- * been taken care of, so conn->host.name is at most
- * [user[:password][;options]]@]hostname
- *
- * We need somewhere to put the embedded details, so do that first.
- */
-
- char *ptr = strchr(conn->host.name, '@');
- char *login = conn->host.name;
-
- DEBUGASSERT(!**user);
- DEBUGASSERT(!**passwd);
- DEBUGASSERT(!**options);
- DEBUGASSERT(conn->handler);
-
- if(!ptr)
- goto out;
-
- /* We will now try to extract the
- * possible login information in a string like:
- * ftp://user:password@ftp.my.site:8021/README */
- conn->host.name = ++ptr;
-
- /* So the hostname is sane. Only bother interpreting the
- * results if we could care. It could still be wasted
- * work because it might be overtaken by the programmatically
- * set user/passwd, but doing that first adds more cases here :-(
- */
-
- if(data->set.use_netrc == CURL_NETRC_REQUIRED)
- goto out;
-
- /* We could use the login information in the URL so extract it. Only parse
- options if the handler says we should. */
- result =
- Curl_parse_login_details(login, ptr - login - 1,
- &userp, &passwdp,
- (conn->handler->flags & PROTOPT_URLOPTIONS)?
- &optionsp:NULL);
- if(result)
- goto out;
-
- if(userp) {
- char *newname;
-
- if(data->set.disallow_username_in_url) {
- failf(data, "Option DISALLOW_USERNAME_IN_URL is set "
- "and url contains username.");
- result = CURLE_LOGIN_DENIED;
- goto out;
- }
-
- /* We have a user in the URL */
- conn->bits.userpwd_in_url = TRUE;
- conn->bits.user_passwd = TRUE; /* enable user+password */
-
- /* Decode the user */
- result = Curl_urldecode(data, userp, 0, &newname, NULL, FALSE);
- if(result) {
- goto out;
- }
-
- free(*user);
- *user = newname;
- }
-
- if(passwdp) {
- /* We have a password in the URL so decode it */
- char *newpasswd;
- result = Curl_urldecode(data, passwdp, 0, &newpasswd, NULL, FALSE);
- if(result) {
- goto out;
- }
-
- free(*passwd);
- *passwd = newpasswd;
- }
-
- if(optionsp) {
- /* We have an options list in the URL so decode it */
- char *newoptions;
- result = Curl_urldecode(data, optionsp, 0, &newoptions, NULL, FALSE);
- if(result) {
- goto out;
- }
-
- free(*options);
- *options = newoptions;
- }
-
-
- out:
-
- free(userp);
- free(passwdp);
- free(optionsp);
-
- return result;
-}
-
-/*
* Curl_parse_login_details()
*
* This is used to parse a login string for user name, password and options in
@@ -3347,131 +2947,23 @@ CURLcode Curl_parse_login_details(const char *login, const size_t len,
* No matter if we use a proxy or not, we have to figure out the remote
* port number of various reasons.
*
- * To be able to detect port number flawlessly, we must not confuse them
- * IPv6-specified addresses in the [0::1] style. (RFC2732)
- *
- * The conn->host.name is currently [user:passwd@]host[:port] where host
- * could be a hostname, IPv4 address or IPv6 address.
- *
* The port number embedded in the URL is replaced, if necessary.
*************************************************************/
static CURLcode parse_remote_port(struct Curl_easy *data,
struct connectdata *conn)
{
- char *portptr;
- char endbracket;
-
- /* Note that at this point, the IPv6 address cannot contain any scope
- suffix as that has already been removed in the parseurlandfillconn()
- function */
- if((1 == sscanf(conn->host.name, "[%*45[0123456789abcdefABCDEF:.]%c",
- &endbracket)) &&
- (']' == endbracket)) {
- /* this is a RFC2732-style specified IP-address */
- conn->bits.ipv6_ip = TRUE;
-
- conn->host.name++; /* skip over the starting bracket */
- portptr = strchr(conn->host.name, ']');
- if(portptr) {
- *portptr++ = '\0'; /* zero terminate, killing the bracket */
- if(*portptr) {
- if (*portptr != ':') {
- failf(data, "IPv6 closing bracket followed by '%c'", *portptr);
- return CURLE_URL_MALFORMAT;
- }
- }
- else
- portptr = NULL; /* no port number available */
- }
- }
- else {
-#ifdef ENABLE_IPV6
- struct in6_addr in6;
- if(Curl_inet_pton(AF_INET6, conn->host.name, &in6) > 0) {
- /* This is a numerical IPv6 address, meaning this is a wrongly formatted
- URL */
- failf(data, "IPv6 numerical address used in URL without brackets");
- return CURLE_URL_MALFORMAT;
- }
-#endif
-
- portptr = strchr(conn->host.name, ':');
- }
if(data->set.use_port && data->state.allow_port) {
- /* if set, we use this and ignore the port possibly given in the URL */
+ /* if set, we use this instead of the port possibly given in the URL */
+ char portbuf[16];
+ CURLUcode uc;
conn->remote_port = (unsigned short)data->set.use_port;
- if(portptr)
- *portptr = '\0'; /* cut off the name there anyway - if there was a port
- number - since the port number is to be ignored! */
- if(conn->bits.httpproxy) {
- /* we need to create new URL with the new port number */
- char *url;
- char type[12]="";
-
- if(conn->bits.type_set)
- snprintf(type, sizeof(type), ";type=%c",
- data->set.prefer_ascii?'A':
- (data->set.ftp_list_only?'D':'I'));
-
- /*
- * This synthesized URL isn't always right--suffixes like ;type=A are
- * stripped off. It would be better to work directly from the original
- * URL and simply replace the port part of it.
- */
- url = aprintf("%s://%s%s%s:%d%s%s%s", conn->given->scheme,
- conn->bits.ipv6_ip?"[":"", conn->host.name,
- conn->bits.ipv6_ip?"]":"", conn->remote_port,
- data->state.slash_removed?"/":"", data->state.path,
- type);
- if(!url)
- return CURLE_OUT_OF_MEMORY;
-
- if(data->change.url_alloc) {
- Curl_safefree(data->change.url);
- data->change.url_alloc = FALSE;
- }
-
- data->change.url = url;
- data->change.url_alloc = TRUE;
- }
- }
- else if(portptr) {
- /* no CURLOPT_PORT given, extract the one from the URL */
-
- char *rest;
- long port;
-
- port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */
-
- if((port < 0) || (port > 0xffff)) {
- /* Single unix standard says port numbers are 16 bits long */
- failf(data, "Port number out of range");
- return CURLE_URL_MALFORMAT;
- }
-
- if(rest[0]) {
- failf(data, "Port number ended with '%c'", rest[0]);
- return CURLE_URL_MALFORMAT;
- }
-
- if(rest != &portptr[1]) {
- *portptr = '\0'; /* cut off the name there */
- conn->remote_port = curlx_ultous(port);
- }
- else {
- /* Browser behavior adaptation. If there's a colon with no digits after,
- just cut off the name there which makes us ignore the colon and just
- use the default port. Firefox and Chrome both do that. */
- *portptr = '\0';
- }
+ /* remote_port is a signed integer, so it must be formatted with %d */
+ snprintf(portbuf, sizeof(portbuf), "%d", conn->remote_port);
+ /* store the overriding port number in the URL handle as well */
+ uc = curl_url_set(data->state.uh, CURLUPART_PORT, portbuf, 0);
+ if(uc)
+ return CURLE_OUT_OF_MEMORY;
}
- /* only if remote_port was not already parsed off the URL we use the
- default port number */
- if(conn->remote_port < 0)
- conn->remote_port = (unsigned short)conn->given->defport;
-
return CURLE_OK;
}
@@ -3483,11 +2975,16 @@ static CURLcode override_login(struct Curl_easy *data,
struct connectdata *conn,
char **userp, char **passwdp, char **optionsp)
{
+ bool user_changed = FALSE;
+ bool passwd_changed = FALSE;
+ CURLUcode uc;
if(data->set.str[STRING_USERNAME]) {
free(*userp);
*userp = strdup(data->set.str[STRING_USERNAME]);
if(!*userp)
return CURLE_OUT_OF_MEMORY;
+ conn->bits.user_passwd = TRUE; /* enable user+password */
+ user_changed = TRUE;
}
if(data->set.str[STRING_PASSWORD]) {
@@ -3495,6 +2992,8 @@ static CURLcode override_login(struct Curl_easy *data,
*passwdp = strdup(data->set.str[STRING_PASSWORD]);
if(!*passwdp)
return CURLE_OUT_OF_MEMORY;
+ conn->bits.user_passwd = TRUE; /* enable user+password */
+ passwd_changed = TRUE;
}
if(data->set.str[STRING_OPTIONS]) {
@@ -3506,9 +3005,16 @@ static CURLcode override_login(struct Curl_easy *data,
conn->bits.netrc = FALSE;
if(data->set.use_netrc != CURL_NETRC_IGNORED) {
- int ret = Curl_parsenetrc(conn->host.name,
- userp, passwdp,
- data->set.str[STRING_NETRC_FILE]);
+ char *nuser = NULL;
+ char *npasswd = NULL;
+ int ret;
+
+ if(data->set.use_netrc == CURL_NETRC_OPTIONAL)
+ nuser = *userp; /* to separate otherwise identical machines */
+
+ ret = Curl_parsenetrc(conn->host.name,
+ &nuser, &npasswd,
+ data->set.str[STRING_NETRC_FILE]);
if(ret > 0) {
infof(data, "Couldn't find host %s in the "
DOT_CHAR "netrc file; using defaults\n",
@@ -3522,55 +3028,85 @@ static CURLcode override_login(struct Curl_easy *data,
file, so that it is safe to use even if we followed a Location: to a
different host or similar. */
conn->bits.netrc = TRUE;
-
conn->bits.user_passwd = TRUE; /* enable user+password */
+
+ if(data->set.use_netrc == CURL_NETRC_OPTIONAL) {
+ /* prefer credentials outside netrc */
+ if(nuser && !*userp) {
+ free(*userp);
+ *userp = nuser;
+ user_changed = TRUE;
+ }
+ if(npasswd && !*passwdp) {
+ free(*passwdp);
+ *passwdp = npasswd;
+ passwd_changed = TRUE;
+ }
+ }
+ else {
+ /* prefer netrc credentials */
+ if(nuser) {
+ free(*userp);
+ *userp = nuser;
+ user_changed = TRUE;
+ }
+ if(npasswd) {
+ free(*passwdp);
+ *passwdp = npasswd;
+ passwd_changed = TRUE;
+ }
+ }
}
}
+ /* for updated strings, we update them in the URL */
+ if(user_changed) {
+ uc = curl_url_set(data->state.uh, CURLUPART_USER, *userp, 0);
+ if(uc)
+ return uc_to_curlcode(uc);
+ }
+ if(passwd_changed) {
+ uc = curl_url_set(data->state.uh, CURLUPART_PASSWORD, *passwdp, 0);
+ if(uc)
+ return uc_to_curlcode(uc);
+ }
return CURLE_OK;
}
/*
* Set the login details so they're available in the connection
*/
-static CURLcode set_login(struct connectdata *conn,
- const char *user, const char *passwd,
- const char *options)
+static CURLcode set_login(struct connectdata *conn)
{
CURLcode result = CURLE_OK;
+ const char *setuser = CURL_DEFAULT_USER;
+ const char *setpasswd = CURL_DEFAULT_PASSWORD;
/* If our protocol needs a password and we have none, use the defaults */
- if((conn->handler->flags & PROTOPT_NEEDSPWD) && !conn->bits.user_passwd) {
- /* Store the default user */
- conn->user = strdup(CURL_DEFAULT_USER);
-
- /* Store the default password */
- if(conn->user)
- conn->passwd = strdup(CURL_DEFAULT_PASSWORD);
- else
- conn->passwd = NULL;
-
- /* This is the default password, so DON'T set conn->bits.user_passwd */
- }
+ if((conn->handler->flags & PROTOPT_NEEDSPWD) && !conn->bits.user_passwd)
+ ;
else {
- /* Store the user, zero-length if not set */
- conn->user = strdup(user);
-
- /* Store the password (only if user is present), zero-length if not set */
- if(conn->user)
- conn->passwd = strdup(passwd);
- else
- conn->passwd = NULL;
+ setuser = "";
+ setpasswd = "";
+ }
+ /* Store the default user */
+ if(!conn->user) {
+ conn->user = strdup(setuser);
+ if(!conn->user)
+ return CURLE_OUT_OF_MEMORY;
}
- if(!conn->user || !conn->passwd)
- result = CURLE_OUT_OF_MEMORY;
-
- /* Store the options, null if not set */
- if(!result && options[0]) {
- conn->options = strdup(options);
+ /* Store the default password */
+ if(!conn->passwd) {
+ conn->passwd = strdup(setpasswd);
+ if(!conn->passwd)
+ result = CURLE_OUT_OF_MEMORY;
+ }
- if(!conn->options)
+ /* if there's a user without password, consider password blank */
+ if(conn->user && !conn->passwd) {
+ conn->passwd = strdup("");
+ if(!conn->passwd)
result = CURLE_OUT_OF_MEMORY;
}
@@ -4022,12 +3558,7 @@ static CURLcode create_conn(struct Curl_easy *data,
CURLcode result = CURLE_OK;
struct connectdata *conn;
struct connectdata *conn_temp = NULL;
- size_t urllen;
- char *user = NULL;
- char *passwd = NULL;
- char *options = NULL;
bool reuse;
- bool prot_missing = FALSE;
bool connections_available = TRUE;
bool force_reuse = FALSE;
bool waitpipe = FALSE;
@@ -4039,7 +3570,6 @@ static CURLcode create_conn(struct Curl_easy *data,
/*************************************************************
* Check input data
*************************************************************/
-
if(!data->change.url) {
result = CURLE_URL_MALFORMAT;
goto out;
@@ -4061,107 +3591,10 @@ static CURLcode create_conn(struct Curl_easy *data,
any failure */
*in_connect = conn;
- /* This initing continues below, see the comment "Continue connectdata
- * initialization here" */
-
- /***********************************************************
- * We need to allocate memory to store the path in. We get the size of the
- * full URL to be sure, and we need to make it at least 256 bytes since
- * other parts of the code will rely on this fact
- ***********************************************************/
-#define LEAST_PATH_ALLOC 256
- urllen = strlen(data->change.url);
- if(urllen < LEAST_PATH_ALLOC)
- urllen = LEAST_PATH_ALLOC;
-
- /*
- * We malloc() the buffers below urllen+2 to make room for 2 possibilities:
- * 1 - an extra terminating zero
- * 2 - an extra slash (in case a syntax like "www.host.com?moo" is used)
- */
-
- Curl_safefree(data->state.pathbuffer);
- data->state.path = NULL;
-
- data->state.pathbuffer = malloc(urllen + 2);
- if(NULL == data->state.pathbuffer) {
- result = CURLE_OUT_OF_MEMORY; /* really bad error */
- goto out;
- }
- data->state.path = data->state.pathbuffer;
-
- conn->host.rawalloc = malloc(urllen + 2);
- if(NULL == conn->host.rawalloc) {
- Curl_safefree(data->state.pathbuffer);
- data->state.path = NULL;
- result = CURLE_OUT_OF_MEMORY;
- goto out;
- }
-
- conn->host.name = conn->host.rawalloc;
- conn->host.name[0] = 0;
-
- user = strdup("");
- passwd = strdup("");
- options = strdup("");
- if(!user || !passwd || !options) {
- result = CURLE_OUT_OF_MEMORY;
- goto out;
- }
-
- result = parseurlandfillconn(data, conn, &prot_missing, &user, &passwd,
- &options);
+ result = parseurlandfillconn(data, conn);
if(result)
goto out;
- /*************************************************************
- * No protocol part in URL was used, add it!
- *************************************************************/
- if(prot_missing) {
- /* We're guessing prefixes here and if we're told to use a proxy or if
- we're going to follow a Location: later or... then we need the protocol
- part added so that we have a valid URL. */
- char *reurl;
- char *ch_lower;
-
- reurl = aprintf("%s://%s", conn->handler->scheme, data->change.url);
-
- if(!reurl) {
- result = CURLE_OUT_OF_MEMORY;
- goto out;
- }
-
- /* Change protocol prefix to lower-case */
- for(ch_lower = reurl; *ch_lower != ':'; ch_lower++)
- *ch_lower = (char)TOLOWER(*ch_lower);
-
- if(data->change.url_alloc) {
- Curl_safefree(data->change.url);
- data->change.url_alloc = FALSE;
- }
-
- data->change.url = reurl;
- data->change.url_alloc = TRUE; /* free this later */
- }
-
- /*************************************************************
- * If the protocol can't handle url query strings, then cut
- * off the unhandable part
- *************************************************************/
- if((conn->given->flags&PROTOPT_NOURLQUERY)) {
- char *path_q_sep = strchr(conn->data->state.path, '?');
- if(path_q_sep) {
- /* according to rfc3986, allow the query (?foo=bar)
- also on protocols that can't handle it.
-
- cut the string-part after '?'
- */
-
- /* terminate the string */
- path_q_sep[0] = 0;
- }
- }
-
if(data->set.str[STRING_BEARER]) {
conn->oauth_bearer = strdup(data->set.str[STRING_BEARER]);
if(!conn->oauth_bearer) {
@@ -4205,10 +3638,12 @@ static CURLcode create_conn(struct Curl_easy *data,
/* Check for overridden login details and set them accordingly so they
they are known when protocol->setup_connection is called! */
- result = override_login(data, conn, &user, &passwd, &options);
+ result = override_login(data, conn, &conn->user, &conn->passwd,
+ &conn->options);
if(result)
goto out;
- result = set_login(conn, user, passwd, options);
+
+ result = set_login(conn); /* default credentials */
if(result)
goto out;
@@ -4394,6 +3829,9 @@ static CURLcode create_conn(struct Curl_easy *data,
* new one.
*************************************************************/
+ DEBUGASSERT(conn->user);
+ DEBUGASSERT(conn->passwd);
+
/* reuse_fresh is TRUE if we are told to use a new connection by force, but
we only acknowledge this option if this is not a re-used connection
already (which happens due to follow-location or during a HTTP
@@ -4569,10 +4007,6 @@ static CURLcode create_conn(struct Curl_easy *data,
result = resolve_server(data, conn, async);
out:
-
- free(options);
- free(passwd);
- free(user);
return result;
}
diff --git a/lib/url.h b/lib/url.h
index 7b9aff5c4..1c18f7137 100644
--- a/lib/url.h
+++ b/lib/url.h
@@ -48,6 +48,8 @@ CURLcode Curl_open(struct Curl_easy **curl);
CURLcode Curl_init_userdefined(struct Curl_easy *data);
void Curl_freeset(struct Curl_easy * data);
+/* free the URL pieces */
+void Curl_up_free(struct Curl_easy *data);
CURLcode Curl_close(struct Curl_easy *data); /* opposite of curl_open() */
CURLcode Curl_connect(struct Curl_easy *, struct connectdata **,
bool *async, bool *protocol_connect);
diff --git a/lib/urldata.h b/lib/urldata.h
index 85712ba20..1fede5090 100644
--- a/lib/urldata.h
+++ b/lib/urldata.h
@@ -1224,6 +1224,18 @@ struct time_node {
expire_id eid;
};
+/* individual pieces of the URL, stored as 'up' in struct UrlState */
+struct urlpieces {
+ char *scheme;
+ char *hostname;
+ char *port; /* kept as a string here, not as a number */
+ char *user;
+ char *password;
+ char *options;
+ char *path; /* takes over from the removed UrlState 'path' member */
+ char *query;
+};
+
struct UrlState {
/* Points to the connection cache */
@@ -1314,9 +1326,6 @@ struct UrlState {
/* for FTP downloads: how many CRLFs did we convert to LFs? */
curl_off_t crlf_conversions;
#endif
- char *pathbuffer;/* allocated buffer to store the URL's path part in */
- char *path; /* path to use, points to somewhere within the pathbuffer
- area */
bool slash_removed; /* set TRUE if the 'path' points to a path where the
initial URL slash separator has been taken off */
bool use_range;
@@ -1350,6 +1359,8 @@ struct UrlState {
#ifdef CURLDEBUG
bool conncache_lock;
#endif
+ CURLU *uh; /* URL handle for the current parsed URL */
+ struct urlpieces up;
};
diff --git a/tests/data/test325 b/tests/data/test325
index 6d5898d45..922f37ba2 100644
--- a/tests/data/test325
+++ b/tests/data/test325
@@ -15,7 +15,7 @@ HTTP/1.1 301 OK
Date: Thu, 09 Nov 2010 14:49:00 GMT
Server: test-server/fake
Content-Length: 7
-Location: http://%HOSTIP:%HTTPPORT/325
+Location: http://somewhere/325
MooMoo
</data>
@@ -24,7 +24,7 @@ HTTP/1.1 301 OK
Date: Thu, 09 Nov 2010 14:49:00 GMT
Server: test-server/fake
Content-Length: 7
-Location: http://%HOSTIP:%HTTPPORT/325
+Location: http://somewhere/325
</datacheck>
</reply>
diff --git a/tests/data/test523 b/tests/data/test523
index 9abe0ed22..665211d48 100644
--- a/tests/data/test523
+++ b/tests/data/test523
@@ -39,7 +39,7 @@ HTTP GET with proxy and CURLOPT_PORT
</name>
# first URL then proxy
<command>
-http://www.example.com:999/523 http://%HOSTIP:%HTTPPORT
+http://www.example.com:999/523 http://%HOSTIP:%HTTPPORT
</command>
</client>
@@ -50,7 +50,7 @@ http://www.example.com:999/523 http://%HOSTIP:%HTTPPORT
^User-Agent:.*
</strip>
<protocol>
-GET HTTP://www.example.com:19999/523 HTTP/1.1
+GET http://www.example.com:19999/523 HTTP/1.1
Host: www.example.com:19999
Authorization: Basic eHh4Onl5eQ==
Accept: */*
diff --git a/tests/data/test563 b/tests/data/test563
index cecbedc21..c9df79219 100644
--- a/tests/data/test563
+++ b/tests/data/test563
@@ -47,7 +47,7 @@ ftp_proxy=http://%HOSTIP:%HTTPPORT/
# Verify data after the test has been "shot"
<verify>
<protocol>
-GET FTP://%HOSTIP:%FTPPORT/563;type=A HTTP/1.1
+GET ftp://%HOSTIP:%FTPPORT/563;type=A HTTP/1.1
Host: %HOSTIP:%FTPPORT
Accept: */*
Proxy-Connection: Keep-Alive