summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Stenberg <daniel@haxx.se>2018-09-19 10:17:03 +0200
committerDaniel Stenberg <daniel@haxx.se>2018-09-19 23:21:52 +0200
commit9307c219ad4741db860b864c860ac2f8bf9fad9d (patch)
treef8a75b214a770f7bc75996abee293e7e53727074
parenteb0b3acbc1beb08489222ed713ac387ca900fe90 (diff)
downloadcurl-9307c219ad4741db860b864c860ac2f8bf9fad9d.tar.gz
urlapi: add CURLU_GUESS_SCHEME and fix hostname acceptance
In order for this API to fully work for libcurl itself, it now offers a CURLU_GUESS_SCHEME flag that makes it "guess" scheme based on the host name prefix just like libcurl always did. If there's no known prefix, it will guess "http://". Separately, it relaxes the check of the host name so that IDN host names can be passed in as well. Both these changes are necessary for libcurl itself to use this API. Assisted-by: Daniel Gustafsson Closes #3018
-rw-r--r--docs/libcurl/curl_url_set.310
-rw-r--r--include/curl/urlapi.h1
-rw-r--r--lib/urlapi.c48
-rw-r--r--tests/data/test15606
-rw-r--r--tests/libtest/lib1560.c26
5 files changed, 79 insertions, 12 deletions
diff --git a/docs/libcurl/curl_url_set.3 b/docs/libcurl/curl_url_set.3
index b2b273f82..95b76bd8c 100644
--- a/docs/libcurl/curl_url_set.3
+++ b/docs/libcurl/curl_url_set.3
@@ -96,6 +96,16 @@ The query part gets space-to-plus conversion before the URL conversion.
This URL encoding is charset unaware and will convert the input on a
byte-by-byte manner.
+.IP CURLU_DEFAULT_SCHEME
+If set, will make libcurl allow the URL to be set without a scheme and then
+sets that to the default scheme: HTTPS. Overrides the \fICURLU_GUESS_SCHEME\fP
+option if both are set.
+.IP CURLU_GUESS_SCHEME
+If set, will make libcurl allow the URL to be set without a scheme and it
+instead "guesses" which scheme that was intended based on the host name. If
+the outermost sub-domain name matches DICT, FTP, IMAP, LDAP, POP3 or SMTP then
+that scheme will be used, otherwise it picks HTTP. Conflicts with the
+\fICURLU_DEFAULT_SCHEME\fP option which takes precendence if both are set.
.SH RETURN VALUE
Returns a CURLUcode error value, which is CURLUE_OK (0) if everything went
fine.
diff --git a/include/curl/urlapi.h b/include/curl/urlapi.h
index b16cfce56..319de35b7 100644
--- a/include/curl/urlapi.h
+++ b/include/curl/urlapi.h
@@ -75,6 +75,7 @@ typedef enum {
#define CURLU_URLDECODE (1<<6) /* URL decode on get */
#define CURLU_URLENCODE (1<<7) /* URL encode on set */
#define CURLU_APPENDQUERY (1<<8) /* append a form style part */
+#define CURLU_GUESS_SCHEME (1<<9) /* legacy curl-style guessing */
typedef struct Curl_URL CURLU;
diff --git a/lib/urlapi.c b/lib/urlapi.c
index f6d911667..ef565d98d 100644
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -554,7 +554,7 @@ static CURLUcode junkscan(char *part)
static CURLUcode hostname_check(char *hostname, unsigned int flags)
{
- const char *l; /* accepted characters */
+ const char *l = NULL; /* accepted characters */
size_t len;
size_t hlen = strlen(hostname);
(void)flags;
@@ -564,14 +564,21 @@ static CURLUcode hostname_check(char *hostname, unsigned int flags)
l = "0123456789abcdefABCDEF::.";
hlen -= 2;
}
- else /* % for URL escaped letters */
- l = "0123456789abcdefghijklimnopqrstuvwxyz-_.ABCDEFGHIJKLIMNOPQRSTUVWXYZ%";
-
- len = strspn(hostname, l);
- if(hlen != len)
- /* hostname with bad content */
- return CURLUE_MALFORMED_INPUT;
+ if(l) {
+ /* only valid letters are ok */
+ len = strspn(hostname, l);
+ if(hlen != len)
+ /* hostname with bad content */
+ return CURLUE_MALFORMED_INPUT;
+ }
+ else {
+ /* letters from the second string is not ok */
+ len = strcspn(hostname, " ");
+ if(hlen != len)
+ /* hostname with bad content */
+ return CURLUE_MALFORMED_INPUT;
+ }
return CURLUE_OK;
}
@@ -587,7 +594,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
CURLUcode result;
bool url_has_scheme = FALSE;
char schemebuf[MAX_SCHEME_LEN];
- char *schemep;
+ char *schemep = NULL;
size_t schemelen = 0;
size_t urllen;
const struct Curl_handler *h = NULL;
@@ -723,9 +730,10 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
else {
/* no scheme! */
- if(!(flags & CURLU_DEFAULT_SCHEME))
+ if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
return CURLUE_MALFORMED_INPUT;
- schemep = (char *) DEFAULT_SCHEME;
+ if(flags & CURLU_DEFAULT_SCHEME)
+ schemep = (char *) DEFAULT_SCHEME;
/*
* The URL was badly formatted, let's try without scheme specified.
@@ -744,6 +752,24 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
memcpy(hostname, hostp, len);
hostname[len] = 0;
+ if((flags & CURLU_GUESS_SCHEME) && !schemep) {
+ /* legacy curl-style guess based on host name */
+ if(checkprefix("ftp.", hostname))
+ schemep = (char *)"ftp";
+ else if(checkprefix("dict.", hostname))
+ schemep = (char *)"dict";
+ else if(checkprefix("ldap.", hostname))
+ schemep = (char *)"ldap";
+ else if(checkprefix("imap.", hostname))
+ schemep = (char *)"imap";
+ else if(checkprefix("smtp.", hostname))
+ schemep = (char *)"smtp";
+ else if(checkprefix("pop3.", hostname))
+ schemep = (char *)"pop3";
+ else
+ schemep = (char *)"http";
+ }
+
len = strlen(p);
memcpy(path, p, len);
path[len] = 0;
diff --git a/tests/data/test1560 b/tests/data/test1560
index 720df036f..4b6c97a53 100644
--- a/tests/data/test1560
+++ b/tests/data/test1560
@@ -16,6 +16,12 @@ none
file
https
http
+pop3
+smtp
+imap
+ldap
+dict
+ftp
</features>
<name>
URL API
diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c
index 669ea9ada..30fb582a2 100644
--- a/tests/libtest/lib1560.c
+++ b/tests/libtest/lib1560.c
@@ -246,8 +246,32 @@ static struct testcase get_parts_list[] ={
};
static struct urltestcase get_url_list[] = {
+ {"smtp.example.com/path/html",
+ "smtp://smtp.example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
+ {"https.example.com/path/html",
+ "http://https.example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
+ {"dict.example.com/path/html",
+ "dict://dict.example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
+ {"pop3.example.com/path/html",
+ "pop3://pop3.example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
+ {"ldap.example.com/path/html",
+ "ldap://ldap.example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
+ {"imap.example.com/path/html",
+ "imap://imap.example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
+ {"ftp.example.com/path/html",
+ "ftp://ftp.example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
+ {"example.com/path/html",
+ "http://example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
{"HTTP://test/", "http://test/", 0, 0, CURLUE_OK},
- {"http://HO0_-st..~./", "", 0, 0, CURLUE_MALFORMED_INPUT},
+ {"http://HO0_-st..~./", "http://HO0_-st..~./", 0, 0, CURLUE_OK},
{"http:/@example.com: 123/", "", 0, 0, CURLUE_BAD_PORT_NUMBER},
{"http:/@example.com:123 /", "", 0, 0, CURLUE_BAD_PORT_NUMBER},
{"http:/@example.com:123a/", "", 0, 0, CURLUE_BAD_PORT_NUMBER},