summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Kerwin <matthew@kerwin.net.au>2017-11-24 21:24:40 +1000
committerDaniel Stenberg <daniel@haxx.se>2017-11-24 23:12:55 +0100
commit25634611f983824503cd708f0b65dddf028c3026 (patch)
tree34683babc1100fbc582a3b0fb0bd64fab775ea47
parentd6ec96f7ffc5e8ee264d206634d3ec146794b80b (diff)
downloadcurl-25634611f983824503cd708f0b65dddf028c3026.tar.gz
URL: update "file:" URL handling
* LOTS of comment updates * explicit error for SMB shares (e.g. "file:////share/path/file") * more strict handling of authority (i.e. "//localhost/") * now accepts dodgy old "C:|" drive letters * more precise handling of drive letters in and out of Windows (especially recognising both "file:c:/" and "file:/c:/") Closes #2110
-rw-r--r--lib/url.c121
1 files changed, 78 insertions, 43 deletions
diff --git a/lib/url.c b/lib/url.c
index 1de02c2bd..9f9fa0c43 100644
--- a/lib/url.c
+++ b/lib/url.c
@@ -2039,6 +2039,14 @@ static CURLcode parseurlandfillconn(struct Curl_easy *data,
('A' <= str[0] && str[0] <= 'Z')) && \
(str[1] == ':'))
+ /* MSDOS/Windows style drive prefix, optionally with
+ * a '|' instead of ':', followed by a slash or NUL */
+#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
+ ((('a' <= (str)[0] && (str)[0] <= 'z') || \
+ ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
+ ((str)[1] == ':' || (str)[1] == '|') && \
+ ((str)[2] == '/' || (str)[2] == 0))
+
/* Don't mistake a drive letter for a scheme if the default protocol is file.
curld --proto-default file c:/foo/bar.txt */
if(STARTS_WITH_DRIVE_PREFIX(data->change.url) &&
@@ -2071,63 +2079,90 @@ static CURLcode parseurlandfillconn(struct Curl_easy *data,
return CURLE_URL_MALFORMAT;
}
- if(url_has_scheme && path[0] == '/' && path[1] == '/') {
- /* Allow omitted hostname (e.g. file:/<path>). This is not strictly
- * speaking a valid file: URL by RFC 1738, but treating file:/<path> as
- * file://localhost/<path> is similar to how other schemes treat missing
- * hostnames. See RFC 1808. */
-
- /* This cannot be done with strcpy() in a portable manner, since the
- memory areas overlap! */
- memmove(path, path + 2, strlen(path + 2) + 1);
+ if(url_has_scheme && path[0] == '/' && path[1] == '/' &&
+ path[2] == '/' && path[3] == '/') {
+ /* This appears to be a UNC string (usually indicating a SMB share).
+ * We don't do SMB in file: URLs. (TODO?)
+ */
+ failf(data, "SMB shares are not supported in file: URLs.");
+ return CURLE_URL_MALFORMAT;
}
- /*
- * we deal with file://<host>/<path> differently since it supports no
- * hostname other than "localhost" and "127.0.0.1", which is unique among
- * the URL protocols specified in RFC 1738
+ /* Extra handling URLs with an authority component (i.e. that start with
+ * "file://")
+ *
+ * We allow omitted hostname (e.g. file:/<path>) -- valid according to
+ * RFC 8089, but not the (current) WHAT-WG URL spec.
*/
- if(path[0] != '/' && !STARTS_WITH_DRIVE_PREFIX(path)) {
- /* the URL includes a host name, it must match "localhost" or
- "127.0.0.1" to be valid */
- char *ptr;
- if(!checkprefix("localhost/", path) &&
- !checkprefix("127.0.0.1/", path)) {
- failf(data, "Invalid file://hostname/, "
- "expected localhost or 127.0.0.1 or none");
- return CURLE_URL_MALFORMAT;
- }
- ptr = &path[9]; /* now points to the slash after the host */
-
- /* there was a host name and slash present
-
- RFC1738 (section 3.1, page 5) says:
-
- The rest of the locator consists of data specific to the scheme,
- and is known as the "url-path". It supplies the details of how the
- specified resource can be accessed. Note that the "/" between the
- host (or port) and the url-path is NOT part of the url-path.
+ if(url_has_scheme && path[0] == '/' && path[1] == '/') {
+ /* swallow the two slashes */
+ char *ptr = &path[2];
- As most agents use file://localhost/foo to get '/foo' although the
- slash preceding foo is a separator and not a slash for the path,
- a URL as file://localhost//foo must be valid as well, to refer to
- the same file with an absolute path.
- */
+ /*
+ * According to RFC 8089, a file: URL can be reliably dereferenced if:
+ *
+ * o it has no/blank hostname, or
+ *
+ * o the hostname matches "localhost" (case-insensitively), or
+ *
+ * o the hostname is a FQDN that resolves to this machine.
+ *
+ * For brevity, we only consider URLs with empty, "localhost", or
+ * "127.0.0.1" hostnames as local.
+ *
+ * Additionally, there is an exception for URLs with a Windows drive
+ * letter in the authority (which was accidentally omitted from RFC 8089
+ * Appendix E, but believe me, it was meant to be there. --MK)
+ */
+ if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
+ /* the URL includes a host name, it must match "localhost" or
+ "127.0.0.1" to be valid */
+ if(!checkprefix("localhost/", ptr) &&
+ !checkprefix("127.0.0.1/", ptr)) {
+ failf(data, "Invalid file://hostname/, "
+ "expected localhost or 127.0.0.1 or none");
+ return CURLE_URL_MALFORMAT;
+ }
+ ptr += 9; /* now points to the slash after the host */
+ }
- if('/' == ptr[1])
- /* if there was two slashes, we skip the first one as that is then
- used truly as a separator */
+ /*
+ * RFC 8089, Appendix D, Section D.1, says:
+ *
+ * > In a POSIX file system, the root of the file system is represented
+ * > as a directory with a zero-length name, usually written as "/"; the
+ * > presence of this root in a file URI can be taken as given by the
+ * > initial slash in the "path-absolute" rule.
+ *
+ * i.e. the first slash is part of the path.
+ *
+ * However in RFC 1738 the "/" between the host (or port) and the
+ * URL-path was NOT part of the URL-path. Any agent that followed the
+ * older spec strictly, and wanted to refer to a file with an absolute
+ * path, would have included a second slash. So if there are two
+ * slashes, swallow one.
+ */
+ if('/' == ptr[1]) /* note: the only way ptr[0]!='/' is if ptr[1]==':' */
ptr++;
- /* This cannot be made with strcpy, as the memory chunks overlap! */
+ /* This cannot be done with strcpy, as the memory chunks overlap! */
memmove(path, ptr, strlen(ptr) + 1);
}
#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
- if(STARTS_WITH_DRIVE_PREFIX(path)) {
+ /* Don't allow Windows drive letters when not in Windows.
+ * This catches both "file:/c:" and "file:c:" */
+ if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
+ STARTS_WITH_URL_DRIVE_PREFIX(path)) {
failf(data, "File drive letters are only accepted in MSDOS/Windows.");
return CURLE_URL_MALFORMAT;
}
+#else
+ /* If the path starts with a slash and a drive letter, ditch the slash */
+ if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
+ /* This cannot be done with strcpy, as the memory chunks overlap! */
+ memmove(path, &path[1], strlen(&path[1]) + 1);
+ }
#endif
protop = "file"; /* protocol string */