diff options
Diffstat (limited to 'lib/url.js')
-rw-r--r-- | lib/url.js | 80 |
1 file changed, 47 insertions, 33 deletions
diff --git a/lib/url.js b/lib/url.js index b8ba3fb1d..db7723895 100644 --- a/lib/url.js +++ b/lib/url.js @@ -64,7 +64,7 @@ var protocolPattern = /^([a-z0-9.+-]+:)/i, // them. nonHostChars = ['%', '/', '?', ';', '#'] .concat(unwise).concat(autoEscape), - nonAuthChars = ['/', '@', '?', '#'].concat(delims), + hostEndingChars = ['/', '?', '#'], hostnameMaxLen = 255, hostnamePartPattern = /^[a-z0-9A-Z_-]{0,63}$/, hostnamePartStart = /^([a-z0-9A-Z_-]{0,63})(.*)$/, @@ -146,49 +146,63 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) { if (!hostlessProtocol[proto] && (slashes || (proto && !slashedProtocol[proto]))) { + // there's a hostname. // the first instance of /, ?, ;, or # ends the host. - // don't enforce full RFC correctness, just be unstupid about it. - + // // If there is an @ in the hostname, then non-host chars *are* allowed - // to the left of the first @ sign, unless some non-auth character + // to the left of the last @ sign, unless some host-ending character // comes *before* the @-sign. // URLs are obnoxious. - var atSign = rest.indexOf('@'); - if (atSign !== -1) { - var auth = rest.slice(0, atSign); - - // there *may be* an auth - var hasAuth = true; - for (var i = 0, l = nonAuthChars.length; i < l; i++) { - if (auth.indexOf(nonAuthChars[i]) !== -1) { - // not a valid auth. Something like http://foo.com/bar@baz/ - hasAuth = false; - break; - } - } + // + // ex: + // http://a@b@c/ => user:a@b host:c + // http://a@b?@c => user:a host:b path:/?@c + + // v0.12 TODO(isaacs): This is not quite how Chrome does things. + // Review our test case against browsers more comprehensively. + + // find the first instance of any hostEndingChars + var hostEnd = -1; + for (var i = 0; i < hostEndingChars.length; i++) { + var hec = rest.indexOf(hostEndingChars[i]); + if (hec !== -1 && (hostEnd === -1 || hec < hostEnd)) + hostEnd = hec; + } - if (hasAuth) { - // pluck off the auth portion. 
- this.auth = decodeURIComponent(auth); - rest = rest.substr(atSign + 1); - } + // at this point, either we have an explicit point where the + // auth portion cannot go past, or the last @ char is the decider. + var auth, atSign; + if (hostEnd === -1) { + // atSign can be anywhere. + atSign = rest.lastIndexOf('@'); + } else { + // atSign must be in auth portion. + // http://a@b/c@d => host:b auth:a path:/c@d + atSign = rest.lastIndexOf('@', hostEnd); } - var firstNonHost = -1; - for (var i = 0, l = nonHostChars.length; i < l; i++) { - var index = rest.indexOf(nonHostChars[i]); - if (index !== -1 && - (firstNonHost < 0 || index < firstNonHost)) firstNonHost = index; + // Now we have a portion which is definitely the auth. + // Pull that off. + if (atSign !== -1) { + auth = rest.slice(0, atSign); + rest = rest.slice(atSign + 1); + this.auth = decodeURIComponent(auth); } - if (firstNonHost !== -1) { - this.host = rest.substr(0, firstNonHost); - rest = rest.substr(firstNonHost); - } else { - this.host = rest; - rest = ''; + // the host is the remaining to the left of the first non-host char + hostEnd = -1; + for (var i = 0; i < nonHostChars.length; i++) { + var hec = rest.indexOf(nonHostChars[i]); + if (hec !== -1 && (hostEnd === -1 || hec < hostEnd)) + hostEnd = hec; } + // if we still have not hit it, then the entire thing is a host. + if (hostEnd === -1) + hostEnd = rest.length; + + this.host = rest.slice(0, hostEnd); + rest = rest.slice(hostEnd); // pull out port. this.parseHost(); |