summaryrefslogtreecommitdiff
path: root/lib/url.js
diff options
context:
space:
mode:
Diffstat (limited to 'lib/url.js')
-rw-r--r--lib/url.js80
1 files changed, 47 insertions, 33 deletions
diff --git a/lib/url.js b/lib/url.js
index b8ba3fb1d..db7723895 100644
--- a/lib/url.js
+++ b/lib/url.js
@@ -64,7 +64,7 @@ var protocolPattern = /^([a-z0-9.+-]+:)/i,
// them.
nonHostChars = ['%', '/', '?', ';', '#']
.concat(unwise).concat(autoEscape),
- nonAuthChars = ['/', '@', '?', '#'].concat(delims),
+ hostEndingChars = ['/', '?', '#'],
hostnameMaxLen = 255,
hostnamePartPattern = /^[a-z0-9A-Z_-]{0,63}$/,
hostnamePartStart = /^([a-z0-9A-Z_-]{0,63})(.*)$/,
@@ -146,49 +146,63 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
if (!hostlessProtocol[proto] &&
(slashes || (proto && !slashedProtocol[proto]))) {
+
// there's a hostname.
// the first instance of /, ?, ;, or # ends the host.
- // don't enforce full RFC correctness, just be unstupid about it.
-
+ //
// If there is an @ in the hostname, then non-host chars *are* allowed
- // to the left of the first @ sign, unless some non-auth character
+ // to the left of the last @ sign, unless some host-ending character
// comes *before* the @-sign.
// URLs are obnoxious.
- var atSign = rest.indexOf('@');
- if (atSign !== -1) {
- var auth = rest.slice(0, atSign);
-
- // there *may be* an auth
- var hasAuth = true;
- for (var i = 0, l = nonAuthChars.length; i < l; i++) {
- if (auth.indexOf(nonAuthChars[i]) !== -1) {
- // not a valid auth. Something like http://foo.com/bar@baz/
- hasAuth = false;
- break;
- }
- }
+ //
+ // ex:
+ // http://a@b@c/ => user:a@b host:c
+ // http://a@b?@c => user:a host:b path:/?@c
+
+ // v0.12 TODO(isaacs): This is not quite how Chrome does things.
+ // Review our test case against browsers more comprehensively.
+
+ // find the first instance of any hostEndingChars
+ var hostEnd = -1;
+ for (var i = 0; i < hostEndingChars.length; i++) {
+ var hec = rest.indexOf(hostEndingChars[i]);
+ if (hec !== -1 && (hostEnd === -1 || hec < hostEnd))
+ hostEnd = hec;
+ }
- if (hasAuth) {
- // pluck off the auth portion.
- this.auth = decodeURIComponent(auth);
- rest = rest.substr(atSign + 1);
- }
+ // at this point, either we have an explicit point where the
+ // auth portion cannot go past, or the last @ char is the decider.
+ var auth, atSign;
+ if (hostEnd === -1) {
+ // atSign can be anywhere.
+ atSign = rest.lastIndexOf('@');
+ } else {
+ // atSign must come before the first host-ending char.
+ // http://a@b/c@d => host:b auth:a path:/c@d
+ atSign = rest.lastIndexOf('@', hostEnd);
}
- var firstNonHost = -1;
- for (var i = 0, l = nonHostChars.length; i < l; i++) {
- var index = rest.indexOf(nonHostChars[i]);
- if (index !== -1 &&
- (firstNonHost < 0 || index < firstNonHost)) firstNonHost = index;
+ // Now we have a portion which is definitely the auth.
+ // Pull that off.
+ if (atSign !== -1) {
+ auth = rest.slice(0, atSign);
+ rest = rest.slice(atSign + 1);
+ this.auth = decodeURIComponent(auth);
}
- if (firstNonHost !== -1) {
- this.host = rest.substr(0, firstNonHost);
- rest = rest.substr(firstNonHost);
- } else {
- this.host = rest;
- rest = '';
+ // the host is whatever remains to the left of the first non-host char
+ hostEnd = -1;
+ for (var i = 0; i < nonHostChars.length; i++) {
+ var hec = rest.indexOf(nonHostChars[i]);
+ if (hec !== -1 && (hostEnd === -1 || hec < hostEnd))
+ hostEnd = hec;
}
+ // if we still have not hit it, then the entire thing is a host.
+ if (hostEnd === -1)
+ hostEnd = rest.length;
+
+ this.host = rest.slice(0, hostEnd);
+ rest = rest.slice(hostEnd);
// pull out port.
this.parseHost();