diff options
Diffstat (limited to 'src/util/net.c')
-rw-r--r-- | src/util/net.c | 422 |
1 files changed, 338 insertions, 84 deletions
diff --git a/src/util/net.c b/src/util/net.c index b2236daf8..ac7befe07 100644 --- a/src/util/net.c +++ b/src/util/net.c @@ -93,121 +93,367 @@ int git_net_url_dup(git_net_url *out, git_net_url *in) return 0; } -int git_net_url_parse(git_net_url *url, const char *given) +static int url_invalid(const char *message) { - struct http_parser_url u = {0}; - bool has_scheme, has_host, has_port, has_path, has_query, has_userinfo; - git_str scheme = GIT_STR_INIT, - host = GIT_STR_INIT, - port = GIT_STR_INIT, - path = GIT_STR_INIT, - username = GIT_STR_INIT, - password = GIT_STR_INIT, - query = GIT_STR_INIT; - int error = GIT_EINVALIDSPEC; - - if (http_parser_parse_url(given, strlen(given), false, &u)) { - git_error_set(GIT_ERROR_NET, "malformed URL '%s'", given); - goto done; - } + git_error_set(GIT_ERROR_NET, "invalid url: %s", message); + return GIT_EINVALIDSPEC; +} - has_scheme = !!(u.field_set & (1 << UF_SCHEMA)); - has_host = !!(u.field_set & (1 << UF_HOST)); - has_port = !!(u.field_set & (1 << UF_PORT)); - has_path = !!(u.field_set & (1 << UF_PATH)); - has_query = !!(u.field_set & (1 << UF_QUERY)); - has_userinfo = !!(u.field_set & (1 << UF_USERINFO)); +static int url_parse_authority( + const char **user_start, size_t *user_len, + const char **password_start, size_t *password_len, + const char **host_start, size_t *host_len, + const char **port_start, size_t *port_len, + const char *authority_start, size_t len, + const char *scheme_start, size_t scheme_len) +{ + const char *c, *hostport_end, *host_end = NULL, + *userpass_end, *user_end = NULL; - if (has_scheme) { - const char *url_scheme = given + u.field_data[UF_SCHEMA].off; - size_t url_scheme_len = u.field_data[UF_SCHEMA].len; - git_str_put(&scheme, url_scheme, url_scheme_len); - git__strntolower(scheme.ptr, scheme.size); - } else { - git_error_set(GIT_ERROR_NET, "malformed URL '%s'", given); - goto done; - } + enum { + HOSTPORT, HOST, IPV6, HOST_END, USERPASS, USER + } state = HOSTPORT; - if (has_host) { - const char *url_host = given + u.field_data[UF_HOST].off; - size_t url_host_len = u.field_data[UF_HOST].len; - git_str_decode_percent(&host, url_host, url_host_len); - } + if (len == 0) + return 0; - if (has_port) { - const char *url_port = given + u.field_data[UF_PORT].off; - size_t url_port_len = u.field_data[UF_PORT].len; - git_str_put(&port, url_port, url_port_len); - } else { - const char *default_port = default_port_for_scheme(scheme.ptr); + /* + * walk the authority backwards so that we can parse google code's + * ssh urls that are not rfc compliant and allow @ in the username + */ + for (hostport_end = authority_start + len, c = hostport_end - 1; + c >= authority_start && !user_end; + c--) { + switch (state) { + case HOSTPORT: + if (*c == ':') { + *port_start = c + 1; + *port_len = hostport_end - *port_start; + host_end = c; + state = HOST; + break; + } - if (default_port == NULL) { - git_error_set(GIT_ERROR_NET, "unknown scheme for URL '%s'", given); - goto done; - } + /* + * if we've only seen digits then we don't know + * if we're parsing just a host or a host and port. + * if we see a non-digit, then we're in a host, + * otherwise, fall through to possibly match the + * "@" (user/host separator). + */ + + if (*c < '0' || *c > '9') { + host_end = hostport_end; + state = HOST; + } - git_str_puts(&port, default_port); - } + /* fall through */ - if (has_path) { - const char *url_path = given + u.field_data[UF_PATH].off; - size_t url_path_len = u.field_data[UF_PATH].len; - git_str_put(&path, url_path, url_path_len); - } else { - git_str_puts(&path, "/"); + case HOST: + if (*c == ']' && host_end == c + 1) { + host_end = c; + state = IPV6; + } + + else if (*c == '@') { + *host_start = c + 1; + *host_len = host_end ? host_end - *host_start : + hostport_end - *host_start; + userpass_end = c; + state = USERPASS; + } + + else if (*c == '[' || *c == ']' || *c == ':') { + return url_invalid("malformed hostname"); + } + + break; + + case IPV6: + if (*c == '[') { + *host_start = c + 1; + *host_len = host_end - *host_start; + state = HOST_END; + } + + else if ((*c < '0' || *c > '9') && + (*c < 'a' || *c > 'f') && + (*c < 'A' || *c > 'F') && + (*c != ':')) { + return url_invalid("malformed hostname"); + } + + break; + + case HOST_END: + if (*c == '@') { + userpass_end = c; + state = USERPASS; + break; + } + + return url_invalid("malformed hostname"); + + case USERPASS: + if (*c == '@' && + strncasecmp(scheme_start, "ssh", scheme_len)) + return url_invalid("malformed hostname"); + + if (*c == ':') { + *password_start = c + 1; + *password_len = userpass_end - *password_start; + user_end = c; + state = USER; + break; + } + + break; + + default: + GIT_ASSERT(!"unhandled state"); + } } - if (has_query) { - const char *url_query = given + u.field_data[UF_QUERY].off; - size_t url_query_len = u.field_data[UF_QUERY].len; - git_str_decode_percent(&query, url_query, url_query_len); + switch (state) { + case HOSTPORT: + *host_start = authority_start; + *host_len = (hostport_end - *host_start); + break; + case HOST: + *host_start = authority_start; + *host_len = (host_end - *host_start); + break; + case IPV6: + return url_invalid("malformed hostname"); + case HOST_END: + break; + case USERPASS: + *user_start = authority_start; + *user_len = (userpass_end - *user_start); + break; + case USER: + *user_start = authority_start; + *user_len = (user_end - *user_start); + break; + default: + GIT_ASSERT(!"unhandled state"); } - if (has_userinfo) { - const char *url_userinfo = given + u.field_data[UF_USERINFO].off; - size_t url_userinfo_len = u.field_data[UF_USERINFO].len; - const char *colon = memchr(url_userinfo, ':', url_userinfo_len); + return 0; +} + +int git_net_url_parse(git_net_url *url, const char *given) +{ + const char *c, *scheme_start, *authority_start, *user_start, + *password_start, *host_start, *port_start, *path_start, + *query_start, *fragment_start, *default_port; + git_str scheme = GIT_STR_INIT, user = GIT_STR_INIT, + password = GIT_STR_INIT, host = GIT_STR_INIT, + port = GIT_STR_INIT, path = GIT_STR_INIT, + query = GIT_STR_INIT, fragment = GIT_STR_INIT; + size_t scheme_len = 0, user_len = 0, password_len = 0, host_len = 0, + port_len = 0, path_len = 0, query_len = 0, fragment_len = 0; + bool hierarchical = false; + int error = 0; + + enum { + SCHEME, + AUTHORITY_START, AUTHORITY, + PATH_START, PATH, + QUERY, + FRAGMENT + } state = SCHEME; + + memset(url, 0, sizeof(git_net_url)); + + for (c = scheme_start = given; *c; c++) { + switch (state) { + case SCHEME: + if (*c == ':') { + scheme_len = (c - scheme_start); + + if (*(c+1) == '/' && *(c+2) == '/') { + c += 2; + hierarchical = true; + state = AUTHORITY_START; + } else { + state = PATH_START; + } + } else if ((*c < 'A' || *c > 'Z') && + (*c < 'a' || *c > 'z') && + (*c < '0' || *c > '9') && + (*c != '+' && *c != '-' && *c != '.')) { + /* + * an illegal scheme character means that we + * were just given a relative path + */ + path_start = given; + state = PATH; + break; + } + break; + + case AUTHORITY_START: + authority_start = c; + state = AUTHORITY; + + /* fall through */ + + case AUTHORITY: + if (*c != '/') + break; + + /* + * authority is sufficiently complex that we parse + * it separately + */ + if ((error = url_parse_authority( + &user_start, &user_len, + &password_start,&password_len, + &host_start, &host_len, + &port_start, &port_len, + authority_start, (c - authority_start), + scheme_start, scheme_len)) < 0) + goto done; + + /* fall through */ + + case PATH_START: + path_start = c; + state = PATH; + /* fall through */ + + case PATH: + switch (*c) { + case '?': + path_len = (c - path_start); + query_start = c + 1; + state = QUERY; + break; + case '#': + path_len = (c - path_start); + fragment_start = c + 1; + state = FRAGMENT; + break; + } + break; + + case QUERY: + if (*c == '#') { + query_len = (c - query_start); + fragment_start = c + 1; + state = FRAGMENT; + } + break; - if (colon) { - const char *url_username = url_userinfo; - size_t url_username_len = colon - url_userinfo; - const char *url_password = colon + 1; - size_t url_password_len = url_userinfo_len - (url_username_len + 1); + case FRAGMENT: + break; - git_str_decode_percent(&username, url_username, url_username_len); - git_str_decode_percent(&password, url_password, url_password_len); - } else { - git_str_decode_percent(&username, url_userinfo, url_userinfo_len); + default: + GIT_ASSERT(!"unhandled state"); } } - if (git_str_oom(&scheme) || - git_str_oom(&host) || - git_str_oom(&port) || - git_str_oom(&path) || - git_str_oom(&query) || - git_str_oom(&username) || - git_str_oom(&password)) - return -1; + switch (state) { + case SCHEME: + /* + * if we never saw a ':' then we were given a relative + * path, not a bare scheme + */ + path_start = given; + path_len = (c - scheme_start); + break; + case AUTHORITY_START: + break; + case AUTHORITY: + if ((error = url_parse_authority( + &user_start, &user_len, + &password_start,&password_len, + &host_start, &host_len, + &port_start, &port_len, + authority_start, (c - authority_start), + scheme_start, scheme_len)) < 0) + goto done; + break; + case PATH_START: + break; + case PATH: + path_len = (c - path_start); + break; + case QUERY: + query_len = (c - query_start); + break; + case FRAGMENT: + fragment_len = (c - fragment_start); + break; + default: + GIT_ASSERT(!"unhandled state"); + } + + if (scheme_len) { + if ((error = git_str_put(&scheme, scheme_start, scheme_len)) < 0) + goto done; + + git__strntolower(scheme.ptr, scheme.size); + } + + if (user_len && + (error = git_str_decode_percent(&user, user_start, user_len)) < 0) + goto done; + + if (password_len && + (error = git_str_decode_percent(&password, password_start, password_len)) < 0) + goto done; + + if (host_len && + (error = git_str_decode_percent(&host, host_start, host_len)) < 0) + goto done; + + if (port_len) + error = git_str_put(&port, port_start, port_len); + else if (scheme_len && (default_port = default_port_for_scheme(scheme.ptr)) != NULL) + error = git_str_puts(&port, default_port); + + if (error < 0) + goto done; + + if (path_len) + error = git_str_put(&path, path_start, path_len); + else if (hierarchical) + error = git_str_puts(&path, "/"); + + if (error < 0) + goto done; + + if (query_len && + (error = git_str_decode_percent(&query, query_start, query_len)) < 0) + goto done; + + if (fragment_len && + (error = git_str_decode_percent(&fragment, fragment_start, fragment_len)) < 0) + goto done; url->scheme = git_str_detach(&scheme); url->host = git_str_detach(&host); url->port = git_str_detach(&port); url->path = git_str_detach(&path); url->query = git_str_detach(&query); - url->username = git_str_detach(&username); + url->fragment = git_str_detach(&fragment); + url->username = git_str_detach(&user); url->password = git_str_detach(&password); error = 0; done: git_str_dispose(&scheme); + git_str_dispose(&user); + git_str_dispose(&password); git_str_dispose(&host); git_str_dispose(&port); git_str_dispose(&path); git_str_dispose(&query); - git_str_dispose(&username); - git_str_dispose(&password); + git_str_dispose(&fragment); + return error; } @@ -374,7 +620,7 @@ int git_net_url_parse_scp(git_net_url *url, const char *given) break; default: - GIT_ASSERT("unhandled state"); + GIT_ASSERT(!"unhandled state"); } } @@ -400,6 +646,13 @@ int git_net_url_parse_scp(git_net_url *url, const char *given) return 0; } +int git_net_url_parse_standard_or_scp(git_net_url *url, const char *given) +{ + return git_net_str_is_url(given) ? + git_net_url_parse(url, given) : + git_net_url_parse_scp(url, given); +} + int git_net_url_joinpath( git_net_url *out, git_net_url *one, @@ -588,7 +841,7 @@ bool git_net_url_is_default_port(git_net_url *url) { const char *default_port; - if ((default_port = default_port_for_scheme(url->scheme)) != NULL) + if (url->scheme && (default_port = default_port_for_scheme(url->scheme)) != NULL) return (strcmp(url->port, default_port) == 0); else return false; @@ -744,6 +997,7 @@ void git_net_url_dispose(git_net_url *url) git__free(url->port); url->port = NULL; git__free(url->path); url->path = NULL; git__free(url->query); url->query = NULL; + git__free(url->fragment); url->fragment = NULL; git__free(url->username); url->username = NULL; git__free(url->password); url->password = NULL; } |