/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* camel-url.c : utility functions to parse URLs
*
* Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
*
* This library is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library. If not, see <http://www.gnu.org/licenses/>.
*
* Authors: Dan Winship
* Jeffrey Stedfast
*/
#ifdef HAVE_CONFIG_H
#include
#endif
#include
#include
#include
#include
#include
#include "camel-mime-utils.h"
#include "camel-object.h"
#include "camel-service.h"
#include "camel-string-utils.h"
#include "camel-url.h"
/* Forward declarations of file-local helpers that are used before their
 * definitions further down in this file.  copy_param() and output_param()
 * have the (GQuark, gpointer, gpointer) signature expected by
 * g_datalist_foreach(); append_url_encoded() appends a %-escaped copy of a
 * string to a GString. */
static void copy_param (GQuark key_id, gpointer data, gpointer user_data);
static void output_param (GQuark key_id, gpointer data, gpointer user_data);
static void append_url_encoded (GString *str, const gchar *in, const gchar *extra_enc_chars);
/* Returns the boxed GType for CamelURL, registering it on first use with
 * camel_url_copy() / camel_url_free() as its copy and free functions.
 * NOTE(review): the lazy registration below takes no lock; confirm that
 * the first call cannot race between threads. */
GType
camel_url_get_type (void)
{
	static GType registered_type = G_TYPE_INVALID;

	/* Fast path: already registered by an earlier call. */
	if (G_LIKELY (registered_type != G_TYPE_INVALID))
		return registered_type;

	registered_type = g_boxed_type_register_static (
		"CamelURL",
		(GBoxedCopyFunc) camel_url_copy,
		(GBoxedFreeFunc) camel_url_free);

	return registered_type;
}
/**
 * camel_url_new_with_base:
 * @base: a base URL, or %NULL to parse @url_string on its own
 * @url_string: the URL
 *
 * Parses @url_string relative to @base.
 *
 * The string is split into fragment, protocol, authority
 * (user, ";auth=" mechanism, host, port), query, ";"-separated
 * parameters and path, in that order.  Each stored component except the
 * protocol is %-decoded; the protocol is lower-cased instead.  A password
 * in the authority ("user:password@host") is skipped and not stored.
 *
 * When @base is given and @url_string carries no protocol, the missing
 * pieces are inherited from @base and a relative path is merged with the
 * base path, collapsing "." and ".." segments as described in RFC 1808.
 *
 * Returns: a newly allocated #CamelURL (release it with
 * camel_url_free()), or %NULL if @url_string is %NULL
 **/
CamelURL *
camel_url_new_with_base (CamelURL *base,
                         const gchar *url_string)
{
	CamelURL *url;
	const gchar *end, *hash, *colon, *semi, *at, *slash, *question;
	const gchar *p;
#ifdef G_OS_WIN32
	const gchar *start = url_string;
#endif

	g_return_val_if_fail (url_string != NULL, NULL);

	url = g_new0 (CamelURL, 1);

	/* See RFC1808 for details. IF YOU CHANGE ANYTHING IN THIS
	 * FUNCTION, RUN tests/misc/url AFTERWARDS.
	 */

	/* Find fragment.  RFC 1808 2.4.1
	 * From here on "end" marks the end of everything but the fragment. */
	end = hash = strchr (url_string, '#');
	if (hash) {
		if (hash[1]) {
			url->fragment = g_strdup (hash + 1);
			camel_url_decode (url->fragment);
		}
	} else
		end = url_string + strlen (url_string);

	/* Find protocol: initial [a-z+.-]* substring until ":" */
	p = url_string;
	while (p < end && (isalnum ((guchar) *p) ||
			   *p == '.' || *p == '+' || *p == '-'))
		p++;

	if (p > url_string && *p == ':') {
		url->protocol = g_strndup (url_string, p - url_string);
		camel_strdown (url->protocol);
		url_string = p + 1;
	}

	/* Nothing after the protocol and nothing to inherit from. */
	if (!*url_string && !base)
		return url;

#ifdef G_OS_WIN32
	/* file: URIs are handed to GLib, which knows the Windows rules. */
	if (url->protocol && !strcmp (url->protocol, "file")) {
		url->path = g_filename_from_uri (start, &url->host, NULL);
		return url;
	}
#endif

	/* Check for authority */
	if (strncmp (url_string, "//", 2) == 0) {
		url_string += 2;

		/* The authority ends at the first '/' or '#', or at the
		 * end of the string. */
		slash = url_string + strcspn (url_string, "/#");
		at = strchr (url_string, '@');
		if (at && at < slash) {
			colon = strchr (url_string, ':');
			if (colon && colon < at) {
				/* XXX We used to extract and store the
				 *     password here, now we just eat it. */
			} else {
				colon = at;
			}

			semi = strchr (url_string, ';');
			if (semi && semi < colon &&
			    !g_ascii_strncasecmp (semi, ";auth=", 6)) {
				url->authmech = g_strndup (
					semi + 6, colon - semi - 6);
				camel_url_decode (url->authmech);
			} else {
				url->authmech = NULL;
				semi = colon;
			}

			url->user = g_strndup (url_string, semi - url_string);
			camel_url_decode (url->user);
			url_string = at + 1;
		} else
			url->user = url->authmech = NULL;

		/* Find host and port. */
		colon = strchr (url_string, ':');
		if (colon && colon < slash) {
			url->host = g_strndup (url_string, colon - url_string);
			url->port = strtoul (colon + 1, NULL, 10);
		} else {
			url->host = g_strndup (url_string, slash - url_string);
			url->port = 0;
		}

		/* Decode the host whether or not a port followed it, so
		 * that camel_url_to_string() does not re-escape an already
		 * escaped host name. */
		camel_url_decode (url->host);

		url_string = slash;
	}

	/* Find query */
	question = memchr (url_string, '?', end - url_string);
	if (question) {
		if (question[1]) {
			url->query = g_strndup (
				question + 1, end - (question + 1));
			camel_url_decode (url->query);
		}
		end = question;
	}

	/* Find parameters */
	semi = memchr (url_string, ';', end - url_string);
	if (semi) {
		if (semi[1]) {
			const gchar *cur, *p, *eq;
			gchar *name, *value;

			/* Each parameter is "name" or "name=value" and runs
			 * up to the next ';' or to "end". */
			for (cur = semi + 1; cur < end; cur = p + 1) {
				p = memchr (cur, ';', end - cur);
				if (!p)
					p = end;
				eq = memchr (cur, '=', p - cur);
				if (eq) {
					name = g_strndup (cur, eq - cur);
					value = g_strndup (eq + 1, p - (eq + 1));
					camel_url_decode (value);
				} else {
					name = g_strndup (cur, p - cur);
					value = g_strdup ("");
				}
				camel_url_decode (name);
				/* The datalist takes ownership of "value";
				 * "name" is only used as the lookup key. */
				g_datalist_set_data_full (
					&url->params, name, value, g_free);
				g_free (name);
			}
		}
		end = semi;
	}

	if (end != url_string) {
		url->path = g_strndup (url_string, end - url_string);
		camel_url_decode (url->path);
	}

	/* Apply base URL. Again, this is spelled out in RFC 1808. */
	if (base && !url->protocol && url->host)
		url->protocol = g_strdup (base->protocol);
	else if (base && !url->protocol) {
		/* A completely empty reference also inherits the fragment. */
		if (!url->user && !url->authmech &&
		    !url->host && !url->port && !url->path &&
		    !url->params && !url->query && !url->fragment)
			url->fragment = g_strdup (base->fragment);

		url->protocol = g_strdup (base->protocol);
		url->user = g_strdup (base->user);
		url->authmech = g_strdup (base->authmech);
		url->host = g_strdup (base->host);
		url->port = base->port;

		if (!url->path) {
			url->path = g_strdup (base->path);
			if (!url->params) {
				g_datalist_foreach (&base->params, copy_param,
						    &url->params);
				if (!url->query)
					url->query = g_strdup (base->query);
			}
		} else if (*url->path != '/') {
			gchar *newpath, *last, *p, *q;

			/* the base->path is NULL if given Content-Base url was without last slash,
			 * i.e. like "http://example.com" (this expected only "http://example.com/") */
			last = base->path ? strrchr (base->path, '/') : NULL;
			if (last) {
				newpath = g_strdup_printf (
					"%.*s/%s",
					(gint)(last - base->path),
					base->path,
					url->path);
			} else
				newpath = g_strdup_printf ("/%s", url->path);

			/* Remove "./" where "." is a complete segment. */
			for (p = newpath + 1; *p; ) {
				if (*(p - 1) == '/' &&
				    *p == '.' && *(p + 1) == '/')
					memmove (p, p + 2, strlen (p + 2) + 1);
				else
					p++;
			}

			/* Remove "." at end.  "p" now points at the
			 * terminator, so ">=" also covers a path that is
			 * exactly "/." while keeping p - 2 inside the buffer. */
			if (p >= newpath + 2 &&
			    *(p - 1) == '.' && *(p - 2) == '/')
				*(p - 1) = '\0';

			/* Remove "<segment>/../" where <segment> != ".." */
			for (p = newpath + 1; *p; ) {
				if (!strncmp (p, "../", 3)) {
					p += 3;
					continue;
				}
				q = strchr (p + 1, '/');
				if (!q)
					break;
				if (strncmp (q, "/../", 4) != 0) {
					p = q + 1;
					continue;
				}
				memmove (p, q + 4, strlen (q + 4) + 1);
				p = newpath + 1;
			}

			/* Remove "<segment>/.." at end.  When the whole path
			 * is "/.." there is no preceding segment (q == newpath);
			 * skip it so we never step before the buffer start. */
			q = strrchr (newpath, '/');
			if (q && q > newpath && !strcmp (q, "/..")) {
				p = q - 1;
				while (p > newpath && *p != '/')
					p--;
				if (strncmp (p, "/../", 4) != 0)
					*(p + 1) = 0;
			}

			g_free (url->path);
			url->path = newpath;
		}
	}

	return url;
}
/* g_datalist_foreach() callback: stores a duplicate of the string @data
 * under the same @key_id in the datalist that @user_data points to.  The
 * duplicate is released with g_free() when the entry is removed. */
static void
copy_param (GQuark key_id,
            gpointer data,
            gpointer user_data)
{
	GData **target = user_data;
	gchar *value_copy = g_strdup (data);

	g_datalist_id_set_data_full (target, key_id, value_copy, g_free);
}
/**
 * camel_url_new:
 * @url_string: a URL string
 * @error: return location for a #GError, or %NULL
 *
 * Parses an absolute URL.  A URL without a protocol is rejected and
 * @error is set.  Note that a %NULL or empty @url_string yields %NULL
 * without touching @error.
 *
 * Returns: a #CamelURL if it can be parsed, or %NULL otherwise
 **/
CamelURL *
camel_url_new (const gchar *url_string,
               GError **error)
{
	CamelURL *parsed;

	if (url_string == NULL || *url_string == '\0')
		return NULL;

	parsed = camel_url_new_with_base (NULL, url_string);

	/* An absolute URL must carry a protocol. */
	if (parsed->protocol != NULL)
		return parsed;

	camel_url_free (parsed);
	g_set_error (
		error, CAMEL_ERROR, CAMEL_ERROR_GENERIC,
		_("Could not parse URL '%s'"), url_string);

	return NULL;
}
/**
 * camel_url_to_string:
 * @url: a #CamelURL
 * @flags: additional translation options
 *
 * Flatten a #CamelURL into a string.  The authority part (user,
 * ";auth=" mechanism, host and port) is only written when a host is set;
 * %CAMEL_URL_HIDE_AUTH omits the mechanism and %CAMEL_URL_HIDE_PARAMS
 * omits the parameters.
 *
 * Returns: a string representing @url, which the caller must free
 **/
gchar *
camel_url_to_string (CamelURL *url,
                     CamelURLFlags flags)
{
	GString *out;
	gboolean show_auth, show_params;

	g_return_val_if_fail (url != NULL, NULL);

	/* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
	 * tests/misc/url AFTERWARD.
	 */

#ifdef G_OS_WIN32
	if (url->protocol != NULL && strcmp (url->protocol, "file") == 0)
		return g_filename_to_uri (url->path, url->host, NULL);
#endif /* G_OS_WIN32 */

	show_auth = (flags & CAMEL_URL_HIDE_AUTH) == 0;
	show_params = (flags & CAMEL_URL_HIDE_PARAMS) == 0;

	out = g_string_sized_new (20);

	if (url->protocol != NULL) {
		g_string_append (out, url->protocol);
		g_string_append_c (out, ':');
	}

	if (url->host != NULL) {
		g_string_append (out, "//");

		if (url->user != NULL) {
			append_url_encoded (out, url->user, ":;@/");
			if (show_auth &&
			    url->authmech != NULL && *url->authmech != '\0') {
				g_string_append (out, ";auth=");
				append_url_encoded (out, url->authmech, ":@/");
			}
			g_string_append_c (out, '@');
		}

		append_url_encoded (out, url->host, ":/");

		if (url->port != 0)
			g_string_append_printf (out, ":%d", url->port);

		/* Separate the authority from what follows when there is
		 * no path to do it. */
		if (url->path == NULL &&
		    (url->params != NULL || url->query != NULL ||
		     url->fragment != NULL))
			g_string_append_c (out, '/');
	}

	if (url->path != NULL)
		append_url_encoded (out, url->path, ";?");

	if (show_params && url->params != NULL)
		g_datalist_foreach (&url->params, output_param, out);

	if (url->query != NULL) {
		g_string_append_c (out, '?');
		append_url_encoded (out, url->query, NULL);
	}

	if (url->fragment != NULL) {
		g_string_append_c (out, '#');
		append_url_encoded (out, url->fragment, NULL);
	}

	/* Hand the character buffer to the caller, drop the wrapper. */
	return g_string_free (out, FALSE);
}
/* g_datalist_foreach() callback: appends ";name" or ";name=value" for one
 * URL parameter to the GString passed as @user_data.  The "=value" part
 * is left out when the value is the empty string. */
static void
output_param (GQuark key_id,
              gpointer data,
              gpointer user_data)
{
	GString *out = user_data;
	const gchar *value = data;

	g_string_append_c (out, ';');
	append_url_encoded (out, g_quark_to_string (key_id), "?=");

	if (value[0] == '\0')
		return;

	g_string_append_c (out, '=');
	append_url_encoded (out, value, "?");
}
/**
 * camel_url_free:
 * @url: a #CamelURL, or %NULL
 *
 * Frees @url together with all of its components.  The user and host
 * strings are overwritten with zeros before being released.  Passing
 * %NULL does nothing.
 **/
void
camel_url_free (CamelURL *url)
{
	if (url == NULL)
		return;

	/* Wipe these two strings before their memory is released. */
	if (url->user != NULL)
		memset (url->user, 0, strlen (url->user));
	if (url->host != NULL)
		memset (url->host, 0, strlen (url->host));

	g_free (url->protocol);
	g_free (url->user);
	g_free (url->authmech);
	g_free (url->host);
	g_free (url->path);
	g_datalist_clear (&url->params);
	g_free (url->query);
	g_free (url->fragment);

	g_free (url);
}
/**
 * camel_url_set_protocol:
 * @url: a #CamelURL
 * @protocol: protocol schema, or %NULL to clear it
 *
 * Set the protocol of a #CamelURL.  A copy of @protocol is stored, so
 * the caller keeps ownership of its string.  @protocol may be (or point
 * into) the value currently stored in @url.
 **/
void
camel_url_set_protocol (CamelURL *url,
                        const gchar *protocol)
{
	gchar *copy;

	g_return_if_fail (url != NULL);

	/* Duplicate first: @protocol may alias the string we are about
	 * to free. */
	copy = g_strdup (protocol);
	g_free (url->protocol);
	url->protocol = copy;
}
/**
 * camel_url_set_user:
 * @url: a #CamelURL
 * @user: username, or %NULL to clear it
 *
 * Set the user of a #CamelURL.  A copy of @user is stored, so the caller
 * keeps ownership of its string.  @user may be (or point into) the value
 * currently stored in @url.
 **/
void
camel_url_set_user (CamelURL *url,
                    const gchar *user)
{
	gchar *copy;

	g_return_if_fail (url != NULL);

	/* Duplicate first: @user may alias the string we are about
	 * to free. */
	copy = g_strdup (user);
	g_free (url->user);
	url->user = copy;
}
/**
 * camel_url_set_authmech:
 * @url: a #CamelURL
 * @authmech: authentication mechanism, or %NULL to clear it
 *
 * Set the authmech of a #CamelURL.  A copy of @authmech is stored, so the
 * caller keeps ownership of its string.  @authmech may be (or point into)
 * the value currently stored in @url.
 **/
void
camel_url_set_authmech (CamelURL *url,
                        const gchar *authmech)
{
	gchar *copy;

	g_return_if_fail (url != NULL);

	/* Duplicate first: @authmech may alias the string we are about
	 * to free. */
	copy = g_strdup (authmech);
	g_free (url->authmech);
	url->authmech = copy;
}
/**
 * camel_url_set_host:
 * @url: a #CamelURL
 * @host: hostname, or %NULL to clear it
 *
 * Set the hostname of a #CamelURL.  A copy of @host is stored, so the
 * caller keeps ownership of its string.  @host may be (or point into)
 * the value currently stored in @url.
 **/
void
camel_url_set_host (CamelURL *url,
                    const gchar *host)
{
	gchar *copy;

	g_return_if_fail (url != NULL);

	/* Duplicate first: @host may alias the string we are about
	 * to free. */
	copy = g_strdup (host);
	g_free (url->host);
	url->host = copy;
}
/**
 * camel_url_set_path:
 * @url: a #CamelURL
 * @path: path, or %NULL to clear it
 *
 * Set the path component of a #CamelURL.  A copy of @path is stored, so
 * the caller keeps ownership of its string.  @path may be (or point
 * into) the value currently stored in @url, e.g. `url->path + 1`.
 **/
void
camel_url_set_path (CamelURL *url,
                    const gchar *path)
{
	gchar *copy;

	g_return_if_fail (url != NULL);

	/* Duplicate first: @path may alias the string we are about
	 * to free. */
	copy = g_strdup (path);
	g_free (url->path);
	url->path = copy;
}
/**
 * camel_url_set_query:
 * @url: a #CamelURL
 * @query: url query, or %NULL to clear it
 *
 * Set the query of a #CamelURL.  A copy of @query is stored, so the
 * caller keeps ownership of its string.  @query may be (or point into)
 * the value currently stored in @url.
 **/
void
camel_url_set_query (CamelURL *url,
                     const gchar *query)
{
	gchar *copy;

	g_return_if_fail (url != NULL);

	/* Duplicate first: @query may alias the string we are about
	 * to free. */
	copy = g_strdup (query);
	g_free (url->query);
	url->query = copy;
}
/**
 * camel_url_set_fragment:
 * @url: a #CamelURL
 * @fragment: url fragment, or %NULL to clear it
 *
 * Set the fragment of a #CamelURL.  A copy of @fragment is stored, so the
 * caller keeps ownership of its string.  @fragment may be (or point into)
 * the value currently stored in @url.
 **/
void
camel_url_set_fragment (CamelURL *url,
                        const gchar *fragment)
{
	gchar *copy;

	g_return_if_fail (url != NULL);

	/* Duplicate first: @fragment may alias the string we are about
	 * to free. */
	copy = g_strdup (fragment);
	g_free (url->fragment);
	url->fragment = copy;
}
/**
 * camel_url_set_port:
 * @url: a #CamelURL
 * @port: port number to store
 *
 * Set the port on a #CamelURL.  The value is stored as given;
 * camel_url_to_string() leaves the port out when it is 0.
 **/
void
camel_url_set_port (CamelURL *url, gint port)
{
	g_return_if_fail (url != NULL);

	url->port = port;
}
/**
 * camel_url_set_param:
 * @url: a #CamelURL
 * @name: name of the param to set
 * @value: value of the param to set, or %NULL to remove the param
 *
 * Set a param on the #CamelURL.  A copy of @value is stored and freed
 * when the param is replaced or removed.
 **/
void
camel_url_set_param (CamelURL *url,
                     const gchar *name,
                     const gchar *value)
{
	g_return_if_fail (url != NULL);

	/* A NULL value means "drop this parameter". */
	if (value == NULL) {
		g_datalist_remove_data (&url->params, name);
		return;
	}

	g_datalist_set_data_full (&url->params, name, g_strdup (value), g_free);
}
/**
 * camel_url_get_param:
 * @url: a #CamelURL
 * @name: name of the param
 *
 * Get the value of the specified param on the URL.  The pointer is the
 * one stored in the URL's param list and is not copied.
 *
 * Returns: the value of a param if found or %NULL otherwise
 **/
const gchar *
camel_url_get_param (CamelURL *url,
                     const gchar *name)
{
	gpointer stored;

	g_return_val_if_fail (url != NULL, NULL);

	stored = g_datalist_get_data (&url->params, name);

	return stored;
}
/* From RFC 2396 2.4.3, the characters that should always be encoded */
/* Lookup table indexed by byte value (0-255): a non-zero entry means
 * append_url_encoded() always writes that byte as a %xx escape.  All
 * control characters, space, the punctuation  " # % < > [ \ ] ^ ` { | }
 * , DEL and every byte >= 0x80 are flagged. */
static const gchar url_encoded_char[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 - 0x0f */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10 - 0x1f */
1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* ' ' - '/' */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, /* '0' - '?' */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* '@' - 'O' */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 'P' - '_' */
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* '`' - 'o' */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 'p' - 0x7f */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 - 0x8f */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 - 0x9f */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xa0 - 0xaf */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0 - 0xbf */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xc0 - 0xcf */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xd0 - 0xdf */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xe0 - 0xef */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  /* 0xf0 - 0xff */
};
/* Appends @in to @str, writing every byte that is flagged in
 * url_encoded_char[] or listed in @extra_enc_chars (may be NULL) as a
 * "%xx" escape with two lower-case hex digits; all other bytes are
 * copied unchanged. */
static void
append_url_encoded (GString *str,
                    const gchar *in,
                    const gchar *extra_enc_chars)
{
	const guchar *cursor;

	for (cursor = (const guchar *) in; *cursor != '\0'; cursor++) {
		const guchar ch = *cursor;
		gboolean must_escape = url_encoded_char[ch] != 0;

		/* Only consult the caller's list if the table said no. */
		if (!must_escape && extra_enc_chars != NULL)
			must_escape = strchr (extra_enc_chars, ch) != NULL;

		if (must_escape)
			g_string_append_printf (str, "%%%02x", (gint) ch);
		else
			g_string_append_c (str, ch);
	}
}
/**
 * camel_url_encode:
 * @part: a URL part
 * @escape_extra: additional characters beyond " \"%#<>{}|\^[]`"
 * to escape (or %NULL)
 *
 * Produces a %-encoded copy of the given URL part.  The result lives in
 * newly allocated memory, which the caller must free when it is done.
 *
 * Returns: the encoded string
 **/
gchar *
camel_url_encode (const gchar *part,
                  const gchar *escape_extra)
{
	GString *buffer;

	g_return_val_if_fail (part != NULL, NULL);

	buffer = g_string_new (NULL);
	append_url_encoded (buffer, part, escape_extra);

	/* Keep the characters, release only the GString wrapper. */
	return g_string_free (buffer, FALSE);
}
/**
 * camel_url_decode:
 * @part: a URL part
 *
 * %-decodes the passed-in URL *in place*. Every "%" followed by two
 * hexadecimal digits is replaced by the byte they encode; anything else
 * is kept as it is. The decoded version is never longer than the encoded
 * version, so there does not need to be any additional space at the end
 * of the string.
 */
void
camel_url_decode (gchar *part)
{
	guchar *src, *dst;

	g_return_if_fail (part != NULL);

#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)

	/* "src" reads ahead of (or level with) "dst", which writes the
	 * decoded bytes back into the same buffer. */
	src = dst = (guchar *) part;

	while (*src != '\0') {
		if (src[0] == '%' && isxdigit (src[1]) && isxdigit (src[2])) {
			*dst++ = (XDIGIT (src[1]) << 4) + XDIGIT (src[2]);
			src += 3;
		} else {
			*dst++ = *src++;
		}
	}

	*dst = '\0';
}
/* Hash function for #CamelURL values: XORs together g_str_hash() of each
 * non-NULL protocol, user, authmech, host, path and query string, then
 * XORs in the port.  Params and fragment do not take part, matching
 * camel_url_equal(). */
guint
camel_url_hash (gconstpointer v)
{
	const CamelURL *url = v;
	const gchar *fields[] = {
		url->protocol,
		url->user,
		url->authmech,
		url->host,
		url->path,
		url->query
	};
	guint result = 0;
	gsize ii;

	for (ii = 0; ii < G_N_ELEMENTS (fields); ii++) {
		if (fields[ii] != NULL)
			result ^= g_str_hash (fields[ii]);
	}

	result ^= url->port;

	return result;
}
/* NULL-tolerant string comparison: two NULL pointers are equal, a NULL
 * and a non-NULL pointer are not, otherwise the strings are compared
 * with strcmp().  Returns TRUE when equal, FALSE otherwise. */
static gint
check_equal (gchar *s1,
             gchar *s2)
{
	/* With at least one NULL, they match only if both are NULL. */
	if (s1 == NULL || s2 == NULL)
		return s1 == s2;

	return strcmp (s1, s2) == 0;
}
/* Equality function for #CamelURL values: two URLs are equal when their
 * protocol, user, authmech, host, path and query strings match (NULL
 * only matches NULL) and their ports are the same.  Params and fragment
 * are not compared. */
gint
camel_url_equal (gconstpointer v,
                 gconstpointer v2)
{
	const CamelURL *left = v;
	const CamelURL *right = v2;

	if (!check_equal (left->protocol, right->protocol))
		return FALSE;
	if (!check_equal (left->user, right->user))
		return FALSE;
	if (!check_equal (left->authmech, right->authmech))
		return FALSE;
	if (!check_equal (left->host, right->host))
		return FALSE;
	if (!check_equal (left->path, right->path))
		return FALSE;
	if (!check_equal (left->query, right->query))
		return FALSE;

	return left->port == right->port;
}
/**
 * camel_url_copy:
 * @in: a #CamelURL to copy
 *
 * Copy a #CamelURL.  Every string component and every param value is
 * duplicated, so the copy shares no memory with @in.
 *
 * Returns: a duplicate copy of @in
 **/
CamelURL *
camel_url_copy (CamelURL *in)
{
	CamelURL *dup;

	g_return_val_if_fail (in != NULL, NULL);

	/* Zero-filled, so the param list starts out empty. */
	dup = g_new0 (CamelURL, 1);

	dup->protocol = g_strdup (in->protocol);
	dup->user = g_strdup (in->user);
	dup->authmech = g_strdup (in->authmech);
	dup->host = g_strdup (in->host);
	dup->port = in->port;
	dup->path = g_strdup (in->path);
	dup->query = g_strdup (in->query);
	dup->fragment = g_strdup (in->fragment);

	if (in->params != NULL)
		g_datalist_foreach (&in->params, copy_param, &dup->params);

	return dup;
}
/* Returns a newly allocated copy of @path in which each "/"-separated
 * component has been %-decoded on its own with camel_url_decode() and the
 * components are joined with "/" again.  A NULL or empty @path yields a
 * newly allocated empty string.  The caller frees the result. */
gchar *
camel_url_decode_path (const gchar *path)
{
	gchar **segments, **seg;
	GString *decoded;

	if (path == NULL || *path == '\0')
		return g_strdup (""); /* ??? or NULL? */

	decoded = g_string_new (NULL);
	segments = g_strsplit (path, "/", -1);

	for (seg = segments; *seg != NULL; seg++) {
		/* Put the separator back between components. */
		if (seg != segments)
			g_string_append_c (decoded, '/');

		camel_url_decode (*seg);
		g_string_append (decoded, *seg);
	}

	g_strfreev (segments);

	return g_string_free (decoded, FALSE);
}