/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ /* * soup-tld.c * * Copyright (C) 2012 Igalia S.L. */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include "soup-tld.h" #include "soup.h" /** * SECTION:soup-tld * @section_id: SoupTLD * @short_description: Top-Level Domain Utilities * * These functions can be used to parse hostnames to attempt to determine * what part of the name belongs to the domain owner, and what part is * simply a "public suffix" such as ".com". */ static const char *soup_tld_get_base_domain_internal (const char *hostname, GError **error); /** * soup_tld_get_base_domain: * @hostname: a hostname * @error: return location for a #GError, or %NULL to ignore * errors. See #SoupTLDError for the available error codes * * Finds the base domain for a given @hostname. The base domain is * composed by the top level domain (such as .org, .com, .co.uk, etc) * plus the second level domain, for example for myhost.mydomain.com * it will return mydomain.com. * * Note that %NULL will be returned for private URLs (those not ending * with any well known TLD) because choosing a base domain for them * would be totally arbitrary. * * Prior to libsoup 2.46, this function required that @hostname be in * UTF-8 if it was an IDN. From 2.46 on, the name can be in either * UTF-8 or ASCII format (and the return value will be in the same * format). * * Returns: a pointer to the start of the base domain in @hostname. If * an error occurs, %NULL will be returned and @error set. * * Since: 2.40 **/ const char * soup_tld_get_base_domain (const char *hostname, GError **error) { g_return_val_if_fail (hostname, NULL); return soup_tld_get_base_domain_internal (hostname, error); } static psl_ctx_t * soup_psl_context (void) { static psl_ctx_t *psl = NULL; if (!psl) psl = psl_latest (NULL); return psl; } /** * soup_tld_domain_is_public_suffix: * @domain: a domain name * * Looks whether the @domain passed as argument is a public domain * suffix (.org, .com, .co.uk, etc) or not. * * Prior to libsoup 2.46, this function required that @domain be in * UTF-8 if it was an IDN. From 2.46 on, the name can be in either * UTF-8 or ASCII format. * * Returns: %TRUE if it is a public domain, %FALSE otherwise. * * Since: 2.40 **/ gboolean soup_tld_domain_is_public_suffix (const char *domain) { const psl_ctx_t* psl = soup_psl_context (); g_return_val_if_fail (domain, FALSE); if (!psl) { g_warning ("soup-tld: There is no public-suffix data available."); return FALSE; } return psl_is_public_suffix2 (psl, domain, PSL_TYPE_ANY | PSL_TYPE_NO_STAR_RULE); } /** * SOUP_TLD_ERROR: * * The #GError domain for soup-tld-related errors. * * Since: 2.40 */ /** * SoupTLDError: * @SOUP_TLD_ERROR_INVALID_HOSTNAME: A hostname was syntactically * invalid. * @SOUP_TLD_ERROR_IS_IP_ADDRESS: The passed-in "hostname" was * actually an IP address (and thus has no base domain or * public suffix). * @SOUP_TLD_ERROR_NOT_ENOUGH_DOMAINS: The passed-in hostname * did not have enough components. Eg, calling * soup_tld_get_base_domain() on "co.uk". * @SOUP_TLD_ERROR_NO_BASE_DOMAIN: The passed-in hostname has * no recognized public suffix. * @SOUP_TLD_ERROR_NO_PSL_DATA: The Public Suffix List was not * available. * * Error codes for %SOUP_TLD_ERROR. * * Since: 2.40 */ G_DEFINE_QUARK (soup-tld-error-quark, soup_tld_error) static const char * soup_tld_get_base_domain_internal (const char *hostname, GError **error) { char *utf8_hostname = NULL; const psl_ctx_t* psl = soup_psl_context (); const char *registrable_domain, *unregistrable_domain; if (!psl) { g_set_error_literal (error, SOUP_TLD_ERROR, SOUP_TLD_ERROR_NO_PSL_DATA, _("No public-suffix list available.")); return NULL; } /* Valid hostnames neither start with a dot nor have more than one * dot together. */ if (*hostname == '.') { g_set_error_literal (error, SOUP_TLD_ERROR, SOUP_TLD_ERROR_INVALID_HOSTNAME, _("Invalid hostname")); return NULL; } if (g_hostname_is_ip_address (hostname)) { g_set_error_literal (error, SOUP_TLD_ERROR, SOUP_TLD_ERROR_IS_IP_ADDRESS, _("Hostname is an IP address")); return NULL; } if (g_hostname_is_ascii_encoded (hostname)) { utf8_hostname = g_hostname_to_unicode (hostname); if (!utf8_hostname) { g_set_error_literal (error, SOUP_TLD_ERROR, SOUP_TLD_ERROR_INVALID_HOSTNAME, _("Invalid hostname")); return NULL; } g_free (utf8_hostname); } /* Fetch the domain portion of the hostname and check whether * it's a public domain. */ unregistrable_domain = psl_unregistrable_domain (psl, hostname); if (!psl_is_public_suffix2 (psl, unregistrable_domain, PSL_TYPE_ANY | PSL_TYPE_NO_STAR_RULE)) { g_set_error_literal (error, SOUP_TLD_ERROR, SOUP_TLD_ERROR_NO_BASE_DOMAIN, _("Hostname has no base domain")); return NULL; } registrable_domain = psl_registrable_domain (psl, hostname); if (!registrable_domain) { g_set_error_literal (error, SOUP_TLD_ERROR, SOUP_TLD_ERROR_NOT_ENOUGH_DOMAINS, _("Not enough domains")); return NULL; } return registrable_domain; }