summaryrefslogtreecommitdiff
path: root/src/test-tree-magic.c
diff options
context:
space:
mode:
author: Bastien Nocera <hadess@hadess.net> 2020-02-08 12:51:24 +0100
committer: Bastien Nocera <hadess@hadess.net> 2020-02-08 13:47:31 +0100
commit: 5d56ce04729ed3a2d8794f1d46586423cde23fe2 (patch)
tree: cb5f49ade35001562eb2251395957af10095bf1b /src/test-tree-magic.c
parent: 071644f8db4850b93934d0939500fc2b4df9eeb1 (diff)
download: shared-mime-info-5d56ce04729ed3a2d8794f1d46586423cde23fe2.tar.gz
build: Move source code to src/
Diffstat (limited to 'src/test-tree-magic.c')
-rw-r--r-- src/test-tree-magic.c 695
1 files changed, 695 insertions, 0 deletions
diff --git a/src/test-tree-magic.c b/src/test-tree-magic.c
new file mode 100644
index 00000000..0ff949a1
--- /dev/null
+++ b/src/test-tree-magic.c
@@ -0,0 +1,695 @@
+/*
+ * Copyright (C) 2008 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ *
+ * Author: Matthias Clasen <mclasen@redhat.com>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <glib.h>
+#include <gio/gio.h>
+
+
+typedef struct /* One path rule parsed from a treemagic match line; rules nest via `matches`. */
+{
+ gchar *path; /* path to look for; split on the directory separator when matched */
+ GFileType type; /* required file type; G_FILE_TYPE_UNKNOWN accepts any type */
+ guint match_case : 1; /* "match-case" option: names must match exactly */
+ guint executable : 1; /* "executable" option: entry must report can-execute */
+ guint non_empty : 1; /* "non-empty" option: entry must have at least one child */
+ guint on_disc : 1; /* "on-disc" option: stored by the parser, not read by the matcher in this file */
+ gchar *mimetype; /* required content type, or NULL when the line names none */
+ GList *matches; /* nested TreeMatchlet rules, in file order */
+} TreeMatchlet;
+
+typedef struct /* One "[priority:content-type]" section together with its rules. */
+{
+ gchar *contenttype; /* text following the ':' in the section header */
+ gint priority; /* atoi() of the text following '['; higher values sort first */
+ GList *matches; /* depth-0 TreeMatchlet rules of this section */
+} TreeMatch;
+
+static GList *tree_matches = NULL; /* every loaded TreeMatch, kept sorted by descending priority */
+
+/* Release a matchlet together with every matchlet nested below it. */
+static void
+tree_matchlet_free (TreeMatchlet *matchlet)
+{
+  GList *child;
+
+  /* Children first: each node owns its whole subtree. */
+  for (child = matchlet->matches; child != NULL; child = child->next)
+    tree_matchlet_free (child->data);
+  g_list_free (matchlet->matches);
+
+  g_free (matchlet->path);
+  g_free (matchlet->mimetype);
+  g_slice_free (TreeMatchlet, matchlet);
+}
+
+/* Release a section: all of its matchlet trees, its content type and the struct. */
+static void
+tree_match_free (TreeMatch *match)
+{
+  GList *rule;
+
+  for (rule = match->matches; rule != NULL; rule = rule->next)
+    tree_matchlet_free (rule->data);
+  g_list_free (match->matches);
+
+  g_free (match->contenttype);
+  g_slice_free (TreeMatch, match);
+}
+
+/* Free every loaded section and leave the global list empty. */
+static void
+tree_magic_shutdown (void)
+{
+  GList *node;
+
+  for (node = tree_matches; node != NULL; node = node->next)
+    tree_match_free (node->data);
+
+  g_list_free (tree_matches);
+  tree_matches = NULL;
+}
+
+/*
+ * Parse a "[priority:content-type]" section header.
+ *
+ * LINE is modified in place: its closing ']' is overwritten with NUL.
+ *
+ * Returns a newly allocated TreeMatch (release with tree_match_free), or
+ * NULL when LINE is not enclosed in brackets or has no ':' separator.
+ */
+static TreeMatch *
+parse_header (gchar *line)
+{
+  gsize len;
+  gchar *colon;
+  TreeMatch *match;
+
+  len = strlen (line);
+
+  /* The length test keeps line[len - 1] in bounds for an empty string. */
+  if (len < 2 || line[0] != '[' || line[len - 1] != ']')
+    return NULL;
+
+  line[len - 1] = 0;
+
+  /* Without a separator there is no content type to record. */
+  colon = strchr (line, ':');
+  if (colon == NULL)
+    return NULL;
+
+  match = g_slice_new0 (TreeMatch);
+  match->priority = atoi (line + 1);
+  match->contenttype = g_strdup (colon + 1);
+
+  return match;
+}
+
+/*
+ * Parse one rule line of the form
+ *
+ *   [depth]>"path"=type[,option...]
+ *
+ * LINE is modified in place (the closing quote of the path is overwritten).
+ * *DEPTH receives the leading number, or 0 when the line starts with '>';
+ * it may be written even when parsing fails.
+ *
+ * Recognised types are "=file", "=directory" and "=link"; anything else is
+ * stored as G_FILE_TYPE_UNKNOWN.  Options "executable", "match-case",
+ * "non-empty" and "on-disc" set the matching flag; any other option is
+ * taken as the required content type (the last one wins).
+ *
+ * Returns a newly allocated matchlet (release with tree_matchlet_free), or
+ * NULL when the line is malformed: negative depth, missing '>' or quotes,
+ * empty path, or nothing after the path.
+ */
+static TreeMatchlet *
+parse_match_line (gchar *line,
+                  gint *depth)
+{
+  gchar *s, *p;
+  TreeMatchlet *matchlet;
+  gchar **parts;
+  gint i;
+
+  if (line[0] == '>') {
+    *depth = 0;
+    s = line;
+  }
+  else {
+    *depth = atoi (line);
+    s = strchr (line, '>');
+  }
+
+  if (*depth < 0)
+    return NULL;
+
+  /* The path must follow immediately as >"...".  Checking s[1] also keeps
+   * the two-character skip below inside the string. */
+  if (s == NULL || s[1] != '"')
+    return NULL;
+  s += 2;
+
+  /* An empty path has no components to walk, so it is rejected here. */
+  p = strchr (s, '"');
+  if (p == NULL || p == s)
+    return NULL;
+  *p = 0;
+
+  /* Splitting an empty remainder yields a vector with no elements. */
+  parts = g_strsplit (p + 1, ",", 0);
+  if (parts[0] == NULL) {
+    g_strfreev (parts);
+    return NULL;
+  }
+
+  matchlet = g_slice_new0 (TreeMatchlet);
+  matchlet->path = g_strdup (s);
+
+  if (strcmp (parts[0], "=file") == 0)
+    matchlet->type = G_FILE_TYPE_REGULAR;
+  else if (strcmp (parts[0], "=directory") == 0)
+    matchlet->type = G_FILE_TYPE_DIRECTORY;
+  else if (strcmp (parts[0], "=link") == 0)
+    matchlet->type = G_FILE_TYPE_SYMBOLIC_LINK;
+  else
+    matchlet->type = G_FILE_TYPE_UNKNOWN;
+
+  for (i = 1; parts[i]; i++) {
+    if (strcmp (parts[i], "executable") == 0)
+      matchlet->executable = 1;
+    else if (strcmp (parts[i], "match-case") == 0)
+      matchlet->match_case = 1;
+    else if (strcmp (parts[i], "non-empty") == 0)
+      matchlet->non_empty = 1;
+    else if (strcmp (parts[i], "on-disc") == 0)
+      matchlet->on_disc = 1;
+    else {
+      /* Drop any earlier value so a repeated option does not leak. */
+      g_free (matchlet->mimetype);
+      matchlet->mimetype = g_strdup (parts[i]);
+    }
+  }
+
+  g_strfreev (parts);
+
+  return matchlet;
+}
+
+/*
+ * Comparison callback ordering TreeMatch entries by descending priority.
+ *
+ * Returns a negative value when A has the higher priority, a positive value
+ * when B has, and 0 when they are equal.
+ */
+static gint
+cmp_match (gconstpointer a, gconstpointer b)
+{
+  const TreeMatch *aa = a;
+  const TreeMatch *bb = b;
+
+  /* Compare rather than subtract: the difference of two arbitrary gint
+   * values can overflow, and only the sign of the result is needed. */
+  return (bb->priority > aa->priority) - (bb->priority < aa->priority);
+}
+
+/*
+ * Add MATCH to the global list, keeping the list ordered by cmp_match.
+ * A NULL MATCH (parse_header returns NULL for a malformed header) is
+ * rejected instead of being stored, since the comparison and free code
+ * dereference every element.
+ */
+static void
+insert_match (TreeMatch *match)
+{
+  g_return_if_fail (match != NULL);
+
+  tree_matches = g_list_insert_sorted (tree_matches, match, cmp_match);
+}
+
+/*
+ * Attach MATCHLET to MATCH at nesting level DEPTH.
+ *
+ * Depth 0 appends to the section's top-level rules.  A non-zero depth
+ * descends through the most recently appended rule at each level and
+ * appends MATCHLET below the rule reached.
+ *
+ * Takes ownership of MATCHLET: when it cannot be attached (no section, or
+ * no parent at the requested depth) it is freed and a warning is logged.
+ */
+static void
+insert_matchlet (TreeMatch *match,
+                 TreeMatchlet *matchlet,
+                 gint depth)
+{
+  GList *last;
+  TreeMatchlet *parent;
+  gint level;
+
+  g_return_if_fail (matchlet != NULL);
+
+  if (match == NULL) {
+    tree_matchlet_free (matchlet);
+    g_warning ("can't insert matchlet without a preceding section header");
+    return;
+  }
+
+  if (depth == 0) {
+    match->matches = g_list_append (match->matches, matchlet);
+    return;
+  }
+
+  /* Follow the last rule of each level until the parent level is reached. */
+  last = g_list_last (match->matches);
+  for (level = 1; last != NULL && level < depth; level++) {
+    parent = last->data;
+    last = g_list_last (parent->matches);
+  }
+
+  if (last == NULL) {
+    tree_matchlet_free (matchlet);
+    /* Report the depth that was asked for, not a partially consumed counter. */
+    g_warning ("can't insert matchlet at depth %d", depth);
+    return;
+  }
+
+  parent = last->data;
+  parent->matches = g_list_append (parent->matches, matchlet);
+}
+
+/*
+ * Load PREFIX/mime/treemagic, if it exists, into the global rule list.
+ *
+ * The file must start with "MIME-TreeMagic" followed by a NUL byte; parsing
+ * starts two bytes after the magic string and stops at the first empty
+ * line.  Lines starting with '[' open a new section, all other lines are
+ * rules belonging to the current section.  Malformed lines are reported
+ * and skipped.  A missing or unreadable file is silently ignored.
+ */
+static void
+read_tree_magic_from_directory (const gchar *prefix)
+{
+  static const gchar magic[] = "MIME-TreeMagic";
+  gchar *filename;
+  gchar *text;
+  gsize len;
+  gint i;
+  TreeMatch *match;
+  TreeMatchlet *matchlet;
+  gint depth;
+
+  filename = g_build_filename (prefix, "mime", "treemagic", NULL);
+  match = NULL;
+
+  if (g_file_get_contents (filename, &text, &len, NULL)) {
+    /* strcmp stops at the NUL byte that terminates the magic string.  The
+     * length test guarantees that the two bytes skipped below really are
+     * inside the buffer. */
+    if (len >= strlen (magic) + 2 && strcmp (text, magic) == 0) {
+      gchar **lines;
+
+      lines = g_strsplit (text + strlen (magic) + 2, "\n", 0);
+      for (i = 0; lines[i] && lines[i][0]; i++) {
+        if (lines[i][0] == '[') {
+          /* On failure match becomes NULL, so the rules that follow are
+           * dropped rather than attached to the previous section. */
+          match = parse_header (lines[i]);
+          if (match != NULL)
+            insert_match (match);
+          else
+            g_warning ("%s: malformed section header, skipping its rules", filename);
+          continue;
+        }
+
+        matchlet = parse_match_line (lines[i], &depth);
+        if (matchlet == NULL) {
+          g_warning ("%s: malformed match line, skipping", filename);
+          continue;
+        }
+
+        if (match == NULL) {
+          /* No usable section to attach this rule to. */
+          tree_matchlet_free (matchlet);
+          continue;
+        }
+
+        insert_matchlet (match, matchlet, depth);
+      }
+      g_strfreev (lines);
+    }
+    else
+      g_warning ("%s: header not found, skipping\n", filename);
+
+    g_free (text);
+  }
+
+  g_free (filename);
+}
+
+typedef struct /* Iteration state for finding entries below a root that match a path. */
+{
+ gchar *path; /* private copy of the path being searched for */
+ gint depth; /* number of entries in components */
+ gboolean ignore_case; /* TRUE: names may also match after case folding */
+ gchar **components; /* path split on G_DIR_SEPARATOR_S, NULL-terminated */
+ gchar **case_components; /* collate keys of the case-folded components; NULL unless ignore_case */
+ GFileEnumerator **enumerators; /* one slot per component; NULL while that level is not open */
+ GFile **children; /* one slot per component: the directory enumerated at that level */
+} Enumerator;
+
+/*
+ * Tell whether NAME matches path component number DEPTH of E.
+ * An exact byte comparison is tried first; when E ignores case, the
+ * collate key of the case-folded NAME is compared against the key
+ * precomputed for that component.
+ */
+static gboolean
+component_match (Enumerator *e,
+                 gint depth,
+                 const gchar *name)
+{
+  gchar *folded;
+  gchar *collate_key;
+  gboolean same;
+
+  if (strcmp (name, e->components[depth]) == 0)
+    return TRUE;
+  if (!e->ignore_case)
+    return FALSE;
+
+  folded = g_utf8_casefold (name, -1);
+  collate_key = g_utf8_collate_key (folded, -1);
+  g_free (folded);
+
+  same = (strcmp (collate_key, e->case_components[depth]) == 0);
+  g_free (collate_key);
+
+  return same;
+}
+
+/*
+ * Return the next entry whose name matches path component DEPTH, searching
+ * inside every directory that matched components 0 .. DEPTH-1.
+ *
+ * The returned GFile is a new reference owned by the caller; NULL means the
+ * search is exhausted.  State is kept in E so repeated calls continue where
+ * the previous one stopped.
+ *
+ * A matching parent that cannot be enumerated (for example because it is
+ * not a directory) is skipped, so that other parents matching the same
+ * component are still searched.
+ */
+static GFile *
+next_match_recurse (Enumerator *e,
+                    gint depth)
+{
+  GFile *file;
+  GFileInfo *info;
+  GFileEnumerator *listing;
+  const gchar *name;
+
+  while (TRUE) {
+    if (e->enumerators[depth] == NULL) {
+      /* Level 0 has no parent level to pull another directory from. */
+      if (depth == 0)
+        return NULL;
+
+      file = next_match_recurse (e, depth - 1);
+      if (file == NULL)
+        return NULL;
+
+      listing = g_file_enumerate_children (file,
+                                           G_FILE_ATTRIBUTE_STANDARD_NAME,
+                                           G_FILE_QUERY_INFO_NONE,
+                                           NULL,
+                                           NULL);
+      if (listing == NULL) {
+        /* Not enumerable: drop it and ask the parent level for the next one. */
+        g_object_unref (file);
+        continue;
+      }
+
+      e->children[depth] = file;
+      e->enumerators[depth] = listing;
+    }
+
+    while ((info = g_file_enumerator_next_file (e->enumerators[depth], NULL, NULL))) {
+      name = g_file_info_get_name (info);
+      if (component_match (e, depth, name)) {
+        /* Build the child before releasing info, which owns name. */
+        file = g_file_get_child (e->children[depth], name);
+        g_object_unref (info);
+        return file;
+      }
+      g_object_unref (info);
+    }
+
+    /* This directory is exhausted; close the level and loop to reopen it. */
+    g_object_unref (e->enumerators[depth]);
+    e->enumerators[depth] = NULL;
+    g_object_unref (e->children[depth]);
+    e->children[depth] = NULL;
+  }
+}
+
+/* Return the next entry matching the whole path of E, or NULL when none is left. */
+static GFile *
+enumerator_next (Enumerator *e)
+{
+  gint deepest = e->depth - 1;
+
+  return next_match_recurse (e, deepest);
+}
+
+/*
+ * Create the iteration state for finding PATH below ROOT.
+ *
+ * PATH is copied and split on G_DIR_SEPARATOR_S.  With IGNORE_CASE set, a
+ * collate key of the case-folded form of every component is precomputed.
+ * Level 0 starts out enumerating ROOT, on which a reference is taken.
+ * Release the result with enumerator_free.
+ */
+static Enumerator *
+enumerator_new (GFile *root,
+                const char *path,
+                gboolean ignore_case)
+{
+  Enumerator *e = g_new0 (Enumerator, 1);
+  gint idx;
+
+  e->ignore_case = ignore_case;
+  e->path = g_strdup (path);
+  e->components = g_strsplit (e->path, G_DIR_SEPARATOR_S, -1);
+  e->depth = g_strv_length (e->components);
+
+  if (e->ignore_case) {
+    /* One extra slot keeps the vector NULL-terminated. */
+    e->case_components = g_new0 (char *, e->depth + 1);
+    for (idx = 0; e->components[idx] != NULL; idx++) {
+      gchar *folded = g_utf8_casefold (e->components[idx], -1);
+
+      e->case_components[idx] = g_utf8_collate_key (folded, -1);
+      g_free (folded);
+    }
+  }
+
+  e->children = g_new0 (GFile *, e->depth);
+  e->enumerators = g_new0 (GFileEnumerator *, e->depth);
+
+  e->children[0] = g_object_ref (root);
+  e->enumerators[0] = g_file_enumerate_children (root,
+                                                 G_FILE_ATTRIBUTE_STANDARD_NAME,
+                                                 G_FILE_QUERY_INFO_NONE,
+                                                 NULL,
+                                                 NULL);
+
+  return e;
+}
+
+/* Drop every reference still held by E and free all of its storage. */
+static void
+enumerator_free (Enumerator *e)
+{
+  gint level;
+
+  for (level = 0; level < e->depth; level++) {
+    if (e->enumerators[level] != NULL)
+      g_object_unref (e->enumerators[level]);
+    if (e->children[level] != NULL)
+      g_object_unref (e->children[level]);
+  }
+  g_free (e->enumerators);
+  g_free (e->children);
+
+  /* g_strfreev accepts NULL, which is what case_components holds when
+   * case is not ignored. */
+  g_strfreev (e->components);
+  g_strfreev (e->case_components);
+
+  g_free (e->path);
+  g_free (e);
+}
+
+/*
+ * Test whether the tree below ROOT satisfies MATCHLET.
+ *
+ * Candidates for the matchlet's path are tried in turn until one passes
+ * every configured check (file type, executable, non-empty, content type);
+ * if none does, the result is FALSE.  When the matchlet has nested rules,
+ * at least one of them must match as well; nested rules are evaluated
+ * against ROOT.
+ */
+static gboolean
+matchlet_match (TreeMatchlet *matchlet,
+                GFile *root)
+{
+  GFile *file;
+  GFileInfo *info;
+  gboolean result;
+  const gchar *attrs;
+  Enumerator *e;
+  GList *l;
+
+  e = enumerator_new (root, matchlet->path, !matchlet->match_case);
+
+  do {
+    file = enumerator_next (e);
+    if (!file) {
+      enumerator_free (e);
+      return FALSE;
+    }
+
+    /* Only ask for the content type when a rule needs it. */
+    if (matchlet->mimetype)
+      attrs = G_FILE_ATTRIBUTE_STANDARD_TYPE ","
+              G_FILE_ATTRIBUTE_ACCESS_CAN_EXECUTE ","
+              G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE;
+    else
+      attrs = G_FILE_ATTRIBUTE_STANDARD_TYPE ","
+              G_FILE_ATTRIBUTE_ACCESS_CAN_EXECUTE;
+    info = g_file_query_info (file,
+                              attrs,
+                              G_FILE_QUERY_INFO_NONE,
+                              NULL,
+                              NULL);
+
+    /* A candidate whose attributes cannot be read never matches. */
+    result = (info != NULL);
+
+    if (result &&
+        matchlet->type != G_FILE_TYPE_UNKNOWN &&
+        g_file_info_get_file_type (info) != matchlet->type)
+      result = FALSE;
+
+    if (result &&
+        matchlet->executable &&
+        !g_file_info_get_attribute_boolean (info, G_FILE_ATTRIBUTE_ACCESS_CAN_EXECUTE))
+      result = FALSE;
+
+    if (result && matchlet->non_empty) {
+      GFileEnumerator *child_enum;
+      GFileInfo *child_info;
+
+      child_enum = g_file_enumerate_children (file,
+                                              G_FILE_ATTRIBUTE_STANDARD_NAME,
+                                              G_FILE_QUERY_INFO_NONE,
+                                              NULL,
+                                              NULL);
+
+      if (child_enum) {
+        /* One child is enough to count as non-empty. */
+        child_info = g_file_enumerator_next_file (child_enum, NULL, NULL);
+        if (child_info)
+          g_object_unref (child_info);
+        else
+          result = FALSE;
+        g_object_unref (child_enum);
+      }
+      else
+        result = FALSE;
+    }
+
+    if (result && matchlet->mimetype) {
+      /* g_strcmp0 tolerates a NULL content type from the file info. */
+      if (g_strcmp0 (matchlet->mimetype, g_file_info_get_content_type (info)) != 0)
+        result = FALSE;
+    }
+
+    /* info is NULL when the query failed; unreffing NULL is an error. */
+    if (info)
+      g_object_unref (info);
+    g_object_unref (file);
+
+  }
+  while (!result);
+
+  enumerator_free (e);
+
+  if (!matchlet->matches)
+    return TRUE;
+
+  for (l = matchlet->matches; l; l = l->next) {
+    TreeMatchlet *submatchlet;
+
+    submatchlet = l->data;
+    if (matchlet_match (submatchlet, root))
+      return TRUE;
+  }
+
+  return FALSE;
+}
+
+/*
+ * Append a copy of MATCH's content type to TYPES when any of its top-level
+ * rules matches the tree below ROOT.  At most one entry is added.
+ */
+static void
+match_match (TreeMatch *match,
+             GFile *root,
+             GPtrArray *types)
+{
+  GList *node = match->matches;
+
+  /* Stop at the first rule that matches. */
+  while (node != NULL && !matchlet_match (node->data, root))
+    node = node->next;
+
+  if (node != NULL)
+    g_ptr_array_add (types, g_strdup (match->contenttype));
+}
+
+/*
+ * Load the treemagic rules once: first from the user data directory, then
+ * from each system data directory in order.  Later calls do nothing.
+ */
+static void
+tree_magic_init (void)
+{
+  static gboolean initialized = FALSE;
+  const gchar * const *sysdir;
+
+  if (initialized)
+    return;
+  initialized = TRUE;
+
+  read_tree_magic_from_directory (g_get_user_data_dir ());
+
+  for (sysdir = g_get_system_data_dirs (); *sysdir != NULL; sysdir++)
+    read_tree_magic_from_directory (*sysdir);
+}
+
+/*
+ * Collect the content types of every loaded section that matches the tree
+ * below ROOT, in priority order.
+ *
+ * Returns a new GPtrArray of newly allocated strings.  The array owns the
+ * strings: freeing it together with its elements (or unreffing it) also
+ * frees them.
+ */
+static GPtrArray *
+sniff_content_type (GFile *root)
+{
+  GPtrArray *types;
+  GList *l;
+
+  /* TODO: monitor and reload */
+  tree_magic_init ();
+
+  /* match_match stores g_strdup'd strings; without a free function they
+   * would leak when the array is destroyed. */
+  types = g_ptr_array_new_with_free_func (g_free);
+
+  for (l = tree_matches; l; l = l->next) {
+    TreeMatch *match = l->data;
+    match_match (match, root, types);
+  }
+
+  return types;
+}
+
+/*
+ * Run one test line: "[x ]<path> <content-type> [<content-type>...]".
+ *
+ * The path is resolved relative to CWD and sniffed; every content type
+ * found must be one of the listed types.  A leading "x " marks the line as
+ * expected to fail, in which case finding nothing, or finding only other
+ * types, is accepted.
+ *
+ * Returns TRUE when the line passes, FALSE when it fails or is malformed.
+ */
+static gboolean
+handle_one_line (const char *line,
+                 const char *cwd)
+{
+  GFile *file;
+  GPtrArray *array;
+  gboolean retval = TRUE;
+  gboolean supposed_to_fail = FALSE;
+  char **items, **mimetypes;
+  char *filename;
+  guint i;
+
+  if (strncmp (line, "x ", 2) == 0) {
+    supposed_to_fail = TRUE;
+    line += 2;
+  }
+  items = g_strsplit (line, " ", 2);
+
+  /* Both a path and at least one content type are required; without this
+   * check items[1] would be passed on as NULL. */
+  if (items[0] == NULL || items[1] == NULL) {
+    g_warning ("Malformed test line '%s': expected a path followed by content types",
+               line);
+    g_strfreev (items);
+    return FALSE;
+  }
+
+  filename = items[0];
+  mimetypes = g_strsplit (items[1], " ", -1);
+
+  file = g_file_new_for_commandline_arg_and_cwd (filename, cwd);
+
+  array = sniff_content_type (file);
+  if (array->len == 0) {
+    if (supposed_to_fail == FALSE) {
+      g_warning ("Tree %s didn't match %s (found nothing)",
+                 filename, items[1]);
+      retval = FALSE;
+    } else {
+      g_message ("Tree %s failed to match %s (expected)",
+                 filename, items[1]);
+      retval = TRUE;
+    }
+    goto bail;
+  }
+
+  for (i = 0; i < array->len; i++) {
+    char *found;
+    gboolean matched;
+    guint j;
+
+    matched = FALSE;
+    found = (char *) array->pdata[i];
+    for (j = 0; mimetypes[j] != NULL; j++) {
+      if (strcmp (mimetypes[j], found) == 0) {
+        matched = TRUE;
+        break;
+      }
+    }
+    if (matched == FALSE && supposed_to_fail == FALSE) {
+      g_warning ("Tree %s didn't match %s (found %s)",
+                 filename, items[1], found);
+      retval = FALSE;
+      goto bail;
+    } else if (matched == FALSE) {
+      g_message ("Tree %s failed to match %s (expected)",
+                 filename, items[1]);
+    } else {
+      /* NOTE(review): an "x" line whose type IS found also ends up here
+       * and passes; confirm whether that should count as a failure. */
+      g_message ("Tree %s looks good (found %s)",
+                 filename, found);
+    }
+  }
+
+bail:
+  /* filename points into items, so items is released last. */
+  g_object_unref (file);
+  g_ptr_array_free (array, TRUE);
+  g_strfreev (mimetypes);
+  g_strfreev (items);
+
+  return retval;
+}
+
+/* Human-readable label for a file type, used by the rule dump. */
+static const char *
+type_to_path (GFileType type)
+{
+  if (type == G_FILE_TYPE_REGULAR)
+    return "Regular";
+  if (type == G_FILE_TYPE_DIRECTORY)
+    return "Directory";
+  if (type == G_FILE_TYPE_SYMBOLIC_LINK)
+    return "Symbolic link";
+
+  /* Every other file type shares one label. */
+  return "Unknown";
+}
+
+/*
+ * Print MATCHLET and, recursively, its nested rules.  Each line is
+ * indented with DEPTH + 1 tab characters.
+ */
+static void
+print_matchlet (TreeMatchlet *matchlet, guint depth)
+{
+  GList *child;
+  guint tabs;
+
+  for (tabs = 0; tabs < depth + 1; tabs++)
+    g_print ("\t");
+  g_print ("%s (type=%s)\n", matchlet->path, type_to_path (matchlet->type));
+
+  child = matchlet->matches;
+  while (child != NULL) {
+    print_matchlet (child->data, depth + 1);
+    child = child->next;
+  }
+}
+
+/*
+ * Entry point.
+ *
+ *   <prog> -d      dump every loaded treemagic rule
+ *   <prog> FILE    run the test lines in FILE; paths in FILE are resolved
+ *                  relative to FILE's directory
+ *
+ * Returns 0 on success, 1 on a usage error, an unreadable FILE, or the
+ * first failing test line.
+ */
+int main (int argc, char **argv)
+{
+  GError *error = NULL;
+  char *content, **lines, *cwd;
+  guint i;
+  int status = 0;
+
+  /* g_type_init is only needed before GLib 2.36. */
+#if !GLIB_CHECK_VERSION(2,36,0)
+  g_type_init ();
+#endif
+
+  if (argc != 2) {
+    g_print ("Usage: %s [file]\n", argv[0]);
+    g_print ("Where file contains, on each line, a directory path,\n"
+             "followed by one or more content types\n");
+    g_print ("Lines starting with '#' are ignored\n");
+    g_print ("Examples:\n");
+    g_print ("\t#Supposed to work\n");
+    g_print ("\ttests/image-dcf x-content/image-dcf\n");
+    g_print ("\t# Supposed to fail\n");
+    g_print ("\tx tests/ x-content/image-dcf\n");
+    return 1;
+  }
+
+  if (strcmp (argv[1], "-d") == 0) {
+    GList *l;
+
+    tree_magic_init ();
+    for (l = tree_matches ; l != NULL; l = l->next) {
+      GList *k;
+      TreeMatch *match = (TreeMatch *) l->data;
+      g_print ("Type: %s\n", match->contenttype);
+      for (k = match->matches ; k != NULL; k = k->next) {
+        TreeMatchlet *matchlet = (TreeMatchlet *) k->data;
+        print_matchlet (matchlet, 0);
+      }
+    }
+    tree_magic_shutdown ();
+    return 0;
+  }
+
+  if (g_file_get_contents (argv[1], &content, NULL, &error) == FALSE) {
+    g_warning ("Failed to load %s: %s", argv[1], error->message);
+    g_clear_error (&error);
+    return 1;
+  }
+  lines = g_strsplit (content, "\n", -1);
+  g_free (content);
+
+  cwd = g_path_get_dirname (argv[1]);
+  for (i = 0; lines[i] != NULL; i++) {
+    /* Blank lines and comments carry no test. */
+    if (*lines[i] == '\0' || *lines[i] == '#')
+      continue;
+    if (handle_one_line (lines[i], cwd) == FALSE) {
+      /* Stop at the first failure, but still release everything below. */
+      status = 1;
+      break;
+    }
+  }
+
+  g_free (cwd);
+  g_strfreev (lines);
+  tree_magic_shutdown ();
+
+  return status;
+}
+