summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Carlos Garnacho <carlosg@gnome.org> 2014-03-22 23:54:39 +0100
committer: Carlos Garnacho <carlosg@gnome.org> 2014-03-23 01:32:15 +0100
commit: 7fd568ee9c8aa3245aa86572d8323147aa70adaa (patch)
tree: 3bffaf7fde149a52924a70be9ae16a498b6bc16c
parent: 9ab999291e140687382b5c1ac2dc3400124a8879 (diff)
download: tracker-subtree-crawling.tar.gz
libtracker-miner: Perform leveled notification in TrackerFileNotifier (branch: subtree-crawling)
The current notification process involves crawling over index roots without restrictions, and querying the state of every file in the store. This is fastest, but can get memory hungry on huge directory trees. So split the process into 3 sequential steps, which are repeated from top to bottom over the directory hierarchy:
1. A directory is crawled, and the contents that currently exist in the filesystem are extracted.
2. Only if the directory is an index root, or was checked to exist in the store through previous iterations, the directory and all contents found are looked up in the store by their URI; new and updated contents are detected by comparing mtimes.
3. Only if the directory passed step 2 and its mtime changed (which usually implies something was added or removed; at this stage we only have to care about the latter), query all elements in the store that have it as their nfo:belongsToContainer, and check for those files that existed in the store but don't exist anymore. Deleted contents are detected in this stage.
The change has been done so there is certain compile-time granularity on the directory processing, currently controlled through the MAX_DEPTH define. This switch controls the maximum depth of crawled/queried chunks, which establishes an indirect limit on the number of GFiles (and all misc data around them) that are in memory at the same time. From testing, first-time crawling performance is completely unaffected, and second-time crawling on an unchanged directory tree shows a negligible decrease. The IN() match on an indexed property like nie:url looks near constant, and the third, more expensive step will only happen when it is very likely that there are actual changes to process. So the MAX_DEPTH value has been set to 1 to keep memory usage to a minimum (tracker-miner-fs now peaks on massif at 24MB, when it previously grew early on to ~180MB, indexing 11304 folders and 123428 files).
-rw-r--r-- src/libtracker-miner/tracker-file-notifier.c 625
1 files changed, 415 insertions, 210 deletions
diff --git a/src/libtracker-miner/tracker-file-notifier.c b/src/libtracker-miner/tracker-file-notifier.c
index dc6d9ab98..3171921f0 100644
--- a/src/libtracker-miner/tracker-file-notifier.c
+++ b/src/libtracker-miner/tracker-file-notifier.c
@@ -29,12 +29,12 @@
#include "tracker-crawler.h"
#include "tracker-monitor.h"
-static GQuark quark_property_crawled = 0;
-static GQuark quark_property_queried = 0;
static GQuark quark_property_iri = 0;
static GQuark quark_property_store_mtime = 0;
static GQuark quark_property_filesystem_mtime = 0;
+#define MAX_DEPTH 1
+
enum {
PROP_0,
PROP_INDEXING_TREE
@@ -54,6 +54,18 @@ enum {
static guint signals[LAST_SIGNAL] = { 0 };
typedef struct {
+ GFile *root;
+ GQueue *pending_dirs;
+ GPtrArray *query_files;
+ GPtrArray *updated_dirs;
+ guint flags;
+ guint directories_found;
+ guint directories_ignored;
+ guint files_found;
+ guint files_ignored;
+} RootData;
+
+typedef struct {
TrackerIndexingTree *indexing_tree;
TrackerFileSystem *file_system;
@@ -69,7 +81,7 @@ typedef struct {
* trees to get data from
*/
GList *pending_index_roots;
- GFile *current_index_root;
+ RootData *current_index_root;
guint stopped : 1;
} TrackerFileNotifierPrivate;
@@ -84,7 +96,6 @@ typedef struct {
static gboolean crawl_directories_start (TrackerFileNotifier *notifier);
-
G_DEFINE_TYPE (TrackerFileNotifier, tracker_file_notifier, G_TYPE_OBJECT)
static void
@@ -129,6 +140,35 @@ tracker_file_notifier_get_property (GObject *object,
}
}
+static RootData *
+root_data_new (TrackerFileNotifier *notifier,
+ GFile *file)
+{
+ TrackerFileNotifierPrivate *priv = notifier->priv;
+ RootData *data;
+
+ data = g_new0 (RootData, 1);
+ data->root = g_object_ref (file);
+ data->pending_dirs = g_queue_new ();
+ data->query_files = g_ptr_array_new ();
+ data->updated_dirs = g_ptr_array_new ();
+
+ g_queue_push_tail (data->pending_dirs, g_object_ref (file));
+ tracker_indexing_tree_get_root (priv->indexing_tree, file, &data->flags);
+
+ return data;
+}
+
+static void
+root_data_free (RootData *data)
+{
+ g_queue_free_full (data->pending_dirs, (GDestroyNotify) g_object_unref);
+ g_ptr_array_unref (data->query_files);
+ g_ptr_array_unref (data->updated_dirs);
+ g_object_unref (data->root);
+ g_free (data);
+}
+
/* Crawler signal handlers */
static gboolean
crawler_check_file_cb (TrackerCrawler *crawler,
@@ -153,6 +193,7 @@ crawler_check_directory_cb (TrackerCrawler *crawler,
GFile *root, *canonical;
priv = TRACKER_FILE_NOTIFIER (user_data)->priv;
+ g_assert (priv->current_index_root != NULL);
canonical = tracker_file_system_peek_file (priv->file_system, directory);
root = tracker_indexing_tree_get_root (priv->indexing_tree, directory, NULL);
@@ -162,7 +203,7 @@ crawler_check_directory_cb (TrackerCrawler *crawler,
* when the time arrives.
*/
if (canonical && root == canonical &&
- root != priv->current_index_root) {
+ root != priv->current_index_root->root) {
return FALSE;
}
@@ -214,14 +255,26 @@ file_notifier_traverse_tree_foreach (GFile *file,
TrackerFileNotifier *notifier;
TrackerFileNotifierPrivate *priv;
guint64 *store_mtime, *disk_mtime;
+ GFile *current_root;
+ GFileType file_type;
notifier = user_data;
priv = notifier->priv;
+ current_root = g_queue_peek_head (priv->current_index_root->pending_dirs);
+
+ /* If we're crawling over a subdirectory of a root index, it's been
+ * already notified in the crawling op that made it processed, so avoid
+ * it here again.
+ */
+ if (current_root == file &&
+ current_root != priv->current_index_root->root)
+ return FALSE;
store_mtime = tracker_file_system_get_property (priv->file_system, file,
quark_property_store_mtime);
disk_mtime = tracker_file_system_get_property (priv->file_system, file,
quark_property_filesystem_mtime);
+ file_type = tracker_file_system_get_file_type (priv->file_system, file);
if (store_mtime && !disk_mtime) {
/* In store but not in disk, delete */
@@ -231,10 +284,20 @@ file_notifier_traverse_tree_foreach (GFile *file,
} else if (disk_mtime && !store_mtime) {
/* In disk but not in store, create */
g_signal_emit (notifier, signals[FILE_CREATED], 0, file);
- } else if (store_mtime && disk_mtime &&
- abs (*disk_mtime - *store_mtime) > 2) {
+ } else if (store_mtime && disk_mtime && *disk_mtime != *store_mtime) {
/* Mtime changed, update */
g_signal_emit (notifier, signals[FILE_UPDATED], 0, file, FALSE);
+
+ if (file_type == G_FILE_TYPE_DIRECTORY) {
+ /* A directory has updated its mtime, this means something
+ * was either added or removed in the mean time. Crawling
+ * will always find all newly added files. But still, we
+ * must check the contents in the store to handle contents
+ * having been deleted in the directory.
+ */
+ g_ptr_array_add (priv->current_index_root->updated_dirs,
+ file);
+ }
} else if (!store_mtime && !disk_mtime) {
/* what are we doing with such file? should happen rarely,
* only with files that we've queried, but we decided not
@@ -260,11 +323,11 @@ notifier_check_next_root (TrackerFileNotifier *notifier)
TrackerFileNotifierPrivate *priv;
priv = notifier->priv;
+ g_assert (priv->current_index_root == NULL);
if (priv->pending_index_roots) {
return crawl_directories_start (notifier);
} else {
- priv->current_index_root = NULL;
g_signal_emit (notifier, signals[FINISHED], 0);
return FALSE;
}
@@ -274,37 +337,24 @@ static void
file_notifier_traverse_tree (TrackerFileNotifier *notifier)
{
TrackerFileNotifierPrivate *priv;
- GFile *current_root, *config_root;
+ GFile *config_root, *directory;
TrackerDirectoryFlags flags;
priv = notifier->priv;
- current_root = priv->current_index_root;
+ g_assert (priv->current_index_root != NULL);
+
+ directory = g_queue_peek_head (priv->current_index_root->pending_dirs);
config_root = tracker_indexing_tree_get_root (priv->indexing_tree,
- current_root, &flags);
+ directory, &flags);
- /* Check mtime for 1) directories with the check_mtime flag
- * and 2) directories gotten from monitor events.
- */
- if (config_root != current_root ||
+ if (config_root != directory ||
flags & TRACKER_DIRECTORY_FLAG_CHECK_MTIME) {
tracker_file_system_traverse (priv->file_system,
- current_root,
+ directory,
G_LEVEL_ORDER,
file_notifier_traverse_tree_foreach,
notifier);
}
-
- /* We dispose regular files here, only directories are cached once crawling
- * has completed.
- */
- tracker_file_system_forget_files (priv->file_system,
- current_root,
- G_FILE_TYPE_REGULAR);
-
- tracker_info (" Notified files after %2.2f seconds",
- g_timer_elapsed (priv->timer, NULL));
-
- notifier_check_next_root (notifier);
}
static gboolean
@@ -334,8 +384,10 @@ file_notifier_add_node_foreach (GNode *node,
if (file_info) {
GFileType file_type;
guint64 time, *time_ptr;
+ gint depth;
file_type = g_file_info_get_file_type (file_info);
+ depth = g_node_depth (node);
/* Intern file in filesystem */
canonical = tracker_file_system_get_file (priv->file_system,
@@ -352,6 +404,19 @@ file_notifier_add_node_foreach (GNode *node,
quark_property_filesystem_mtime,
time_ptr);
g_object_unref (file_info);
+
+ if (file_type == G_FILE_TYPE_DIRECTORY && depth == MAX_DEPTH + 1) {
+ /* If the max crawling depth is reached,
+ * queue dirs for later processing
+ */
+ g_assert (node->children == NULL);
+ g_queue_push_tail (priv->current_index_root->pending_dirs,
+ g_object_ref (canonical));
+ }
+
+ if (depth != 0 || file == priv->current_index_root->root)
+ g_ptr_array_add (priv->current_index_root->query_files,
+ canonical);
}
return FALSE;
@@ -368,9 +433,12 @@ crawler_directory_crawled_cb (TrackerCrawler *crawler,
gpointer user_data)
{
TrackerFileNotifier *notifier;
+ TrackerFileNotifierPrivate *priv;
DirectoryCrawledData data = { 0 };
notifier = data.notifier = user_data;
+ priv = notifier->priv;
+
g_node_traverse (tree,
G_PRE_ORDER,
G_TRAVERSE_ALL,
@@ -378,23 +446,39 @@ crawler_directory_crawled_cb (TrackerCrawler *crawler,
file_notifier_add_node_foreach,
&data);
- g_signal_emit (notifier, signals[DIRECTORY_FINISHED], 0,
- directory,
- directories_found, directories_ignored,
- files_found, files_ignored);
-
- tracker_info (" Found %d directories, ignored %d directories",
- directories_found,
- directories_ignored);
- tracker_info (" Found %d files, ignored %d files",
- files_found,
- files_ignored);
+ priv->current_index_root->directories_found += directories_found;
+ priv->current_index_root->directories_ignored += directories_ignored;
+ priv->current_index_root->files_found += files_found;
+ priv->current_index_root->files_ignored += files_ignored;
+}
+
+static GFile *
+_insert_store_info (TrackerFileNotifier *notifier,
+ GFile *file,
+ const gchar *iri,
+ guint64 _time)
+{
+ TrackerFileNotifierPrivate *priv;
+ GFile *canonical;
+
+ priv = notifier->priv;
+ canonical = tracker_file_system_get_file (priv->file_system,
+ file,
+ G_FILE_TYPE_UNKNOWN,
+ NULL);
+ tracker_file_system_set_property (priv->file_system, canonical,
+ quark_property_iri,
+ g_strdup (iri));
+ tracker_file_system_set_property (priv->file_system, canonical,
+ quark_property_store_mtime,
+ g_memdup (&_time, sizeof (guint64)));
+ return canonical;
}
static void
-sparql_file_query_populate (TrackerFileNotifier *notifier,
- TrackerSparqlCursor *cursor,
- gboolean check_root)
+sparql_files_query_populate (TrackerFileNotifier *notifier,
+ TrackerSparqlCursor *cursor,
+ gboolean check_root)
{
TrackerFileNotifierPrivate *priv;
@@ -402,9 +486,9 @@ sparql_file_query_populate (TrackerFileNotifier *notifier,
while (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
GFile *file, *canonical, *root;
- const gchar *mtime, *iri;
- guint64 *time_ptr;
+ const gchar *time_str, *iri;
GError *error = NULL;
+ guint64 _time;
file = g_file_new_for_uri (tracker_sparql_cursor_get_string (cursor, 0, NULL));
@@ -416,144 +500,269 @@ sparql_file_query_populate (TrackerFileNotifier *notifier,
canonical = tracker_file_system_peek_file (priv->file_system, file);
root = tracker_indexing_tree_get_root (priv->indexing_tree, file, NULL);
- if (canonical && root == file &&
- root != priv->current_index_root) {
+ if (canonical && root == file && priv->current_index_root &&
+ root != priv->current_index_root->root) {
g_object_unref (file);
continue;
}
}
- canonical = tracker_file_system_get_file (priv->file_system,
- file,
- G_FILE_TYPE_UNKNOWN,
- NULL);
-
iri = tracker_sparql_cursor_get_string (cursor, 1, NULL);
- tracker_file_system_set_property (priv->file_system, canonical,
- quark_property_iri,
- g_strdup (iri));
-
- mtime = tracker_sparql_cursor_get_string (cursor, 2, NULL);
- time_ptr = g_new (guint64, 1);
- *time_ptr = (guint64) tracker_string_to_date (mtime, NULL, &error);
+ time_str = tracker_sparql_cursor_get_string (cursor, 2, NULL);
+ _time = tracker_string_to_date (time_str, NULL, &error);
if (error) {
/* This should never happen. Assume that file was modified. */
g_critical ("Getting store mtime: %s", error->message);
g_clear_error (&error);
- *time_ptr = 0;
+ _time = 0;
}
- tracker_file_system_set_property (priv->file_system, canonical,
- quark_property_store_mtime,
- time_ptr);
+ _insert_store_info (notifier, file, iri, _time);
g_object_unref (file);
}
}
static void
-sparql_query_cb (GObject *object,
- GAsyncResult *result,
- gpointer user_data)
+sparql_contents_check_deleted (TrackerFileNotifier *notifier,
+ TrackerSparqlCursor *cursor)
{
TrackerFileNotifierPrivate *priv;
+ GFile *file, *canonical;
+ const gchar *iri;
+
+ priv = notifier->priv;
+
+ while (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
+ file = g_file_new_for_uri (tracker_sparql_cursor_get_string (cursor, 0, NULL));
+ iri = tracker_sparql_cursor_get_string (cursor, 1, NULL);
+
+ if (!tracker_file_system_peek_file (priv->file_system, file)) {
+ /* The file exists on the store, but not on the
+ * crawled content, insert temporarily to handle
+ * the delete event.
+ */
+ canonical = _insert_store_info (notifier, file, iri, 0);
+ g_signal_emit (notifier, signals[FILE_DELETED], 0, canonical);
+ }
+
+ g_object_unref (file);
+ }
+}
+
+static gboolean
+crawl_directory_in_current_root (TrackerFileNotifier *notifier)
+{
+ TrackerFileNotifierPrivate *priv = notifier->priv;
+ gboolean recurse, retval = FALSE;
+ GFile *directory;
+
+ if (!priv->current_index_root)
+ return FALSE;
+
+ directory = g_queue_peek_head (priv->current_index_root->pending_dirs);
+
+ if (!directory)
+ return FALSE;
+
+ g_cancellable_reset (priv->cancellable);
+ recurse = (priv->current_index_root->flags & TRACKER_DIRECTORY_FLAG_RECURSE) != 0;
+ retval = tracker_crawler_start (priv->crawler, directory,
+ (recurse) ? MAX_DEPTH : 1);
+ return retval;
+}
+
+static void
+finish_current_directory (TrackerFileNotifier *notifier)
+{
+ TrackerFileNotifierPrivate *priv;
+ GFile *directory;
+
+ priv = notifier->priv;
+ directory = g_queue_pop_head (priv->current_index_root->pending_dirs);
+
+ /* We dispose regular files here, only directories are cached once crawling
+ * has completed.
+ */
+ tracker_file_system_forget_files (priv->file_system,
+ directory,
+ G_FILE_TYPE_REGULAR);
+
+ if (!crawl_directory_in_current_root (notifier)) {
+ /* No more directories left to be crawled in the current
+ * root, jump to the next one.
+ */
+ g_signal_emit (notifier, signals[DIRECTORY_FINISHED], 0,
+ directory,
+ priv->current_index_root->directories_found,
+ priv->current_index_root->directories_ignored,
+ priv->current_index_root->files_found,
+ priv->current_index_root->files_ignored);
+
+ tracker_info (" Notified files after %2.2f seconds",
+ g_timer_elapsed (priv->timer, NULL));
+ tracker_info (" Found %d directories, ignored %d directories",
+ priv->current_index_root->directories_found,
+ priv->current_index_root->directories_ignored);
+ tracker_info (" Found %d files, ignored %d files",
+ priv->current_index_root->files_found,
+ priv->current_index_root->files_ignored);
+
+ root_data_free (priv->current_index_root);
+ priv->current_index_root = NULL;
+
+ notifier_check_next_root (notifier);
+ }
+
+ g_object_unref (directory);
+}
+
+/* Query for directory contents, used to look for deleted contents in those */
+static void
+sparql_contents_query_cb (GObject *object,
+ GAsyncResult *result,
+ gpointer user_data)
+{
TrackerFileNotifier *notifier;
TrackerSparqlCursor *cursor;
GError *error = NULL;
notifier = user_data;
- priv = notifier->priv;
+
cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION (object),
result, &error);
-
- if (!cursor || error) {
- g_warning ("Could not query directory elements: %s\n", error->message);
+ if (error) {
+ g_warning ("Could not query directory contents: %s\n", error->message);
g_error_free (error);
- return;
+ } else if (cursor) {
+ sparql_contents_check_deleted (notifier, cursor);
+ g_object_unref (cursor);
}
- sparql_file_query_populate (notifier, cursor, TRUE);
+ finish_current_directory (notifier);
+}
- /* Mark the directory root as queried */
- tracker_file_system_set_property (priv->file_system,
- priv->current_index_root,
- quark_property_queried,
- GUINT_TO_POINTER (TRUE));
+static gchar *
+sparql_contents_compose_query (GFile **directories,
+ guint n_dirs)
+{
+ GString *str;
+ gchar *uri;
+ gint i = 0;
+
+ str = g_string_new ("SELECT nie:url(?u) ?u nfo:fileLastModified(?u) {"
+ " ?u nfo:belongsToContainer ?f . ?f nie:url ?url ."
+ " FILTER (?url IN (");
+ for (i = 0; i < n_dirs; i++) {
+ if (i != 0)
+ g_string_append_c (str, ',');
+
+ uri = g_file_get_uri (directories[i]);
+ g_string_append_printf (str, "\"%s\"", uri);
+ g_free (uri);
+ }
- tracker_info (" Queried files after %2.2f seconds",
- g_timer_elapsed (priv->timer, NULL));
+ g_string_append (str, "))}");
- /* If it's also been crawled, finish operation */
- if (tracker_file_system_get_property (priv->file_system,
- priv->current_index_root,
- quark_property_crawled)) {
- file_notifier_traverse_tree (notifier);
- }
+ return g_string_free (str, FALSE);
+}
- g_object_unref (cursor);
+static void
+sparql_contents_query_start (TrackerFileNotifier *notifier,
+ GFile **directories,
+ guint n_dirs)
+{
+ TrackerFileNotifierPrivate *priv;
+ gchar *sparql;
+
+ priv = notifier->priv;
+ sparql = sparql_contents_compose_query (directories, n_dirs);
+ tracker_sparql_connection_query_async (priv->connection,
+ sparql,
+ priv->cancellable,
+ sparql_contents_query_cb,
+ notifier);
+ g_free (sparql);
}
+/* Query for file information, used on all elements found during crawling */
static void
-sparql_file_query_start (TrackerFileNotifier *notifier,
- GFile *file,
- GFileType file_type,
- gboolean recursive,
- gboolean sync)
+sparql_files_query_cb (GObject *object,
+ GAsyncResult *result,
+ gpointer user_data)
{
TrackerFileNotifierPrivate *priv;
- gchar *uri, *sparql;
+ TrackerFileNotifier *notifier;
+ TrackerSparqlCursor *cursor;
+ GError *error = NULL;
+ notifier = user_data;
priv = notifier->priv;
- uri = g_file_get_uri (file);
-
- if (file_type == G_FILE_TYPE_DIRECTORY) {
- if (recursive) {
- sparql = g_strdup_printf ("select ?url ?u nfo:fileLastModified(?u) "
- "where {"
- " ?u a nie:DataObject ; "
- " nie:url ?url . "
- " FILTER (?url = \"%s\" || "
- " fn:starts-with (?url, \"%s/\")) "
- "}", uri, uri);
- } else {
- sparql = g_strdup_printf ("select ?url ?u nfo:fileLastModified(?u) "
- "where { "
- " ?u a nie:DataObject ; "
- " nie:url ?url . "
- " OPTIONAL { ?u nfo:belongsToContainer ?p } . "
- " FILTER (?url = \"%s\" || "
- " nie:url(?p) = \"%s\") "
- "}", uri, uri);
- }
- } else {
- /* If it's a regular file, only query this item */
- sparql = g_strdup_printf ("select ?url ?u nfo:fileLastModified(?u) "
- "where { "
- " ?u a nie:DataObject ; "
- " nie:url ?url ; "
- " nie:url \"%s\" . "
- "}", uri);
+
+ cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION (object),
+ result, &error);
+ if (error) {
+ g_warning ("Could not query indexed files: %s\n", error->message);
+ g_error_free (error);
+ } else if (cursor) {
+ sparql_files_query_populate (notifier, cursor, TRUE);
+ g_object_unref (cursor);
}
- if (sync) {
- TrackerSparqlCursor *cursor;
+ file_notifier_traverse_tree (notifier);
- cursor = tracker_sparql_connection_query (priv->connection,
- sparql, NULL, NULL);
- if (cursor) {
- sparql_file_query_populate (notifier, cursor, FALSE);
- g_object_unref (cursor);
- }
+ if (priv->current_index_root->updated_dirs->len > 0) {
+ /* Updated directories have been found, check for deleted contents in those */
+ sparql_contents_query_start (notifier,
+ (GFile**) priv->current_index_root->updated_dirs->pdata,
+ priv->current_index_root->updated_dirs->len);
+ g_ptr_array_set_size (priv->current_index_root->updated_dirs, 0);
} else {
- tracker_sparql_connection_query_async (priv->connection,
- sparql,
- priv->cancellable,
- sparql_query_cb,
- notifier);
+ finish_current_directory (notifier);
+ }
+}
+
+static gchar *
+sparql_files_compose_query (GFile **files,
+ guint n_files)
+{
+ GString *str;
+ gchar *uri;
+ gint i = 0;
+
+ str = g_string_new ("SELECT ?url ?u nfo:fileLastModified(?u) {"
+ " ?u a rdfs:Resource ; nie:url ?url . "
+ "FILTER (?url IN (");
+ for (i = 0; i < n_files; i++) {
+ if (i != 0)
+ g_string_append_c (str, ',');
+
+ uri = g_file_get_uri (files[i]);
+ g_string_append_printf (str, "\"%s\"", uri);
+ g_free (uri);
}
+ g_string_append (str, "))}");
+
+ return g_string_free (str, FALSE);
+}
+
+static void
+sparql_files_query_start (TrackerFileNotifier *notifier,
+ GFile **files,
+ guint n_files)
+{
+ TrackerFileNotifierPrivate *priv;
+ gchar *sparql;
+
+ priv = notifier->priv;
+ sparql = sparql_files_compose_query (files, n_files);
+ tracker_sparql_connection_query_async (priv->connection,
+ sparql,
+ priv->cancellable,
+ sparql_files_query_cb,
+ notifier);
g_free (sparql);
- g_free (uri);
}
static gboolean
@@ -563,6 +772,10 @@ crawl_directories_start (TrackerFileNotifier *notifier)
TrackerDirectoryFlags flags;
GFile *directory;
+ if (priv->current_index_root) {
+ return FALSE;
+ }
+
if (!priv->pending_index_roots) {
return FALSE;
}
@@ -572,45 +785,17 @@ crawl_directories_start (TrackerFileNotifier *notifier)
}
while (priv->pending_index_roots) {
- directory = priv->current_index_root = priv->pending_index_roots->data;
+ priv->current_index_root = priv->pending_index_roots->data;
priv->pending_index_roots = g_list_delete_link (priv->pending_index_roots,
priv->pending_index_roots);
-
- tracker_indexing_tree_get_root (priv->indexing_tree,
- directory,
- &flags);
-
- /* Unset crawled/queried checks on the
- * directory, we might have requested a
- * reindex.
- */
- tracker_file_system_unset_property (priv->file_system,
- directory,
- quark_property_crawled);
- tracker_file_system_unset_property (priv->file_system,
- directory,
- quark_property_queried);
-
- g_cancellable_reset (priv->cancellable);
+ directory = priv->current_index_root->root;
+ flags = priv->current_index_root->flags;
if ((flags & TRACKER_DIRECTORY_FLAG_IGNORE) == 0 &&
- tracker_crawler_start (priv->crawler,
- directory,
- (flags & TRACKER_DIRECTORY_FLAG_RECURSE) ? -1 : 1)) {
- gchar *uri;
-
- sparql_file_query_start (notifier, directory,
- G_FILE_TYPE_DIRECTORY,
- (flags & TRACKER_DIRECTORY_FLAG_RECURSE) != 0,
- FALSE);
-
+ crawl_directory_in_current_root (notifier)) {
g_timer_reset (priv->timer);
g_signal_emit (notifier, signals[DIRECTORY_STARTED], 0, directory);
- uri = g_file_get_uri (directory);
- tracker_info ("Started inspecting '%s'", uri);
- g_free (uri);
-
return TRUE;
} else {
/* Emit both signals for consistency */
@@ -623,9 +808,11 @@ crawl_directories_start (TrackerFileNotifier *notifier)
g_signal_emit (notifier, signals[DIRECTORY_FINISHED], 0,
directory, 0, 0, 0, 0);
}
+
+ root_data_free (priv->current_index_root);
+ priv->current_index_root = NULL;
}
- priv->current_index_root = NULL;
g_signal_emit (notifier, signals[FINISHED], 0);
return FALSE;
@@ -638,27 +825,28 @@ crawler_finished_cb (TrackerCrawler *crawler,
{
TrackerFileNotifier *notifier = user_data;
TrackerFileNotifierPrivate *priv = notifier->priv;
+ GFile *directory;
- tracker_info (" %s crawling files after %2.2f seconds",
- was_interrupted ? "Stopped" : "Finished",
- g_timer_elapsed (priv->timer, NULL));
-
- if (!was_interrupted) {
- GFile *directory;
+ g_assert (priv->current_index_root != NULL);
- directory = priv->current_index_root;
+ if (was_interrupted) {
+ finish_current_directory (notifier);
+ return;
+ }
- /* Mark the directory root as crawled */
- tracker_file_system_set_property (priv->file_system, directory,
- quark_property_crawled,
- GUINT_TO_POINTER (TRUE));
+ directory = g_queue_peek_head (priv->current_index_root->pending_dirs);
- /* If it's also been queried, finish operation */
- if (tracker_file_system_get_property (priv->file_system,
- directory,
- quark_property_queried)) {
- file_notifier_traverse_tree (notifier);
- }
+ if (priv->current_index_root->query_files->len > 0 &&
+ (directory == priv->current_index_root->root ||
+ tracker_file_system_get_property (priv->file_system,
+ directory, quark_property_iri))) {
+ sparql_files_query_start (notifier,
+ (GFile**) priv->current_index_root->query_files->pdata,
+ priv->current_index_root->query_files->len);
+ g_ptr_array_set_size (priv->current_index_root->query_files, 0);
+ } else {
+ file_notifier_traverse_tree (notifier);
+ finish_current_directory (notifier);
}
}
@@ -668,11 +856,12 @@ notifier_queue_file (TrackerFileNotifier *notifier,
TrackerDirectoryFlags flags)
{
TrackerFileNotifierPrivate *priv = notifier->priv;
+ RootData *data = root_data_new (notifier, file);
if (flags & TRACKER_DIRECTORY_FLAG_PRIORITY) {
- priv->pending_index_roots = g_list_prepend (priv->pending_index_roots, file);
+ priv->pending_index_roots = g_list_prepend (priv->pending_index_roots, data);
} else {
- priv->pending_index_roots = g_list_append (priv->pending_index_roots, file);
+ priv->pending_index_roots = g_list_append (priv->pending_index_roots, data);
}
}
@@ -1012,25 +1201,23 @@ indexing_tree_directory_added (TrackerIndexingTree *indexing_tree,
{
TrackerFileNotifier *notifier = user_data;
TrackerFileNotifierPrivate *priv = notifier->priv;
- gboolean start_crawler = FALSE;
TrackerDirectoryFlags flags;
tracker_indexing_tree_get_root (indexing_tree, directory, &flags);
directory = tracker_file_system_get_file (priv->file_system, directory,
G_FILE_TYPE_DIRECTORY, NULL);
- if (!priv->stopped &&
- !priv->pending_index_roots) {
- start_crawler = TRUE;
- }
-
- if (!g_list_find (priv->pending_index_roots, directory)) {
- notifier_queue_file (notifier, directory, flags);
+ notifier_queue_file (notifier, directory, flags);
+ crawl_directories_start (notifier);
+}
- if (start_crawler) {
- crawl_directories_start (notifier);
- }
- }
+static gint
+find_directory_root (RootData *data,
+ GFile *file)
+{
+ if (data->root == file)
+ return 0;
+ return -1;
}
static void
@@ -1041,6 +1228,7 @@ indexing_tree_directory_removed (TrackerIndexingTree *indexing_tree,
TrackerFileNotifier *notifier = user_data;
TrackerFileNotifierPrivate *priv = notifier->priv;
TrackerDirectoryFlags flags;
+ GList *elem;
/* Flags are still valid at the moment of deletion */
tracker_indexing_tree_get_root (indexing_tree, directory, &flags);
@@ -1085,14 +1273,24 @@ indexing_tree_directory_removed (TrackerIndexingTree *indexing_tree,
g_signal_emit (notifier, signals[FILE_DELETED], 0, directory);
}
- priv->pending_index_roots = g_list_remove_all (priv->pending_index_roots,
- directory);
+ elem = g_list_find_custom (priv->pending_index_roots, directory,
+ (GCompareFunc) find_directory_root);
- if (directory == priv->current_index_root) {
+ if (elem) {
+ root_data_free (elem->data);
+ priv->pending_index_roots =
+ g_list_delete_link (priv->pending_index_roots, elem);
+ }
+
+ if (priv->current_index_root &&
+ directory == priv->current_index_root->root) {
/* Directory being currently processed */
tracker_crawler_stop (priv->crawler);
g_cancellable_cancel (priv->cancellable);
+ root_data_free (priv->current_index_root);
+ priv->current_index_root = NULL;
+
notifier_check_next_root (notifier);
}
@@ -1121,6 +1319,10 @@ tracker_file_notifier_finalize (GObject *object)
g_object_unref (priv->cancellable);
g_object_unref (priv->connection);
+ if (priv->current_index_root)
+ root_data_free (priv->current_index_root);
+
+ g_list_foreach (priv->pending_index_roots, (GFunc) root_data_free, NULL);
g_list_free (priv->pending_index_roots);
g_timer_destroy (priv->timer);
@@ -1236,12 +1438,6 @@ tracker_file_notifier_class_init (TrackerFileNotifierClass *klass)
sizeof (TrackerFileNotifierClass));
/* Initialize property quarks */
- quark_property_crawled = g_quark_from_static_string ("tracker-property-crawled");
- tracker_file_system_register_property (quark_property_crawled, NULL);
-
- quark_property_queried = g_quark_from_static_string ("tracker-property-queried");
- tracker_file_system_register_property (quark_property_queried, NULL);
-
quark_property_iri = g_quark_from_static_string ("tracker-property-iri");
tracker_file_system_register_property (quark_property_iri, g_free);
@@ -1407,10 +1603,19 @@ tracker_file_notifier_get_file_iri (TrackerFileNotifier *notifier,
quark_property_iri);
if (!iri && force) {
+ TrackerSparqlCursor *cursor;
+ gchar *sparql;
+
/* Fetch data for this file synchronously */
- sparql_file_query_start (notifier, canonical,
- G_FILE_TYPE_REGULAR,
- FALSE, TRUE);
+ sparql = sparql_files_compose_query (&file, 1);
+ cursor = tracker_sparql_connection_query (priv->connection,
+ sparql, NULL, NULL);
+ g_free (sparql);
+
+ if (cursor) {
+ sparql_files_query_populate (notifier, cursor, FALSE);
+ g_object_unref (cursor);
+ }
iri = tracker_file_system_get_property (priv->file_system,
canonical,