diff options
author | Carlos Garnacho <carlosg@gnome.org> | 2020-12-20 12:09:50 +0100 |
---|---|---|
committer | Carlos Garnacho <carlosg@gnome.org> | 2020-12-31 13:07:29 +0100 |
commit | 2dfd2ec1ae1e93a46eeb4133499d51698a59a624 (patch) | |
tree | c77b452dce27ebaefeda7adbded84651af17d5e2 | |
parent | 4c2246c115e08ca5bde8bee670ec7640b5122da6 (diff) | |
download | tracker-wip/carlosg/refcount-in-code.tar.gz |
libtracker-data: Manage resource refcount in DB via code [wip/carlosg/refcount-in-code]
Triggers take a performance toll; managing the refcount manually fares
a bit better. There are several reasons for this:
- Adding triggers by the hundreds, as we do, takes a performance hit; e.g.
adding dumb "SELECT 1" triggers vs. not adding them still has a
visible effect.
- The updates in the triggers are rather dumb, eg. executing for
a property on insertions, even though that property might be null.
These queries could be avoided entirely.
- Managing refcounts manually means we coalesce many references to the
same resource (e.g. rdf:type relations) in a single update.
Do this refcount maintenance in code instead. In order to stay ABI compatible
and (fingers crossed) avoid DB refcount bugs in the future, the rules
are the same:
- Each row in a class table gets a refcount
- Each value in a rdfs:Resource property adds a reference to the
resource being pointed to.
- In addition, multivalued rdfs:Resource properties also add one
reference per value to the resource holding the property.
- Not observed: domainIndex properties transferred from superclasses
This makes insertions and updates noticeably faster, e.g. up to 25%
faster for "INSERT DATA { _:u a rdfs:Resource }" inserted via
TrackerBatch/TrackerResource.
Bonus points: We don't need to set up those runtime triggers anymore,
so TrackerSparqlConnection initialization is also faster.
-rw-r--r-- | src/libtracker-data/tracker-data-manager.c | 231 | ||||
-rw-r--r-- | src/libtracker-data/tracker-data-update.c | 199 |
2 files changed, 228 insertions, 202 deletions
diff --git a/src/libtracker-data/tracker-data-manager.c b/src/libtracker-data/tracker-data-manager.c index 23291722d..c71d6672f 100644 --- a/src/libtracker-data/tracker-data-manager.c +++ b/src/libtracker-data/tracker-data-manager.c @@ -2763,191 +2763,6 @@ schedule_copy (GPtrArray *schedule, } static void -create_insert_delete_triggers (TrackerDBInterface *iface, - const gchar *database, - const gchar *table_name, - const gchar * const *properties, - gint n_properties, - GError **error) -{ - GError *internal_error = NULL; - GString *trigger_query; - gint i; - - /* Insert trigger */ - tracker_db_interface_execute_query (iface, &internal_error, - "DROP TRIGGER IF EXISTS \"%s\".\"trigger_insert_%s\" ", - database, - table_name); - if (internal_error) { - g_propagate_error (error, internal_error); - return; - } - - trigger_query = g_string_new (NULL); - g_string_append_printf (trigger_query, - "CREATE TRIGGER \"%s\".\"trigger_insert_%s\" " - "AFTER INSERT ON \"%s\" " - "FOR EACH ROW BEGIN ", - database, table_name, - table_name); - for (i = 0; i < n_properties; i++) { - g_string_append_printf (trigger_query, - "INSERT OR IGNORE INTO Refcount (ROWID, Refcount) " - "SELECT NEW.\"%s\", 0 WHERE NEW.\"%s\" IS NOT NULL; " - "UPDATE Refcount SET Refcount = Refcount + 1 WHERE Refcount.ROWID = NEW.\"%s\"; ", - properties[i], - properties[i], - properties[i]); - } - - g_string_append (trigger_query, "END; "); - tracker_db_interface_execute_query (iface, &internal_error, - "%s", trigger_query->str); - g_string_free (trigger_query, TRUE); - - if (internal_error) { - g_propagate_error (error, internal_error); - return; - } - - /* Delete trigger */ - tracker_db_interface_execute_query (iface, &internal_error, - "DROP TRIGGER IF EXISTS \"%s\".\"trigger_delete_%s\" ", - database, - table_name); - if (internal_error) { - g_propagate_error (error, internal_error); - return; - } - - trigger_query = g_string_new (NULL); - g_string_append_printf (trigger_query, - "CREATE TRIGGER 
\"%s\".\"trigger_delete_%s\" " - "AFTER DELETE ON \"%s\" " - "FOR EACH ROW BEGIN ", - database, table_name, - table_name); - for (i = 0; i < n_properties; i++) { - g_string_append_printf (trigger_query, - "UPDATE Refcount SET Refcount = Refcount - 1 WHERE Refcount.rowid = OLD.\"%s\"; " - "DELETE FROM Refcount WHERE Refcount.ROWID = OLD.\"%s\" AND Refcount.Refcount = 0; ", - properties[i], properties[i]); - } - - g_string_append (trigger_query, "END; "); - tracker_db_interface_execute_query (iface, &internal_error, - "%s", trigger_query->str); - g_string_free (trigger_query, TRUE); - - if (internal_error) { - g_propagate_error (error, internal_error); - return; - } -} - -static void -create_table_triggers (TrackerDataManager *manager, - TrackerDBInterface *iface, - const gchar *database, - TrackerClass *klass, - GError **error) -{ - const gchar *property_name; - TrackerProperty **properties, *property; - GError *internal_error = NULL; - GPtrArray *trigger_properties; - guint i, n_props; - - trigger_properties = g_ptr_array_new (); - g_ptr_array_add (trigger_properties, "ROWID"); - - properties = tracker_ontologies_get_properties (manager->ontologies, &n_props); - - for (i = 0; i < n_props; i++) { - gboolean multivalued; - gchar *table_name; - - property = properties[i]; - - if (tracker_property_get_domain (property) != klass || - tracker_property_get_data_type (property) != TRACKER_PROPERTY_TYPE_RESOURCE) - continue; - - property_name = tracker_property_get_name (property); - multivalued = tracker_property_get_multiple_values (property); - - if (multivalued) { - const gchar * const properties[] = { "ID", property_name }; - - table_name = g_strdup_printf ("%s_%s", - tracker_class_get_name (klass), - property_name); - - create_insert_delete_triggers (iface, database, table_name, properties, - G_N_ELEMENTS (properties), - &internal_error); - if (internal_error) { - g_propagate_error (error, internal_error); - g_ptr_array_unref (trigger_properties); - g_free 
(table_name); - return; - } - } else { - table_name = g_strdup (tracker_class_get_name (klass)); - g_ptr_array_add (trigger_properties, (gchar *) property_name); - } - - tracker_db_interface_execute_query (iface, &internal_error, - "DROP TRIGGER IF EXISTS \"trigger_update_%s_%s\"", - tracker_class_get_name (klass), - property_name); - if (internal_error) { - g_propagate_error (error, internal_error); - g_ptr_array_unref (trigger_properties); - g_free (table_name); - return; - } - - tracker_db_interface_execute_query (iface, &internal_error, - "CREATE TRIGGER \"%s\".\"trigger_update_%s_%s\" " - "AFTER UPDATE OF \"%s\" ON \"%s\" " - "FOR EACH ROW BEGIN " - "INSERT OR IGNORE INTO Refcount (ROWID, Refcount) " - "SELECT NEW.\"%s\", 0 WHERE NEW.\"%s\" IS NOT NULL; " - "UPDATE Refcount SET Refcount = Refcount + 1 WHERE Refcount.ROWID = NEW.\"%s\"; " - "UPDATE Refcount SET Refcount = Refcount - 1 WHERE Refcount.rowid = OLD.\"%s\";" - "DELETE FROM Refcount WHERE Refcount.ROWID = OLD.\"%s\" AND Refcount.Refcount = 0; " - "END", - database, - tracker_class_get_name (klass), - property_name, - property_name, table_name, - property_name, property_name, - property_name, property_name, property_name); - g_free (table_name); - - if (internal_error) { - g_propagate_error (error, internal_error); - g_ptr_array_unref (trigger_properties); - return; - } - } - - create_insert_delete_triggers (iface, database, - tracker_class_get_name (klass), - (const gchar * const *) trigger_properties->pdata, - trigger_properties->len, - &internal_error); - g_ptr_array_unref (trigger_properties); - - if (internal_error) { - g_propagate_error (error, internal_error); - return; - } -} - -static void create_decomposed_metadata_tables (TrackerDataManager *manager, TrackerDBInterface *iface, const gchar *database, @@ -3250,18 +3065,10 @@ create_decomposed_metadata_tables (TrackerDataManager *manager, } } - if (!in_update || in_change || tracker_class_get_is_new (service)) { - /* FIXME: We are trusting 
object refcount will stay intact across - * ontology changes. One situation where this is not true are - * removal or properties with rdfs:Resource range. - */ - create_table_triggers (manager, iface, database, service, &internal_error); - - if (internal_error) { - g_propagate_error (error, internal_error); - goto error_out; - } - } + /* FIXME: We are trusting object refcount will stay intact across + * ontology changes. One situation where this is not true are + * removal or properties with rdfs:Resource range. + */ if (copy_schedule) { guint i; @@ -4924,7 +4731,7 @@ tracker_data_manager_clear_graph (TrackerDataManager *manager, graph, tracker_class_get_name (classes[i])); if (!stmt) - break; + goto out; tracker_db_statement_execute (stmt, &inner_error); g_object_unref (stmt); @@ -4943,12 +4750,18 @@ tracker_data_manager_clear_graph (TrackerDataManager *manager, tracker_class_get_name (service), tracker_property_get_name (properties[i])); if (!stmt) - break; + goto out; tracker_db_statement_execute (stmt, &inner_error); g_object_unref (stmt); } + tracker_db_interface_execute_query (iface, + &inner_error, + "DELETE FROM \"%s\".Refcount", + graph); +out: + if (inner_error) { g_propagate_error (error, inner_error); return FALSE; @@ -5018,12 +4831,30 @@ tracker_data_manager_copy_graph (TrackerDataManager *manager, tracker_class_get_name (service), tracker_property_get_name (properties[i])); if (!stmt) - break; + goto out; tracker_db_statement_execute (stmt, &inner_error); g_object_unref (stmt); } + /* Transfer refcounts */ + tracker_db_interface_execute_query (iface, + &inner_error, + "INSERT OR IGNORE INTO \"%s\".Refcount " + "SELECT ID, 0 from \"%s\".Refcount", + destination, + source); + if (inner_error) + goto out; + + tracker_db_interface_execute_query (iface, + &inner_error, + "UPDATE \"%s\".Refcount AS B " + "SET Refcount = B.Refcount + A.Refcount " + "FROM (SELECT ID, Refcount FROM \"%s\".Refcount) AS A " + "WHERE B.ID = A.ID", + destination, source); +out: if 
(inner_error) { g_propagate_error (error, inner_error); return FALSE; diff --git a/src/libtracker-data/tracker-data-update.c b/src/libtracker-data/tracker-data-update.c index e7e1e19d3..a6b6c9665 100644 --- a/src/libtracker-data/tracker-data-update.c +++ b/src/libtracker-data/tracker-data-update.c @@ -63,6 +63,8 @@ struct _TrackerDataUpdateBufferGraph { /* string -> TrackerDataUpdateBufferResource */ GHashTable *resources; + /* id -> integer */ + GHashTable *refcounts; }; struct _TrackerDataUpdateBufferResource { @@ -1026,9 +1028,166 @@ tracker_data_resource_buffer_flush (TrackerData *data, } static void +tracker_data_update_refcount (TrackerData *data, + gint id, + gint refcount) +{ + const TrackerDataUpdateBufferGraph *graph; + gint old_refcount; + + g_assert (data->resource_buffer != NULL); + graph = data->resource_buffer->graph; + + old_refcount = GPOINTER_TO_INT (g_hash_table_lookup (graph->refcounts, + GINT_TO_POINTER (id))); + g_hash_table_insert (graph->refcounts, + GINT_TO_POINTER (id), + GINT_TO_POINTER (old_refcount + refcount)); +} + +static void +tracker_data_resource_ref (TrackerData *data, + gint id, + gboolean multivalued) +{ + if (multivalued) + tracker_data_update_refcount (data, data->resource_buffer->id, 1); + + tracker_data_update_refcount (data, id, 1); +} + +static void +tracker_data_resource_unref (TrackerData *data, + gint id, + gboolean multivalued) +{ + if (multivalued) + tracker_data_update_refcount (data, data->resource_buffer->id, -1); + + tracker_data_update_refcount (data, id, -1); +} + +/* Only applies to multivalued properties */ +static void +tracker_data_resource_unref_all (TrackerData *data, + TrackerProperty *property) +{ + GArray *old_values; + gint i; + + g_assert (tracker_property_get_multiple_values (property) == TRUE); + g_assert (tracker_property_get_data_type (property) == TRACKER_PROPERTY_TYPE_RESOURCE); + + old_values = get_old_property_values (data, property, NULL); + + for (i = 0; i < old_values->len; i++) { + GValue 
*value; + + value = &g_array_index (old_values, GValue, i); + tracker_data_resource_unref (data, g_value_get_int64 (value), TRUE); + } +} + +static void +tracker_data_flush_graph_refcounts (TrackerData *data, + TrackerDataUpdateBufferGraph *graph, + GError **error) +{ + TrackerDBInterface *iface; + TrackerDBStatement *stmt; + GHashTableIter iter; + gpointer key, value; + gint id, refcount; + GError *inner_error = NULL; + const gchar *database; + gchar *insert_query; + gchar *update_query; + gchar *delete_query; + + iface = tracker_data_manager_get_writable_db_interface (data->manager); + database = graph->graph ? graph->graph : "main"; + + insert_query = g_strdup_printf ("INSERT OR IGNORE INTO \"%s\".Refcount (ROWID, Refcount) VALUES (?1, 0)", + database); + update_query = g_strdup_printf ("UPDATE \"%s\".Refcount SET Refcount = Refcount + ?2 WHERE Refcount.ROWID = ?1", + database); + delete_query = g_strdup_printf ("DELETE FROM \"%s\".Refcount WHERE Refcount.ROWID = ?1 AND Refcount.Refcount = 0", + database); + + g_hash_table_iter_init (&iter, graph->refcounts); + + while (g_hash_table_iter_next (&iter, &key, &value)) { + id = GPOINTER_TO_INT (key); + refcount = GPOINTER_TO_INT (value); + + if (refcount > 0) { + stmt = tracker_db_interface_create_statement (iface, TRACKER_DB_STATEMENT_CACHE_TYPE_UPDATE, + &inner_error, insert_query); + if (inner_error) { + g_propagate_error (error, inner_error); + break; + } + + tracker_db_statement_bind_int (stmt, 0, id); + tracker_db_statement_execute (stmt, &inner_error); + g_object_unref (stmt); + + if (inner_error) { + g_propagate_error (error, inner_error); + break; + } + } + + if (refcount != 0) { + stmt = tracker_db_interface_create_statement (iface, TRACKER_DB_STATEMENT_CACHE_TYPE_UPDATE, + &inner_error, update_query); + if (inner_error) { + g_propagate_error (error, inner_error); + break; + } + + tracker_db_statement_bind_int (stmt, 0, id); + tracker_db_statement_bind_int (stmt, 1, refcount); + 
tracker_db_statement_execute (stmt, &inner_error); + g_object_unref (stmt); + + if (inner_error) { + g_propagate_error (error, inner_error); + break; + } + } + + if (refcount < 0) { + stmt = tracker_db_interface_create_statement (iface, TRACKER_DB_STATEMENT_CACHE_TYPE_UPDATE, + &inner_error, delete_query); + if (inner_error) { + g_propagate_error (error, inner_error); + break; + } + + tracker_db_statement_bind_int (stmt, 0, id); + tracker_db_statement_execute (stmt, &inner_error); + g_object_unref (stmt); + + if (inner_error) { + g_propagate_error (error, inner_error); + break; + } + } + + g_hash_table_iter_remove (&iter); + } + + g_free (insert_query); + g_free (update_query); + g_free (delete_query); +} + +static void graph_buffer_free (TrackerDataUpdateBufferGraph *graph) { g_hash_table_unref (graph->resources); + g_hash_table_unref (graph->refcounts); g_free (graph->graph); g_slice_free (TrackerDataUpdateBufferGraph, graph); } @@ -1066,6 +1225,12 @@ tracker_data_update_buffer_flush (TrackerData *data, goto out; } } + + tracker_data_flush_graph_refcounts (data, graph, &actual_error); + if (actual_error) { + g_propagate_error (error, actual_error); + goto out; + } } out: @@ -1130,6 +1295,7 @@ cache_create_service_decomposed (TrackerData *data, g_value_init (&gvalue, G_TYPE_INT64); cache_insert_row (data, cl); + tracker_data_resource_ref (data, data->resource_buffer->id, FALSE); class_id = tracker_class_get_id (cl); ontologies = tracker_data_manager_get_ontologies (data->manager); @@ -1137,6 +1303,7 @@ cache_create_service_decomposed (TrackerData *data, g_value_set_int64 (&gvalue, class_id); cache_insert_value (data, "rdfs:Resource_rdf:type", "rdf:type", &gvalue, TRUE); + tracker_data_resource_ref (data, class_id, TRUE); tracker_data_dispatch_insert_statement_callbacks (data, tracker_property_get_id (tracker_ontologies_get_rdf_type (ontologies)), @@ -1761,6 +1928,9 @@ cache_insert_metadata_decomposed (TrackerData *data, &value, multiple_values); + if 
(tracker_property_get_data_type (property) == TRACKER_PROPERTY_TYPE_RESOURCE) + tracker_data_resource_ref (data, g_value_get_int64 (&value), multiple_values); + if (!multiple_values) { process_domain_indexes (data, property, &value, field_name); } @@ -1813,6 +1983,8 @@ delete_metadata_decomposed (TrackerData *data, } else { cache_delete_value (data, table_name, field_name, &value, multiple_values); + if (tracker_property_get_data_type (property) == TRACKER_PROPERTY_TYPE_RESOURCE) + tracker_data_resource_unref (data, g_value_get_int64 (&value), multiple_values); if (!multiple_values) { TrackerClass **domain_index_classes; @@ -1968,6 +2140,8 @@ cache_delete_resource_type_full (TrackerData *data, value_set_remove_value (old_values, old_gvalue); cache_delete_value (data, table_name, field_name, &copy, multiple_values); + if (tracker_property_get_data_type (prop) == TRACKER_PROPERTY_TYPE_RESOURCE) + tracker_data_resource_unref (data, g_value_get_int64 (&copy), multiple_values); if (!multiple_values) { TrackerClass **domain_index_classes; @@ -1992,8 +2166,10 @@ cache_delete_resource_type_full (TrackerData *data, g_value_set_int64 (&gvalue, tracker_class_get_id (class)); cache_delete_value (data, "rdfs:Resource_rdf:type", "rdf:type", &gvalue, TRUE); + tracker_data_resource_unref (data, tracker_class_get_id (class), TRUE); cache_delete_row (data, class); + tracker_data_resource_unref (data, data->resource_buffer->id, FALSE); tracker_data_dispatch_delete_statement_callbacks (data, tracker_property_get_id (tracker_ontologies_get_rdf_type (ontologies)), @@ -2031,6 +2207,7 @@ ensure_graph_buffer (TrackerDataUpdateBuffer *buffer, } graph_buffer = g_slice_new0 (TrackerDataUpdateBufferGraph); + graph_buffer->refcounts = g_hash_table_new (NULL, NULL); graph_buffer->graph = g_strdup (name); if (graph_buffer->graph) { graph_buffer->id = tracker_data_manager_find_graph (data->manager, @@ -2197,12 +2374,17 @@ delete_all_helper (TrackerData *data, cache_delete_all_values (data,
tracker_property_get_table_name (property), tracker_property_get_name (property)); + if (tracker_property_get_data_type (property) == TRACKER_PROPERTY_TYPE_RESOURCE) + tracker_data_resource_unref_all (data, property); } else { + value = &g_array_index (old_values, GValue, 0); cache_delete_value (data, tracker_property_get_table_name (property), tracker_property_get_name (property), - &g_array_index (old_values, GValue, 0), + value, FALSE); + if (tracker_property_get_data_type (property) == TRACKER_PROPERTY_TYPE_RESOURCE) + tracker_data_resource_unref (data, g_value_get_int64 (value), FALSE); } } else { super_old_values = get_old_property_values (data, property, error); @@ -2220,6 +2402,10 @@ delete_all_helper (TrackerData *data, tracker_property_get_name (property), value, tracker_property_get_multiple_values (property)); + if (tracker_property_get_data_type (property) == TRACKER_PROPERTY_TYPE_RESOURCE) { + tracker_data_resource_unref (data, g_value_get_int64 (value), + tracker_property_get_multiple_values (property)); + } } } @@ -2301,6 +2487,8 @@ delete_single_valued (TrackerData *data, cache_delete_all_values (data, tracker_property_get_table_name (field), tracker_property_get_name (field)); + if (tracker_property_get_data_type (field) == TRACKER_PROPERTY_TYPE_RESOURCE) + tracker_data_resource_unref_all (data, field); } else if (!multiple_values) { GError *inner_error = NULL; GArray *old_values; @@ -2308,11 +2496,16 @@ delete_single_valued (TrackerData *data, old_values = get_old_property_values (data, field, &inner_error); if (old_values && old_values->len == 1) { + GValue *value; + + value = &g_array_index (old_values, GValue, 0); cache_delete_value (data, tracker_property_get_table_name (field), tracker_property_get_name (field), - &g_array_index (old_values, GValue, 0), + value, FALSE); + if (tracker_property_get_data_type (field) == TRACKER_PROPERTY_TYPE_RESOURCE) + tracker_data_resource_unref (data, g_value_get_int64 (value), multiple_values); } else { /* 
no need to error out if statement does not exist for any reason */ g_clear_error (&inner_error); @@ -2551,6 +2744,8 @@ tracker_data_update_statement (TrackerData *data, cache_delete_all_values (data, tracker_property_get_table_name (property), tracker_property_get_name (property)); + if (tracker_property_get_data_type (property) == TRACKER_PROPERTY_TYPE_RESOURCE) + tracker_data_resource_unref_all (data, property); } else { if (!resource_buffer_switch (data, graph, subject, error)) return; |