diff options
author | Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com> | 2012-05-03 16:48:56 +0200 |
---|---|---|
committer | Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com> | 2012-05-03 16:48:56 +0200 |
commit | 6fbb4c2b9648a75ff95439e3fa4193558762563f (patch) | |
tree | 8c6cc793bd2ecb628bd9897a45262411a49c0c30 | |
parent | 6657fe221da26b1cfd37a5bc1bc4cc80ebeb52cf (diff) | |
parent | 1897c150b841c249100fcfd225a3626b742565f8 (diff) | |
download | zeitgeist-6fbb4c2b9648a75ff95439e3fa4193558762563f.tar.gz |
Merge branch 'master' into libzeitgeist2
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | configure.ac | 6 | ||||
-rw-r--r-- | extensions/fts++/indexer.cpp | 5 | ||||
-rw-r--r-- | extensions/fts++/test/test-indexer.cpp | 41 | ||||
-rw-r--r-- | extensions/fts++/zeitgeist-fts.vala | 10 | ||||
-rw-r--r-- | src/Makefile.am | 6 | ||||
-rw-r--r-- | src/db-reader.vala | 46 | ||||
-rw-r--r-- | src/sql-schema.vala | 47 | ||||
-rw-r--r-- | src/sql.vala | 2 | ||||
-rw-r--r-- | test/dbus/remote-test.py | 16 | ||||
-rwxr-xr-x | tools/generate_events.py | 262 |
11 files changed, 416 insertions, 27 deletions
@@ -73,3 +73,5 @@ libzeitgeist/*.c *.la *.stamp *.lo +*.pyc +*.swp diff --git a/configure.ac b/configure.ac index 32256888..74f2b9ab 100644 --- a/configure.ac +++ b/configure.ac @@ -90,6 +90,12 @@ AC_ARG_ENABLE([fts], AM_CONDITIONAL(HAVE_FTS, test "x$enable_fts" != "xno") +AC_ARG_ENABLE([explain-queries], + AS_HELP_STRING([--enable-explain-queries], [Enable SQL debugging]), + [explain_queries=$enableval],[enable_queries="no"]) + +AM_CONDITIONAL(EXPLAIN_QUERIES, test "x$explain_queries" = "xyes") + AC_CONFIG_FILES([ Makefile src/Makefile diff --git a/extensions/fts++/indexer.cpp b/extensions/fts++/indexer.cpp index af96eac7..483caf7f 100644 --- a/extensions/fts++/indexer.cpp +++ b/extensions/fts++/indexer.cpp @@ -561,12 +561,13 @@ bool Indexer::IndexUri (std::string const& uri, std::string const& origin) size_t question_mark = uri.find ('?'); if (question_mark != std::string::npos) { - std::string stripped (uri, 0, question_mark - 1); + std::string stripped (uri, 0, question_mark); basename = g_path_get_basename (stripped.c_str ()); } else { - basename = g_file_get_basename (f); + // g_file_get_basename would unescape the uri, we don't want that here + basename = g_path_get_basename (uri.c_str ()); } // step 2) unescape and check that it's valid utf8 diff --git a/extensions/fts++/test/test-indexer.cpp b/extensions/fts++/test/test-indexer.cpp index 62ca4118..8330a5ea 100644 --- a/extensions/fts++/test/test-indexer.cpp +++ b/extensions/fts++/test/test-indexer.cpp @@ -88,6 +88,21 @@ assert_nth_result_has_text (GPtrArray* results, int n, const char *text) g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, text); } +// This function only supports events with a single subject, +// since that's enough for the tests in this file. +static void +assert_nth_result_has_uri (GPtrArray* results, int n, const char *text) +{ + g_assert_cmpuint (n, <, results->len); + ZeitgeistEvent *event = (ZeitgeistEvent*) results->pdata[n]; + g_assert (event); + g_assert_cmpint (zeitgeist_event_num_subjects (event), ==, 1); + ZeitgeistSubject *subject = (ZeitgeistSubject*) + g_ptr_array_index (zeitgeist_event_get_subjects (event), 0); + g_assert (subject); + g_assert_cmpstr (zeitgeist_subject_get_uri (subject), ==, text); +} + static ZeitgeistEvent* create_test_event1 (void) { ZeitgeistEvent *event = zeitgeist_event_new (); @@ -609,6 +624,30 @@ test_simple_underscores (Fixture *fix, gconstpointer data) } static void +test_simple_escaped_string (Fixture *fix, gconstpointer data) // (LP: #594171) +{ + guint matches; + guint event_id; + ZeitgeistEvent* event; + GPtrArray* results; + + // add test events to DBs + const char uri[] = "http://encodings.com/percentage-%25-is-fun"; + const char text[] = "%25 is the encoding for a percentage"; + event_id = index_event (fix, create_test_event_simple (uri, text)); + + // Search for MostPopularSubjects + results = search_simple (fix, "percentage", NULL, + ZEITGEIST_RESULT_TYPE_MOST_POPULAR_SUBJECTS, &matches); + + g_assert_cmpuint (matches, >, 0); + g_assert_cmpuint (results->len, ==, 1); + assert_nth_result_has_id (results, 0, event_id); + assert_nth_result_has_uri (results, 0, uri); + assert_nth_result_has_text (results, 0, text); +} + +static void test_simple_camelcase (Fixture *fix, gconstpointer data) { guint matches; @@ -1147,6 +1186,8 @@ void test_indexer_create_suite (void) setup, test_simple_noexpand_valid, teardown); g_test_add ("/Zeitgeist/FTS/Indexer/Simple/Underscores", Fixture, 0, setup, test_simple_underscores, teardown); + g_test_add ("/Zeitgeist/FTS/Indexer/Simple/EscapedString", Fixture, 0, + setup, test_simple_escaped_string, teardown); g_test_add ("/Zeitgeist/FTS/Indexer/Simple/Camelcase", Fixture, 0, setup, test_simple_camelcase, teardown); g_test_add ("/Zeitgeist/FTS/Indexer/Simple/PrefixWithDashes", Fixture, 0, diff --git a/extensions/fts++/zeitgeist-fts.vala b/extensions/fts++/zeitgeist-fts.vala index 7800a828..29ea1f4d 100644 --- a/extensions/fts++/zeitgeist-fts.vala +++ b/extensions/fts++/zeitgeist-fts.vala @@ -69,10 +69,10 @@ namespace Zeitgeist indexer = new Indexer (engine); } - private void do_quit () + private void close () { engine.close (); - mainloop.quit (); + indexer = null; // close the index } public void register_dbus_object (DBusConnection conn) throws IOError @@ -231,6 +231,10 @@ namespace Zeitgeist if (instance != null) { + // Close any database connections + instance.close (); + + // Release the bus name Bus.unown_name (owner_id); instance.unregister_dbus_object (); instance = null; @@ -249,7 +253,7 @@ namespace Zeitgeist static void safe_exit () { - instance.do_quit (); + mainloop.quit (); } static int main (string[] args) diff --git a/src/Makefile.am b/src/Makefile.am index 2377c813..e74dfdc7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -18,6 +18,12 @@ AM_VALAFLAGS = \ $(top_srcdir)/config.vapi \ $(NULL) +if EXPLAIN_QUERIES +AM_VALAFLAGS = $(AM_VALAFLAGS) \ + -D EXPLAIN_QUERIES \ + $(NULL) +endif + # Make sure every extension has only one vala file! extensions_VALASOURCES = \ ext-data-source-registry.vala \ diff --git a/src/db-reader.vala b/src/db-reader.vala index 5da62457..f7c996f6 100644 --- a/src/db-reader.vala +++ b/src/db-reader.vala @@ -401,10 +401,9 @@ public class DbReader : Object * Only URIs for subjects matching the indicated `result_event_templates` * and `result_storage_state` are returned. */ - if (result_type == ResultType.MOST_RECENT_EVENTS || - result_type == ResultType.LEAST_RECENT_EVENTS) + if (result_type == RelevantResultType.RECENT || + result_type == RelevantResultType.RELATED) { - // We pick out the ids for relational event so we can set them as // roots the ids are taken from the events that match the // events_templates @@ -482,36 +481,41 @@ public class DbReader : Object { window.add(temp_related_uris[j]); if (temp_related_uris[j].id in ids) + { count_in_window = true; + break; + } } if (count_in_window) { for (int j = 0; j < window.length; j++) { - if (uri_counter.lookup (window[j].uri) == null) + if (window[j].id in result_ids) { - RelatedUri ruri = RelatedUri () + if (uri_counter.lookup (window[j].uri) == null) { - id = window[j].id, - timestamp = window[j].timestamp, - uri = window[j].uri, - counter = 0 - }; - uri_counter.insert (window[j].uri, ruri); - } - uri_counter.lookup (window[j].uri).counter++; - if (uri_counter.lookup (window[j].uri).timestamp - < window[j].timestamp) - { - uri_counter.lookup (window[j].uri).timestamp = - window[j].timestamp; + RelatedUri ruri = RelatedUri () + { + id = window[j].id, + timestamp = window[j].timestamp, + uri = window[j].uri, + counter = 0 + }; + uri_counter.insert (window[j].uri, ruri); + } + uri_counter.lookup (window[j].uri).counter++; + if (uri_counter.lookup (window[j].uri).timestamp + < window[j].timestamp) + { + uri_counter.lookup (window[j].uri).timestamp = + window[j].timestamp; + } } } } } - // We have the big hashtable with the structs, now we sort them by // most used and limit the result then sort again List<RelatedUri?> temp_ruris = new List<RelatedUri?>(); @@ -537,7 +541,7 @@ public class DbReader : Object } // Sort by recency - if (result_type == 1) + if (result_type == RelevantResultType.RECENT) temp_ruris.sort ((a, b) => { int64 delta = a.timestamp - b.timestamp; if (delta < 0) return 1; @@ -560,7 +564,7 @@ public class DbReader : Object } else { - throw new EngineError.DATABASE_ERROR ("Unsupported ResultType."); + throw new EngineError.DATABASE_ERROR ("Unsupported RelevantResultType"); } } diff --git a/src/sql-schema.vala b/src/sql-schema.vala index 4c369a96..2c67a930 100644 --- a/src/sql-schema.vala +++ b/src/sql-schema.vala @@ -57,10 +57,37 @@ namespace Zeitgeist.SQLite Timestamp.now ()); exec_query (database, schema_sql); } - else if (schema_version == 4 || schema_version == 5) + else if (schema_version >= 3 && schema_version <= 5) { backup_database (); + if (schema_version == 3) + { + // Add missing columns to storage table + exec_query (database, + "ALTER TABLE storage ADD COLUMN icon VARCHAR"); + exec_query (database, + "ALTER TABLE storage ADD COLUMN display_name VARCHAR"); + + // Set subjects that don't have a storage to "unknown", so + // they'll always be marked as available. + // FIXME: Do we want to separate unknown/local/online? + exec_query (database, """ + INSERT OR IGNORE INTO storage (value, state) + VALUES ('unknown', 1) + """); + exec_query (database, """ + UPDATE event SET subj_storage = + (SELECT id FROM storage WHERE value='unknown') + WHERE subj_storage IS NULL + """); + + // The events table is missing two columns, (event) origin + // and subj_current_id. It needs to be replaced. + exec_query (database, + "ALTER TABLE event RENAME TO event_old"); + } + string[] tables = { "interpretation", "manifestation", "mimetype", "actor" }; @@ -84,6 +111,24 @@ namespace Zeitgeist.SQLite exec_query (database, "DROP TABLE %s_old".printf (table)); } + if (schema_version == 3) + { + // Migrate events from the old table + exec_query (database, """ + INSERT INTO event + SELECT + id, timestamp, interpretation, manifestation, + actor, payload, subj_id, subj_interpretation, + subj_manifestation, subj_origin, subj_mimetype, + subj_text, subj_storage, NULL as origin, + subj_id AS subj_id_current + FROM event_old + """); + + // This will also drop any triggers the `events' table had + exec_query (database, "DROP TABLE event_old"); + } + // Ontology update exec_query (database, "INSERT OR IGNORE INTO manifestation (value) VALUES ('%s')" diff --git a/src/sql.vala b/src/sql.vala index 224fde9d..eab92891 100644 --- a/src/sql.vala +++ b/src/sql.vala @@ -293,6 +293,8 @@ namespace Zeitgeist.SQLite rc = prepared_stmt.db_handle ().prepare_v2 (explain_sql, -1, out stmt); assert_query_success(rc, "SQL error"); + print ("%s\n", explain_sql); + while ((rc = stmt.step()) == Sqlite.ROW) { int select_id = stmt.column_int (0); diff --git a/test/dbus/remote-test.py b/test/dbus/remote-test.py index aff2cdee..1c601385 100644 --- a/test/dbus/remote-test.py +++ b/test/dbus/remote-test.py @@ -445,6 +445,22 @@ class ZeitgeistRemoteFindEventIdsTest(testutils.RemoteTestCase): storage_state=StorageState.NotAvailable) self.assertEquals(ids, [5, 4, 2, 3, 1]) + def testFindEventIdsWithUnknownStorageState(self): + """ + Events with storage state "unknown" should always be considered + as being available. + """ + + event = parse_events("test/data/single_event.js")[0] + event.subjects[0].uri = 'file:///i-am-unknown' + event.subjects[0].storage = 'unknown' + + self.insertEventsAndWait([event]) + + tmpl = Event.new_for_values(subject_uri='file:///i-am-unknown') + ids = self.findEventIdsAndWait([tmpl], storage_state=StorageState.Available) + self.assertEquals(ids, [6]) + class ZeitgeistRemoteInterfaceTest(testutils.RemoteTestCase): def testQuit(self): diff --git a/tools/generate_events.py b/tools/generate_events.py new file mode 100755 index 00000000..8910607c --- /dev/null +++ b/tools/generate_events.py @@ -0,0 +1,262 @@ +#! /usr/bin/env python +# -.- coding: utf-8 -.- + +# Zeitgeist - Insert random events into the database +# +# Copyright © 2012 Canonical Ltd. +# By Siegfried-A. Gevatter <siegfried.gevatter@collabora.co.uk> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 2.1 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# ############################################################################# +# WARNING: make sure you launch Zeitgeist with ZEITGEIST_DATA_PATH set if +# you don't want to fill your real database! +# ############################################################################# + +import os +import sys +import time +import random +from collections import deque +from gi.repository import GLib, GObject + +from zeitgeist import mimetypes +from zeitgeist.datamodel import * +from zeitgeist.client import ZeitgeistDBusInterface + +class EventGenerator: + + NUM_WORDS = 1000 + NUM_SIMULTANEOUS_URIS = 1000 + MAX_EVENT_AGE = 366*24*3600*1000 + + _words = None + _mimetypes = None + _desktop_files = None + _schemas = None + _uri_table = None + + def __init__(self): + # Initialize a pool of random words for use in URIs, etc. + dictionary_words = map(str.strip, + open('/usr/share/dict/words').readlines()) + dictionary_words = filter(lambda x: '\'s' not in x, dictionary_words) + self._words = random.sample(dictionary_words, self.NUM_WORDS) + + # Initialize a pool of MIME-Types + self._mimetypes = mimetypes.MIMES.keys() + + # Initialize a pool of application names + self._desktop_files = filter(lambda actor: actor.endswith('.desktop'), + os.listdir('/usr/share/applications')) + + # Initialize a list of URI schemas + self._schemas = ('application', 'davs', 'http', 'https', 'ftp') + + # Initialize a cache of URIs + self._uri_table = deque(maxlen=self.NUM_SIMULTANEOUS_URIS) + + def get_word(self): + # FIXME: add numbers and stuff? + return random.choice(self._words) + + def get_extension(self): + if random.random() < 0.8: + extensions = [ + 'odt', 'odp', 'doc', + 'oga', 'ogv', 'mp3' + 'png', 'jpg', 'gif', 'tiff' + 'html', 'xml', 'txt' + 'py', 'c', 'cpp', 'js', 'vala' + ] + else: + extensions = self._words + return filter(str.isalpha, random.choice(extensions)) + + def get_path(self, force_directory=False): + path = '' + num_parts = 1 + abs(int(random.gauss(3, 3))) + for i in range(num_parts): + path += '/%s' % self.get_word() + if random.random() < 0.9 and not force_directory: + path += '.%s' % self.get_extension() + return path + + def get_schema(self): + rand = random.random() + if rand < 0.005: + return '%s://' % random.choice(self._words) + elif rand < 0.4: + return '%s://' % random.choice(self._schemas) + else: + return 'file:///' + + def generate_uri(self): + file_uri = GLib.filename_to_uri(self.get_path(), None) + return self.get_schema() + file_uri[8:] + + def get_uri(self): + """ + We keep a cache of NUM_SIMULATENOUS_URIS uris for reuse. Every access + has a 1% chance of replacing a URI in the table with a new one. + """ + index = random.randint(0, self.NUM_SIMULTANEOUS_URIS) + if index >= len(self._uri_table): + # The URI table isn't fully initialized yet... + uri = self.generate_uri() + self._uri_table.append(uri) + return uri + if random.random() < 0.01: + # Generate a new URI + self._uri_table[index] = self.generate_uri() + return self._uri_table[index] + + def get_text(self): + num_words = abs(int(random.gauss(4, 3))) + return ' '.join(self.get_word() for i in range(num_words)) + + def get_subject_origin(self, uri): + scheme = GLib.uri_parse_scheme(uri) + if scheme == 'file': + return GLib.path_get_dirname(uri) + elif scheme in ('http', 'https'): + scheme, domain = uri.split('://', 1) + return '%s://%s' % (scheme, domain.split('/', 1)[0]) + else: + return GLib.filename_to_uri( + self.get_path(force_directory=True), None) + + def get_event_origin(self): + if random.random() < 0.005: + return self.get_uri() + return '' + + def get_actor(self): + return 'application://%s' % random.choice(self._desktop_files) + + def get_timestamp(self): + current_time = int(time.time() * 1000) + return random.randint(current_time - self.MAX_EVENT_AGE, current_time) + + def get_event_interpretation(self): + interpretations = Interpretation.EVENT_INTERPRETATION.get_children() + return random.choice(list(interpretations)) + + def get_subject_interpretation(self): + ev_interp = Interpretation.EVENT_INTERPRETATION.get_children() + subj_interp = set(Interpretation.get_children()) + subj_interp.difference_update(ev_interp) + return random.choice(list(subj_interp)) + + def get_event_manifestation(self): + if random.random() < 0.3: + manifestations = Manifestation.EVENT_MANIFESTATION.get_children() + return random.choice(list(manifestations)) + else: + return Manifestation.USER_ACTIVITY + + def get_subject_manifestation(self): + ev_manif = Manifestation.EVENT_MANIFESTATION.get_children() + subj_manif = set(Interpretation.get_children()) + subj_manif.difference_update(ev_manif) + return random.choice(list(subj_manif)) + + def get_subject(self, event_interpretation): + uri = self.get_uri() + + subject = Subject.new_for_values( + uri = uri, + current_uri = uri, + interpretation = self.get_subject_interpretation(), + manifestation = self.get_subject_manifestation(), + origin = self.get_subject_origin(uri), + mimetype = random.choice(self._mimetypes), + text = self.get_text(), + storage = "") + + if event_interpretation == Interpretation.MOVE_EVENT: + while subject.uri == subject.current_uri: + subject.current_uri = self.get_uri() + + return subject + + def get_event(self): + event_interpretation = self.get_event_interpretation() + event = Event.new_for_values( + timestamp = self.get_timestamp(), + interpretation = event_interpretation, + manifestation = self.get_event_manifestation(), + actor = self.get_actor(), + origin = self.get_event_origin()) + + num_subjects = max(1, abs(int(random.gauss(1, 1)))) + while len(event.subjects) < num_subjects: + subject = self.get_subject(event_interpretation) + if subject.uri not in (x.uri for x in event.get_subjects()): + # events with two subjects having the same URI aren't supported + event.append_subject(subject) + + return event + +class EventInserter(): + + BUFFER_SIZE = 1000 + + _log = None + _buffer = None + _events_inserted = None + + def __init__(self): + self._log = ZeitgeistDBusInterface() + self._buffer = [] + self._events_inserted = 0 + + def insert(self, event): + buffer_full = len(self._buffer) >= self.BUFFER_SIZE + if buffer_full: + self.flush() + self._buffer.append(event) + return buffer_full + + def flush(self): + if self._buffer: + self._log.InsertEvents(self._buffer) + self._events_inserted += len(self._buffer) + self._buffer = [] + + def get_insertion_count(self): + return self._events_inserted + +def main(): + limit = '10000000' if len(sys.argv) < 2 else sys.argv[1] + if len(sys.argv) > 2 or not limit.isdigit(): + print "Usage: %s [<num_events>]" % sys.argv[0] + sys.exit(1) + limit = int(limit) + + event_inserter = EventInserter() + try: + generator = EventGenerator() + for i in xrange(limit): + event = generator.get_event() + event.payload = 'generate_events.py' + if event_inserter.insert(event): + print "Inserted %d events." % i + except KeyboardInterrupt: + pass + event_inserter.flush() + print "Inserted %d events. Done." % event_inserter.get_insertion_count() + +if __name__ == '__main__': + main() |