author     Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com>   2012-05-03 16:48:56 +0200
committer  Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com>   2012-05-03 16:48:56 +0200
commit     6fbb4c2b9648a75ff95439e3fa4193558762563f (patch)
tree       8c6cc793bd2ecb628bd9897a45262411a49c0c30
parent     6657fe221da26b1cfd37a5bc1bc4cc80ebeb52cf (diff)
parent     1897c150b841c249100fcfd225a3626b742565f8 (diff)
download   zeitgeist-6fbb4c2b9648a75ff95439e3fa4193558762563f.tar.gz
Merge branch 'master' into libzeitgeist2
-rw-r--r--  .gitignore                               |   2
-rw-r--r--  configure.ac                             |   6
-rw-r--r--  extensions/fts++/indexer.cpp             |   5
-rw-r--r--  extensions/fts++/test/test-indexer.cpp   |  41
-rw-r--r--  extensions/fts++/zeitgeist-fts.vala      |  10
-rw-r--r--  src/Makefile.am                          |   6
-rw-r--r--  src/db-reader.vala                       |  46
-rw-r--r--  src/sql-schema.vala                      |  47
-rw-r--r--  src/sql.vala                             |   2
-rw-r--r--  test/dbus/remote-test.py                 |  16
-rwxr-xr-x  tools/generate_events.py                 | 262
11 files changed, 416 insertions(+), 27 deletions(-)
diff --git a/.gitignore b/.gitignore
index 8ab801b7..6fdfd042 100644
--- a/.gitignore
+++ b/.gitignore
@@ -73,3 +73,5 @@ libzeitgeist/*.c
*.la
*.stamp
*.lo
+*.pyc
+*.swp
diff --git a/configure.ac b/configure.ac
index 32256888..74f2b9ab 100644
--- a/configure.ac
+++ b/configure.ac
@@ -90,6 +90,12 @@ AC_ARG_ENABLE([fts],
AM_CONDITIONAL(HAVE_FTS, test "x$enable_fts" != "xno")
+AC_ARG_ENABLE([explain-queries],
+ AS_HELP_STRING([--enable-explain-queries], [Enable SQL debugging]),
+ [explain_queries=$enableval], [explain_queries="no"])
+
+AM_CONDITIONAL(EXPLAIN_QUERIES, test "x$explain_queries" = "xyes")
+
AC_CONFIG_FILES([
Makefile
src/Makefile
diff --git a/extensions/fts++/indexer.cpp b/extensions/fts++/indexer.cpp
index af96eac7..483caf7f 100644
--- a/extensions/fts++/indexer.cpp
+++ b/extensions/fts++/indexer.cpp
@@ -561,12 +561,13 @@ bool Indexer::IndexUri (std::string const& uri, std::string const& origin)
size_t question_mark = uri.find ('?');
if (question_mark != std::string::npos)
{
- std::string stripped (uri, 0, question_mark - 1);
+ std::string stripped (uri, 0, question_mark);
basename = g_path_get_basename (stripped.c_str ());
}
else
{
- basename = g_file_get_basename (f);
+ // g_file_get_basename would unescape the uri, we don't want that here
+ basename = g_path_get_basename (uri.c_str ());
}
// step 2) unescape and check that it's valid utf8
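
A rough Python sketch of the corrected basename logic above (illustrative only; the helper name and sample URI are made up, not part of the patch): the query string is cut off at the '?' itself, and the basename is taken from the still-escaped URI.

    def basename_for_indexing(uri):
        # Cut at the first '?', keeping everything before it; uri[:pos] excludes
        # the '?' itself, matching std::string(uri, 0, question_mark) after the
        # fix (the old question_mark - 1 also dropped the last character of the name).
        pos = uri.find('?')
        if pos != -1:
            uri = uri[:pos]
        # Take the basename of the still-escaped URI, like g_path_get_basename;
        # g_file_get_basename would unescape it, which the indexer wants to avoid.
        return uri.rstrip('/').rsplit('/', 1)[-1]

    assert basename_for_indexing('http://example.com/a%20file.txt?raw=1') == 'a%20file.txt'
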
diff --git a/extensions/fts++/test/test-indexer.cpp b/extensions/fts++/test/test-indexer.cpp
index 62ca4118..8330a5ea 100644
--- a/extensions/fts++/test/test-indexer.cpp
+++ b/extensions/fts++/test/test-indexer.cpp
@@ -88,6 +88,21 @@ assert_nth_result_has_text (GPtrArray* results, int n, const char *text)
g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, text);
}
+// This function only supports events with a single subject,
+// since that's enough for the tests in this file.
+static void
+assert_nth_result_has_uri (GPtrArray* results, int n, const char *text)
+{
+ g_assert_cmpuint (n, <, results->len);
+ ZeitgeistEvent *event = (ZeitgeistEvent*) results->pdata[n];
+ g_assert (event);
+ g_assert_cmpint (zeitgeist_event_num_subjects (event), ==, 1);
+ ZeitgeistSubject *subject = (ZeitgeistSubject*)
+ g_ptr_array_index (zeitgeist_event_get_subjects (event), 0);
+ g_assert (subject);
+ g_assert_cmpstr (zeitgeist_subject_get_uri (subject), ==, text);
+}
+
static ZeitgeistEvent* create_test_event1 (void)
{
ZeitgeistEvent *event = zeitgeist_event_new ();
@@ -609,6 +624,30 @@ test_simple_underscores (Fixture *fix, gconstpointer data)
}
static void
+test_simple_escaped_string (Fixture *fix, gconstpointer data) // (LP: #594171)
+{
+ guint matches;
+ guint event_id;
+ ZeitgeistEvent* event;
+ GPtrArray* results;
+
+ // add test events to DBs
+ const char uri[] = "http://encodings.com/percentage-%25-is-fun";
+ const char text[] = "%25 is the encoding for a percentage";
+ event_id = index_event (fix, create_test_event_simple (uri, text));
+
+ // Search for MostPopularSubjects
+ results = search_simple (fix, "percentage", NULL,
+ ZEITGEIST_RESULT_TYPE_MOST_POPULAR_SUBJECTS, &matches);
+
+ g_assert_cmpuint (matches, >, 0);
+ g_assert_cmpuint (results->len, ==, 1);
+ assert_nth_result_has_id (results, 0, event_id);
+ assert_nth_result_has_uri (results, 0, uri);
+ assert_nth_result_has_text (results, 0, text);
+}
+
+static void
test_simple_camelcase (Fixture *fix, gconstpointer data)
{
guint matches;
@@ -1147,6 +1186,8 @@ void test_indexer_create_suite (void)
setup, test_simple_noexpand_valid, teardown);
g_test_add ("/Zeitgeist/FTS/Indexer/Simple/Underscores", Fixture, 0,
setup, test_simple_underscores, teardown);
+ g_test_add ("/Zeitgeist/FTS/Indexer/Simple/EscapedString", Fixture, 0,
+ setup, test_simple_escaped_string, teardown);
g_test_add ("/Zeitgeist/FTS/Indexer/Simple/Camelcase", Fixture, 0,
setup, test_simple_camelcase, teardown);
g_test_add ("/Zeitgeist/FTS/Indexer/Simple/PrefixWithDashes", Fixture, 0,
diff --git a/extensions/fts++/zeitgeist-fts.vala b/extensions/fts++/zeitgeist-fts.vala
index 7800a828..29ea1f4d 100644
--- a/extensions/fts++/zeitgeist-fts.vala
+++ b/extensions/fts++/zeitgeist-fts.vala
@@ -69,10 +69,10 @@ namespace Zeitgeist
indexer = new Indexer (engine);
}
- private void do_quit ()
+ private void close ()
{
engine.close ();
- mainloop.quit ();
+ indexer = null; // close the index
}
public void register_dbus_object (DBusConnection conn) throws IOError
@@ -231,6 +231,10 @@ namespace Zeitgeist
if (instance != null)
{
+ // Close any database connections
+ instance.close ();
+
+ // Release the bus name
Bus.unown_name (owner_id);
instance.unregister_dbus_object ();
instance = null;
@@ -249,7 +253,7 @@ namespace Zeitgeist
static void safe_exit ()
{
- instance.do_quit ();
+ mainloop.quit ();
}
static int main (string[] args)
diff --git a/src/Makefile.am b/src/Makefile.am
index 2377c813..e74dfdc7 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -18,6 +18,12 @@ AM_VALAFLAGS = \
$(top_srcdir)/config.vapi \
$(NULL)
+if EXPLAIN_QUERIES
+AM_VALAFLAGS += \
+ -D EXPLAIN_QUERIES \
+ $(NULL)
+endif
+
# Make sure every extension has only one vala file!
extensions_VALASOURCES = \
ext-data-source-registry.vala \
diff --git a/src/db-reader.vala b/src/db-reader.vala
index 5da62457..f7c996f6 100644
--- a/src/db-reader.vala
+++ b/src/db-reader.vala
@@ -401,10 +401,9 @@ public class DbReader : Object
* Only URIs for subjects matching the indicated `result_event_templates`
* and `result_storage_state` are returned.
*/
- if (result_type == ResultType.MOST_RECENT_EVENTS ||
- result_type == ResultType.LEAST_RECENT_EVENTS)
+ if (result_type == RelevantResultType.RECENT ||
+ result_type == RelevantResultType.RELATED)
{
-
// We pick out the ids for relational event so we can set them as
// roots the ids are taken from the events that match the
// events_templates
@@ -482,36 +481,41 @@ public class DbReader : Object
{
window.add(temp_related_uris[j]);
if (temp_related_uris[j].id in ids)
+ {
count_in_window = true;
+ break;
+ }
}
if (count_in_window)
{
for (int j = 0; j < window.length; j++)
{
- if (uri_counter.lookup (window[j].uri) == null)
+ if (window[j].id in result_ids)
{
- RelatedUri ruri = RelatedUri ()
+ if (uri_counter.lookup (window[j].uri) == null)
{
- id = window[j].id,
- timestamp = window[j].timestamp,
- uri = window[j].uri,
- counter = 0
- };
- uri_counter.insert (window[j].uri, ruri);
- }
- uri_counter.lookup (window[j].uri).counter++;
- if (uri_counter.lookup (window[j].uri).timestamp
- < window[j].timestamp)
- {
- uri_counter.lookup (window[j].uri).timestamp =
- window[j].timestamp;
+ RelatedUri ruri = RelatedUri ()
+ {
+ id = window[j].id,
+ timestamp = window[j].timestamp,
+ uri = window[j].uri,
+ counter = 0
+ };
+ uri_counter.insert (window[j].uri, ruri);
+ }
+ uri_counter.lookup (window[j].uri).counter++;
+ if (uri_counter.lookup (window[j].uri).timestamp
+ < window[j].timestamp)
+ {
+ uri_counter.lookup (window[j].uri).timestamp =
+ window[j].timestamp;
+ }
}
}
}
}
-
// We have the big hashtable with the structs, now we sort them by
// most used and limit the result then sort again
List<RelatedUri?> temp_ruris = new List<RelatedUri?>();
@@ -537,7 +541,7 @@ public class DbReader : Object
}
// Sort by recency
- if (result_type == 1)
+ if (result_type == RelevantResultType.RECENT)
temp_ruris.sort ((a, b) => {
int64 delta = a.timestamp - b.timestamp;
if (delta < 0) return 1;
@@ -560,7 +564,7 @@ public class DbReader : Object
}
else
{
- throw new EngineError.DATABASE_ERROR ("Unsupported ResultType.");
+ throw new EngineError.DATABASE_ERROR ("Unsupported RelevantResultType");
}
}
diff --git a/src/sql-schema.vala b/src/sql-schema.vala
index 4c369a96..2c67a930 100644
--- a/src/sql-schema.vala
+++ b/src/sql-schema.vala
@@ -57,10 +57,37 @@ namespace Zeitgeist.SQLite
Timestamp.now ());
exec_query (database, schema_sql);
}
- else if (schema_version == 4 || schema_version == 5)
+ else if (schema_version >= 3 && schema_version <= 5)
{
backup_database ();
+ if (schema_version == 3)
+ {
+ // Add missing columns to storage table
+ exec_query (database,
+ "ALTER TABLE storage ADD COLUMN icon VARCHAR");
+ exec_query (database,
+ "ALTER TABLE storage ADD COLUMN display_name VARCHAR");
+
+ // Set subjects that don't have a storage to "unknown", so
+ // they'll always be marked as available.
+ // FIXME: Do we want to separate unknown/local/online?
+ exec_query (database, """
+ INSERT OR IGNORE INTO storage (value, state)
+ VALUES ('unknown', 1)
+ """);
+ exec_query (database, """
+ UPDATE event SET subj_storage =
+ (SELECT id FROM storage WHERE value='unknown')
+ WHERE subj_storage IS NULL
+ """);
+
+ // The events table is missing two columns, (event) origin
+ // and subj_current_id. It needs to be replaced.
+ exec_query (database,
+ "ALTER TABLE event RENAME TO event_old");
+ }
+
string[] tables = { "interpretation", "manifestation",
"mimetype", "actor" };
@@ -84,6 +111,24 @@ namespace Zeitgeist.SQLite
exec_query (database, "DROP TABLE %s_old".printf (table));
}
+ if (schema_version == 3)
+ {
+ // Migrate events from the old table
+ exec_query (database, """
+ INSERT INTO event
+ SELECT
+ id, timestamp, interpretation, manifestation,
+ actor, payload, subj_id, subj_interpretation,
+ subj_manifestation, subj_origin, subj_mimetype,
+ subj_text, subj_storage, NULL as origin,
+ subj_id AS subj_id_current
+ FROM event_old
+ """);
+
+ // This will also drop any triggers the `events' table had
+ exec_query (database, "DROP TABLE event_old");
+ }
+
// Ontology update
exec_query (database,
"INSERT OR IGNORE INTO manifestation (value) VALUES ('%s')"
diff --git a/src/sql.vala b/src/sql.vala
index 224fde9d..eab92891 100644
--- a/src/sql.vala
+++ b/src/sql.vala
@@ -293,6 +293,8 @@ namespace Zeitgeist.SQLite
rc = prepared_stmt.db_handle ().prepare_v2 (explain_sql, -1, out stmt);
assert_query_success(rc, "SQL error");
+ print ("%s\n", explain_sql);
+
while ((rc = stmt.step()) == Sqlite.ROW)
{
int select_id = stmt.column_int (0);
diff --git a/test/dbus/remote-test.py b/test/dbus/remote-test.py
index aff2cdee..1c601385 100644
--- a/test/dbus/remote-test.py
+++ b/test/dbus/remote-test.py
@@ -445,6 +445,22 @@ class ZeitgeistRemoteFindEventIdsTest(testutils.RemoteTestCase):
storage_state=StorageState.NotAvailable)
self.assertEquals(ids, [5, 4, 2, 3, 1])
+ def testFindEventIdsWithUnknownStorageState(self):
+ """
+ Events with storage state "unknown" should always be considered
+ as being available.
+ """
+
+ event = parse_events("test/data/single_event.js")[0]
+ event.subjects[0].uri = 'file:///i-am-unknown'
+ event.subjects[0].storage = 'unknown'
+
+ self.insertEventsAndWait([event])
+
+ tmpl = Event.new_for_values(subject_uri='file:///i-am-unknown')
+ ids = self.findEventIdsAndWait([tmpl], storage_state=StorageState.Available)
+ self.assertEquals(ids, [6])
+
class ZeitgeistRemoteInterfaceTest(testutils.RemoteTestCase):
def testQuit(self):
diff --git a/tools/generate_events.py b/tools/generate_events.py
new file mode 100755
index 00000000..8910607c
--- /dev/null
+++ b/tools/generate_events.py
@@ -0,0 +1,262 @@
+#! /usr/bin/env python
+# -.- coding: utf-8 -.-
+
+# Zeitgeist - Insert random events into the database
+#
+# Copyright © 2012 Canonical Ltd.
+# By Siegfried-A. Gevatter <siegfried.gevatter@collabora.co.uk>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# #############################################################################
+# WARNING: make sure you launch Zeitgeist with ZEITGEIST_DATA_PATH set if
+# you don't want to fill your real database!
+# #############################################################################
+
+import os
+import sys
+import time
+import random
+from collections import deque
+from gi.repository import GLib, GObject
+
+from zeitgeist import mimetypes
+from zeitgeist.datamodel import *
+from zeitgeist.client import ZeitgeistDBusInterface
+
+class EventGenerator:
+
+ NUM_WORDS = 1000
+ NUM_SIMULTANEOUS_URIS = 1000
+ MAX_EVENT_AGE = 366*24*3600*1000
+
+ _words = None
+ _mimetypes = None
+ _desktop_files = None
+ _schemas = None
+ _uri_table = None
+
+ def __init__(self):
+ # Initialize a pool of random words for use in URIs, etc.
+ dictionary_words = map(str.strip,
+ open('/usr/share/dict/words').readlines())
+ dictionary_words = filter(lambda x: '\'s' not in x, dictionary_words)
+ self._words = random.sample(dictionary_words, self.NUM_WORDS)
+
+ # Initialize a pool of MIME-Types
+ self._mimetypes = mimetypes.MIMES.keys()
+
+ # Initialize a pool of application names
+ self._desktop_files = filter(lambda actor: actor.endswith('.desktop'),
+ os.listdir('/usr/share/applications'))
+
+ # Initialize a list of URI schemas
+ self._schemas = ('application', 'davs', 'http', 'https', 'ftp')
+
+ # Initialize a cache of URIs
+ self._uri_table = deque(maxlen=self.NUM_SIMULTANEOUS_URIS)
+
+ def get_word(self):
+ # FIXME: add numbers and stuff?
+ return random.choice(self._words)
+
+ def get_extension(self):
+ if random.random() < 0.8:
+ extensions = [
+ 'odt', 'odp', 'doc',
+ 'oga', 'ogv', 'mp3',
+ 'png', 'jpg', 'gif', 'tiff',
+ 'html', 'xml', 'txt',
+ 'py', 'c', 'cpp', 'js', 'vala'
+ ]
+ else:
+ extensions = self._words
+ return filter(str.isalpha, random.choice(extensions))
+
+ def get_path(self, force_directory=False):
+ path = ''
+ num_parts = 1 + abs(int(random.gauss(3, 3)))
+ for i in range(num_parts):
+ path += '/%s' % self.get_word()
+ if random.random() < 0.9 and not force_directory:
+ path += '.%s' % self.get_extension()
+ return path
+
+ def get_schema(self):
+ rand = random.random()
+ if rand < 0.005:
+ return '%s://' % random.choice(self._words)
+ elif rand < 0.4:
+ return '%s://' % random.choice(self._schemas)
+ else:
+ return 'file:///'
+
+ def generate_uri(self):
+ file_uri = GLib.filename_to_uri(self.get_path(), None)
+ return self.get_schema() + file_uri[8:]
+
+ def get_uri(self):
+ """
+ We keep a cache of NUM_SIMULTANEOUS_URIS uris for reuse. Every access
+ has a 1% chance of replacing a URI in the table with a new one.
+ """
+ index = random.randint(0, self.NUM_SIMULTANEOUS_URIS)
+ if index >= len(self._uri_table):
+ # The URI table isn't fully initialized yet...
+ uri = self.generate_uri()
+ self._uri_table.append(uri)
+ return uri
+ if random.random() < 0.01:
+ # Generate a new URI
+ self._uri_table[index] = self.generate_uri()
+ return self._uri_table[index]
+
+ def get_text(self):
+ num_words = abs(int(random.gauss(4, 3)))
+ return ' '.join(self.get_word() for i in range(num_words))
+
+ def get_subject_origin(self, uri):
+ scheme = GLib.uri_parse_scheme(uri)
+ if scheme == 'file':
+ return GLib.path_get_dirname(uri)
+ elif scheme in ('http', 'https'):
+ scheme, domain = uri.split('://', 1)
+ return '%s://%s' % (scheme, domain.split('/', 1)[0])
+ else:
+ return GLib.filename_to_uri(
+ self.get_path(force_directory=True), None)
+
+ def get_event_origin(self):
+ if random.random() < 0.005:
+ return self.get_uri()
+ return ''
+
+ def get_actor(self):
+ return 'application://%s' % random.choice(self._desktop_files)
+
+ def get_timestamp(self):
+ current_time = int(time.time() * 1000)
+ return random.randint(current_time - self.MAX_EVENT_AGE, current_time)
+
+ def get_event_interpretation(self):
+ interpretations = Interpretation.EVENT_INTERPRETATION.get_children()
+ return random.choice(list(interpretations))
+
+ def get_subject_interpretation(self):
+ ev_interp = Interpretation.EVENT_INTERPRETATION.get_children()
+ subj_interp = set(Interpretation.get_children())
+ subj_interp.difference_update(ev_interp)
+ return random.choice(list(subj_interp))
+
+ def get_event_manifestation(self):
+ if random.random() < 0.3:
+ manifestations = Manifestation.EVENT_MANIFESTATION.get_children()
+ return random.choice(list(manifestations))
+ else:
+ return Manifestation.USER_ACTIVITY
+
+ def get_subject_manifestation(self):
+ ev_manif = Manifestation.EVENT_MANIFESTATION.get_children()
+ subj_manif = set(Manifestation.get_children())
+ subj_manif.difference_update(ev_manif)
+ return random.choice(list(subj_manif))
+
+ def get_subject(self, event_interpretation):
+ uri = self.get_uri()
+
+ subject = Subject.new_for_values(
+ uri = uri,
+ current_uri = uri,
+ interpretation = self.get_subject_interpretation(),
+ manifestation = self.get_subject_manifestation(),
+ origin = self.get_subject_origin(uri),
+ mimetype = random.choice(self._mimetypes),
+ text = self.get_text(),
+ storage = "")
+
+ if event_interpretation == Interpretation.MOVE_EVENT:
+ while subject.uri == subject.current_uri:
+ subject.current_uri = self.get_uri()
+
+ return subject
+
+ def get_event(self):
+ event_interpretation = self.get_event_interpretation()
+ event = Event.new_for_values(
+ timestamp = self.get_timestamp(),
+ interpretation = event_interpretation,
+ manifestation = self.get_event_manifestation(),
+ actor = self.get_actor(),
+ origin = self.get_event_origin())
+
+ num_subjects = max(1, abs(int(random.gauss(1, 1))))
+ while len(event.subjects) < num_subjects:
+ subject = self.get_subject(event_interpretation)
+ if subject.uri not in (x.uri for x in event.get_subjects()):
+ # events with two subjects having the same URI aren't supported
+ event.append_subject(subject)
+
+ return event
+
+class EventInserter():
+
+ BUFFER_SIZE = 1000
+
+ _log = None
+ _buffer = None
+ _events_inserted = None
+
+ def __init__(self):
+ self._log = ZeitgeistDBusInterface()
+ self._buffer = []
+ self._events_inserted = 0
+
+ def insert(self, event):
+ buffer_full = len(self._buffer) >= self.BUFFER_SIZE
+ if buffer_full:
+ self.flush()
+ self._buffer.append(event)
+ return buffer_full
+
+ def flush(self):
+ if self._buffer:
+ self._log.InsertEvents(self._buffer)
+ self._events_inserted += len(self._buffer)
+ self._buffer = []
+
+ def get_insertion_count(self):
+ return self._events_inserted
+
+def main():
+ limit = '10000000' if len(sys.argv) < 2 else sys.argv[1]
+ if len(sys.argv) > 2 or not limit.isdigit():
+ print "Usage: %s [<num_events>]" % sys.argv[0]
+ sys.exit(1)
+ limit = int(limit)
+
+ event_inserter = EventInserter()
+ try:
+ generator = EventGenerator()
+ for i in xrange(limit):
+ event = generator.get_event()
+ event.payload = 'generate_events.py'
+ if event_inserter.insert(event):
+ print "Inserted %d events." % i
+ except KeyboardInterrupt:
+ pass
+ event_inserter.flush()
+ print "Inserted %d events. Done." % event_inserter.get_insertion_count()
+
+if __name__ == '__main__':
+ main()
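
A possible way to exercise the new generator against a throw-away database, following the ZEITGEIST_DATA_PATH warning in the script header (the paths and event count below are examples, not part of the patch):

    import os
    import subprocess
    import tempfile

    # Use a scratch data directory so the random events never touch the real
    # ~/.local/share/zeitgeist database; the Zeitgeist daemon must be started
    # with the same environment for the override to take effect.
    scratch = tempfile.mkdtemp(prefix='zeitgeist-scratch-')
    env = dict(os.environ, ZEITGEIST_DATA_PATH=scratch)

    subprocess.check_call(['tools/generate_events.py', '10000'], env=env)
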