summaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
authorMartyn Russell <martyn@lanedo.com>2013-10-03 16:27:29 +0100
committerMartyn Russell <martyn@lanedo.com>2013-10-03 16:29:21 +0100
commit29e9be5eaab7637265fb3f0e4ed821367bf46013 (patch)
treead0b209ffaa38603917830dd8a90c12ea3d39fb0 /utils
parent30d64d6bd2b7421e82cfdd41ec8c79ae891f49d1 (diff)
downloadtracker-29e9be5eaab7637265fb3f0e4ed821367bf46013.tar.gz
utils/sandbox: Added updated version of Sam's original tracker-sandbox
Formerly this was a shell script. I've updated this to a Python script and it has much more flexibility. Now you can run separate instances of tracker from different prefixes with different data sets for different content locations, i.e. have multiple data sets for different uses.
Diffstat (limited to 'utils')
-rw-r--r--utils/Makefile.am4
-rw-r--r--utils/sandbox/Makefile.am3
-rwxr-xr-xutils/sandbox/tracker-sandbox.py480
-rwxr-xr-xutils/tracker-sandbox146
4 files changed, 485 insertions, 148 deletions
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 28acddd6a..c871a3ac9 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -5,10 +5,10 @@ SUBDIRS = \
ontology \
data-generators \
mtp \
- tracker-sql
+ tracker-sql \
+ sandbox
if HAVE_TRACKER_RESDUMP
SUBDIRS += tracker-resdump
endif
-EXTRA_DIST = tracker-sandbox
diff --git a/utils/sandbox/Makefile.am b/utils/sandbox/Makefile.am
new file mode 100644
index 000000000..c6b1d39c2
--- /dev/null
+++ b/utils/sandbox/Makefile.am
@@ -0,0 +1,3 @@
+include $(top_srcdir)/Makefile.decl
+
+EXTRA_DIST = tracker-sandbox.py
diff --git a/utils/sandbox/tracker-sandbox.py b/utils/sandbox/tracker-sandbox.py
new file mode 100755
index 000000000..092398c15
--- /dev/null
+++ b/utils/sandbox/tracker-sandbox.py
@@ -0,0 +1,480 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2012-2013 Martyn Russell <martyn@lanedo.com>
+# Copyright (C) 2012 Sam Thursfield <sam.thursfield@codethink.co.uk>
+#
+# This script allows a user to utilise Tracker for local instances by
+# specifying an index directory location where the Tracker data is
+# stored and a content directory location where the content to be
+# indexed is kept. From there, queries or a shell can be launched to
+# use that data.
+#
+# This was initially a shell script by Sam and later converted into a
+# more comprehensive python script by Martyn.
+#
+# Usage:
+# - Create or update an index stored in tracker/ subdir with content in html/
+# tracker-sandbox.py -i tracker -c html -u
+# - Query for 'foo'
+# tracker-sandbox.py -i tracker -c html -q foo
+# - List files in index
+# tracker-sandbox.py -i tracker -c html -l
+# - Start shell with environment set up
+# tracker-sandbox.py -i tracker -c html -s
+# - Test with different prefixes, e.g. /usr/local installs
+# tracker-sandbox.py -i tracker -c html -s -p /usr/local
+# ...
+#
+# Changes:
+# - If you make _ANY_ changes, please send them in so I can incorporate them.
+#
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import os
+import subprocess
+import optparse
+import signal
+import sys
+import string
+import errno
+import gi
+
+from multiprocessing import Process
+
+import ConfigParser
+
+from gi.repository import Tracker, GObject
+
+# Script
+# Identity strings; printed for --version and used in the usage text
+script_name = 'tracker-sandbox'
+script_version = '0.1'
+script_about = 'Localised Tracker sandbox for content indexing and search'
+
+# Absolute forms of the -i / -c directories; assigned in environment_set()
+index_location_abs = ''
+content_location_abs = ''
+
+# Default for --prefix, and the TRACKER_VERBOSITY value used with --debug
+default_prefix = '/usr'
+default_debug_verbosity = 2
+
+# Session
+# D-Bus session details; these keep their placeholder values until
+# dbus_session_get_from_content() / environment_set() fill them in
+dbus_session_pid = -1
+dbus_session_address = ''
+dbus_session_file = ''
+
+# NOTE(review): nothing in this script assigns these two, so the
+# "store_pid > 0" branch in environment_unset() looks unreachable - confirm
+store_pid = -1
+store_proc = None
+
+# Template config file
+# Written as tracker/tracker-miner-fs.cfg under the index directory when that
+# file does not exist yet; environment_set() then sets
+# IndexRecursiveDirectories to the content location
+config_template = """
+[General]
+Verbosity=0
+SchedIdle=0
+InitialSleep=0
+
+[Monitors]
+EnableMonitors=false
+
+[Indexing]
+Throttle=0
+IndexOnBattery=true
+IndexOnBatteryFirstTime=true
+IndexRemovableMedia=false
+IndexOpticalDiscs=false
+LowDiskSpaceLimit=-1
+IndexRecursiveDirectories=;
+IndexSingleDirectories=;
+IgnoredDirectories=;
+IgnoredDirectoriesWithContent=;
+IgnoredFiles=
+CrawlingInterval=-1
+RemovableDaysThreshold=3
+
+[Writeback]
+EnableWriteback=false
+"""
+
+# Utilities
+def mkdir_p(path):
+    """Create path together with any missing parents.
+
+    An EEXIST failure from os.makedirs() is ignored; every other OSError
+    is passed on to the caller.
+    """
+    try:
+        os.makedirs(path)
+    except OSError as err:
+        if err.errno != errno.EEXIST:
+            raise
+
+def debug(message):
+    """Print message, but only when the --debug option was given."""
+    if not opts.debug:
+        return
+
+    print(message)
+
+# DB functions (sync for now)
+def db_query_have_files():
+    """Print how many nfo:FileDataObject resources the index contains."""
+    # An earlier update may have switched the backend to 'bus'; go back
+    # to direct access for this query.
+    os.environ['TRACKER_SPARQL_BACKEND'] = 'direct'
+
+    print('Using query to check index has data in it...')
+
+    sparql = 'select count(?urn) where { ?urn a nfo:FileDataObject }'
+    connection = Tracker.SparqlConnection.get(None)
+    results = connection.query(sparql, None)
+
+    # A count() query is expected to give a single row.
+    while results.next(None):
+        total = results.get_integer(0)
+        print(' Currently %d file(s) exist in our index' % total)
+
+def db_query_list_files():
+    """Print the nie:url of every nfo:FileDataObject in the index."""
+    # An earlier update may have switched the backend to 'bus'; go back
+    # to direct access for this query.
+    os.environ['TRACKER_SPARQL_BACKEND'] = 'direct'
+
+    print('Using query to list files indexed...')
+
+    sparql = 'select nie:url(?urn) where { ?urn a nfo:FileDataObject }'
+    connection = Tracker.SparqlConnection.get(None)
+    results = connection.query(sparql, None)
+
+    # One row per file; element 0 of what get_string() returns is printed.
+    while results.next(None):
+        url = results.get_string(0)[0]
+        print(' ' + url)
+
+def db_query_files_that_match():
+    """Print the URL of every indexed file that matches opts.query.
+
+    The search term comes from the command line and is placed inside a
+    double-quoted SPARQL literal, so it is escaped first. Without that, a
+    term containing a double quote or a backslash yields a malformed query.
+    """
+    conn = Tracker.SparqlConnection.get(None)
+
+    term = Tracker.sparql_escape_string(opts.query)
+    sparql = 'select nie:url(?urn) where { ?urn a nfo:FileDataObject . ?urn fts:match "%s" }' % term
+    cursor = conn.query(sparql, None)
+
+    print('Found:')
+
+    # One row per matching file. The result of get_string() is indexed
+    # with [0] - presumably a (string, length) pair; confirm against the
+    # introspected API.
+    while cursor.next(None):
+        print(' ' + cursor.get_string(0)[0])
+
+# Index functions
+def index_clean():
+    """Placeholder for resetting the index; at present it only logs."""
+    # The intended command appears to be: tracker-control -r
+    debug('Cleaning index')
+
+def index_update():
+    """Run tracker-miner-fs once in the foreground, then report the file count.
+
+    The miner binary is looked up under <prefix>/libexec and then
+    <prefix>/lib. The script exits with status 1 when neither exists or
+    when the miner returns a non-zero status.
+    """
+    debug('Updating index ...')
+    debug('--')
+
+    try:
+        binary = os.path.join(opts.prefix, 'libexec', 'tracker-miner-fs')
+        if not os.path.exists(binary):
+            binary = os.path.join(opts.prefix, 'lib', 'tracker-miner-fs')
+
+        if not os.path.exists(binary):
+            print('Could not find "tracker-miner-fs" in prefix lib/libexec directories')
+            print('Is Tracker installed properly?')
+            sys.exit(1)
+
+        # Mine data WITHOUT being a daemon, exit when done. Ignore desktop files
+        subprocess.check_output([binary, "--no-daemon", "--disable-miner=applications"])
+    except subprocess.CalledProcessError as e:
+        # 'as' matches the other handlers in this file and is accepted by
+        # Python 2.6+ as well as Python 3.
+        print('Could not run file system miner,' + e.output)
+        sys.exit(1)
+
+    debug('--')
+
+    # Reaching this point means the miner ran and returned zero.
+    print('Index now up to date!')
+
+    # Check we have data in our index...
+    db_query_have_files()
+
+def index_shell():
+    """Start an interactive /bin/bash and wait for the user to leave it."""
+    shell = "/bin/bash"
+
+    print('Starting shell... (type "exit" to finish)')
+    print('')
+
+    os.system(shell)
+
+# Environment / Clean up
+def dbus_session_get_from_content(content):
+    """Parse session text and store the result in the session globals.
+
+    content is expected to hold the bus address on its first line and the
+    daemon PID on its second. Returns False for empty content and True once
+    both values have been stored in dbus_session_address and
+    dbus_session_pid.
+
+    A missing address, or a PID that is missing, not a number, or not
+    positive, is treated as a corrupt session file: the matching message is
+    printed and the script exits with status 1. Previously a truncated or
+    garbled file raised IndexError or ValueError before those messages
+    could be shown.
+    """
+    global dbus_session_address
+    global dbus_session_pid
+
+    if not content:
+        print('Content was empty ... can not get DBus session information from empty string')
+        return False
+
+    lines = content.splitlines()
+
+    address = lines[0] if lines else ''
+    if address == '':
+        print('DBus session file was corrupt (no address), please remove "%s"' % (dbus_session_file))
+        sys.exit(1)
+
+    try:
+        pid = int(lines[1])
+    except (IndexError, ValueError):
+        # No second line, or a second line that is not a number.
+        pid = -1
+
+    # 0 is rejected as well: it is not a usable daemon PID, and
+    # environment_unset() only signals PIDs greater than 0.
+    if pid <= 0:
+        print('DBus session file was corrupt (no PID), please remove "%s"' % (dbus_session_file))
+        sys.exit(1)
+
+    dbus_session_address = address
+    dbus_session_pid = pid
+
+    return True
+
+def dbus_session_file_get():
+    """Load the session details stored in dbus_session_file.
+
+    Returns False when the file cannot be opened or read (the normal case
+    before a session has been set up); otherwise returns the result of
+    dbus_session_get_from_content() for the file's contents. Any other
+    failure is reported and re-raised.
+    """
+    try:
+        # 'with' closes the handle even if read() raises.
+        with open(dbus_session_file, 'r') as f:
+            content = f.read()
+    except IOError:
+        # Expect this if we have a new session to set up
+        return False
+    except Exception:
+        print "Unexpected error:", sys.exc_info()[0]
+        raise
+
+    return dbus_session_get_from_content(content)
+
+def dbus_session_file_set():
+    """Write the current session address and PID to dbus_session_file.
+
+    The address goes on the first line and the PID on the second, which is
+    the layout dbus_session_get_from_content() reads back. The
+    XDG_RUNTIME_DIR directory is created first if needed.
+    """
+    mkdir_p(os.environ['XDG_RUNTIME_DIR'])
+
+    content = '%s\n%s' % (dbus_session_address, dbus_session_pid)
+
+    # 'with' closes the handle even if write() raises.
+    with open(dbus_session_file, 'w') as f:
+        f.write(content)
+
+def environment_unset():
+    """Remove the session file and stop the processes recorded in the globals.
+
+    Deletes dbus_session_file when one is set, sends SIGTERM to the D-Bus
+    daemon when its PID is known and, only after an update run, to the
+    Tracker store when store_pid is set. A session file or daemon that has
+    already gone away is not treated as an error.
+    """
+    debug('Cleaning up files ...')
+
+    if not dbus_session_file == '':
+        debug(' Removing DBus session file')
+        try:
+            os.unlink(dbus_session_file)
+        except OSError as exc:
+            # Already removed is fine; anything else is a real problem.
+            if exc.errno != errno.ENOENT:
+                raise
+
+    debug('Cleaning up processes ...')
+
+    if dbus_session_pid > 0:
+        debug(' Killing DBus session')
+        try:
+            os.kill(dbus_session_pid, signal.SIGTERM)
+        except (SystemError, OSError): # (3, 'No such process') old python-schedutils incorrectly raised SystemError
+            # debug() takes a single string, so format before calling it;
+            # passing the PID as a second argument raised TypeError here.
+            debug(' Process %d not found' % dbus_session_pid)
+
+    if not opts.update:
+        return
+
+    # FIXME: clean up tracker-store, can't use tracker-control for this,
+    # that kills everything it finds in /proc sadly.
+    if store_pid > 0:
+        debug(' Killing Tracker store')
+        os.kill(store_pid, signal.SIGTERM)
+
+def environment_set_and_add_path(env, prefix, suffix):
+    """Put <prefix>/<suffix> at the front of the colon-separated variable env.
+
+    When env is unset or empty it is simply set to the new path. An empty
+    existing value is not appended, because the trailing ':' it would leave
+    behind is an empty entry, which PATH-style variables treat as the
+    current directory.
+    """
+    new = os.path.join(prefix, suffix)
+    existing = os.environ.get(env)
+
+    if existing:
+        os.environ[env] = '%s:%s' % (new, existing)
+    else:
+        os.environ[env] = new
+
+def environment_set():
+ """Prepare the process environment for a sandboxed Tracker run.
+
+ Points the XDG data/config/cache/runtime directories at the index
+ location, exports the Tracker-related variables, writes the file system
+ miner config, and then reuses or starts a D-Bus session whose address
+ is exported as DBUS_SESSION_BUS_ADDRESS.
+ """
+ # Environment
+ global dbus_session_address
+ global dbus_session_pid
+ global dbus_session_file
+ global index_location_abs
+ global content_location_abs
+ global default_debug_verbosity
+
+ index_location_abs = os.path.abspath (opts.index_location)
+
+ if opts.update:
+ # Only needed for updating index
+ content_location_abs = os.path.abspath (opts.content_location)
+
+ # Data
+ # All four XDG directories share the index location
+ os.environ['XDG_DATA_HOME'] = '%s' % index_location_abs
+ os.environ['XDG_CONFIG_HOME'] = '%s' % index_location_abs
+ os.environ['XDG_CACHE_HOME'] = '%s' % index_location_abs
+ os.environ['XDG_RUNTIME_DIR'] = '%s' % index_location_abs
+
+ # Prefix - only set if non-standard
+ if opts.prefix != default_prefix:
+ environment_set_and_add_path ('PATH', opts.prefix, 'bin')
+ environment_set_and_add_path ('LD_LIBRARY_PATH', opts.prefix, 'lib')
+ environment_set_and_add_path ('XDG_DATA_DIRS', opts.prefix, 'share')
+
+ os.environ['TRACKER_DB_ONTOLOGIES_DIR'] = os.path.join(opts.prefix, 'share', 'tracker', 'ontologies')
+ os.environ['TRACKER_EXTRACTOR_RULES_DIR'] = os.path.join(opts.prefix, 'share', 'tracker', 'extract-rules')
+ os.environ['TRACKER_LANGUAGE_STOPWORDS_DIR'] = os.path.join(opts.prefix, 'share', 'tracker', 'languages')
+
+ # Preferences
+ os.environ['TRACKER_USE_CONFIG_FILES'] = 'yes'
+
+ if opts.update:
+ # Updates need to use the bus
+ os.environ['TRACKER_SPARQL_BACKEND'] = 'bus'
+ else:
+ # Queries can use readonly access to the database directly
+ os.environ['TRACKER_SPARQL_BACKEND'] = 'direct'
+
+ if opts.debug:
+ os.environ['G_MESSAGES_DEBUG'] = 'all'
+ os.environ['TRACKER_VERBOSITY'] = '%d' % default_debug_verbosity
+ os.environ['DBUS_VERBOSE'] = '1'
+ else:
+ os.environ['TRACKER_VERBOSITY'] = '0'
+
+ debug('Using prefix location "%s"' % opts.prefix)
+ debug('Using index location "%s"' % index_location_abs)
+
+ if opts.update:
+ debug('Using content location "%s"' % content_location_abs)
+
+ # Make sure File System miner is configured correctly
+ config_dir = os.path.join(os.environ['XDG_CONFIG_HOME'], 'tracker')
+ config_filename = os.path.join(config_dir, 'tracker-miner-fs.cfg')
+
+ debug('Using config file "%s"' % config_filename)
+
+ # Only update config if we're updating the database
+ mkdir_p(config_dir)
+
+ # Seed the config from config_template the first time only
+ if not os.path.exists(config_filename):
+ f = open(config_filename, 'w')
+ f.write(config_template)
+ f.close()
+
+ debug(' New file written')
+
+ # Set content path
+ config = ConfigParser.ConfigParser()
+ # Replace the parser's option-name transform with str so option names
+ # are written back with their original case
+ config.optionxform = str
+ config.read(config_filename)
+ # NOTE(review): content_location_abs is only assigned when opts.update is
+ # set; if this line also runs without --update the value written is ";"
+ # - confirm that is intended
+ config.set('Indexing', 'IndexRecursiveDirectories', content_location_abs + ";")
+
+ with open(config_filename, 'wb') as f:
+ config.write(f)
+
+ # Ensure directory exists
+ # DBus specific instance
+ dbus_session_file = os.path.join(os.environ['XDG_RUNTIME_DIR'], 'dbus-session')
+
+ # Reuse a session recorded in the session file, otherwise start a daemon
+ if dbus_session_file_get() == False:
+ # NOTE(review): the daemon path is absolute; systems that install
+ # dbus-daemon somewhere other than /bin would fail here - confirm
+ output = subprocess.check_output(["/bin/dbus-daemon",
+ "--session",
+ "--print-address=1",
+ "--print-pid=1",
+ "--fork"])
+
+ # The daemon's output is parsed the same way as a saved session file
+ dbus_session_get_from_content(output)
+ dbus_session_file_set()
+ debug('Using new D-Bus session with address "%s" with PID %d' % (dbus_session_address, dbus_session_pid))
+ else:
+ debug('Using existing D-Bus session with address "%s" with PID %d' % (dbus_session_address, dbus_session_pid))
+
+ # Important, other subprocesses must use our new bus
+ os.environ['DBUS_SESSION_BUS_ADDRESS'] = dbus_session_address
+
+# Entry point/start
+if __name__ == "__main__":
+    # Entry point: parse and validate the command line, set up the sandbox
+    # environment, run the requested actions, then clean up.
+
+    # Parse command line
+    usage_oneline = '%s -i <DIR> -c <DIR> [OPTION...]' % (os.path.basename(sys.argv[0]))
+    usage = '\n %s - %s' % (usage_oneline, script_about)
+    usage_invalid = 'Usage:\n %s' % (usage_oneline)
+
+    # Flag-style options use action='count', so they are None when absent
+    # and a positive integer when given.
+    popt = optparse.OptionParser(usage)
+    popt.add_option('-v', '--version',
+                    action = 'count',
+                    dest = 'version',
+                    help = 'show version information')
+    popt.add_option('-d', '--debug',
+                    action = 'count',
+                    dest = 'debug',
+                    help = 'show additional debugging')
+    popt.add_option('-p', '--prefix',
+                    action = 'store',
+                    metavar = 'PATH',
+                    dest = 'prefix',
+                    default = default_prefix,
+                    help = 'use a non-standard prefix (default="%s")' % default_prefix)
+    popt.add_option('-i', '--index',
+                    action = 'store',
+                    metavar = 'DIR',
+                    dest = 'index_location',
+                    help = 'directory storing the index')
+    popt.add_option('-c', '--content',
+                    action = 'store',
+                    metavar = 'DIR',
+                    dest = 'content_location',
+                    help = 'directory storing the content which is indexed')
+    popt.add_option('-u', '--update',
+                    action = 'count',
+                    dest = 'update',
+                    help = 'update index/database from content')
+    popt.add_option('-l', '--list-files',
+                    action = 'count',
+                    dest = 'list_files',
+                    help = 'list files indexed')
+    popt.add_option('-s', '--shell',
+                    action = 'count',
+                    dest = 'shell',
+                    help = 'start a shell with the environment set up')
+    popt.add_option('-q', '--query',
+                    action = 'store',
+                    metavar = 'CRITERIA',
+                    dest = 'query',
+                    help = 'what content to look for in files')
+
+    # opts stays a module-level name: debug() and the helpers read it.
+    (opts, args) = popt.parse_args()
+
+    if opts.version:
+        print('%s %s\n%s\n' % (script_name, script_version, script_about))
+        sys.exit(0)
+
+    if not opts.index_location and not opts.content_location:
+        print('Expected index (-i) or content (-c) locations to be specified')
+        print(usage_invalid)
+        sys.exit(1)
+
+    if opts.update and (not opts.index_location or not opts.content_location):
+        print('Expected index (-i) and content (-c) locations to be specified')
+        print('These arguments are required to update the index databases')
+        sys.exit(1)
+
+    # The original tested opts.query twice here; once is enough.
+    if (opts.query or opts.list_files or opts.shell) and not opts.index_location:
+        print('Expected index location (-i) to be specified')
+        print('This argument is required to use the content that has been indexed')
+        sys.exit(1)
+
+    if not opts.update and not opts.query and not opts.list_files and not opts.shell:
+        print('No action specified (e.g. update (-u), shell (-s), list files (-l), etc)\n')
+        print('%s %s\n%s\n' % (script_name, script_version, script_about))
+        print(usage_invalid)
+        sys.exit(1)
+
+    # Set up environment variables and foo needed to get started.
+    environment_set()
+
+    # NOTE(review): the sys.exit() calls inside this try raise SystemExit,
+    # which the KeyboardInterrupt handler does not catch, so
+    # environment_unset() below is skipped on those paths and the D-Bus
+    # session and its file are left behind - confirm that is intended.
+    try:
+        if opts.update:
+            index_update()
+
+        if opts.list_files:
+            db_query_list_files()
+
+        if opts.shell:
+            index_shell()
+            sys.exit(0)
+
+        if opts.query:
+            if not os.path.exists(index_location_abs):
+                print('Can not query yet, index has not been created, see --update or -u')
+                print(usage_invalid)
+                sys.exit(1)
+
+            db_query_files_that_match()
+
+    except KeyboardInterrupt:
+        print('Handling Ctrl+C')
+
+    environment_unset()
diff --git a/utils/tracker-sandbox b/utils/tracker-sandbox
deleted file mode 100755
index 7b95332dc..000000000
--- a/utils/tracker-sandbox
+++ /dev/null
@@ -1,146 +0,0 @@
-#!/bin/sh
-
-# Lightweight script for running test instances of Tracker. Less effort
-# than running a full instance of gnome-session from jhbuild. Primary
-# goal is to avoid messing with your actual data.
-
-usage() {
- echo "tracker-sandbox:"
- echo " Creates a lightweight test environment for Tracker, to avoid"
- echo " messing with your real data. The safest mechanism is to run"
- echo " the sandbox as a different user, but also supports running in"
- echo " your real user account but with a database in /tmp/tracker-test"
- echo
- echo " Multiple instances of tracker-sandbox will share a session."
- echo " Currently the first instance owns the session and those started"
- echo " later will stop working once the first instance has exited."
- echo
- echo "Recommended usage:"
- echo " su <dummy user account>"
- echo " tracker-sandbox --user"
- echo
- echo "Alternative usage:"
- echo " tracker-sandbox"
- echo
- echo "Other options:"
- echo " --help Show this information"
- echo " -p, --prefix DIR Set up environment to use Tracker installed"
- echo " in DIR (similar to 'jhbuild shell')"
-}
-
-PREFIX=
-SEPARATE_USER_MODE=false
-
-while [ $# -gt 0 ]; do
- case $1 in
- --help)
- usage
- exit 0
- ;;
- --prefix|-p)
- shift
- if [ -z "$1" ]; then
- echo "Error: --prefix option requires an argument"
- exit 127
- fi
- PREFIX=$1
- ;;
- --user)
- SEPARATE_USER_MODE=true
- ;;
- *)
- echo "Error: unknown option $1"
- echo "Run '$0 --help' for help."
- exit 127
- ;;
- esac
- shift
-done
-
-DBUS_SESSION_BUS_PID=
-
-set -o errexit
-
-if [ "$SEPARATE_USER_MODE" != "true" ]; then
- export DCONF_PROFILE=$PREFIX/share/tracker-tests/trackertest
-
- if ! [ -e $DCONF_PROFILE ]; then
- echo "Warning: did not find file '$DCONF_PROFILE'"
- echo "The DConf profile allows the sandboxed Tracker to be configured"
- echo "independently from the system Tracker. This file should be "
- echo "installed if Tracker was configured with --enable-functional-tests."
- echo
- fi
-
- TEMP_DIR=/tmp/tracker-test
- mkdir -p $TEMP_DIR
- export XDG_CACHE_HOME=$TEMP_DIR
- export XDG_CONFIG_HOME=$TEMP_DIR
- export XDG_DATA_HOME=$TEMP_DIR
-
- export XDG_RUNTIME_DIR=${XDG_RUNTIME_DIR:-$TEMP_DIR}
-
- if [ ! -O "$XDG_RUNTIME_DIR" ]; then
- echo "Error: $XDG_RUNTIME_DIR is not writable by current user ($(whoami))."
- echo "Use the '--user' option if you have switched to a dummy user account."
- exit 1
- fi
-else
- # We can't create a 'real' runtime dir without root, but for testing
- # the security implications are irrelevant.
- XDG_RUNTIME_DIR=/tmp/tracker-test-$(whoami)
- mkdir -p $XDG_RUNTIME_DIR
- export XDG_RUNTIME_DIR
-fi
-
-if [ -n "$PREFIX" ]; then
- if [ ! -d "$PREFIX" ]; then
- echo "Error: unable to find prefix '$PREFIX'"
- exit 1
- fi
-
- # Interestingly, 'jhbuild run' *doesn't* alter PATH - I wonder why?
- export PATH="$PREFIX/bin:$PATH"
- export LD_LIBRARY_PATH="$PREFIX/lib:$PATH"
- export XDG_DATA_DIRS="$PREFIX/share:$XDG_DATA_DIRS"
-
- export TRACKER_DB_ONTOLOGIES_DIR="$PREFIX/share/tracker/ontologies"
- export TRACKER_EXTRACTOR_RULES_DIR="$PREFIX/share/tracker/extract-rules"
- export TRACKER_LANGUAGE_STOPWORDS_DIR="$PREFIX/share/tracker/languages"
-fi
-
-set -o nounset
-
-if [ "$SEPARATE_USER_MODE" != "true" ]; then
- echo -n "Running as $(whoami) with data in $TEMP_DIR"
- [ -e "$TEMP_DIR/tracker/meta.db" ] && echo -n " (previously used)"; echo
-else
- echo "Running as $(whoami) using real Tracker store"
-fi
-
-if [ -n "$PREFIX" ]; then
- echo "Using Tracker from $PREFIX"
-fi
-
-SESSION_FILE="$XDG_RUNTIME_DIR/tracker-sandbox"
-
-# Slight race condition here if you start two instances simultaneously .. so don't
-if [ ! -e "$SESSION_FILE" ]; then
- eval $(dbus-launch --sh-syntax | tee $SESSION_FILE)
-
- trap "rm \"$SESSION_FILE\"; /bin/kill $DBUS_SESSION_BUS_PID; exit" INT TERM EXIT
-
- echo "[$DBUS_SESSION_BUS_PID] Launched new session at $DBUS_SESSION_BUS_ADDRESS"
-else
- eval $(cat $SESSION_FILE)
-
- echo "Using existing session at $DBUS_SESSION_BUS_ADDRESS"
-fi
-
-sh
-
-# Cleanup handled by 'trap' handler above.
-
-# It would be nice if we could ref-count the session instead of pulling the rug
-# out from under any other tracker-sandbox processes that are still running.
-