author    Jim McCusker <jmccusker@5amsolutions.com>  2014-11-21 01:14:12 -0500
committer Jim McCusker <jmccusker@5amsolutions.com>  2014-11-21 01:14:12 -0500
commit    0419fa454fee006d330aaec9ac2b78437617b3bb (patch)
tree      9493c0c2d089ecf490d155c95ba4f2af3cb94c80 /examples
parent    d911bc45d297ef0f76fe8385b505d40dcd1c25e1 (diff)
download  rdflib-0419fa454fee006d330aaec9ac2b78437617b3bb.tar.gz
added multithreading to the benchmark.
Diffstat (limited to 'examples')
-rw-r--r--  examples/graph_digest_benchmark.py | 52
1 file changed, 40 insertions(+), 12 deletions(-)
diff --git a/examples/graph_digest_benchmark.py b/examples/graph_digest_benchmark.py
index 326598d1..1d3461fe 100644
--- a/examples/graph_digest_benchmark.py
+++ b/examples/graph_digest_benchmark.py
@@ -12,6 +12,9 @@ from urllib import *
from io import StringIO
from collections import defaultdict
+from threading import *
+from Queue import Queue, Empty
+
bioportal_query = '''
PREFIX metadata: <http://data.bioontology.org/metadata/>
@@ -39,9 +42,10 @@ stat_cols = [
'graph_digest',
'to_hash_runtime',
'canonicalize_triples_runtime',
+ 'error',
]
-def bioportal_benchmark(apikey, output_file):
+def bioportal_benchmark(apikey, output_file, threads):
metadata = Namespace("http://data.bioontology.org/metadata/")
url = 'http://data.bioontology.org/ontologies?apikey=%s'%apikey
ontology_graph = Graph()
@@ -52,6 +56,32 @@ def bioportal_benchmark(apikey, output_file):
w = open(output_file, 'w')
writer = csv.DictWriter(w,stat_cols)
writer.writeheader()
+ tasks = Queue()
+ finished_tasks = Queue()
+ lock = Lock()
+ task_count = len(ontologies)
+ class Worker(Thread):
+ def run(self):
+ while True:
+ lock.acquire()
+ stats = tasks.get()
+ lock.release()
+ print stats['ontology'], stats['download_url']
+ try:
+ og = Graph()
+ og.load(stats['download_url']+"?apikey=%s"%apikey)
+ ig = to_isomorphic(og)
+ graph_digest = ig.graph_digest(stats)
+ except Exception as e:
+ print e
+ stats['error'] = str(e)
+ finished_tasks.put(stats)
+ tasks.task_done()
+ for i in range(int(threads)):
+ print "Starting worker", i
+ t = Worker()
+ t.daemon = True
+ t.start()
for ontology, title, download in ontologies:
stats = defaultdict(str)
stats.update({
@@ -59,16 +89,14 @@ def bioportal_benchmark(apikey, output_file):
"ontology": title,
"download_url": download
})
- print title, download
- try:
- og = Graph()
- og.load(download+"?apikey=%s"%apikey)
- ig = to_isomorphic(og)
- graph_digest = ig.graph_digest(stats)
- writer.writerow(stats)
- w.flush()
- except Exception as e:
- print e
+ tasks.put(stats)
+ written_tasks = 0
+ while written_tasks < task_count:
+ stats = finished_tasks.get()
+ print "Writing", stats['ontology']
+ writer.writerow(stats)
+ w.flush()
+ written_tasks += 1
if __name__ == '__main__':
- bioportal_benchmark(sys.argv[1], sys.argv[2])
+ bioportal_benchmark(sys.argv[1], sys.argv[2], sys.argv[3])
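
For reference, the producer/consumer pattern this commit introduces can be written in Python 3 roughly as follows (the diff targets Python 2, hence the Queue module and print statements). This is a minimal sketch, not the benchmark itself: process_task, run_benchmark, task_dicts, and fieldnames are illustrative names, and only the queue/worker wiring mirrors the diff above.

    import csv
    import queue
    import threading

    def process_task(stats):
        # Hypothetical stand-in for the real work done in Worker.run() above:
        # download the ontology, build the graph, and compute its digest.
        pass

    def run_benchmark(task_dicts, output_file, threads, fieldnames):
        tasks = queue.Queue()
        finished = queue.Queue()

        def worker():
            while True:
                # Queue.get() is already thread-safe, so the explicit Lock
                # used in the diff is not needed here.
                stats = tasks.get()
                try:
                    process_task(stats)
                except Exception as e:
                    # Record failures instead of crashing the worker,
                    # matching the 'error' column added to stat_cols.
                    stats['error'] = str(e)
                finished.put(stats)
                tasks.task_done()

        # Start the daemon workers first, then feed them tasks.
        for _ in range(int(threads)):
            threading.Thread(target=worker, daemon=True).start()

        for stats in task_dicts:
            tasks.put(stats)

        # The main thread stays the single CSV writer, as in the diff.
        with open(output_file, 'w', newline='') as w:
            writer = csv.DictWriter(w, fieldnames)
            writer.writeheader()
            for _ in range(len(task_dicts)):
                writer.writerow(finished.get())
                w.flush()

With the new threads parameter (see the sys.argv change at the end of the diff), the updated script now takes three positional arguments, e.g.:

    python graph_digest_benchmark.py <apikey> <output_file> <threads>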