summaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
authorJim McCusker <jmccusker@5amsolutions.com>2014-11-21 02:23:30 -0500
committerJim McCusker <jmccusker@5amsolutions.com>2014-11-21 02:23:30 -0500
commitceae5427e2fe938538044791291d4646cb8b9bf9 (patch)
treeb11a71330b0a128063742d59bafeb6930d980a47 /examples
parent86e324e4092e5951c9031c041f5ea16ad59a0b47 (diff)
downloadrdflib-ceae5427e2fe938538044791291d4646cb8b9bf9.tar.gz
throttling the bioportal downloads to a max of 4 connections.
Diffstat (limited to 'examples')
-rw-r--r--examples/graph_digest_benchmark.py23
1 files changed, 12 insertions, 11 deletions
diff --git a/examples/graph_digest_benchmark.py b/examples/graph_digest_benchmark.py
index c3d69239..ef7a8da9 100644
--- a/examples/graph_digest_benchmark.py
+++ b/examples/graph_digest_benchmark.py
@@ -11,6 +11,7 @@ import sys, csv
from urllib import *
from io import StringIO
from collections import defaultdict
+from urllib2 import urlopen
from multiprocessing import *
from Queue import Empty
@@ -58,14 +59,20 @@ def bioportal_benchmark(apikey, output_file, threads):
writer.writeheader()
tasks = Queue()
finished_tasks = Queue()
+ dl_lock = Semaphore(4)
task_count = len(ontologies)
- def worker(q,finished_tasks):
+ def worker(q,finished_tasks, dl_lock):
try:
while True:
stats = q.get()
- og = stats['graph']
- print stats['ontology'], stats['download_url']
+ og = Graph()
try:
+ try:
+ dl_lock.acquire()
+ og.load(stats['download_url']+"?apikey=%s"%apikey)
+ finally:
+ dl_lock.release()
+ print stats['ontology'], stats['download_url']
ig = to_isomorphic(og)
graph_digest = ig.graph_digest(stats)
except Exception as e:
@@ -76,7 +83,7 @@ def bioportal_benchmark(apikey, output_file, threads):
pass
for i in range(int(threads)):
print "Starting worker", i
- t = Process(target=worker, args=[tasks,finished_tasks])
+ t = Process(target=worker, args=[tasks,finished_tasks, dl_lock])
t.daemon = True
t.start()
for ontology, title, download in ontologies:
@@ -86,13 +93,7 @@ def bioportal_benchmark(apikey, output_file, threads):
"ontology": title,
"download_url": download
})
- try:
- og = Graph()
- og.load(stats['download_url']+"?apikey=%s"%apikey)
- stats['graph'] = og
- tasks.put(stats)
- except Exception as e:
- print ontology, e
+ tasks.put(stats)
tasks.close()
written_tasks = 0
while written_tasks < task_count: