author     Sam Thursfield <sam.thursfield@codethink.co.uk>   2015-06-30 18:43:04 +0100
committer  Sam Thursfield <sam.thursfield@codethink.co.uk>   2015-06-30 18:43:04 +0100
commit     bd421595def748b2c1d22787a5b5ace7496c8aa4 (patch)
tree       d9f9828e452a1890ac1a8fda91a7cd56b96deff1
parent     8586368f88e12833588a40e8b03d33841e23f969 (diff)
download   definitions-bd421595def748b2c1d22787a5b5ace7496c8aa4.tar.gz
schema: Add support for Virtuoso to the importer.
Change-Id: I38d3b9c055392880f52f6fb518d87037a70237f5
-rw-r--r--  schema/surf-test.py | 63
1 file changed, 53 insertions(+), 10 deletions(-)
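
For context: the patch keeps the existing in-memory rdflib store as a 'memory' backend for testing and adds a 'virtuoso' backend that writes to a local SPARQL endpoint. Below is a minimal, standalone sketch of the SuRF pattern the importer relies on (register a namespace, open a store, create a session, build an entity, commit), assuming SuRF is installed; the chunk URI and the baserock_name property are illustrative stand-ins rather than values taken from the real schema.

    import surf

    surf.ns.register(baserock='http://baserock.org/definitions/example-schema#')

    # In-memory backend, matching the importer's 'memory' mode.
    store = surf.Store(reader='rdflib', writer='rdflib', rdflib_store='IOMemory')
    session = surf.Session(store)

    # Build one entity and commit it, roughly what load_morph() does per .morph file.
    Chunk = session.get_class(surf.ns.BASEROCK.Chunk)
    chunk = Chunk('http://example.com/chunks/hello')
    chunk.baserock_name = 'hello'   # hypothetical property; SuRF does not validate it
    session.commit()

    print len(list(Chunk.all()))    # expect 1

Switching the same session to Virtuoso only changes the surf.Store() arguments, which is exactly what the DATABASE switch in the patch does.
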
diff --git a/schema/surf-test.py b/schema/surf-test.py
index 532d1a74..f5d47e2c 100644
--- a/schema/surf-test.py
+++ b/schema/surf-test.py
@@ -11,17 +11,40 @@ import os
import warnings
+DATABASE = 'memory'
+#DATABASE = 'virtuoso'
+
+
surf.ns.register(baserock='http://baserock.org/definitions/example-schema#')
-graph = rdflib.Graph()
-graph.parse("baserock-owl-schema.turtle", format="turtle")
-store = surf.Store(reader='rdflib', writer='rdflib', rdflib_store='IOMemory')
+if DATABASE == 'memory':
+ # For testing
+ store = surf.Store(reader='rdflib', writer='rdflib', rdflib_store='IOMemory')
+
+elif DATABASE == 'virtuoso':
+ # For importing into Virtuoso database
+ # See: https://pythonhosted.org/SuRF/integration/virtuoso.html
+ # Note that you need to work around a bug in Virtuoso in order to
+ # use this. See https://github.com/RDFLib/rdflib/issues/298 for more
+ # info. Virtuoso expects the url parameter in the SPARQL update
+ # request to be called 'query' rather than 'update'. You can change
+ # this on line 488 of SPARQLWrapper/Wrapper.py of
+ # <https://github.com/RDFLib/sparqlwrapper/> as a nasty workaround.
+ store = surf.Store(reader='sparql_protocol',
+ writer='sparql_protocol',
+ endpoint='http://localhost:8890/sparql',
+ default_context='http://example.com')
+
+# This doesn't actually achieve anything. It also doesn't work with the
+# SPARQL writer backend.
+#store.load_triples(source='baserock-owl-schema.turtle', format='turtle')
+# This doesn't do anything either.
+#graph = rdflib.Graph()
+#graph.parse("baserock-owl-schema.turtle", format="turtle")
session = surf.Session(store)
-store.load_triples(source='baserock-owl-schema.turtle', format='turtle')
-
OWL_Class = session.get_class(surf.ns.OWL.Class)
classes = OWL_Class.all()
@@ -50,10 +73,13 @@ def load_morph(path):
warnings.warn("Problem loading %s: %s" % (path, e))
# FIXME:
- base_uri = 'http://localhost/'
+ base_uri = 'http://example.com/'
entity = None
+    # Note that the 'surf' library doesn't seem to do any validity checking,
+    # so you can insert whatever data you like.
+
if contents['kind'] == 'chunk':
chunk_uri = base_uri + 'chunks/' + contents['name']
entity = chunk = Chunk(chunk_uri)
@@ -195,20 +221,37 @@ def load_morph(path):
+print 'Parsing .morph files...'
for dirname, dirnames, filenames in os.walk('..'):
if '.git' in dirnames:
dirnames.remove('.git')
for filename in sorted(filenames):
if filename.endswith('.morph'):
load_morph(os.path.join(dirname, filename))
-session.commit()
+print 'Committing to database...'
+try:
+ session.commit()
+except Exception as e:
+    if DATABASE == 'virtuoso' and \
+ 'Virtuoso 42000 Error SR186: No permission to execute procedure DB' in e.message:
+ print("Permission denied trying to update the database via the "
+ "SPARQL endpoint. By default this endpoint is read-only. "
+              "To enable write access, start the `isql` or `isql-vt` "
+              "command-line interface and run the following statement:\n\n"
+ " grant SPARQL_UPDATE to \"SPARQL\";\n\n"
+ "WARNING! Only do this if you're using a local test instance "
+ "of Virtuoso. You need to set up a real authenticated user "
+ "account if using an instance of Virtuoso that has "
+ "important data on it.")
+ else:
+ raise
+
+
+store.save()
#Cluster = session.get_class(surf.ns.BASEROCK.Cluster)
#cluster = Cluster.all()
#for s in cluster:
# s.load()
# print s.serialize('json')
-
-import pdb
-pdb.set_trace()
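
The commented-out Cluster block above hints at reading data back out through the same session. Here is a short smoke-test sketch along those lines, assuming it runs at the end of surf-test.py so that the session and the baserock namespace already exist; the load() and serialize('json') calls are copied from the commented-out code rather than checked against SuRF's documentation.

    # Read back what the importer stored; works with either backend.
    Chunk = session.get_class(surf.ns.BASEROCK.Chunk)
    for chunk in Chunk.all():
        chunk.load()                    # pull in all triples for this resource
        print chunk.serialize('json')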