1 files changed, 126 insertions, 101 deletions
diff --git a/schema/parse.py b/schema/parse.py
index 36d09466..64eebd01 100644
--- a/schema/parse.py
+++ b/schema/parse.py
@@ -1,54 +1,86 @@
-# Load Baserock Definitions serialisation format V5 into a SurfRDF 'store'.
+# Copyright (C) 2015  Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-# SurfRDF: https://github.com/cosminbasca/surfrdf
 
-# I've now decided that using RDFLib (a lower-level library used by SurfRDF)
-# directly would be just as simple as using the object-mapping provided by
-# SurfRDF. The rdflib.resource.Resource class is actually a pretty good
-# object-mapper itself, if we want one.
+'''parse.py
 
-# Note that neither surf.Resource nor rdflib.resource.Resource do any
-# validation of what properties you set according to the schema. You can
-# set chunk.baserock_dogfood='http://delicious' if you want.
+Load a set of Baserock definitions from on-disk .morph files, and return an
+rdflib.Graph instance containing the data.
+
+This code understands the syntax of Baserock Definitions format version 5.
+
+The current version of the Baserock Definitions format is defined at:
+
+    http://wiki.baserock.org/definitions/current
+
+'''
 
 
 import rdflib
-import surf
 import yaml
 
 import os
 import warnings
 
 
-surf.ns.register(baserock='http://baserock.org/definitions/example-schema#')
-surf.ns.register(dc='http://purl.org/dc/terms/')
+BASEROCK = rdflib.Namespace('http://baserock.org/definitions/example-schema#')
+DUBLIN_CORE = rdflib.Namespace('http://purl.org/dc/terms/')
 
 
 def parse_morph_file(path):
+    '''Parse an individual .morph file.
+
+    This function does a tiny amount of validation: checking the 'name' and
+    'kind' fields.
+
+    Returns a Python dict with the entire contents of the file deserialised
+    from YAML.
+
+    '''
     with open(path) as f:
         text = f.read()
     contents = yaml.safe_load(text)
-    morph_type = contents['kind']
     assert 'name' in contents
     assert contents['kind'] in ['cluster', 'system', 'stratum', 'chunk']
     return contents
 
 
 def get_name_from_morph_file(path):
+    '''Returns the 'name' defined in a specific .morph file.
+
+    This is a convenience function for resolving places where one .morph file
+    in a set references another one.
+
+    '''
     contents = parse_morph_file(path)
     return contents['name']
 
 
-def load_all_morphologies(session, store, path='.'):
-    Chunk = session.get_class(surf.ns.BASEROCK.Chunk)
-    ChunkReference = session.get_class(surf.ns.BASEROCK.ChunkReference)
-    Stratum = session.get_class(surf.ns.BASEROCK.Stratum)
-    StratumArtifact = session.get_class(surf.ns.BASEROCK.StratumArtifact)
-    System = session.get_class(surf.ns.BASEROCK.System)
-    SystemDeployment = session.get_class(surf.ns.BASEROCK.SystemDeployment)
-    Cluster = session.get_class(surf.ns.BASEROCK.Cluster)
+# FIXME: get_uri_for_resource()
+
+
+# FIXME: can you use assignment instead of Resource.set ?
+
+def load_all_morphologies(path='.'):
+    '''Load Baserock Definitions serialisation format V5 as an RDFLib 'graph'.
 
+    This code does very little validation, so the 'graph' that it returns may
+    not fully make sense according to the Baserock data model.
+
+    '''
     toplevel_path = path
+    graph = rdflib.Graph()
 
     def load_morph(toplevel_path, filename):
         try:
@@ -61,59 +93,67 @@ def load_all_morphologies(session, store, path='.'):
 
         entity = None
 
-        # Note the 'surf' library doesn't seem to do any kind of validity checking
-        # so you can insert whatever random data you feel like, if you want.
-
         if contents['kind'] == 'chunk':
             chunk_uri = base_uri + 'chunks/' + contents['name']
-            entity = chunk = Chunk(chunk_uri)
+            entity = chunk = rdflib.resource.Resource(
+                graph, rdflib.URIRef(chunk_uri))
 
             # FIXME: order is lost here !!!!!
             if 'pre-configure-commands' in contents:
-                chunk.baserock_preConfigureCommands = contents['pre-configure-commands']
+                chunk.add(BASEROCK.preConfigureCommands,
+                          rdflib.Literal(contents['pre-configure-commands']))
             if 'configure-commands' in contents:
-                chunk.baserock_configureCommands = contents['configure-commands']
+                chunk.add(BASEROCK.configureCommands,
+                          rdflib.Literal(contents['configure-commands']))
             if 'post-configure-commands' in contents:
-                chunk.baserock_postConfigureCommands = contents['post-configure-commands']
+                chunk.add(BASEROCK.postConfigureCommands,
+                          rdflib.Literal(contents['post-configure-commands']))
             if 'pre-build-commands' in contents:
-                chunk.baserock_preBuildCommands = contents['pre-build-commands']
+                chunk.add(BASEROCK.preBuildCommands,
+                          rdflib.Literal(contents['pre-build-commands']))
             if 'build-commands' in contents:
-                chunk.baserock_buildCommands = contents['build-commands']
+                chunk.add(BASEROCK.buildCommands,
+                          rdflib.Literal(contents['build-commands']))
             if 'post-build-commands' in contents:
-                chunk.baserock_postBuildCommands = contents['post-build-commands']
+                chunk.add(BASEROCK.postBuildCommands,
+                          rdflib.Literal(contents['post-build-commands']))
             if 'pre-install-commands' in contents:
-                chunk.baserock_preInstallCommands = contents['pre-install-commands']
+                chunk.add(BASEROCK.preInstallCommands,
+                          rdflib.Literal(contents['pre-install-commands']))
             if 'install-commands' in contents:
-                chunk.baserock_installCommands = contents['install-commands']
+                chunk.add(BASEROCK.installCommands,
+                          rdflib.Literal(contents['install-commands']))
             if 'post-install-commands' in contents:
-                chunk.baserock_postInstallCommands = contents['post-install-commands']
+                chunk.add(BASEROCK.postInstallCommands,
+                          rdflib.Literal(contents['post-install-commands']))
 
         elif contents['kind'] == 'stratum':
             stratum_uri = base_uri + 'strata/' + contents['name']
-            entity = stratum = Stratum(stratum_uri)
+            entity = stratum = rdflib.resource.Resource(
+                graph, rdflib.URIRef(stratum_uri))
 
-            stratum_build_deps = []
             for entry in contents.get('build-depends', []):
                 build_dep_file = os.path.join(toplevel_path, entry['morph'])
                 build_dep_name = get_name_from_morph_file(build_dep_file)
                 build_dep_uri = base_uri + 'strata/' + build_dep_name
-                stratum_build_deps.append(rdflib.URIRef(build_dep_uri))
-            stratum.baserock_hasBuildDependency = stratum_build_deps
+                stratum.add(BASEROCK.hasBuildDependency,
+                            rdflib.URIRef(build_dep_uri))
 
-            artifacts = []
             for entry in contents.get('products', []):
                 artifact_uri = stratum_uri + '/products/' + entry['artifact']
-                artifact = StratumArtifact(artifact_uri)
+                artifact = rdflib.resource.Resource(
+                    graph, rdflib.URIRef(artifact_uri))
                 # FIXME: order probably lost here
                 if 'includes' in entry:
-                    artifact.baserock_includes = entry['includes']
-                artifacts.append(artifact)
-            stratum.baserock_produces = artifacts
+                    artifact.set(BASEROCK.includes,
+                                 rdflib.Literal(entry['includes']))
+                stratum.add(BASEROCK.produces, artifact)
 
             chunk_refs = []
             for entry in contents.get('chunks', []):
                 chunk_ref_uri = stratum_uri + '/chunk-refs/' + entry['name']
-                chunk_ref = ChunkReference(chunk_ref_uri)
+                chunk_ref = rdflib.resource.Resource(
+                    graph, rdflib.URIRef(chunk_ref_uri))
 
                 if 'morph' in entry:
                     chunk_file = os.path.join(toplevel_path, entry['morph'])
@@ -127,32 +167,32 @@ def load_all_morphologies(session, store, path='.'):
                     chunk_name = entry['name']
 
                 chunk_uri = base_uri + 'chunks/' + chunk_name
-                chunk_ref.baserock_refersToChunk = rdflib.URIRef(chunk_uri)
+                chunk_ref.set(BASEROCK.refersToChunk, rdflib.URIRef(chunk_uri))
 
-                chunk_ref.baserock_repo = entry['repo']
-                chunk_ref.baserock_ref = entry['ref']
+                chunk_ref.set(BASEROCK.repo, rdflib.Literal(entry['repo']))
+                chunk_ref.set(BASEROCK.ref, rdflib.Literal(entry['ref']))
                 if 'unpetrify-ref' in entry:
-                    chunk_ref.baserock_unpetrifyRef = entry['unpetrify-ref']
-                chunk_ref.baserock_buildMode = entry.get('build-mode', 'normal')
-                chunk_ref.baserock_prefix = entry.get('prefix', '/usr')
+                    chunk_ref.set(BASEROCK.unpetrifyRef,
+                              rdflib.Literal(entry['unpetrify-ref']))
+                chunk_ref.set(BASEROCK.buildMode,
+                          rdflib.Literal(entry.get('build-mode', 'normal')))
+                chunk_ref.set(BASEROCK.prefix,
+                          rdflib.Literal(entry.get('prefix', '/usr')))
 
-                chunk_ref_build_deps = []
                 for entry_dep in entry.get('build-depends', []):
                     build_dep_uri = stratum_uri + '/chunk-refs/' + entry_dep
-                    chunk_ref_build_deps.append(build_dep_uri)
-                chunk_ref.baserock_hasChunkBuildDependency = chunk_ref_build_deps
-
-                chunk_refs.append(chunk_ref)
+                    chunk_ref.add(BASEROCK.hasChunkBuildDependency,
+                                  rdflib.URIRef(build_dep_uri))
 
-            stratum.baserock_containsChunkReference = chunk_refs
+                stratum.add(BASEROCK.containsChunkReference, chunk_ref)
 
         elif contents['kind'] == 'system':
             system_uri = base_uri + 'systems/' + contents['name']
-            entity = system = System(system_uri)
+            entity = system = rdflib.resource.Resource(
+                graph, rdflib.URIRef(system_uri))
 
-            system.baserock_arch = contents['arch']
+            system.set(BASEROCK.arch, rdflib.Literal(contents['arch']))
 
-            stratum_artifacts = []
             for entry in contents.get('strata', []):
                 # FIXME: need to include all strata if 'artifacts' isn't specified,
                 # which is difficult becausee they might not all be loaded yet ...
@@ -162,19 +202,20 @@ def load_all_morphologies(session, store, path='.'):
                 artifacts = entry.get('artifacts')
                 if artifacts is None:
                     artifacts = ['%s-runtime' % entry['name'],
-                                '%s-devel' % entry['name']]
+                                 '%s-devel' % entry['name']]
                 for artifact in artifacts:
                     artifact_uri = (base_uri + '/strata/' + entry['name'] +
                                     '/products/' + artifact)
-                    stratum_artifacts.append(artifact_uri)
-            system.baserock_containsStratumArtifact = stratum_artifacts
+                    system.add(BASEROCK.containsStratumArtifact,
+                               rdflib.URIRef(artifact_uri))
 
-            system.baserock_hasConfigurationExtension = \
-                contents.get('configuration-extensions', [])
+            system.set(BASEROCK.hasConfigurationExtension, rdflib.Literal(
+                contents.get('configuration-extensions', [])))
 
         elif contents['kind'] == 'cluster':
             cluster_uri = base_uri + 'clusters/' + contents['name']
-            entity = cluster = Cluster(cluster_uri)
+            entity = cluster = rdflib.resource.Resource(
+                graph, rdflib.URIRef(cluster_uri))
 
             deployments = []
             for entry in contents.get('systems', []):
@@ -185,34 +226,38 @@ def load_all_morphologies(session, store, path='.'):
                 # FIXME: ignores deploy-defaults at present
                 for label, details in entry['deploy'].items():
                     deployment_uri = cluster_uri + '/' + label
-                    deployment = SystemDeployment(deployment_uri)
+                    deployment = rdflib.resource.Resource(
+                        graph, rdflib.URIRef(deployment_uri))
 
-                    deployment.baserock_deploysSystem = rdflib.URIRef(system_uri)
-                    deployment.baserock_hasLabel = label
+                    deployment.set(BASEROCK.deploysSystem,
+                                   rdflib.URIRef(system_uri))
+                    deployment.set(BASEROCK.hasLabel, rdflib.Literal(label))
 
-                    deployment.baserock_hasType = details['type']
-                    deployment.baserock_hasLocation = details['location']
+                    deployment.set(BASEROCK.hasType,
+                                   rdflib.Literal(details['type']))
+                    deployment.set(BASEROCK.hasLocation,
+                                   rdflib.Literal(details['location']))
 
                     settings = []
                     for key, value in details.items():
                         if key in ['type', 'location']:
                             continue
                         # FIXME: RDF must have a way of representing arbitrary
-                        # key/values better than using a string with an = sign...
-                        settings.append('%s=%s' % (key,value))
-                    deployment.baserock_hasConfigurationSetting = settings
-                    deployments.append(deployment)
+                        # key/values better than using a string with an =
+                        # sign...
+                        settings.append('%s=%s' % (key, value))
+                    deployment.set(BASEROCK.hasConfigurationSetting,
+                                   rdflib.Literal(settings))
 
-            cluster.baserock_deploysSystem = deployments
+                    cluster.add(BASEROCK.deploysSystem, deployment)
 
         if 'description' in contents:
-            entity.dc_description = contents['description']
+            entity.set(DUBLIN_CORE.description,
+                       rdflib.Literal(contents['description']))
 
-        # FIXME: is this needed? why?
-        entity.set_dirty(True)
         # FIXME: comments from the .yaml file are lost ... as a quick solution,
-        # you could manually find every line from the YAML that starts with a '#'
-        # and dump that into a property.
+        # you could manually find every line from the YAML that starts with a
+        # '#' and dump that into a property. Or ruamel.yaml might help?
 
     print 'Parsing .morph files...'
     for dirname, dirnames, filenames in os.walk(toplevel_path):
@@ -226,24 +271,4 @@ def load_all_morphologies(session, store, path='.'):
                     print '%s: %r' % (filename, e)
                     raise
 
-    print 'Committing to database...'
-    try:
-        session.commit()
-    except Exception as e:
-        # Special handler for a common error case when trying to insert data
-        # into a fresh install of the Virtuoso database... hopefully this saves
-        # someone time!
-        if 'Virtuoso 42000 Error SR186: No permission to execute procedure DB' in e.message:
-            print("Permission denied trying to update the database via the "
-                  "SPARQL endpoint. By default this endpoint is read-only. "
-                  "To enable write access, run the `isql` or `isql-vt` "
-                  "commandline interface and run the following statement:\n\n"
-                  "  grant SPARQL_UPDATE to \"SPARQL\";\n\n"
-                  "WARNING! Only do this if you're using a local test instance "
-                  "of Virtuoso. You need to set up a real authenticated user "
-                  "account if using an instance of Virtuoso that has "
-                  "important data on it.")
-        else:
-            raise
-
-    store.save()
+    return graph