summaryrefslogtreecommitdiff
path: root/schema/parse.py
diff options
context:
space:
mode:
Diffstat (limited to 'schema/parse.py')
-rw-r--r--schema/parse.py227
1 files changed, 126 insertions, 101 deletions
diff --git a/schema/parse.py b/schema/parse.py
index 36d09466..64eebd01 100644
--- a/schema/parse.py
+++ b/schema/parse.py
@@ -1,54 +1,86 @@
-# Load Baserock Definitions serialisation format V5 into a SurfRDF 'store'.
+# Copyright (C) 2015 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
-# SurfRDF: https://github.com/cosminbasca/surfrdf
-# I've now decided that using RDFLib (a lower-level library used by SurfRDF)
-# directly would be just as simple as using the object-mapping provided by
-# SurfRDF. The rdflib.resource.Resource class is actually a pretty good
-# object-mapper itself, if we want one.
+'''parse.py
-# Note that neither surf.Resource nor rdflib.resource.Resource do any
-# validation of what properties you set according to the schema. You can
-# set chunk.baserock_dogfood='http://delicious' if you want.
+Load a set of Baserock definitions from on-disk .morph files, and return an
+rdflib.Graph instance containing the data.
+
+This code understands the syntax of Baserock Definitions format version 5.
+
+The current version of the Baserock Definitions format is defined at:
+
+ http://wiki.baserock.org/definitions/current
+
+'''
import rdflib
-import surf
import yaml
import os
import warnings
-surf.ns.register(baserock='http://baserock.org/definitions/example-schema#')
-surf.ns.register(dc='http://purl.org/dc/terms/')
+BASEROCK = rdflib.Namespace('http://baserock.org/definitions/example-schema#')
+DUBLIN_CORE = rdflib.Namespace('http://purl.org/dc/terms/')
def parse_morph_file(path):
+ '''Parse an individual .morph file.
+
+ This function does a tiny amount of validation: checking the 'name' and
+ 'kind' fields.
+
+ Returns a Python dict with the entire contents of the file deserialised
+ from YAML.
+
+ '''
with open(path) as f:
text = f.read()
contents = yaml.safe_load(text)
- morph_type = contents['kind']
assert 'name' in contents
assert contents['kind'] in ['cluster', 'system', 'stratum', 'chunk']
return contents
def get_name_from_morph_file(path):
+ '''Returns the 'name' defined in a specific .morph file.
+
+ This is a convenience function for resolving places where one .morph file
+ in a set references another one.
+
+ '''
contents = parse_morph_file(path)
return contents['name']
-def load_all_morphologies(session, store, path='.'):
- Chunk = session.get_class(surf.ns.BASEROCK.Chunk)
- ChunkReference = session.get_class(surf.ns.BASEROCK.ChunkReference)
- Stratum = session.get_class(surf.ns.BASEROCK.Stratum)
- StratumArtifact = session.get_class(surf.ns.BASEROCK.StratumArtifact)
- System = session.get_class(surf.ns.BASEROCK.System)
- SystemDeployment = session.get_class(surf.ns.BASEROCK.SystemDeployment)
- Cluster = session.get_class(surf.ns.BASEROCK.Cluster)
+# FIXME: get_uri_for_resource()
+
+
+# FIXME: can you use assignment instead of Resource.set ?
+
+def load_all_morphologies(path='.'):
+ '''Load Baserock Definitions serialisation format V5 as an RDFLib 'graph'.
+ This code does very little validation, so the 'graph' that it returns may
+ not fully make sense according to the Baserock data model.
+
+ '''
toplevel_path = path
+ graph = rdflib.Graph()
def load_morph(toplevel_path, filename):
try:
@@ -61,59 +93,67 @@ def load_all_morphologies(session, store, path='.'):
entity = None
- # Note the 'surf' library doesn't seem to do any kind of validity checking
- # so you can insert whatever random data you feel like, if you want.
-
if contents['kind'] == 'chunk':
chunk_uri = base_uri + 'chunks/' + contents['name']
- entity = chunk = Chunk(chunk_uri)
+ entity = chunk = rdflib.resource.Resource(
+ graph, rdflib.URIRef(chunk_uri))
# FIXME: order is lost here !!!!!
if 'pre-configure-commands' in contents:
- chunk.baserock_preConfigureCommands = contents['pre-configure-commands']
+ chunk.add(BASEROCK.preConfigureCommands,
+ rdflib.Literal(contents['pre-configure-commands']))
if 'configure-commands' in contents:
- chunk.baserock_configureCommands = contents['configure-commands']
+ chunk.add(BASEROCK.configureCommands,
+ rdflib.Literal(contents['configure-commands']))
if 'post-configure-commands' in contents:
- chunk.baserock_postConfigureCommands = contents['post-configure-commands']
+ chunk.add(BASEROCK.postConfigureCommands,
+ rdflib.Literal(contents['post-configure-commands']))
if 'pre-build-commands' in contents:
- chunk.baserock_preBuildCommands = contents['pre-build-commands']
+ chunk.add(BASEROCK.preBuildCommands,
+ rdflib.Literal(contents['pre-build-commands']))
if 'build-commands' in contents:
- chunk.baserock_buildCommands = contents['build-commands']
+ chunk.add(BASEROCK.buildCommands,
+ rdflib.Literal(contents['build-commands']))
if 'post-build-commands' in contents:
- chunk.baserock_postBuildCommands = contents['post-build-commands']
+ chunk.add(BASEROCK.postBuildCommands,
+ rdflib.Literal(contents['post-build-commands']))
if 'pre-install-commands' in contents:
- chunk.baserock_preInstallCommands = contents['pre-install-commands']
+ chunk.add(BASEROCK.preInstallCommands,
+ rdflib.Literal(contents['pre-install-commands']))
if 'install-commands' in contents:
- chunk.baserock_installCommands = contents['install-commands']
+ chunk.add(BASEROCK.installCommands,
+ rdflib.Literal(contents['install-commands']))
if 'post-install-commands' in contents:
- chunk.baserock_postInstallCommands = contents['post-install-commands']
+ chunk.add(BASEROCK.postInstallCommands,
+ rdflib.Literal(contents['post-install-commands']))
elif contents['kind'] == 'stratum':
stratum_uri = base_uri + 'strata/' + contents['name']
- entity = stratum = Stratum(stratum_uri)
+ entity = stratum = rdflib.resource.Resource(
+ graph, rdflib.URIRef(stratum_uri))
- stratum_build_deps = []
for entry in contents.get('build-depends', []):
build_dep_file = os.path.join(toplevel_path, entry['morph'])
build_dep_name = get_name_from_morph_file(build_dep_file)
build_dep_uri = base_uri + 'strata/' + build_dep_name
- stratum_build_deps.append(rdflib.URIRef(build_dep_uri))
- stratum.baserock_hasBuildDependency = stratum_build_deps
+ stratum.add(BASEROCK.hasBuildDependency,
+ rdflib.URIRef(build_dep_uri))
- artifacts = []
for entry in contents.get('products', []):
artifact_uri = stratum_uri + '/products/' + entry['artifact']
- artifact = StratumArtifact(artifact_uri)
+ artifact = rdflib.resource.Resource(
+ graph, rdflib.URIRef(artifact_uri))
# FIXME: order probably lost here
if 'includes' in entry:
- artifact.baserock_includes = entry['includes']
- artifacts.append(artifact)
- stratum.baserock_produces = artifacts
+ artifact.set(BASEROCK.includes,
+ rdflib.Literal(entry['includes']))
+ stratum.add(BASEROCK.produces, artifact)
chunk_refs = []
for entry in contents.get('chunks', []):
chunk_ref_uri = stratum_uri + '/chunk-refs/' + entry['name']
- chunk_ref = ChunkReference(chunk_ref_uri)
+ chunk_ref = rdflib.resource.Resource(
+ graph, rdflib.URIRef(chunk_ref_uri))
if 'morph' in entry:
chunk_file = os.path.join(toplevel_path, entry['morph'])
@@ -127,32 +167,32 @@ def load_all_morphologies(session, store, path='.'):
chunk_name = entry['name']
chunk_uri = base_uri + 'chunks/' + chunk_name
- chunk_ref.baserock_refersToChunk = rdflib.URIRef(chunk_uri)
+ chunk_ref.set(BASEROCK.refersToChunk, rdflib.URIRef(chunk_uri))
- chunk_ref.baserock_repo = entry['repo']
- chunk_ref.baserock_ref = entry['ref']
+ chunk_ref.set(BASEROCK.repo, rdflib.Literal(entry['repo']))
+ chunk_ref.set(BASEROCK.ref, rdflib.Literal(entry['ref']))
if 'unpetrify-ref' in entry:
- chunk_ref.baserock_unpetrifyRef = entry['unpetrify-ref']
- chunk_ref.baserock_buildMode = entry.get('build-mode', 'normal')
- chunk_ref.baserock_prefix = entry.get('prefix', '/usr')
+ chunk_ref.set(BASEROCK.unpetrifyRef,
+ rdflib.Literal(entry['unpetrify-ref']))
+ chunk_ref.set(BASEROCK.buildMode,
+ rdflib.Literal(entry.get('build-mode', 'normal')))
+ chunk_ref.set(BASEROCK.prefix,
+ rdflib.Literal(entry.get('prefix', '/usr')))
- chunk_ref_build_deps = []
for entry_dep in entry.get('build-depends', []):
build_dep_uri = stratum_uri + '/chunk-refs/' + entry_dep
- chunk_ref_build_deps.append(build_dep_uri)
- chunk_ref.baserock_hasChunkBuildDependency = chunk_ref_build_deps
-
- chunk_refs.append(chunk_ref)
+ chunk_ref.set(BASEROCK.hasChunkBuildDependency,
+ rdflib.URIRef(build_dep_uri))
- stratum.baserock_containsChunkReference = chunk_refs
+ stratum.add(BASEROCK.containsChunkReference, chunk_ref)
elif contents['kind'] == 'system':
system_uri = base_uri + 'systems/' + contents['name']
- entity = system = System(system_uri)
+ entity = system = rdflib.resource.Resource(
+ graph, rdflib.URIRef(system_uri))
- system.baserock_arch = contents['arch']
+ system.set(BASEROCK.arch, rdflib.Literal(contents['arch']))
- stratum_artifacts = []
for entry in contents.get('strata', []):
# FIXME: need to include all strata if 'artifacts' isn't specified,
# which is difficult because they might not all be loaded yet ...
@@ -162,19 +202,20 @@ def load_all_morphologies(session, store, path='.'):
artifacts = entry.get('artifacts')
if artifacts is None:
artifacts = ['%s-runtime' % entry['name'],
- '%s-devel' % entry['name']]
+ '%s-devel' % entry['name']]
for artifact in artifacts:
artifact_uri = (base_uri + '/strata/' + entry['name'] +
'/products/' + artifact)
- stratum_artifacts.append(artifact_uri)
- system.baserock_containsStratumArtifact = stratum_artifacts
+ system.add(BASEROCK.containsStratumArtifact,
+ rdflib.URIRef(artifact_uri))
- system.baserock_hasConfigurationExtension = \
- contents.get('configuration-extensions', [])
+ system.set(BASEROCK.hasConfigurationExtension, rdflib.Literal(
+ contents.get('configuration-extensions', [])))
elif contents['kind'] == 'cluster':
cluster_uri = base_uri + 'clusters/' + contents['name']
- entity = cluster = Cluster(cluster_uri)
+ entity = cluster = rdflib.resource.Resource(
+ graph, rdflib.URIRef(cluster_uri))
deployments = []
for entry in contents.get('systems', []):
@@ -185,34 +226,38 @@ def load_all_morphologies(session, store, path='.'):
# FIXME: ignores deploy-defaults at present
for label, details in entry['deploy'].items():
deployment_uri = cluster_uri + '/' + label
- deployment = SystemDeployment(deployment_uri)
+ deployment = rdflib.resource.Resource(
+ graph, rdflib.URIRef(deployment_uri))
- deployment.baserock_deploysSystem = rdflib.URIRef(system_uri)
- deployment.baserock_hasLabel = label
+ deployment.set(BASEROCK.deploysSystem,
+ rdflib.URIRef(system_uri))
+ deployment.set(BASEROCK.hasLabel, rdflib.Literal(label))
- deployment.baserock_hasType = details['type']
- deployment.baserock_hasLocation = details['location']
+ deployment.set(BASEROCK.hasType,
+ rdflib.Literal(details['type']))
+ deployment.set(BASEROCK.hasLocation,
+ rdflib.Literal(details['location']))
settings = []
for key, value in details.items():
if key in ['type', 'location']:
continue
# FIXME: RDF must have a way of representing arbitrary
- # key/values better than using a string with an = sign...
- settings.append('%s=%s' % (key,value))
- deployment.baserock_hasConfigurationSetting = settings
- deployments.append(deployment)
+ # key/values better than using a string with an =
+ # sign...
+ settings.append('%s=%s' % (key, value))
+ deployment.set(BASEROCK.hasConfigurationSetting,
+ rdflib.Literal(settings))
- cluster.baserock_deploysSystem = deployments
+ cluster.add(BASEROCK.deploysSystem, deployment)
if 'description' in contents:
- entity.dc_description = contents['description']
+ entity.set(DUBLIN_CORE.description,
+ rdflib.Literal(contents['description']))
- # FIXME: is this needed? why?
- entity.set_dirty(True)
# FIXME: comments from the .yaml file are lost ... as a quick solution,
- # you could manually find every line from the YAML that starts with a '#'
- # and dump that into a property.
+ # you could manually find every line from the YAML that starts with a
+ # '#' and dump that into a property. Or ruamel.yaml might help?
print 'Parsing .morph files...'
for dirname, dirnames, filenames in os.walk(toplevel_path):
@@ -226,24 +271,4 @@ def load_all_morphologies(session, store, path='.'):
print '%s: %r' % (filename, e)
raise
- print 'Committing to database...'
- try:
- session.commit()
- except Exception as e:
- # Special handler for a common error case when trying to insert data
- # into a fresh install of the Virtuoso database... hopefully this saves
- # someone time!
- if 'Virtuoso 42000 Error SR186: No permission to execute procedure DB' in e.message:
- print("Permission denied trying to update the database via the "
- "SPARQL endpoint. By default this endpoint is read-only. "
- "To enable write access, run the `isql` or `isql-vt` "
- "commandline interface and run the following statement:\n\n"
- " grant SPARQL_UPDATE to \"SPARQL\";\n\n"
- "WARNING! Only do this if you're using a local test instance "
- "of Virtuoso. You need to set up a real authenticated user "
- "account if using an instance of Virtuoso that has "
- "important data on it.")
- else:
- raise
-
- store.save()
+ return graph