diff options
Diffstat (limited to 'schema/parse.py')
-rw-r--r-- | schema/parse.py | 227 |
1 files changed, 126 insertions, 101 deletions
diff --git a/schema/parse.py b/schema/parse.py index 36d09466..64eebd01 100644 --- a/schema/parse.py +++ b/schema/parse.py @@ -1,54 +1,86 @@ -# Load Baserock Definitions serialisation format V5 into a SurfRDF 'store'. +# Copyright (C) 2015 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. -# SurfRDF: https://github.com/cosminbasca/surfrdf -# I've now decided that using RDFLib (a lower-level library used by SurfRDF) -# directly would be just as simple as using the object-mapping provided by -# SurfRDF. The rdflib.resource.Resource class is actually a pretty good -# object-mapper itself, if we want one. +'''parse.py -# Note that neither surf.Resource nor rdflib.resource.Resource do any -# validation of what properties you set according to the schema. You can -# set chunk.baserock_dogfood='http://delicious' if you want. +Load a set of Baserock definitions from on-disk .morph files, and return an +RDFLib.Graph instance containing the data. + +This code understands the syntax of Baserock Definitions format version 5. + +The current version of the Baserock Definitions format is defined at: + + http://wiki.baserock.org/definitions/current + +''' import rdflib -import surf import yaml import os import warnings -surf.ns.register(baserock='http://baserock.org/definitions/example-schema#') -surf.ns.register(dc='http://purl.org/dc/terms/') +BASEROCK = rdflib.Namespace('http://baserock.org/definitions/example-schema#') +DUBLIN_CORE = rdflib.Namespace('http://purl.org/dc/terms/') def parse_morph_file(path): + '''Parse an individual .morph file. + + This function does a tiny amount of validation: checking the 'name' and + 'type' fields. + + Returns a Python dict with the entire contents of the file deserialised + from YAML. + + ''' with open(path) as f: text = f.read() contents = yaml.safe_load(text) - morph_type = contents['kind'] assert 'name' in contents assert contents['kind'] in ['cluster', 'system', 'stratum', 'chunk'] return contents def get_name_from_morph_file(path): + '''Returns the 'name' defined in a specific .morph file. + + This is a convenience function for resolving places where one .morph file + in a set references another one. + + ''' contents = parse_morph_file(path) return contents['name'] -def load_all_morphologies(session, store, path='.'): - Chunk = session.get_class(surf.ns.BASEROCK.Chunk) - ChunkReference = session.get_class(surf.ns.BASEROCK.ChunkReference) - Stratum = session.get_class(surf.ns.BASEROCK.Stratum) - StratumArtifact = session.get_class(surf.ns.BASEROCK.StratumArtifact) - System = session.get_class(surf.ns.BASEROCK.System) - SystemDeployment = session.get_class(surf.ns.BASEROCK.SystemDeployment) - Cluster = session.get_class(surf.ns.BASEROCK.Cluster) +# FIXME: get_uri_for_resource() + + +# FIXME: can you use assignment instead of Resource.set ? + +def load_all_morphologies(path='.'): + '''Load Baserock Definitions serialisation format V5 as an RDFLib 'graph'. + This code does very little validation, so the 'graph' that it returns may + not fully make sense according to the Baserock data model. + + ''' toplevel_path = path + graph = rdflib.Graph() def load_morph(toplevel_path, filename): try: @@ -61,59 +93,67 @@ def load_all_morphologies(session, store, path='.'): entity = None - # Note the 'surf' library doesn't seem to do any kind of validity checking - # so you can insert whatever random data you feel like, if you want. - if contents['kind'] == 'chunk': chunk_uri = base_uri + 'chunks/' + contents['name'] - entity = chunk = Chunk(chunk_uri) + entity = chunk = rdflib.resource.Resource( + graph, rdflib.URIRef(chunk_uri)) # FIXME: order is lost here !!!!! if 'pre-configure-commands' in contents: - chunk.baserock_preConfigureCommands = contents['pre-configure-commands'] + chunk.add(BASEROCK.preConfigureCommands, + rdflib.Literal(contents['pre-configure-commands'])) if 'configure-commands' in contents: - chunk.baserock_configureCommands = contents['configure-commands'] + chunk.add(BASEROCK.configureCommands, + rdflib.Literal(contents['configure-commands'])) if 'post-configure-commands' in contents: - chunk.baserock_postConfigureCommands = contents['post-configure-commands'] + chunk.add(BASEROCK.postConfigureCommands, + rdflib.Literal(contents['post-configure-commands'])) if 'pre-build-commands' in contents: - chunk.baserock_preBuildCommands = contents['pre-build-commands'] + chunk.add(BASEROCK.preBuildCommands, + rdflib.Literal(contents['pre-build-commands'])) if 'build-commands' in contents: - chunk.baserock_buildCommands = contents['build-commands'] + chunk.add(BASEROCK.buildCommands, + rdflib.Literal(contents['build-commands'])) if 'post-build-commands' in contents: - chunk.baserock_postBuildCommands = contents['post-build-commands'] + chunk.add(BASEROCK.postBuildCommands, + rdflib.Literal(contents['post-build-commands'])) if 'pre-install-commands' in contents: - chunk.baserock_preInstallCommands = contents['pre-install-commands'] + chunk.add(BASEROCK.preInstallCommands, + rdflib.Literal(contents['pre-install-commands'])) if 'install-commands' in contents: - chunk.baserock_installCommands = contents['install-commands'] + chunk.add(BASEROCK.installCommands, + rdflib.Literal(contents['install-commands'])) if 'post-install-commands' in contents: - chunk.baserock_postInstallCommands = contents['post-install-commands'] + chunk.add(BASEROCK.postInstallCommands, + rdflib.Literal(contents['post-install-commands'])) elif contents['kind'] == 'stratum': stratum_uri = base_uri + 'strata/' + contents['name'] - entity = stratum = Stratum(stratum_uri) + entity = stratum = rdflib.resource.Resource( + graph, rdflib.URIRef(stratum_uri)) - stratum_build_deps = [] for entry in contents.get('build-depends', []): build_dep_file = os.path.join(toplevel_path, entry['morph']) build_dep_name = get_name_from_morph_file(build_dep_file) build_dep_uri = base_uri + 'strata/' + build_dep_name - stratum_build_deps.append(rdflib.URIRef(build_dep_uri)) - stratum.baserock_hasBuildDependency = stratum_build_deps + stratum.add(BASEROCK.hasBuildDependency, + rdflib.URIRef(build_dep_uri)) - artifacts = [] for entry in contents.get('products', []): artifact_uri = stratum_uri + '/products/' + entry['artifact'] - artifact = StratumArtifact(artifact_uri) + artifact = rdflib.resource.Resource( + graph, rdflib.URIRef(artifact_uri)) # FIXME: order probably lost here if 'includes' in entry: - artifact.baserock_includes = entry['includes'] - artifacts.append(artifact) - stratum.baserock_produces = artifacts + artifact.set(BASEROCK.includes, + rdflib.Literal(entry['includes'])) + stratum.add(BASEROCK.produces, artifact) chunk_refs = [] for entry in contents.get('chunks', []): chunk_ref_uri = stratum_uri + '/chunk-refs/' + entry['name'] - chunk_ref = ChunkReference(chunk_ref_uri) + chunk_ref = rdflib.resource.Resource( + graph, rdflib.URIRef(chunk_ref_uri)) if 'morph' in entry: chunk_file = os.path.join(toplevel_path, entry['morph']) @@ -127,32 +167,32 @@ def load_all_morphologies(session, store, path='.'): chunk_name = entry['name'] chunk_uri = base_uri + 'chunks/' + chunk_name - chunk_ref.baserock_refersToChunk = rdflib.URIRef(chunk_uri) + chunk_ref.set(BASEROCK.refersToChunk, rdflib.URIRef(chunk_uri)) - chunk_ref.baserock_repo = entry['repo'] - chunk_ref.baserock_ref = entry['ref'] + chunk_ref.set(BASEROCK.repo, rdflib.Literal(entry['repo'])) + chunk_ref.set(BASEROCK.ref, rdflib.Literal(entry['ref'])) if 'unpetrify-ref' in entry: - chunk_ref.baserock_unpetrifyRef = entry['unpetrify-ref'] - chunk_ref.baserock_buildMode = entry.get('build-mode', 'normal') - chunk_ref.baserock_prefix = entry.get('prefix', '/usr') + chunk_ref.set(BASEROCK.unpetrifyRef, + rdflib.Literal(entry['unpetrify-ref'])) + chunk_ref.set(BASEROCK.buildMode, + rdflib.Literal(entry.get('build-mode', 'normal'))) + chunk_ref.set(BASEROCK.prefix, + rdflib.Literal(entry.get('prefix', '/usr'))) - chunk_ref_build_deps = [] for entry_dep in entry.get('build-depends', []): build_dep_uri = stratum_uri + '/chunk-refs/' + entry_dep - chunk_ref_build_deps.append(build_dep_uri) - chunk_ref.baserock_hasChunkBuildDependency = chunk_ref_build_deps - - chunk_refs.append(chunk_ref) + chunk_ref.set(BASEROCK.hasChunkBuildDependency, + rdflib.URIRef(build_dep_uri)) - stratum.baserock_containsChunkReference = chunk_refs + stratum.add(BASEROCK.containsChunkReference, chunk_ref) elif contents['kind'] == 'system': system_uri = base_uri + 'systems/' + contents['name'] - entity = system = System(system_uri) + entity = system = rdflib.resource.Resource( + graph, rdflib.URIRef(system_uri)) - system.baserock_arch = contents['arch'] + system.set(BASEROCK.arch, rdflib.Literal(contents['arch'])) - stratum_artifacts = [] for entry in contents.get('strata', []): # FIXME: need to include all strata if 'artifacts' isn't specified, # which is difficult becausee they might not all be loaded yet ... @@ -162,19 +202,20 @@ def load_all_morphologies(session, store, path='.'): artifacts = entry.get('artifacts') if artifacts is None: artifacts = ['%s-runtime' % entry['name'], - '%s-devel' % entry['name']] + '%s-devel' % entry['name']] for artifact in artifacts: artifact_uri = (base_uri + '/strata/' + entry['name'] + '/products/' + artifact) - stratum_artifacts.append(artifact_uri) - system.baserock_containsStratumArtifact = stratum_artifacts + system.add(BASEROCK.containsStratumArtifact, + rdflib.URIRef(artifact_uri)) - system.baserock_hasConfigurationExtension = \ - contents.get('configuration-extensions', []) + system.set(BASEROCK.hasConfigurationExtension, rdflib.Literal( + contents.get('configuration-extensions', []))) elif contents['kind'] == 'cluster': cluster_uri = base_uri + 'clusters/' + contents['name'] - entity = cluster = Cluster(cluster_uri) + entity = cluster = rdflib.resource.Resource( + graph, rdflib.URIRef(cluster_uri)) deployments = [] for entry in contents.get('systems', []): @@ -185,34 +226,38 @@ def load_all_morphologies(session, store, path='.'): # FIXME: ignores deploy-defaults at present for label, details in entry['deploy'].items(): deployment_uri = cluster_uri + '/' + label - deployment = SystemDeployment(deployment_uri) + deployment = rdflib.resource.Resource( + graph, rdflib.URIRef(deployment_uri)) - deployment.baserock_deploysSystem = rdflib.URIRef(system_uri) - deployment.baserock_hasLabel = label + deployment.set(BASEROCK.deploysSystem, + rdflib.URIRef(system_uri)) + deployment.set(BASEROCK.hasLabel, rdflib.Literal(label)) - deployment.baserock_hasType = details['type'] - deployment.baserock_hasLocation = details['location'] + deployment.set(BASEROCK.hasType, + rdflib.Literal(details['type'])) + deployment.set(BASEROCK.hasLocation, + rdflib.Literal(details['location'])) settings = [] for key, value in details.items(): if key in ['type', 'location']: continue # FIXME: RDF must have a way of representing arbitrary - # key/values better than using a string with an = sign... - settings.append('%s=%s' % (key,value)) - deployment.baserock_hasConfigurationSetting = settings - deployments.append(deployment) + # key/values better than using a string with an = + # sign... + settings.append('%s=%s' % (key, value)) + deployment.set(BASEROCK.hasConfigurationSetting, + rdflib.Literal(settings)) - cluster.baserock_deploysSystem = deployments + cluster.add(BASEROCK.deploysSystem, deployment) if 'description' in contents: - entity.dc_description = contents['description'] + entity.set(DUBLIN_CORE.description, + rdflib.Literal(contents['description'])) - # FIXME: is this needed? why? - entity.set_dirty(True) # FIXME: comments from the .yaml file are lost ... as a quick solution, - # you could manually find every line from the YAML that starts with a '#' - # and dump that into a property. + # you could manually find every line from the YAML that starts with a + # '#' and dump that into a property. Or ruamel.yaml might help? print 'Parsing .morph files...' for dirname, dirnames, filenames in os.walk(toplevel_path): @@ -226,24 +271,4 @@ def load_all_morphologies(session, store, path='.'): print '%s: %r' % (filename, e) raise - print 'Committing to database...' - try: - session.commit() - except Exception as e: - # Special handler for a common error case when trying to insert data - # into a fresh install of the Virtuoso database... hopefully this saves - # someone time! - if 'Virtuoso 42000 Error SR186: No permission to execute procedure DB' in e.message: - print("Permission denied trying to update the database via the " - "SPARQL endpoint. By default this endpoint is read-only. " - "To enable write access, run the `isql` or `isql-vt` " - "commandline interface and run the following statement:\n\n" - " grant SPARQL_UPDATE to \"SPARQL\";\n\n" - "WARNING! Only do this if you're using a local test instance " - "of Virtuoso. You need to set up a real authenticated user " - "account if using an instance of Virtuoso that has " - "important data on it.") - else: - raise - - store.save() + return graph |