From 2ac1b843eea5b9515b8d60595e9911980ecb62f4 Mon Sep 17 00:00:00 2001
From: Sam Thursfield
Date: Wed, 1 Jul 2015 11:32:16 +0100
Subject: schema: Separate loading code into separate file

Change-Id: Ib1373e68eb49ab95d2ebdb55deae16d899706aa8
---
Review fixups folded into the new schema/parse.py (the index hash below is
from before these changes):
 - skip a morphology that fails to load instead of carrying on without it
 - no reference to DATABASE, which is only defined in surf-test.py
 - system artifact URIs no longer contain a double slash, and references to
   chunk-refs / stratum artifacts are URIRefs like the other references

 schema/parse.py     | 237 +++++++++++++++++++++++++++++++++++++++++++++++
 schema/surf-test.py | 231 ++++------------------------------------------------
 2 files changed, 251 insertions(+), 217 deletions(-)
 create mode 100644 schema/parse.py

diff --git a/schema/parse.py b/schema/parse.py
new file mode 100644
index 00000000..a118e70b
--- /dev/null
+++ b/schema/parse.py
@@ -0,0 +1,237 @@
+# Load Baserock Definitions serialisation format V5 into a SurfRDF 'store'.
+
+# https://github.com/cosminbasca/surfrdf
+
+
+import rdflib
+import surf
+import yaml
+
+import os
+import warnings
+
+
+surf.ns.register(baserock='http://baserock.org/definitions/example-schema#')
+
+
+# Morphology kinds that load_all_morphologies() knows how to load.
+MORPHOLOGY_KINDS = ('cluster', 'system', 'stratum', 'chunk')
+
+# (YAML field, Chunk attribute) for each list of build commands.
+CHUNK_COMMAND_FIELDS = [
+    ('pre-configure-commands', 'baserock_preConfigureCommands'),
+    ('configure-commands', 'baserock_configureCommands'),
+    ('post-configure-commands', 'baserock_postConfigureCommands'),
+    ('pre-build-commands', 'baserock_preBuildCommands'),
+    ('build-commands', 'baserock_buildCommands'),
+    ('post-build-commands', 'baserock_postBuildCommands'),
+    ('pre-install-commands', 'baserock_preInstallCommands'),
+    ('install-commands', 'baserock_installCommands'),
+    ('post-install-commands', 'baserock_postInstallCommands'),
+]
+
+# Error text seen when the Virtuoso SPARQL endpoint refuses an update.
+VIRTUOSO_READ_ONLY_ERROR = \
+    'Virtuoso 42000 Error SR186: No permission to execute procedure DB'
+
+
+def load_all_morphologies(session, store, root_dir='..',
+                          base_uri='http://example.com/'):
+    """Load every .morph file below 'root_dir' and commit it to 'store'.
+
+    session:  SurfRDF session; resource classes are obtained from it and
+              session.commit() is called once all files are loaded.
+    store:    the store behind 'session'; store.save() is called last.
+    root_dir: directory tree searched for .morph files ('.git' directories
+              are not descended into).
+    base_uri: prefix of every resource URI created; it must end with '/'.
+              FIXME: the default is only a placeholder.
+
+    A file that cannot be read, parsed or validated is reported with
+    warnings.warn() and skipped.
+
+    """
+    Chunk = session.get_class(surf.ns.BASEROCK.Chunk)
+    ChunkReference = session.get_class(surf.ns.BASEROCK.ChunkReference)
+    Stratum = session.get_class(surf.ns.BASEROCK.Stratum)
+    StratumArtifact = session.get_class(surf.ns.BASEROCK.StratumArtifact)
+    System = session.get_class(surf.ns.BASEROCK.System)
+    SystemDeployment = session.get_class(surf.ns.BASEROCK.SystemDeployment)
+    Cluster = session.get_class(surf.ns.BASEROCK.Cluster)
+
+    def load_morph(path):
+        """Create the resources described by the morphology at 'path'."""
+        try:
+            with open(path) as f:
+                contents = yaml.safe_load(f)
+            kind = contents['kind']
+            if kind not in MORPHOLOGY_KINDS:
+                raise ValueError("unknown kind %r" % (kind,))
+            if 'name' not in contents:
+                raise ValueError("missing 'name' field")
+        except Exception as e:
+            # Skip the file: nothing below can work without valid contents.
+            warnings.warn("Problem loading %s: %s" % (path, e))
+            return
+
+        # Note the 'surf' library doesn't seem to do any kind of validity
+        # checking so you can insert whatever random data you feel like, if
+        # you want.
+
+        if kind == 'chunk':
+            chunk_uri = base_uri + 'chunks/' + contents['name']
+            entity = chunk = Chunk(chunk_uri)
+
+            # FIXME: I think order is lost here !!!!!
+            for field, attribute in CHUNK_COMMAND_FIELDS:
+                if field in contents:
+                    setattr(chunk, attribute, contents[field])
+
+        elif kind == 'stratum':
+            stratum_uri = base_uri + 'strata/' + contents['name']
+            entity = stratum = Stratum(stratum_uri)
+
+            stratum_build_deps = []
+            for entry in contents.get('build-depends', []):
+                build_dep_uri = base_uri + 'strata/' + entry['morph']
+                stratum_build_deps.append(rdflib.URIRef(build_dep_uri))
+            stratum.baserock_hasBuildDependency = stratum_build_deps
+
+            artifacts = []
+            for entry in contents.get('products', []):
+                artifact_uri = stratum_uri + '/products/' + entry['artifact']
+                artifact = StratumArtifact(artifact_uri)
+                # FIXME: order probably lost here
+                if 'includes' in entry:
+                    artifact.baserock_includes = entry['includes']
+                artifacts.append(artifact)
+            stratum.baserock_produces = artifacts
+
+            chunk_refs = []
+            for entry in contents.get('chunks', []):
+                chunk_ref_uri = stratum_uri + '/chunk-refs/' + entry['name']
+                chunk_ref = ChunkReference(chunk_ref_uri)
+
+                # FIXME: this ignores the 'morph' field, and assumes 'name' is
+                # usable as-is.
+                chunk_uri = base_uri + 'chunks/' + entry['name']
+                chunk_ref.baserock_refersToChunk = rdflib.URIRef(chunk_uri)
+
+                chunk_ref.baserock_repo = entry['repo']
+                chunk_ref.baserock_ref = entry['ref']
+                if 'unpetrify-ref' in entry:
+                    chunk_ref.baserock_unpetrifyRef = entry['unpetrify-ref']
+                chunk_ref.baserock_buildMode = entry.get('build-mode', 'normal')
+                chunk_ref.baserock_prefix = entry.get('prefix', '/usr')
+
+                # Link to the other chunk references as resources (URIRef),
+                # like refersToChunk above, rather than as plain strings.
+                chunk_ref_build_deps = []
+                for entry_dep in entry.get('build-depends', []):
+                    build_dep_uri = stratum_uri + '/chunk-refs/' + entry_dep
+                    chunk_ref_build_deps.append(rdflib.URIRef(build_dep_uri))
+                chunk_ref.baserock_hasChunkBuildDependency = chunk_ref_build_deps
+
+                chunk_refs.append(chunk_ref)
+
+            stratum.baserock_containsChunkReference = chunk_refs
+
+        elif kind == 'system':
+            system_uri = base_uri + 'systems/' + contents['name']
+            entity = system = System(system_uri)
+
+            system.baserock_arch = contents['arch']
+
+            stratum_artifacts = []
+            for entry in contents.get('strata', []):
+                # FIXME: need to include all strata if 'artifacts' isn't
+                # specified, which is difficult because they might not all be
+                # loaded yet ... so for now I cheat and just assume -runtime
+                # and -devel. If there are extra artifacts for the stratum
+                # they won't be included by default. I'm not sure if this is
+                # how Morph behaves or not.
+                artifacts = entry.get('artifacts')
+                if artifacts is None:
+                    artifacts = ['%s-runtime' % entry['name'],
+                                 '%s-devel' % entry['name']]
+                for artifact in artifacts:
+                    # No '/' before 'strata': base_uri supplies it, and these
+                    # URIs must equal the ones the 'stratum' branch creates.
+                    artifact_uri = (base_uri + 'strata/' + entry['name'] +
+                                    '/products/' + artifact)
+                    stratum_artifacts.append(rdflib.URIRef(artifact_uri))
+            system.baserock_containsStratumArtifact = stratum_artifacts
+
+            system.baserock_hasConfigurationExtension = \
+                contents.get('configuration-extensions', [])
+
+        elif kind == 'cluster':
+            cluster_uri = base_uri + 'clusters/' + contents['name']
+            entity = cluster = Cluster(cluster_uri)
+
+            deployments = []
+            for entry in contents.get('systems', []):
+                # FIXME: can't get the URI from the 'morph' entry... need to load
+                # the actual .morph file and get the name from there.
+                system_uri = 'http://FIXME'
+
+                # FIXME: ignores deploy-defaults at present
+                for label, details in entry['deploy'].items():
+                    deployment_uri = cluster_uri + '/' + label
+                    deployment = SystemDeployment(deployment_uri)
+
+                    deployment.baserock_deploysSystem = rdflib.URIRef(system_uri)
+                    deployment.baserock_hasLabel = label
+
+                    deployment.baserock_hasType = details['type']
+                    deployment.baserock_hasLocation = details['location']
+
+                    settings = []
+                    for key, value in details.items():
+                        if key in ['type', 'location']:
+                            continue
+                        # FIXME: RDF must have a way of representing arbitrary
+                        # key/values better than using a string with an = sign...
+                        settings.append('%s=%s' % (key, value))
+                    deployment.baserock_hasConfigurationSetting = settings
+                    deployments.append(deployment)
+
+            cluster.baserock_deploysSystem = deployments
+
+        if 'description' in contents:
+            entity.baserock_description = contents['description']
+
+        # FIXME: is this needed? why?
+        entity.set_dirty(True)
+        # FIXME: comments from the .yaml file are lost ... as a quick solution,
+        # you could manually find every line from the YAML that starts with a '#'
+        # and dump that into a property.
+
+    print('Parsing .morph files...')
+    for dirname, dirnames, filenames in os.walk(root_dir):
+        if '.git' in dirnames:
+            dirnames.remove('.git')
+        for filename in sorted(filenames):
+            if filename.endswith('.morph'):
+                load_morph(os.path.join(dirname, filename))
+
+    print('Committing to database...')
+    try:
+        session.commit()
+    except Exception as e:
+        # The DATABASE setting lives in the calling script, not in this
+        # module, so recognise Virtuoso by its error text alone.
+        if VIRTUOSO_READ_ONLY_ERROR in str(e):
+            print("Permission denied trying to update the database via the "
+                  "SPARQL endpoint. By default this endpoint is read-only. "
+                  "To enable write access, run the `isql` or `isql-vt` "
+                  "commandline interface and run the following statement:\n\n"
+                  " grant SPARQL_UPDATE to \"SPARQL\";\n\n"
+                  "WARNING! Only do this if you're using a local test instance "
+                  "of Virtuoso. You need to set up a real authenticated user "
+                  "account if using an instance of Virtuoso that has "
+                  "important data on it.")
+        else:
+            raise
+
+    store.save()
diff --git a/schema/surf-test.py b/schema/surf-test.py
index f5d47e2c..0f0fab87 100644
--- a/schema/surf-test.py
+++ b/schema/surf-test.py
@@ -1,6 +1,5 @@
-# Surf test
-
-# https://github.com/cosminbasca/surfrdf
+# Test of various things you can do with an RDF schema for definitions.
+# Using SurfRDF: https://github.com/cosminbasca/surfrdf
 
 
 import rdflib
@@ -10,6 +9,8 @@ import yaml
 import os
 import warnings
 
+import parse
+
 
 DATABASE = 'memory'
 #DATABASE = 'virtuoso'
@@ -36,222 +37,18 @@ elif DATABASE == 'virtuoso':
                        endpoint='http://localhost:8890/sparql',
                        default_context='http://example.com')
 
-# This doesn't actually achieve anything. It also doesn't work with the
-# SPARQL writer backend.
-#store.load_triples(source='baserock-owl-schema.turtle', format='turtle')
-# This doesn't do anything either.
-#graph = rdflib.Graph()
-#graph.parse("baserock-owl-schema.turtle", format="turtle")
 
 session = surf.Session(store)
 
-OWL_Class = session.get_class(surf.ns.OWL.Class)
-
-classes = OWL_Class.all()
-for cl in classes:
-    print cl
+parse.load_all_morphologies(session, store)
 
-
-Chunk = session.get_class(surf.ns.BASEROCK.Chunk)
-ChunkReference = session.get_class(surf.ns.BASEROCK.ChunkReference)
-Stratum = session.get_class(surf.ns.BASEROCK.Stratum)
-StratumArtifact = session.get_class(surf.ns.BASEROCK.StratumArtifact)
-System = session.get_class(surf.ns.BASEROCK.System)
-SystemDeployment = session.get_class(surf.ns.BASEROCK.SystemDeployment)
 Cluster = session.get_class(surf.ns.BASEROCK.Cluster)
-
-
-def load_morph(path):
-    try:
-        with open(path) as f:
-            text = f.read()
-        contents = yaml.safe_load(text)
-        morph_type = contents['kind']
-        assert 'name' in contents
-        assert contents['kind'] in ['cluster', 'system', 'stratum', 'chunk']
-    except Exception as e:
-        warnings.warn("Problem loading %s: %s" % (path, e))
-
-    # FIXME:
-    base_uri = 'http://example.com/'
-
-    entity = None
-
-    # Note the 'surf' library doesn't seem to do any kind of validity checking
-    # so you can insert whatever random data you feel like, if you want.
-
-    if contents['kind'] == 'chunk':
-        chunk_uri = base_uri + 'chunks/' + contents['name']
-        entity = chunk = Chunk(chunk_uri)
-
-        # FIXME: I think order is lost here !!!!!
-        if 'pre-configure-commands' in contents:
-            chunk.baserock_preConfigureCommands = contents['pre-configure-commands']
-        if 'configure-commands' in contents:
-            chunk.baserock_configureCommands = contents['configure-commands']
-        if 'post-configure-commands' in contents:
-            chunk.baserock_postConfigureCommands = contents['post-configure-commands']
-        if 'pre-build-commands' in contents:
-            chunk.baserock_preBuildCommands = contents['pre-build-commands']
-        if 'build-commands' in contents:
-            chunk.baserock_buildCommands = contents['build-commands']
-        if 'post-build-commands' in contents:
-            chunk.baserock_postBuildCommands = contents['post-build-commands']
-        if 'pre-install-commands' in contents:
-            chunk.baserock_preInstallCommands = contents['pre-install-commands']
-        if 'install-commands' in contents:
-            chunk.baserock_installCommands = contents['install-commands']
-        if 'post-install-commands' in contents:
-            chunk.baserock_postInstallCommands = contents['post-install-commands']
-
-    elif contents['kind'] == 'stratum':
-        stratum_uri = base_uri + 'strata/' + contents['name']
-        entity = stratum = Stratum(stratum_uri)
-
-        stratum_build_deps = []
-        for entry in contents.get('build-depends', []):
-            build_dep_uri = base_uri + 'strata/' + entry['morph']
-            stratum_build_deps.append(rdflib.URIRef(build_dep_uri))
-        stratum.baserock_hasBuildDependency = stratum_build_deps
-
-        artifacts = []
-        for entry in contents.get('products', []):
-            artifact_uri = stratum_uri + '/products/' + entry['artifact']
-            artifact = StratumArtifact(artifact_uri)
-            # FIXME: order probably lost here
-            if 'includes' in entry:
-                artifact.baserock_includes = entry['includes']
-            artifacts.append(artifact)
-        stratum.baserock_produces = artifacts
-
-        chunk_refs = []
-        for entry in contents.get('chunks', []):
-            chunk_ref_uri = stratum_uri + '/chunk-refs/' + entry['name']
-            chunk_ref = ChunkReference(chunk_ref_uri)
-
-            # FIXME: this ignores the 'morph' field, and assumes 'name' is
-            # usable as-is.
-            chunk_uri = base_uri + 'chunks/' + entry['name']
-            chunk_ref.baserock_refersToChunk = rdflib.URIRef(chunk_uri)
-
-            chunk_ref.baserock_repo = entry['repo']
-            chunk_ref.baserock_ref = entry['ref']
-            if 'unpetrify-ref' in entry:
-                chunk_ref.baserock_unpetrifyRef = entry['unpetrify-ref']
-            chunk_ref.baserock_buildMode = entry.get('build-mode', 'normal')
-            chunk_ref.baserock_prefix = entry.get('prefix', '/usr')
-
-            chunk_ref_build_deps = []
-            for entry_dep in entry.get('build-depends', []):
-                build_dep_uri = stratum_uri + '/chunk-refs/' + entry_dep
-                chunk_ref_build_deps.append(build_dep_uri)
-            chunk_ref.baserock_hasChunkBuildDependency = chunk_ref_build_deps
-
-            chunk_refs.append(chunk_ref)
-
-        stratum.baserock_containsChunkReference = chunk_refs
-
-    elif contents['kind'] == 'system':
-        system_uri = base_uri + 'systems/' + contents['name']
-        entity = system = System(system_uri)
-
-        system.baserock_arch = contents['arch']
-
-        stratum_artifacts = []
-        for entry in contents.get('strata', []):
-            # FIXME: need to include all strata if 'artifacts' isn't specified,
-            # which is difficult becausee they might not all be loaded yet ...
-            # so for now I cheat and just assume -runtime and -devel. If there
-            # are extra artifacts for the stratum they won't be incuded by
-            # default. I'm not sure if this is how Morph behaves or not.
-            artifacts = entry.get('artifacts')
-            if artifacts is None:
-                artifacts = ['%s-runtime' % entry['name'],
-                             '%s-devel' % entry['name']]
-            for artifact in artifacts:
-                artifact_uri = (base_uri + '/strata/' + entry['name'] +
-                                '/products/' + artifact)
-                stratum_artifacts.append(artifact_uri)
-        system.baserock_containsStratumArtifact = stratum_artifacts
-
-        system.baserock_hasConfigurationExtension = \
-            contents.get('configuration-extensions', [])
-
-    elif contents['kind'] == 'cluster':
-        cluster_uri = base_uri + 'clusters/' + contents['name']
-        entity = cluster = Cluster(cluster_uri)
-
-        deployments = []
-        for entry in contents.get('systems', []):
-            # FIXME: can't get the URI from the 'morph' entry... need to load
-            # the actual .morph file and get the name from there.
-            system_uri = 'http://FIXME'
-
-            # FIXME: ignores deploy-defaults at present
-            for label, details in entry['deploy'].items():
-                deployment_uri = cluster_uri + '/' + label
-                deployment = SystemDeployment(deployment_uri)
-
-                deployment.baserock_deploysSystem = rdflib.URIRef(system_uri)
-                deployment.baserock_hasLabel = label
-
-                deployment.baserock_hasType = details['type']
-                deployment.baserock_hasLocation = details['location']
-
-                settings = []
-                for key, value in details.items():
-                    if key in ['type', 'location']:
-                        continue
-                    # FIXME: RDF must have a way of representing arbitrary
-                    # key/values better than using a string with an = sign...
-                    settings.append('%s=%s' % (key,value))
-                deployment.baserock_hasConfigurationSetting = settings
-                deployments.append(deployment)
-
-        cluster.baserock_deploysSystem = deployments
-
-    if 'description' in contents:
-        entity.baserock_description = contents['description']
-
-    # FIXME: is this needed? why?
-    entity.set_dirty(True)
-    # FIXME: comments from the .yaml file are lost ... as a quick solution,
-    # you could manually find every line from the YAML that starts with a '#'
-    # and dump that into a property.
-
-
-
-print 'Parsing .morph files...'
-for dirname, dirnames, filenames in os.walk('..'):
-    if '.git' in dirnames:
-        dirnames.remove('.git')
-    for filename in sorted(filenames):
-        if filename.endswith('.morph'):
-            load_morph(os.path.join(dirname, filename))
-
-print 'Committing to database...'
-try:
-    session.commit()
-except Exception as e:
-    if DATABASE=='virtuoso' and \
-            'Virtuoso 42000 Error SR186: No permission to execute procedure DB' in e.message:
-        print("Permission denied trying to update the database via the "
-              "SPARQL endpoint. By default this endpoint is read-only. "
-              "To enable write access, run the `isql` or `isql-vt` "
-              "commandline interface and run the following statement:\n\n"
-              " grant SPARQL_UPDATE to \"SPARQL\";\n\n"
-              "WARNING! Only do this if you're using a local test instance "
-              "of Virtuoso. You need to set up a real authenticated user "
-              "account if using an instance of Virtuoso that has "
-              "important data on it.")
-    else:
-        raise
-
-
-store.save()
-
-#Cluster = session.get_class(surf.ns.BASEROCK.Cluster)
-#cluster = Cluster.all()
-#for s in cluster:
-#    s.load()
-#    print s.serialize('json')
+cluster = Cluster.all()
+for s in cluster:
+    s.load()
+    # hack: round-trip through json only to pretty-print the first cluster
+    text = s.serialize('json')
+    import json
+    data = json.loads(text)
+    print json.dumps(data, indent=4)
+    break
-- 
cgit v1.2.1