summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sam Thursfield <sam.thursfield@codethink.co.uk> 2015-07-01 18:29:22 +0100
committer Sam Thursfield <sam.thursfield@codethink.co.uk> 2015-07-01 18:29:22 +0100
commit 11e5784ed2616b51a464d5e7c0b487cf575d08bf (patch)
tree 5a5a6587d2ed03fff29578399e66ed89fb7d55f0
parent 7677c810240d9d50f86b21d7eccd57a9d484ccef (diff)
download definitions-11e5784ed2616b51a464d5e7c0b487cf575d08bf.tar.gz
schema: Add prototype validation code
Current sticking points: there's a bug in rdflib that means the Resource() API doesn't work how I'd expect, and we need the RDF, RDFS, OWL and DC schemas available too in order to do a 'validation' stage.

Change-Id: I1d64fbb02af05724bcf967457c287600bb494d5a
-rw-r--r-- schema/surf-test.py 32
-rw-r--r-- schema/validate.py 71
2 files changed, 97 insertions, 6 deletions
diff --git a/schema/surf-test.py b/schema/surf-test.py
index 0f0fab87..6e60bfe5 100644
--- a/schema/surf-test.py
+++ b/schema/surf-test.py
@@ -10,6 +10,7 @@ import os
import warnings
import parse
+import validate
DATABASE = 'memory'
@@ -40,15 +41,34 @@ elif DATABASE == 'virtuoso':
session = surf.Session(store)
+
parse.load_all_morphologies(session, store)
+schema = rdflib.Graph()
+schema.parse("baserock-owl-schema.turtle", format="turtle")
+
+# Only works for 'memory' database, but I don't really care any more.
+all_data = store.reader.graph
+
+validate.check_data_against_schema(
+ data=all_data,
+ schema=schema)
+
+
+def serialize_to_json_ld(surflib_resource):
+ rdflib_graph = surflib_resource.graph()
+
+ context = {
+ "@vocab": "http://baserock.org/definitions/example-schema#",
+ "@language": "en"
+ }
+ # requires rdflib-jsonld Python module.
+ return rdflib_graph.serialize(format='json-ld', indent=4, context=context)
+
+
Cluster = session.get_class(surf.ns.BASEROCK.Cluster)
cluster = Cluster.all()
for s in cluster:
s.load()
- # hack
- text = s.serialize('json')
- import json
- data = json.loads(text)
- print json.dumps(data, indent=4)
- break
+ print serialize_to_json_ld(s)
+
diff --git a/schema/validate.py b/schema/validate.py
new file mode 100644
index 00000000..56eb7230
--- /dev/null
+++ b/schema/validate.py
@@ -0,0 +1,71 @@
+import rdflib
+
+
+class ValidationError(Exception):
+ pass
+
+class UnknownClass(ValidationError):
+ def __init__(self, owl_class):
+ super(UnknownClass, self).__init__(
+ "Class %s is not defined by the given schema." % owl_class)
+
+
+def check_data_against_schema(data, schema):
+ '''Validate data against a schema.
+
+ The data is assumed to be an RDFLib.Graph instance containing a set of
+ arbitrary triples.
+
+ The schema is assumed to be an RDFLib.Graph instance containing one or more
+ OWL ontologies.
+
+ This function assumes 'data' should be completely authoritative according to
+ the schema (i.e. everything that must be known about a resource is known).
+ You can see this as assuming a "closed world" rather than an "open world".
+ It also assumes there should be nothing in 'data' that the schema does not
+ describe.
+
+ '''
+ for uriref in data.subjects():
+ resource = rdflib.resource.Resource(data, uriref)
+ validate_resource(resource, data, schema)
+
+
+def validate_resource(resource, data, schema):
+ RDF = rdflib.RDF
+ OWL = rdflib.OWL
+
+ # Are all the types of 'resource' classes defined in the schema?
+ classes = schema[:RDF.type:OWL.Class]
+ resource_types = resource.value(RDF.type)
+
+ for resource_type in resource_types:
+ if resource_type.identifier not in classes:
+ raise UnknownClass(resource_type)
+
+ resource_property_value_pairs = resource.predicate_objects()
+ for prop, value in resource_property_value_pairs:
+ validate_resource_property(resource, prop, value, data,
+ schema)
+
+
+def validate_resource_property(resource, prop, value, data, schema):
+ RDF = rdflib.RDF
+ OWL = rdflib.OWL
+
+ resource_types = resource.value(RDF.type)
+
+ prop_uriref = prop.identifier
+
+ # FIXME: this gives no results at all ...
+ prop = rdflib.resource.Resource(schema, prop)
+ print list(prop.items())
+
+ # But this gives the expected results. :(
+ print list(schema[prop_uriref:])
+
+ # FIXME: some properties are defined in rdfs, dc, owl, .... need those
+ # ontologies available to validate too.
+
+ import pdb
+ pdb.set_trace()