summary | refs | log | tree | commit | diff
path: root/schema/validate.py
diff options
context:
space:
mode:
Diffstat (limited to 'schema/validate.py')
-rw-r--r--schema/validate.py71
1 files changed, 71 insertions, 0 deletions
diff --git a/schema/validate.py b/schema/validate.py
new file mode 100644
index 00000000..56eb7230
--- /dev/null
+++ b/schema/validate.py
@@ -0,0 +1,71 @@
+import rdflib
+
+
+class ValidationError(Exception):
+    '''Base class for the errors raised by this validation module.'''
+
+class UnknownClass(ValidationError):
+    '''Error for a class that the given schema does not define.'''
+
+    def __init__(self, owl_class):
+        # Format the message up front so the base-class call stays short.
+        message = "Class %s is not defined by the given schema." % owl_class
+        super(UnknownClass, self).__init__(message)
+
+
+def check_data_against_schema(data, schema):
+    '''Validate data against a schema.
+
+    The data is assumed to be an rdflib.Graph instance containing a set of
+    arbitrary triples.
+
+    The schema is assumed to be an rdflib.Graph instance containing one or
+    more OWL ontologies.
+
+    This function assumes 'data' should be completely authoritative according
+    to the schema (i.e. everything that must be known about a resource is
+    known). You can see this as assuming a "closed world" rather than an
+    "open world". It also assumes there should be nothing in 'data' that the
+    schema does not describe.
+
+    Every distinct subject found in 'data' is wrapped in an
+    rdflib.resource.Resource and handed to validate_resource(); any error
+    raised there propagates to the caller.
+
+    '''
+    # Graph.subjects() can yield the same subject once for every triple it
+    # appears in, so collapse duplicates and validate each resource once.
+    for subject in set(data.subjects()):
+        resource = rdflib.resource.Resource(data, subject)
+        validate_resource(resource, data, schema)
+
+
+def validate_resource(resource, data, schema):
+    '''Validate a single resource against the schema.
+
+    'resource' is an rdflib.resource.Resource (check_data_against_schema()
+    builds it on top of 'data'). 'schema' is the graph holding the OWL
+    ontologies.
+
+    Raises UnknownClass if any rdf:type of the resource is not declared as
+    an owl:Class in 'schema'. Each (property, value) pair of the resource is
+    then passed to validate_resource_property().
+
+    '''
+    RDF = rdflib.RDF
+    OWL = rdflib.OWL
+
+    # Are the types of 'resource' all classes defined in the schema?
+    #
+    # Slicing a graph returns a one-shot generator; keep the result in a set
+    # so that every membership test below sees all of the classes.
+    classes = set(schema[:RDF.type:OWL.Class])
+
+    # Resource.value() returns at most one object, which cannot be looped
+    # over; Resource.objects() yields every rdf:type of the resource.
+    for resource_type in resource.objects(RDF.type):
+        # A literal used as a type has no 'identifier'; compare it as-is so
+        # that it is reported as unknown instead of raising AttributeError.
+        type_uriref = getattr(resource_type, 'identifier', resource_type)
+        if type_uriref not in classes:
+            raise UnknownClass(resource_type)
+
+    for prop, value in resource.predicate_objects():
+        validate_resource_property(resource, prop, value, data, schema)
+
+
+def validate_resource_property(resource, prop, value, data, schema):
+    '''Look up what the schema says about one property of a resource.
+
+    'prop' is the property, either as an rdflib.resource.Resource or as a
+    plain identifier. 'value' is the property's value on 'resource'.
+
+    This is still a stub: the statements the schema makes about the property
+    are collected and logged at debug level, but no rule is enforced yet and
+    nothing is returned. 'resource', 'value' and 'data' are accepted for the
+    benefit of the checks that remain to be written.
+
+    '''
+    import logging
+
+    # Use the bare identifier for the lookup. Keying the lookup on the
+    # Resource wrapper itself gave no results, because the schema graph
+    # stores plain identifiers rather than Resource objects.
+    prop_uriref = getattr(prop, 'identifier', prop)
+
+    # All (predicate, object) pairs the schema holds for this property.
+    # NOTE(review): Resource.items() appears to walk an RDF collection, so
+    # it is not a substitute for this lookup -- confirm against rdflib docs.
+    prop_description = list(schema[prop_uriref:])
+
+    logging.getLogger(__name__).debug(
+        "Schema statements about %s: %s", prop_uriref, prop_description)
+
+    # FIXME: some properties are defined in rdfs, dc, owl, .... need those
+    # ontologies available to validate too.