diff options
Diffstat (limited to 'schema/validate.py')
-rw-r--r-- | schema/validate.py | 71 |
1 files changed, 71 insertions, 0 deletions
import logging

import rdflib
import rdflib.resource


logger = logging.getLogger(__name__)


class ValidationError(Exception):
    '''Base class for every error raised while validating data.'''


class UnknownClass(ValidationError):
    '''Raised when a resource has a type that the schema does not define.'''

    def __init__(self, owl_class):
        super(UnknownClass, self).__init__(
            "Class %s is not defined by the given schema." % owl_class)


def check_data_against_schema(data, schema):
    '''Validate data against a schema.

    The data is assumed to be an RDFLib.Graph instance containing a set of
    arbitrary triples.

    The schema is assumed to be an RDFLib.Graph instance containing one or more
    OWL ontologies.

    This function assumes 'data' should be completely authoritative according
    to the schema (i.e. everything that must be known about a resource is
    known). You can see this as assuming a "closed world" rather than an "open
    world". It also assumes there should be nothing in 'data' that the schema
    does not describe.

    Raises a ValidationError subclass for the first problem found; returns
    None when no problem is found.

    '''
    # Graph.subjects() may yield the same subject once per matching triple, so
    # remember what has been validated to avoid checking a resource repeatedly.
    seen = set()
    for uriref in data.subjects():
        if uriref in seen:
            continue
        seen.add(uriref)
        resource = rdflib.resource.Resource(data, uriref)
        validate_resource(resource, data, schema)


def validate_resource(resource, data, schema):
    '''Validate one resource of 'data' against 'schema'.

    'resource' is an rdflib.resource.Resource bound to the 'data' graph.

    Raises UnknownClass if any rdf:type of the resource is not declared as an
    owl:Class in 'schema'. Afterwards every (property, value) pair of the
    resource is passed to validate_resource_property().

    '''
    RDF = rdflib.RDF
    OWL = rdflib.OWL

    # Are the types of 'resource' all classes defined in the schema?
    #
    # The graph query returns a one-shot generator; build a set so that
    # membership can be tested once per type without exhausting it.
    classes = set(schema.subjects(RDF.type, OWL.Class))

    # Resource.value() returns at most one node, so use objects() to visit
    # every rdf:type statement of the resource.
    for resource_type in resource.objects(RDF.type):
        # URIRef and BNode objects come back wrapped in a Resource; anything
        # else (e.g. a literal) is returned as-is and has no '.identifier'.
        type_identifier = getattr(resource_type, 'identifier', resource_type)
        if type_identifier not in classes:
            raise UnknownClass(type_identifier)

    for prop, value in resource.predicate_objects():
        validate_resource_property(resource, prop, value, data, schema)


def validate_resource_property(resource, prop, value, data, schema):
    '''Look up how 'schema' describes one property used by 'resource'.

    'prop' is the property as yielded by Resource.predicate_objects() (a
    Resource wrapper); a plain URIRef is accepted too.

    Currently this only collects the schema's statements about the property
    and logs them at DEBUG level; no constraint is enforced yet and None is
    always returned.

    '''
    # The schema graph is keyed on plain nodes, so unwrap the Resource first.
    # Building Resource(schema, prop) from the wrapper itself matches nothing.
    prop_uriref = getattr(prop, 'identifier', prop)

    # Equivalent to schema[prop_uriref:]. Note that Resource.items() is not:
    # it walks an RDF collection instead of listing statements.
    prop_definition = list(schema.predicate_objects(prop_uriref))

    if prop_definition:
        logger.debug("Schema statements about property %s: %r",
                     prop_uriref, prop_definition)
    else:
        logger.debug("Schema does not describe property %s", prop_uriref)

    # FIXME: some properties are defined in rdfs, dc, owl, .... need those
    # ontologies available to validate too. Until they are, an undescribed
    # property cannot be treated as an error.