import logging

import rdflib
import rdflib.resource


logger = logging.getLogger(__name__)


class ValidationError(Exception):
    '''Base class for every error raised while validating data.'''


class UnknownClass(ValidationError):
    '''Raised when a resource has a type that the schema does not define.'''

    def __init__(self, owl_class):
        super(UnknownClass, self).__init__(
            "Class %s is not defined by the given schema." % owl_class)


def check_data_against_schema(data, schema):
    '''Validate data against a schema.

    The data is assumed to be an RDFLib.Graph instance containing a set of
    arbitrary triples.

    The schema is assumed to be an RDFLib.Graph instance containing one or
    more OWL ontologies.

    This function assumes 'data' should be completely authoritative according
    to the schema (i.e. everything that must be known about a resource is
    known). You can see this as assuming a "closed world" rather than an
    "open world". It also assumes there should be nothing in 'data' that the
    schema does not describe.

    Raises a ValidationError subclass (currently only UnknownClass) for the
    first problem found.

    '''
    # Graph.subjects() yields a subject once per matching triple, so
    # de-duplicate to validate each resource only once.
    for uriref in set(data.subjects()):
        resource = rdflib.resource.Resource(data, uriref)
        validate_resource(resource, data, schema)


def validate_resource(resource, data, schema):
    '''Validate one resource from 'data' against 'schema'.

    Checks that every rdf:type of 'resource' is declared as an owl:Class in
    'schema', then hands each (property, value) pair of the resource to
    validate_resource_property().

    Raises UnknownClass if the resource has a type the schema lacks.

    '''
    RDF = rdflib.RDF
    OWL = rdflib.OWL

    # Are the types of 'resource' all classes defined in the schema?
    #
    # The lookup result is a one-shot generator; materialise it so that
    # membership can be tested more than once.
    classes = set(schema.subjects(RDF.type, OWL.Class))

    # A resource may have several types, so ask for all of them rather than
    # the single value that Resource.value() returns.
    for resource_type in resource.objects(RDF.type):
        # Resource.objects() wraps URIRefs and BNodes in Resource objects but
        # leaves other nodes as-is; compare plain identifiers in both cases.
        type_identifier = getattr(resource_type, 'identifier', resource_type)
        if type_identifier not in classes:
            raise UnknownClass(type_identifier)

    for prop, value in resource.predicate_objects():
        validate_resource_property(resource, prop, value, data, schema)


def validate_resource_property(resource, prop, value, data, schema):
    '''Validate a single property of 'resource' against 'schema'.

    'prop' and 'value' are one pair as produced by
    resource.predicate_objects().

    NOTE: this is not implemented yet. It only looks up what the schema
    says about 'prop' and logs it at debug level; nothing is checked and
    nothing is raised.

    '''
    # 'prop' normally arrives wrapped in a Resource bound to the data graph.
    # The schema must be queried with the bare identifier: wrapping the
    # Resource object itself as an identifier matches no triples.
    prop_uriref = getattr(prop, 'identifier', prop)
    schema_prop = rdflib.resource.Resource(schema, prop_uriref)

    if logger.isEnabledFor(logging.DEBUG):
        # predicate_objects() lists the statements about the property;
        # Resource.items() would instead walk it as an RDF collection.
        logger.debug(
            "Schema statements about %s: %s",
            prop_uriref, list(schema_prop.predicate_objects()))

    # TODO: actually validate 'value' against the schema's description of
    # the property.
    # FIXME: some properties are defined in rdfs, dc, owl, .... need those
    # ontologies available to validate too.