1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
import rdflib
class ValidationError(Exception):
    '''Base class for errors raised while validating data against a schema.'''
class UnknownClass(ValidationError):
    '''Raised when a resource has a type that the schema does not define.

    The offending class is kept on the ``owl_class`` attribute so callers can
    inspect it without having to parse the message.
    '''

    def __init__(self, owl_class):
        # Wrap the argument in a 1-tuple: a bare ``%`` would try to unpack
        # 'owl_class' itself if it ever happened to be a tuple.
        super(UnknownClass, self).__init__(
            "Class %s is not defined by the given schema." % (owl_class,))
        self.owl_class = owl_class
def check_data_against_schema(data, schema):
    '''Validate data against a schema.

    The data is assumed to be an RDFLib.Graph instance containing a set of
    arbitrary triples.

    The schema is assumed to be an RDFLib.Graph instance containing one or more
    OWL ontologies.

    This function assumes 'data' should be completely authoritative according
    to the schema (i.e. everything that must be known about a resource is
    known). You can see this as assuming a "closed world" rather than an
    "open world".

    It also assumes there should be nothing in 'data' that the schema does not
    describe.

    Every distinct subject in 'data' is wrapped in a Resource and passed to
    validate_resource(); any exception raised there propagates to the caller.
    '''
    # Graph.subjects() yields the subject of every triple, so a resource
    # described by several triples would otherwise be validated repeatedly.
    for uriref in set(data.subjects()):
        resource = rdflib.resource.Resource(data, uriref)
        validate_resource(resource, data, schema)
def validate_resource(resource, data, schema):
    '''Validate a single resource from 'data' against the schema.

    Checks that every rdf:type of 'resource' is declared as an owl:Class in
    'schema', then hands each property/value pair of the resource to
    validate_resource_property().

    'resource' is expected to be an rdflib.resource.Resource; 'data' and
    'schema' are forwarded unchanged to validate_resource_property().

    Raises UnknownClass if the resource has a type that the schema does not
    declare as an owl:Class.
    '''
    RDF = rdflib.RDF
    OWL = rdflib.OWL

    # Are the types of 'resource' all classes defined in the schema?
    # Slicing a Graph returns a one-shot generator; materialise it so every
    # membership test below sees the complete set of classes.
    classes = set(schema[:RDF.type:OWL.Class])

    # Resource.value() returns at most one object; objects() yields them all.
    for resource_type in resource.objects(RDF.type):
        # Resource-wrapped nodes expose the underlying term as .identifier;
        # anything without that attribute is compared as-is.
        type_node = getattr(resource_type, 'identifier', resource_type)
        if type_node not in classes:
            raise UnknownClass(resource_type)

    for prop, value in resource.predicate_objects():
        validate_resource_property(resource, prop, value, data, schema)
def validate_resource_property(resource, prop, value, data, schema):
    '''Look up what 'schema' says about one property of a resource.

    'prop' and 'value' are one pair as yielded by
    resource.predicate_objects(). No checks are enforced yet: the statements
    the schema makes about the property are collected and logged at debug
    level only, and the function returns None.
    '''
    import logging

    log = logging.getLogger(__name__)

    # 'prop' is normally a Resource bound to the *data* graph; take its plain
    # identifier so the lookup below runs against the schema instead. A bare
    # node without an .identifier attribute is used as-is.
    prop_uriref = getattr(prop, 'identifier', prop)

    # Bind the property to the schema graph and list the statements made
    # about it. predicate_objects() is the Resource counterpart of
    # schema[prop_uriref:]; Resource.items() would instead walk an RDF
    # collection (rdf:first/rdf:rest) and is not a dict-style listing.
    schema_prop = rdflib.resource.Resource(schema, prop_uriref)
    statements = list(schema_prop.predicate_objects())
    log.debug("Schema statements about %s: %r", prop_uriref, statements)

    # TODO: check 'value' against the collected statements.
    # FIXME: some properties are defined in rdfs, dc, owl, .... need those
    # ontologies available to validate too.
|