summaryrefslogtreecommitdiff
path: root/schema/validate.py
blob: 56eb723015d8495905c719bb3da599f3f814a3f3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import rdflib


class ValidationError(Exception):
    '''Base class for the validation errors defined in this module.'''

class UnknownClass(ValidationError):
    '''Error reporting a class that the given schema does not define.'''

    def __init__(self, owl_class):
        # 'owl_class' is interpolated with %s, so anything printable works.
        message = "Class %s is not defined by the given schema." % owl_class
        super(UnknownClass, self).__init__(message)


def check_data_against_schema(data, schema):
    '''Validate data against a schema.

    The data is assumed to be an RDFLib.Graph instance containing a set of
    arbitrary triples.

    The schema is assumed to be an RDFLib.Graph instance containing one or more
    OWL ontologies.

    This function assumes 'data' should be completely authoritative according
    to the schema (i.e. everything that must be known about a resource is
    known). You can see this as assuming a "closed world" rather than an "open
    world". It also assumes there should be nothing in 'data' that the schema
    does not describe.

    Every distinct subject of 'data' is wrapped in a Resource and passed to
    validate_resource() once; any exception raised there propagates to the
    caller.

    '''
    # Graph.subjects() yields the subject of every triple, so a resource that
    # is described by N triples would otherwise be validated N times. Track
    # what has been handled while keeping the original iteration order.
    seen = set()
    for uriref in data.subjects():
        if uriref in seen:
            continue
        seen.add(uriref)
        resource = rdflib.resource.Resource(data, uriref)
        validate_resource(resource, data, schema)


def validate_resource(resource, data, schema):
    '''Validate a single resource from 'data' against 'schema'.

    'resource' is a Resource wrapping one subject of the 'data' graph.
    'schema' is the graph holding the OWL ontologies; 'data' is only passed
    through to validate_resource_property().

    First every rdf:type of the resource is checked against the owl:Class
    definitions found in the schema, then each (property, value) pair of the
    resource is handed to validate_resource_property().

    Raises UnknownClass if one of the resource's types is not declared as an
    owl:Class in the schema. A resource without any rdf:type passes the type
    check.

    '''
    RDF = rdflib.RDF
    OWL = rdflib.OWL

    # Are the type of 'resource' all classes defined in the schema?
    #
    # Slicing a graph gives a one-shot generator; a membership test consumes
    # it, so a second lookup could miss a class that really is defined.
    # Materialise it into a set so it can be queried repeatedly.
    classes = set(schema[:RDF.type:OWL.Class])

    # Resource.value() returns at most one node, not the collection of types;
    # objects() yields every rdf:type the resource has.
    for resource_type in resource.objects(RDF.type):
        # URIs and blank nodes come back wrapped in a Resource, anything else
        # (e.g. a literal) comes back as the bare node.
        type_id = getattr(resource_type, 'identifier', resource_type)
        if type_id not in classes:
            raise UnknownClass(resource_type)

    for prop, value in resource.predicate_objects():
        validate_resource_property(resource, prop, value, data, schema)


def validate_resource_property(resource, prop, value, data, schema):
    '''Look up what 'schema' says about one property of 'resource'.

    'prop' is the property as a Resource (it must expose '.identifier');
    'schema' is the graph holding the ontologies. 'resource', 'value' and
    'data' are accepted for the eventual validation logic but are not used
    yet.

    This is still a stub: it collects the schema's (predicate, object) pairs
    for the property and logs them at debug level. No check is performed,
    nothing is raised and None is returned.

    '''
    import logging

    prop_uriref = prop.identifier

    # Query the schema with the property's identifier. An earlier attempt
    # wrapped 'prop' itself (already a Resource) in a new Resource and called
    # items(), which walks an RDF collection rather than listing the
    # predicate/object pairs, and so produced no results.
    description = list(schema[prop_uriref:])
    logging.getLogger(__name__).debug(
        "Schema statements about %s: %r", prop_uriref, description)

    # FIXME: some properties are defined in rdfs, dc, owl, .... need those
    # ontologies available to validate too.

    # TODO: validate 'value' against 'description' once the above is solved.