summaryrefslogtreecommitdiff
path: root/rdflib/plugins/serializers/hext.py
blob: c86882a2bc2980246ee99bd74fb5562d275bf99f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
HextuplesSerializer RDF graph serializer for RDFLib.
See <https://github.com/ontola/hextuples> for details about the format.
"""
import json
import warnings
from typing import IO, Optional, Union

from rdflib.graph import Graph, ConjunctiveGraph
from rdflib.namespace import RDF, XSD
from rdflib.serializer import Serializer
from rdflib.term import Literal, URIRef, Node, BNode

# Names exported by ``from <this module> import *``: only the serializer class.
__all__ = ["HextuplesSerializer"]


class HextuplesSerializer(Serializer):
    """
    Serializes RDF graphs to Hextuples format.

    Every statement is written as one line containing a JSON array of six
    strings: subject, predicate, value, datatype, language and graph.
    See <https://github.com/ontola/hextuples> for details about the format.
    """

    def __init__(self, store: Union[Graph, ConjunctiveGraph]):
        """
        Collect the contexts (graphs) of ``store`` that will be serialized.

        :param store: the graph to serialize. For a ``ConjunctiveGraph`` all
            of its contexts are serialized; for any other graph only the
            graph itself is.
        """
        self.default_context: Optional[Node]
        # Graph names are only emitted for multi-graph stores. Keep this as a
        # separate flag because ``default_context`` is also None when a
        # ConjunctiveGraph merely has an *empty* default context.
        self._is_conjunctive = isinstance(store, ConjunctiveGraph)
        if isinstance(store, ConjunctiveGraph):
            self.contexts = list(store.contexts())
            # NOTE: this is a truthiness test on the graph object, so an
            # empty default context is treated like a missing one.
            if store.default_context:
                self.default_context = store.default_context
                default_id = store.default_context.identifier
                # Only add the default context if contexts() did not already
                # yield it, otherwise its triples would be written twice.
                if all(ctx.identifier != default_id for ctx in self.contexts):
                    self.contexts.append(store.default_context)
            else:
                self.default_context = None
        else:
            self.contexts = [store]
            self.default_context = None

        Serializer.__init__(self, store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = "utf-8",
        **kwargs,
    ):
        """
        Write every serializable triple of every collected context to ``stream``.

        :param stream: binary stream receiving one UTF-8 encoded line per triple.
        :param base: ignored; a warning is issued if it is not None.
        :param encoding: ignored; a warning is issued for anything other than
            None or ``"utf-8"`` and UTF-8 is used regardless.
        :param kwargs: accepted for interface compatibility and not used.
        :raises Exception: if the underlying store is formula-aware.
        """
        if base is not None:
            warnings.warn(
                "base has no meaning for Hextuples serialization. "
                "I will ignore this value"
            )

        if encoding not in [None, "utf-8"]:
            warnings.warn(
                f"Hextuples files are always utf-8 encoded. "
                f"I was passed: {encoding}, "
                "but I'm still going to use utf-8 anyway!"
            )

        if self.store.formula_aware is True:
            raise Exception(
                "Hextuple serialization can't (yet) handle formula-aware stores"
            )

        for context in self.contexts:
            for triple in context:
                hl = self._hex_line(triple, context)
                # _hex_line returns None for triples it cannot represent.
                if hl is not None:
                    stream.write(hl.encode("utf-8"))

    def _hex_line(self, triple, context):
        """
        Build the Hextuples line for one triple.

        :param triple: a (subject, predicate, object) tuple.
        :param context: the graph the triple was read from.
        :return: a newline-terminated JSON array string, or None when the
            subject is not a URIRef/BNode or the object is not a
            URIRef/BNode/Literal (e.g. a QuotedGraph).
        """
        subject, predicate, obj = triple[0], triple[1], triple[2]

        # exclude QuotedGraph and other objects used as subjects
        if not isinstance(subject, (URIRef, BNode)):
            return None

        language = ""
        if isinstance(obj, URIRef):
            value = self._iri_or_bn(obj)
            datatype = "globalId"
        elif isinstance(obj, BNode):
            value = self._iri_or_bn(obj)
            datatype = "localId"
        elif isinstance(obj, Literal):
            value = str(obj)
            if obj.language is not None:
                language = str(obj.language)
            # An explicit datatype wins; otherwise a language tag implies
            # rdf:langString and a plain literal is an xsd:string.
            if obj.datatype is not None:
                datatype = str(obj.datatype)
            elif obj.language is not None:
                datatype = str(RDF.langString)
            else:
                datatype = str(XSD.string)
        else:
            return None  # can't handle non URI, BN or Literal Object (QuotedGraph)

        fields = [
            self._iri_or_bn(subject),
            str(predicate),
            value,
            datatype,
            language,
            str(self._context(context)),
        ]
        # json.dumps escapes quotes, backslashes and control characters, so a
        # literal containing them still yields one valid JSON line.
        # ensure_ascii=False keeps non-ASCII text as raw characters, which the
        # caller then encodes as UTF-8.
        return json.dumps(fields, ensure_ascii=False) + "\n"

    def _iri_or_bn(self, i_):
        """
        Return the string form of a node used in subject or value position.

        :param i_: the node to render.
        :return: the IRI text for a URIRef, the ``n3()`` form for a BNode,
            and None for anything else.
        """
        if isinstance(i_, URIRef):
            return f"{i_}"
        elif isinstance(i_, BNode):
            return f"{i_.n3()}"
        else:
            return None

    def _context(self, context):
        """
        Return the graph name to emit for ``context``.

        :param context: the graph a triple was read from (may be None).
        :return: ``""`` when the serialized store is not a ConjunctiveGraph,
            when ``context`` is None, or when ``context`` is the default
            graph; otherwise ``context.identifier``.
        """
        if not self._is_conjunctive or context is None:
            return ""

        identifier = context.identifier
        if identifier == "urn:x-rdflib:default":
            return ""
        if (
            self.default_context is not None
            and identifier == self.default_context.identifier
        ):
            return ""
        return identifier