summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--rdflib/plugins/parsers/hext.py13
-rw-r--r--rdflib/plugins/serializers/hext.py14
-rw-r--r--test/test_parser_hext.py39
-rw-r--r--test/test_roundtrip.py25
-rw-r--r--test/test_serializer_hext.py74
5 files changed, 153 insertions, 12 deletions
diff --git a/rdflib/plugins/parsers/hext.py b/rdflib/plugins/parsers/hext.py
index 59e045cf..fab70c78 100644
--- a/rdflib/plugins/parsers/hext.py
+++ b/rdflib/plugins/parsers/hext.py
@@ -24,14 +24,23 @@ class HextuplesParser(Parser):
pass
def _load_json_line(self, line: str):
- return [x if x != "" else None for x in json.loads(line)]
+ # this complex handling is because the 'value' component is
+ # allowed to be "" but not None
+ # all other "" values are treated as None
+ ret1 = json.loads(line)
+ ret2 = [x if x != "" else None for x in ret1]
+ if ret1[2] == "":
+ ret2[2] = ""
+ return ret2
def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Union[str, None]]):
# all values check
# subject, predicate, value, datatype cannot be None
# language and graph may be None
if tup[0] is None or tup[1] is None or tup[2] is None or tup[3] is None:
- raise ValueError("subject, predicate, value, datatype cannot be None")
+ raise ValueError(
+ "subject, predicate, value, datatype cannot be None. Given: "
+ f"{tup}")
# 1 - subject
s: Union[URIRef, BNode]
diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py
index c86882a2..3fdf3684 100644
--- a/rdflib/plugins/serializers/hext.py
+++ b/rdflib/plugins/serializers/hext.py
@@ -2,7 +2,8 @@
HextuplesSerializer RDF graph serializer for RDFLib.
See <https://github.com/ontola/hextuples> for details about the format.
"""
-from typing import IO, Optional, Union
+from typing import IO, Optional, Type, Union
+import json
from rdflib.graph import Graph, ConjunctiveGraph
from rdflib.term import Literal, URIRef, Node, BNode
from rdflib.serializer import Serializer
@@ -19,7 +20,9 @@ class HextuplesSerializer(Serializer):
def __init__(self, store: Union[Graph, ConjunctiveGraph]):
self.default_context: Optional[Node]
+ self.graph_type: Type[Graph]
if isinstance(store, ConjunctiveGraph):
+ self.graph_type = ConjunctiveGraph
self.contexts = list(store.contexts())
if store.default_context:
self.default_context = store.default_context
@@ -27,6 +30,7 @@ class HextuplesSerializer(Serializer):
else:
self.default_context = None
else:
+ self.graph_type = Graph
self.contexts = [store]
self.default_context = None
@@ -101,14 +105,14 @@ class HextuplesSerializer(Serializer):
else:
language = ""
- return '["%s", "%s", "%s", "%s", "%s", "%s"]\n' % (
+ return json.dumps([
self._iri_or_bn(triple[0]),
triple[1],
value,
datatype,
language,
- self._context(context),
- )
+ self._context(context)
+ ]) + "\n"
else: # do not return anything for non-IRIs or BNs, e.g. QuotedGraph, Subjects
return None
@@ -121,7 +125,7 @@ class HextuplesSerializer(Serializer):
return None
def _context(self, context):
- if self.default_context is None:
+ if self.graph_type == Graph:
return ""
if context.identifier == "urn:x-rdflib:default":
return ""
diff --git a/test/test_parser_hext.py b/test/test_parser_hext.py
index 27d00838..fdf41911 100644
--- a/test/test_parser_hext.py
+++ b/test/test_parser_hext.py
@@ -22,13 +22,30 @@ def test_small_string():
assert len(d) == 10
+def test_small_string_cg():
+ s = """
+ ["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "globalId", "", ""]
+ ["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", ""]
+ ["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""]
+ ["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""]
+ ["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13:00", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""]
+ ["http://example.com/s01", "http://example.com/age", "42", "http://www.w3.org/2001/XMLSchema#integer", "", ""]
+ ["http://example.com/s01", "http://example.com/trueFalse", "false", ",http://www.w3.org/2001/XMLSchema#boolean", "", ""]
+ ["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "globalId", "", ""]
+ ["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "globalId", "", ""]
+ ["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "globalId", "", ""]
+ """
+ d = ConjunctiveGraph().parse(data=s, format="hext")
+ assert len(d) == 10
+
+
def test_small_file_singlegraph():
d = Dataset().parse(Path(__file__).parent / "test_parser_hext_singlegraph.ndjson", format="hext")
assert len(d) == 10
def test_small_file_multigraph():
- d = ConjunctiveGraph()
+ d = Dataset()
assert len(d) == 0
d.parse(
Path(__file__).parent / "test_parser_hext_multigraph.ndjson",
@@ -47,6 +64,26 @@ def test_small_file_multigraph():
assert total_triples == 18
+def test_small_file_multigraph_cg():
+ d = ConjunctiveGraph()
+ assert len(d) == 0
+ d.parse(
+ Path(__file__).parent / "test_parser_hext_multigraph.ndjson",
+ format="hext",
+ publicID=d.default_context.identifier
+ )
+
+ """There are 22 lines in the file test_parser_hext_multigraph.ndjson. When loaded
+ into a CG, we get only 18 quads since the CG can contextualise
+ the triples and thus deduplicate 4."""
+ total_triples = 0
+ # count all the triples in the ConjunctiveGraph
+ for context in d.contexts():
+ for triple in context.triples((None, None, None)):
+ total_triples += 1
+ assert total_triples == 18
+
+
def test_roundtrip():
# these are some RDF files that HexT can round-trip since they have no
# literals with no datatype declared:
diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py
index 3b083cdd..4a7b7acd 100644
--- a/test/test_roundtrip.py
+++ b/test/test_roundtrip.py
@@ -11,6 +11,7 @@ from _pytest.mark.structures import Mark, MarkDecorator, ParameterSet
import rdflib
import rdflib.compare
from rdflib.util import guess_format
+from rdflib.namespace import XSD
"""
Test round-tripping by all serializers/parser that are registered.
@@ -116,6 +117,14 @@ XFAILS = {
reason="rdflib.compare.isomorphic does not work for quoted graphs.",
raises=AssertionError,
),
+ ("hext", "n3-writer-test-22.n3"): pytest.mark.xfail(
+ reason='HexTuples conflates "" and ""^^xsd:string strings',
+ raises=AssertionError,
+ ),
+ ("hext", "rdf-test-21.n3"): pytest.mark.xfail(
+ reason='HexTuples conflates "" and ""^^xsd:string strings',
+ raises=AssertionError,
+ ),
}
# This is for files which can only be represented properly in one format
@@ -155,6 +164,18 @@ def roundtrip(infmt: str, testfmt: str, source: Path, verbose: bool = False) ->
g2 = rdflib.ConjunctiveGraph()
g2.parse(data=s, format=testfmt)
+ if testfmt == "hext":
+ # HexTuples always sets Literal("abc") -> Literal("abc", datatype=XSD.string)
+ # and this prevents roundtripping since most other formats don't equate "" with
+ # ""^^xsd:string, at least not in these tests
+ #
+ # So we have to scrub the literals' string datatype declarations...
+ for c in g2.contexts():
+ for s, p, o in c.triples((None, None, None)):
+ if type(o) == rdflib.Literal and o.datatype == XSD.string:
+ c.remove((s, p, o))
+ c.add((s, p, rdflib.Literal(str(o))))
+
if verbose:
both, first, second = rdflib.compare.graph_diff(g1, g2)
print("Diff:")
@@ -193,8 +214,8 @@ def get_formats() -> Set[str]:
def make_cases(files: Collection[Tuple[Path, str]]) -> Iterable[ParameterSet]:
formats = get_formats()
for testfmt in formats:
- if testfmt == "hext":
- continue
+ # if testfmt == "hext":
+ # continue
logging.debug("testfmt = %s", testfmt)
for f, infmt in files:
constrained_formats = CONSTRAINED_FORMAT_MAP.get(f.name, None)
diff --git a/test/test_serializer_hext.py b/test/test_serializer_hext.py
index c322a211..7231338f 100644
--- a/test/test_serializer_hext.py
+++ b/test/test_serializer_hext.py
@@ -1,7 +1,7 @@
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).parent.parent.absolute()))
-from rdflib import Dataset, Graph
+from rdflib import Dataset, Graph, ConjunctiveGraph
import json
@@ -31,7 +31,7 @@ def test_hext_graph():
g.parse(data=turtle_data, format="turtle")
out = g.serialize(format="hext")
- # note: cant' test for BNs in result as they will be different ever time
+ # note: can't test for BNs in result as they will be different every time
testing_lines = [
[False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", ""]'],
[False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", "", ""]'],
@@ -54,6 +54,76 @@ def test_hext_graph():
assert all([x[0] for x in testing_lines])
+def test_hext_cg():
+ """Tests ConjunctiveGraph data"""
+ d = ConjunctiveGraph()
+ trig_data = """
+ PREFIX ex: <http://example.com/>
+ PREFIX owl: <http://www.w3.org/2002/07/owl#>
+ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+
+ ex:g1 {
+ ex:s1
+ ex:p1 ex:o1 , ex:o2 ;
+ ex:p2 [
+ a owl:Thing ;
+ rdf:value "thingy" ;
+ ] ;
+ ex:p3 "Object 3" , "Object 4 - English"@en ;
+ ex:p4 "2021-12-03"^^xsd:date ;
+ ex:p5 42 ;
+ ex:p6 "42" ;
+ .
+ }
+
+ ex:g2 {
+ ex:s1
+ ex:p1 ex:o1 , ex:o2 ;
+ .
+ ex:s11 ex:p11 ex:o11 , ex:o12 .
+ }
+
+ # default graph triples
+ ex:s1 ex:p1 ex:o1 , ex:o2 .
+ ex:s21 ex:p21 ex:o21 , ex:o22 .
+
+ # other default graph triples
+ {
+ ex:s1 ex:p1 ex:o1 , ex:o2 .
+ }
+ """
+ d.parse(data=trig_data, format="trig", publicID=d.default_context.identifier)
+ out = d.serialize(format="hext")
+ # note: can't test for BNs in result as they will be different every time
+ testing_lines = [
+ [False, '["http://example.com/s21", "http://example.com/p21", "http://example.com/o21", "globalId", "", ""]'],
+ [False, '["http://example.com/s21", "http://example.com/p21", "http://example.com/o22", "globalId", "", ""]'],
+ [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", ""]'],
+ [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", ""]'],
+ [False, '["http://example.com/s11", "http://example.com/p11", "http://example.com/o12", "globalId", "", "http://example.com/g2"]'],
+ [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g2"]'],
+ [False, '["http://example.com/s11", "http://example.com/p11", "http://example.com/o11", "globalId", "", "http://example.com/g2"]'],
+ [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g2"]'],
+ [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g1"]'],
+ [False, '["http://example.com/s1", "http://example.com/p2"'],
+ [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'],
+ [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/2002/07/owl#Thing", "globalId", "", "http://example.com/g1"]'],
+ [False, '["http://example.com/s1", "http://example.com/p3", "Object 4 - English", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", "http://example.com/g1"]'],
+ [False, '["http://example.com/s1", "http://example.com/p6", "42", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'],
+ [False, '["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", "", "http://example.com/g1"]'],
+ [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g1"]'],
+ [False, '["http://example.com/s1", "http://example.com/p5", "42", "http://www.w3.org/2001/XMLSchema#integer", "", "http://example.com/g1"]'],
+ [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'],
+ ]
+ for line in out.splitlines():
+ for test in testing_lines:
+ if test[1] in line:
+ test[0] = True
+
+ assert all([x[0] for x in testing_lines])
+
+
def test_hext_dataset():
"""Tests context-aware (multigraph) data"""
d = Dataset()