5 files changed, 153 insertions, 12 deletions
diff --git a/rdflib/plugins/parsers/hext.py b/rdflib/plugins/parsers/hext.py
index 59e045cf..fab70c78 100644
--- a/rdflib/plugins/parsers/hext.py
+++ b/rdflib/plugins/parsers/hext.py
@@ -24,14 +24,23 @@ class HextuplesParser(Parser):
         pass
 
     def _load_json_line(self, line: str):
-        return [x if x != "" else None for x in json.loads(line)]
+        # this complex handling is because the 'value' component is
+        # allowed to be "" but not None
+        # all other "" values are treated as None
+        ret1 = json.loads(line)
+        ret2 = [x if x != "" else None for x in ret1]
+        if ret1[2] == "":
+            ret2[2] = ""
+        return ret2
 
     def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Union[str, None]]):
         # all values check
         # subject, predicate, value, datatype cannot be None
         # language and graph may be None
         if tup[0] is None or tup[1] is None or tup[2] is None or tup[3] is None:
-            raise ValueError("subject, predicate, value, datatype cannot be None")
+            raise ValueError(
+                "subject, predicate, value, datatype cannot be None. Given: "
+                f"{tup}")
 
         # 1 - subject
         s: Union[URIRef, BNode]
diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py
index c86882a2..3fdf3684 100644
--- a/rdflib/plugins/serializers/hext.py
+++ b/rdflib/plugins/serializers/hext.py
@@ -2,7 +2,8 @@
 HextuplesSerializer RDF graph serializer for RDFLib.
 See <https://github.com/ontola/hextuples> for details about the format.
 """
-from typing import IO, Optional, Union
+from typing import IO, Optional, Type, Union
+import json
 from rdflib.graph import Graph, ConjunctiveGraph
 from rdflib.term import Literal, URIRef, Node, BNode
 from rdflib.serializer import Serializer
@@ -19,7 +20,9 @@ class HextuplesSerializer(Serializer):
 
     def __init__(self, store: Union[Graph, ConjunctiveGraph]):
         self.default_context: Optional[Node]
+        self.graph_type: Type[Graph]
         if isinstance(store, ConjunctiveGraph):
+            self.graph_type = ConjunctiveGraph
             self.contexts = list(store.contexts())
             if store.default_context:
                 self.default_context = store.default_context
@@ -27,6 +30,7 @@ class HextuplesSerializer(Serializer):
             else:
                 self.default_context = None
         else:
+            self.graph_type = Graph
             self.contexts = [store]
             self.default_context = None
 
@@ -101,14 +105,14 @@ class HextuplesSerializer(Serializer):
             else:
                 language = ""
 
-            return '["%s", "%s", "%s", "%s", "%s", "%s"]\n' % (
+            return json.dumps([
                 self._iri_or_bn(triple[0]),
                 triple[1],
                 value,
                 datatype,
                 language,
-                self._context(context),
-            )
+                self._context(context)
+            ]) + "\n"
         else:  # do not return anything for non-IRIs or BNs, e.g. QuotedGraph, Subjects
             return None
 
@@ -121,7 +125,7 @@ class HextuplesSerializer(Serializer):
             return None
 
     def _context(self, context):
-        if self.default_context is None:
+        if self.graph_type == Graph:
             return ""
         if context.identifier == "urn:x-rdflib:default":
             return ""
diff --git a/test/test_parser_hext.py b/test/test_parser_hext.py
index 27d00838..fdf41911 100644
--- a/test/test_parser_hext.py
+++ b/test/test_parser_hext.py
@@ -22,13 +22,30 @@ def test_small_string():
     assert len(d) == 10
 
 
+def test_small_string_cg():
+    s = """
+        ["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "globalId", "", ""]
+        ["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", ""]
+        ["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""]
+        ["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""]
+        ["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13:00", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""]
+        ["http://example.com/s01", "http://example.com/age", "42", "http://www.w3.org/2001/XMLSchema#integer", "", ""]
+        ["http://example.com/s01", "http://example.com/trueFalse", "false", ",http://www.w3.org/2001/XMLSchema#boolean", "", ""]
+        ["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "globalId", "", ""]
+        ["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "globalId", "", ""]
+        ["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "globalId", "", ""]
+        """
+    d = ConjunctiveGraph().parse(data=s, format="hext")
+    assert len(d) == 10
+
+
 def test_small_file_singlegraph():
     d = Dataset().parse(Path(__file__).parent / "test_parser_hext_singlegraph.ndjson", format="hext")
     assert len(d) == 10
 
 
 def test_small_file_multigraph():
-    d = ConjunctiveGraph()
+    d = Dataset()
     assert len(d) == 0
     d.parse(
         Path(__file__).parent / "test_parser_hext_multigraph.ndjson",
@@ -47,6 +64,26 @@ def test_small_file_multigraph():
     assert total_triples == 18
 
 
+def test_small_file_multigraph_cg():
+    d = ConjunctiveGraph()
+    assert len(d) == 0
+    d.parse(
+        Path(__file__).parent / "test_parser_hext_multigraph.ndjson",
+        format="hext",
+        publicID=d.default_context.identifier
+    )
+
+    """There are 22 lines in the file test_parser_hext_multigraph.ndjson. When loaded
+    into a CG, we get only 18 quads since the CG can contextualise
+    the triples and thus deduplicate 4."""
+    total_triples = 0
+    # count all the triples in the ConjunctiveGraph
+    for context in d.contexts():
+        for triple in context.triples((None, None, None)):
+            total_triples += 1
+    assert total_triples == 18
+
+
 def test_roundtrip():
     # these are some RDF files that HexT can round-trip since the have no
     # literals with no datatype declared:
diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py
index 3b083cdd..4a7b7acd 100644
--- a/test/test_roundtrip.py
+++ b/test/test_roundtrip.py
@@ -11,6 +11,7 @@ from _pytest.mark.structures import Mark, MarkDecorator, ParameterSet
 import rdflib
 import rdflib.compare
 from rdflib.util import guess_format
+from rdflib.namespace import XSD
 
 """
 Test round-tripping by all serializers/parser that are registered.
@@ -116,6 +117,14 @@ XFAILS = {
         reason="rdflib.compare.isomorphic does not work for quoted graphs.",
         raises=AssertionError,
     ),
+    ("hext", "n3-writer-test-22.n3"): pytest.mark.xfail(
+        reason='HexTuples conflates "" and ""^^xsd:string strings',
+        raises=AssertionError,
+    ),
+    ("hext", "rdf-test-21.n3"): pytest.mark.xfail(
+        reason='HexTuples conflates "" and ""^^xsd:string strings',
+        raises=AssertionError,
+    ),
 }
 
 # This is for files which can only be represented properly in one format
@@ -155,6 +164,18 @@ def roundtrip(infmt: str, testfmt: str, source: Path, verbose: bool = False) ->
     g2 = rdflib.ConjunctiveGraph()
     g2.parse(data=s, format=testfmt)
 
+    if testfmt == "hext":
+        # HexTuples always sets Literal("abc") -> Literal("abc", datatype=XSD.string)
+        # and this prevents roundtripping since most other formats don't equate "" with
+        # ""^^xsd:string, at least not in these tests
+        #
+        # So we have to scrub the literals' string datatype declarations...
+        for c in g2.contexts():
+            for s, p, o in c.triples((None, None, None)):
+                if type(o) == rdflib.Literal and o.datatype == XSD.string:
+                    c.remove((s, p, o))
+                    c.add((s, p, rdflib.Literal(str(o))))
+
     if verbose:
         both, first, second = rdflib.compare.graph_diff(g1, g2)
         print("Diff:")
@@ -193,8 +214,8 @@ def get_formats() -> Set[str]:
 def make_cases(files: Collection[Tuple[Path, str]]) -> Iterable[ParameterSet]:
     formats = get_formats()
     for testfmt in formats:
-        if testfmt == "hext":
-            continue
+        # if testfmt == "hext":
+        #     continue
         logging.debug("testfmt = %s", testfmt)
         for f, infmt in files:
             constrained_formats = CONSTRAINED_FORMAT_MAP.get(f.name, None)
diff --git a/test/test_serializer_hext.py b/test/test_serializer_hext.py
index c322a211..7231338f 100644
--- a/test/test_serializer_hext.py
+++ b/test/test_serializer_hext.py
@@ -1,7 +1,7 @@
 import sys
 from pathlib import Path
 sys.path.append(str(Path(__file__).parent.parent.absolute()))
-from rdflib import Dataset, Graph
+from rdflib import Dataset, Graph, ConjunctiveGraph
 import json
 
 
@@ -31,7 +31,7 @@ def test_hext_graph():
 
     g.parse(data=turtle_data, format="turtle")
     out = g.serialize(format="hext")
-    # note: cant' test for BNs in result as they will be different ever time
+    # note: can't test for BNs in result as they will be different every time
     testing_lines = [
         [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", ""]'],
         [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", "", ""]'],
@@ -54,6 +54,76 @@ def test_hext_graph():
     assert all([x[0] for x in testing_lines])
 
 
+def test_hext_cg():
+    """Tests ConjunctiveGraph data"""
+    d = ConjunctiveGraph()
+    trig_data = """
+            PREFIX ex: <http://example.com/>
+            PREFIX owl: <http://www.w3.org/2002/07/owl#>
+            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+            PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+
+            ex:g1 {
+                ex:s1
+                    ex:p1 ex:o1 , ex:o2 ;
+                    ex:p2 [
+                        a owl:Thing ;
+                        rdf:value "thingy" ;
+                    ] ;
+                    ex:p3 "Object 3" , "Object 4 - English"@en ;
+                    ex:p4 "2021-12-03"^^xsd:date ;
+                    ex:p5 42 ;
+                    ex:p6 "42" ;
+                .
+            }
+
+            ex:g2 {
+                ex:s1
+                    ex:p1 ex:o1 , ex:o2 ;
+                .
+                ex:s11 ex:p11 ex:o11 , ex:o12 .
+            }
+
+            # default graph triples
+            ex:s1 ex:p1 ex:o1 , ex:o2 .
+            ex:s21 ex:p21 ex:o21 , ex:o22 .
+
+            # other default graph triples
+            {
+                ex:s1 ex:p1 ex:o1 , ex:o2 .
+            }
+           """
+    d.parse(data=trig_data, format="trig", publicID=d.default_context.identifier)
+    out = d.serialize(format="hext")
+    # note: can't test for BNs in result as they will be different every time
+    testing_lines = [
+        [False, '["http://example.com/s21", "http://example.com/p21", "http://example.com/o21", "globalId", "", ""]'],
+        [False, '["http://example.com/s21", "http://example.com/p21", "http://example.com/o22", "globalId", "", ""]'],
+        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", ""]'],
+        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", ""]'],
+        [False, '["http://example.com/s11", "http://example.com/p11", "http://example.com/o12", "globalId", "", "http://example.com/g2"]'],
+        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g2"]'],
+        [False, '["http://example.com/s11", "http://example.com/p11", "http://example.com/o11", "globalId", "", "http://example.com/g2"]'],
+        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g2"]'],
+        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g1"]'],
+        [False, '["http://example.com/s1", "http://example.com/p2"'],
+        [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'],
+        [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/2002/07/owl#Thing", "globalId", "", "http://example.com/g1"]'],
+        [False, '["http://example.com/s1", "http://example.com/p3", "Object 4 - English", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", "http://example.com/g1"]'],
+        [False, '["http://example.com/s1", "http://example.com/p6", "42", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'],
+        [False, '["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", "", "http://example.com/g1"]'],
+        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g1"]'],
+        [False, '["http://example.com/s1", "http://example.com/p5", "42", "http://www.w3.org/2001/XMLSchema#integer", "", "http://example.com/g1"]'],
+        [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'],
+    ]
+    for line in out.splitlines():
+        for test in testing_lines:
+            if test[1] in line:
+                test[0] = True
+
+    assert all([x[0] for x in testing_lines])
+
+
 def test_hext_dataset():
     """Tests context-aware (multigraph) data"""
     d = Dataset()