From c0fec2db7ade95c90fd6e590339801f51a28c280 Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Thu, 8 Oct 2020 16:57:35 +1000 Subject: Serialize Changes: 1) Change default format from 'xml' to 'turtle' 2) If destination is None and encoding is None, output a str, not bytes 3) Add a new convenience method to print the Graph contents (turtle by default) Lots of fixes in tests to adapt to these changes (serialize is used a lot to check graph state in tests). Blacked files which were involved in this change. --- README.md | 2 +- rdflib/graph.py | 28 ++++++++++++++++++++++------ test/test_finalnewline.py | 4 ++-- test/test_issue1003.py | 42 +++++++++++++++++------------------------- test/test_issue1043.py | 2 +- test/test_issue161.py | 4 ++-- test/test_issue248.py | 2 +- test/test_issue801.py | 2 +- test/test_issue_git_336.py | 6 ++---- test/test_n3.py | 4 ++-- test/test_namespace.py | 10 ++++------ test/test_nquads.py | 4 ++-- test/test_nt_misc.py | 6 +++--- test/test_prefixTypes.py | 6 +++--- test/test_prettyxml.py | 8 ++------ test/test_roundtrip.py | 3 +-- test/test_sparql.py | 2 +- test/test_sparqlupdatestore.py | 2 +- test/test_trig.py | 18 +++++++++--------- test/test_trix_serialize.py | 4 ++-- test/test_turtle_serialize.py | 6 +++--- 21 files changed, 82 insertions(+), 83 deletions(-) diff --git a/README.md b/README.md index 6e3e6a8b..7b3317cc 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ g.bind("xsd", XSD) ``` This will allow the n-triples triple above to be serialised like this: ```python -print(g.serialize(format="turtle").decode("utf-8")) +print(g.serialize(format="turtle")) ``` With these results: diff --git a/rdflib/graph.py b/rdflib/graph.py index e0d5f854..4bfc605d 100644 --- a/rdflib/graph.py +++ b/rdflib/graph.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, Union import logging from warnings import warn import random @@ -957,12 +957,17 @@ class Graph(Node): return self.namespace_manager.absolutize(uri, defrag) def serialize( - self, destination=None, format="xml", base=None, encoding=None, **args - ) -> Optional[bytes]: + self, destination=None, format="turtle", base=None, encoding=None, **args + ) -> Optional[Union[bytes, str]]: """Serialize the Graph to destination If destination is None serialize method returns the serialization as - bytes. Format defaults to xml (AKA rdf/xml). + bytes or string. + + If encoding is None and destination is None, returns a string + If encoding is set, and Destination is None, returns bytes + + Format defaults to turtle. Format support can be extended with plugins, but "xml", "n3", "turtle", "nt", "pretty-xml", "trix", "trig" and "nquads" are built in. @@ -975,8 +980,12 @@ class Graph(Node): serializer = plugin.get(format, Serializer)(self) if destination is None: stream = BytesIO() - serializer.serialize(stream, base=base, encoding=encoding, **args) - return stream.getvalue() + if encoding is None: + serializer.serialize(stream, base=base, encoding="utf-8", **args) + return stream.getvalue().decode("utf-8") + else: + serializer.serialize(stream, base=base, encoding=encoding, **args) + return stream.getvalue() if hasattr(destination, "write"): stream = destination serializer.serialize(stream, base=base, encoding=encoding, **args) @@ -999,6 +1008,13 @@ class Graph(Node): shutil.copy(name, dest) os.remove(name) + def print(self, format="turtle", encoding="utf-8", out=None): + print( + self.serialize(None, format=format, encoding=encoding).decode(encoding), + file=out, + flush=True, + ) + def parse( self, source=None, diff --git a/test/test_finalnewline.py b/test/test_finalnewline.py index c78ac247..9b465b24 100644 --- a/test/test_finalnewline.py +++ b/test/test_finalnewline.py @@ -19,9 +19,9 @@ def testFinalNewline(): failed = set() for p in rdflib.plugin.plugins(None, rdflib.plugin.Serializer): - v = graph.serialize(format=p.name) + v = graph.serialize(format=p.name, encoding="latin-1") lines = v.split("\n".encode("latin-1")) - if "\n".encode("latin-1") not in v or (lines[-1] != "".encode("latin-1")): + if b"\n" not in v or (lines[-1] != b""): failed.add(p.name) assert len(failed) == 0, "No final newline for formats: '%s'" % failed diff --git a/test/test_issue1003.py b/test/test_issue1003.py index d59caf3d..752bb536 100644 --- a/test/test_issue1003.py +++ b/test/test_issue1003.py @@ -34,14 +34,14 @@ g.bind("skos", SKOS) g1 = Graph() g1 += g # @base should not be in output -assert "@base" not in g.serialize(format="turtle").decode("utf-8") +assert "@base" not in g.serialize(format="turtle") # 2. base one set for graph, no base set for serialization g2 = Graph(base=base_one) g2 += g # @base should be in output, from Graph (one) -assert "@base ." in g2.serialize(format="turtle").decode("utf-8") +assert "@base ." in g2.serialize(format="turtle") # 3. no base set for graph, base two set for serialization @@ -50,7 +50,7 @@ g3 += g # @base should be in output, from serialization (two) assert "@base ." in g3.serialize( format="turtle", base=base_two -).decode("utf-8") +) # 4. base one set for graph, base two set for serialization, Graph one overrides @@ -59,11 +59,11 @@ g4 += g # @base should be in output, from graph (one) assert "@base ." in g4.serialize( format="turtle", base=base_two -).decode("utf-8") +) # just checking that the serialization setting (two) hasn't snuck through assert "@base ." not in g4.serialize( format="turtle", base=base_two -).decode("utf-8") +) # 5. multiple serialization side effect checking @@ -72,11 +72,11 @@ g5 += g # @base should be in output, from serialization (two) assert "@base ." in g5.serialize( format="turtle", base=base_two -).decode("utf-8") +) # checking for side affects - no base now set for this serialization # @base should not be in output -assert "@base" not in g5.serialize(format="turtle").decode("utf-8") +assert "@base" not in g5.serialize(format="turtle") # 6. checking results for RDF/XML @@ -84,30 +84,22 @@ g6 = Graph() g6 += g g6.bind("dct", DCTERMS) g6.bind("skos", SKOS) -assert "@xml:base" not in g6.serialize(format="xml").decode("utf-8") -assert 'xml:base="http://one.org/"' in g6.serialize(format="xml", base=base_one).decode( - "utf-8" -) +assert "@xml:base" not in g6.serialize(format="xml") +assert 'xml:base="http://one.org/"' in g6.serialize(format="xml", base=base_one) g6.base = base_two -assert 'xml:base="http://two.org/"' in g6.serialize(format="xml").decode("utf-8") -assert 'xml:base="http://one.org/"' in g6.serialize(format="xml", base=base_one).decode( - "utf-8" -) +assert 'xml:base="http://two.org/"' in g6.serialize(format="xml") +assert 'xml:base="http://one.org/"' in g6.serialize(format="xml", base=base_one) # 7. checking results for N3 g7 = Graph() g7 += g g7.bind("dct", DCTERMS) g7.bind("skos", SKOS) -assert "@xml:base" not in g7.serialize(format="xml").decode("utf-8") -assert "@base ." in g7.serialize(format="n3", base=base_one).decode( - "utf-8" -) +assert "@xml:base" not in g7.serialize(format="xml") +assert "@base ." in g7.serialize(format="n3", base=base_one) g7.base = base_two -assert "@base ." in g7.serialize(format="n3").decode("utf-8") -assert "@base ." in g7.serialize(format="n3", base=base_one).decode( - "utf-8" -) +assert "@base ." in g7.serialize(format="n3") +assert "@base ." in g7.serialize(format="n3", base=base_one) # 8. checking results for TriX & TriG # TriX can specify a base per graph but setting a base for the whole @@ -122,12 +114,12 @@ g9 += g g9.base = base_two ds1.base = base_three -trix = ds1.serialize(format="trix", base=Namespace("http://two.org/")).decode("utf-8") +trix = ds1.serialize(format="trix", base=Namespace("http://two.org/")) assert '' in trix assert '' in trix assert ' ." not in trig assert "@base ." not in trig assert "@base ." in trig diff --git a/test/test_issue1043.py b/test/test_issue1043.py index db202d77..03def8c4 100644 --- a/test/test_issue1043.py +++ b/test/test_issue1043.py @@ -23,7 +23,7 @@ class TestIssue1043(unittest.TestCase): g.bind('rdfs', RDFS) n = Namespace("http://example.org/") g.add((n.number, RDFS.label, Literal(0.00000004, datatype=XSD.decimal))) - print(g.serialize(format="turtle").decode("utf-8")) + g.print() sys.stdout = sys.__stdout__ self.assertEqual(capturedOutput.getvalue(), expected) diff --git a/test/test_issue161.py b/test/test_issue161.py index fa7529dc..5375742c 100644 --- a/test/test_issue161.py +++ b/test/test_issue161.py @@ -25,6 +25,6 @@ class EntityTest(TestCase): g = ConjunctiveGraph() g.parse(data=turtle, format="turtle") # Shouldn't have got to here - s = g.serialize(format="turtle") + s = g.serialize(format="turtle", encoding='latin-1') - self.assertTrue("@prefix _9".encode("latin-1") not in s) + self.assertTrue(b"@prefix _9" not in s) diff --git a/test/test_issue248.py b/test/test_issue248.py index 528e81a2..5efd44b0 100644 --- a/test/test_issue248.py +++ b/test/test_issue248.py @@ -74,7 +74,7 @@ class TestSerialization(unittest.TestCase): graph.add((concept, rdflib.RDF.type, SKOS["Concept"])) graph.add((concept, SKOS["prefLabel"], rdflib.Literal("Scrapbooks"))) graph.add((concept, DC["LCC"], rdflib.Literal("AC999.0999 - AC999999.Z9999"))) - sg = graph.serialize(format="n3", base=LCCO).decode("utf8") + sg = graph.serialize(format="n3", base=LCCO) # See issue 248 # Actual test should be the inverse of the below ... self.assertTrue("<1> a skos:Concept ;" in sg, sg) diff --git a/test/test_issue801.py b/test/test_issue801.py index ae27f346..69573352 100644 --- a/test/test_issue801.py +++ b/test/test_issue801.py @@ -12,7 +12,7 @@ class TestIssue801(unittest.TestCase): g.bind('', example) node = BNode() g.add((node, example['first%20name'], Literal('John'))) - self.assertEqual(g.serialize(format="turtle").decode().split("\n")[-3], + self.assertEqual(g.serialize(format="turtle").split("\n")[-3], '[] :first%20name "John" .') if __name__ == "__main__": diff --git a/test/test_issue_git_336.py b/test/test_issue_git_336.py index c3d4a581..e74c2c4a 100644 --- a/test/test_issue_git_336.py +++ b/test/test_issue_git_336.py @@ -33,12 +33,10 @@ def test_ns_localname_roundtrip(): rdflib.Literal("Junk"), ) ) - turtledump = g.serialize(format="turtle").decode("utf-8") - xmldump = g.serialize().decode("utf-8") + turtledump = g.serialize(format="turtle") + xmldump = g.serialize(format="xml") g1 = rdflib.Graph() - g1.parse(data=xmldump, format="xml") - g1.parse(data=turtledump, format="turtle") diff --git a/test/test_n3.py b/test/test_n3.py index 7cd394cd..47dddc03 100644 --- a/test/test_n3.py +++ b/test/test_n3.py @@ -125,8 +125,8 @@ class TestN3Case(unittest.TestCase): URIRef("http://example.com/people/Linda"), ) ) - s = g.serialize(base="http://example.com/", format="n3") - self.assertTrue("".encode("latin-1") in s) + s = g.serialize(base="http://example.com/", format="n3", encoding="latin-1") + self.assertTrue(b"" in s) g2 = ConjunctiveGraph() g2.parse(data=s, publicID="http://example.com/", format="n3") self.assertEqual(list(g), list(g2)) diff --git a/test/test_namespace.py b/test/test_namespace.py index 510d8515..2706467f 100644 --- a/test/test_namespace.py +++ b/test/test_namespace.py @@ -72,11 +72,11 @@ class NamespacePrefixTest(unittest.TestCase): URIRef("http://example.com/baz"), ) ) - n3 = g.serialize(format="n3") + n3 = g.serialize(format="n3", encoding='latin-1') # Gunnar disagrees that this is right: # self.assertTrue(" ns1:bar ." in n3) # as this is much prettier, and ns1 is already defined: - self.assertTrue("ns1:foo ns1:bar ns1:baz .".encode("latin-1") in n3) + self.assertTrue(b"ns1:foo ns1:bar ns1:baz ." in n3) def test_n32(self): # this test not generating prefixes for subjects/objects @@ -88,12 +88,10 @@ class NamespacePrefixTest(unittest.TestCase): URIRef("http://example3.com/baz"), ) ) - n3 = g.serialize(format="n3") + n3 = g.serialize(format="n3", encoding="latin-1") self.assertTrue( - " ns1:bar .".encode( - "latin-1" - ) + b" ns1:bar ." in n3 ) diff --git a/test/test_nquads.py b/test/test_nquads.py index f99984dd..b88f4d4e 100644 --- a/test/test_nquads.py +++ b/test/test_nquads.py @@ -52,9 +52,9 @@ class NQuadsParserTest(unittest.TestCase): g.get_context(uri1).add((bob, likes, pizza)) g.get_context(uri2).add((bob, likes, pizza)) - s = g.serialize(format="nquads") + s = g.serialize(format="nquads", encoding="latin-1") self.assertEqual( - len([x for x in s.split("\n".encode("latin-1")) if x.strip()]), 2 + len([x for x in s.split(b"\n") if x.strip()]), 2 ) g2 = ConjunctiveGraph() diff --git a/test/test_nt_misc.py b/test/test_nt_misc.py index 399f7bff..164776b8 100644 --- a/test/test_nt_misc.py +++ b/test/test_nt_misc.py @@ -24,15 +24,15 @@ class NTTestCase(unittest.TestCase): def testIssue78(self): g = Graph() g.add((URIRef("foo"), URIRef("foo"), Literal("R\u00E4ksm\u00F6rg\u00E5s"))) - s = g.serialize(format="nt") + s = g.serialize(format="nt", encoding="latin-1") self.assertEqual(type(s), bytes) self.assertTrue(r"R\u00E4ksm\u00F6rg\u00E5s".encode("latin-1") in s) def testIssue146(self): g = Graph() g.add((URIRef("foo"), URIRef("foo"), Literal("test\n", lang="en"))) - s = g.serialize(format="nt").strip() - self.assertEqual(s, ' "test\\n"@en .'.encode("latin-1")) + s = g.serialize(format="nt", encoding="latin-1").strip() + self.assertEqual(s, b' "test\\n"@en .') def testIssue1144_rdflib(self): fname = "test/nt/lists-02.nt" diff --git a/test/test_prefixTypes.py b/test/test_prefixTypes.py index 8a785094..045c6056 100644 --- a/test/test_prefixTypes.py +++ b/test/test_prefixTypes.py @@ -25,10 +25,10 @@ class PrefixTypesTest(unittest.TestCase): """ def test(self): - s = graph.serialize(format="n3") + s = graph.serialize(format="n3", encoding="latin-1") print(s) - self.assertTrue("foaf:Document".encode("latin-1") in s) - self.assertTrue("xsd:date".encode("latin-1") in s) + self.assertTrue(b"foaf:Document" in s) + self.assertTrue(b"xsd:date" in s) if __name__ == "__main__": diff --git a/test/test_prettyxml.py b/test/test_prettyxml.py index 4151c354..86027fb3 100644 --- a/test/test_prettyxml.py +++ b/test/test_prettyxml.py @@ -188,9 +188,7 @@ class TestPrettyXmlSerializer(SerializerTestBase): xmlrepr = g.serialize(format="pretty-xml") # then: assert ( - """

See also Å

""".encode( - "utf-8" - ) + """

See also Å

""" in xmlrepr ) @@ -202,9 +200,7 @@ class TestPrettyXmlSerializer(SerializerTestBase): xmlrepr = g.serialize(format="pretty-xml") # then: assert ( - """<p """.encode( - "utf-8" - ) + """<p """ in xmlrepr ) diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py index 149e9eb5..5bb433f1 100644 --- a/test/test_roundtrip.py +++ b/test/test_roundtrip.py @@ -51,8 +51,7 @@ def roundtrip(e, verbose=False): if verbose: print("S:") - print(s) - print(s.decode()) + print(s, flush=True) g2 = rdflib.ConjunctiveGraph() g2.parse(data=s, format=testfmt) diff --git a/test/test_sparql.py b/test/test_sparql.py index fdf29c3c..8da1f799 100644 --- a/test/test_sparql.py +++ b/test/test_sparql.py @@ -103,7 +103,7 @@ def test_sparql_update_with_bnode_serialize_parse(): """ graph = Graph() graph.update("INSERT DATA { _:blankA }") - string = graph.serialize(format="ntriples").decode("utf-8") + string = graph.serialize(format="ntriples") raised = False try: Graph().parse(data=string, format="ntriples") diff --git a/test/test_sparqlupdatestore.py b/test/test_sparqlupdatestore.py index ca137beb..eea01136 100644 --- a/test/test_sparqlupdatestore.py +++ b/test/test_sparqlupdatestore.py @@ -183,7 +183,7 @@ class TestSparql11(unittest.TestCase): "INSERT DATA { GRAPH { _:blankA } }" ) g = self.graph.get_context(graphuri) - string = g.serialize(format="ntriples").decode("utf-8") + string = g.serialize(format="ntriples") raised = False try: Graph().parse(data=string, format="ntriples") diff --git a/test/test_trig.py b/test/test_trig.py index d61667fc..7474640f 100644 --- a/test/test_trig.py +++ b/test/test_trig.py @@ -30,7 +30,7 @@ class TestTrig(unittest.TestCase): self.assertEqual(len(g.get_context("urn:a")), 1) self.assertEqual(len(g.get_context("urn:b")), 1) - s = g.serialize(format="trig") + s = g.serialize(format="trig", encoding="latin-1") self.assertTrue(b"{}" not in s) # no empty graphs! def testSameSubject(self): @@ -46,7 +46,7 @@ class TestTrig(unittest.TestCase): self.assertEqual(len(g.get_context("urn:a")), 1) self.assertEqual(len(g.get_context("urn:b")), 1) - s = g.serialize(format="trig") + s = g.serialize(format="trig", encoding="latin-1") self.assertEqual(len(re.findall(b"p1", s)), 1) self.assertEqual(len(re.findall(b"p2", s)), 1) @@ -58,15 +58,15 @@ class TestTrig(unittest.TestCase): g.add(TRIPLE + (rdflib.URIRef("http://example.com/graph1"),)) # In 4.2.0 the first serialization would fail to include the # prefix for the graph but later serialize() calls would work. - first_out = g.serialize(format="trig") - second_out = g.serialize(format="trig") + first_out = g.serialize(format="trig", encoding="latin-1") + second_out = g.serialize(format="trig", encoding="latin-1") self.assertTrue(b"@prefix ns1: ." in second_out) self.assertTrue(b"@prefix ns1: ." in first_out) def testGraphQnameSyntax(self): g = rdflib.ConjunctiveGraph() g.add(TRIPLE + (rdflib.URIRef("http://example.com/graph1"),)) - out = g.serialize(format="trig") + out = g.serialize(format="trig", encoding="latin-1") self.assertTrue(b"ns1:graph1 {" in out) def testGraphUriSyntax(self): @@ -74,13 +74,13 @@ class TestTrig(unittest.TestCase): # getQName will not abbreviate this, so it should serialize as # a '<...>' term. g.add(TRIPLE + (rdflib.URIRef("http://example.com/foo."),)) - out = g.serialize(format="trig") + out = g.serialize(format="trig", encoding="latin-1") self.assertTrue(b" {" in out) def testBlankGraphIdentifier(self): g = rdflib.ConjunctiveGraph() g.add(TRIPLE + (rdflib.BNode(),)) - out = g.serialize(format="trig") + out = g.serialize(format="trig", encoding='latin-1') graph_label_line = out.splitlines()[-4] self.assertTrue(re.match(br"^_:[a-zA-Z0-9]+ \{", graph_label_line)) @@ -153,7 +153,7 @@ class TestTrig(unittest.TestCase): """ g = rdflib.ConjunctiveGraph() g.parse(data=data, format="trig") - data = g.serialize(format="trig") + data = g.serialize(format="trig", encoding="latin-1") self.assertTrue(b"None" not in data) @@ -173,7 +173,7 @@ class TestTrig(unittest.TestCase): cg = rdflib.ConjunctiveGraph() cg.parse(data=data, format="trig") - data = cg.serialize(format="trig") + data = cg.serialize(format="trig", encoding="latin-1") self.assertTrue(b"ns2: " not in data) diff --git a/test/test_trix_serialize.py b/test/test_trix_serialize.py index 4fe78a18..d0824aa9 100644 --- a/test/test_trix_serialize.py +++ b/test/test_trix_serialize.py @@ -39,7 +39,7 @@ class TestTrixSerialize(unittest.TestCase): r3 = URIRef("resource:3") g.add((r3, label, Literal(4))) - r = g.serialize(format="trix") + r = g.serialize(format="trix", encoding="utf-8") g3 = ConjunctiveGraph() g3.parse(BytesIO(r), format="trix") @@ -86,7 +86,7 @@ class TestTrixSerialize(unittest.TestCase): graph = ConjunctiveGraph() graph.bind(None, "http://defaultnamespace") - sg = graph.serialize(format="trix").decode("UTF-8") + sg = graph.serialize(format="trix") self.assertTrue('xmlns="http://defaultnamespace"' not in sg, sg) self.assertTrue('xmlns="http://www.w3.org/2004/03/trix/trix-1/' in sg, sg) diff --git a/test/test_turtle_serialize.py b/test/test_turtle_serialize.py index 48c86442..9c073e8c 100644 --- a/test/test_turtle_serialize.py +++ b/test/test_turtle_serialize.py @@ -12,8 +12,8 @@ def testTurtleFinalDot(): u = URIRef("http://ex.org/bob.") g.bind("ns", "http://ex.org/") g.add((u, u, u)) - s = g.serialize(format="turtle") - assert "ns:bob.".encode("latin-1") not in s + s = g.serialize(format="turtle", encoding="latin-1") + assert b"ns:bob." not in s def testTurtleBoolList(): @@ -89,7 +89,7 @@ def test_turtle_namespace(): ) output = [ val - for val in graph.serialize(format="turtle").decode().splitlines() + for val in graph.serialize(format="turtle").splitlines() if not val.startswith("@prefix") ] output = " ".join(output) -- cgit v1.2.1