diff options
author | Ashley Sommer <ashleysommer@gmail.com> | 2020-08-27 13:35:51 +1000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-08-27 13:35:51 +1000 |
commit | aa527747bd6a5e48ea19463c483f5fb45c7ea230 (patch) | |
tree | eeead812c20e4cd1d30594721bcc09eb99e88e4c | |
parent | 4be27490261cc18ccf2de19827aa83cd832b4305 (diff) | |
parent | 3afffcd19d3a5d240e83b3a59b53e3ee1120c165 (diff) | |
download | rdflib-aa527747bd6a5e48ea19463c483f5fb45c7ea230.tar.gz |
Merge pull request #1140 from RDFLib/improve_graph_parse
improved Graph().parse()
-rw-r--r-- | rdflib/extras/describer.py | 2 | ||||
-rw-r--r-- | rdflib/graph.py | 36 | ||||
-rw-r--r-- | rdflib/util.py | 2 | ||||
-rw-r--r-- | test/rdf/datatypes/test001.borked | 29 | ||||
-rw-r--r-- | test/test_graph.py | 63 | ||||
-rw-r--r-- | test/test_issue247.py | 4 | ||||
-rw-r--r-- | test/test_issue363.py | 2 | ||||
-rw-r--r-- | test/test_issue_git_336.py | 2 | ||||
-rw-r--r-- | test/test_literal.py | 2 | ||||
-rw-r--r-- | test/test_parse_file_guess_format.py | 7 | ||||
-rw-r--r-- | test/test_parser.py | 1 | ||||
-rw-r--r-- | test/test_seq.py | 2 | ||||
-rw-r--r-- | test/test_xmlliterals.py | 2 |
13 files changed, 125 insertions, 29 deletions
diff --git a/rdflib/extras/describer.py b/rdflib/extras/describer.py index cec3b602..8afce128 100644 --- a/rdflib/extras/describer.py +++ b/rdflib/extras/describer.py @@ -102,7 +102,7 @@ Full example in the ``to_rdf`` method below:: ... </cv:hasWorkHistory> ... </cv:CV> ... </rdf:RDF> - ... ''') + ... ''', format="xml") >>> >>> from rdflib.compare import isomorphic >>> isomorphic(person_graph, expected) #doctest: +SKIP diff --git a/rdflib/graph.py b/rdflib/graph.py index 4fee4f87..0d11b41f 100644 --- a/rdflib/graph.py +++ b/rdflib/graph.py @@ -24,6 +24,7 @@ from rdflib.namespace import NamespaceManager from rdflib.resource import Resource from rdflib.collection import Collection import rdflib.util # avoid circular dependency +from rdflib.exceptions import ParserError import os import shutil @@ -1000,7 +1001,7 @@ class Graph(Node): **args ): """ - Parse source adding the resulting triples to the Graph. + Parse an RDF source adding the resulting triples to the Graph. The source is specified using one of source, location, file or data. @@ -1014,9 +1015,10 @@ class Graph(Node): is specified. - `file`: A file-like object. - `data`: A string containing the data to be parsed. - - `format`: Used if format can not be determined from source. - Defaults to rdf/xml. Format support can be extended with plugins, - but "xml", "n3", "nt" & "trix" are built in. + - `format`: Used if format can not be determined from source, e.g. file + extension or Media Type. Defaults to text/turtle. Format support can + be extended with plugins, but "xml", "n3" (use for turtle), "nt" & + "trix" are built in. - `publicID`: the logical URI to use as the document base. If None specified the document location is used (at least in the case where there is a document location). @@ -1062,6 +1064,11 @@ class Graph(Node): >>> os.remove(file_name) + >>> # default turtle parsing + >>> result = g.parse(data="<http://example.com/a> <http://example.com/a> <http://example.com/a> .") + >>> len(g) + 3 + """ source = create_input_source( @@ -1074,7 +1081,7 @@ class Graph(Node): ) if format is None: format = source.content_type - assumed_xml = False + could_not_guess_format = False if format is None: if ( hasattr(source, "file") @@ -1083,19 +1090,18 @@ class Graph(Node): ): format = rdflib.util.guess_format(source.file.name) if format is None: - format = "application/rdf+xml" - assumed_xml = True + format = "turtle" + could_not_guess_format = True parser = plugin.get(format, Parser)() try: parser.parse(source, self, **args) - except SAXParseException as saxpe: - if assumed_xml: - logger.warning( - "Could not guess format for %r, so assumed xml." - " You can explicitly specify format using the format argument." - % source - ) - raise saxpe + except SyntaxError as se: + if could_not_guess_format: + raise ParserError( + "Could not guess RDF format for %r from file extension so tried Turtle but failed." + "You can explicitly specify format using the format argument." % source) + else: + raise se finally: if source.auto_close: source.close() diff --git a/rdflib/util.py b/rdflib/util.py index 92996ec7..c5f1ff09 100644 --- a/rdflib/util.py +++ b/rdflib/util.py @@ -352,8 +352,8 @@ def parse_date_time(val): SUFFIX_FORMAT_MAP = { + "xml": "xml", "rdf": "xml", - "rdfs": "xml", "owl": "xml", "n3": "n3", "ttl": "turtle", diff --git a/test/rdf/datatypes/test001.borked b/test/rdf/datatypes/test001.borked new file mode 100644 index 00000000..a4c86aea --- /dev/null +++ b/test/rdf/datatypes/test001.borked @@ -0,0 +1,29 @@ +<?xml version="1.0"?> + +<!-- + Copyright World Wide Web Consortium, (Massachusetts Institute of + Technology, Institut National de Recherche en Informatique et en + Automatique, Keio University). + + All Rights Reserved. + + Please see the full Copyright clause at + <http://www.w3.org/Consortium/Legal/copyright-software.html> + + Description: A simple datatype production; a language+ + datatype production. Simply duplicate the constructs under + http://www.w3.org/2000/10/rdf-tests/rdfcore/ntriples/test.nt + + $Id: test001.rdf,v 1.2 2002/11/20 14:51:34 jgrant Exp $ + +--> + +<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:eg="http://example.org/"> + + <rdf:Description rdf:about="http://example.org/foo"> + <eg:bar rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10</eg:bar> + <eg:baz rdf:datatype="http://www.w3.org/2001/XMLSchema#integer" xml:lang="fr">10</eg:baz> + </rdf:Description> + +</rdf:RDF> diff --git a/test/test_graph.py b/test/test_graph.py index 560c1a43..6150beea 100644 --- a/test/test_graph.py +++ b/test/test_graph.py @@ -5,7 +5,9 @@ import unittest from tempfile import mkdtemp, mkstemp import shutil -from rdflib import URIRef, RDF, Graph, plugin +from rdflib import URIRef, Graph, plugin +from rdflib.exceptions import ParserError +from rdflib.plugin import PluginException from nose.exc import SkipTest @@ -248,6 +250,65 @@ class GraphTestCase(unittest.TestCase): self.assertEqual((michel, likes, cheese) in g1, True) + def testGuessFormatForParse(self): + self.graph = Graph() + + # files + with self.assertRaises(ParserError): + self.graph.parse(__file__) # here we are trying to parse a Python file!! + + # .nt can be parsed by Turtle Parser + self.graph.parse("test/nt/anons-01.nt") + # RDF/XML + self.graph.parse("test/rdf/datatypes/test001.rdf") # XML + # bad filename but set format + self.graph.parse("test/rdf/datatypes/test001.borked", format="xml") + + # strings + self.graph = Graph() + + with self.assertRaises(ParserError): + self.graph.parse(data="rubbish") + + # Turtle - default + self.graph.parse(data="<http://example.com/a> <http://example.com/a> <http://example.com/a> .") + + # Turtle - format given + self.graph.parse(data="<http://example.com/a> <http://example.com/a> <http://example.com/a> .", format="turtle") + + # RDF/XML - format given + rdf = """<rdf:RDF + xmlns:ns1="http://example.org/#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" +> + <rdf:Description rdf:nodeID="ub63bL2C1"> + <ns1:p rdf:resource="http://example.org/q"/> + <ns1:r rdf:resource="http://example.org/s"/> + </rdf:Description> + <rdf:Description rdf:nodeID="ub63bL5C1"> + <ns1:r> + <rdf:Description rdf:nodeID="ub63bL6C11"> + <ns1:s rdf:resource="http://example.org/#t"/> + </rdf:Description> + </ns1:r> + <ns1:p rdf:resource="http://example.org/q"/> + </rdf:Description> +</rdf:RDF> + """ + self.graph.parse(data=rdf, format="xml") + + # URI + self.graph = Graph() + + # only getting HTML + with self.assertRaises(PluginException): + self.graph.parse(location="https://www.google.com") + + self.graph.parse(location="http://www.w3.org/ns/adms.ttl") + self.graph.parse(location="http://www.w3.org/ns/adms.rdf") + # persistent Australian Government online RDF resource without a file-like ending + self.graph.parse(location="https://linked.data.gov.au/def/agrif?_format=text/turtle") + # dynamically create classes for each registered Store diff --git a/test/test_issue247.py b/test/test_issue247.py index 747dd1e0..7a51dd24 100644 --- a/test/test_issue247.py +++ b/test/test_issue247.py @@ -38,7 +38,7 @@ class TestXMLLiteralwithLangAttr(unittest.TestCase): it contains a XML Literal with a xml:lang attribute: """ g = rdflib.Graph() - g.parse(data=passxml) + g.parse(data=passxml, format="xml") def test_failing_parse_of_literal_with_xmllang_attr(self): """ @@ -47,7 +47,7 @@ class TestXMLLiteralwithLangAttr(unittest.TestCase): it contains a XML Literal with a xml:lang attribute: """ g = rdflib.Graph() - g.parse(data=failxml) + g.parse(data=failxml, format="xml") if __name__ == "__main__": diff --git a/test/test_issue363.py b/test/test_issue363.py index 792c2441..5f88a6f4 100644 --- a/test/test_issue363.py +++ b/test/test_issue363.py @@ -38,7 +38,7 @@ def test_broken_rdfxml(): def test_parsetype_resource(): - g = rdflib.Graph().parse(data=data2) + g = rdflib.Graph().parse(data=data2, format="xml") print(g.serialize(format="n3")) diff --git a/test/test_issue_git_336.py b/test/test_issue_git_336.py index 6a8abb7c..c3d4a581 100644 --- a/test/test_issue_git_336.py +++ b/test/test_issue_git_336.py @@ -37,7 +37,7 @@ def test_ns_localname_roundtrip(): xmldump = g.serialize().decode("utf-8") g1 = rdflib.Graph() - g1.parse(data=xmldump) + g1.parse(data=xmldump, format="xml") g1.parse(data=turtledump, format="turtle") diff --git a/test/test_literal.py b/test/test_literal.py index 8124f99d..bc6919b7 100644 --- a/test/test_literal.py +++ b/test/test_literal.py @@ -33,7 +33,7 @@ class TestLiteral(unittest.TestCase): </rdf:RDF> """ g = rdflib.Graph() - g.parse(data=d) + g.parse(data=d, format="xml") a = rdflib.Literal("a\\b") b = list(g.objects())[0] self.assertEqual(a, b) diff --git a/test/test_parse_file_guess_format.py b/test/test_parse_file_guess_format.py index abb039df..5706f8df 100644 --- a/test/test_parse_file_guess_format.py +++ b/test/test_parse_file_guess_format.py @@ -3,7 +3,7 @@ from pathlib import Path from shutil import copyfile from tempfile import TemporaryDirectory -from xml.sax import SAXParseException +from rdflib.exceptions import ParserError from rdflib import Graph, logger as graph_logger @@ -21,11 +21,10 @@ class FileParserGuessFormatTest(unittest.TestCase): g = Graph() with TemporaryDirectory() as tmpdirname: newpath = Path(tmpdirname).joinpath("no_file_ext") - copyfile("test/w3c/turtle/IRI_subject.ttl", str(newpath)) + copyfile("test/rdf/Manifest.rdf", str(newpath)) with self.assertLogs(graph_logger, "WARNING") as log_cm: - with self.assertRaises(SAXParseException): + with self.assertRaises(ParserError): g.parse(str(newpath)) - self.assertTrue(any("Could not guess format" in msg for msg in log_cm.output)) if __name__ == '__main__': diff --git a/test/test_parser.py b/test/test_parser.py index 3aaf5658..e337969c 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -33,6 +33,7 @@ class ParserTestCase(unittest.TestCase): </rdf:RDF> """, + format="xml", publicID="http://example.org", ) diff --git a/test/test_seq.py b/test/test_seq.py index 7f177574..5a987ef4 100644 --- a/test/test_seq.py +++ b/test/test_seq.py @@ -29,7 +29,7 @@ class SeqTestCase(unittest.TestCase): def setUp(self): store = self.store = Graph(store=self.backend) store.open(self.path) - store.parse(data=s) + store.parse(data=s, format="xml") def tearDown(self): self.store.close() diff --git a/test/test_xmlliterals.py b/test/test_xmlliterals.py index fcc0ddf2..aeabbe88 100644 --- a/test/test_xmlliterals.py +++ b/test/test_xmlliterals.py @@ -42,7 +42,7 @@ def testRDFXMLParse(): </rdf:RDF>""" g = rdflib.Graph() - g.parse(data=rdfxml) + g.parse(data=rdfxml, format="xml") l1 = list(g)[0][2] assert l1.datatype == RDF.XMLLiteral |