summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ashley Sommer <ashleysommer@gmail.com> 2020-08-27 13:35:51 +1000
committer GitHub <noreply@github.com> 2020-08-27 13:35:51 +1000
commit aa527747bd6a5e48ea19463c483f5fb45c7ea230 (patch)
tree eeead812c20e4cd1d30594721bcc09eb99e88e4c
parent 4be27490261cc18ccf2de19827aa83cd832b4305 (diff)
parent 3afffcd19d3a5d240e83b3a59b53e3ee1120c165 (diff)
download rdflib-aa527747bd6a5e48ea19463c483f5fb45c7ea230.tar.gz
Merge pull request #1140 from RDFLib/improve_graph_parse
improved Graph().parse()
-rw-r--r-- rdflib/extras/describer.py 2
-rw-r--r-- rdflib/graph.py 36
-rw-r--r-- rdflib/util.py 2
-rw-r--r-- test/rdf/datatypes/test001.borked 29
-rw-r--r-- test/test_graph.py 63
-rw-r--r-- test/test_issue247.py 4
-rw-r--r-- test/test_issue363.py 2
-rw-r--r-- test/test_issue_git_336.py 2
-rw-r--r-- test/test_literal.py 2
-rw-r--r-- test/test_parse_file_guess_format.py 7
-rw-r--r-- test/test_parser.py 1
-rw-r--r-- test/test_seq.py 2
-rw-r--r-- test/test_xmlliterals.py 2
13 files changed, 125 insertions, 29 deletions
diff --git a/rdflib/extras/describer.py b/rdflib/extras/describer.py
index cec3b602..8afce128 100644
--- a/rdflib/extras/describer.py
+++ b/rdflib/extras/describer.py
@@ -102,7 +102,7 @@ Full example in the ``to_rdf`` method below::
... </cv:hasWorkHistory>
... </cv:CV>
... </rdf:RDF>
- ... ''')
+ ... ''', format="xml")
>>>
>>> from rdflib.compare import isomorphic
>>> isomorphic(person_graph, expected) #doctest: +SKIP
diff --git a/rdflib/graph.py b/rdflib/graph.py
index 4fee4f87..0d11b41f 100644
--- a/rdflib/graph.py
+++ b/rdflib/graph.py
@@ -24,6 +24,7 @@ from rdflib.namespace import NamespaceManager
from rdflib.resource import Resource
from rdflib.collection import Collection
import rdflib.util # avoid circular dependency
+from rdflib.exceptions import ParserError
import os
import shutil
@@ -1000,7 +1001,7 @@ class Graph(Node):
**args
):
"""
- Parse source adding the resulting triples to the Graph.
+ Parse an RDF source adding the resulting triples to the Graph.
The source is specified using one of source, location, file or
data.
@@ -1014,9 +1015,10 @@ class Graph(Node):
is specified.
- `file`: A file-like object.
- `data`: A string containing the data to be parsed.
- - `format`: Used if format can not be determined from source.
- Defaults to rdf/xml. Format support can be extended with plugins,
- but "xml", "n3", "nt" & "trix" are built in.
+ - `format`: Used if format can not be determined from source, e.g. file
+ extension or Media Type. Defaults to text/turtle. Format support can
+ be extended with plugins, but "xml", "n3" (use for turtle), "nt" &
+ "trix" are built in.
- `publicID`: the logical URI to use as the document base. If None
specified the document location is used (at least in the case where
there is a document location).
@@ -1062,6 +1064,11 @@ class Graph(Node):
>>> os.remove(file_name)
+ >>> # default turtle parsing
+ >>> result = g.parse(data="<http://example.com/a> <http://example.com/a> <http://example.com/a> .")
+ >>> len(g)
+ 3
+
"""
source = create_input_source(
@@ -1074,7 +1081,7 @@ class Graph(Node):
)
if format is None:
format = source.content_type
- assumed_xml = False
+ could_not_guess_format = False
if format is None:
if (
hasattr(source, "file")
@@ -1083,19 +1090,18 @@ class Graph(Node):
):
format = rdflib.util.guess_format(source.file.name)
if format is None:
- format = "application/rdf+xml"
- assumed_xml = True
+ format = "turtle"
+ could_not_guess_format = True
parser = plugin.get(format, Parser)()
try:
parser.parse(source, self, **args)
- except SAXParseException as saxpe:
- if assumed_xml:
- logger.warning(
- "Could not guess format for %r, so assumed xml."
- " You can explicitly specify format using the format argument."
- % source
- )
- raise saxpe
+ except SyntaxError as se:
+ if could_not_guess_format:
+ raise ParserError(
+ "Could not guess RDF format for %r from file extension so tried Turtle but failed. "
+ "You can explicitly specify format using the format argument." % source)
+ else:
+ raise se
finally:
if source.auto_close:
source.close()
diff --git a/rdflib/util.py b/rdflib/util.py
index 92996ec7..c5f1ff09 100644
--- a/rdflib/util.py
+++ b/rdflib/util.py
@@ -352,8 +352,8 @@ def parse_date_time(val):
SUFFIX_FORMAT_MAP = {
+ "xml": "xml",
"rdf": "xml",
- "rdfs": "xml",
"owl": "xml",
"n3": "n3",
"ttl": "turtle",
diff --git a/test/rdf/datatypes/test001.borked b/test/rdf/datatypes/test001.borked
new file mode 100644
index 00000000..a4c86aea
--- /dev/null
+++ b/test/rdf/datatypes/test001.borked
@@ -0,0 +1,29 @@
+<?xml version="1.0"?>
+
+<!--
+ Copyright World Wide Web Consortium, (Massachusetts Institute of
+ Technology, Institut National de Recherche en Informatique et en
+ Automatique, Keio University).
+
+ All Rights Reserved.
+
+ Please see the full Copyright clause at
+ <http://www.w3.org/Consortium/Legal/copyright-software.html>
+
+ Description: A simple datatype production; a language+
+ datatype production. Simply duplicate the constructs under
+ http://www.w3.org/2000/10/rdf-tests/rdfcore/ntriples/test.nt
+
+ $Id: test001.rdf,v 1.2 2002/11/20 14:51:34 jgrant Exp $
+
+-->
+
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:eg="http://example.org/">
+
+ <rdf:Description rdf:about="http://example.org/foo">
+ <eg:bar rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">10</eg:bar>
+ <eg:baz rdf:datatype="http://www.w3.org/2001/XMLSchema#integer" xml:lang="fr">10</eg:baz>
+ </rdf:Description>
+
+</rdf:RDF>
diff --git a/test/test_graph.py b/test/test_graph.py
index 560c1a43..6150beea 100644
--- a/test/test_graph.py
+++ b/test/test_graph.py
@@ -5,7 +5,9 @@ import unittest
from tempfile import mkdtemp, mkstemp
import shutil
-from rdflib import URIRef, RDF, Graph, plugin
+from rdflib import URIRef, Graph, plugin
+from rdflib.exceptions import ParserError
+from rdflib.plugin import PluginException
from nose.exc import SkipTest
@@ -248,6 +250,65 @@ class GraphTestCase(unittest.TestCase):
self.assertEqual((michel, likes, cheese) in g1, True)
+ def testGuessFormatForParse(self):
+ self.graph = Graph()
+
+ # files
+ with self.assertRaises(ParserError):
+ self.graph.parse(__file__) # here we are trying to parse a Python file!!
+
+ # .nt can be parsed by Turtle Parser
+ self.graph.parse("test/nt/anons-01.nt")
+ # RDF/XML
+ self.graph.parse("test/rdf/datatypes/test001.rdf") # XML
+ # bad filename but set format
+ self.graph.parse("test/rdf/datatypes/test001.borked", format="xml")
+
+ # strings
+ self.graph = Graph()
+
+ with self.assertRaises(ParserError):
+ self.graph.parse(data="rubbish")
+
+ # Turtle - default
+ self.graph.parse(data="<http://example.com/a> <http://example.com/a> <http://example.com/a> .")
+
+ # Turtle - format given
+ self.graph.parse(data="<http://example.com/a> <http://example.com/a> <http://example.com/a> .", format="turtle")
+
+ # RDF/XML - format given
+ rdf = """<rdf:RDF
+ xmlns:ns1="http://example.org/#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+>
+ <rdf:Description rdf:nodeID="ub63bL2C1">
+ <ns1:p rdf:resource="http://example.org/q"/>
+ <ns1:r rdf:resource="http://example.org/s"/>
+ </rdf:Description>
+ <rdf:Description rdf:nodeID="ub63bL5C1">
+ <ns1:r>
+ <rdf:Description rdf:nodeID="ub63bL6C11">
+ <ns1:s rdf:resource="http://example.org/#t"/>
+ </rdf:Description>
+ </ns1:r>
+ <ns1:p rdf:resource="http://example.org/q"/>
+ </rdf:Description>
+</rdf:RDF>
+ """
+ self.graph.parse(data=rdf, format="xml")
+
+ # URI
+ self.graph = Graph()
+
+ # only getting HTML
+ with self.assertRaises(PluginException):
+ self.graph.parse(location="https://www.google.com")
+
+ self.graph.parse(location="http://www.w3.org/ns/adms.ttl")
+ self.graph.parse(location="http://www.w3.org/ns/adms.rdf")
+ # persistent Australian Government online RDF resource without a file-like ending
+ self.graph.parse(location="https://linked.data.gov.au/def/agrif?_format=text/turtle")
+
# dynamically create classes for each registered Store
diff --git a/test/test_issue247.py b/test/test_issue247.py
index 747dd1e0..7a51dd24 100644
--- a/test/test_issue247.py
+++ b/test/test_issue247.py
@@ -38,7 +38,7 @@ class TestXMLLiteralwithLangAttr(unittest.TestCase):
it contains a XML Literal with a xml:lang attribute:
"""
g = rdflib.Graph()
- g.parse(data=passxml)
+ g.parse(data=passxml, format="xml")
def test_failing_parse_of_literal_with_xmllang_attr(self):
"""
@@ -47,7 +47,7 @@ class TestXMLLiteralwithLangAttr(unittest.TestCase):
it contains a XML Literal with a xml:lang attribute:
"""
g = rdflib.Graph()
- g.parse(data=failxml)
+ g.parse(data=failxml, format="xml")
if __name__ == "__main__":
diff --git a/test/test_issue363.py b/test/test_issue363.py
index 792c2441..5f88a6f4 100644
--- a/test/test_issue363.py
+++ b/test/test_issue363.py
@@ -38,7 +38,7 @@ def test_broken_rdfxml():
def test_parsetype_resource():
- g = rdflib.Graph().parse(data=data2)
+ g = rdflib.Graph().parse(data=data2, format="xml")
print(g.serialize(format="n3"))
diff --git a/test/test_issue_git_336.py b/test/test_issue_git_336.py
index 6a8abb7c..c3d4a581 100644
--- a/test/test_issue_git_336.py
+++ b/test/test_issue_git_336.py
@@ -37,7 +37,7 @@ def test_ns_localname_roundtrip():
xmldump = g.serialize().decode("utf-8")
g1 = rdflib.Graph()
- g1.parse(data=xmldump)
+ g1.parse(data=xmldump, format="xml")
g1.parse(data=turtledump, format="turtle")
diff --git a/test/test_literal.py b/test/test_literal.py
index 8124f99d..bc6919b7 100644
--- a/test/test_literal.py
+++ b/test/test_literal.py
@@ -33,7 +33,7 @@ class TestLiteral(unittest.TestCase):
</rdf:RDF>
"""
g = rdflib.Graph()
- g.parse(data=d)
+ g.parse(data=d, format="xml")
a = rdflib.Literal("a\\b")
b = list(g.objects())[0]
self.assertEqual(a, b)
diff --git a/test/test_parse_file_guess_format.py b/test/test_parse_file_guess_format.py
index abb039df..5706f8df 100644
--- a/test/test_parse_file_guess_format.py
+++ b/test/test_parse_file_guess_format.py
@@ -3,7 +3,7 @@ from pathlib import Path
from shutil import copyfile
from tempfile import TemporaryDirectory
-from xml.sax import SAXParseException
+from rdflib.exceptions import ParserError
from rdflib import Graph, logger as graph_logger
@@ -21,11 +21,10 @@ class FileParserGuessFormatTest(unittest.TestCase):
g = Graph()
with TemporaryDirectory() as tmpdirname:
newpath = Path(tmpdirname).joinpath("no_file_ext")
- copyfile("test/w3c/turtle/IRI_subject.ttl", str(newpath))
+ copyfile("test/rdf/Manifest.rdf", str(newpath))
with self.assertLogs(graph_logger, "WARNING") as log_cm:
- with self.assertRaises(SAXParseException):
+ with self.assertRaises(ParserError):
g.parse(str(newpath))
- self.assertTrue(any("Could not guess format" in msg for msg in log_cm.output))
if __name__ == '__main__':
diff --git a/test/test_parser.py b/test/test_parser.py
index 3aaf5658..e337969c 100644
--- a/test/test_parser.py
+++ b/test/test_parser.py
@@ -33,6 +33,7 @@ class ParserTestCase(unittest.TestCase):
</rdf:RDF>
""",
+ format="xml",
publicID="http://example.org",
)
diff --git a/test/test_seq.py b/test/test_seq.py
index 7f177574..5a987ef4 100644
--- a/test/test_seq.py
+++ b/test/test_seq.py
@@ -29,7 +29,7 @@ class SeqTestCase(unittest.TestCase):
def setUp(self):
store = self.store = Graph(store=self.backend)
store.open(self.path)
- store.parse(data=s)
+ store.parse(data=s, format="xml")
def tearDown(self):
self.store.close()
diff --git a/test/test_xmlliterals.py b/test/test_xmlliterals.py
index fcc0ddf2..aeabbe88 100644
--- a/test/test_xmlliterals.py
+++ b/test/test_xmlliterals.py
@@ -42,7 +42,7 @@ def testRDFXMLParse():
</rdf:RDF>"""
g = rdflib.Graph()
- g.parse(data=rdfxml)
+ g.parse(data=rdfxml, format="xml")
l1 = list(g)[0][2]
assert l1.datatype == RDF.XMLLiteral