summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nicholas Car <nicholas.car@surroundaustralia.com> 2020-06-05 20:40:24 +1000
committer: GitHub <noreply@github.com> 2020-06-05 20:40:24 +1000
commit: efa2635b83c6386d8f4cce5ef76d75448de777ab (patch)
tree: 5ff187e67813a03bf4a35b8b4db02a931090ba4e
parent: 6b5bd37ccc67bdec62d2e36d174eb7933b5020b2 (diff)
parent: bf540416f3d6819626c262ced5ca87be96b80df4 (diff)
download: rdflib-efa2635b83c6386d8f4cce5ef76d75448de777ab.tar.gz
Merge pull request #1108 from mwatts15/feature/rdflib/rdflib#980-nt-nquads-blank-node-collisions
BNode context dicts for NT and N-Quads parsers
-rw-r--r-- rdflib/plugins/parsers/nquads.py 24
-rw-r--r-- rdflib/plugins/parsers/nt.py 16
-rw-r--r-- rdflib/plugins/parsers/ntriples.py 38
-rw-r--r-- test/nquads.rdflib/bnode_context.nquads 3
-rw-r--r-- test/nquads.rdflib/bnode_context_obj_bnodes.nquads 3
-rw-r--r-- test/test_nquads.py 64
-rw-r--r-- test/test_nt_misc.py 16
7 files changed, 132 insertions, 32 deletions
diff --git a/rdflib/plugins/parsers/nquads.py b/rdflib/plugins/parsers/nquads.py
index 0c29fc4c..a3bfbc6e 100644
--- a/rdflib/plugins/parsers/nquads.py
+++ b/rdflib/plugins/parsers/nquads.py
@@ -40,8 +40,18 @@ __all__ = ["NQuadsParser"]
class NQuadsParser(NTriplesParser):
- def parse(self, inputsource, sink, **kwargs):
- """Parse f as an N-Triples file."""
+ def parse(self, inputsource, sink, bnode_context=None, **kwargs):
+ """
+ Parse inputsource as an N-Quads file.
+
+ :type inputsource: `rdflib.parser.InputSource`
+ :param inputsource: the source of N-Quads-formatted data
+ :type sink: `rdflib.graph.Graph`
+ :param sink: where to send parsed triples
+ :type bnode_context: `dict`, optional
+ :param bnode_context: a dict mapping blank node identifiers to `~rdflib.term.BNode` instances.
+ See `.NTriplesParser.parse`
+ """
assert sink.store.context_aware, (
"NQuadsParser must be given" " a context aware store."
)
@@ -61,27 +71,27 @@ class NQuadsParser(NTriplesParser):
if self.line is None:
break
try:
- self.parseline()
+ self.parseline(bnode_context)
except ParseError as msg:
raise ParseError("Invalid line (%s):\n%r" % (msg, __line))
return self.sink
- def parseline(self):
+ def parseline(self, bnode_context=None):
self.eat(r_wspace)
if (not self.line) or self.line.startswith(("#")):
return # The line is empty or a comment
- subject = self.subject()
+ subject = self.subject(bnode_context)
self.eat(r_wspace)
predicate = self.predicate()
self.eat(r_wspace)
- obj = self.object()
+ obj = self.object(bnode_context)
self.eat(r_wspace)
- context = self.uriref() or self.nodeid() or self.sink.identifier
+ context = self.uriref() or self.nodeid(bnode_context) or self.sink.identifier
self.eat(r_tail)
if self.line:
diff --git a/rdflib/plugins/parsers/nt.py b/rdflib/plugins/parsers/nt.py
index d7d3b336..c37a1aa0 100644
--- a/rdflib/plugins/parsers/nt.py
+++ b/rdflib/plugins/parsers/nt.py
@@ -17,11 +17,17 @@ class NTParser(Parser):
See http://www.w3.org/TR/rdf-testcases/#ntriples"""
- def __init__(self):
- super(NTParser, self).__init__()
-
- def parse(self, source, sink, baseURI=None):
+ def parse(self, source, sink, **kwargs):
+ '''
+ Parse the NT format
+
+ :type source: `rdflib.parser.InputSource`
+ :param source: the source of NT-formatted data
+ :type sink: `rdflib.graph.Graph`
+ :param sink: where to send parsed triples
+ :param kwargs: Additional arguments to pass to `.NTriplesParser.parse`
+ '''
f = source.getByteStream() # TODO getCharacterStream?
parser = NTriplesParser(NTSink(sink))
- parser.parse(f)
+ parser.parse(f, **kwargs)
f.close()
diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py
index 21c931c6..33a4a4e6 100644
--- a/rdflib/plugins/parsers/ntriples.py
+++ b/rdflib/plugins/parsers/ntriples.py
@@ -133,8 +133,6 @@ class NTriplesParser(object):
else:
self._bnode_ids = {}
- self._parse_bnode_ids = None
-
if sink is not None:
self.sink = sink
else:
@@ -144,10 +142,13 @@ class NTriplesParser(object):
"""
Parse f as an N-Triples file.
+ :type f: :term:`file object`
:param f: the N-Triples source
+ :type bnode_context: `dict`, optional
:param bnode_context: a dict mapping blank node identifiers (e.g., ``a`` in ``_:a``)
- to `.BNode` instances. An empty dict can be passed in to
- define a distinct context for a given call to `parse`.
+ to `~rdflib.term.BNode` instances. An empty dict can be
+ passed in to define a distinct context for a given call to
+ `parse`.
"""
if not hasattr(f, "read"):
raise ParseError("Item to parse must be a file-like object.")
@@ -156,13 +157,12 @@ class NTriplesParser(object):
self.file = f
self.buffer = ""
- self._parse_bnode_ids = bnode_context
while True:
self.line = self.readline()
if self.line is None:
break
try:
- self.parseline()
+ self.parseline(bnode_context=bnode_context)
except ParseError:
raise ParseError("Invalid line: %r" % self.line)
return self.sink
@@ -200,18 +200,18 @@ class NTriplesParser(object):
return None
self.buffer += buffer
- def parseline(self):
+ def parseline(self, bnode_context=None):
self.eat(r_wspace)
if (not self.line) or self.line.startswith("#"):
return # The line is empty or a comment
- subject = self.subject()
+ subject = self.subject(bnode_context)
self.eat(r_wspaces)
predicate = self.predicate()
self.eat(r_wspaces)
- object = self.object()
+ object = self.object(bnode_context)
self.eat(r_tail)
if self.line:
@@ -230,9 +230,9 @@ class NTriplesParser(object):
self.line = self.line[m.end():]
return m
- def subject(self):
+ def subject(self, bnode_context=None):
# @@ Consider using dictionary cases
- subj = self.uriref() or self.nodeid()
+ subj = self.uriref() or self.nodeid(bnode_context)
if not subj:
raise ParseError("Subject must be uriref or nodeID")
return subj
@@ -243,8 +243,8 @@ class NTriplesParser(object):
raise ParseError("Predicate must be uriref")
return pred
- def object(self):
- objt = self.uriref() or self.nodeid() or self.literal()
+ def object(self, bnode_context=None):
+ objt = self.uriref() or self.nodeid(bnode_context) or self.literal()
if objt is False:
raise ParseError("Unrecognised object type")
return objt
@@ -257,15 +257,13 @@ class NTriplesParser(object):
return URI(uri)
return False
- def nodeid(self):
+ def nodeid(self, bnode_context=None):
if self.peek("_"):
# Fix for https://github.com/RDFLib/rdflib/issues/204
- if self._parse_bnode_ids is not None:
- bnode_ids = self._parse_bnode_ids
- else:
- bnode_ids = self._bnode_ids
+ if bnode_context is None:
+ bnode_context = self._bnode_ids
bnode_id = self.eat(r_nodeid).group(1)
- new_id = bnode_ids.get(bnode_id, None)
+ new_id = bnode_context.get(bnode_id, None)
if new_id is not None:
# Re-map to id specific to this doc
return bNode(new_id)
@@ -273,7 +271,7 @@ class NTriplesParser(object):
# Replace with freshly-generated document-specific BNode id
bnode = bNode()
# Store the mapping
- self._bnode_ids[bnode_id] = bnode
+ bnode_context[bnode_id] = bnode
return bnode
return False
diff --git a/test/nquads.rdflib/bnode_context.nquads b/test/nquads.rdflib/bnode_context.nquads
new file mode 100644
index 00000000..59e9350f
--- /dev/null
+++ b/test/nquads.rdflib/bnode_context.nquads
@@ -0,0 +1,3 @@
+_:bnode1 <http://xmlns.com/foaf/0.1/Friend> "Michele" _:blah .
+_:bnode2 <http://xmlns.com/foaf/0.1/Friend> "Kevin" _:bluh .
+
diff --git a/test/nquads.rdflib/bnode_context_obj_bnodes.nquads b/test/nquads.rdflib/bnode_context_obj_bnodes.nquads
new file mode 100644
index 00000000..5b90fb00
--- /dev/null
+++ b/test/nquads.rdflib/bnode_context_obj_bnodes.nquads
@@ -0,0 +1,3 @@
+_:bnode1 <http://xmlns.com/foaf/0.1/Friend> "Michele" <http://example.org/alice/foaf2.rdf> .
+<http://example.org/Kevin> <http://xmlns.com/foaf/0.1/Friend> _:bnode2 <http://example.org/alice/foaf3.rdf> .
+
diff --git a/test/test_nquads.py b/test/test_nquads.py
index c25bc7ed..da9e8e2d 100644
--- a/test/test_nquads.py
+++ b/test/test_nquads.py
@@ -67,5 +67,69 @@ class NQuadsParserTest(unittest.TestCase):
)
+class BnodeContextTest(unittest.TestCase):
+ def setUp(self):
+ self.data = open("test/nquads.rdflib/bnode_context.nquads", "rb")
+ self.data_obnodes = open("test/nquads.rdflib/bnode_context_obj_bnodes.nquads", "rb")
+
+ def tearDown(self):
+ self.data.close()
+
+ def test_parse_shared_bnode_context(self):
+ bnode_ctx = dict()
+ g = ConjunctiveGraph()
+ h = ConjunctiveGraph()
+ g.parse(self.data, format="nquads", bnode_context=bnode_ctx)
+ self.data.seek(0)
+ h.parse(self.data, format="nquads", bnode_context=bnode_ctx)
+ self.assertEqual(set(h.subjects()), set(g.subjects()))
+
+ def test_parse_shared_bnode_context_same_graph(self):
+ bnode_ctx = dict()
+ g = ConjunctiveGraph()
+ g.parse(self.data_obnodes, format="nquads", bnode_context=bnode_ctx)
+ o1 = set(g.objects())
+ self.data_obnodes.seek(0)
+ g.parse(self.data_obnodes, format="nquads", bnode_context=bnode_ctx)
+ o2 = set(g.objects())
+ self.assertEqual(o1, o2)
+
+ def test_parse_distinct_bnode_context(self):
+ g = ConjunctiveGraph()
+ g.parse(self.data, format="nquads", bnode_context=dict())
+ s1 = set(g.subjects())
+ self.data.seek(0)
+ g.parse(self.data, format="nquads", bnode_context=dict())
+ s2 = set(g.subjects())
+ self.assertNotEqual(set(), s2 - s1)
+
+ def test_parse_distinct_bnode_contexts_between_graphs(self):
+ g = ConjunctiveGraph()
+ h = ConjunctiveGraph()
+ g.parse(self.data, format="nquads")
+ s1 = set(g.subjects())
+ self.data.seek(0)
+ h.parse(self.data, format="nquads")
+ s2 = set(h.subjects())
+ self.assertNotEqual(s1, s2)
+
+ def test_parse_distinct_bnode_contexts_named_graphs(self):
+ g = ConjunctiveGraph()
+ h = ConjunctiveGraph()
+ g.parse(self.data, format="nquads")
+ self.data.seek(0)
+ h.parse(self.data, format="nquads")
+ self.assertNotEqual(set(h.contexts()), set(g.contexts()))
+
+ def test_parse_shared_bnode_contexts_named_graphs(self):
+ bnode_ctx = dict()
+ g = ConjunctiveGraph()
+ h = ConjunctiveGraph()
+ g.parse(self.data, format="nquads", bnode_context=bnode_ctx)
+ self.data.seek(0)
+ h.parse(self.data, format="nquads", bnode_context=bnode_ctx)
+ self.assertEqual(set(h.contexts()), set(g.contexts()))
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/test/test_nt_misc.py b/test/test_nt_misc.py
index 4a21fed9..af7049d8 100644
--- a/test/test_nt_misc.py
+++ b/test/test_nt_misc.py
@@ -188,6 +188,22 @@ class BNodeContextTestCase(unittest.TestCase):
self.assertEqual(len(my_sink.subs), 1)
+ def test_bnode_shared_across_instances_with_parse_option(self):
+ my_sink = FakeSink()
+ bnode_ctx = dict()
+
+ p = ntriples.NTriplesParser(my_sink)
+ p.parsestring('''
+ _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> .
+ ''', bnode_context=bnode_ctx)
+
+ q = ntriples.NTriplesParser(my_sink)
+ q.parsestring('''
+ _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000002> .
+ ''', bnode_context=bnode_ctx)
+
+ self.assertEqual(len(my_sink.subs), 1)
+
class FakeSink(object):
def __init__(self):