diff options
author | Nicholas Car <nicholas.car@surroundaustralia.com> | 2020-06-05 20:40:24 +1000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-05 20:40:24 +1000 |
commit | efa2635b83c6386d8f4cce5ef76d75448de777ab (patch) | |
tree | 5ff187e67813a03bf4a35b8b4db02a931090ba4e | |
parent | 6b5bd37ccc67bdec62d2e36d174eb7933b5020b2 (diff) | |
parent | bf540416f3d6819626c262ced5ca87be96b80df4 (diff) | |
download | rdflib-efa2635b83c6386d8f4cce5ef76d75448de777ab.tar.gz |
Merge pull request #1108 from mwatts15/feature/rdflib/rdflib#980-nt-nquads-blank-node-collisions
BNode context dicts for NT and N-Quads parsers
-rw-r--r-- | rdflib/plugins/parsers/nquads.py | 24 | ||||
-rw-r--r-- | rdflib/plugins/parsers/nt.py | 16 | ||||
-rw-r--r-- | rdflib/plugins/parsers/ntriples.py | 38 | ||||
-rw-r--r-- | test/nquads.rdflib/bnode_context.nquads | 3 | ||||
-rw-r--r-- | test/nquads.rdflib/bnode_context_obj_bnodes.nquads | 3 | ||||
-rw-r--r-- | test/test_nquads.py | 64 | ||||
-rw-r--r-- | test/test_nt_misc.py | 16 |
7 files changed, 132 insertions, 32 deletions
diff --git a/rdflib/plugins/parsers/nquads.py b/rdflib/plugins/parsers/nquads.py index 0c29fc4c..a3bfbc6e 100644 --- a/rdflib/plugins/parsers/nquads.py +++ b/rdflib/plugins/parsers/nquads.py @@ -40,8 +40,18 @@ __all__ = ["NQuadsParser"] class NQuadsParser(NTriplesParser): - def parse(self, inputsource, sink, **kwargs): - """Parse f as an N-Triples file.""" + def parse(self, inputsource, sink, bnode_context=None, **kwargs): + """ + Parse inputsource as an N-Quads file. + + :type inputsource: `rdflib.parser.InputSource` + :param inputsource: the source of N-Quads-formatted data + :type sink: `rdflib.graph.Graph` + :param sink: where to send parsed triples + :type bnode_context: `dict`, optional + :param bnode_context: a dict mapping blank node identifiers to `~rdflib.term.BNode` instances. + See `.NTriplesParser.parse` + """ assert sink.store.context_aware, ( "NQuadsParser must be given" " a context aware store." ) @@ -61,27 +71,27 @@ class NQuadsParser(NTriplesParser): if self.line is None: break try: - self.parseline() + self.parseline(bnode_context) except ParseError as msg: raise ParseError("Invalid line (%s):\n%r" % (msg, __line)) return self.sink - def parseline(self): + def parseline(self, bnode_context=None): self.eat(r_wspace) if (not self.line) or self.line.startswith(("#")): return # The line is empty or a comment - subject = self.subject() + subject = self.subject(bnode_context) self.eat(r_wspace) predicate = self.predicate() self.eat(r_wspace) - obj = self.object() + obj = self.object(bnode_context) self.eat(r_wspace) - context = self.uriref() or self.nodeid() or self.sink.identifier + context = self.uriref() or self.nodeid(bnode_context) or self.sink.identifier self.eat(r_tail) if self.line: diff --git a/rdflib/plugins/parsers/nt.py b/rdflib/plugins/parsers/nt.py index d7d3b336..c37a1aa0 100644 --- a/rdflib/plugins/parsers/nt.py +++ b/rdflib/plugins/parsers/nt.py @@ -17,11 +17,17 @@ class NTParser(Parser): See http://www.w3.org/TR/rdf-testcases/#ntriples""" - def __init__(self): - super(NTParser, self).__init__() - - def parse(self, source, sink, baseURI=None): + def parse(self, source, sink, **kwargs): + ''' + Parse the NT format + + :type source: `rdflib.parser.InputSource` + :param source: the source of NT-formatted data + :type sink: `rdflib.graph.Graph` + :param sink: where to send parsed triples + :param kwargs: Additional arguments to pass to `.NTriplesParser.parse` + ''' f = source.getByteStream() # TODO getCharacterStream? parser = NTriplesParser(NTSink(sink)) - parser.parse(f) + parser.parse(f, **kwargs) f.close() diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py index 21c931c6..33a4a4e6 100644 --- a/rdflib/plugins/parsers/ntriples.py +++ b/rdflib/plugins/parsers/ntriples.py @@ -133,8 +133,6 @@ class NTriplesParser(object): else: self._bnode_ids = {} - self._parse_bnode_ids = None - if sink is not None: self.sink = sink else: @@ -144,10 +142,13 @@ class NTriplesParser(object): """ Parse f as an N-Triples file. + :type f: :term:`file object` :param f: the N-Triples source + :type bnode_context: `dict`, optional :param bnode_context: a dict mapping blank node identifiers (e.g., ``a`` in ``_:a``) - to `.BNode` instances. An empty dict can be passed in to - define a distinct context for a given call to `parse`. + to `~rdflib.term.BNode` instances. An empty dict can be + passed in to define a distinct context for a given call to + `parse`. """ if not hasattr(f, "read"): raise ParseError("Item to parse must be a file-like object.") @@ -156,13 +157,12 @@ class NTriplesParser(object): self.file = f self.buffer = "" - self._parse_bnode_ids = bnode_context while True: self.line = self.readline() if self.line is None: break try: - self.parseline() + self.parseline(bnode_context=bnode_context) except ParseError: raise ParseError("Invalid line: %r" % self.line) return self.sink @@ -200,18 +200,18 @@ class NTriplesParser(object): return None self.buffer += buffer - def parseline(self): + def parseline(self, bnode_context=None): self.eat(r_wspace) if (not self.line) or self.line.startswith("#"): return # The line is empty or a comment - subject = self.subject() + subject = self.subject(bnode_context) self.eat(r_wspaces) predicate = self.predicate() self.eat(r_wspaces) - object = self.object() + object = self.object(bnode_context) self.eat(r_tail) if self.line: @@ -230,9 +230,9 @@ class NTriplesParser(object): self.line = self.line[m.end():] return m - def subject(self): + def subject(self, bnode_context=None): # @@ Consider using dictionary cases - subj = self.uriref() or self.nodeid() + subj = self.uriref() or self.nodeid(bnode_context) if not subj: raise ParseError("Subject must be uriref or nodeID") return subj @@ -243,8 +243,8 @@ class NTriplesParser(object): raise ParseError("Predicate must be uriref") return pred - def object(self): - objt = self.uriref() or self.nodeid() or self.literal() + def object(self, bnode_context=None): + objt = self.uriref() or self.nodeid(bnode_context) or self.literal() if objt is False: raise ParseError("Unrecognised object type") return objt @@ -257,15 +257,13 @@ class NTriplesParser(object): return URI(uri) return False - def nodeid(self): + def nodeid(self, bnode_context=None): if self.peek("_"): # Fix for https://github.com/RDFLib/rdflib/issues/204 - if self._parse_bnode_ids is not None: - bnode_ids = self._parse_bnode_ids - else: - bnode_ids = self._bnode_ids + if bnode_context is None: + bnode_context = self._bnode_ids bnode_id = self.eat(r_nodeid).group(1) - new_id = bnode_ids.get(bnode_id, None) + new_id = bnode_context.get(bnode_id, None) if new_id is not None: # Re-map to id specfic to this doc return bNode(new_id) @@ -273,7 +271,7 @@ class NTriplesParser(object): # Replace with freshly-generated document-specific BNode id bnode = bNode() # Store the mapping - self._bnode_ids[bnode_id] = bnode + bnode_context[bnode_id] = bnode return bnode return False diff --git a/test/nquads.rdflib/bnode_context.nquads b/test/nquads.rdflib/bnode_context.nquads new file mode 100644 index 00000000..59e9350f --- /dev/null +++ b/test/nquads.rdflib/bnode_context.nquads @@ -0,0 +1,3 @@ +_:bnode1 <http://xmlns.com/foaf/0.1/Friend> "Michele" _:blah . +_:bnode2 <http://xmlns.com/foaf/0.1/Friend> "Kevin" _:bluh . + diff --git a/test/nquads.rdflib/bnode_context_obj_bnodes.nquads b/test/nquads.rdflib/bnode_context_obj_bnodes.nquads new file mode 100644 index 00000000..5b90fb00 --- /dev/null +++ b/test/nquads.rdflib/bnode_context_obj_bnodes.nquads @@ -0,0 +1,3 @@ +_:bnode1 <http://xmlns.com/foaf/0.1/Friend> "Michele" <http://example.org/alice/foaf2.rdf> . +<http://example.org/Kevin> <http://xmlns.com/foaf/0.1/Friend> _:bnode2 <http://example.org/alice/foaf3.rdf> . + diff --git a/test/test_nquads.py b/test/test_nquads.py index c25bc7ed..da9e8e2d 100644 --- a/test/test_nquads.py +++ b/test/test_nquads.py @@ -67,5 +67,69 @@ class NQuadsParserTest(unittest.TestCase): ) +class BnodeContextTest(unittest.TestCase): + def setUp(self): + self.data = open("test/nquads.rdflib/bnode_context.nquads", "rb") + self.data_obnodes = open("test/nquads.rdflib/bnode_context_obj_bnodes.nquads", "rb") + + def tearDown(self): + self.data.close() + + def test_parse_shared_bnode_context(self): + bnode_ctx = dict() + g = ConjunctiveGraph() + h = ConjunctiveGraph() + g.parse(self.data, format="nquads", bnode_context=bnode_ctx) + self.data.seek(0) + h.parse(self.data, format="nquads", bnode_context=bnode_ctx) + self.assertEqual(set(h.subjects()), set(g.subjects())) + + def test_parse_shared_bnode_context_same_graph(self): + bnode_ctx = dict() + g = ConjunctiveGraph() + g.parse(self.data_obnodes, format="nquads", bnode_context=bnode_ctx) + o1 = set(g.objects()) + self.data_obnodes.seek(0) + g.parse(self.data_obnodes, format="nquads", bnode_context=bnode_ctx) + o2 = set(g.objects()) + self.assertEqual(o1, o2) + + def test_parse_distinct_bnode_context(self): + g = ConjunctiveGraph() + g.parse(self.data, format="nquads", bnode_context=dict()) + s1 = set(g.subjects()) + self.data.seek(0) + g.parse(self.data, format="nquads", bnode_context=dict()) + s2 = set(g.subjects()) + self.assertNotEqual(set(), s2 - s1) + + def test_parse_distinct_bnode_contexts_between_graphs(self): + g = ConjunctiveGraph() + h = ConjunctiveGraph() + g.parse(self.data, format="nquads") + s1 = set(g.subjects()) + self.data.seek(0) + h.parse(self.data, format="nquads") + s2 = set(h.subjects()) + self.assertNotEqual(s1, s2) + + def test_parse_distinct_bnode_contexts_named_graphs(self): + g = ConjunctiveGraph() + h = ConjunctiveGraph() + g.parse(self.data, format="nquads") + self.data.seek(0) + h.parse(self.data, format="nquads") + self.assertNotEqual(set(h.contexts()), set(g.contexts())) + + def test_parse_shared_bnode_contexts_named_graphs(self): + bnode_ctx = dict() + g = ConjunctiveGraph() + h = ConjunctiveGraph() + g.parse(self.data, format="nquads", bnode_context=bnode_ctx) + self.data.seek(0) + h.parse(self.data, format="nquads", bnode_context=bnode_ctx) + self.assertEqual(set(h.contexts()), set(g.contexts())) + + if __name__ == "__main__": unittest.main() diff --git a/test/test_nt_misc.py b/test/test_nt_misc.py index 4a21fed9..af7049d8 100644 --- a/test/test_nt_misc.py +++ b/test/test_nt_misc.py @@ -188,6 +188,22 @@ class BNodeContextTestCase(unittest.TestCase): self.assertEqual(len(my_sink.subs), 1) + def test_bnode_shared_across_instances_with_parse_option(self): + my_sink = FakeSink() + bnode_ctx = dict() + + p = ntriples.NTriplesParser(my_sink) + p.parsestring(''' + _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> . + ''', bnode_context=bnode_ctx) + + q = ntriples.NTriplesParser(my_sink) + q.parsestring(''' + _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000002> . + ''', bnode_context=bnode_ctx) + + self.assertEqual(len(my_sink.subs), 1) + class FakeSink(object): def __init__(self): |