diff options
author | Jörn Hees <joernhees@users.noreply.github.com> | 2015-08-10 19:42:48 +0200 |
---|---|---|
committer | Jörn Hees <joernhees@users.noreply.github.com> | 2015-08-10 19:42:48 +0200 |
commit | 9ba45477f1249bce9e17c08d0db23fc0dbd5ae61 (patch) | |
tree | 1da27121e924beb42672d0f021f4c6c9478c3801 | |
parent | fbc29da56a03929341e65e207684a5358bddcf26 (diff) | |
parent | b8df01f9bc53ccd01d61953108c30afae0f4b36e (diff) | |
download | rdflib-9ba45477f1249bce9e17c08d0db23fc0dbd5ae61.tar.gz |
Merge pull request #496 from RDFLib/fix_canonicalization
fixed #494 canonicalization sometimes collapses BNodes
-rw-r--r-- | rdflib/compare.py | 9 | ||||
-rw-r--r-- | test/test_canonicalization.py | 109 |
2 files changed, 111 insertions, 7 deletions
diff --git a/rdflib/compare.py b/rdflib/compare.py index 0f93639c..06ba9750 100644 --- a/rdflib/compare.py +++ b/rdflib/compare.py @@ -204,7 +204,9 @@ class Color: return unicode(x) if isinstance(color, Node): return stringify(color) - value = sum(map(self.hashfunc, ' '.join([stringify(x) for x in color]))) + value = 0 + for triple in color: + value += self.hashfunc(' '.join([stringify(x) for x in triple])) val = u"%x" % value self._hash_cache[color] = val return val @@ -290,7 +292,7 @@ class _TripleCanonicalizer(object): def _individuate(self, color, individual): new_color = list(color.color) - new_color.append((len(color.nodes))) + new_color.append((len(color.nodes),)) color.nodes.remove(individual) c = Color([individual], self.hashfunc, tuple(new_color), @@ -320,6 +322,7 @@ class _TripleCanonicalizer(object): sequence = sequence[:si] + colors + sequence[si+1:] except ValueError: sequence = colors[1:] + sequence + return coloring @_runtime("to_hash_runtime") @@ -407,7 +410,6 @@ class _TripleCanonicalizer(object): stats['prunings'] += 1 discrete = [x for x in best if self._discrete(x)] if len(discrete) == 0: - very_best = None best_score = None best_depth = None for coloring in best: @@ -434,6 +436,7 @@ class _TripleCanonicalizer(object): if stats is not None: stats['initial_coloring_runtime'] = _total_seconds(datetime.now() - start_coloring) stats['initial_color_count'] = len(coloring) + if not self._discrete(coloring): depth = [0] coloring = self._traces(coloring, stats=stats, depth=depth) diff --git a/test/test_canonicalization.py b/test/test_canonicalization.py index 2745f490..87b5eeaa 100644 --- a/test/test_canonicalization.py +++ b/test/test_canonicalization.py @@ -37,10 +37,10 @@ def negative_graph_match_test(): True ], [ unicode('''@prefix : <http://example.org/ns#> . - :linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]], + :linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]], [ :related [ :related :linear_two_step_symmatry_end]].'''), unicode('''@prefix : <http://example.org/ns#> . - :linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]], + :linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]], [ :related [ :related :linear_two_step_symmatry_end]].'''), True ], @@ -68,7 +68,7 @@ def negative_graph_match_test(): ].'''), False ], - # This test fails because the algorithm purposefully breaks the symmetry of symetric + # This test fails because the algorithm purposefully breaks the symmetry of symetric [ unicode('''@prefix : <http://example.org/ns#> . _:a :rel [ :rel [ @@ -144,8 +144,109 @@ def negative_graph_match_test(): def fn(rdf1, rdf2, identical): digest1 = get_digest_value(rdf1,"text/turtle") digest2 = get_digest_value(rdf2,"text/turtle") + print rdf1 print digest1 + print rdf2 print digest2 assert (digest1 == digest2) == identical for inputs in testInputs: - yield fn, inputs[0], inputs[1], inputs[2]
\ No newline at end of file + yield fn, inputs[0], inputs[1], inputs[2] + +def test_issue494_collapsing_bnodes(): + """Test for https://github.com/RDFLib/rdflib/issues/494 collapsing BNodes""" + g = Graph() + g += [ + (BNode('Na1a8fbcf755f41c1b5728f326be50994'), + RDF['object'], + URIRef(u'source')), + (BNode('Na1a8fbcf755f41c1b5728f326be50994'), + RDF['predicate'], + BNode('vcb3')), + (BNode('Na1a8fbcf755f41c1b5728f326be50994'), + RDF['subject'], + BNode('vcb2')), + (BNode('Na1a8fbcf755f41c1b5728f326be50994'), + RDF['type'], + RDF['Statement']), + (BNode('Na713b02f320d409c806ff0190db324f4'), + RDF['object'], + URIRef(u'target')), + (BNode('Na713b02f320d409c806ff0190db324f4'), + RDF['predicate'], + BNode('vcb0')), + (BNode('Na713b02f320d409c806ff0190db324f4'), + RDF['subject'], + URIRef(u'source')), + (BNode('Na713b02f320d409c806ff0190db324f4'), + RDF['type'], + RDF['Statement']), + (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'), + RDF['object'], + BNode('vr0KcS4')), + (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'), + RDF['predicate'], + BNode('vrby3JV')), + (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'), + RDF['subject'], + URIRef(u'source')), + (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'), + RDF['type'], + RDF['Statement']), + (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'), + RDF['object'], + URIRef(u'source')), + (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'), + RDF['predicate'], + BNode('vcb5')), + (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'), + RDF['subject'], + URIRef(u'target')), + (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'), + RDF['type'], + RDF['Statement']), + (BNode('Nec6864ef180843838aa9805bac835c98'), + RDF['object'], + URIRef(u'source')), + (BNode('Nec6864ef180843838aa9805bac835c98'), + RDF['predicate'], + BNode('vcb4')), + (BNode('Nec6864ef180843838aa9805bac835c98'), + RDF['subject'], + URIRef(u'source')), + (BNode('Nec6864ef180843838aa9805bac835c98'), + RDF['type'], + RDF['Statement']), + ] + + print 'graph length: %d, nodes: %d' % (len(g), len(g.all_nodes())) + print 'triple_bnode degrees:' + for triple_bnode in g.subjects(RDF['type'], RDF['Statement']): + print len(list(g.triples([triple_bnode, None, None]))) + print 'all node degrees:' + g_node_degs = sorted([ + len(list(g.triples([node, None, None]))) + for node in g.all_nodes() + ], reverse=True) + print g_node_degs + + cg = to_canonical_graph(g) + print 'graph length: %d, nodes: %d' % (len(cg), len(cg.all_nodes())) + print 'triple_bnode degrees:' + for triple_bnode in cg.subjects(RDF['type'], RDF['Statement']): + print len(list(cg.triples([triple_bnode, None, None]))) + print 'all node degrees:' + cg_node_degs = sorted([ + len(list(cg.triples([node, None, None]))) + for node in cg.all_nodes() + ], reverse=True) + print cg_node_degs + + assert len(g) == len(cg), \ + 'canonicalization changed number of triples in graph' + assert len(g.all_nodes()) == len(cg.all_nodes()), \ + 'canonicalization changed number of nodes in graph' + assert len(list(g.subjects(RDF['type'], RDF['Statement']))) == \ + len(list(cg.subjects(RDF['type'], RDF['Statement']))), \ + 'canonicalization changed number of statements' + assert g_node_degs == cg_node_degs, \ + 'canonicalization changed node degrees' |