summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jörn Hees <joernhees@users.noreply.github.com> 2015-08-10 19:42:48 +0200
committer Jörn Hees <joernhees@users.noreply.github.com> 2015-08-10 19:42:48 +0200
commit 9ba45477f1249bce9e17c08d0db23fc0dbd5ae61 (patch)
tree 1da27121e924beb42672d0f021f4c6c9478c3801
parent fbc29da56a03929341e65e207684a5358bddcf26 (diff)
parent b8df01f9bc53ccd01d61953108c30afae0f4b36e (diff)
download rdflib-9ba45477f1249bce9e17c08d0db23fc0dbd5ae61.tar.gz
Merge pull request #496 from RDFLib/fix_canonicalization
fixed #494 canonicalization sometimes collapses BNodes
-rw-r--r-- rdflib/compare.py 9
-rw-r--r-- test/test_canonicalization.py 109
2 files changed, 111 insertions, 7 deletions
diff --git a/rdflib/compare.py b/rdflib/compare.py
index 0f93639c..06ba9750 100644
--- a/rdflib/compare.py
+++ b/rdflib/compare.py
@@ -204,7 +204,9 @@ class Color:
return unicode(x)
if isinstance(color, Node):
return stringify(color)
- value = sum(map(self.hashfunc, ' '.join([stringify(x) for x in color])))
+ value = 0
+ for triple in color:
+ value += self.hashfunc(' '.join([stringify(x) for x in triple]))
val = u"%x" % value
self._hash_cache[color] = val
return val
@@ -290,7 +292,7 @@ class _TripleCanonicalizer(object):
def _individuate(self, color, individual):
new_color = list(color.color)
- new_color.append((len(color.nodes)))
+ new_color.append((len(color.nodes),))
color.nodes.remove(individual)
c = Color([individual], self.hashfunc, tuple(new_color),
@@ -320,6 +322,7 @@ class _TripleCanonicalizer(object):
sequence = sequence[:si] + colors + sequence[si+1:]
except ValueError:
sequence = colors[1:] + sequence
+
return coloring
@_runtime("to_hash_runtime")
@@ -407,7 +410,6 @@ class _TripleCanonicalizer(object):
stats['prunings'] += 1
discrete = [x for x in best if self._discrete(x)]
if len(discrete) == 0:
- very_best = None
best_score = None
best_depth = None
for coloring in best:
@@ -434,6 +436,7 @@ class _TripleCanonicalizer(object):
if stats is not None:
stats['initial_coloring_runtime'] = _total_seconds(datetime.now() - start_coloring)
stats['initial_color_count'] = len(coloring)
+
if not self._discrete(coloring):
depth = [0]
coloring = self._traces(coloring, stats=stats, depth=depth)
diff --git a/test/test_canonicalization.py b/test/test_canonicalization.py
index 2745f490..87b5eeaa 100644
--- a/test/test_canonicalization.py
+++ b/test/test_canonicalization.py
@@ -37,10 +37,10 @@ def negative_graph_match_test():
True
],
[ unicode('''@prefix : <http://example.org/ns#> .
- :linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]],
+ :linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]],
[ :related [ :related :linear_two_step_symmatry_end]].'''),
unicode('''@prefix : <http://example.org/ns#> .
- :linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]],
+ :linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]],
[ :related [ :related :linear_two_step_symmatry_end]].'''),
True
],
@@ -68,7 +68,7 @@ def negative_graph_match_test():
].'''),
False
],
- # This test fails because the algorithm purposefully breaks the symmetry of symetric
+ # This test fails because the algorithm purposefully breaks the symmetry of symetric
[ unicode('''@prefix : <http://example.org/ns#> .
_:a :rel [
:rel [
@@ -144,8 +144,109 @@ def negative_graph_match_test():
def fn(rdf1, rdf2, identical):
digest1 = get_digest_value(rdf1,"text/turtle")
digest2 = get_digest_value(rdf2,"text/turtle")
+ print rdf1
print digest1
+ print rdf2
print digest2
assert (digest1 == digest2) == identical
for inputs in testInputs:
- yield fn, inputs[0], inputs[1], inputs[2]
\ No newline at end of file
+ yield fn, inputs[0], inputs[1], inputs[2]
+
+def test_issue494_collapsing_bnodes():
+ """Test for https://github.com/RDFLib/rdflib/issues/494 collapsing BNodes"""
+ g = Graph()
+ g += [
+ (BNode('Na1a8fbcf755f41c1b5728f326be50994'),
+ RDF['object'],
+ URIRef(u'source')),
+ (BNode('Na1a8fbcf755f41c1b5728f326be50994'),
+ RDF['predicate'],
+ BNode('vcb3')),
+ (BNode('Na1a8fbcf755f41c1b5728f326be50994'),
+ RDF['subject'],
+ BNode('vcb2')),
+ (BNode('Na1a8fbcf755f41c1b5728f326be50994'),
+ RDF['type'],
+ RDF['Statement']),
+ (BNode('Na713b02f320d409c806ff0190db324f4'),
+ RDF['object'],
+ URIRef(u'target')),
+ (BNode('Na713b02f320d409c806ff0190db324f4'),
+ RDF['predicate'],
+ BNode('vcb0')),
+ (BNode('Na713b02f320d409c806ff0190db324f4'),
+ RDF['subject'],
+ URIRef(u'source')),
+ (BNode('Na713b02f320d409c806ff0190db324f4'),
+ RDF['type'],
+ RDF['Statement']),
+ (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'),
+ RDF['object'],
+ BNode('vr0KcS4')),
+ (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'),
+ RDF['predicate'],
+ BNode('vrby3JV')),
+ (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'),
+ RDF['subject'],
+ URIRef(u'source')),
+ (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'),
+ RDF['type'],
+ RDF['Statement']),
+ (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'),
+ RDF['object'],
+ URIRef(u'source')),
+ (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'),
+ RDF['predicate'],
+ BNode('vcb5')),
+ (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'),
+ RDF['subject'],
+ URIRef(u'target')),
+ (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'),
+ RDF['type'],
+ RDF['Statement']),
+ (BNode('Nec6864ef180843838aa9805bac835c98'),
+ RDF['object'],
+ URIRef(u'source')),
+ (BNode('Nec6864ef180843838aa9805bac835c98'),
+ RDF['predicate'],
+ BNode('vcb4')),
+ (BNode('Nec6864ef180843838aa9805bac835c98'),
+ RDF['subject'],
+ URIRef(u'source')),
+ (BNode('Nec6864ef180843838aa9805bac835c98'),
+ RDF['type'],
+ RDF['Statement']),
+ ]
+
+ print 'graph length: %d, nodes: %d' % (len(g), len(g.all_nodes()))
+ print 'triple_bnode degrees:'
+ for triple_bnode in g.subjects(RDF['type'], RDF['Statement']):
+ print len(list(g.triples([triple_bnode, None, None])))
+ print 'all node degrees:'
+ g_node_degs = sorted([
+ len(list(g.triples([node, None, None])))
+ for node in g.all_nodes()
+ ], reverse=True)
+ print g_node_degs
+
+ cg = to_canonical_graph(g)
+ print 'graph length: %d, nodes: %d' % (len(cg), len(cg.all_nodes()))
+ print 'triple_bnode degrees:'
+ for triple_bnode in cg.subjects(RDF['type'], RDF['Statement']):
+ print len(list(cg.triples([triple_bnode, None, None])))
+ print 'all node degrees:'
+ cg_node_degs = sorted([
+ len(list(cg.triples([node, None, None])))
+ for node in cg.all_nodes()
+ ], reverse=True)
+ print cg_node_degs
+
+ assert len(g) == len(cg), \
+ 'canonicalization changed number of triples in graph'
+ assert len(g.all_nodes()) == len(cg.all_nodes()), \
+ 'canonicalization changed number of nodes in graph'
+ assert len(list(g.subjects(RDF['type'], RDF['Statement']))) == \
+ len(list(cg.subjects(RDF['type'], RDF['Statement']))), \
+ 'canonicalization changed number of statements'
+ assert g_node_degs == cg_node_degs, \
+ 'canonicalization changed node degrees'