diff options
author | Jörn Hees <joernhees@users.noreply.github.com> | 2015-03-18 17:44:02 +0100 |
---|---|---|
committer | Jörn Hees <joernhees@users.noreply.github.com> | 2015-03-18 17:44:02 +0100 |
commit | 395a40101fe133d97f454ee61da0fc748a93b007 (patch) | |
tree | 92f39f1025842b090c968cebda59603de4fd6f6b | |
parent | 0742b9ecd45ad74441f19b4e8661175e80cea824 (diff) | |
parent | af5f38a2f4a82cd0a87753d2a91363236ff3eab6 (diff) | |
download | rdflib-395a40101fe133d97f454ee61da0fc748a93b007.tar.gz |
Merge pull request #473 from joernhees/graphtool
code to convert an rdflib.Graph into a graph_tool.Graph
-rw-r--r-- | rdflib/extras/external_graph_libs.py | 110 | ||||
-rw-r--r-- | setup.cfg | 2 | ||||
-rw-r--r-- | test/test_extras_external_graph_libs.py | 92 |
3 files changed, 199 insertions, 5 deletions
diff --git a/rdflib/extras/external_graph_libs.py b/rdflib/extras/external_graph_libs.py
index 883684e7..10daf949 100644
--- a/rdflib/extras/external_graph_libs.py
+++ b/rdflib/extras/external_graph_libs.py
@@ -9,6 +9,11 @@ from __future__ import unicode_literals
 
 Currently the following libraries are supported:
 - networkx: MultiDiGraph, DiGraph, Graph
+- graph_tool: Graph
+
+Doctests in this file are all skipped, as we can't run them conditionally if
+networkx or graph_tool are available and they would err otherwise.
+see ../../test/test_extras_external_graph_libs.py for conditional tests
 """
 
 import logging
@@ -232,13 +237,110 @@ def rdflib_to_networkx_graph(
     return g
 
 
-def main(): # pragma: no cover
+def rdflib_to_graphtool(
+        graph,
+        v_prop_names=[str('term')],
+        e_prop_names=[str('term')],
+        transform_s=lambda s, p, o: {str('term'): s},
+        transform_p=lambda s, p, o: {str('term'): p},
+        transform_o=lambda s, p, o: {str('term'): o},
+        ):
+    """Converts the given graph into a graph_tool.Graph().
+
+    The subjects and objects are the later vertices of the Graph.
+    The predicates become edges.
+
+    Arguments:
+        graph: a rdflib.Graph.
+        v_prop_names: a list of names for the vertex properties. The default is
+            set to ['term'] (see transform_s, transform_o below).
+        e_prop_names: a list of names for the edge properties.
+        transform_s: callable with s, p, o input. Should return a dictionary
+            containing a value for each name in v_prop_names. By default is set
+            to {'term': s} which in combination with v_prop_names = ['term']
+            adds s as 'term' property to the generated vertex for s.
+        transform_p: similar to transform_s, but wrt. e_prop_names. By default
+            returns {'term': p} which adds p as a property to the generated
+            edge between the vertex for s and the vertex for o.
+        transform_o: similar to transform_s.
+
+    Returns:
+        graph_tool.Graph()
+
+    >>> from rdflib import Graph, URIRef, Literal
+    >>> g = Graph()
+    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
+    >>> p, q = URIRef('p'), URIRef('q')
+    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
+    >>> for t in edges:
+    ...     g.add(t)
+    ...
+    >>> mdg = rdflib_to_graphtool(g)
+    >>> len(list(mdg.edges()))
+    4
+    >>> from graph_tool import util as gt_util
+    >>> vpterm = mdg.vertex_properties['term']
+    >>> va = gt_util.find_vertex(mdg, vpterm, a)[0]
+    >>> vb = gt_util.find_vertex(mdg, vpterm, b)[0]
+    >>> vl = gt_util.find_vertex(mdg, vpterm, l)[0]
+    >>> (va, vb) in [(e.source(), e.target()) for e in list(mdg.edges())]
+    True
+    >>> epterm = mdg.edge_properties['term']
+    >>> len(list(gt_util.find_edge(mdg, epterm, p))) == 3
+    True
+    >>> len(list(gt_util.find_edge(mdg, epterm, q))) == 1
+    True
+
+    >>> mdg = rdflib_to_graphtool(
+    ...     g,
+    ...     e_prop_names=[str('name')],
+    ...     transform_p=lambda s, p, o: {str('name'): unicode(p)})
+    >>> epterm = mdg.edge_properties['name']
+    >>> len(list(gt_util.find_edge(mdg, epterm, unicode(p)))) == 3
+    True
+    >>> len(list(gt_util.find_edge(mdg, epterm, unicode(q)))) == 1
+    True
+    """
+    import graph_tool as gt
+    g = gt.Graph()
+
+    vprops = [(vpn, g.new_vertex_property('object')) for vpn in v_prop_names]
+    for vpn, vprop in vprops:
+        g.vertex_properties[vpn] = vprop
+    eprops = [(epn, g.new_edge_property('object')) for epn in e_prop_names]
+    for epn, eprop in eprops:
+        g.edge_properties[epn] = eprop
+    node_to_vertex = {}
+    for s, p, o in graph:
+        sv = node_to_vertex.get(s)
+        if sv is None:
+            v = g.add_vertex()
+            node_to_vertex[s] = v
+            tmp_props = transform_s(s, p, o)
+            for vpn, vprop in vprops:
+                vprop[v] = tmp_props[vpn]
+            sv = v
+
+        ov = node_to_vertex.get(o)
+        if ov is None:
+            v = g.add_vertex()
+            node_to_vertex[o] = v
+            tmp_props = transform_o(s, p, o)
+            for vpn, vprop in vprops:
+                vprop[v] = tmp_props[vpn]
+            ov = v
+
+        e = g.add_edge(sv, ov)
+        tmp_props = transform_p(s, p, o)
+        for epn, eprop in eprops:
+            eprop[e] = tmp_props[epn]
+    return g
+
+
+if __name__ == '__main__':
     import sys
     import logging.config
     logging.basicConfig(level=logging.DEBUG)
 
     import nose
     nose.run(argv=[sys.argv[0], sys.argv[0], '-v', '--with-doctest'])
-
-if __name__ == '__main__':
-    main()
@@ -3,4 +3,4 @@
 attr=!known_issue,!non_core,!performancetest
 verbosity=1
 with-doctest=1
-exclude=rdflib.plugins.sparql.paths
+exclude=rdflib.plugins.sparql.paths|rdflib.extras.external_graph_libs
diff --git a/test/test_extras_external_graph_libs.py b/test/test_extras_external_graph_libs.py
new file mode 100644
index 00000000..2986d258
--- /dev/null
+++ b/test/test_extras_external_graph_libs.py
@@ -0,0 +1,92 @@
+from nose import SkipTest
+from rdflib import Graph, URIRef, Literal
+
+def test_rdflib_to_networkx():
+    try:
+        import networkx
+    except ImportError:
+        raise SkipTest("couldn't find networkx")
+    from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
+    from rdflib.extras.external_graph_libs import rdflib_to_networkx_digraph
+    from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph
+    g = Graph()
+    a, b, l = URIRef('a'), URIRef('b'), Literal('l')
+    p, q = URIRef('p'), URIRef('q')
+    edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
+    for t in edges:
+        g.add(t)
+
+
+    mdg = rdflib_to_networkx_multidigraph(g)
+    assert len(mdg.edges()) == 4
+    assert mdg.has_edge(a, b)
+    assert mdg.has_edge(a, b, key=p)
+    assert mdg.has_edge(a, b, key=q)
+
+    mdg = rdflib_to_networkx_multidigraph(g, edge_attrs=lambda s, p, o: {})
+    assert mdg.has_edge(a, b, key=0)
+    assert mdg.has_edge(a, b, key=1)
+
+
+    dg = rdflib_to_networkx_digraph(g)
+    assert dg[a][b]['weight'] == 2
+    assert sorted(dg[a][b]['triples']) == [(a, p, b), (a, q, b)]
+    assert len(dg.edges()) == 3
+    assert dg.size() == 3
+    assert dg.size(weight='weight') == 4.0
+
+    dg = rdflib_to_networkx_graph(g, False, edge_attrs=lambda s, p, o:{})
+    assert 'weight' not in dg[a][b]
+    assert 'triples' not in dg[a][b]
+
+
+    ug = rdflib_to_networkx_graph(g)
+    assert ug[a][b]['weight'] == 3
+    assert sorted(ug[a][b]['triples']) == [(a, p, b), (a, q, b), (b, p, a)]
+    assert len(ug.edges()) == 2
+    assert ug.size() == 2
+    assert ug.size(weight='weight') == 4.0
+
+    ug = rdflib_to_networkx_graph(g, False, edge_attrs=lambda s, p, o:{})
+    assert 'weight' not in ug[a][b]
+    assert 'triples' not in ug[a][b]
+
+
+def test_rdflib_to_graphtool():
+    try:
+        from graph_tool import util as gt_util
+    except ImportError:
+        raise SkipTest("couldn't find graph_tool")
+    from rdflib.extras.external_graph_libs import rdflib_to_graphtool
+    g = Graph()
+    a, b, l = URIRef('a'), URIRef('b'), Literal('l')
+    p, q = URIRef('p'), URIRef('q')
+    edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
+    for t in edges:
+        g.add(t)
+
+    mdg = rdflib_to_graphtool(g)
+    assert len(list(mdg.edges())) == 4
+
+    vpterm = mdg.vertex_properties['term']
+    va = gt_util.find_vertex(mdg, vpterm, a)[0]
+    vb = gt_util.find_vertex(mdg, vpterm, b)[0]
+    vl = gt_util.find_vertex(mdg, vpterm, l)[0]
+    assert (va, vb) in [(e.source(), e.target()) for e in list(mdg.edges())]
+
+    epterm = mdg.edge_properties['term']
+    assert len(list(gt_util.find_edge(mdg, epterm, p))) == 3
+    assert len(list(gt_util.find_edge(mdg, epterm, q))) == 1
+
+    mdg = rdflib_to_graphtool(
+        g,
+        e_prop_names=[str('name')],
+        transform_p=lambda s, p, o: {str('name'): unicode(p)})
+    epterm = mdg.edge_properties['name']
+    assert len(list(gt_util.find_edge(mdg, epterm, unicode(p)))) == 3
+    assert len(list(gt_util.find_edge(mdg, epterm, unicode(q)))) == 1
+
+if __name__ == "__main__":
+    import sys
+    import nose
+    nose.main(defaultTest=sys.argv[0])