summary refs log tree commit diff
diff options
context:
space:
mode:
author: Jörn Hees <joernhees@users.noreply.github.com> 2015-03-18 17:44:02 +0100
committer: Jörn Hees <joernhees@users.noreply.github.com> 2015-03-18 17:44:02 +0100
commit: 395a40101fe133d97f454ee61da0fc748a93b007 (patch)
tree: 92f39f1025842b090c968cebda59603de4fd6f6b
parent: 0742b9ecd45ad74441f19b4e8661175e80cea824 (diff)
parent: af5f38a2f4a82cd0a87753d2a91363236ff3eab6 (diff)
download: rdflib-395a40101fe133d97f454ee61da0fc748a93b007.tar.gz
Merge pull request #473 from joernhees/graphtool
code to convert an rdflib.Graph into a graph_tool.Graph
-rw-r--r-- rdflib/extras/external_graph_libs.py 110
-rw-r--r-- setup.cfg 2
-rw-r--r-- test/test_extras_external_graph_libs.py 92
3 files changed, 199 insertions, 5 deletions
diff --git a/rdflib/extras/external_graph_libs.py b/rdflib/extras/external_graph_libs.py
index 883684e7..10daf949 100644
--- a/rdflib/extras/external_graph_libs.py
+++ b/rdflib/extras/external_graph_libs.py
@@ -9,6 +9,11 @@ from __future__ import unicode_literals
Currently the following libraries are supported:
- networkx: MultiDiGraph, DiGraph, Graph
+- graph_tool: Graph
+
+Doctests in this file are all skipped, as we can't run them conditionally if
+networkx or graph_tool are available and they would err otherwise.
+see ../../test/test_extras_external_graph_libs.py for conditional tests
"""
import logging
@@ -232,13 +237,110 @@ def rdflib_to_networkx_graph(
return g
-def main(): # pragma: no cover
+def rdflib_to_graphtool(
+ graph,
+ v_prop_names=[str('term')],
+ e_prop_names=[str('term')],
+ transform_s=lambda s, p, o: {str('term'): s},
+ transform_p=lambda s, p, o: {str('term'): p},
+ transform_o=lambda s, p, o: {str('term'): o},
+ ):
+ """Converts the given graph into a graph_tool.Graph().
+
+ The subjects and objects are the later vertices of the Graph.
+ The predicates become edges.
+
+ Arguments:
+ graph: a rdflib.Graph.
+ v_prop_names: a list of names for the vertex properties. The default is
+ set to ['term'] (see transform_s, transform_o below).
+ e_prop_names: a list of names for the edge properties.
+ transform_s: callable with s, p, o input. Should return a dictionary
+ containing a value for each name in v_prop_names. By default is set
+ to {'term': s} which in combination with v_prop_names = ['term']
+ adds s as 'term' property to the generated vertex for s.
+ transform_p: similar to transform_s, but wrt. e_prop_names. By default
+ returns {'term': p} which adds p as a property to the generated
+ edge between the vertex for s and the vertex for o.
+ transform_o: similar to transform_s.
+
+ Returns:
+ graph_tool.Graph()
+
+ >>> from rdflib import Graph, URIRef, Literal
+ >>> g = Graph()
+ >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
+ >>> p, q = URIRef('p'), URIRef('q')
+ >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
+ >>> for t in edges:
+ ... g.add(t)
+ ...
+ >>> mdg = rdflib_to_graphtool(g)
+ >>> len(list(mdg.edges()))
+ 4
+ >>> from graph_tool import util as gt_util
+ >>> vpterm = mdg.vertex_properties['term']
+ >>> va = gt_util.find_vertex(mdg, vpterm, a)[0]
+ >>> vb = gt_util.find_vertex(mdg, vpterm, b)[0]
+ >>> vl = gt_util.find_vertex(mdg, vpterm, l)[0]
+ >>> (va, vb) in [(e.source(), e.target()) for e in list(mdg.edges())]
+ True
+ >>> epterm = mdg.edge_properties['term']
+ >>> len(list(gt_util.find_edge(mdg, epterm, p))) == 3
+ True
+ >>> len(list(gt_util.find_edge(mdg, epterm, q))) == 1
+ True
+
+ >>> mdg = rdflib_to_graphtool(
+ ... g,
+ ... e_prop_names=[str('name')],
+ ... transform_p=lambda s, p, o: {str('name'): unicode(p)})
+ >>> epterm = mdg.edge_properties['name']
+ >>> len(list(gt_util.find_edge(mdg, epterm, unicode(p)))) == 3
+ True
+ >>> len(list(gt_util.find_edge(mdg, epterm, unicode(q)))) == 1
+ True
+ """
+ import graph_tool as gt
+ g = gt.Graph()
+
+ vprops = [(vpn, g.new_vertex_property('object')) for vpn in v_prop_names]
+ for vpn, vprop in vprops:
+ g.vertex_properties[vpn] = vprop
+ eprops = [(epn, g.new_edge_property('object')) for epn in e_prop_names]
+ for epn, eprop in eprops:
+ g.edge_properties[epn] = eprop
+ node_to_vertex = {}
+ for s, p, o in graph:
+ sv = node_to_vertex.get(s)
+ if sv is None:
+ v = g.add_vertex()
+ node_to_vertex[s] = v
+ tmp_props = transform_s(s, p, o)
+ for vpn, vprop in vprops:
+ vprop[v] = tmp_props[vpn]
+ sv = v
+
+ ov = node_to_vertex.get(o)
+ if ov is None:
+ v = g.add_vertex()
+ node_to_vertex[o] = v
+ tmp_props = transform_o(s, p, o)
+ for vpn, vprop in vprops:
+ vprop[v] = tmp_props[vpn]
+ ov = v
+
+ e = g.add_edge(sv, ov)
+ tmp_props = transform_p(s, p, o)
+ for epn, eprop in eprops:
+ eprop[e] = tmp_props[epn]
+ return g
+
+
+if __name__ == '__main__':
import sys
import logging.config
logging.basicConfig(level=logging.DEBUG)
import nose
nose.run(argv=[sys.argv[0], sys.argv[0], '-v', '--with-doctest'])
-
-if __name__ == '__main__':
- main()
diff --git a/setup.cfg b/setup.cfg
index a4e33487..8da8b515 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -3,4 +3,4 @@
attr=!known_issue,!non_core,!performancetest
verbosity=1
with-doctest=1
-exclude=rdflib.plugins.sparql.paths
+exclude=rdflib.plugins.sparql.paths|rdflib.extras.external_graph_libs
diff --git a/test/test_extras_external_graph_libs.py b/test/test_extras_external_graph_libs.py
new file mode 100644
index 00000000..2986d258
--- /dev/null
+++ b/test/test_extras_external_graph_libs.py
@@ -0,0 +1,92 @@
+from nose import SkipTest
+from rdflib import Graph, URIRef, Literal
+
+def test_rdflib_to_networkx():
+ try:
+ import networkx
+ except ImportError:
+ raise SkipTest("couldn't find networkx")
+ from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
+ from rdflib.extras.external_graph_libs import rdflib_to_networkx_digraph
+ from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph
+ g = Graph()
+ a, b, l = URIRef('a'), URIRef('b'), Literal('l')
+ p, q = URIRef('p'), URIRef('q')
+ edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
+ for t in edges:
+ g.add(t)
+
+
+ mdg = rdflib_to_networkx_multidigraph(g)
+ assert len(mdg.edges()) == 4
+ assert mdg.has_edge(a, b)
+ assert mdg.has_edge(a, b, key=p)
+ assert mdg.has_edge(a, b, key=q)
+
+ mdg = rdflib_to_networkx_multidigraph(g, edge_attrs=lambda s, p, o: {})
+ assert mdg.has_edge(a, b, key=0)
+ assert mdg.has_edge(a, b, key=1)
+
+
+ dg = rdflib_to_networkx_digraph(g)
+ assert dg[a][b]['weight'] == 2
+ assert sorted(dg[a][b]['triples']) == [(a, p, b), (a, q, b)]
+ assert len(dg.edges()) == 3
+ assert dg.size() == 3
+ assert dg.size(weight='weight') == 4.0
+
+ dg = rdflib_to_networkx_graph(g, False, edge_attrs=lambda s, p, o:{})
+ assert 'weight' not in dg[a][b]
+ assert 'triples' not in dg[a][b]
+
+
+ ug = rdflib_to_networkx_graph(g)
+ assert ug[a][b]['weight'] == 3
+ assert sorted(ug[a][b]['triples']) == [(a, p, b), (a, q, b), (b, p, a)]
+ assert len(ug.edges()) == 2
+ assert ug.size() == 2
+ assert ug.size(weight='weight') == 4.0
+
+ ug = rdflib_to_networkx_graph(g, False, edge_attrs=lambda s, p, o:{})
+ assert 'weight' not in ug[a][b]
+ assert 'triples' not in ug[a][b]
+
+
+def test_rdflib_to_graphtool():
+ try:
+ from graph_tool import util as gt_util
+ except ImportError:
+ raise SkipTest("couldn't find graph_tool")
+ from rdflib.extras.external_graph_libs import rdflib_to_graphtool
+ g = Graph()
+ a, b, l = URIRef('a'), URIRef('b'), Literal('l')
+ p, q = URIRef('p'), URIRef('q')
+ edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
+ for t in edges:
+ g.add(t)
+
+ mdg = rdflib_to_graphtool(g)
+ assert len(list(mdg.edges())) == 4
+
+ vpterm = mdg.vertex_properties['term']
+ va = gt_util.find_vertex(mdg, vpterm, a)[0]
+ vb = gt_util.find_vertex(mdg, vpterm, b)[0]
+ vl = gt_util.find_vertex(mdg, vpterm, l)[0]
+ assert (va, vb) in [(e.source(), e.target()) for e in list(mdg.edges())]
+
+ epterm = mdg.edge_properties['term']
+ assert len(list(gt_util.find_edge(mdg, epterm, p))) == 3
+ assert len(list(gt_util.find_edge(mdg, epterm, q))) == 1
+
+ mdg = rdflib_to_graphtool(
+ g,
+ e_prop_names=[str('name')],
+ transform_p=lambda s, p, o: {str('name'): unicode(p)})
+ epterm = mdg.edge_properties['name']
+ assert len(list(gt_util.find_edge(mdg, epterm, unicode(p)))) == 3
+ assert len(list(gt_util.find_edge(mdg, epterm, unicode(q)))) == 1
+
+if __name__ == "__main__":
+ import sys
+ import nose
+ nose.main(defaultTest=sys.argv[0])