summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDirk Baechle <dl9obn@darc.de>2013-04-11 11:24:31 +0200
committerDirk Baechle <dl9obn@darc.de>2013-04-11 11:24:31 +0200
commitbbfa8f4f91766a17d03d10ba7f202b0a0819b9f4 (patch)
tree0783569b78f43478e775a6dd2722ba4ddc04992c
parent074b6e8f12af8462de902ab988c37f8852a450cf (diff)
downloadlogilab-common-bbfa8f4f91766a17d03d10ba7f202b0a0819b9f4.tar.gz
added pruning of the recursive search tree for detecting cycles in graphs. Closes #2469
provides a significant speedup for get_cycles() Rationale for the whole patch: While trying to analyze the source tree of SCons (www.scons.org), I noticed that pylint took forever to finish a single run. I tracked the problem down a little and finally ended my search at the _get_cycles() function that gets called recursively. They way it is implemented at the moment, you will get a very high number of calls for medium to large graphs that are also very dense in nature (have a high vertex degree). In the case of SCons there are about 176 packages involved, which import each other a lot. This results in 130869104 calls of _get_cycles() and a runtime of 27minutes and 6.835seconds fo the whole pylint run. With this patch you get 10156 calls, taking 30.893s for the complete pylint call. Note, that the pruning of the search tree also reduces the list of output results, since all manifolds that are simple rotated versions of each other, get suppressed automatically.
-rw-r--r--ChangeLog3
-rw-r--r--graph.py9
2 files changed, 9 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index a216b5b..442f5f6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,6 +2,9 @@ ChangeLog for logilab.common
============================
--
+ * graph: added pruning of the recursive search tree for detecting cycles in
+ graphs (closes #2469)
+
* testlib: check for generators in with_tempdir (closes #117533)
* registry:
diff --git a/graph.py b/graph.py
index 75a2ee7..5cae530 100644
--- a/graph.py
+++ b/graph.py
@@ -226,10 +226,10 @@ def get_cycles(graph_dict, vertices=None):
if vertices is None:
vertices = graph_dict.keys()
for vertice in vertices:
- _get_cycles(graph_dict, vertice, [], result)
+ _get_cycles(graph_dict, [], set(), result, vertice)
return result
-def _get_cycles(graph_dict, vertice=None, path=None, result=None):
+def _get_cycles(graph_dict, path, visited, result, vertice):
"""recursive function doing the real work for get_cycles"""
if vertice in path:
cycle = [vertice]
@@ -248,7 +248,10 @@ def _get_cycles(graph_dict, vertice=None, path=None, result=None):
path.append(vertice)
try:
for node in graph_dict[vertice]:
- _get_cycles(graph_dict, node, path, result)
+ # don't check already visited nodes again
+ if node not in visited:
+ _get_cycles(graph_dict, path, visited, result, node)
+ visited.add(node)
except KeyError:
pass
path.pop()