diff options
author | Mridul Seth <seth.mridul@gmail.com> | 2021-05-19 18:37:11 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-19 09:37:11 -0700 |
commit | 87c127b9b88de237d89c771abb6d964b29fd1356 (patch) | |
tree | ce11bb9abc20dadf054dd1ceab70599c5826ab4b | |
parent | 69b4a686b4064f963f290ac1faa75ee6b0a6ecbc (diff) | |
download | networkx-87c127b9b88de237d89c771abb6d964b29fd1356.tar.gz |
Make nx.hits a wrapper around different implementations, use scipy one by default (#4812)
* implement nstart for scipy version of hits
* deprecate python impl and use scipy one by default
Co-authored-by: Ross Barnowski <rossbar@berkeley.edu>
-rw-r--r-- | doc/developer/deprecations.rst | 2 | ||||
-rw-r--r-- | networkx/algorithms/link_analysis/hits_alg.py | 26 | ||||
-rw-r--r-- | networkx/algorithms/link_analysis/tests/test_hits.py | 69 |
3 files changed, 66 insertions, 31 deletions
diff --git a/doc/developer/deprecations.rst b/doc/developer/deprecations.rst index 59e789fc..459d84f5 100644 --- a/doc/developer/deprecations.rst +++ b/doc/developer/deprecations.rst @@ -77,5 +77,7 @@ Version 3.0 * Remove ``readwrite/json_graph/jit.py`` and related tests. * In ``utils/misc.py`` remove ``generate_unique_node`` and related tests. * In ``algorithms/link_analysis/hits_alg.py`` remove ``hub_matrix`` and ``authority_matrix`` +* In ``algorithms/link_analysis/hits_alg.py``, replace ``hits`` with the + implementation from ``hits_scipy`` and remove ``hits_numpy`` and ``hist_scipy``. * In ``networkx.classes`` remove the ``ordered`` module and the four ``Ordered`` classes defined therein. diff --git a/networkx/algorithms/link_analysis/hits_alg.py b/networkx/algorithms/link_analysis/hits_alg.py index 7ef2d8a5..a9975400 100644 --- a/networkx/algorithms/link_analysis/hits_alg.py +++ b/networkx/algorithms/link_analysis/hits_alg.py @@ -1,5 +1,6 @@ """Hubs and authorities analysis of graph structure. """ +from warnings import warn import networkx as nx __all__ = ["hits", "hits_numpy", "hits_scipy", "authority_matrix", "hub_matrix"] @@ -69,6 +70,10 @@ def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True): doi:10.1145/324133.324140. http://www.cs.cornell.edu/home/kleinber/auth.pdf. """ + return hits_scipy(G, max_iter, tol, nstart, normalized) + + +def _hits_python(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True): if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph: raise Exception("hits() not defined for graphs with multiedges.") if len(G) == 0: @@ -160,6 +165,10 @@ def hub_matrix(G, nodelist=None): def hits_numpy(G, normalized=True): """Returns HITS hubs and authorities values for nodes. + .. deprecated:: 2.6 + + hits_numpy is deprecated and will be removed in networkx 3.0. + The HITS algorithm computes two numbers for a node. Authorities estimates the node value based on the incoming links. Hubs estimates the node value based on outgoing links. @@ -216,6 +225,8 @@ def hits_numpy(G, normalized=True): doi:10.1145/324133.324140. http://www.cs.cornell.edu/home/kleinber/auth.pdf. """ + msg = "networkx.hits_numpy is deprecated and will be removed in NetworkX 3.0, use networkx.hits instead." + warn(msg, DeprecationWarning, stacklevel=2) import numpy as np if len(G) == 0: @@ -240,9 +251,13 @@ def hits_numpy(G, normalized=True): return hubs, authorities -def hits_scipy(G, max_iter=100, tol=1.0e-6, normalized=True): +def hits_scipy(G, max_iter=100, tol=1.0e-6, nstart=None, normalized=True): """Returns HITS hubs and authorities values for nodes. + .. deprecated:: 2.6 + + hits_scipy is deprecated and will be removed in networkx 3.0 + The HITS algorithm computes two numbers for a node. Authorities estimates the node value based on the incoming links. Hubs estimates the node value based on outgoing links. @@ -306,6 +321,8 @@ def hits_scipy(G, max_iter=100, tol=1.0e-6, normalized=True): doi:10.1145/324133.324140. http://www.cs.cornell.edu/home/kleinber/auth.pdf. """ + msg = "networkx.hits_scipy is deprecated and will be removed in NetworkX 3.0, use networkx.hits instead." + warn(msg, DeprecationWarning, stacklevel=2) import numpy as np if len(G) == 0: @@ -314,6 +331,13 @@ def hits_scipy(G, max_iter=100, tol=1.0e-6, normalized=True): (n, m) = M.shape # should be square A = M.T * M # authority matrix x = np.ones((n, 1)) / n # initial guess + # choose fixed starting vector if not given + if nstart is None: + x = np.ones((n, 1)) / n # initial guess + else: + x = np.array([nstart.get(n, 0) for n in list(G)], dtype=float) + x = x / x.sum() + # power iteration on authority matrix i = 0 while True: diff --git a/networkx/algorithms/link_analysis/tests/test_hits.py b/networkx/algorithms/link_analysis/tests/test_hits.py index e306d541..be354086 100644 --- a/networkx/algorithms/link_analysis/tests/test_hits.py +++ b/networkx/algorithms/link_analysis/tests/test_hits.py @@ -1,9 +1,13 @@ import pytest +import networkx as nx +np = pytest.importorskip("numpy") +pytest.importorskip("scipy") -import networkx from networkx.testing import almost_equal +from networkx.algorithms.link_analysis.hits_alg import _hits_python + # Example from # A. Langville and C. Meyer, "A survey of eigenvector methods of web # information retrieval." http://citeseer.ist.psu.edu/713792.html @@ -13,7 +17,7 @@ class TestHITS: @classmethod def setup_class(cls): - G = networkx.DiGraph() + G = nx.DiGraph() edges = [(1, 3), (1, 5), (2, 1), (3, 5), (5, 4), (5, 3), (6, 5)] @@ -26,51 +30,56 @@ class TestHITS: zip(sorted(G), [0.366025, 0.000000, 0.211325, 0.000000, 0.211325, 0.211325]) ) - def test_hits(self): + def test_hits_numpy(self): G = self.G - h, a = networkx.hits(G, tol=1.0e-08) + h, a = nx.hits_numpy(G) for n in G: assert almost_equal(h[n], G.h[n], places=4) for n in G: assert almost_equal(a[n], G.a[n], places=4) - def test_hits_nstart(self): - G = self.G - nstart = {i: 1.0 / 2 for i in G} - h, a = networkx.hits(G, nstart=nstart) - - def test_hits_numpy(self): - pytest.importorskip("numpy") + @pytest.mark.parametrize( + "hits_alg", + (nx.hits, nx.hits_scipy, _hits_python), + ) + def test_hits(self, hits_alg): G = self.G - h, a = networkx.hits_numpy(G) + h, a = hits_alg(G, tol=1.0e-08) for n in G: assert almost_equal(h[n], G.h[n], places=4) for n in G: assert almost_equal(a[n], G.a[n], places=4) - - def test_hits_scipy(self): - pytest.importorskip("scipy") - G = self.G - h, a = networkx.hits_scipy(G, tol=1.0e-08) + nstart = {i: 1.0 / 2 for i in G} + h, a = hits_alg(G, nstart=nstart) for n in G: assert almost_equal(h[n], G.h[n], places=4) for n in G: assert almost_equal(a[n], G.a[n], places=4) def test_empty(self): - pytest.importorskip("numpy") - G = networkx.Graph() - assert networkx.hits(G) == ({}, {}) - assert networkx.hits_numpy(G) == ({}, {}) - assert networkx.authority_matrix(G).shape == (0, 0) - assert networkx.hub_matrix(G).shape == (0, 0) - - def test_empty_scipy(self): - pytest.importorskip("scipy") - G = networkx.Graph() - assert networkx.hits_scipy(G) == ({}, {}) + G = nx.Graph() + assert nx.hits(G) == ({}, {}) + assert nx.hits_numpy(G) == ({}, {}) + assert _hits_python(G) == ({}, {}) + assert nx.hits_scipy(G) == ({}, {}) + assert nx.authority_matrix(G).shape == (0, 0) + assert nx.hub_matrix(G).shape == (0, 0) def test_hits_not_convergent(self): - with pytest.raises(networkx.PowerIterationFailedConvergence): + with pytest.raises(nx.PowerIterationFailedConvergence): G = self.G - networkx.hits(G, max_iter=0) + nx.hits(G, max_iter=0) + + +@pytest.mark.parametrize( + "hits_alg", + (nx.hits_numpy, nx.hits_scipy), +) +def test_deprecation_warnings(hits_alg): + """Make sure deprecation warnings are raised. + + To be removed when deprecations expire. + """ + G = nx.DiGraph(nx.path_graph(4)) + with pytest.warns(DeprecationWarning): + pr = hits_alg(G) |