From 61688295e63cec163e0c17cfa29383822f00cf19 Mon Sep 17 00:00:00 2001
From: Vitaliy Pozdnyakov <pozdnyakov.vitaliy@yandex.ru>
Date: Tue, 22 Jun 2021 23:11:53 +0300
Subject: Fix assortativity coefficent calculation (#4851)

Update calculation of assortativity coefficients and update
tests to reflect new behavior.
Introduces mapping kwarg to degree_mixing_matrix and remove
0-padded rows/cols from degree_mixing_matrix.

Co-authored-by: Dan Schult <dschult@colgate.edu>
---
 doc/release/release_dev.rst                        |  4 +
 networkx/algorithms/assortativity/correlation.py   |  4 +-
 networkx/algorithms/assortativity/mixing.py        | 93 +++++++++++++++++-----
 .../algorithms/assortativity/tests/base_test.py    | 21 +++++
 .../assortativity/tests/test_correlation.py        | 20 ++++-
 .../algorithms/assortativity/tests/test_mixing.py  | 71 +++++++++++++----
 6 files changed, 173 insertions(+), 40 deletions(-)

diff --git a/doc/release/release_dev.rst b/doc/release/release_dev.rst
index d6a83ca6..13db1dd2 100644
--- a/doc/release/release_dev.rst
+++ b/doc/release/release_dev.rst
@@ -188,6 +188,10 @@ API Changes
   from ``communicability_betweeness_centrality``
 - [`#4850 <https://github.com/networkx/networkx/pull/4850>`_]
   Added ``dtype`` parameter to adjacency_matrix
+- [`#4851 <https://github.com/networkx/networkx/pull/4851>`_]
+  Output of `numeric_mixing_matrix` and `degree_mixing_matrix` no longer
+  includes rows with all entries zero by default. The functions now accept
+  a parameter `mapping` keyed by value to row index to identify each row.
 - [`#4867 <https://github.com/networkx/networkx/pull/4867>`_]
   The function ``spring_layout`` now ignores 'fixed' nodes not in the graph
 
diff --git a/networkx/algorithms/assortativity/correlation.py b/networkx/algorithms/assortativity/correlation.py
index bcf2ef3c..b885a22e 100644
--- a/networkx/algorithms/assortativity/correlation.py
+++ b/networkx/algorithms/assortativity/correlation.py
@@ -187,15 +187,13 @@ def numeric_assortativity_coefficient(G, attribute, nodes=None):
 
     Assortativity measures the similarity of connections
     in the graph with respect to the given numeric attribute.
-    The numeric attribute must be an integer.
 
     Parameters
     ----------
     G : NetworkX graph
 
     attribute : string
-        Node attribute key.  The corresponding attribute value must be an
-        integer.
+        Node attribute key.
 
     nodes: list or iterable (optional)
         Compute numeric assortativity only for attributes of nodes in
diff --git a/networkx/algorithms/assortativity/mixing.py b/networkx/algorithms/assortativity/mixing.py
index 8e76a143..3048c80b 100644
--- a/networkx/algorithms/assortativity/mixing.py
+++ b/networkx/algorithms/assortativity/mixing.py
@@ -78,6 +78,31 @@ def attribute_mixing_matrix(G, attribute, nodes=None, mapping=None, normalized=T
     -------
     m: numpy array
        Counts or joint probability of occurrence of attribute pairs.
+
+    Notes
+    -----
+    If each node has a unique attribute value, the unnormalized mixing matrix
+    will be equal to the adjacency matrix. To get a denser mixing matrix,
+    the rounding can be performed to form groups of nodes with equal values.
+    For example, the exact height of persons in cm (180.79155222, 163.9080892,
+    163.30095355, 167.99016217, 168.21590163, ...) can be rounded to (180, 163,
+    163, 168, 168, ...).
+
+    Definitions of attribute mixing matrix vary on whether the matrix
+    should include rows for attribute values that don't arise. Here we
+    do not include such empty-rows. But you can force them to appear
+    by inputting a `mapping` that includes those values.
+
+    Examples
+    --------
+    >>> G = nx.path_graph(3)
+    >>> gender = {0: 'male', 1: 'female', 2: 'female'}
+    >>> nx.set_node_attributes(G, gender, 'gender')
+    >>> mapping = {'male': 0, 'female': 1}
+    >>> mix_mat = nx.attribute_mixing_matrix(G, 'gender', mapping=mapping)
+    >>> # mixing from male nodes to female nodes
+    >>> mix_mat[mapping['male'], mapping['female']]
+    0.25
     """
     d = attribute_mixing_dict(G, attribute, nodes)
     a = dict_to_numpy_array(d, mapping=mapping)
@@ -117,7 +142,9 @@ def degree_mixing_dict(G, x="out", y="in", weight=None, nodes=None, normalized=F
     return mixing_dict(xy_iter, normalized=normalized)
 
 
-def degree_mixing_matrix(G, x="out", y="in", weight=None, nodes=None, normalized=True):
+def degree_mixing_matrix(
+    G, x="out", y="in", weight=None, nodes=None, normalized=True, mapping=None
+):
     """Returns mixing matrix for attribute.
 
     Parameters
@@ -143,35 +170,55 @@ def degree_mixing_matrix(G, x="out", y="in", weight=None, nodes=None, normalized
     normalized : bool (default=True)
        Return counts if False or probabilities if True.
 
+    mapping : dictionary, optional
+       Mapping from node degree to integer index in matrix.
+       If not specified, an arbitrary ordering will be used.
+
     Returns
     -------
     m: numpy array
        Counts, or joint probability, of occurrence of node degree.
+
+    Notes
+    -----
+    Definitions of degree mixing matrix vary on whether the matrix
+    should include rows for degree values that don't arise. Here we
+    do not include such empty-rows. But you can force them to appear
+    by inputting a `mapping` that includes those values. See examples.
+
+    Examples
+    --------
+    >>> G = nx.star_graph(3)
+    >>> mix_mat = nx.degree_mixing_matrix(G)
+    >>> mix_mat[0, 1]  # mixing from node degree 1 to node degree 3
+    0.5
+
+    If you want every possible degree to appear as a row, even if no nodes
+    have that degree, use `mapping` as follows,
+
+    >>> max_degree = max(deg for n, deg in G.degree)
+    >>> mapping = {x: x for x in range(max_degree + 1)} # identity mapping
+    >>> mix_mat = nx.degree_mixing_matrix(G, mapping=mapping)
+    >>> mix_mat[3, 1]  # mixing from node degree 3 to node degree 1
+    0.5
     """
     d = degree_mixing_dict(G, x=x, y=y, nodes=nodes, weight=weight)
-    s = set(d.keys())
-    for k, v in d.items():
-        s.update(v.keys())
-    m = max(s)
-    mapping = {x: x for x in range(m + 1)}
     a = dict_to_numpy_array(d, mapping=mapping)
     if normalized:
         a = a / a.sum()
     return a
 
 
-def numeric_mixing_matrix(G, attribute, nodes=None, normalized=True):
+def numeric_mixing_matrix(G, attribute, nodes=None, normalized=True, mapping=None):
     """Returns numeric mixing matrix for attribute.
 
-    The attribute must be an integer.
-
     Parameters
     ----------
     G : graph
        NetworkX graph object.
 
     attribute : string
-       Node attribute key.  The corresponding attribute must be an integer.
+       Node attribute key.
 
     nodes: list or iterable (optional)
         Build the matrix only with nodes in container. The default is all nodes.
@@ -179,21 +226,27 @@ def numeric_mixing_matrix(G, attribute, nodes=None, normalized=True):
     normalized : bool (default=True)
        Return counts if False or probabilities if True.
 
+    mapping : dictionary, optional
+       Mapping from node attribute to integer index in matrix.
+       If not specified, an arbitrary ordering will be used.
+
+    Notes
+    -----
+    If each node has a unique attribute value, the unnormalized mixing matrix
+    will be equal to the adjacency matrix. To get a denser mixing matrix,
+    the rounding can be performed to form groups of nodes with equal values.
+    For example, the exact height of persons in cm (180.79155222, 163.9080892,
+    163.30095355, 167.99016217, 168.21590163, ...) can be rounded to (180, 163,
+    163, 168, 168, ...).
+
     Returns
     -------
     m: numpy array
        Counts, or joint, probability of occurrence of node attribute pairs.
     """
-    d = attribute_mixing_dict(G, attribute, nodes)
-    s = set(d.keys())
-    for k, v in d.items():
-        s.update(v.keys())
-    m = max(s)
-    mapping = {x: x for x in range(m + 1)}
-    a = dict_to_numpy_array(d, mapping=mapping)
-    if normalized:
-        a = a / a.sum()
-    return a
+    return attribute_mixing_matrix(
+        G, attribute, nodes=nodes, normalized=normalized, mapping=mapping
+    )
 
 
 def mixing_dict(xy, normalized=False):
diff --git a/networkx/algorithms/assortativity/tests/base_test.py b/networkx/algorithms/assortativity/tests/base_test.py
index 9a8a1e72..5b371231 100644
--- a/networkx/algorithms/assortativity/tests/base_test.py
+++ b/networkx/algorithms/assortativity/tests/base_test.py
@@ -49,3 +49,24 @@ class BaseTestDegreeMixing:
         cls.M.add_edge(0, 1)
         cls.S = nx.Graph()
         cls.S.add_edges_from([(0, 0), (1, 1)])
+        cls.W = nx.Graph()
+        cls.W.add_edges_from([(0, 3), (1, 3), (2, 3)], weight=0.5)
+        cls.W.add_edge(0, 2, weight=1)
+
+
+class BaseTestNumericMixing:
+    @classmethod
+    def setup_class(cls):
+        N = nx.Graph()
+        N.add_nodes_from([0, 1], margin=-2)
+        N.add_nodes_from([2, 3], margin=-2)
+        N.add_nodes_from([4], margin=-3)
+        N.add_nodes_from([5], margin=-4)
+        N.add_edges_from([(0, 1), (2, 3), (0, 4), (2, 5)])
+        cls.N = N
+
+        F = nx.Graph()
+        F.add_edges_from([(0, 3), (1, 3), (2, 3)], weight=0.5)
+        F.add_edge(0, 2, weight=1)
+        nx.set_node_attributes(F, dict(F.degree(weight="weight")), "margin")
+        cls.F = F
diff --git a/networkx/algorithms/assortativity/tests/test_correlation.py b/networkx/algorithms/assortativity/tests/test_correlation.py
index 548ed82f..763f0a41 100644
--- a/networkx/algorithms/assortativity/tests/test_correlation.py
+++ b/networkx/algorithms/assortativity/tests/test_correlation.py
@@ -5,7 +5,11 @@ pytest.importorskip("scipy")
 
 
 import networkx as nx
-from .base_test import BaseTestAttributeMixing, BaseTestDegreeMixing
+from .base_test import (
+    BaseTestAttributeMixing,
+    BaseTestDegreeMixing,
+    BaseTestNumericMixing,
+)
 from networkx.algorithms.assortativity.correlation import attribute_ac
 
 
@@ -34,6 +38,10 @@ class TestDegreeMixingCorrelation(BaseTestDegreeMixing):
         r = nx.degree_pearson_correlation_coefficient(self.M)
         np.testing.assert_almost_equal(r, -1.0 / 7.0, decimal=4)
 
+    def test_degree_assortativity_weighted(self):
+        r = nx.degree_assortativity_coefficient(self.W, weight="weight")
+        np.testing.assert_almost_equal(r, -0.1429, decimal=4)
+
 
 class TestAttributeMixingCorrelation(BaseTestAttributeMixing):
     def test_attribute_assortativity_undirected(self):
@@ -73,3 +81,13 @@ class TestAttributeMixingCorrelation(BaseTestAttributeMixing):
         a = np.array([[50, 50, 0], [50, 50, 0], [0, 0, 2]])
         r = attribute_ac(a)
         np.testing.assert_almost_equal(r, 0.029, decimal=3)
+
+
+class TestNumericMixingCorrelation(BaseTestNumericMixing):
+    def test_numeric_assortativity_negative(self):
+        r = nx.numeric_assortativity_coefficient(self.N, "margin")
+        np.testing.assert_almost_equal(r, -0.2903, decimal=4)
+
+    def test_numeric_assortativity_float(self):
+        r = nx.numeric_assortativity_coefficient(self.F, "margin")
+        np.testing.assert_almost_equal(r, -0.1429, decimal=4)
diff --git a/networkx/algorithms/assortativity/tests/test_mixing.py b/networkx/algorithms/assortativity/tests/test_mixing.py
index f1a1b4e2..f41d8960 100644
--- a/networkx/algorithms/assortativity/tests/test_mixing.py
+++ b/networkx/algorithms/assortativity/tests/test_mixing.py
@@ -4,7 +4,11 @@ np = pytest.importorskip("numpy")
 
 
 import networkx as nx
-from .base_test import BaseTestAttributeMixing, BaseTestDegreeMixing
+from .base_test import (
+    BaseTestAttributeMixing,
+    BaseTestDegreeMixing,
+    BaseTestNumericMixing,
+)
 
 
 class TestDegreeMixingDict(BaseTestDegreeMixing):
@@ -29,13 +33,16 @@ class TestDegreeMixingDict(BaseTestDegreeMixing):
         d_result = {1: {2: 1}, 2: {1: 1, 3: 3}, 3: {2: 3}}
         assert d == d_result
 
+    def test_degree_mixing_dict_weighted(self):
+        d = nx.degree_mixing_dict(self.W, weight="weight")
+        d_result = {0.5: {1.5: 1}, 1.5: {1.5: 6, 0.5: 1}}
+
 
 class TestDegreeMixingMatrix(BaseTestDegreeMixing):
     def test_degree_mixing_matrix_undirected(self):
         # fmt: off
-        a_result = np.array([[0, 0, 0],
-                             [0, 0, 2],
-                             [0, 2, 2]]
+        a_result = np.array([[0, 2],
+                             [2, 2]]
                             )
         # fmt: on
         a = nx.degree_mixing_matrix(self.P4, normalized=False)
@@ -45,10 +52,9 @@ class TestDegreeMixingMatrix(BaseTestDegreeMixing):
 
     def test_degree_mixing_matrix_directed(self):
         # fmt: off
-        a_result = np.array([[0, 0, 0, 0],
-                             [0, 0, 0, 2],
-                             [0, 1, 0, 1],
-                             [0, 0, 0, 0]]
+        a_result = np.array([[0, 0, 2],
+                             [1, 0, 1],
+                             [0, 0, 0]]
                             )
         # fmt: on
         a = nx.degree_mixing_matrix(self.D, normalized=False)
@@ -58,10 +64,9 @@ class TestDegreeMixingMatrix(BaseTestDegreeMixing):
 
     def test_degree_mixing_matrix_multigraph(self):
         # fmt: off
-        a_result = np.array([[0, 0, 0, 0],
-                             [0, 0, 1, 0],
-                             [0, 1, 0, 3],
-                             [0, 0, 3, 0]]
+        a_result = np.array([[0, 1, 0],
+                             [1, 0, 3],
+                             [0, 3, 0]]
                             )
         # fmt: on
         a = nx.degree_mixing_matrix(self.M, normalized=False)
@@ -71,16 +76,28 @@ class TestDegreeMixingMatrix(BaseTestDegreeMixing):
 
     def test_degree_mixing_matrix_selfloop(self):
         # fmt: off
-        a_result = np.array([[0, 0, 0],
-                             [0, 0, 0],
-                             [0, 0, 2]]
-                            )
+        a_result = np.array([[2]])
         # fmt: on
         a = nx.degree_mixing_matrix(self.S, normalized=False)
         np.testing.assert_equal(a, a_result)
         a = nx.degree_mixing_matrix(self.S)
         np.testing.assert_equal(a, a_result / float(a_result.sum()))
 
+    def test_degree_mixing_matrix_weighted(self):
+        a_result = np.array([[0.0, 1.0], [1.0, 6.0]])
+        a = nx.degree_mixing_matrix(self.W, weight="weight", normalized=False)
+        np.testing.assert_equal(a, a_result)
+        a = nx.degree_mixing_matrix(self.W, weight="weight")
+        np.testing.assert_equal(a, a_result / float(a_result.sum()))
+
+    def test_degree_mixing_matrix_mapping(self):
+        a_result = np.array([[6.0, 1.0], [1.0, 0.0]])
+        mapping = {0.5: 1, 1.5: 0}
+        a = nx.degree_mixing_matrix(
+            self.W, weight="weight", normalized=False, mapping=mapping
+        )
+        np.testing.assert_equal(a, a_result)
+
 
 class TestAttributeMixingDict(BaseTestAttributeMixing):
     def test_attribute_mixing_dict_undirected(self):
@@ -139,3 +156,25 @@ class TestAttributeMixingMatrix(BaseTestAttributeMixing):
         np.testing.assert_equal(a, a_result)
         a = nx.attribute_mixing_matrix(self.M, "fish", mapping=mapping)
         np.testing.assert_equal(a, a_result / float(a_result.sum()))
+
+
+class TestNumericMixingMatrix(BaseTestNumericMixing):
+    def test_numeric_mixing_matrix_negative(self):
+        mapping = {-2: 0, -3: 1, -4: 2}
+        a_result = np.array([[4.0, 1.0, 1.0], [1.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
+        a = nx.numeric_mixing_matrix(
+            self.N, "margin", mapping=mapping, normalized=False
+        )
+        np.testing.assert_equal(a, a_result)
+        a = nx.numeric_mixing_matrix(self.N, "margin", mapping=mapping)
+        np.testing.assert_equal(a, a_result / float(a_result.sum()))
+
+    def test_numeric_mixing_matrix_float(self):
+        mapping = {0.5: 1, 1.5: 0}
+        a_result = np.array([[6.0, 1.0], [1.0, 0.0]])
+        a = nx.numeric_mixing_matrix(
+            self.F, "margin", mapping=mapping, normalized=False
+        )
+        np.testing.assert_equal(a, a_result)
+        a = nx.numeric_mixing_matrix(self.F, "margin", mapping=mapping)
+        np.testing.assert_equal(a, a_result / float(a_result.sum()))
-- 
cgit v1.2.1