Update matching functions for error validation and speed (#4897)

* First steps to update matching functions for #4644 Expand tests Change API to raise NetworkXError when matching involves nodes not in G Update is_*_matching to 100+ times faster. * improve matching_dict_to_set and docs for min_weight_matching * fix sphinx error
author: Dan Schult <dschult@colgate.edu> 2022-02-18 13:56:50 -0500
committer: GitHub <noreply@github.com> 2022-02-18 10:56:50 -0800
commit: 28b3014d68d2b4e40d3e02219770296a827bd55c (patch)
tree: 0db029f94450ad284d8da936330fa814c642a468
parent: 88158b66e396b59d2d382600806e3d28c7bf5509 (diff)
download: networkx-28b3014d68d2b4e40d3e02219770296a827bd55c.tar.gz
3 files changed, 107 insertions, 40 deletions
diff --git a/doc/release/release_dev.rst b/doc/release/release_dev.rst
index 9fcc9d73..adf2e089 100644
--- a/doc/release/release_dev.rst
+++ b/doc/release/release_dev.rst
@@ -54,6 +54,7 @@ API Changes
 - A ``FutureWarning`` has been added to ``attr_matrix`` to indicate that the
   return type will change from a ``numpy.matrix`` object to a ``numpy.ndarray``
   object in NetworkX 3.0.
+- The `is_*_matching` functions now raise exceptions for nodes not in G in any edge
 
 Deprecations
 ------------
diff --git a/networkx/algorithms/matching.py b/networkx/algorithms/matching.py
index 0c874f40..2a57e9a1 100644
--- a/networkx/algorithms/matching.py
+++ b/networkx/algorithms/matching.py
@@ -40,13 +40,13 @@ def maximal_matching(G):
     """
     matching = set()
     nodes = set()
-    for u, v in G.edges():
+    for edge in G.edges():
         # If the edge isn't covered, add it to the matching
         # then remove neighborhood of u and v from consideration.
+        u, v = edge
         if u not in nodes and v not in nodes and u != v:
-            matching.add((u, v))
-            nodes.add(u)
-            nodes.add(v)
+            matching.add(edge)
+            nodes.update(edge)
     return matching
 
 
@@ -64,19 +64,23 @@ def matching_dict_to_set(matching):
     example, key ``u`` with value ``v`` and key ``v`` with value ``u``.
 
     """
-    # Need to compensate for the fact that both pairs (u, v) and (v, u)
-    # appear in `matching.items()`, so we use a set of sets. This way,
-    # only the (frozen)set `{u, v}` appears as an element in the
-    # returned set.
-
-    return {(u, v) for (u, v) in set(map(frozenset, matching.items()))}
+    edges = set()
+    for edge in matching.items():
+        u, v = edge
+        if (v, u) in edges or edge in edges:
+            continue
+        if u == v:
+            raise nx.NetworkXError(f"Selfloops cannot appear in matchings {edge}")
+        edges.add(edge)
+    return edges
 
 
 def is_matching(G, matching):
     """Return True if ``matching`` is a valid matching of ``G``
 
     A *matching* in a graph is a set of edges in which no two distinct
-    edges share a common endpoint.
+    edges share a common endpoint. Each node is incident to at most one
+    edge in the matching. The edges are said to be independent.
 
     Parameters
     ----------
@@ -95,18 +99,31 @@ def is_matching(G, matching):
         Whether the given set or dictionary represents a valid matching
         in the graph.
 
+    Raises
+    ------
+    NetworkXError
+        If the proposed matching has an edge to a node not in G.
+        Or if the matching is not a collection of 2-tuple edges.
+
     """
     if isinstance(matching, dict):
         matching = matching_dict_to_set(matching)
 
+    nodes = set()
     for edge in matching:
         if len(edge) != 2:
+            raise nx.NetworkXError(f"matching has non-2-tuple edge {edge}")
+        u, v = edge
+        if u not in G or v not in G:
+            raise nx.NetworkXError(f"matching contains edge {edge} with node not in G")
+        if u == v:
             return False
-        if not G.has_edge(*edge):
+        if not G.has_edge(u, v):
             return False
-
-    # TODO This is parallelizable.
-    return all(len(set(e1) & set(e2)) == 0 for e1, e2 in combinations(matching, 2))
+        if u in nodes or v in nodes:
+            return False
+        nodes.update(edge)
+    return True
 
 
 def is_maximal_matching(G, matching):
@@ -136,20 +153,32 @@ def is_maximal_matching(G, matching):
     if isinstance(matching, dict):
         matching = matching_dict_to_set(matching)
     # If the given set is not a matching, then it is not a maximal matching.
-    if not is_matching(G, matching):
-        return False
-    # A matching is maximal if adding any unmatched edge to it causes
-    # the resulting set to *not* be a valid matching.
-    #
-    # HACK Since the `matching_dict_to_set` function returns a set of
-    # sets, we need to convert the list of edges to a set of sets in
-    # order for the set difference function to work. Ideally, we would
-    # just be able to do `set(G.edges()) - matching`.
-    all_edges = set(map(frozenset, G.edges()))
-    matched_edges = set(map(frozenset, matching))
-    unmatched_edges = all_edges - matched_edges
-    # TODO This is parallelizable.
-    return all(not is_matching(G, matching | {e}) for e in unmatched_edges)
+    edges = set()
+    nodes = set()
+    for edge in matching:
+        if len(edge) != 2:
+            raise nx.NetworkXError(f"matching has non-2-tuple edge {edge}")
+        u, v = edge
+        if u not in G or v not in G:
+            raise nx.NetworkXError(f"matching contains edge {edge} with node not in G")
+        if u == v:
+            return False
+        if not G.has_edge(u, v):
+            return False
+        if u in nodes or v in nodes:
+            return False
+        nodes.update(edge)
+        edges.add(edge)
+        edges.add((v, u))
+    # A matching is maximal if adding any new edge from G to it
+    # causes the resulting set to match some node twice.
+    # Be careful to check for adding selfloops
+    for u, v in G.edges:
+        if (u, v) not in edges:
+            # could add edge (u, v) to edges and have a bigger matching
+            if u not in nodes and v not in nodes and u != v:
+                return False
+    return True
 
 
 def is_perfect_matching(G, matching):
@@ -179,18 +208,34 @@ def is_perfect_matching(G, matching):
     if isinstance(matching, dict):
         matching = matching_dict_to_set(matching)
 
-    if not is_matching(G, matching):
-        return False
-
-    counts = Counter(sum(matching, ()))
-
-    return all(counts[v] == 1 for v in G)
+    nodes = set()
+    for edge in matching:
+        if len(edge) != 2:
+            raise nx.NetworkXError(f"matching has non-2-tuple edge {edge}")
+        u, v = edge
+        if u not in G or v not in G:
+            raise nx.NetworkXError(f"matching contains edge {edge} with node not in G")
+        if u == v:
+            return False
+        if not G.has_edge(u, v):
+            return False
+        if u in nodes or v in nodes:
+            return False
+        nodes.update(edge)
+    return len(nodes) == len(G)
 
 
 @not_implemented_for("multigraph")
 @not_implemented_for("directed")
 def min_weight_matching(G, maxcardinality=False, weight="weight"):
-    """Use reciprocal edge weights to find max reciprocal weight matching.
+    """Computing a minimum-weight maximal matching of G.
+
+    Use reciprocal edge weights with the maximum-weight algorithm.
+
+    A matching is a subset of edges in which no node occurs more than once.
+    The weight of a matching is the sum of the weights of its edges.
+    A maximal matching cannot add more edges and still be a matching.
+    The cardinality of a matching is the number of matched edges.
 
     This method replaces the weights with their reciprocal and
     then runs :func:`max_weight_matching`.
diff --git a/networkx/algorithms/tests/test_matching.py b/networkx/algorithms/tests/test_matching.py
index 0487227f..ed436aad 100644
--- a/networkx/algorithms/tests/test_matching.py
+++ b/networkx/algorithms/tests/test_matching.py
@@ -421,19 +421,40 @@ class TestIsMatching:
         assert nx.is_matching(G, {(0, 1), (3, 2)})
         assert nx.is_matching(G, {(1, 0), (3, 2)})
 
-    def test_valid(self):
+    def test_valid_matching(self):
         G = nx.path_graph(4)
         assert nx.is_matching(G, {(0, 1), (2, 3)})
 
-    def test_invalid(self):
+    def test_invalid_input(self):
+        error = nx.NetworkXError
+        G = nx.path_graph(4)
+        # edge to node not in G
+        raises(error, nx.is_matching, G, {(0, 5), (2, 3)})
+        # edge not a 2-tuple
+        raises(error, nx.is_matching, G, {(0, 1, 2), (2, 3)})
+        raises(error, nx.is_matching, G, {(0,), (2, 3)})
+
+    def test_selfloops(self):
+        error = nx.NetworkXError
+        G = nx.path_graph(4)
+        # selfloop for node not in G
+        raises(error, nx.is_matching, G, {(5, 5), (2, 3)})
+        # selfloop edge not in G
+        assert not nx.is_matching(G, {(0, 0), (1, 2), (2, 3)})
+        # selfloop edge in G
+        G.add_edge(0, 0)
+        assert not nx.is_matching(G, {(0, 0), (1, 2), (2, 3)})
+
+    def test_invalid_matching(self):
         G = nx.path_graph(4)
         assert not nx.is_matching(G, {(0, 1), (1, 2), (2, 3)})
 
     def test_invalid_edge(self):
         G = nx.path_graph(4)
         assert not nx.is_matching(G, {(0, 3), (1, 2)})
-        assert not nx.is_matching(G, {(0, 4)})
-        G = nx.path_graph(4, create_using=nx.DiGraph)
+        raises(nx.NetworkXError, nx.is_matching, G, {(0, 55)})
+
+        G = nx.DiGraph(G.edges)
         assert nx.is_matching(G, {(0, 1)})
         assert not nx.is_matching(G, {(1, 0)})
author	Dan Schult <dschult@colgate.edu>	2022-02-18 13:56:50 -0500
committer	GitHub <noreply@github.com>	2022-02-18 10:56:50 -0800
commit	28b3014d68d2b4e40d3e02219770296a827bd55c (patch)
tree	0db029f94450ad284d8da936330fa814c642a468
parent	88158b66e396b59d2d382600806e3d28c7bf5509 (diff)
download	networkx-28b3014d68d2b4e40d3e02219770296a827bd55c.tar.gz