From 4d0620478b7a7f00e5a4f041b70ad5724b4f55ce Mon Sep 17 00:00:00 2001 From: kpberry Date: Tue, 12 Oct 2021 12:00:16 -0400 Subject: Faster operators in algorithms/operators/all.py (#5121) * Reimplement union_all to take linear time * Reimplement disjoint_union_all to take linear time * Reimplement union in terms of union_all * Reimplement disjoint_union in terms of disjoint_union_all * Reimplement intersection_all to take linear time * Reimplement intersection in terms of intersection_all * Reimplement compose_all to take linear time * Reimplement compose in terms of compose_all * Refactor union_all to be more similar to compose_all * Fix formatting in algorithms/operators/all.py Co-authored-by: Kevin Berry --- networkx/algorithms/operators/all.py | 131 +++++++++++++++++++++++++++----- networkx/algorithms/operators/binary.py | 108 +------------------------- 2 files changed, 115 insertions(+), 124 deletions(-) (limited to 'networkx') diff --git a/networkx/algorithms/operators/all.py b/networkx/algorithms/operators/all.py index c3706687..92df582d 100644 --- a/networkx/algorithms/operators/all.py +++ b/networkx/algorithms/operators/all.py @@ -44,14 +44,60 @@ def union_all(graphs, rename=(None,)): union disjoint_union_all """ + # collect the graphs in case an iterator was passed + graphs = list(graphs) + if not graphs: raise ValueError("cannot apply union_all to an empty list") - graphs_names = zip_longest(graphs, rename) - U, gname = next(graphs_names) - for H, hname in graphs_names: - U = nx.union(U, H, (gname, hname)) - gname = None - return U + + U = graphs[0] + + if any(G.is_multigraph() != U.is_multigraph() for G in graphs): + raise nx.NetworkXError("All graphs must be graphs or multigraphs.") + + # rename graph to obtain disjoint node labels + def add_prefix(graph, prefix): + if prefix is None: + return graph + + def label(x): + if isinstance(x, str): + name = prefix + x + else: + name = prefix + repr(x) + return name + + return nx.relabel_nodes(graph, label) + + graphs = [add_prefix(G, name) for G, name in zip_longest(graphs, rename)] + + if sum([len(G) for G in graphs]) != len(set().union(*graphs)): + raise nx.NetworkXError( + "The node sets of the graphs are not disjoint.", + "Use appropriate rename" + "=(G1prefix,G2prefix,...,GNprefix)" + "or use disjoint_union(G1,G2,...,GN).", + ) + + # Union is the same type as first graph + R = U.__class__() + + # add graph attributes, later attributes take precedent over earlier ones + for G in graphs: + R.graph.update(G.graph) + + # add nodes and attributes + for G in graphs: + R.add_nodes_from(G.nodes(data=True)) + + if U.is_multigraph(): + for G in graphs: + R.add_edges_from(G.edges(keys=True, data=True)) + else: + for G in graphs: + R.add_edges_from(G.edges(data=True)) + + return R def disjoint_union_all(graphs): @@ -82,13 +128,23 @@ def disjoint_union_all(graphs): If a graph attribute is present in multiple graphs, then the value from the last graph in the list with that attribute is used. """ + graphs = list(graphs) + if not graphs: raise ValueError("cannot apply disjoint_union_all to an empty list") - graphs = iter(graphs) - U = next(graphs) - for H in graphs: - U = nx.disjoint_union(U, H) - return U + + first_labels = [0] + for G in graphs[:-1]: + first_labels.append(len(G) + first_labels[-1]) + + relabeled = [ + nx.convert_node_labels_to_integers(G, first_label=first_label) + for G, first_label in zip(graphs, first_labels) + ] + R = union_all(relabeled) + for G in graphs: + R.graph.update(G.graph) + return R def compose_all(graphs): @@ -120,13 +176,31 @@ def compose_all(graphs): If a graph attribute is present in multiple graphs, then the value from the last graph in the list with that attribute is used. """ + graphs = list(graphs) + if not graphs: raise ValueError("cannot apply compose_all to an empty list") - graphs = iter(graphs) - C = next(graphs) - for H in graphs: - C = nx.compose(C, H) - return C + + U = graphs[0] + + if any(G.is_multigraph() != U.is_multigraph() for G in graphs): + raise nx.NetworkXError("All graphs must be graphs or multigraphs.") + + R = U.__class__() + # add graph attributes, H attributes take precedent over G attributes + for G in graphs: + R.graph.update(G.graph) + + for G in graphs: + R.add_nodes_from(G.nodes(data=True)) + + if U.is_multigraph(): + for G in graphs: + R.add_edges_from(G.edges(keys=True, data=True)) + else: + for G in graphs: + R.add_edges_from(G.edges(data=True)) + return R def intersection_all(graphs): @@ -152,10 +226,27 @@ def intersection_all(graphs): Attributes from the graph, nodes, and edges are not copied to the new graph. """ + graphs = list(graphs) + if not graphs: raise ValueError("cannot apply intersection_all to an empty list") - graphs = iter(graphs) - R = next(graphs) - for H in graphs: - R = nx.intersection(R, H) + + U = graphs[0] + + if any(G.is_multigraph() != U.is_multigraph() for G in graphs): + raise nx.NetworkXError("All graphs must be graphs or multigraphs.") + + # create new graph + node_intersection = set.intersection(*[set(G.nodes) for G in graphs]) + R = U.__class__() + R.add_nodes_from(node_intersection) + + if U.is_multigraph(): + edge_sets = [set(G.edges(keys=True)) for G in graphs] + else: + edge_sets = [set(G.edges()) for G in graphs] + + edge_intersection = set.intersection(*edge_sets) + R.add_edges_from(edge_intersection) + return R diff --git a/networkx/algorithms/operators/binary.py b/networkx/algorithms/operators/binary.py index 1371c071..d2626e7a 100644 --- a/networkx/algorithms/operators/binary.py +++ b/networkx/algorithms/operators/binary.py @@ -61,57 +61,7 @@ def union(G, H, rename=(None, None), name=None): stacklevel=2, ) - if not G.is_multigraph() == H.is_multigraph(): - raise nx.NetworkXError("G and H must both be graphs or multigraphs.") - # Union is the same type as G - R = G.__class__() - # add graph attributes, H attributes take precedent over G attributes - R.graph.update(G.graph) - R.graph.update(H.graph) - - # rename graph to obtain disjoint node labels - def add_prefix(graph, prefix): - if prefix is None: - return graph - - def label(x): - if isinstance(x, str): - name = prefix + x - else: - name = prefix + repr(x) - return name - - return nx.relabel_nodes(graph, label) - - G = add_prefix(G, rename[0]) - H = add_prefix(H, rename[1]) - if set(G) & set(H): - raise nx.NetworkXError( - "The node sets of G and H are not disjoint.", - "Use appropriate rename=(Gprefix,Hprefix)" "or use disjoint_union(G,H).", - ) - if G.is_multigraph(): - G_edges = G.edges(keys=True, data=True) - else: - G_edges = G.edges(data=True) - if H.is_multigraph(): - H_edges = H.edges(keys=True, data=True) - else: - H_edges = H.edges(data=True) - - # add nodes - R.add_nodes_from(G) - R.add_nodes_from(H) - # add edges - R.add_edges_from(G_edges) - R.add_edges_from(H_edges) - # add node attributes - for n in G: - R.nodes[n].update(G.nodes[n]) - for n in H: - R.nodes[n].update(H.nodes[n]) - - return R + return nx.union_all([G, H], rename) def disjoint_union(G, H): @@ -140,12 +90,7 @@ def disjoint_union(G, H): to the union graph. If a graph attribute is present in both G and H the value from H is used. """ - R1 = nx.convert_node_labels_to_integers(G) - R2 = nx.convert_node_labels_to_integers(H, first_label=len(R1)) - R = union(R1, R2) - R.graph.update(G.graph) - R.graph.update(H.graph) - return R + return nx.disjoint_union_all([G, H]) def intersection(G, H): @@ -179,33 +124,7 @@ def intersection(G, H): >>> R.remove_nodes_from(n for n in G if n not in H) >>> R.remove_edges_from(e for e in G.edges if e not in H.edges) """ - if not G.is_multigraph() == H.is_multigraph(): - raise nx.NetworkXError("G and H must both be graphs or multigraphs.") - - # create new graph - if set(G) != set(H): - R = G.__class__() - R.add_nodes_from(set(G.nodes).intersection(set(H.nodes))) - else: - R = nx.create_empty_copy(G) - - if G.number_of_edges() <= H.number_of_edges(): - if G.is_multigraph(): - edges = G.edges(keys=True) - else: - edges = G.edges() - for e in edges: - if H.has_edge(*e): - R.add_edge(*e) - else: - if H.is_multigraph(): - edges = H.edges(keys=True) - else: - edges = H.edges() - for e in edges: - if G.has_edge(*e): - R.add_edge(*e) - return R + return nx.intersection_all([G, H]) def difference(G, H): @@ -328,26 +247,7 @@ def compose(G, H): This can cause surprises (i.e., edge `(1, 2)` may or may not be the same in two graphs) if you use MultiGraph without keeping track of edge keys. """ - if not G.is_multigraph() == H.is_multigraph(): - raise nx.NetworkXError("G and H must both be graphs or multigraphs.") - - R = G.__class__() - # add graph attributes, H attributes take precedent over G attributes - R.graph.update(G.graph) - R.graph.update(H.graph) - - R.add_nodes_from(G.nodes(data=True)) - R.add_nodes_from(H.nodes(data=True)) - - if G.is_multigraph(): - R.add_edges_from(G.edges(keys=True, data=True)) - else: - R.add_edges_from(G.edges(data=True)) - if H.is_multigraph(): - R.add_edges_from(H.edges(keys=True, data=True)) - else: - R.add_edges_from(H.edges(data=True)) - return R + return nx.compose_all([G, H]) def full_join(G, H, rename=(None, None)): -- cgit v1.2.1