From 95f26e04c8e60b6cb8ca49cffe8e04bcc24b35fe Mon Sep 17 00:00:00 2001 From: Faiz Surani Date: Tue, 31 May 2022 11:43:07 +0200 Subject: [PATCH] WIP Rework louvain/edge ratio algorithms --- algorithm/edge_ratio.py | 53 +++++++++++++++++++++++------------------ louvain.py | 24 ++++++++++++------- main.py | 10 +++++++- 3 files changed, 54 insertions(+), 33 deletions(-) diff --git a/algorithm/edge_ratio.py b/algorithm/edge_ratio.py index 2806b00..32d6708 100644 --- a/algorithm/edge_ratio.py +++ b/algorithm/edge_ratio.py @@ -1,6 +1,7 @@ import networkx from networkx import subgraph_view, edge_boundary +from itertools import chain # TODO: Type partition @@ -9,12 +10,15 @@ def global_edge_ratio(G: networkx.DiGraph, partitions): for partition in partitions: internal_edges = len(G.subgraph(partition).edges) out_edges = len(list(edge_boundary(G, partition, G.nodes - partition))) - score_sum += internal_edges / (internal_edges + out_edges) + denom = internal_edges + out_edges + if denom == 0: + continue + score_sum += internal_edges / denom return score_sum def local_edge_ratio( - G: networkx.DiGraph, u, neighbour, node_to_community, inner_partition, partition + G: networkx.DiGraph, u, neighbour, node_to_community, inner_partition, partition, original_graph: networkx.DiGraph ): """ Calculates the change in score if u is moved to the community of the given neighbour. @@ -26,31 +30,34 @@ def local_edge_ratio( :param partition: Total partition of the complete graph. 
:return: Change in local score """ - nodes_in_u = G.nodes[u].get("nodes", {u}) + nodes_in_u = original_graph.nodes[u].get("nodes", {u}) - old_internal_edges_u = len(G.subgraph(nodes_in_u).edges) - old_out_edges_u = len(list(edge_boundary(G, nodes_in_u, G.nodes - nodes_in_u))) - old_score_u = old_internal_edges_u / (old_internal_edges_u + old_out_edges_u) + old_score_u = edge_boundary_ratio(original_graph, nodes_in_u) nodes_in_neighbour = G.nodes[neighbour].get("nodes", {neighbour}) - - old_internal_edges_neighbour = len(G.subgraph(nodes_in_neighbour).edges) - old_out_edges_neighbour = len( - list(edge_boundary(G, nodes_in_neighbour, G.nodes - nodes_in_neighbour)) - ) - old_score_neighbour = old_internal_edges_neighbour / ( - old_internal_edges_neighbour + old_out_edges_neighbour - ) + old_score_neighbour = edge_boundary_ratio(original_graph, nodes_in_neighbour) nodes_in_u_and_neighbour = [*nodes_in_u, *nodes_in_neighbour] - new_internal_edges = len(G.subgraph(nodes_in_u_and_neighbour).edges) - new_out_edges = len( - list( - edge_boundary( - G, nodes_in_u_and_neighbour, G.nodes - nodes_in_u_and_neighbour - ) - ) - ) - new_score = new_internal_edges / (new_internal_edges + new_out_edges) + new_score = edge_boundary_ratio(original_graph, nodes_in_u_and_neighbour) return new_score - old_score_u - old_score_neighbour + + +def edge_boundary_ratio(G: networkx.DiGraph, partition): + """ + Calculates the edge boundary ratio efficiently by checking each edge + """ + edge_boundary_size = 0 + in_edge_size = 0 + partition = set(partition) + for node in partition: + for neighbour in chain(G.predecessors(node), G.successors(node)): + if neighbour not in partition: + edge_boundary_size += 1 + else: + in_edge_size += 1 + in_edge_size = in_edge_size / 2 + denom = (edge_boundary_size + in_edge_size) + if denom == 0: + return 0 + return in_edge_size / denom diff --git a/louvain.py b/louvain.py index 486f6f0..5ef46cf 100644 --- a/louvain.py +++ b/louvain.py @@ -52,27 +52,32 @@ def
louvain_partitions( partition = [{u} for u in G.nodes()] # Get initial community score - mod = global_community_measure(G, partition) + comm_score = global_community_measure(G, partition) graph = G.__class__() graph.add_nodes_from(G) - graph.add_weighted_edges_from(G.edges()) + graph.add_weighted_edges_from(G.edges(data=True)) + print(f"Created graph copy") m = graph.size() # Don't look at improvement on the first iteration partition, inner_partition, _ = _one_level( - graph, m, partition, local_community_measure, resolution + graph, m, partition, local_community_measure, G, resolution ) improvement = True + counter = 0 while improvement: + print("Executing iteration {}".format(counter)) + counter += 1 yield partition - new_mod = global_community_measure(graph, partition) - if new_mod - mod <= threshold: + new_community_score = global_community_measure(graph, partition) + print(f"Calculated modularity: {new_community_score}") + if new_community_score - comm_score <= threshold: return - mod = new_mod + comm_score = new_community_score graph = _gen_graph(graph, inner_partition) partition, inner_partition, improvement = _one_level( - graph, m, partition, local_community_measure, resolution + graph, m, partition, local_community_measure, G, resolution ) @@ -81,6 +86,7 @@ def _one_level( m: int, partition: list[set[int]], local_community_measure: Callable, + original_graph: nx.DiGraph, resolution=1, ): """Calculate one level of the Louvain partitions tree @@ -115,9 +121,9 @@ def _one_level( # TODO - Compute for each neighbour, the increase in score. 
# We pass the node_to_community dict, as well as the current node, and its neighbours - for neighbour in G.neighbours(u): + for neighbour in G.neighbors(u): new_score = local_community_measure( - G, u, neighbour, node_to_community, inner_partition, partition + G, u, neighbour, node_to_community, inner_partition, partition, original_graph ) if new_score > best_community_score: best_community_score = new_score diff --git a/main.py b/main.py index 2892af6..9cb791b 100644 --- a/main.py +++ b/main.py @@ -35,7 +35,15 @@ def load_network( def main(): - G = load_network() + # G = load_network() + # Barabasi-Albert graph + G = nx.barabasi_albert_graph(500, 5) + # Add edge weight of 1 to each edge + for u, v, d in G.edges(data=True): + d["weight"] = 1 + G = nx.to_directed(G) + print(f"Loaded {len(G.nodes)} nodes and {len(G.edges)} edges") + communities = louvain_communities(G, global_edge_ratio, local_edge_ratio) print(len(communities))