From 95f26e04c8e60b6cb8ca49cffe8e04bcc24b35fe Mon Sep 17 00:00:00 2001
From: Faiz Surani <faiz.surani@gmail.com>
Date: Tue, 31 May 2022 11:43:07 +0200
Subject: [PATCH] WIP Rework louvain/edge ratio algorithms

---
 algorithm/edge_ratio.py | 53 +++++++++++++++++++++++------------------
 louvain.py              | 24 ++++++++++++-------
 main.py                 | 10 +++++++-
 3 files changed, 54 insertions(+), 33 deletions(-)

diff --git a/algorithm/edge_ratio.py b/algorithm/edge_ratio.py
index 2806b00..32d6708 100644
--- a/algorithm/edge_ratio.py
+++ b/algorithm/edge_ratio.py
@@ -1,6 +1,7 @@
 import networkx
 
 from networkx import subgraph_view, edge_boundary
+from itertools import chain
 
 
 # TODO: Type partition
@@ -9,12 +10,15 @@ def global_edge_ratio(G: networkx.DiGraph, partitions):
     for partition in partitions:
         internal_edges = len(G.subgraph(partition).edges)
         out_edges = len(list(edge_boundary(G, partition, G.nodes - partition)))
-        score_sum += internal_edges / (internal_edges + out_edges)
+        denom = internal_edges + out_edges
+        if denom == 0:
+            continue
+        score_sum += internal_edges / denom
     return score_sum
 
 
 def local_edge_ratio(
-    G: networkx.DiGraph, u, neighbour, node_to_community, inner_partition, partition
+    G: networkx.DiGraph, u, neighbour, node_to_community, inner_partition, partition, original_graph: networkx.DiGraph
 ):
     """
     Calculates the change in score if u is moved to the community of the given neighbour.
@@ -26,31 +30,34 @@ def local_edge_ratio(
     :param partition: Total partition of the complete graph.
     :return: Change in local score
     """
-    nodes_in_u = G.nodes[u].get("nodes", {u})
+    nodes_in_u = original_graph.nodes[u].get("nodes", {u})
 
-    old_internal_edges_u = len(G.subgraph(nodes_in_u).edges)
-    old_out_edges_u = len(list(edge_boundary(G, nodes_in_u, G.nodes - nodes_in_u)))
-    old_score_u = old_internal_edges_u / (old_internal_edges_u + old_out_edges_u)
+    old_score_u = edge_boundary_ratio(original_graph, nodes_in_u)
 
     nodes_in_neighbour = G.nodes[neighbour].get("nodes", {neighbour})
-
-    old_internal_edges_neighbour = len(G.subgraph(nodes_in_neighbour).edges)
-    old_out_edges_neighbour = len(
-        list(edge_boundary(G, nodes_in_neighbour, G.nodes - nodes_in_neighbour))
-    )
-    old_score_neighbour = old_internal_edges_neighbour / (
-        old_internal_edges_neighbour + old_out_edges_neighbour
-    )
+    old_score_neighbour = edge_boundary_ratio(original_graph, nodes_in_neighbour)
 
     nodes_in_u_and_neighbour = [*nodes_in_u, *nodes_in_neighbour]
-    new_internal_edges = len(G.subgraph(nodes_in_u_and_neighbour).edges)
-    new_out_edges = len(
-        list(
-            edge_boundary(
-                G, nodes_in_u_and_neighbour, G.nodes - nodes_in_u_and_neighbour
-            )
-        )
-    )
-    new_score = new_internal_edges / (new_internal_edges + new_out_edges)
+    new_score = edge_boundary_ratio(original_graph, nodes_in_u_and_neighbour)
 
     return new_score - old_score_u - old_score_neighbour
+
+
+def edge_boundary_ratio(G: networkx.DiGraph, partition):
+    """
+    Calculates the edge boundary ratio efficiently by checking each edge incident to the partition.
+    """
+    edge_boundary_size = 0
+    in_edge_size = 0
+    partition = set(partition)
+    for node in partition:
+        for neighbour in chain(G.predecessors(node), G.successors(node)):
+            if neighbour not in partition:
+                edge_boundary_size += 1
+            else:
+                in_edge_size += 1
+    in_edge_size = in_edge_size / 2
+    denom = (edge_boundary_size + in_edge_size)
+    if denom == 0:
+        return 0
+    return in_edge_size / denom
diff --git a/louvain.py b/louvain.py
index 486f6f0..5ef46cf 100644
--- a/louvain.py
+++ b/louvain.py
@@ -52,27 +52,32 @@ def louvain_partitions(
     partition = [{u} for u in G.nodes()]
 
     # Get initial community score
-    mod = global_community_measure(G, partition)
+    comm_score = global_community_measure(G, partition)
 
     graph = G.__class__()
     graph.add_nodes_from(G)
-    graph.add_weighted_edges_from(G.edges())
+    graph.add_weighted_edges_from(G.edges(data=True))
+    print(f"Created graph copy")
 
     m = graph.size()
     # Don't look at improvement on the first iteration
     partition, inner_partition, _ = _one_level(
-        graph, m, partition, local_community_measure, resolution
+        graph, m, partition, local_community_measure, G, resolution
     )
     improvement = True
+    counter = 0
     while improvement:
+        print("Executing iteration {}".format(counter))
+        counter += 1
         yield partition
-        new_mod = global_community_measure(graph, partition)
-        if new_mod - mod <= threshold:
+        new_community_score = global_community_measure(graph, partition)
+        print(f"Calculated modularity: {new_community_score}")
+        if new_community_score - comm_score <= threshold:
             return
-        mod = new_mod
+        comm_score = new_community_score
         graph = _gen_graph(graph, inner_partition)
         partition, inner_partition, improvement = _one_level(
-            graph, m, partition, local_community_measure, resolution
+            graph, m, partition, local_community_measure, G, resolution
         )
 
 
@@ -81,6 +86,7 @@ def _one_level(
     m: int,
     partition: list[set[int]],
     local_community_measure: Callable,
+    original_graph: nx.DiGraph,
     resolution=1,
 ):
     """Calculate one level of the Louvain partitions tree
@@ -115,9 +121,9 @@ def _one_level(
 
             # TODO - Compute for each neighbour, the increase in score.
             # We pass the node_to_community dict, as well as the current node, and its neighbours
-            for neighbour in G.neighbours(u):
+            for neighbour in G.neighbors(u):
                 new_score = local_community_measure(
-                    G, u, neighbour, node_to_community, inner_partition, partition
+                    G, u, neighbour, node_to_community, inner_partition, partition, original_graph
                 )
                 if new_score > best_community_score:
                     best_community_score = new_score
diff --git a/main.py b/main.py
index 2892af6..9cb791b 100644
--- a/main.py
+++ b/main.py
@@ -35,7 +35,15 @@ def load_network(
 
 
 def main():
-    G = load_network()
+    # G = load_network()
+    # Barabasi-Albert graph
+    G = nx.barabasi_albert_graph(500, 5)
+    # Add edge weight of 1 to each edge
+    for u, v, d in G.edges(data=True):
+        d["weight"] = 1
+    G = nx.to_directed(G)
+    print(f"Loaded {len(G.nodes)} nodes and {len(G.edges)} edges")
+
 
     communities = louvain_communities(G, global_edge_ratio, local_edge_ratio)
     print(len(communities))