diff --git a/algorithm/intensity_ratio.py b/algorithm/intensity_ratio.py
new file mode 100644
index 0000000..18f8407
--- /dev/null
+++ b/algorithm/intensity_ratio.py
@@ -0,0 +1,104 @@
+import networkx
+
+from utils.types import Partition
+
+from algorithm.edge_ratio import edge_boundary_ratio
+
+
+def global_intensity_ratio(G: networkx.DiGraph, partitions: Partition, _: int):
+    """
+    Calculate the global intensity ratio score.
+    :param G: Total graph
+    :param partitions: Partitions on the graph
+    :param _: Size of the graph (unused, kept so all measures share one signature).
+    :return: Sum of the intensity ratio scores of all partitions
+    """
+    return sum(
+        edge_boundary_intensity_ratio(G, partition) for partition in partitions
+    )
+
+
+def local_intensity_ratio(
+    G: networkx.DiGraph,
+    u: int,
+    neighbour: int,
+    node_to_community: dict,
+    inner_partition: Partition,
+    m: int,
+):
+    """
+    Calculates the change in score if u is moved to the community of the given neighbour.
+    :param G: Total graph
+    :param u: Node that will be moved
+    :param neighbour: Neighbour whose community the node will be moved to.
+    :param node_to_community: Dictionary that maps nodes to communities.
+    :param inner_partition: Partition in the current stage of louvain.
+    :param m: Total amount of edges (unused by this measure).
+    :return: Change in local score
+    """
+    source_community = node_to_community[u]
+    target_community = node_to_community[neighbour]
+    # Moving u into the community it already belongs to changes nothing.
+    if source_community == target_community:
+        return 0
+
+    # Copy into sets so list- and set-based partitions are handled alike.
+    u_partition = set(inner_partition[source_community])
+    neighbour_partition = set(inner_partition[target_community])
+
+    # Calculate the current scores
+    old_score_u = edge_boundary_intensity_ratio(G, u_partition)
+    old_score_neighbour = edge_boundary_intensity_ratio(G, neighbour_partition)
+
+    # Calculate the new scores
+    new_score_u_and_neighbour = edge_boundary_intensity_ratio(
+        G, neighbour_partition | {u}
+    )
+    new_score_u_partition_without_u = edge_boundary_intensity_ratio(
+        G, u_partition - {u}
+    )
+
+    return (
+        new_score_u_and_neighbour
+        + new_score_u_partition_without_u
+        - old_score_u
+        - old_score_neighbour
+    )
+
+
+def edge_boundary_intensity_ratio(G: networkx.DiGraph, partition):
+    """
+    Calculates the edge boundary intensity ratio efficiently by checking each edge
+    :param G: The graph
+    :param partition: The current partition
+    :return: Intensity ratio p_in / (p_in + p_out) for the current partition,
+        or 0 when that ratio is undefined
+    """
+    partition = set(partition)
+    size = len(partition)
+    edge_boundary_size = 0
+    in_edge_size = 0
+    for node in partition:
+        # Edges without a "weight" attribute count as weight 1.
+        for _, target, weight in G.out_edges(node, data="weight", default=1):
+            if target in partition:
+                in_edge_size += weight
+            else:
+                edge_boundary_size += weight
+        for source, _, weight in G.in_edges(node, data="weight", default=1):
+            if source in partition:
+                in_edge_size += weight
+            else:
+                edge_boundary_size += weight
+    # Every internal edge was seen from both of its endpoints.
+    in_edge_size = in_edge_size / 2
+    p_in_denom = size * (size - 1)
+    p_out_denom = 2 * size * (G.number_of_nodes() - size)
+    if p_in_denom == 0 or p_out_denom == 0:
+        return 0
+    p_in = in_edge_size / p_in_denom
+    p_out = edge_boundary_size / p_out_denom
+    # No internal and no boundary weight: the ratio would be 0 / 0.
+    if p_in + p_out == 0:
+        return 0
+    return p_in / (p_out + p_in)
diff --git a/assess.py b/assess.py
index 301a002..fa23a4c 100644
--- a/assess.py
+++ b/assess.py
@@ -10,6 +10,10 @@
     local_edge_ratio,
     global_edge_ratio,
 )
+from algorithm.intensity_ratio import (
+    local_intensity_ratio,
+    global_intensity_ratio,
+)
 from algorithm.modularity import (
     local_modularity,
     global_modularity,
@@ -42,30 +46,45 @@
 )
 
 COMMUNITY_MEASURES = {
-    "edge_ratio": {
-        "name": "Edge Ratio",
-        "partition_func": lambda G: louvain_communities(
-            G,
-            global_edge_ratio,
-            local_edge_ratio,
-        ),
-    },
-    "modularity": {
-        "name": "Modularity",
+    # "edge_ratio": {
+    #     "name": "Edge Ratio",
+    #     "partition_func": lambda G: louvain_communities(
+    #         G,
+    #         global_edge_ratio,
+    #         local_edge_ratio,
+    #     ),
+    # },
+    "intensity_ratio": {
+        "name": "Intensity Ratio",
         "partition_func": lambda G: louvain_communities(
             G,
-            global_modularity,
-            local_modularity,
-        ),
-    },
-    "modularity_density": {
-        "name": "Modularity Density",
-        "partition_func": lambda G: louvain_communities(
-            G,
-            global_modularity_density,
-            local_modularity_density,
+            global_intensity_ratio,
+            local_intensity_ratio,
         ),
     },
+    # "modularity": {
+    #     "name": "Modularity",
+    #     "partition_func": lambda G: louvain_communities(
+    #         G,
+    #         global_modularity,
+    #         local_modularity,
+    #     ),
+    # },
+    # "modularity_density": {
+    #     "name": "Modularity Density",
+    #     "partition_func": lambda G: louvain_communities(
+    #         G,
+    #         global_modularity_density,
+    #         local_modularity_density,
+    #     ),
+    # },
+}
+
+NAME_TO_GLOBAL_FUNC = {
+    "edge_ratio": global_edge_ratio,
+    "intensity_ratio": global_intensity_ratio,
+    "modularity": global_modularity,
+    "modularity_density": global_modularity_density,
 }
 
 
@@ -102,10 +121,10 @@ def run_benchmarks(
             partition = COMMUNITY_MEASURES[measure]["partition_func"](G)
             ground_truth_partition = G.graph["partition"]
             print(
-                f"Modularity for ground truth: {nx.algorithms.community.modularity(G, ground_truth_partition)}"
+                f"{measure} for ground truth: {NAME_TO_GLOBAL_FUNC[measure](G, ground_truth_partition, G.size())}"
             )
             print(
-                f"Modularity for algorithm: {nx.algorithms.community.modularity(G, partition)}"
+                f"{measure} for algorithm: {NAME_TO_GLOBAL_FUNC[measure](G, partition, G.size())}"
             )
             nmi_scores.append(nmi_score(ground_truth_partition, partition))
             print(
diff --git a/requirements.txt b/requirements.txt
index 2f50622..c30a7b7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ scikit-learn~=1.1
 click~=8.1
 powerlaw
 scipy
+infomap