WIP LFR model graphs
ProbablyFaiz committed Jun 5, 2022
1 parent 4fd0a30 commit 1ef9ba2
Showing 7 changed files with 67 additions and 21 deletions.
assess.py (22 changes: 12 additions & 10 deletions)
@@ -6,13 +6,13 @@
 from sklearn.metrics import normalized_mutual_info_score
 
 from algorithm.edge_ratio import local_edge_ratio, global_edge_ratio
-from graph_generation import generate_sbm_graph
+from graph_generation_sbm import generate_sbm_graph
 from louvain import louvain_communities
 
 
 Partition = list[set[str]]
 
-GRAPH_SIZE = 500
+GRAPH_SIZE = 5000
 
 RANDOM_GRAPH_SEEDS = (
     2022_0,
@@ -28,14 +28,14 @@
 )
 
 COMMUNITY_MEASURES = {
-    "edge_ratio": {
-        "name": "Edge Ratio",
-        "partition_func": lambda G: louvain_communities(
-            G,
-            global_edge_ratio,
-            local_edge_ratio,
-        ),
-    },
+    # "edge_ratio": {
+    #     "name": "Edge Ratio",
+    #     "partition_func": lambda G: louvain_communities(
+    #         G,
+    #         global_edge_ratio,
+    #         local_edge_ratio,
+    #     ),
+    # },
     "modularity": {
         "name": "Modularity",
         "partition_func": lambda G: nx.community.louvain_communities(G),
@@ -76,6 +76,8 @@ def run_benchmarks(
         G = generate_sbm_graph(graph_size, seed=seed)
         partition = COMMUNITY_MEASURES[measure]["partition_func"](G)
         ground_truth_partition = G.graph["partition"]
+        print(f"Modularity for ground truth: {nx.algorithms.community.modularity(G, ground_truth_partition)}")
+        print(f"Modularity for algorithm: {nx.algorithms.community.modularity(G, partition)}")
         nmi_scores.append(nmi_score(ground_truth_partition, partition))
         print(
             f"Measure {measure}, Seed {seed}: NMI"
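Note: the nmi_score helper called above is not shown in this diff. Since sklearn's normalized_mutual_info_score compares per-node label vectors while the partitions here are lists of node sets, a minimal sketch of such a helper (a hypothetical reconstruction, not the repository's actual code) could be:

from sklearn.metrics import normalized_mutual_info_score

def nmi_score(partition_a, partition_b):
    # Map each node to the index of the community that contains it
    labels_a = {node: i for i, comm in enumerate(partition_a) for node in comm}
    labels_b = {node: i for i, comm in enumerate(partition_b) for node in comm}
    # Assumes both partitions cover the same node set
    nodes = sorted(labels_a)
    return normalized_mutual_info_score(
        [labels_a[n] for n in nodes], [labels_b[n] for n in nodes]
    )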
calculate_edge_probabilities.py (33 changes: 25 additions & 8 deletions)
@@ -4,21 +4,23 @@
 import networkx as nx
 import csv
 from pathlib import Path
+import powerlaw
 
 from main import load_network
 
 
 def main():
     G = load_network()
     # "court" field contains ground truth community labels
-    nodes_by_community = defaultdict(set)
-    for node in G.nodes:
-        court = G.nodes[node].get("court", None)
-        # Ignore nodes without a ground truth community label
-        if court is not None:
-            nodes_by_community[court].add(node)
-    create_edge_prob_csv(G, nodes_by_community)
-    create_community_size_csv(nodes_by_community)
+    # nodes_by_community = defaultdict(set)
+    # for node in G.nodes:
+    #     court = G.nodes[node].get("court", None)
+    #     # Ignore nodes without a ground truth community label
+    #     if court is not None:
+    #         nodes_by_community[court].add(node)
+    # create_edge_prob_csv(G, nodes_by_community)
+    # create_community_size_csv(nodes_by_community)
+    print(estimate_power_law_degree_exponent(G))
 
 
 def create_edge_prob_csv(G, nodes_by_community):
@@ -53,5 +55,20 @@ def create_community_size_csv(nodes_by_community: dict[set]):
         writer.writerow([community, len(nodes) / total_nodes])
 
 
+def estimate_power_law_degree_exponent(G):
+    degrees = [d for n, d in G.degree()]
+    fit = powerlaw.Fit(degrees)
+    # Now do a powerlaw fit for the community sizes
+    nodes_by_community = defaultdict(set)
+    for node in G.nodes:
+        court = G.nodes[node].get("court", None)
+        if court is not None:
+            nodes_by_community[court].add(node)
+    community_sizes = [len(nodes) for nodes in nodes_by_community.values()]
+    print(community_sizes)
+    fit2 = powerlaw.Fit(community_sizes)
+    return fit.power_law.alpha, fit2.power_law.alpha
+
+
 if __name__ == "__main__":
     main()
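Note: degree and community-size samples are integer-valued, so powerlaw.Fit's discrete=True option is usually a better match than the continuous default used above, and distribution_compare offers a quick check of the power law against an alternative. A sketch, not part of the commit (describe_fit is a hypothetical helper):

import powerlaw

def describe_fit(values):
    fit = powerlaw.Fit(values, discrete=True)  # integer-valued data
    # R > 0 favors the power law over the lognormal; p is the significance
    R, p = fit.distribution_compare("power_law", "lognormal")
    return fit.power_law.alpha, fit.power_law.xmin, R, p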
graph_generation_lfr.py (11 changes: 11 additions & 0 deletions)
@@ -0,0 +1,11 @@
+# Creates LFR benchmark graphs based on the properties of the main graph
+
+# Power law degree exponent = 3.5651290105965305
+# Power law community size exponent = 4.1918
+# Average degree = 6.783669266744867
+# Minimum degree = 1
+# Maximum degree = 6756
+# Minimum community size = 21716
+# Maximum community size = 116565
+
+import networkx as nx
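The module body is still a stub (hence the WIP commit message). As a sketch of a plausible next step, networkx's built-in LFR generator accepts the parameters recorded in the comments above; the mixing parameter mu is not measured in this commit, so the default below is a placeholder, and the measured community-size bounds are omitted since they only make sense for very large n:

import networkx as nx


def generate_lfr_graph(n, mu=0.1, seed=None):
    G = nx.LFR_benchmark_graph(
        n,
        tau1=3.5651290105965305,  # measured power law degree exponent
        tau2=4.1918,  # measured power law community size exponent
        mu=mu,  # placeholder mixing parameter, not measured in this commit
        average_degree=6.783669266744867,
        max_degree=6756,
        seed=seed,
    )
    # Mirror the SBM generator, which stores the ground truth partition on the
    # graph; LFR records each node's community as a frozenset node attribute.
    G.graph["partition"] = list({frozenset(G.nodes[v]["community"]) for v in G})
    return G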
graph_generation.py → graph_generation_sbm.py (6 changes: 4 additions & 2 deletions)
@@ -51,13 +51,15 @@ def generate_sbm_graph(
         edge_prob_mat = EDGE_PROBS
     if community_sizes is None:
         community_sizes = COMMUNITY_SIZES
+    # Scale edge probabilities by 50,000 / n
+    edge_prob_mat = [[p * 50000 / n for p in row] for row in edge_prob_mat]
     community_sizes = [int(round(n * s)) for s in community_sizes]
     G = nx.stochastic_block_model(
         community_sizes, edge_prob_mat, seed=seed, directed=True
     )
     # Add weight=1 to each edge
-    for u, v in G.edges:
-        G[u][v]["weight"] = 1
+    # for u, v in G.edges:
+    #     G[u][v]["weight"] = 1
     return G


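Note on the scaling added above: with p' = p * 50000 / n, a node's expected number of neighbors in a community holding a fraction c of the nodes is p' * (c * n) = 50000 * p * c, which is independent of n. The benchmark therefore keeps expected degrees roughly constant as the graph is resized, and the measured probabilities apply unchanged at n = 50,000.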
graph_samples.py (13 changes: 13 additions & 0 deletions)
@@ -0,0 +1,13 @@
+# From the main graph, randomly sample n nodes and their edges from the graph
+
+from random import sample
+
+import networkx as nx
+
+
+def generate_random_graph(
+    G: nx.DiGraph,
+    n: int,
+) -> nx.DiGraph:
+    random_nodes = sample(list(G.nodes), n)
+    return nx.subgraph(G, random_nodes)
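One caveat: nx.subgraph returns a read-only view backed by the parent graph, and uniform node sampling keeps only edges among the sampled nodes, so samples are typically much sparser than the source. A usage sketch (assumes the main graph loader from main.py, as used in calculate_edge_probabilities.py; n=1000 is an arbitrary choice):

from main import load_network
from graph_samples import generate_random_graph

G = load_network()
sample_view = generate_random_graph(G, 1000)
sample_graph = sample_view.copy()  # copy() detaches the read-only view for mutation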
louvain.py (2 changes: 1 addition & 1 deletion)
@@ -75,7 +75,7 @@ def louvain_partitions(
         yield partition
         new_community_score = global_community_measure(G, partition)
         # print(f"Calculated global measure score: {new_community_score}")
-        if new_community_score - comm_score <= threshold:
+        if abs(new_community_score - comm_score) <= threshold:
             return
         comm_score = new_community_score
         graph = _gen_graph(graph, inner_partition)
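Note on the abs() change above: with the one-sided check, any decrease in the global score, however large, satisfied new_community_score - comm_score <= threshold and terminated the partition loop immediately; taking the absolute value treats only changes within threshold as convergence, so the algorithm keeps refining even when a level temporarily lowers the global measure.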
requirements.txt (1 change: 1 addition & 0 deletions)
@@ -1,3 +1,4 @@
 networkx~=2.8
 scikit-learn~=1.1
 click~=8.1
+powerlaw
