Skip to content

Commit

Permalink
Compute matrix exponential via spectral decomposition (#68)
Browse files Browse the repository at this point in the history
  • Loading branch information
d-schindler authored Feb 22, 2023
1 parent a280c9d commit 33890b7
Show file tree
Hide file tree
Showing 19 changed files with 275 additions and 257 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,17 @@ or using the click app:



Other examples can be found as jupyter-notebooks in `examples/` directory, including:
Other examples can be found as jupyter-notebooks in the `examples/` directory, including:
* Example 1: Undirected SBM
* Example 2: Multiscale
* Example 3: Directed networks
* Example 4: Custom constructors
* Example 5: Hypergraphs

Finally, we provide applications to real-world networks in the `examples/real_examples/` directory, including:
* Powergrid network
* Protein structures


## Our other available packages

Expand Down
53 changes: 23 additions & 30 deletions docs/source/examples/example.ipynb

Large diffs are not rendered by default.

35 changes: 20 additions & 15 deletions examples/Example_1_undirected_sbm.ipynb

Large diffs are not rendered by default.

84 changes: 32 additions & 52 deletions examples/Example_2_multiscale.ipynb

Large diffs are not rendered by default.

74 changes: 21 additions & 53 deletions examples/Example_3_directed.ipynb

Large diffs are not rendered by default.

46 changes: 20 additions & 26 deletions examples/Example_4_constructors.ipynb

Large diffs are not rendered by default.

70 changes: 32 additions & 38 deletions examples/Example_5_hypergraph.ipynb

Large diffs are not rendered by default.

26 changes: 17 additions & 9 deletions examples/benchmark/run_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
import os
import pandas as pd
import pygenstability as pgs
import scipy.sparse as sp
import logging

logging.basicConfig(level=logging.DEBUG)
logging.getLogger("matplotlib").setLevel(logging.INFO)


def get_comp_time(sizes, graph_type="SBM", constructor="linearized", n_tries=5):
def get_comp_time(sizes, graph_type="SBM", constructor="linearized", method="louvain", n_tries=5):
"""Estimate computational times for simple benchmarking."""
df = pd.DataFrame()
node_sizes = []
Expand All @@ -33,7 +34,8 @@ def get_comp_time(sizes, graph_type="SBM", constructor="linearized", n_tries=5):
G = nx.erdos_renyi_graph(size * 60, size * 1000.0 / math.comb(size * 60, 2))
_node_sizes.append(len(G))
_edge_sizes.append(len(G.edges))
A = nx.to_scipy_sparse_array(G)
A = nx.to_numpy_array(G)
A = sp.csgraph.csgraph_from_dense(A)

if Path("timing.csv").exists():
os.remove("timing.csv")
Expand All @@ -47,7 +49,9 @@ def get_comp_time(sizes, graph_type="SBM", constructor="linearized", n_tries=5):
with_postprocessing=True,
with_ttprime=True,
with_optimal_scales=False,
n_louvain=50,
with_spectral_decomp=True,
method=method,
n_tries=50,
constructor=constructor,
n_workers=1,
)
Expand Down Expand Up @@ -80,15 +84,19 @@ def get_comp_time(sizes, graph_type="SBM", constructor="linearized", n_tries=5):
ax2 = ax1.twiny()
ax2.plot(edge_sizes, -np.ones(len(edge_sizes)))
ax2.set_xlabel("Graph size [# edges]")
df.to_csv(f"comp_time_{constructor}_{graph_type}.csv")
df.to_csv(f"comp_time_{constructor}_{graph_type}_{method}.csv")
plt.ylabel("Computational time [s]")
plt.savefig(f"comp_time_{constructor}_{graph_type}.pdf")
plt.savefig(f"comp_time_{constructor}_{graph_type}_{method}.pdf")


if __name__ == "__main__":
sizes = list(range(2, 11))

get_comp_time(sizes, graph_type="SBM", constructor="linearized")
get_comp_time(sizes, graph_type="ER", constructor="linearized")
get_comp_time(sizes, graph_type="SBM", constructor="continuous_combinatorial")
get_comp_time(sizes, graph_type="ER", constructor="continuous_combinatorial")
get_comp_time(sizes, graph_type="SBM", constructor="linearized", method="louvain")
get_comp_time(sizes, graph_type="SBM", constructor="linearized", method="leiden")
get_comp_time(sizes, graph_type="SBM", constructor="continuous_combinatorial", method="louvain")
get_comp_time(sizes, graph_type="SBM", constructor="continuous_combinatorial", method="leiden")
get_comp_time(sizes, graph_type="ER", constructor="linearized", method="louvain")
get_comp_time(sizes, graph_type="ER", constructor="linearized", method="leiden")
get_comp_time(sizes, graph_type="ER", constructor="continuous_combinatorial", method="louvain")
get_comp_time(sizes, graph_type="ER", constructor="continuous_combinatorial", method="leiden")
1 change: 1 addition & 0 deletions examples/multiscale_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def create_graph():
n_tries=20,
constructor="continuous_combinatorial",
n_workers=4,
exp_comp_mode='expm',
)
print(results)
# plots results
Expand Down
70 changes: 44 additions & 26 deletions src/pygenstability/constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import sys

import numpy as np
import scipy.linalg as la
import scipy.sparse as sp

L = logging.getLogger(__name__)
Expand All @@ -34,20 +35,11 @@ def load_constructor(constructor, graph, **kwargs):
return constructor


def _threshold_matrix(matrix, threshold=THRESHOLD):
"""Threshold a matrix to remove small numbers for speed up."""
matrix.data[np.abs(matrix.data) < threshold * np.max(matrix)] = 0.0
matrix.eliminate_zeros()


def _apply_expm(matrix):
"""Apply matrix exponential.
TODO: implement other variants
"""
exp = sp.csr_matrix(sp.linalg.expm(matrix.toarray().astype(DTYPE)))
_threshold_matrix(exp)
return exp
def _compute_spectral_decomp(matrix):
"""Solve eigenalue problem."""
lambdas, v = la.eig(matrix.toarray())
vinv = la.inv(v.real)
return lambdas.real, v.real, vinv


def _check_total_degree(degrees):
Expand All @@ -71,24 +63,39 @@ class Constructor:
to return quality matrix, null model, and possible global shift.
"""

def __init__(self, graph, with_spectral_gap=False, **kwargs):
def __init__(self, graph, with_spectral_gap=False, exp_comp_mode="spectral", **kwargs):
"""The constructor calls the prepare method upon initialisation.
Args:
graph (csgraph): graph for which to run clustering
with_spectral_gap (bool): set to True to use spectral gap to rescale
kwargs (dict): any other properties to pass to the constructor.
exp_comp_mode (str): mode to compute matrix exponential, can be expm or spectral
"""
self.graph = graph
self.with_spectral_gap = with_spectral_gap
self.spectral_gap = None
self.exp_comp_mode = exp_comp_mode

# these two variable can be used in prepare method
# these three variable can be used in prepare method
self.partial_quality_matrix = None
self.partial_null_model = None
self.spectral_decomp = None, None, None
self.threshold = THRESHOLD

self.prepare(**kwargs)

def _get_exp(self, scale):
"""Compute matrix exponential at a given scale."""
if self.exp_comp_mode == "expm":
exp = sp.linalg.expm(-scale * self.partial_quality_matrix.toarray().astype(DTYPE))
if self.exp_comp_mode == "spectral":
lambdas, v, vinv = self.spectral_decomp
exp = v @ np.diag(np.exp(-scale * lambdas)) @ vinv

exp[np.abs(exp) < self.threshold * np.max(exp)] = 0.0
return sp.csc_matrix(exp)

def prepare(self, **kwargs):
"""Prepare the constructor with non-scale dependent computations."""

Expand Down Expand Up @@ -153,13 +160,18 @@ def prepare(self, **kwargs):
self.partial_null_model = np.array([pi, pi], dtype=DTYPE)
if self.with_spectral_gap:
self.spectral_gap = _get_spectral_gap(laplacian)
self.partial_quality_matrix = laplacian

if self.exp_comp_mode == "spectral":
self.spectral_decomp = _compute_spectral_decomp(laplacian)
if self.exp_comp_mode == "expm":
self.partial_quality_matrix = laplacian

def get_data(self, scale):
"""Return quality and null model at given scale."""
if self.with_spectral_gap:
scale /= self.spectral_gap
exp = _apply_expm(-scale * self.partial_quality_matrix)

exp = self._get_exp(scale)
quality_matrix = sp.diags(self.partial_null_model[0]).dot(exp)
return {"quality": quality_matrix, "null_model": self.partial_null_model}

Expand Down Expand Up @@ -188,13 +200,18 @@ def prepare(self, **kwargs):

if self.with_spectral_gap:
self.spectral_gap = _get_spectral_gap(normed_laplacian)
self.partial_quality_matrix = normed_laplacian

if self.exp_comp_mode == "spectral":
self.spectral_decomp = _compute_spectral_decomp(normed_laplacian)
if self.exp_comp_mode == "expm":
self.partial_quality_matrix = normed_laplacian

def get_data(self, scale):
"""Return quality and null model at given scale."""
if self.with_spectral_gap:
scale /= self.spectral_gap
exp = _apply_expm(-scale * self.partial_quality_matrix)

exp = self._get_exp(scale)
quality_matrix = sp.diags(self.partial_null_model[0]).dot(exp)
return {"quality": quality_matrix, "null_model": self.partial_null_model}

Expand Down Expand Up @@ -247,35 +264,36 @@ class constructor_directed(Constructor):
.. math::
F(t)=\Pi \exp\left(-\alpha L-\left(\frac{1-\alpha}{N}+\alpha \mathrm{diag}(a)\right)I\right)
F(t)=\Pi \exp\left(-\alpha L-\left(\frac{1-\alpha}{N}+\alpha \mathrm{diag}(a)\right)
\boldsymbol{1}\boldsymbol{1}^T\right)
where :math:`a` denotes the vector of dangling nodes, i.e. :math:`a_i=1` if the
out-degree :math:`d_i=0` and :math:`a_i=0` otherwise, :math:`I` denotes the identity matrix
and :math:`0\le \alpha < 1` the damping factor, and associated null model
out-degree :math:`d_i=0` and :math:`a_i=0` otherwise, :math:`\boldsymbol{1}` denotes
the vector of ones and :math:`0\le \alpha < 1` the damping factor, and associated null model
:math:`v_0=v_1=\pi` given by the PageRank vector :math:`\pi`.
"""

def prepare(self, **kwargs):
"""Prepare the constructor with non-scale dependent computations."""
assert self.exp_comp_mode == "expm"

alpha = kwargs.get("alpha", 0.8)
n_nodes = self.graph.shape[0]
ones = np.ones((n_nodes, n_nodes)) / n_nodes

out_degrees = self.graph.toarray().sum(axis=1).flatten()
dinv = np.divide(1, out_degrees, where=out_degrees != 0)

self.partial_quality_matrix = sp.csr_matrix(
alpha * np.diag(dinv).dot(self.graph.toarray())
+ ((1 - alpha) * np.diag(np.ones(n_nodes)) + np.diag(alpha * (dinv == 0.0))).dot(ones)
- np.eye(n_nodes)
)

pi = abs(sp.linalg.eigs(self.partial_quality_matrix.transpose(), which="SM", k=1)[1][:, 0])
pi /= pi.sum()
self.partial_null_model = np.array([pi, pi])

def get_data(self, scale):
"""Return quality and null model at given scale."""
exp = _apply_expm(scale * self.partial_quality_matrix)
exp = self._get_exp(-scale)
quality_matrix = sp.diags(self.partial_null_model[0]).dot(exp)
return {"quality": quality_matrix, "null_model": self.partial_null_model}
11 changes: 10 additions & 1 deletion src/pygenstability/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,13 @@ def plot_single_partition(


def plot_optimal_partitions(
graph, all_results, edge_color="0.5", edge_width=0.5, folder="optimal_partitions", ext=".pdf"
graph,
all_results,
edge_color="0.5",
edge_width=0.5,
folder="optimal_partitions",
ext=".pdf",
show=False,
):
"""Plot the community structures at each optimal scale.
Expand All @@ -235,6 +241,7 @@ def plot_optimal_partitions(
edge_width (float): width of edgs
folder (str): folder to save figures
ext (str): extension of figures files
show (bool): show each plot with plt.show() or not
"""
if not os.path.isdir(folder):
os.mkdir(folder)
Expand All @@ -253,6 +260,8 @@ def plot_optimal_partitions(
graph, all_results, optimal_scale_id, edge_color=edge_color, edge_width=edge_width
)
plt.savefig(f"{folder}/scale_{optimal_scale_id}{ext}", bbox_inches="tight")
if show: # pragma: no cover
plt.show()


def plot_communities(
Expand Down
18 changes: 14 additions & 4 deletions src/pygenstability/pygenstability.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def wrap(*args, **kw):
result = f(*args, **kw)
t_end = time()
with open("timing.csv", "a", encoding="utf-8") as file:
print(f"{f.__name__}, {t_start - t_end}", file=file)
print(f"{f.__name__}, {t_end - t_start}", file=file)
else:
result = f(*args, **kw)
return result
Expand Down Expand Up @@ -117,6 +117,7 @@ def run(
with_postprocessing=True,
with_ttprime=True,
with_spectral_gap=False,
exp_comp_mode="spectral",
result_file="results.pkl",
n_workers=4,
tqdm_disable=False,
Expand Down Expand Up @@ -149,6 +150,7 @@ def run(
with_postprocessing (bool): apply the final postprocessing step
with_ttprime (bool): compute the NVI(t,tprime) matrix to compare scales t and tprime
with_spectral_gap (bool): normalise scale by spectral gap
exp_comp_mode (str): mode to compute matrix exponential, can be expm or spectral
result_file (str): path to the result file
n_workers (int): number of workers for multiprocessing
tqdm_disable (bool): disable progress bars
Expand All @@ -175,8 +177,15 @@ def run(
log_scale=log_scale,
scales=scales,
)
assert exp_comp_mode in ["spectral", "expm"]
if constructor == "directed":
L.info("We cannot use spectral exponential computation for directed contructor")
exp_comp_mode = "expm"

with multiprocessing.Pool(n_workers) as pool:
constructor = load_constructor(constructor, graph, with_spectral_gap=with_spectral_gap)
constructor = load_constructor(
constructor, graph, with_spectral_gap=with_spectral_gap, exp_comp_mode=exp_comp_mode
)

L.info("Precompute constructors...")
constructor_data = _get_constructor_data(
Expand Down Expand Up @@ -255,9 +264,10 @@ def evaluate_NVI(index_pair, partitions):
.. math::
NVI = S(p1) + S(p2) - MI(p1, p2)
NVI = \frac{E(p1) + E(p2) - 2MI(p1, p2)}{JE(p1,p2)}
where :math:`S` is the entropy and :math:`MI` the mutual information.
where :math:`E` is the entropy, :math:`JE` the joint entropy
and :math:`MI` the mutual information.
Args:
index_pair (list): list of two indices to select pairs of partitions
Expand Down
1 change: 1 addition & 0 deletions tests/data/test_run_default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ number_of_communities:
- 3.0
run_params:
constructor: linearized
exp_comp_mode: spectral
log_scale: true
max_scale: 0
method: louvain
Expand Down
1 change: 1 addition & 0 deletions tests/data/test_run_default_leiden.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ number_of_communities:
- 3.0
run_params:
constructor: linearized
exp_comp_mode: spectral
log_scale: true
max_scale: 0
method: leiden
Expand Down
1 change: 1 addition & 0 deletions tests/data/test_run_gap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ number_of_communities:
- 3.0
run_params:
constructor: linearized
exp_comp_mode: spectral
log_scale: true
max_scale: -1
method: louvain
Expand Down
1 change: 1 addition & 0 deletions tests/data/test_run_minimal.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ number_of_communities:
- 3.0
run_params:
constructor: linearized
exp_comp_mode: spectral
log_scale: true
max_scale: 0
method: louvain
Expand Down
1 change: 1 addition & 0 deletions tests/data/test_run_times.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ number_of_communities:
- 3.0
run_params:
constructor: linearized
exp_comp_mode: spectral
log_scale: false
max_scale: 0.5
method: louvain
Expand Down
Loading

0 comments on commit 33890b7

Please sign in to comment.