Compute matrix exponential via spectral decomposition (#68)

barahona-research-group · Feb 22, 2023 · 33890b7 · 33890b7
1 parent a280c9d
commit 33890b7
Show file tree

Hide file tree

Showing 19 changed files with 275 additions and 257 deletions.
diff --git a/README.md b/README.md
@@ -138,13 +138,17 @@ or using the click app:
 
 
 
-Other examples can be found as jupyter-notebooks in `examples/` directory, including:
+Other examples can be found as jupyter-notebooks in the `examples/` directory, including:
 * Example 1: Undirected SBM
 * Example 2: Multiscale
 * Example 3: Directed networks
 * Example 4: Custom constructors
 * Example 5: Hypergraphs
 
+Finally, we provide applications to real-world networks in the `examples/real_examples/` directory, including:
+* Powergrid network
+* Protein structures
+
 
 ## Our other available packages
 

diff --git a/docs/source/examples/example.ipynb b/docs/source/examples/example.ipynb
diff --git a/examples/Example_1_undirected_sbm.ipynb b/examples/Example_1_undirected_sbm.ipynb
diff --git a/examples/Example_2_multiscale.ipynb b/examples/Example_2_multiscale.ipynb
diff --git a/examples/Example_3_directed.ipynb b/examples/Example_3_directed.ipynb
diff --git a/examples/Example_4_constructors.ipynb b/examples/Example_4_constructors.ipynb
diff --git a/examples/Example_5_hypergraph.ipynb b/examples/Example_5_hypergraph.ipynb
diff --git a/examples/benchmark/run_benchmark.py b/examples/benchmark/run_benchmark.py
@@ -6,13 +6,14 @@
 import os
 import pandas as pd
 import pygenstability as pgs
+import scipy.sparse as sp
 import logging
 
 logging.basicConfig(level=logging.DEBUG)
 logging.getLogger("matplotlib").setLevel(logging.INFO)
 
 
-def get_comp_time(sizes, graph_type="SBM", constructor="linearized", n_tries=5):
+def get_comp_time(sizes, graph_type="SBM", constructor="linearized", method="louvain", n_tries=5):
     """Estimate computational times for simple benchmarking."""
     df = pd.DataFrame()
     node_sizes = []
@@ -33,7 +34,8 @@ def get_comp_time(sizes, graph_type="SBM", constructor="linearized", n_tries=5):
                 G = nx.erdos_renyi_graph(size * 60, size * 1000.0 / math.comb(size * 60, 2))
             _node_sizes.append(len(G))
             _edge_sizes.append(len(G.edges))
-            A = nx.to_scipy_sparse_array(G)
+            A = nx.to_numpy_array(G)
+            A = sp.csgraph.csgraph_from_dense(A)
 
             if Path("timing.csv").exists():
                 os.remove("timing.csv")
@@ -47,7 +49,9 @@ def get_comp_time(sizes, graph_type="SBM", constructor="linearized", n_tries=5):
                 with_postprocessing=True,
                 with_ttprime=True,
                 with_optimal_scales=False,
-                n_louvain=50,
+                with_spectral_decomp=True,
+                method=method,
+                n_tries=50,
                 constructor=constructor,
                 n_workers=1,
             )
@@ -80,15 +84,19 @@ def get_comp_time(sizes, graph_type="SBM", constructor="linearized", n_tries=5):
     ax2 = ax1.twiny()
     ax2.plot(edge_sizes, -np.ones(len(edge_sizes)))
     ax2.set_xlabel("Graph size [# edges]")
-    df.to_csv(f"comp_time_{constructor}_{graph_type}.csv")
+    df.to_csv(f"comp_time_{constructor}_{graph_type}_{method}.csv")
     plt.ylabel("Computational time [s]")
-    plt.savefig(f"comp_time_{constructor}_{graph_type}.pdf")
+    plt.savefig(f"comp_time_{constructor}_{graph_type}_{method}.pdf")
 
 
 if __name__ == "__main__":
     sizes = list(range(2, 11))
 
-    get_comp_time(sizes, graph_type="SBM", constructor="linearized")
-    get_comp_time(sizes, graph_type="ER", constructor="linearized")
-    get_comp_time(sizes, graph_type="SBM", constructor="continuous_combinatorial")
-    get_comp_time(sizes, graph_type="ER", constructor="continuous_combinatorial")
+    get_comp_time(sizes, graph_type="SBM", constructor="linearized", method="louvain")
+    get_comp_time(sizes, graph_type="SBM", constructor="linearized", method="leiden")
+    get_comp_time(sizes, graph_type="SBM", constructor="continuous_combinatorial", method="louvain")
+    get_comp_time(sizes, graph_type="SBM", constructor="continuous_combinatorial", method="leiden")
+    get_comp_time(sizes, graph_type="ER", constructor="linearized", method="louvain")
+    get_comp_time(sizes, graph_type="ER", constructor="linearized", method="leiden")
+    get_comp_time(sizes, graph_type="ER", constructor="continuous_combinatorial", method="louvain")
+    get_comp_time(sizes, graph_type="ER", constructor="continuous_combinatorial", method="leiden")
diff --git a/examples/multiscale_example.py b/examples/multiscale_example.py
@@ -134,6 +134,7 @@ def create_graph():
         n_tries=20,
         constructor="continuous_combinatorial",
         n_workers=4,
+        exp_comp_mode='expm',
     )
     print(results)
     # plots results

diff --git a/src/pygenstability/constructors.py b/src/pygenstability/constructors.py
@@ -12,6 +12,7 @@
 import sys
 
 import numpy as np
+import scipy.linalg as la
 import scipy.sparse as sp
 
 L = logging.getLogger(__name__)
@@ -34,20 +35,11 @@ def load_constructor(constructor, graph, **kwargs):
     return constructor
 
 
-def _threshold_matrix(matrix, threshold=THRESHOLD):
-    """Threshold a matrix to remove small numbers for speed up."""
-    matrix.data[np.abs(matrix.data) < threshold * np.max(matrix)] = 0.0
-    matrix.eliminate_zeros()
-
-
-def _apply_expm(matrix):
-    """Apply matrix exponential.
-
-    TODO: implement other variants
-    """
-    exp = sp.csr_matrix(sp.linalg.expm(matrix.toarray().astype(DTYPE)))
-    _threshold_matrix(exp)
-    return exp
+def _compute_spectral_decomp(matrix):
+    """Solve eigenalue problem."""
+    lambdas, v = la.eig(matrix.toarray())
+    vinv = la.inv(v.real)
+    return lambdas.real, v.real, vinv
 
 
 def _check_total_degree(degrees):
@@ -71,24 +63,39 @@ class Constructor:
     to return quality matrix, null model, and possible global shift.
     """
 
-    def __init__(self, graph, with_spectral_gap=False, **kwargs):
+    def __init__(self, graph, with_spectral_gap=False, exp_comp_mode="spectral", **kwargs):
         """The constructor calls the prepare method upon initialisation.
 
         Args:
             graph (csgraph): graph for which to run clustering
             with_spectral_gap (bool): set to True to use spectral gap to rescale
             kwargs (dict): any other properties to pass to the constructor.
+            exp_comp_mode (str): mode to compute matrix exponential, can be expm or spectral
         """
         self.graph = graph
         self.with_spectral_gap = with_spectral_gap
         self.spectral_gap = None
+        self.exp_comp_mode = exp_comp_mode
 
-        # these two variable can be used in prepare method
+        # these three variable can be used in prepare method
         self.partial_quality_matrix = None
         self.partial_null_model = None
+        self.spectral_decomp = None, None, None
+        self.threshold = THRESHOLD
 
         self.prepare(**kwargs)
 
+    def _get_exp(self, scale):
+        """Compute matrix exponential at a given scale."""
+        if self.exp_comp_mode == "expm":
+            exp = sp.linalg.expm(-scale * self.partial_quality_matrix.toarray().astype(DTYPE))
+        if self.exp_comp_mode == "spectral":
+            lambdas, v, vinv = self.spectral_decomp
+            exp = v @ np.diag(np.exp(-scale * lambdas)) @ vinv
+
+        exp[np.abs(exp) < self.threshold * np.max(exp)] = 0.0
+        return sp.csc_matrix(exp)
+
     def prepare(self, **kwargs):
         """Prepare the constructor with non-scale dependent computations."""
 
@@ -153,13 +160,18 @@ def prepare(self, **kwargs):
         self.partial_null_model = np.array([pi, pi], dtype=DTYPE)
         if self.with_spectral_gap:
             self.spectral_gap = _get_spectral_gap(laplacian)
-        self.partial_quality_matrix = laplacian
+
+        if self.exp_comp_mode == "spectral":
+            self.spectral_decomp = _compute_spectral_decomp(laplacian)
+        if self.exp_comp_mode == "expm":
+            self.partial_quality_matrix = laplacian
 
     def get_data(self, scale):
         """Return quality and null model at given scale."""
         if self.with_spectral_gap:
             scale /= self.spectral_gap
-        exp = _apply_expm(-scale * self.partial_quality_matrix)
+
+        exp = self._get_exp(scale)
         quality_matrix = sp.diags(self.partial_null_model[0]).dot(exp)
         return {"quality": quality_matrix, "null_model": self.partial_null_model}
 
@@ -188,13 +200,18 @@ def prepare(self, **kwargs):
 
         if self.with_spectral_gap:
             self.spectral_gap = _get_spectral_gap(normed_laplacian)
-        self.partial_quality_matrix = normed_laplacian
+
+        if self.exp_comp_mode == "spectral":
+            self.spectral_decomp = _compute_spectral_decomp(normed_laplacian)
+        if self.exp_comp_mode == "expm":
+            self.partial_quality_matrix = normed_laplacian
 
     def get_data(self, scale):
         """Return quality and null model at given scale."""
         if self.with_spectral_gap:
             scale /= self.spectral_gap
-        exp = _apply_expm(-scale * self.partial_quality_matrix)
+
+        exp = self._get_exp(scale)
         quality_matrix = sp.diags(self.partial_null_model[0]).dot(exp)
         return {"quality": quality_matrix, "null_model": self.partial_null_model}
 
@@ -247,35 +264,36 @@ class constructor_directed(Constructor):
 
     .. math::
 
-        F(t)=\Pi \exp\left(-\alpha L-\left(\frac{1-\alpha}{N}+\alpha \mathrm{diag}(a)\right)I\right)
+        F(t)=\Pi \exp\left(-\alpha L-\left(\frac{1-\alpha}{N}+\alpha \mathrm{diag}(a)\right)
+        \boldsymbol{1}\boldsymbol{1}^T\right)
 
     where :math:`a` denotes the vector of dangling nodes, i.e. :math:`a_i=1` if the
-    out-degree :math:`d_i=0` and :math:`a_i=0` otherwise, :math:`I` denotes the identity matrix
-    and :math:`0\le \alpha < 1` the damping factor, and associated null model
+    out-degree :math:`d_i=0` and :math:`a_i=0` otherwise, :math:`\boldsymbol{1}` denotes
+    the vector of ones and :math:`0\le \alpha < 1` the damping factor, and associated null model
     :math:`v_0=v_1=\pi` given by the PageRank vector :math:`\pi`.
     """
 
     def prepare(self, **kwargs):
         """Prepare the constructor with non-scale dependent computations."""
+        assert self.exp_comp_mode == "expm"
+
         alpha = kwargs.get("alpha", 0.8)
         n_nodes = self.graph.shape[0]
         ones = np.ones((n_nodes, n_nodes)) / n_nodes
 
         out_degrees = self.graph.toarray().sum(axis=1).flatten()
         dinv = np.divide(1, out_degrees, where=out_degrees != 0)
-
         self.partial_quality_matrix = sp.csr_matrix(
             alpha * np.diag(dinv).dot(self.graph.toarray())
             + ((1 - alpha) * np.diag(np.ones(n_nodes)) + np.diag(alpha * (dinv == 0.0))).dot(ones)
             - np.eye(n_nodes)
         )
-
         pi = abs(sp.linalg.eigs(self.partial_quality_matrix.transpose(), which="SM", k=1)[1][:, 0])
         pi /= pi.sum()
         self.partial_null_model = np.array([pi, pi])
 
     def get_data(self, scale):
         """Return quality and null model at given scale."""
-        exp = _apply_expm(scale * self.partial_quality_matrix)
+        exp = self._get_exp(-scale)
         quality_matrix = sp.diags(self.partial_null_model[0]).dot(exp)
         return {"quality": quality_matrix, "null_model": self.partial_null_model}
diff --git a/src/pygenstability/plotting.py b/src/pygenstability/plotting.py
@@ -224,7 +224,13 @@ def plot_single_partition(
 
 
 def plot_optimal_partitions(
-    graph, all_results, edge_color="0.5", edge_width=0.5, folder="optimal_partitions", ext=".pdf"
+    graph,
+    all_results,
+    edge_color="0.5",
+    edge_width=0.5,
+    folder="optimal_partitions",
+    ext=".pdf",
+    show=False,
 ):
     """Plot the community structures at each optimal scale.
 
@@ -235,6 +241,7 @@ def plot_optimal_partitions(
         edge_width (float): width of edgs
         folder (str): folder to save figures
         ext (str): extension of figures files
+        show (bool): show each plot with plt.show() or not
     """
     if not os.path.isdir(folder):
         os.mkdir(folder)
@@ -253,6 +260,8 @@ def plot_optimal_partitions(
             graph, all_results, optimal_scale_id, edge_color=edge_color, edge_width=edge_width
         )
         plt.savefig(f"{folder}/scale_{optimal_scale_id}{ext}", bbox_inches="tight")
+        if show:  # pragma: no cover
+            plt.show()
 
 
 def plot_communities(

diff --git a/src/pygenstability/pygenstability.py b/src/pygenstability/pygenstability.py
@@ -47,7 +47,7 @@ def wrap(*args, **kw):
             result = f(*args, **kw)
             t_end = time()
             with open("timing.csv", "a", encoding="utf-8") as file:
-                print(f"{f.__name__}, {t_start - t_end}", file=file)
+                print(f"{f.__name__}, {t_end - t_start}", file=file)
         else:
             result = f(*args, **kw)
         return result
@@ -117,6 +117,7 @@ def run(
     with_postprocessing=True,
     with_ttprime=True,
     with_spectral_gap=False,
+    exp_comp_mode="spectral",
     result_file="results.pkl",
     n_workers=4,
     tqdm_disable=False,
@@ -149,6 +150,7 @@ def run(
         with_postprocessing (bool): apply the final postprocessing step
         with_ttprime (bool): compute the NVI(t,tprime) matrix to compare scales t and tprime
         with_spectral_gap (bool): normalise scale by spectral gap
+        exp_comp_mode (str): mode to compute matrix exponential, can be expm or spectral
         result_file (str): path to the result file
         n_workers (int): number of workers for multiprocessing
         tqdm_disable (bool): disable progress bars
@@ -175,8 +177,15 @@ def run(
         log_scale=log_scale,
         scales=scales,
     )
+    assert exp_comp_mode in ["spectral", "expm"]
+    if constructor == "directed":
+        L.info("We cannot use spectral exponential computation for directed contructor")
+        exp_comp_mode = "expm"
+
     with multiprocessing.Pool(n_workers) as pool:
-        constructor = load_constructor(constructor, graph, with_spectral_gap=with_spectral_gap)
+        constructor = load_constructor(
+            constructor, graph, with_spectral_gap=with_spectral_gap, exp_comp_mode=exp_comp_mode
+        )
 
         L.info("Precompute constructors...")
         constructor_data = _get_constructor_data(
@@ -255,9 +264,10 @@ def evaluate_NVI(index_pair, partitions):
 
     .. math::
 
-        NVI = S(p1) + S(p2) - MI(p1, p2)
+        NVI = \frac{E(p1) + E(p2) - 2MI(p1, p2)}{JE(p1,p2)}
 
-    where :math:`S` is the entropy and :math:`MI` the mutual information.
+    where :math:`E` is the entropy, :math:`JE` the joint entropy
+    and :math:`MI` the mutual information.
 
     Args:
         index_pair (list): list of two indices to select pairs of partitions

diff --git a/tests/data/test_run_default.yaml b/tests/data/test_run_default.yaml
@@ -117,6 +117,7 @@ number_of_communities:
 - 3.0
 run_params:
   constructor: linearized
+  exp_comp_mode: spectral
   log_scale: true
   max_scale: 0
   method: louvain

diff --git a/tests/data/test_run_default_leiden.yaml b/tests/data/test_run_default_leiden.yaml
@@ -95,6 +95,7 @@ number_of_communities:
 - 3.0
 run_params:
   constructor: linearized
+  exp_comp_mode: spectral
   log_scale: true
   max_scale: 0
   method: leiden

diff --git a/tests/data/test_run_gap.yaml b/tests/data/test_run_gap.yaml
@@ -95,6 +95,7 @@ number_of_communities:
 - 3.0
 run_params:
   constructor: linearized
+  exp_comp_mode: spectral
   log_scale: true
   max_scale: -1
   method: louvain

diff --git a/tests/data/test_run_minimal.yaml b/tests/data/test_run_minimal.yaml
@@ -117,6 +117,7 @@ number_of_communities:
 - 3.0
 run_params:
   constructor: linearized
+  exp_comp_mode: spectral
   log_scale: true
   max_scale: 0
   method: louvain

diff --git a/tests/data/test_run_times.yaml b/tests/data/test_run_times.yaml
@@ -49,6 +49,7 @@ number_of_communities:
 - 3.0
 run_params:
   constructor: linearized
+  exp_comp_mode: spectral
   log_scale: false
   max_scale: 0.5
   method: louvain