Implementation of linearized_directed constructor (#70)

barahona-research-group · Feb 27, 2023 · bb25818 · bb25818
1 parent 33890b7
commit bb25818
Show file tree

Hide file tree

Showing 20 changed files with 2,818 additions and 1,551 deletions.
diff --git a/README.md b/README.md
@@ -82,15 +82,25 @@ results  = identify_optimal_scales(results, block_size = 10, window_size = 5)
 plotting.plot_optimal_partitions(nx_graph, results)
 ```
 
-## Custom constructors
+## Constructors
 
+We provide an object-oriented module for constructing quality matrices and null models in `pygenstability/constructors.py`. Various constructors are implemented for different types of graphs:
+
+- `linearized` based on linearized MS for large undirected weighted graphs [2]
+- `continuous_combinatorial` based on combinatorial Lablacian for undirected weighted graphs [2]
+- `continuous_normalized` based on random-walk normalized Laplacians for undirected weighted graphs [2]
+- `signed_modularity` based on signed modularity for signed graphs [8]
+- `directed` based on random-walk Laplacian with teleportation for directed weighted graphs [2]
+- `linearized_directed` based on random-walk Laplacian with teleportation for large  directed weighted graphs
+
+For the computationally efficient
+analysis of **large** graphs we recommend using the `linearized` or `linearized_directed` constructors instead of `continuous_combinatorial`, `continuous_normalized` and `directed` that rely on the computation of matrix exponentials.
+
 For those of you that wish to implement their own constructor, you will need to design a function with the following properties:
 
 - take a scipy.csgraph `graph` and a float `time` as argument
 - return a `quality_matrix` (sparse scipy matrix) and a `null_model` (multiples of two, in a numpy array)
 
-Please see `pygenstability/constructors.py` for the existing implemented constructors. 
-
 ## Contributers
 
 - Alexis Arnaudon, GitHub: `arnaudon <https://github.com/arnaudon>`
@@ -176,6 +186,8 @@ If you are interested in trying our other packages, see the below list:
 
 [7] Preprint incoming ...
 
+[8] Gómez, S., Jensen, P., & Arenas, A. (2009). ‘Analysis of community structure in            networks of correlated data‘. *Physical Review E*, 80(1), 016114.
+
 ## Licence
 
 This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

diff --git a/examples/Example_3_directed.ipynb b/examples/Example_3_directed.ipynb
diff --git a/examples/benchmark/run_benchmark.py b/examples/benchmark/run_benchmark.py
@@ -49,7 +49,6 @@ def get_comp_time(sizes, graph_type="SBM", constructor="linearized", method="lou
                 with_postprocessing=True,
                 with_ttprime=True,
                 with_optimal_scales=False,
-                with_spectral_decomp=True,
                 method=method,
                 n_tries=50,
                 constructor=constructor,

diff --git a/src/pygenstability/constructors.py b/src/pygenstability/constructors.py
@@ -264,13 +264,13 @@ class constructor_directed(Constructor):
 
     .. math::
 
-        F(t)=\Pi \exp\left(-\alpha L-\left(\frac{1-\alpha}{N}+\alpha \mathrm{diag}(a)\right)
-        \boldsymbol{1}\boldsymbol{1}^T\right)
+        F(t)=\Pi \exp\left(t \left(\alpha L+\left(\frac{1-\alpha}{N}+\alpha \mathrm{diag}(a)\right)
+        \boldsymbol{1}\boldsymbol{1}^T-I\right)\right)
 
-    where :math:`a` denotes the vector of dangling nodes, i.e. :math:`a_i=1` if the
-    out-degree :math:`d_i=0` and :math:`a_i=0` otherwise, :math:`\boldsymbol{1}` denotes
-    the vector of ones and :math:`0\le \alpha < 1` the damping factor, and associated null model
-    :math:`v_0=v_1=\pi` given by the PageRank vector :math:`\pi`.
+    where :math:`I` denotes the identidy matrix, :math:`a` denotes the vector of dangling nodes,
+    i.e. :math:`a_i=1` if the out-degree :math:`d_i=0` and :math:`a_i=0` otherwise,
+    :math:`\boldsymbol{1}` denotes the vector of ones and :math:`0\le \alpha < 1` the damping
+    factor, and associated null model :math:`v_0=v_1=\pi` given by the PageRank vector :math:`\pi`.
     """
 
     def prepare(self, **kwargs):
@@ -281,13 +281,16 @@ def prepare(self, **kwargs):
         n_nodes = self.graph.shape[0]
         ones = np.ones((n_nodes, n_nodes)) / n_nodes
 
-        out_degrees = self.graph.toarray().sum(axis=1).flatten()
+        out_degrees = np.array(self.graph.sum(1)).flatten()
+        _check_total_degree(out_degrees)
         dinv = np.divide(1, out_degrees, where=out_degrees != 0)
+
         self.partial_quality_matrix = sp.csr_matrix(
             alpha * np.diag(dinv).dot(self.graph.toarray())
             + ((1 - alpha) * np.diag(np.ones(n_nodes)) + np.diag(alpha * (dinv == 0.0))).dot(ones)
             - np.eye(n_nodes)
         )
+
         pi = abs(sp.linalg.eigs(self.partial_quality_matrix.transpose(), which="SM", k=1)[1][:, 0])
         pi /= pi.sum()
         self.partial_null_model = np.array([pi, pi])
@@ -297,3 +300,57 @@ def get_data(self, scale):
         exp = self._get_exp(-scale)
         quality_matrix = sp.diags(self.partial_null_model[0]).dot(exp)
         return {"quality": quality_matrix, "null_model": self.partial_null_model}
+
+
+class constructor_linearized_directed(Constructor):
+    r"""Constructor for directed Markov stability.
+
+    The quality matrix is:
+
+    .. math::
+
+        F(t)=\Pi t \left(\alpha L+\left(\frac{1-\alpha}{N}+\alpha \mathrm{diag}(a)\right)
+        \boldsymbol{1}\boldsymbol{1}^T-I\right)
+
+    where :math:`a` denotes the vector of dangling nodes, i.e. :math:`a_i=1` if the
+    out-degree :math:`d_i=0` and :math:`a_i=0` otherwise, :math:`\boldsymbol{1}` denotes
+    the vector of ones and :math:`0\le \alpha \le 1` the damping factor, and associated null model
+    :math:`v_0=v_1=\pi` given by the PageRank vector :math:`\pi`. For large graphs teleportation
+    leads to memory error and so we recommend `\alpha=1`.
+    """
+
+    def prepare(self, **kwargs):
+        """Prepare the constructor with non-scale dependent computations."""
+        alpha = kwargs.get("alpha", 0.8)
+        n_nodes = self.graph.shape[0]
+
+        out_degrees = np.array(self.graph.sum(1)).flatten()
+        _check_total_degree(out_degrees)
+        dinv = np.divide(1, out_degrees, where=out_degrees != 0)
+
+        if alpha < 1:
+            ones = np.ones((n_nodes, n_nodes)) / n_nodes
+
+            self.partial_quality_matrix = sp.csr_matrix(
+                alpha * np.diag(dinv).dot(self.graph.toarray())
+                + ((1 - alpha) * np.diag(np.ones(n_nodes)) + np.diag(alpha * (dinv == 0.0))).dot(
+                    ones
+                )
+                - np.eye(n_nodes)
+            )
+
+        if alpha == 1:
+            self.partial_quality_matrix = sp.csr_matrix(
+                sp.diags(dinv).dot(self.graph) - sp.diags(np.ones(n_nodes))
+            )
+
+        pi = abs(sp.linalg.eigs(self.partial_quality_matrix.transpose(), which="SM", k=1)[1][:, 0])
+        pi /= pi.sum()
+        self.partial_null_model = np.array([pi, pi])
+
+    def get_data(self, scale):
+        """Return quality and null model at given scale."""
+        quality_matrix = sp.diags(self.partial_null_model[0]).dot(
+            scale * self.partial_quality_matrix
+        )
+        return {"quality": quality_matrix, "null_model": self.partial_null_model}
diff --git a/src/pygenstability/pygenstability.py b/src/pygenstability/pygenstability.py
@@ -124,6 +124,7 @@ def run(
     with_optimal_scales=True,
     optimal_scales_kwargs=None,
     method="louvain",
+    constructor_kwargs=None,
 ):
     """This is the main function to compute graph clustering across scales with Markov Stability.
 
@@ -158,6 +159,7 @@ def run(
         optimal_scales_kwargs (dict): kwargs to pass to optimal scale selection, see
             optimal_scale module.
         method (str): optimiation method, louvain or leiden
+        constructor_kwargs (dict): additional kwargs to pass to constructor prepare method
 
     Returns:
         Results dict with the following entries
@@ -178,14 +180,18 @@ def run(
         scales=scales,
     )
     assert exp_comp_mode in ["spectral", "expm"]
-    if constructor == "directed":
+    if constructor in ("directed", "linearized_directed"):
         L.info("We cannot use spectral exponential computation for directed contructor")
         exp_comp_mode = "expm"
 
+    if constructor_kwargs is None:
+        constructor_kwargs = {}
+    constructor_kwargs.update(
+        {"with_spectral_gap": with_spectral_gap, "exp_comp_mode": exp_comp_mode}
+    )
+
     with multiprocessing.Pool(n_workers) as pool:
-        constructor = load_constructor(
-            constructor, graph, with_spectral_gap=with_spectral_gap, exp_comp_mode=exp_comp_mode
-        )
+        constructor = load_constructor(constructor, graph, **constructor_kwargs)
 
         L.info("Precompute constructors...")
         constructor_data = _get_constructor_data(