Skip to content

Commit

Permalink
Implementation of linearized_directed constructor (#70)
Browse files Browse the repository at this point in the history
  • Loading branch information
d-schindler authored Feb 27, 2023
1 parent 33890b7 commit bb25818
Show file tree
Hide file tree
Showing 20 changed files with 2,818 additions and 1,551 deletions.
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,25 @@ results = identify_optimal_scales(results, block_size = 10, window_size = 5)
plotting.plot_optimal_partitions(nx_graph, results)
```

## Custom constructors
## Constructors

We provide an object-oriented module for constructing quality matrices and null models in `pygenstability/constructors.py`. Various constructors are implemented for different types of graphs:

- `linearized` based on linearized MS for large undirected weighted graphs [2]
- `continuous_combinatorial` based on combinatorial Lablacian for undirected weighted graphs [2]
- `continuous_normalized` based on random-walk normalized Laplacians for undirected weighted graphs [2]
- `signed_modularity` based on signed modularity for signed graphs [8]
- `directed` based on random-walk Laplacian with teleportation for directed weighted graphs [2]
- `linearized_directed` based on random-walk Laplacian with teleportation for large directed weighted graphs

For the computationally efficient
analysis of **large** graphs we recommend using the `linearized` or `linearized_directed` constructors instead of `continuous_combinatorial`, `continuous_normalized` and `directed` that rely on the computation of matrix exponentials.

For those of you that wish to implement their own constructor, you will need to design a function with the following properties:

- take a scipy.csgraph `graph` and a float `time` as argument
- return a `quality_matrix` (sparse scipy matrix) and a `null_model` (multiples of two, in a numpy array)

Please see `pygenstability/constructors.py` for the existing implemented constructors.

## Contributers

- Alexis Arnaudon, GitHub: `arnaudon <https://github.com/arnaudon>`
Expand Down Expand Up @@ -176,6 +186,8 @@ If you are interested in trying our other packages, see the below list:

[7] Preprint incoming ...

[8] Gómez, S., Jensen, P., & Arenas, A. (2009). ‘Analysis of community structure in networks of correlated data‘. *Physical Review E*, 80(1), 016114.

## Licence

This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
Expand Down
98 changes: 90 additions & 8 deletions examples/Example_3_directed.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion examples/benchmark/run_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ def get_comp_time(sizes, graph_type="SBM", constructor="linearized", method="lou
with_postprocessing=True,
with_ttprime=True,
with_optimal_scales=False,
with_spectral_decomp=True,
method=method,
n_tries=50,
constructor=constructor,
Expand Down
71 changes: 64 additions & 7 deletions src/pygenstability/constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,13 +264,13 @@ class constructor_directed(Constructor):
.. math::
F(t)=\Pi \exp\left(-\alpha L-\left(\frac{1-\alpha}{N}+\alpha \mathrm{diag}(a)\right)
\boldsymbol{1}\boldsymbol{1}^T\right)
F(t)=\Pi \exp\left(t \left(\alpha L+\left(\frac{1-\alpha}{N}+\alpha \mathrm{diag}(a)\right)
\boldsymbol{1}\boldsymbol{1}^T-I\right)\right)
where :math:`a` denotes the vector of dangling nodes, i.e. :math:`a_i=1` if the
out-degree :math:`d_i=0` and :math:`a_i=0` otherwise, :math:`\boldsymbol{1}` denotes
the vector of ones and :math:`0\le \alpha < 1` the damping factor, and associated null model
:math:`v_0=v_1=\pi` given by the PageRank vector :math:`\pi`.
where :math:`I` denotes the identidy matrix, :math:`a` denotes the vector of dangling nodes,
i.e. :math:`a_i=1` if the out-degree :math:`d_i=0` and :math:`a_i=0` otherwise,
:math:`\boldsymbol{1}` denotes the vector of ones and :math:`0\le \alpha < 1` the damping
factor, and associated null model :math:`v_0=v_1=\pi` given by the PageRank vector :math:`\pi`.
"""

def prepare(self, **kwargs):
Expand All @@ -281,13 +281,16 @@ def prepare(self, **kwargs):
n_nodes = self.graph.shape[0]
ones = np.ones((n_nodes, n_nodes)) / n_nodes

out_degrees = self.graph.toarray().sum(axis=1).flatten()
out_degrees = np.array(self.graph.sum(1)).flatten()
_check_total_degree(out_degrees)
dinv = np.divide(1, out_degrees, where=out_degrees != 0)

self.partial_quality_matrix = sp.csr_matrix(
alpha * np.diag(dinv).dot(self.graph.toarray())
+ ((1 - alpha) * np.diag(np.ones(n_nodes)) + np.diag(alpha * (dinv == 0.0))).dot(ones)
- np.eye(n_nodes)
)

pi = abs(sp.linalg.eigs(self.partial_quality_matrix.transpose(), which="SM", k=1)[1][:, 0])
pi /= pi.sum()
self.partial_null_model = np.array([pi, pi])
Expand All @@ -297,3 +300,57 @@ def get_data(self, scale):
exp = self._get_exp(-scale)
quality_matrix = sp.diags(self.partial_null_model[0]).dot(exp)
return {"quality": quality_matrix, "null_model": self.partial_null_model}


class constructor_linearized_directed(Constructor):
r"""Constructor for directed Markov stability.
The quality matrix is:
.. math::
F(t)=\Pi t \left(\alpha L+\left(\frac{1-\alpha}{N}+\alpha \mathrm{diag}(a)\right)
\boldsymbol{1}\boldsymbol{1}^T-I\right)
where :math:`a` denotes the vector of dangling nodes, i.e. :math:`a_i=1` if the
out-degree :math:`d_i=0` and :math:`a_i=0` otherwise, :math:`\boldsymbol{1}` denotes
the vector of ones and :math:`0\le \alpha \le 1` the damping factor, and associated null model
:math:`v_0=v_1=\pi` given by the PageRank vector :math:`\pi`. For large graphs teleportation
leads to memory error and so we recommend `\alpha=1`.
"""

def prepare(self, **kwargs):
"""Prepare the constructor with non-scale dependent computations."""
alpha = kwargs.get("alpha", 0.8)
n_nodes = self.graph.shape[0]

out_degrees = np.array(self.graph.sum(1)).flatten()
_check_total_degree(out_degrees)
dinv = np.divide(1, out_degrees, where=out_degrees != 0)

if alpha < 1:
ones = np.ones((n_nodes, n_nodes)) / n_nodes

self.partial_quality_matrix = sp.csr_matrix(
alpha * np.diag(dinv).dot(self.graph.toarray())
+ ((1 - alpha) * np.diag(np.ones(n_nodes)) + np.diag(alpha * (dinv == 0.0))).dot(
ones
)
- np.eye(n_nodes)
)

if alpha == 1:
self.partial_quality_matrix = sp.csr_matrix(
sp.diags(dinv).dot(self.graph) - sp.diags(np.ones(n_nodes))
)

pi = abs(sp.linalg.eigs(self.partial_quality_matrix.transpose(), which="SM", k=1)[1][:, 0])
pi /= pi.sum()
self.partial_null_model = np.array([pi, pi])

def get_data(self, scale):
"""Return quality and null model at given scale."""
quality_matrix = sp.diags(self.partial_null_model[0]).dot(
scale * self.partial_quality_matrix
)
return {"quality": quality_matrix, "null_model": self.partial_null_model}
14 changes: 10 additions & 4 deletions src/pygenstability/pygenstability.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def run(
with_optimal_scales=True,
optimal_scales_kwargs=None,
method="louvain",
constructor_kwargs=None,
):
"""This is the main function to compute graph clustering across scales with Markov Stability.
Expand Down Expand Up @@ -158,6 +159,7 @@ def run(
optimal_scales_kwargs (dict): kwargs to pass to optimal scale selection, see
optimal_scale module.
method (str): optimiation method, louvain or leiden
constructor_kwargs (dict): additional kwargs to pass to constructor prepare method
Returns:
Results dict with the following entries
Expand All @@ -178,14 +180,18 @@ def run(
scales=scales,
)
assert exp_comp_mode in ["spectral", "expm"]
if constructor == "directed":
if constructor in ("directed", "linearized_directed"):
L.info("We cannot use spectral exponential computation for directed contructor")
exp_comp_mode = "expm"

if constructor_kwargs is None:
constructor_kwargs = {}
constructor_kwargs.update(
{"with_spectral_gap": with_spectral_gap, "exp_comp_mode": exp_comp_mode}
)

with multiprocessing.Pool(n_workers) as pool:
constructor = load_constructor(
constructor, graph, with_spectral_gap=with_spectral_gap, exp_comp_mode=exp_comp_mode
)
constructor = load_constructor(constructor, graph, **constructor_kwargs)

L.info("Precompute constructors...")
constructor_data = _get_constructor_data(
Expand Down
Loading

0 comments on commit bb25818

Please sign in to comment.