Skip to content

Commit

Permalink
Add function to find densest subgraph
Browse files Browse the repository at this point in the history
  • Loading branch information
mtreinish committed Mar 27, 2022
1 parent 71e34d1 commit 944df0f
Showing 1 changed file with 199 additions and 0 deletions.
199 changes: 199 additions & 0 deletions src/dense_subgraph.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.

use hashbrown::{HashMap, HashSet};

use petgraph::graph::NodeIndex;
use petgraph::prelude::*;
use petgraph::visit::IntoEdgeReferences;
use petgraph::EdgeType;

use rayon::prelude::*;

use numpy::ToPyArray;
use pyo3::prelude::*;
use pyo3::Python;

use crate::digraph;
use crate::graph;
use crate::StablePyGraph;

/// Score and contents of one candidate node subset evaluated by
/// `densest_subgraph`.
struct SubsetResult {
    /// Nodes of the candidate subset, in the order the breadth-first
    /// traversal visited them.
    map: Vec<NodeIndex>,
    /// Connections recorded for the candidate, as `[source, target]` pairs
    /// of node indices from the original graph.
    subgraph: Vec<[NodeIndex; 2]>,
    /// Number of connections recorded in `subgraph`; larger is better.
    count: usize,
    /// Weight-derived penalty of the candidate; smaller is better. It is
    /// `0.0` when no weight callback is in use.
    error: f64,
}

pub fn densest_subgraph<Ty>(
py: Python,
graph: &StablePyGraph<Ty>,
num_nodes: usize,
weight_callback: Option<PyObject>,
) -> PyResult<(PyObject, PyObject, PyObject)>
where
Ty: EdgeType + Sync,
{
let node_indices: Vec<NodeIndex> = graph.node_indices().collect();
let float_callback =
|callback: PyObject, source_node: usize, target_node: usize| -> PyResult<f64> {
let res = callback.as_ref(py).call1((source_node, target_node))?;
res.extract()
};
let mut weight_map: Option<HashMap<[NodeIndex; 2], f64>> = None;

if weight_callback.is_some() {
let mut inner_weight_map: HashMap<[NodeIndex; 2], f64> =
HashMap::with_capacity(graph.edge_count());
let callback = weight_callback.as_ref().unwrap();
for edge in graph.edge_references() {
let source: NodeIndex = edge.source();
let target: NodeIndex = edge.target();
let weight = float_callback(callback.clone_ref(py), source.index(), target.index())?;
inner_weight_map.insert([source, target], weight);
}
weight_map = Some(inner_weight_map);
}
let reduce_identity_fn = || -> SubsetResult {
SubsetResult {
count: 0,
map: Vec::new(),
error: std::f64::INFINITY,
subgraph: Vec::new(),
}
};

let reduce_fn = |best: SubsetResult, curr: SubsetResult| -> SubsetResult {
if weight_callback.is_some() {
if curr.count >= best.count && curr.error <= best.error {
curr
} else {
best
}
} else if curr.count > best.count {
curr
} else {
best
}
};

let best_result = node_indices
.into_par_iter()
.map(|index| {
let mut subgraph: Vec<[NodeIndex; 2]> = Vec::with_capacity(num_nodes);
let mut bfs = Bfs::new(&graph, index);
let mut bfs_vec: Vec<NodeIndex> = Vec::with_capacity(num_nodes);
let mut bfs_set: HashSet<NodeIndex> = HashSet::with_capacity(num_nodes);

let mut count = 0;
while let Some(node) = bfs.next(&graph) {
bfs_vec.push(node);
bfs_set.insert(node);
count += 1;
if count >= num_nodes {
break;
}
}
let mut connection_count = 0;
for node in &bfs_vec {
for j in graph.node_indices() {
if graph.contains_edge(*node, j) {
connection_count += 1;
subgraph.push([*node, j]);
}
}
}
let error = match &weight_map {
Some(map) => subgraph.iter().map(|edge| map[edge]).sum::<f64>() / num_nodes as f64,
None => 0.,
};
SubsetResult {
count: connection_count,
error,
map: bfs_vec,
subgraph,
}
})
.reduce(reduce_identity_fn, reduce_fn);
let best_map: Vec<usize> = best_result.map.iter().map(|x| x.index()).collect();
let mapping: HashMap<usize, usize> = best_map
.iter()
.enumerate()
.map(|(best_edge, edge)| (*edge, best_edge))
.collect();
let new_cmap: Vec<[usize; 2]> = best_result
.subgraph
.iter()
.map(|c| [mapping[&c[0].index()], mapping[&c[1].index()]])
.collect();
let rows: Vec<usize> = new_cmap.iter().map(|edge| edge[0]).collect();
let cols: Vec<usize> = new_cmap.iter().map(|edge| edge[1]).collect();
Ok((
rows.to_pyarray(py).into(),
cols.to_pyarray(py).into(),
best_map.to_pyarray(py).into(),
))
}

/// Find densest subgraph in a :class:`~.PyGraph`
///
/// This method does not provide any guarantees on the approximation as it
/// does a naive search using BFS traversal.
///
/// :param PyGraph graph: The graph to search for a dense subgraph
/// :param int num_nodes: The number of nodes in the subgraph to find
/// :param func weight_callback: An optional callable that if specified will be
///     passed the node indices of each edge in the graph and it is expected to
///     return a float value. If specified, a low total edge weight in a found
///     subgraph will be a criterion for selection in addition to the
///     connectivity of the subgraph.
/// :returns: A tuple of 3 numpy arrays for efficient sparse matrix creation
///     of the adjacency matrix of the subgraph: the row indices, the column
///     indices, and an array mapping those indices back to the node indices
///     on the original graph.
/// :rtype: (numpy.ndarray, numpy.ndarray, numpy.ndarray)
#[pyfunction]
#[pyo3(text_signature = "(graph, num_nodes, /, weight_callback=None)")]
pub fn graph_dense_subgraph(
    py: Python,
    graph: &graph::PyGraph,
    num_nodes: usize,
    weight_callback: Option<PyObject>,
) -> PyResult<(PyObject, PyObject, PyObject)> {
    densest_subgraph(py, &graph.graph, num_nodes, weight_callback)
}

/// Find densest subgraph in a :class:`~.PyDiGraph`
///
/// This method does not provide any guarantees on the approximation as it
/// does a naive search using BFS traversal.
///
/// :param PyDiGraph graph: The graph to search for a dense subgraph
/// :param int num_nodes: The number of nodes in the subgraph to find
/// :param func weight_callback: An optional callable that if specified will be
///     passed the node indices of each edge in the graph and it is expected to
///     return a float value. If specified, a low total edge weight in a found
///     subgraph will be a criterion for selection in addition to the
///     connectivity of the subgraph.
/// :returns: A tuple of 3 numpy arrays for efficient sparse matrix creation
///     of the adjacency matrix of the subgraph: the row indices, the column
///     indices, and an array mapping those indices back to the node indices
///     on the original graph.
/// :rtype: (numpy.ndarray, numpy.ndarray, numpy.ndarray)
#[pyfunction]
#[pyo3(text_signature = "(graph, num_nodes, /, weight_callback=None)")]
pub fn digraph_dense_subgraph(
    py: Python,
    graph: &digraph::PyDiGraph,
    num_nodes: usize,
    weight_callback: Option<PyObject>,
) -> PyResult<(PyObject, PyObject, PyObject)> {
    densest_subgraph(py, &graph.graph, num_nodes, weight_callback)
}

0 comments on commit 944df0f

Please sign in to comment.