forked from Reed-CompBio/spras
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request Reed-CompBio#73 from Reed-CompBio/all-pairs-shorte…
…st-paths All pairs shortest paths (AllPairs) Code
- Loading branch information
Showing
18 changed files
with
404 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# AllPairs wrapper
# Image that bundles the All Pairs Shortest Paths script together with NetworkX.
FROM python:3.9-alpine3.16

WORKDIR /AllPairs

# --no-cache-dir keeps pip's download cache out of the image layer
RUN pip install --no-cache-dir networkx==2.6.3
COPY all-pairs-shortest-paths.py /AllPairs/all-pairs-shortest-paths.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# All Pairs Shortest Paths Docker image | ||
|
||
A Docker image for All Pairs Shortest Paths that is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/allpairs). | ||
This algorithm was implemented by the SPRAS team and relies on the NetworkX [`shortest_path`](https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.generic.shortest_path.html) function. | ||
|
||
To create the Docker image run: | ||
``` | ||
docker build -t reedcompbio/allpairs -f Dockerfile . | ||
``` | ||
from this directory. | ||
|
||
To inspect the installed Python packages: | ||
``` | ||
docker run reedcompbio/allpairs pip list | ||
``` | ||
|
||
|
||
## Testing | ||
Test code is located in `test/AllPairs`. | ||
The `input` subdirectory contains a sample network and source/target file, along with a second network and source/target file used to check the correctness of All Pairs Shortest Paths. | ||
The expected output graphs for the sample networks are in the `expected` subdirectory. | ||
|
||
The Docker wrapper can be tested with `pytest -k test_ap.py` from the root of the SPRAS repository. | ||
|
||
|
||
## Notes | ||
- The `all-pairs-shortest-paths.py` code is located locally in SPRAS (since the code is short). It is under `docker-wrappers/AllPairs`. | ||
- Samples of an input network and source/target file are located under `test/AllPairs/input`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
""" | ||
All Pairs Shortest Paths pathway reconstruction algorithm. | ||
The algorithm takes a network and a list of sources and targets as input. | ||
It outputs the shortest possible path between every source and every target. | ||
""" | ||
|
||
import argparse | ||
from pathlib import Path | ||
|
||
import networkx as nx | ||
|
||
|
||
def parse_arguments():
    """
    Build the command line parser and parse the command line.
    @return the parsed arguments, with `network`, `nodes` and `output` as Path objects
    """
    arg_parser = argparse.ArgumentParser(description="All Pairs Shortest Paths pathway reconstruction")

    # (flag, help text) for each required path argument, in registration order
    path_options = (
        ("--network", "Network file of the form <node1> <node2> <weight>. Tab-delimited."),
        ("--nodes", "Nodes file of the form <node> <source-or-target>. Tab-delimited."),
        ("--output", "Output file"),
    )
    for flag, help_text in path_options:
        arg_parser.add_argument(flag, type=Path, required=True, help=help_text)

    return arg_parser.parse_args()
|
||
|
||
def allpairs(network_file: Path, nodes_file: Path, output_file: Path):
    """
    Write the union of the shortest paths between every source and every target.

    The network is read as an undirected, weighted NetworkX graph and one weighted
    shortest path is computed for each (source, target) pair. The edges of all of these
    paths are merged into a single graph that is written as a tab-delimited edge list
    without weights.

    @param network_file: tab-delimited edge list of the form <node1> <node2> <weight>
    @param nodes_file: tab-delimited file of the form <node> <source-or-target>; rows whose
                       second column is neither 'source' nor 'target' (such as a header row)
                       are ignored
    @param output_file: path of the edge list to write; parent directories are created and an
                        existing file is overwritten
    @raise OSError: if the network file or the nodes file does not exist
    @raise ValueError: if there are no sources, no targets, or a source/target is not a node
                       of the network
    Errors raised by nx.shortest_path (for example when a source and a target are not
    connected) are not caught here.
    """
    if not network_file.exists():
        raise OSError(f"Network file {str(network_file)} does not exist")
    if not nodes_file.exists():
        raise OSError(f"Nodes file {str(nodes_file)} does not exist")
    if output_file.exists():
        print(f"Output file {str(output_file)} will be overwritten")

    # Create the parent directories for the output file if needed
    output_file.parent.mkdir(parents=True, exist_ok=True)

    # Read the list of nodes
    sources = set()
    targets = set()
    with nodes_file.open() as nodes_f:
        for line in nodes_f:
            row = line.strip().split(sep='\t')
            # Blank lines and rows without a node type column carry no source/target label
            if len(row) < 2:
                continue
            if row[1] == 'source':
                sources.add(row[0])
            elif row[1] == 'target':
                targets.add(row[0])

    # There should be at least one source and one target.
    # Explicit raises are used instead of assert so the checks survive `python -O`.
    if not sources:
        raise ValueError('There are no sources.')
    if not targets:
        raise ValueError('There are no targets.')

    # Read graph & check that all the sources/targets are in the network
    graph = nx.read_weighted_edgelist(network_file, delimiter='\t')
    if not sources.issubset(graph.nodes()):
        raise ValueError('At least one source is not in the interactome.')
    if not targets.issubset(graph.nodes()):
        raise ValueError('At least one target is not in the interactome.')

    # Finally, compute all-pairs-shortest-paths and record the subgraph.
    # Iterating in sorted order keeps the order of the written edges independent of
    # string hash randomisation between runs.
    output = nx.Graph()
    for source in sorted(sources):
        for target in sorted(targets):
            path = nx.shortest_path(graph, source, target, weight='weight')
            nx.add_path(output, path)

    # Write the subgraph as a list of edges.
    nx.write_edgelist(output, output_file, data=False, delimiter='\t')
    print(f"Wrote output file to {str(output_file)}")
|
||
|
||
def main():
    """
    Entry point: read the command line arguments and run the pathway reconstruction
    """
    cli_args = parse_arguments()
    allpairs(network_file=cli_args.network, nodes_file=cli_args.nodes, output_file=cli_args.output)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
import warnings | ||
from pathlib import Path | ||
|
||
import pandas as pd | ||
|
||
from src.prm import PRM | ||
from src.util import prepare_volume, run_container | ||
|
||
__all__ = ['AllPairs'] | ||
|
||
|
||
class AllPairs(PRM):
    """
    Wrapper for the All Pairs Shortest Paths container: writes its input files,
    runs the container and converts its output into the universal pathway format.
    """
    # File types that generate_inputs writes and that must be present in its filename_map
    required_inputs = ['nodetypes', 'network']

    @staticmethod
    def generate_inputs(data, filename_map):
        """
        Access fields from the dataset and write the required input files
        @param data: dataset
        @param filename_map: a dict mapping file types in the required_inputs to the filename for that type
        @raise ValueError: if a required filename is missing or the dataset has no sources and targets
        """
        for input_type in AllPairs.required_inputs:
            if input_type not in filename_map:
                # f-string so the message names the missing input type
                raise ValueError(f"{input_type} filename is missing")

        # Get sources and targets for node input file
        # Borrowed code from pathlinker.py
        sources_targets = data.request_node_columns(["sources", "targets"])
        if sources_targets is None:
            raise ValueError("All Pairs Shortest Paths requires sources and targets")

        both_series = sources_targets.sources & sources_targets.targets
        for _index, row in sources_targets[both_series].iterrows():
            warn_msg = row.NODEID + " has been labeled as both a source and a target."
            warnings.warn(warn_msg, stacklevel=2)

        # Create nodetype file
        input_df = sources_targets[["NODEID"]].copy()
        input_df.columns = ["#Node"]
        # The target assignment runs second, so a node flagged as both a source and a
        # target is written with the node type "target".
        # NOTE(review): `== True` is kept as in the original; presumably the columns may
        # hold non-boolean values such as NaN - confirm before simplifying.
        input_df.loc[sources_targets["sources"] == True, "Node type"] = "source"
        input_df.loc[sources_targets["targets"] == True, "Node type"] = "target"

        input_df.to_csv(filename_map["nodetypes"], sep="\t", index=False, columns=["#Node", "Node type"])

        # This is pretty memory intensive. We might want to keep the interactome centralized.
        data.get_interactome().to_csv(filename_map["network"], sep="\t", index=False,
                                      columns=["Interactor1", "Interactor2", "Weight"],
                                      header=["#Interactor1", "Interactor2", "Weight"])

    @staticmethod
    def run(nodetypes=None, network=None, output_file=None, singularity=False):
        """
        Run All Pairs Shortest Paths with Docker
        @param nodetypes: input node types with sources and targets (required)
        @param network: input network file (required)
        @param output_file: path to the output pathway file (required)
        @param singularity: if True, run using the Singularity container instead of the Docker container
        @raise ValueError: if any of the required arguments is missing
        """
        if not nodetypes or not network or not output_file:
            raise ValueError('Required All Pairs Shortest Paths arguments are missing')

        work_dir = '/apsp'

        # Each volume is a tuple (src, dest)
        volumes = []

        bind_path, node_file = prepare_volume(nodetypes, work_dir)
        volumes.append(bind_path)

        bind_path, network_file = prepare_volume(network, work_dir)
        volumes.append(bind_path)

        # Create the parent directories for the output file if needed
        Path(output_file).parent.mkdir(parents=True, exist_ok=True)
        bind_path, mapped_out_file = prepare_volume(output_file, work_dir)
        volumes.append(bind_path)

        # The script path matches the location the Dockerfile copies the script to
        command = ['python',
                   '/AllPairs/all-pairs-shortest-paths.py',
                   '--network', network_file,
                   '--nodes', node_file,
                   '--output', mapped_out_file]

        print(f"Running All Pairs Shortest Paths with arguments: {' '.join(command)}", flush=True)

        container_framework = 'singularity' if singularity else 'docker'
        out = run_container(container_framework,
                            'reedcompbio/allpairs',
                            command,
                            volumes,
                            work_dir)
        print(out)

    @staticmethod
    def parse_output(raw_pathway_file, standardized_pathway_file):
        """
        Convert a predicted pathway into the universal format
        @param raw_pathway_file: pathway file produced by an algorithm's run function
        @param standardized_pathway_file: the same pathway written in the universal format
        """
        try:
            df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
        except pd.errors.EmptyDataError:
            # An empty raw file is a pathway without edges (for example when the only
            # source is also the only target), so the standardized pathway is empty too.
            Path(standardized_pathway_file).write_text('')
            return
        df['Rank'] = 1  # add a rank column of 1s since the edges are not ranked.
        df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
A B | ||
A E | ||
B C |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
S1 A | ||
S1 B | ||
A E | ||
A F | ||
E T1 | ||
T1 F | ||
F T2 | ||
F B | ||
B S2 | ||
S2 T3 |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#Node1 Node2 | ||
A B 1 | ||
B C 1 | ||
C D 1 | ||
D E 1 | ||
A E 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#Node Node type | ||
A source | ||
B source | ||
C target | ||
E target |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#Node1 Node2 | ||
S1 A 0.5 | ||
A E 0.5 | ||
E T1 0.5 | ||
E F 0.5 | ||
F E 0.5 | ||
F A 0.5 | ||
T1 F 0.5 | ||
F T2 0.5 | ||
B S1 0.5 | ||
B F 0.5 | ||
B C 0.5 | ||
S2 B 0.5 | ||
S2 C 0.5 | ||
S2 T3 0.5 | ||
C G 0.5 | ||
G C 0.5 | ||
C F 0.5 | ||
G F 0.5 | ||
G T2 0.5 | ||
G T3 0.5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#Node Node type | ||
S1 source | ||
S2 source | ||
T1 target | ||
T2 target | ||
T3 target |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#Node1 Node2 | ||
A B 1 | ||
B C 1 | ||
C D 1 | ||
D E 1 | ||
A E 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#Node Node type | ||
A source | ||
A target |
Oops, something went wrong.