From b0d5ec2d88e08260c3c07818f540985c4c433e53 Mon Sep 17 00:00:00 2001 From: Nina Young Date: Thu, 28 Jul 2022 12:47:17 -0700 Subject: [PATCH 01/33] added template files for allpairs --- PRRunner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/PRRunner.py b/PRRunner.py index e85d1f60..b844cdad 100644 --- a/PRRunner.py +++ b/PRRunner.py @@ -5,6 +5,7 @@ from src.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1 from src.omicsintegrator2 import OmicsIntegrator2 as omicsintegrator2 from src.pathlinker import PathLinker as pathlinker +from src.all-pairs-shortest-paths import allpairsshortestpaths def run(algorithm, params): """ From aab1711e443363f1580c3ff51850529b2683f750 Mon Sep 17 00:00:00 2001 From: Nina Young Date: Thu, 28 Jul 2022 13:04:22 -0700 Subject: [PATCH 02/33] Added some template files --- .../Dockerfile (unfinished) | 30 +++++ src/all-pairs-shortest-paths.py | 7 ++ src/local_neighborhood_practice.py | 114 ++++++++++++++++++ 3 files changed, 151 insertions(+) create mode 100644 docker-wrappers/All-pairs-shortest-paths/Dockerfile (unfinished) create mode 100644 src/all-pairs-shortest-paths.py create mode 100644 src/local_neighborhood_practice.py diff --git a/docker-wrappers/All-pairs-shortest-paths/Dockerfile (unfinished) b/docker-wrappers/All-pairs-shortest-paths/Dockerfile (unfinished) new file mode 100644 index 00000000..e92b98ea --- /dev/null +++ b/docker-wrappers/All-pairs-shortest-paths/Dockerfile (unfinished) @@ -0,0 +1,30 @@ +# Omics Integrator 1 wrapper +# https://github.com/fraenkel-lab/OmicsIntegrator +# Activates the conda environment before running command inside container +# Uses the strategy from https://pythonspeed.com/articles/activate-conda-dockerfile/ +# by Itamar Turner-Trauring +FROM continuumio/miniconda3:4.9.2 + +# Need to install msgsteiner-1.3 and dependencies +RUN apt-get -qq update --allow-releaseinfo-change && \ + apt-get install -y build-essential libx11-dev libboost-dev libboost-program-options-dev + +RUN 
commit=0a57ede6beeef6e63b86d19898e560d62015e85d && \ + wget https://github.com/fraenkel-lab/OmicsIntegrator/tarball/$commit && \ + tar -zxvf $commit && \ + rm $commit && \ + mv fraenkel-lab-OmicsIntegrator-* OmicsIntegrator && \ + cd OmicsIntegrator/ && \ + wget http://staff.polito.it/alfredo.braunstein/code/msgsteiner-1.3.tgz && \ + tar -zxvf msgsteiner-1.3.tgz && \ + cd msgsteiner-1.3 && \ + patch Makefile ../patches/Makefile.linux.patch && \ + make + +ENV MSGSTEINER_PATH=/OmicsIntegrator/msgsteiner-1.3/msgsteiner +WORKDIR /OmicsIntegrator + +COPY environment.yml . +RUN conda env create -f environment.yml + +ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "oi1"] diff --git a/src/all-pairs-shortest-paths.py b/src/all-pairs-shortest-paths.py new file mode 100644 index 00000000..d491f5b9 --- /dev/null +++ b/src/all-pairs-shortest-paths.py @@ -0,0 +1,7 @@ +from src.PRM import PRM +from pathlib import Path +from src.util import prepare_volume, run_container +import pandas as pd +import networkx + +__all__ = ['allpairsshortestpaths', 'write_conf'] diff --git a/src/local_neighborhood_practice.py b/src/local_neighborhood_practice.py new file mode 100644 index 00000000..243816f2 --- /dev/null +++ b/src/local_neighborhood_practice.py @@ -0,0 +1,114 @@ +# Test wrapper function as a part of the contributor tutorial # +import pandas as pd +import warnings +from src.PRM import PRM +from pathlib import Path +from src.util import prepare_volume, run_container + +__all__ = ['LocalNeighborhood'] + +class LocalNeighborhood(PRM): + required_inputs = ['network', 'nodes'] + + @staticmethod + def generate_inputs(data, filename_map): + """ + Access fields from the dataset and write the required input files + @param data: dataset + @param filename_map: a dict mapping file types in the required_inputs to the filename for that type + @return: + """ + for input_type in LocalNeighborhood.required_inputs: + if input_type not in filename_map: + raise ValueError(f"{input_type} filename 
is missing") + + if data.contains_node_columns(['prize','sources','targets']): + node_df = data.request_node_columns(['prize','sources','targets']) + node_df.loc[node_df['sources']==True, 'prize'] = 1.0 + node_df.loc[node_df['targets']==True, 'prize'] = 1.0 + node_df.loc[node_df['prize']==True, 'prize'] = 1.0 + + else: + raise ValueError("Local Neighborhood requires node prizes or sources and targets") + + node_df.to_csv(filename_map['prizes'],index=False,columns=['NODEID'],header=False) + + #For now we assume all input networks are undirected until we expand how edge tables work + edges_df = data.get_interactome() + edges_df.to_csv(filename_map['edges'],sep='|',index=False,columns=['Interactor1','Interactor2'],header=False) + + @staticmethod + def run(nodetypes=None, network=None, output_file=None, k=None, singularity=False): + """ + Run LocalNeighborhood with Docker + @param nodetypes: input node types with sources and targets (required) + @param network: input network file (required) + @param output_file: path to the output pathway file (required) + @param k: path length (optional) + @param singularity: if True, run using the Singularity container instead of the Docker container + """ + # Add additional parameter validation + # Do not require k + # Use the LocalNeighborhood default + # Could consider setting the default here instead + if not nodetypes or not network or not output_file: + raise ValueError('Required LocalNeighborhood arguments are missing') + + work_dir = '/spras' + + # Each volume is a tuple (src, dest) + volumes = list() + + bind_path, node_file = prepare_volume(nodetypes, work_dir) + volumes.append(bind_path) + + bind_path, network_file = prepare_volume(network, work_dir) + volumes.append(bind_path) + + # LocalNeighborhood does not provide an argument to set the output directory + # Use its --output argument to set the output file prefix to specify an absolute path and prefix + out_dir = Path(output_file).parent + # LocalNeighborhood requires 
that the output directory exist + out_dir.mkdir(parents=True, exist_ok=True) + bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) + volumes.append(bind_path) + mapped_out_prefix = mapped_out_dir + '/out' # Use posix path inside the container + + command = ['python3', + '/LocalNeighborhood/run.py', + '--network', network_file, + '--nodes', node_file, + '--output', mapped_out_prefix] + + # Add optional argument + if k is not None: + command.extend(['-k', str(k)]) + + print('Running LocalNeighborhood with arguments: {}'.format(' '.join(command)), flush=True) + + # TODO consider making this a string in the config file instead of a Boolean + container_framework = 'singularity' if singularity else 'docker' + out = run_container(container_framework, + 'ninayoung/local-neighborhood', + command, + volumes, + work_dir) + print(out) + + # Rename the primary output file to match the desired output filename + # Currently LocalNeighborhood only writes one output file so we do not need to delete others + # We may not know the value of k that was used + output_edges = Path(next(out_dir.glob('out*-ranked-edges.txt'))) + output_edges.rename(output_file) + + @staticmethod + def parse_output(raw_pathway_file, standardized_pathway_file): + """ + Convert a predicted pathway into the universal format + @param raw_pathway_file: pathway file produced by an algorithm's run function + @param standardized_pathway_file: the same pathway written in the universal format + """ + df = pd.read_csv(raw_pathway_file,sep='|', axis=1) + df.insert(2, 'Rank', '1') + # node 1 | node 2 - add a 1 at the end of every line, read up on dataframes + df.to_csv(standardized_pathway_file, header=False,index=False,sep=' ') From e85a461252c5c3adc20b21073bf030bea3da6fda Mon Sep 17 00:00:00 2001 From: Nina Young Date: Thu, 4 Aug 2022 13:15:56 -0700 Subject: [PATCH 03/33] Finished implementing docker wrappers --- PRRunner.py | 2 +- src/all-pairs-shortest-paths.py | 108 +++++++++++++++++++++++++++++++- 2 
files changed, 107 insertions(+), 3 deletions(-) diff --git a/PRRunner.py b/PRRunner.py index b844cdad..1ef0e9ad 100644 --- a/PRRunner.py +++ b/PRRunner.py @@ -5,7 +5,7 @@ from src.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1 from src.omicsintegrator2 import OmicsIntegrator2 as omicsintegrator2 from src.pathlinker import PathLinker as pathlinker -from src.all-pairs-shortest-paths import allpairsshortestpaths +from src.all-pairs-shortest-paths import AllPairs def run(algorithm, params): """ diff --git a/src/all-pairs-shortest-paths.py b/src/all-pairs-shortest-paths.py index d491f5b9..f32415c1 100644 --- a/src/all-pairs-shortest-paths.py +++ b/src/all-pairs-shortest-paths.py @@ -2,6 +2,110 @@ from pathlib import Path from src.util import prepare_volume, run_container import pandas as pd -import networkx +import networkx as nx -__all__ = ['allpairsshortestpaths', 'write_conf'] +__all__ = ['AllPairs'] + +class AllPairs(PRM): + required_inputs = ['nodetypes', 'network'] + + @staticmethod + def generate_inputs(data, filename_map): + """ + Access fields from the dataset and write the required input files + @param data: dataset + @param filename_map: a dict mapping file types in the required_inputs to the filename for that type + @return: + """ + for input_type in AllPairs.required_inputs: + if input_type not in filename_map: + raise ValueError("{input_type} filename is missing") + + #Get sources and targets for node input file + sources_targets = data.request_node_columns(["sources", "targets"]) + if sources_targets is None: + return False + both_series = sources_targets.sources & sources_targets.targets + for index,row in sources_targets[both_series].iterrows(): + warn_msg = row.NODEID+" has been labeled as both a source and a target." 
+ warnings.warn(warn_msg) + + #Create nodetype file + input_df = sources_targets[["NODEID"]].copy() + input_df.columns = ["#Node"] + input_df.loc[sources_targets["sources"] == True,"Node type"]="source" + input_df.loc[sources_targets["targets"] == True,"Node type"]="target" + + input_df.to_csv(filename_map["nodetypes"],sep="\t",index=False,columns=["#Node","Node type"]) + + data.get_interactome().to_csv(filename_map["network"],sep="\t",index=False,columns=["Interactor1","Interactor2","Weight"],header=["#Interactor1","Interactor2","Weight"]) + + + @staticmethod + def run(nodetypes=None, network=None, output_file=None, singularity=False): + """ + Run AllPairs with Docker + @param nodetypes: input node types with sources and targets (required) + @param network: input network file (required) + @param output_file: path to the output pathway file (required) + @param singularity: currently inactive, implement later? + """ + if not nodetypes or not network or not output_file: + raise ValueError('Required AllPairs arguments are missing') + + work_dir = '/spras' + + # Each volume is a tuple (src, dest) + volumes = list() + + bind_path, node_file = prepare_volume(nodetypes, work_dir) + volumes.append(bind_path) + + bind_path, network_file = prepare_volume(network, work_dir) + volumes.append(bind_path) + + # AllPairs does not provide an argument to set the output directory + # Use its --output argument to set the output file prefix to specify an absolute path and prefix + out_dir = Path(output_file).parent + # AllPairs requires that the output directory exist + out_dir.mkdir(parents=True, exist_ok=True) + bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) + volumes.append(bind_path) + mapped_out_prefix = mapped_out_dir + '/out' # Use posix path inside the container + + command = ['python', + '/AllPairs/run.py', + network_file, + node_file, + '--output', mapped_out_prefix] + + # Add optional argument + if k is not None: + command.extend(['-k', str(k)]) + + 
print('Running AllPairs with arguments: {}'.format(' '.join(command)), flush=True) + + # TODO consider making this a string in the config file instead of a Boolean + container_framework = 'singularity' if singularity else 'docker' + out = run_container(container_framework, + 'reedcompbio/AllPairs', + command, + volumes, + work_dir) + print(out) + + # Rename the primary output file to match the desired output filename + # Currently AllPairs only writes one output file so we do not need to delete others + # We may not know the value of k that was used + output_edges = Path(next(out_dir.glob('out*-ranked-edges.txt'))) + output_edges.rename(output_file) + + @staticmethod + def parse_output(raw_pathway_file, standardized_pathway_file): + """ + Convert a predicted pathway into the universal format + @param raw_pathway_file: pathway file produced by an algorithm's run function + @param standardized_pathway_file: the same pathway written in the universal format + """ + df = pd.read_csv(raw_pathway_file,sep='\t').take([0,1,2],axis=1) + df.to_csv(standardized_pathway_file, header=False,index=False,sep=' ') From 1d0c88edde2628ade6dfd51601a6396a27629c3e Mon Sep 17 00:00:00 2001 From: Nina Young Date: Mon, 8 Aug 2022 20:10:00 -0700 Subject: [PATCH 04/33] Beginning implementation of AllPairs --- .../AllPairs/Dockerfile (unfinished) | 30 ++++++++ .../AllPairs/all-pairs-shortest-paths.py | 71 +++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 docker-wrappers/AllPairs/Dockerfile (unfinished) create mode 100644 docker-wrappers/AllPairs/all-pairs-shortest-paths.py diff --git a/docker-wrappers/AllPairs/Dockerfile (unfinished) b/docker-wrappers/AllPairs/Dockerfile (unfinished) new file mode 100644 index 00000000..e92b98ea --- /dev/null +++ b/docker-wrappers/AllPairs/Dockerfile (unfinished) @@ -0,0 +1,30 @@ +# Omics Integrator 1 wrapper +# https://github.com/fraenkel-lab/OmicsIntegrator +# Activates the conda environment before running command inside container +# 
Uses the strategy from https://pythonspeed.com/articles/activate-conda-dockerfile/ +# by Itamar Turner-Trauring +FROM continuumio/miniconda3:4.9.2 + +# Need to install msgsteiner-1.3 and dependencies +RUN apt-get -qq update --allow-releaseinfo-change && \ + apt-get install -y build-essential libx11-dev libboost-dev libboost-program-options-dev + +RUN commit=0a57ede6beeef6e63b86d19898e560d62015e85d && \ + wget https://github.com/fraenkel-lab/OmicsIntegrator/tarball/$commit && \ + tar -zxvf $commit && \ + rm $commit && \ + mv fraenkel-lab-OmicsIntegrator-* OmicsIntegrator && \ + cd OmicsIntegrator/ && \ + wget http://staff.polito.it/alfredo.braunstein/code/msgsteiner-1.3.tgz && \ + tar -zxvf msgsteiner-1.3.tgz && \ + cd msgsteiner-1.3 && \ + patch Makefile ../patches/Makefile.linux.patch && \ + make + +ENV MSGSTEINER_PATH=/OmicsIntegrator/msgsteiner-1.3/msgsteiner +WORKDIR /OmicsIntegrator + +COPY environment.yml . +RUN conda env create -f environment.yml + +ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "oi1"] diff --git a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py new file mode 100644 index 00000000..9d9b8380 --- /dev/null +++ b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py @@ -0,0 +1,71 @@ +""" +All Pairs Shortest Paths pathway reconstruction algorithm. +The algorithm takes a network and a list of sources and targets as input. +It outputs the shortest possible path between every source and every target. +""" + +import argparse +from pathlib import Path +import networkx as nx + + +def parse_arguments(): + """ + Process command line arguments. 
+ @return arguments + """ + parser = argparse.ArgumentParser( + description="AllPairs pathway reconstruction" + ) + parser.add_argument("--network", type=Path, required=True, help="Path to the network file with '|' delimited node pairs") + parser.add_argument("--nodes", type=Path, required=True, help="Path to the nodes file") + parser.add_argument("--output", type=Path, required=True, help="Path to the output file that will be written") + + return parser.parse_args() + + +def allpairs(network_file: Path, nodes_file: Path, output_file: Path): + if not network_file.exists(): + raise OSError(f"Network file {str(network_file)} does not exist") + if not nodes_file.exists(): + raise OSError(f"Nodes file {str(nodes_file)} does not exist") + if output_file.exists(): + print(f"Output files {str(output_file)} will be overwritten") + + # Create the parent directories for the output file if needed + output_file.parent.mkdir(parents=True, exist_ok=True) + + # Read the list of nodes + nodes = set() + with nodes_file.open() as nodes_f: + for line in nodes_f: + nodes.add(line.strip()) + print(f"Read {len(nodes)} unique nodes") + + # Iterate through the network edges and write those that have an endpoint in the node set + in_edge_counter = 0 + out_edge_counter = 0 + with output_file.open('w') as output_f: + with network_file.open() as network_f: + for line in network_f: + line = line.strip() + in_edge_counter += 1 + endpoints = line.split("|") + if len(endpoints) != 2: + raise ValueError(f"Edge {line} does not contain 2 nodes separated by '|'") + if endpoints[0] in nodes or endpoints[1] in nodes: + out_edge_counter += 1 + output_f.write(f"{line}\n") + print(f"Kept {out_edge_counter} of {in_edge_counter} edges") + + +def main(): + """ + Parse arguments and run pathway reconstruction + """ + args = parse_arguments() + allpairs(args.network, args.nodes, args.output) + + +if __name__ == "__main__": + main() From e4b3b6c34be101b5bc9332e9e3fdd946d3bdb4cc Mon Sep 17 00:00:00 2001 From: 
Nina Young Date: Wed, 10 Aug 2022 14:26:48 -0700 Subject: [PATCH 05/33] moved docker file around --- .../AllPairs/Dockerfile (unfinished) | 30 ------------------- 1 file changed, 30 deletions(-) delete mode 100644 docker-wrappers/AllPairs/Dockerfile (unfinished) diff --git a/docker-wrappers/AllPairs/Dockerfile (unfinished) b/docker-wrappers/AllPairs/Dockerfile (unfinished) deleted file mode 100644 index e92b98ea..00000000 --- a/docker-wrappers/AllPairs/Dockerfile (unfinished) +++ /dev/null @@ -1,30 +0,0 @@ -# Omics Integrator 1 wrapper -# https://github.com/fraenkel-lab/OmicsIntegrator -# Activates the conda environment before running command inside container -# Uses the strategy from https://pythonspeed.com/articles/activate-conda-dockerfile/ -# by Itamar Turner-Trauring -FROM continuumio/miniconda3:4.9.2 - -# Need to install msgsteiner-1.3 and dependencies -RUN apt-get -qq update --allow-releaseinfo-change && \ - apt-get install -y build-essential libx11-dev libboost-dev libboost-program-options-dev - -RUN commit=0a57ede6beeef6e63b86d19898e560d62015e85d && \ - wget https://github.com/fraenkel-lab/OmicsIntegrator/tarball/$commit && \ - tar -zxvf $commit && \ - rm $commit && \ - mv fraenkel-lab-OmicsIntegrator-* OmicsIntegrator && \ - cd OmicsIntegrator/ && \ - wget http://staff.polito.it/alfredo.braunstein/code/msgsteiner-1.3.tgz && \ - tar -zxvf msgsteiner-1.3.tgz && \ - cd msgsteiner-1.3 && \ - patch Makefile ../patches/Makefile.linux.patch && \ - make - -ENV MSGSTEINER_PATH=/OmicsIntegrator/msgsteiner-1.3/msgsteiner -WORKDIR /OmicsIntegrator - -COPY environment.yml . 
-RUN conda env create -f environment.yml - -ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "oi1"] From ed49f4ccb7cefaab626a7939a401c68284c671f4 Mon Sep 17 00:00:00 2001 From: Nina Young Date: Wed, 10 Aug 2022 16:23:45 -0700 Subject: [PATCH 06/33] Updated with networkx --- test/AllPairs/input/sample-in-net.txt | 21 +++++++++++++++++++++ test/AllPairs/input/sample-in-nodetypes.txt | 6 ++++++ 2 files changed, 27 insertions(+) create mode 100644 test/AllPairs/input/sample-in-net.txt create mode 100644 test/AllPairs/input/sample-in-nodetypes.txt diff --git a/test/AllPairs/input/sample-in-net.txt b/test/AllPairs/input/sample-in-net.txt new file mode 100644 index 00000000..0816fa48 --- /dev/null +++ b/test/AllPairs/input/sample-in-net.txt @@ -0,0 +1,21 @@ +#Node1 Node2 +S1 A 0.5 +A E 0.5 +E T1 0.5 +E F 0.5 +F E 0.5 +F A 0.5 +T1 F 0.5 +F T2 0.5 +B S1 0.5 +B F 0.5 +B C 0.5 +S2 B 0.5 +S2 C 0.5 +S2 T3 0.5 +C G 0.5 +G C 0.5 +C F 0.5 +G F 0.5 +G T2 0.5 +G T3 0.5 \ No newline at end of file diff --git a/test/AllPairs/input/sample-in-nodetypes.txt b/test/AllPairs/input/sample-in-nodetypes.txt new file mode 100644 index 00000000..3abac1ca --- /dev/null +++ b/test/AllPairs/input/sample-in-nodetypes.txt @@ -0,0 +1,6 @@ +#Node Node type +S1 source +S2 source +T1 target +T2 target +T3 target \ No newline at end of file From 3b023c8a3dd5e716a91f8fc11bf110e1d2dacf3c Mon Sep 17 00:00:00 2001 From: Nina Young Date: Thu, 11 Aug 2022 23:45:08 -0700 Subject: [PATCH 07/33] Implemented broken file writing --- .../Dockerfile (unfinished) | 30 ------------------- .../AllPairs/all-pairs-shortest-paths.py | 19 +++--------- 2 files changed, 4 insertions(+), 45 deletions(-) delete mode 100644 docker-wrappers/All-pairs-shortest-paths/Dockerfile (unfinished) diff --git a/docker-wrappers/All-pairs-shortest-paths/Dockerfile (unfinished) b/docker-wrappers/All-pairs-shortest-paths/Dockerfile (unfinished) deleted file mode 100644 index e92b98ea..00000000 --- 
a/docker-wrappers/All-pairs-shortest-paths/Dockerfile (unfinished) +++ /dev/null @@ -1,30 +0,0 @@ -# Omics Integrator 1 wrapper -# https://github.com/fraenkel-lab/OmicsIntegrator -# Activates the conda environment before running command inside container -# Uses the strategy from https://pythonspeed.com/articles/activate-conda-dockerfile/ -# by Itamar Turner-Trauring -FROM continuumio/miniconda3:4.9.2 - -# Need to install msgsteiner-1.3 and dependencies -RUN apt-get -qq update --allow-releaseinfo-change && \ - apt-get install -y build-essential libx11-dev libboost-dev libboost-program-options-dev - -RUN commit=0a57ede6beeef6e63b86d19898e560d62015e85d && \ - wget https://github.com/fraenkel-lab/OmicsIntegrator/tarball/$commit && \ - tar -zxvf $commit && \ - rm $commit && \ - mv fraenkel-lab-OmicsIntegrator-* OmicsIntegrator && \ - cd OmicsIntegrator/ && \ - wget http://staff.polito.it/alfredo.braunstein/code/msgsteiner-1.3.tgz && \ - tar -zxvf msgsteiner-1.3.tgz && \ - cd msgsteiner-1.3 && \ - patch Makefile ../patches/Makefile.linux.patch && \ - make - -ENV MSGSTEINER_PATH=/OmicsIntegrator/msgsteiner-1.3/msgsteiner -WORKDIR /OmicsIntegrator - -COPY environment.yml . 
-RUN conda env create -f environment.yml - -ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "oi1"] diff --git a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py index 9d9b8380..0fb695a9 100644 --- a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py +++ b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py @@ -42,21 +42,10 @@ def allpairs(network_file: Path, nodes_file: Path, output_file: Path): nodes.add(line.strip()) print(f"Read {len(nodes)} unique nodes") - # Iterate through the network edges and write those that have an endpoint in the node set - in_edge_counter = 0 - out_edge_counter = 0 - with output_file.open('w') as output_f: - with network_file.open() as network_f: - for line in network_f: - line = line.strip() - in_edge_counter += 1 - endpoints = line.split("|") - if len(endpoints) != 2: - raise ValueError(f"Edge {line} does not contain 2 nodes separated by '|'") - if endpoints[0] in nodes or endpoints[1] in nodes: - out_edge_counter += 1 - output_f.write(f"{line}\n") - print(f"Kept {out_edge_counter} of {in_edge_counter} edges") + path = nx.all_pairs_shortest_path(network_file, cutoff=None) + print(path) + nx.write_edgelist(path, output_file) + print(f"Wrote output file to {str(output_file)}") def main(): From 1b138a3e3951905c694712e3bdf51cb53f513c80 Mon Sep 17 00:00:00 2001 From: Nina Young Date: Tue, 16 Aug 2022 15:26:47 -0700 Subject: [PATCH 08/33] Working unweighted --- .../AllPairs/all-pairs-shortest-paths.py | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py index 0fb695a9..4d672660 100644 --- a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py +++ b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py @@ -36,15 +36,38 @@ def allpairs(network_file: Path, nodes_file: Path, output_file: Path): output_file.parent.mkdir(parents=True, 
exist_ok=True) # Read the list of nodes - nodes = set() + graph = nx.Graph() + sources = set() + targets = set() with nodes_file.open() as nodes_f: for line in nodes_f: - nodes.add(line.strip()) - print(f"Read {len(nodes)} unique nodes") + row = line.strip().split() + if row[1] == 'source': + sources.add(row[0]) + elif row[1] == 'target': + targets.add(row[0]) - path = nx.all_pairs_shortest_path(network_file, cutoff=None) - print(path) - nx.write_edgelist(path, output_file) + + with network_file.open() as net_f: + for line in net_f: + if line[0] == '#': + continue + e = line.strip().split() + print(e) + graph.add_edge(e[0], e[1]) + + print(graph) + + output = nx.Graph() + for source in sources: + p = nx.single_source_shortest_path(graph, source, cutoff=None) + for target in targets: + print(source, target, p[target]) + nx.add_path(output, p[target]) + print(output) + + nx.write_edgelist(output, output_file, data=False) + print(output) print(f"Wrote output file to {str(output_file)}") From bfdad8aa8c2e07e561f1b86ebe92efebd6360290 Mon Sep 17 00:00:00 2001 From: Nina Young Date: Fri, 26 Aug 2022 12:48:42 -0700 Subject: [PATCH 09/33] Updated AllPairs to work with a more efficient method of generation --- docker-wrappers/AllPairs/all-pairs-shortest-paths.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py index 4d672660..09fa4034 100644 --- a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py +++ b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py @@ -54,17 +54,15 @@ def allpairs(network_file: Path, nodes_file: Path, output_file: Path): continue e = line.strip().split() print(e) - graph.add_edge(e[0], e[1]) + graph.add_edge(e[0], e[1], weight=float(e[2])) print(graph) output = nx.Graph() for source in sources: - p = nx.single_source_shortest_path(graph, source, cutoff=None) - for target in targets: - print(source, target, 
p[target]) - nx.add_path(output, p[target]) - print(output) + for target in targets: + p = nx.shortest_path(graph, source, target, weight=None) + nx.add_path(output, p) nx.write_edgelist(output, output_file, data=False) print(output) From aca7f33676a40a5f01c904ac02d4f12743545855 Mon Sep 17 00:00:00 2001 From: Nina Young Date: Thu, 8 Sep 2022 01:14:39 -0700 Subject: [PATCH 10/33] Dockerfile updated and sample inputs added --- docker-wrappers/AllPairs/Dockerfile | 4 ++++ docker-wrappers/AllPairs/sample-in-net.txt | 21 +++++++++++++++++++ .../AllPairs/sample-in-nodetypes.txt | 6 ++++++ 3 files changed, 31 insertions(+) create mode 100644 docker-wrappers/AllPairs/Dockerfile create mode 100644 docker-wrappers/AllPairs/sample-in-net.txt create mode 100644 docker-wrappers/AllPairs/sample-in-nodetypes.txt diff --git a/docker-wrappers/AllPairs/Dockerfile b/docker-wrappers/AllPairs/Dockerfile new file mode 100644 index 00000000..544cc9ea --- /dev/null +++ b/docker-wrappers/AllPairs/Dockerfile @@ -0,0 +1,4 @@ +# AllPairs wrapper +FROM alpine:3.16 + +COPY sample-in-net.txt sample-in-nodetypes.txt . 
diff --git a/docker-wrappers/AllPairs/sample-in-net.txt b/docker-wrappers/AllPairs/sample-in-net.txt new file mode 100644 index 00000000..0816fa48 --- /dev/null +++ b/docker-wrappers/AllPairs/sample-in-net.txt @@ -0,0 +1,21 @@ +#Node1 Node2 +S1 A 0.5 +A E 0.5 +E T1 0.5 +E F 0.5 +F E 0.5 +F A 0.5 +T1 F 0.5 +F T2 0.5 +B S1 0.5 +B F 0.5 +B C 0.5 +S2 B 0.5 +S2 C 0.5 +S2 T3 0.5 +C G 0.5 +G C 0.5 +C F 0.5 +G F 0.5 +G T2 0.5 +G T3 0.5 \ No newline at end of file diff --git a/docker-wrappers/AllPairs/sample-in-nodetypes.txt b/docker-wrappers/AllPairs/sample-in-nodetypes.txt new file mode 100644 index 00000000..3abac1ca --- /dev/null +++ b/docker-wrappers/AllPairs/sample-in-nodetypes.txt @@ -0,0 +1,6 @@ +#Node Node type +S1 source +S2 source +T1 target +T2 target +T3 target \ No newline at end of file From bf37d14094b777e2560ef544e2972067e693021b Mon Sep 17 00:00:00 2001 From: Anna Ritz Date: Sun, 27 Nov 2022 15:13:14 -0800 Subject: [PATCH 11/33] partway through modifying AllPairs after summer work --- src/all-pairs-shortest-paths.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/all-pairs-shortest-paths.py b/src/all-pairs-shortest-paths.py index f32415c1..badcef51 100644 --- a/src/all-pairs-shortest-paths.py +++ b/src/all-pairs-shortest-paths.py @@ -7,7 +7,7 @@ __all__ = ['AllPairs'] class AllPairs(PRM): - required_inputs = ['nodetypes', 'network'] + required_inputs = ['sources', 'tagets', 'edges'] @staticmethod def generate_inputs(data, filename_map): From aa859dffdcc372b8db6a4bb695292e9cc4bc9c13 Mon Sep 17 00:00:00 2001 From: Anna Ritz Date: Sun, 27 Nov 2022 17:08:18 -0800 Subject: [PATCH 12/33] committing allpairs information --- .github/workflows/test-spras.yml | 9 +++++ PRRunner.py | 2 +- config/config.yaml | 18 ++++++---- docker-wrappers/AllPairs/Dockerfile | 7 ++-- .../AllPairs/all-pairs-shortest-paths.py | 33 +++++++++---------- ...ll-pairs-shortest-paths.py => allpairs.py} | 33 +++++++++---------- 6 files changed, 56 insertions(+), 46 
deletions(-) rename src/{all-pairs-shortest-paths.py => allpairs.py} (79%) diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml index 8199f437..0f035aae 100644 --- a/.github/workflows/test-spras.yml +++ b/.github/workflows/test-spras.yml @@ -126,3 +126,12 @@ jobs: tags: latest cache_froms: reedcompbio/mincostflow:latest push: false + - name: Build AllPairs Docker image + uses: docker/build-push-action@v1 + with: + path: docker-wrappers/AllPairs/. + dockerfile: docker-wrappers/AllPairs/Dockerfile + repository: annaritz/allpairs + tags: latest + cache_froms: annaritz/allpairs:latest + push: false diff --git a/PRRunner.py b/PRRunner.py index d7dc8a63..4ac0fef5 100644 --- a/PRRunner.py +++ b/PRRunner.py @@ -6,7 +6,7 @@ from src.omicsintegrator2 import OmicsIntegrator2 as omicsintegrator2 from src.pathlinker import PathLinker as pathlinker from src.mincostflow import MinCostFlow as mincostflow -from src.all-pairs-shortest-paths import AllPairs as allpairs +from src.allpairs import AllPairs as allpairs def run(algorithm, params): """ diff --git a/config/config.yaml b/config/config.yaml index 29d87e34..28619f00 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -28,14 +28,14 @@ algorithms: - name: "pathlinker" params: - include: true + include: false directed: true run1: k: range(100,201,100) - name: "omicsintegrator1" params: - include: true + include: false directed: false run1: r: [5] @@ -46,7 +46,7 @@ - name: "omicsintegrator2" params: - include: true + include: false directed: false run1: b: [4] @@ -56,21 +56,25 @@ g: [3] - name: "meo" params: - include: true + include: false directed: true run1: max_path_length: [3] local_search: ["Yes"] rand_restarts: [10] - + - name: "mincostflow" params: - include: true + include: false directed: false run1: flow: [1] # The flow must be an int capacity: [1] - + + - name: "allpairs" + params: + include: true + directed: false # Here we specify which pathways to run and other file location 
information. # DataLoader.py can currently only load a single dataset diff --git a/docker-wrappers/AllPairs/Dockerfile b/docker-wrappers/AllPairs/Dockerfile index 544cc9ea..d75572e2 100644 --- a/docker-wrappers/AllPairs/Dockerfile +++ b/docker-wrappers/AllPairs/Dockerfile @@ -1,4 +1,7 @@ # AllPairs wrapper -FROM alpine:3.16 +FROM python:3.9-alpine3.16 -COPY sample-in-net.txt sample-in-nodetypes.txt . +WORKDIR /AllPairs + +RUN pip install networkx==2.6.3 +COPY all-pairs-shortest-paths.py /AllPairs/all-pairs-shortest-paths.py diff --git a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py index 09fa4034..71eed8b5 100644 --- a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py +++ b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py @@ -8,7 +8,6 @@ from pathlib import Path import networkx as nx - def parse_arguments(): """ Process command line arguments. @@ -17,9 +16,9 @@ def parse_arguments(): parser = argparse.ArgumentParser( description="AllPairs pathway reconstruction" ) - parser.add_argument("--network", type=Path, required=True, help="Path to the network file with '|' delimited node pairs") - parser.add_argument("--nodes", type=Path, required=True, help="Path to the nodes file") - parser.add_argument("--output", type=Path, required=True, help="Path to the output file that will be written") + parser.add_argument("--network", type=Path, required=True, help="Network file of the form ") + parser.add_argument("--nodes", type=Path, required=True, help="Nodes file of the form ") + parser.add_argument("--output", type=Path, required=True, help="Output file") return parser.parse_args() @@ -30,13 +29,12 @@ def allpairs(network_file: Path, nodes_file: Path, output_file: Path): if not nodes_file.exists(): raise OSError(f"Nodes file {str(nodes_file)} does not exist") if output_file.exists(): - print(f"Output files {str(output_file)} will be overwritten") + print(f"Output file {str(output_file)} will be 
overwritten") # Create the parent directories for the output file if needed output_file.parent.mkdir(parents=True, exist_ok=True) # Read the list of nodes - graph = nx.Graph() sources = set() targets = set() with nodes_file.open() as nodes_f: @@ -47,28 +45,27 @@ def allpairs(network_file: Path, nodes_file: Path, output_file: Path): elif row[1] == 'target': targets.add(row[0]) + # there should be at least one source and one target. + assert len(sources) > 0, 'There are no sources.' + assert len(targets) > 0, 'There are no targets.' + assert len(sources.intersection(targets)) == 0, 'There is at least one source that is also a target.' - with network_file.open() as net_f: - for line in net_f: - if line[0] == '#': - continue - e = line.strip().split() - print(e) - graph.add_edge(e[0], e[1], weight=float(e[2])) - - print(graph) + # Read graph & assert that sources/targets are in network + graph = nx.read_weighted_edgelist(network_file) + assert len(sources.intersection(graph.nodes())) == len(sources), 'At least one source is not in the interactome.' + assert len(targets.intersection(graph.nodes())) == len(targets), 'At least one target is not in the interactome.' + # Finally, compute all-pairs-shortest-paths and record the subgraph. output = nx.Graph() for source in sources: for target in targets: - p = nx.shortest_path(graph, source, target, weight=None) + p = nx.shortest_path(graph, source, target, weight='weight') nx.add_path(output, p) + # Write the subgraph as a list of edges. 
nx.write_edgelist(output, output_file, data=False) - print(output) print(f"Wrote output file to {str(output_file)}") - def main(): """ Parse arguments and run pathway reconstruction diff --git a/src/all-pairs-shortest-paths.py b/src/allpairs.py similarity index 79% rename from src/all-pairs-shortest-paths.py rename to src/allpairs.py index badcef51..17b86e98 100644 --- a/src/all-pairs-shortest-paths.py +++ b/src/allpairs.py @@ -7,7 +7,7 @@ __all__ = ['AllPairs'] class AllPairs(PRM): - required_inputs = ['sources', 'tagets', 'edges'] + required_inputs = ['nodetypes', 'network'] @staticmethod def generate_inputs(data, filename_map): @@ -22,6 +22,7 @@ def generate_inputs(data, filename_map): raise ValueError("{input_type} filename is missing") #Get sources and targets for node input file + #Borrowed code from pathlinker.py sources_targets = data.request_node_columns(["sources", "targets"]) if sources_targets is None: return False @@ -38,6 +39,7 @@ def generate_inputs(data, filename_map): input_df.to_csv(filename_map["nodetypes"],sep="\t",index=False,columns=["#Node","Node type"]) + #This is pretty memory intensive. We might want to keep the interactome centralized. 
data.get_interactome().to_csv(filename_map["network"],sep="\t",index=False,columns=["Interactor1","Interactor2","Weight"],header=["#Interactor1","Interactor2","Weight"]) @@ -53,7 +55,7 @@ def run(nodetypes=None, network=None, output_file=None, singularity=False): if not nodetypes or not network or not output_file: raise ValueError('Required AllPairs arguments are missing') - work_dir = '/spras' + work_dir = '/allpairs' # Each volume is a tuple (src, dest) volumes = list() @@ -65,39 +67,33 @@ def run(nodetypes=None, network=None, output_file=None, singularity=False): volumes.append(bind_path) # AllPairs does not provide an argument to set the output directory - # Use its --output argument to set the output file prefix to specify an absolute path and prefix + # Use its --output argument to set the output file to specify an absolute path and filename out_dir = Path(output_file).parent # AllPairs requires that the output directory exist out_dir.mkdir(parents=True, exist_ok=True) bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) volumes.append(bind_path) - mapped_out_prefix = mapped_out_dir + '/out' # Use posix path inside the container + mapped_out_file = mapped_out_dir + '/out.txt' # Use posix path inside the container command = ['python', - '/AllPairs/run.py', - network_file, - node_file, - '--output', mapped_out_prefix] - - # Add optional argument - if k is not None: - command.extend(['-k', str(k)]) + 'all-pairs-shortest-paths.py', + '--network', network_file, + '--nodes', node_file, + '--output', mapped_out_file] print('Running AllPairs with arguments: {}'.format(' '.join(command)), flush=True) # TODO consider making this a string in the config file instead of a Boolean container_framework = 'singularity' if singularity else 'docker' out = run_container(container_framework, - 'reedcompbio/AllPairs', + 'annaritz/allpairs', command, volumes, work_dir) print(out) # Rename the primary output file to match the desired output filename - # Currently 
AllPairs only writes one output file so we do not need to delete others - # We may not know the value of k that was used - output_edges = Path(next(out_dir.glob('out*-ranked-edges.txt'))) + output_edges = Path(next(out_dir.glob('out.txt'))) output_edges.rename(output_file) @staticmethod @@ -107,5 +103,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file): @param raw_pathway_file: pathway file produced by an algorithm's run function @param standardized_pathway_file: the same pathway written in the universal format """ - df = pd.read_csv(raw_pathway_file,sep='\t').take([0,1,2],axis=1) - df.to_csv(standardized_pathway_file, header=False,index=False,sep=' ') + df = pd.read_csv(raw_pathway_file,sep='\t',header=None) + df.insert(2,'Rank',1) # add a rank column of 1s since the edges are not ranked. + df.to_csv(standardized_pathway_file, header=False, index=False, sep=' ') From 10c00cb0d2ed887d5578a65f4355dbd619e1b735 Mon Sep 17 00:00:00 2001 From: Anna Ritz Date: Mon, 28 Nov 2022 15:59:51 -0800 Subject: [PATCH 13/33] whoops forgot to commit the test functions --- test/AllPairs/expected/out.txt | 10 ++++++++ test/AllPairs/test_ap.py | 46 ++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 test/AllPairs/expected/out.txt create mode 100644 test/AllPairs/test_ap.py diff --git a/test/AllPairs/expected/out.txt b/test/AllPairs/expected/out.txt new file mode 100644 index 00000000..62a76492 --- /dev/null +++ b/test/AllPairs/expected/out.txt @@ -0,0 +1,10 @@ +S1 A +S1 B +A E +A F +E T1 +T1 F +F T2 +F B +B S2 +S2 T3 diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py new file mode 100644 index 00000000..8ecc4f32 --- /dev/null +++ b/test/AllPairs/test_ap.py @@ -0,0 +1,46 @@ +import pytest +import docker +from src.allpairs import AllPairs + +TEST_DIR = 'test/AllPairs/' +OUT_FILE = TEST_DIR+'out.txt' + +EXPECTED_FILE = TEST_DIR+'/expected/out.txt' ## TODO not currently checked. 
+ +class TestAllPairs: + """ + Run all pairs shortest paths (AllPairs) tests in the Docker image + """ + def test_allpairs(self): + out_path = Path(OUT_FILE) + out_path.unlink(missing_ok=True) + # Only include required arguments + AllPairs.run( + nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt', + network=TEST_DIR+'input/sample-in-net.txt', + output_file=OUT_FILE + ) + assert out_path.exists() + + def test_pathlinker_missing(self): + # Test the expected error is raised when required arguments are missing + with pytest.raises(ValueError): + # No nodetypes + AllPairs.run( + network=TEST_DIR + 'input/sample-in-net.txt', + output_file=OUT_FILE) + + # Only run Singularity test if the binary is available on the system + # spython is only available on Unix, but do not explicitly skip non-Unix platforms + @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') + def test_allpairs_singularity(self): + out_path = Path(OUT_FILE) + out_path.unlink(missing_ok=True) + # Only include required arguments and run with Singularity + AllPairs.run( + nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt', + network=TEST_DIR+'input/sample-in-net.txt', + output_file=OUT_FILE, + singularity=True + ) + assert out_path.exists() From 8124a145ad133f33f63679410ad9fcad26a6821b Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Wed, 3 May 2023 21:33:14 -0500 Subject: [PATCH 14/33] Switch output to tab separated Match behavior added in #79 --- src/allpairs.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/allpairs.py b/src/allpairs.py index 17b86e98..7aeae3a5 100644 --- a/src/allpairs.py +++ b/src/allpairs.py @@ -1,8 +1,7 @@ -from src.PRM import PRM +from src.prm import PRM from pathlib import Path from src.util import prepare_volume, run_container import pandas as pd -import networkx as nx __all__ = ['AllPairs'] @@ -105,4 +104,4 @@ def parse_output(raw_pathway_file, standardized_pathway_file): """ df = 
pd.read_csv(raw_pathway_file,sep='\t',header=None) df.insert(2,'Rank',1) # add a rank column of 1s since the edges are not ranked. - df.to_csv(standardized_pathway_file, header=False, index=False, sep=' ') + df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t') From 5f5e94683cd536f3324e18492d06533258ad19ac Mon Sep 17 00:00:00 2001 From: ntalluri Date: Sun, 16 Jul 2023 17:12:03 -0700 Subject: [PATCH 15/33] made changes to make code function --- docker-wrappers/AllPairs/all-pairs-shortest-paths.py | 4 ++-- src/allpairs.py | 4 ++-- test/AllPairs/test_ap.py | 6 ++++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py index 71eed8b5..f5ee4487 100644 --- a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py +++ b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py @@ -45,12 +45,12 @@ def allpairs(network_file: Path, nodes_file: Path, output_file: Path): elif row[1] == 'target': targets.add(row[0]) - # there should be at least one source and one target. + # there should be at least one source and one target assert len(sources) > 0, 'There are no sources.' assert len(targets) > 0, 'There are no targets.' assert len(sources.intersection(targets)) == 0, 'There is at least one source that is also a target.' - # Read graph & assert that sources/targets are in network + # Read graph & assert all the sources/targets are in network graph = nx.read_weighted_edgelist(network_file) assert len(sources.intersection(graph.nodes())) == len(sources), 'At least one source is not in the interactome.' assert len(targets.intersection(graph.nodes())) == len(targets), 'At least one target is not in the interactome.' 
diff --git a/src/allpairs.py b/src/allpairs.py index 7aeae3a5..bf57a5e1 100644 --- a/src/allpairs.py +++ b/src/allpairs.py @@ -75,7 +75,7 @@ def run(nodetypes=None, network=None, output_file=None, singularity=False): mapped_out_file = mapped_out_dir + '/out.txt' # Use posix path inside the container command = ['python', - 'all-pairs-shortest-paths.py', + '/AllPairs/all-pairs-shortest-paths.py', '--network', network_file, '--nodes', node_file, '--output', mapped_out_file] @@ -103,5 +103,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file): @param standardized_pathway_file: the same pathway written in the universal format """ df = pd.read_csv(raw_pathway_file,sep='\t',header=None) - df.insert(2,'Rank',1) # add a rank column of 1s since the edges are not ranked. + df.insert(1,'Rank',1) # add a rank column of 1s since the edges are not ranked. df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t') diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index 8ecc4f32..11b46b76 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -1,9 +1,11 @@ -import pytest import docker +from pathlib import Path +import pytest +import shutil from src.allpairs import AllPairs TEST_DIR = 'test/AllPairs/' -OUT_FILE = TEST_DIR+'out.txt' +OUT_FILE = TEST_DIR+'output/out.txt' EXPECTED_FILE = TEST_DIR+'/expected/out.txt' ## TODO not currently checked. 
From 9cf274aa38d72ead5963f5cd502dc3a6096f320c Mon Sep 17 00:00:00 2001 From: ntalluri Date: Sun, 16 Jul 2023 17:21:13 -0700 Subject: [PATCH 16/33] retrying test --- docker-wrappers/AllPairs/all-pairs-shortest-paths.py | 2 ++ src/allpairs.py | 11 +++++++---- src/local_neighborhood_practice.py | 6 ++++-- test/AllPairs/test_ap.py | 6 ++++-- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py index f5ee4487..369596f3 100644 --- a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py +++ b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py @@ -6,8 +6,10 @@ import argparse from pathlib import Path + import networkx as nx + def parse_arguments(): """ Process command line arguments. diff --git a/src/allpairs.py b/src/allpairs.py index bf57a5e1..8c64f58b 100644 --- a/src/allpairs.py +++ b/src/allpairs.py @@ -1,8 +1,11 @@ -from src.prm import PRM +import warnings from pathlib import Path -from src.util import prepare_volume, run_container + import pandas as pd +from src.prm import PRM +from src.util import prepare_volume, run_container + __all__ = ['AllPairs'] class AllPairs(PRM): @@ -26,9 +29,9 @@ def generate_inputs(data, filename_map): if sources_targets is None: return False both_series = sources_targets.sources & sources_targets.targets - for index,row in sources_targets[both_series].iterrows(): + for _index,row in sources_targets[both_series].iterrows(): warn_msg = row.NODEID+" has been labeled as both a source and a target." 
- warnings.warn(warn_msg) + warnings.warn(warn_msg, stacklevel=2) #Create nodetype file input_df = sources_targets[["NODEID"]].copy() diff --git a/src/local_neighborhood_practice.py b/src/local_neighborhood_practice.py index 243816f2..07feaa6a 100644 --- a/src/local_neighborhood_practice.py +++ b/src/local_neighborhood_practice.py @@ -1,8 +1,10 @@ # Test wrapper function as a part of the contributor tutorial # -import pandas as pd import warnings -from src.PRM import PRM from pathlib import Path + +import pandas as pd + +from src.PRM import PRM from src.util import prepare_volume, run_container __all__ = ['LocalNeighborhood'] diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index 11b46b76..3ec19545 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -1,7 +1,9 @@ -import docker +import shutil from pathlib import Path + +import docker import pytest -import shutil + from src.allpairs import AllPairs TEST_DIR = 'test/AllPairs/' From 90a38b3c8983a2a08ab669cab98cc8b2bc61a28c Mon Sep 17 00:00:00 2001 From: ntalluri Date: Mon, 17 Jul 2023 20:08:16 -0700 Subject: [PATCH 17/33] added correctness test --- test/AllPairs/input/correctness-network.txt | 4 ++ test/AllPairs/input/correctness-nodetypes.txt | 5 +++ test/AllPairs/test_ap.py | 40 +++++++++++++++++-- 3 files changed, 45 insertions(+), 4 deletions(-) create mode 100644 test/AllPairs/input/correctness-network.txt create mode 100644 test/AllPairs/input/correctness-nodetypes.txt diff --git a/test/AllPairs/input/correctness-network.txt b/test/AllPairs/input/correctness-network.txt new file mode 100644 index 00000000..cb602119 --- /dev/null +++ b/test/AllPairs/input/correctness-network.txt @@ -0,0 +1,4 @@ +#Node1 Node2 +A B 1 +B C 1 +C D 1 \ No newline at end of file diff --git a/test/AllPairs/input/correctness-nodetypes.txt b/test/AllPairs/input/correctness-nodetypes.txt new file mode 100644 index 00000000..5799a50d --- /dev/null +++ b/test/AllPairs/input/correctness-nodetypes.txt @@ -0,0 
+1,5 @@ +#Node Node type +A source +B source +C target +D target \ No newline at end of file diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index 3ec19545..decce7b1 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -1,9 +1,6 @@ import shutil from pathlib import Path - -import docker import pytest - from src.allpairs import AllPairs TEST_DIR = 'test/AllPairs/' @@ -26,7 +23,7 @@ def test_allpairs(self): ) assert out_path.exists() - def test_pathlinker_missing(self): + def test_allpairs_missing(self): # Test the expected error is raised when required arguments are missing with pytest.raises(ValueError): # No nodetypes @@ -34,6 +31,7 @@ def test_pathlinker_missing(self): network=TEST_DIR + 'input/sample-in-net.txt', output_file=OUT_FILE) + # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') @@ -48,3 +46,37 @@ def test_allpairs_singularity(self): singularity=True ) assert out_path.exists() + + def test_correctness(self): + """ + Tests algorithm correctness of all_pairs_shortest_path.py by using AllPairs.run + """ + out_path = Path(OUT_FILE) + out_path.unlink(missing_ok=True) + + AllPairs.run( + nodetypes=TEST_DIR+'input/correctness-nodetypes.txt', + network=TEST_DIR+'input/correctness-network.txt', + output_file=OUT_FILE + ) + + assert out_path.exists() + + with open(out_path, 'r') as f: + edge_pairs = f.readlines() + output_edges = [] + for edge in edge_pairs: + node1, node2 = sorted(edge.split()) + output_edges.append((node1, node2)) + output_edges.sort() + + expected_output = [ + ('A', 'B'), + ('B', 'C'), + ('C', 'D'), + ] + + assert output_edges == expected_output + + + From 47a54ec64f9fb356c4634f3ed5b73b44f6c36a4c Mon Sep 17 00:00:00 2001 From: ntalluri Date: Mon, 17 Jul 2023 20:11:14 -0700 Subject: [PATCH 18/33] precommit --- 
test/AllPairs/test_ap.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index decce7b1..24bfe40f 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -1,6 +1,8 @@ import shutil from pathlib import Path + import pytest + from src.allpairs import AllPairs TEST_DIR = 'test/AllPairs/' @@ -53,7 +55,7 @@ def test_correctness(self): """ out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) - + AllPairs.run( nodetypes=TEST_DIR+'input/correctness-nodetypes.txt', network=TEST_DIR+'input/correctness-network.txt', From 6064b32af33f36a358f289c24f84006f9adb4dff Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 18 Jul 2023 13:38:28 -0700 Subject: [PATCH 19/33] made changes left in repo and added a README --- docker-wrappers/AllPairs/README.md | 28 +++++ docker-wrappers/AllPairs/sample-in-net.txt | 21 ---- .../AllPairs/sample-in-nodetypes.txt | 6 - src/allpairs.py | 4 +- src/local_neighborhood_practice.py | 116 ------------------ 5 files changed, 29 insertions(+), 146 deletions(-) create mode 100644 docker-wrappers/AllPairs/README.md delete mode 100644 docker-wrappers/AllPairs/sample-in-net.txt delete mode 100644 docker-wrappers/AllPairs/sample-in-nodetypes.txt delete mode 100644 src/local_neighborhood_practice.py diff --git a/docker-wrappers/AllPairs/README.md b/docker-wrappers/AllPairs/README.md new file mode 100644 index 00000000..25fcd17a --- /dev/null +++ b/docker-wrappers/AllPairs/README.md @@ -0,0 +1,28 @@ +# All Pairs Shortest Paths Docker image + +A Docker image for All Pairs Shortest Paths that is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/allpairsshortestpath). + +To create the Docker image run: +``` +docker build -t reedcompbio/allpairsshortestpath -f Dockerfile . +``` +from this directory. 
+ +To inspect the installed Python packages: +``` +docker run reedcompbio/allpairsshortestpath pip list +``` +Windows users may need to add the `winpty` prefix before these commands. + + +## Testing +Test code is located in `test/AllPairs`. +The `input` subdirectory contains a sample network and source/target file, along with a network and source/target file to check for the correctness of All Pairs Shortest Path. +The expected output graphs for the sample network are in the `expected` subdirectory. + +The Docker wrapper can be tested with `pytest -k test_ap.py` from the root of the SPRAS repository. + + +## Notes +- The all-pairs-shortest-paths code is located locally in SPRAS (since the code is short). It is under docker-wrappers/AllPairs +- samples of an input network and source/target file are located under test/AllPairs/input diff --git a/docker-wrappers/AllPairs/sample-in-net.txt b/docker-wrappers/AllPairs/sample-in-net.txt deleted file mode 100644 index 0816fa48..00000000 --- a/docker-wrappers/AllPairs/sample-in-net.txt +++ /dev/null @@ -1,21 +0,0 @@ -#Node1 Node2 -S1 A 0.5 -A E 0.5 -E T1 0.5 -E F 0.5 -F E 0.5 -F A 0.5 -T1 F 0.5 -F T2 0.5 -B S1 0.5 -B F 0.5 -B C 0.5 -S2 B 0.5 -S2 C 0.5 -S2 T3 0.5 -C G 0.5 -G C 0.5 -C F 0.5 -G F 0.5 -G T2 0.5 -G T3 0.5 \ No newline at end of file diff --git a/docker-wrappers/AllPairs/sample-in-nodetypes.txt b/docker-wrappers/AllPairs/sample-in-nodetypes.txt deleted file mode 100644 index 3abac1ca..00000000 --- a/docker-wrappers/AllPairs/sample-in-nodetypes.txt +++ /dev/null @@ -1,6 +0,0 @@ -#Node Node type -S1 source -S2 source -T1 target -T2 target -T3 target \ No newline at end of file diff --git a/src/allpairs.py b/src/allpairs.py index 8c64f58b..997cc63c 100644 --- a/src/allpairs.py +++ b/src/allpairs.py @@ -17,7 +17,6 @@ def generate_inputs(data, filename_map): Access fields from the dataset and write the required input files @param data: dataset @param filename_map: a dict mapping file types in the required_inputs
to the filename for that type - @return: """ for input_type in AllPairs.required_inputs: if input_type not in filename_map: @@ -52,7 +51,6 @@ def run(nodetypes=None, network=None, output_file=None, singularity=False): @param nodetypes: input node types with sources and targets (required) @param network: input network file (required) @param output_file: path to the output pathway file (required) - @param singularity: currently inactive, implement later? if not nodetypes or not network or not output_file: raise ValueError('Required AllPairs arguments are missing') @@ -85,7 +83,7 @@ def run(nodetypes=None, network=None, output_file=None, singularity=False): print('Running AllPairs with arguments: {}'.format(' '.join(command)), flush=True) - # TODO consider making this a string in the config file instead of a Boolean + #TODO: change the docker image once pushed to reedcompbio container_framework = 'singularity' if singularity else 'docker' out = run_container(container_framework, 'annaritz/allpairs', diff --git a/src/local_neighborhood_practice.py b/src/local_neighborhood_practice.py deleted file mode 100644 index 07feaa6a..00000000 --- a/src/local_neighborhood_practice.py +++ /dev/null @@ -1,116 +0,0 @@ -# Test wrapper function as a part of the contributor tutorial # -import warnings -from pathlib import Path - -import pandas as pd - -from src.PRM import PRM -from src.util import prepare_volume, run_container - -__all__ = ['LocalNeighborhood'] - -class LocalNeighborhood(PRM): - required_inputs = ['network', 'nodes'] - - @staticmethod - def generate_inputs(data, filename_map): - """ - Access fields from the dataset and write the required input files - @param data: dataset - @param filename_map: a dict mapping file types in the required_inputs to the filename for that type - @return: - """ - for input_type in LocalNeighborhood.required_inputs: - if input_type not in filename_map: - raise ValueError(f"{input_type} filename is missing") - - if 
data.contains_node_columns(['prize','sources','targets']): - node_df = data.request_node_columns(['prize','sources','targets']) - node_df.loc[node_df['sources']==True, 'prize'] = 1.0 - node_df.loc[node_df['targets']==True, 'prize'] = 1.0 - node_df.loc[node_df['prize']==True, 'prize'] = 1.0 - - else: - raise ValueError("Local Neighborhood requires node prizes or sources and targets") - - node_df.to_csv(filename_map['prizes'],index=False,columns=['NODEID'],header=False) - - #For now we assume all input networks are undirected until we expand how edge tables work - edges_df = data.get_interactome() - edges_df.to_csv(filename_map['edges'],sep='|',index=False,columns=['Interactor1','Interactor2'],header=False) - - @staticmethod - def run(nodetypes=None, network=None, output_file=None, k=None, singularity=False): - """ - Run LocalNeighborhood with Docker - @param nodetypes: input node types with sources and targets (required) - @param network: input network file (required) - @param output_file: path to the output pathway file (required) - @param k: path length (optional) - @param singularity: if True, run using the Singularity container instead of the Docker container - """ - # Add additional parameter validation - # Do not require k - # Use the LocalNeighborhood default - # Could consider setting the default here instead - if not nodetypes or not network or not output_file: - raise ValueError('Required LocalNeighborhood arguments are missing') - - work_dir = '/spras' - - # Each volume is a tuple (src, dest) - volumes = list() - - bind_path, node_file = prepare_volume(nodetypes, work_dir) - volumes.append(bind_path) - - bind_path, network_file = prepare_volume(network, work_dir) - volumes.append(bind_path) - - # LocalNeighborhood does not provide an argument to set the output directory - # Use its --output argument to set the output file prefix to specify an absolute path and prefix - out_dir = Path(output_file).parent - # LocalNeighborhood requires that the output 
directory exist - out_dir.mkdir(parents=True, exist_ok=True) - bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) - volumes.append(bind_path) - mapped_out_prefix = mapped_out_dir + '/out' # Use posix path inside the container - - command = ['python3', - '/LocalNeighborhood/run.py', - '--network', network_file, - '--nodes', node_file, - '--output', mapped_out_prefix] - - # Add optional argument - if k is not None: - command.extend(['-k', str(k)]) - - print('Running LocalNeighborhood with arguments: {}'.format(' '.join(command)), flush=True) - - # TODO consider making this a string in the config file instead of a Boolean - container_framework = 'singularity' if singularity else 'docker' - out = run_container(container_framework, - 'ninayoung/local-neighborhood', - command, - volumes, - work_dir) - print(out) - - # Rename the primary output file to match the desired output filename - # Currently LocalNeighborhood only writes one output file so we do not need to delete others - # We may not know the value of k that was used - output_edges = Path(next(out_dir.glob('out*-ranked-edges.txt'))) - output_edges.rename(output_file) - - @staticmethod - def parse_output(raw_pathway_file, standardized_pathway_file): - """ - Convert a predicted pathway into the universal format - @param raw_pathway_file: pathway file produced by an algorithm's run function - @param standardized_pathway_file: the same pathway written in the universal format - """ - df = pd.read_csv(raw_pathway_file,sep='|', axis=1) - df.insert(2, 'Rank', '1') - # node 1 | node 2 - add a 1 at the end of every line, read up on dataframes - df.to_csv(standardized_pathway_file, header=False,index=False,sep=' ') From a257f749f3e3f459fbd6de3bb0500db55478cc80 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Thu, 17 Aug 2023 13:18:43 -0500 Subject: [PATCH 20/33] added deterministic test and fixed naming --- .../expected/correctness-expected.txt | 3 ++ .../{out.txt => sample-out-expected.txt} | 0 
test/AllPairs/input/correctness-network.txt | 4 ++- test/AllPairs/input/correctness-nodetypes.txt | 2 +- test/AllPairs/test_ap.py | 32 ++++++++++--------- 5 files changed, 24 insertions(+), 17 deletions(-) create mode 100644 test/AllPairs/expected/correctness-expected.txt rename test/AllPairs/expected/{out.txt => sample-out-expected.txt} (100%) diff --git a/test/AllPairs/expected/correctness-expected.txt b/test/AllPairs/expected/correctness-expected.txt new file mode 100644 index 00000000..ff26bb2f --- /dev/null +++ b/test/AllPairs/expected/correctness-expected.txt @@ -0,0 +1,3 @@ +A B +A E +B C \ No newline at end of file diff --git a/test/AllPairs/expected/out.txt b/test/AllPairs/expected/sample-out-expected.txt similarity index 100% rename from test/AllPairs/expected/out.txt rename to test/AllPairs/expected/sample-out-expected.txt diff --git a/test/AllPairs/input/correctness-network.txt b/test/AllPairs/input/correctness-network.txt index cb602119..2857ee44 100644 --- a/test/AllPairs/input/correctness-network.txt +++ b/test/AllPairs/input/correctness-network.txt @@ -1,4 +1,6 @@ #Node1 Node2 A B 1 B C 1 -C D 1 \ No newline at end of file +C D 1 +D E 1 +A E 1 \ No newline at end of file diff --git a/test/AllPairs/input/correctness-nodetypes.txt b/test/AllPairs/input/correctness-nodetypes.txt index 5799a50d..5a1e231d 100644 --- a/test/AllPairs/input/correctness-nodetypes.txt +++ b/test/AllPairs/input/correctness-nodetypes.txt @@ -2,4 +2,4 @@ A source B source C target -D target \ No newline at end of file +E target \ No newline at end of file diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index 24bfe40f..97ba0179 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -6,22 +6,22 @@ from src.allpairs import AllPairs TEST_DIR = 'test/AllPairs/' -OUT_FILE = TEST_DIR+'output/out.txt' +OUT_DIR = TEST_DIR+'output/' -EXPECTED_FILE = TEST_DIR+'/expected/out.txt' ## TODO not currently checked. 
+EXPECTED_DIR = TEST_DIR+'/expected/' class TestAllPairs: """ Run all pairs shortest paths (AllPairs) tests in the Docker image """ def test_allpairs(self): - out_path = Path(OUT_FILE) + out_path = Path(OUT_DIR+'sample-out.txt') out_path.unlink(missing_ok=True) # Only include required arguments AllPairs.run( nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt', network=TEST_DIR+'input/sample-in-net.txt', - output_file=OUT_FILE + output_file=OUT_DIR+'sample-out.txt' ) assert out_path.exists() @@ -31,20 +31,20 @@ def test_allpairs_missing(self): # No nodetypes AllPairs.run( network=TEST_DIR + 'input/sample-in-net.txt', - output_file=OUT_FILE) + output_file=OUT_DIR+'sample-out.txt') # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') def test_allpairs_singularity(self): - out_path = Path(OUT_FILE) + out_path = Path(OUT_DIR+'sample-out.txt') out_path.unlink(missing_ok=True) # Only include required arguments and run with Singularity AllPairs.run( nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt', network=TEST_DIR+'input/sample-in-net.txt', - output_file=OUT_FILE, + output_file=OUT_DIR+'sample-out.txt', singularity=True ) assert out_path.exists() @@ -53,13 +53,13 @@ def test_correctness(self): """ Tests algorithm correctness of all_pairs_shortest_path.py by using AllPairs.run """ - out_path = Path(OUT_FILE) + out_path = Path(OUT_DIR+'correctness.txt') out_path.unlink(missing_ok=True) AllPairs.run( nodetypes=TEST_DIR+'input/correctness-nodetypes.txt', network=TEST_DIR+'input/correctness-network.txt', - output_file=OUT_FILE + output_file=OUT_DIR+'correctness.txt' ) assert out_path.exists() @@ -72,13 +72,15 @@ def test_correctness(self): output_edges.append((node1, node2)) output_edges.sort() - expected_output = [ - ('A', 'B'), - ('B', 'C'), - ('C', 'D'), - ] + with 
open(EXPECTED_DIR+"correctness-expected.txt", 'r') as file: + correctness_edge_pairs = file.readlines() + correctness_edges = [] + for edge in correctness_edge_pairs: + node1, node2 = edge.split() + correctness_edges.append((node1, node2)) + + assert output_edges == correctness_edges - assert output_edges == expected_output From a705a919302acc793ee5379399b2aa6e50298c36 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Thu, 17 Aug 2023 13:22:42 -0500 Subject: [PATCH 21/33] Revert "added deterministic test and fixed naming" This reverts commit a257f749f3e3f459fbd6de3bb0500db55478cc80. --- .../expected/correctness-expected.txt | 3 -- .../{sample-out-expected.txt => out.txt} | 0 test/AllPairs/input/correctness-network.txt | 4 +-- test/AllPairs/input/correctness-nodetypes.txt | 2 +- test/AllPairs/test_ap.py | 32 +++++++++---------- 5 files changed, 17 insertions(+), 24 deletions(-) delete mode 100644 test/AllPairs/expected/correctness-expected.txt rename test/AllPairs/expected/{sample-out-expected.txt => out.txt} (100%) diff --git a/test/AllPairs/expected/correctness-expected.txt b/test/AllPairs/expected/correctness-expected.txt deleted file mode 100644 index ff26bb2f..00000000 --- a/test/AllPairs/expected/correctness-expected.txt +++ /dev/null @@ -1,3 +0,0 @@ -A B -A E -B C \ No newline at end of file diff --git a/test/AllPairs/expected/sample-out-expected.txt b/test/AllPairs/expected/out.txt similarity index 100% rename from test/AllPairs/expected/sample-out-expected.txt rename to test/AllPairs/expected/out.txt diff --git a/test/AllPairs/input/correctness-network.txt b/test/AllPairs/input/correctness-network.txt index 2857ee44..cb602119 100644 --- a/test/AllPairs/input/correctness-network.txt +++ b/test/AllPairs/input/correctness-network.txt @@ -1,6 +1,4 @@ #Node1 Node2 A B 1 B C 1 -C D 1 -D E 1 -A E 1 \ No newline at end of file +C D 1 \ No newline at end of file diff --git a/test/AllPairs/input/correctness-nodetypes.txt b/test/AllPairs/input/correctness-nodetypes.txt 
index 5a1e231d..5799a50d 100644 --- a/test/AllPairs/input/correctness-nodetypes.txt +++ b/test/AllPairs/input/correctness-nodetypes.txt @@ -2,4 +2,4 @@ A source B source C target -E target \ No newline at end of file +D target \ No newline at end of file diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index 97ba0179..24bfe40f 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -6,22 +6,22 @@ from src.allpairs import AllPairs TEST_DIR = 'test/AllPairs/' -OUT_DIR = TEST_DIR+'output/' +OUT_FILE = TEST_DIR+'output/out.txt' -EXPECTED_DIR = TEST_DIR+'/expected/' +EXPECTED_FILE = TEST_DIR+'/expected/out.txt' ## TODO not currently checked. class TestAllPairs: """ Run all pairs shortest paths (AllPairs) tests in the Docker image """ def test_allpairs(self): - out_path = Path(OUT_DIR+'sample-out.txt') + out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) # Only include required arguments AllPairs.run( nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt', network=TEST_DIR+'input/sample-in-net.txt', - output_file=OUT_DIR+'sample-out.txt' + output_file=OUT_FILE ) assert out_path.exists() @@ -31,20 +31,20 @@ def test_allpairs_missing(self): # No nodetypes AllPairs.run( network=TEST_DIR + 'input/sample-in-net.txt', - output_file=OUT_DIR+'sample-out.txt') + output_file=OUT_FILE) # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') def test_allpairs_singularity(self): - out_path = Path(OUT_DIR+'sample-out.txt') + out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) # Only include required arguments and run with Singularity AllPairs.run( nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt', network=TEST_DIR+'input/sample-in-net.txt', - output_file=OUT_DIR+'sample-out.txt', + output_file=OUT_FILE, singularity=True ) assert out_path.exists() @@ 
-53,13 +53,13 @@ def test_correctness(self): """ Tests algorithm correctness of all_pairs_shortest_path.py by using AllPairs.run """ - out_path = Path(OUT_DIR+'correctness.txt') + out_path = Path(OUT_FILE) out_path.unlink(missing_ok=True) AllPairs.run( nodetypes=TEST_DIR+'input/correctness-nodetypes.txt', network=TEST_DIR+'input/correctness-network.txt', - output_file=OUT_DIR+'correctness.txt' + output_file=OUT_FILE ) assert out_path.exists() @@ -72,15 +72,13 @@ def test_correctness(self): output_edges.append((node1, node2)) output_edges.sort() - with open(EXPECTED_DIR+"correctness-expected.txt", 'r') as file: - correctness_edge_pairs = file.readlines() - correctness_edges = [] - for edge in correctness_edge_pairs: - node1, node2 = edge.split() - correctness_edges.append((node1, node2)) - - assert output_edges == correctness_edges + expected_output = [ + ('A', 'B'), + ('B', 'C'), + ('C', 'D'), + ] + assert output_edges == expected_output From e232a075148712bd90c2fb35ce7239ae10f862d3 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Thu, 17 Aug 2023 13:33:55 -0500 Subject: [PATCH 22/33] added the deterministic test and fixed stuff based on comments --- .../expected/correctness-expected.txt | 3 ++ .../{out.txt => sample-out-expected.txt} | 0 test/AllPairs/input/correctness-network.txt | 4 ++- test/AllPairs/input/correctness-nodetypes.txt | 2 +- test/AllPairs/test_ap.py | 31 ++++++++++--------- 5 files changed, 23 insertions(+), 17 deletions(-) create mode 100644 test/AllPairs/expected/correctness-expected.txt rename test/AllPairs/expected/{out.txt => sample-out-expected.txt} (100%) diff --git a/test/AllPairs/expected/correctness-expected.txt b/test/AllPairs/expected/correctness-expected.txt new file mode 100644 index 00000000..960048bb --- /dev/null +++ b/test/AllPairs/expected/correctness-expected.txt @@ -0,0 +1,3 @@ +A B +A E +B C diff --git a/test/AllPairs/expected/out.txt b/test/AllPairs/expected/sample-out-expected.txt similarity index 100% rename from 
test/AllPairs/expected/out.txt rename to test/AllPairs/expected/sample-out-expected.txt diff --git a/test/AllPairs/input/correctness-network.txt b/test/AllPairs/input/correctness-network.txt index cb602119..2857ee44 100644 --- a/test/AllPairs/input/correctness-network.txt +++ b/test/AllPairs/input/correctness-network.txt @@ -1,4 +1,6 @@ #Node1 Node2 A B 1 B C 1 -C D 1 \ No newline at end of file +C D 1 +D E 1 +A E 1 \ No newline at end of file diff --git a/test/AllPairs/input/correctness-nodetypes.txt b/test/AllPairs/input/correctness-nodetypes.txt index 5799a50d..5a1e231d 100644 --- a/test/AllPairs/input/correctness-nodetypes.txt +++ b/test/AllPairs/input/correctness-nodetypes.txt @@ -2,4 +2,4 @@ A source B source C target -D target \ No newline at end of file +E target \ No newline at end of file diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index 24bfe40f..43f6b245 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -6,22 +6,22 @@ from src.allpairs import AllPairs TEST_DIR = 'test/AllPairs/' -OUT_FILE = TEST_DIR+'output/out.txt' +OUT_DIR = TEST_DIR+'output/' -EXPECTED_FILE = TEST_DIR+'/expected/out.txt' ## TODO not currently checked. +EXPECTED_DIR = TEST_DIR+'/expected/' ## TODO not currently checked. 
class TestAllPairs: """ Run all pairs shortest paths (AllPairs) tests in the Docker image """ def test_allpairs(self): - out_path = Path(OUT_FILE) + out_path = Path(OUT_DIR+'sample-out.txt') out_path.unlink(missing_ok=True) # Only include required arguments AllPairs.run( nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt', network=TEST_DIR+'input/sample-in-net.txt', - output_file=OUT_FILE + output_file=OUT_DIR+'sample-out.txt' ) assert out_path.exists() @@ -31,20 +31,20 @@ def test_allpairs_missing(self): # No nodetypes AllPairs.run( network=TEST_DIR + 'input/sample-in-net.txt', - output_file=OUT_FILE) + output_file=OUT_DIR+'sample-out.txt') # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') def test_allpairs_singularity(self): - out_path = Path(OUT_FILE) + out_path = Path(OUT_DIR+'sample-out.txt') out_path.unlink(missing_ok=True) # Only include required arguments and run with Singularity AllPairs.run( nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt', network=TEST_DIR+'input/sample-in-net.txt', - output_file=OUT_FILE, + output_file=OUT_DIR+'sample-out.txt', singularity=True ) assert out_path.exists() @@ -53,13 +53,13 @@ def test_correctness(self): """ Tests algorithm correctness of all_pairs_shortest_path.py by using AllPairs.run """ - out_path = Path(OUT_FILE) + out_path = Path(OUT_DIR+'correctness-out.txt') out_path.unlink(missing_ok=True) AllPairs.run( nodetypes=TEST_DIR+'input/correctness-nodetypes.txt', network=TEST_DIR+'input/correctness-network.txt', - output_file=OUT_FILE + output_file=OUT_DIR+'correctness-out.txt' ) assert out_path.exists() @@ -72,13 +72,14 @@ def test_correctness(self): output_edges.append((node1, node2)) output_edges.sort() - expected_output = [ - ('A', 'B'), - ('B', 'C'), - ('C', 'D'), - ] + with open(EXPECTED_DIR+'correctness-expected.txt', 
'r') as f: + c_edge_pairs = f.readlines() + correct_edges = [] + for edge in c_edge_pairs: + node1, node2 = edge.split() + correct_edges.append((node1, node2)) - assert output_edges == expected_output + assert output_edges == correct_edges From 1b5cf98169373fc9da7da45dbac481d3270127c9 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 18 Aug 2023 17:08:04 -0500 Subject: [PATCH 23/33] Update readme, allow same sources and targets, switch to reedcompbio image --- .github/workflows/test-spras.yml | 5 +++-- docker-wrappers/AllPairs/README.md | 13 +++++++------ .../AllPairs/all-pairs-shortest-paths.py | 9 +++++---- .../LocalNeighborhood/local_neighborhood.py | 2 +- src/allpairs.py | 2 +- test/AllPairs/test_ap.py | 8 +++----- 6 files changed, 20 insertions(+), 19 deletions(-) diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml index 3a278177..7217d99e 100644 --- a/.github/workflows/test-spras.yml +++ b/.github/workflows/test-spras.yml @@ -81,6 +81,7 @@ jobs: docker pull reedcompbio/pathlinker:latest docker pull reedcompbio/meo:latest docker pull reedcompbio/mincostflow:latest + docker pull reedcompbio/allpairs:latest - name: Build Omics Integrator 1 Docker image uses: docker/build-push-action@v1 with: @@ -131,9 +132,9 @@ jobs: with: path: docker-wrappers/AllPairs/. dockerfile: docker-wrappers/AllPairs/Dockerfile - repository: annaritz/allpairs + repository: reedcompbio/allpairs tags: latest - cache_froms: annaritz/allpairs:latest + cache_froms: reedcompbio/allpairs:latest push: false # Run pre-commit checks on source files diff --git a/docker-wrappers/AllPairs/README.md b/docker-wrappers/AllPairs/README.md index 25fcd17a..a0226ead 100644 --- a/docker-wrappers/AllPairs/README.md +++ b/docker-wrappers/AllPairs/README.md @@ -1,16 +1,17 @@ # All Pairs Shortest Paths Docker image -A Docker image for All Pairs Shortest Paths that is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/allpairsshortestpath). 
+A Docker image for All Pairs Shortest Paths that is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/allpairs). +This algorithm was implemented by the SPRAS team and relies on the NetworkX [`shortest_path`](https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.generic.shortest_path.html) function. To create the Docker image run: ``` -docker build -t reedcompbio/allpairsshortestpath -f Dockerfile . +docker build -t reedcompbio/allpairs -f Dockerfile . ``` from this directory. To inspect the installed Python packages: ``` -docker run reedcompbio/allpairsshortestpath pip list +docker run reedcompbio/allpairs pip list ``` Windows users may need to add the prefix `winpty` prefix before these commands. @@ -18,11 +19,11 @@ Windows users may need to add the prefix `winpty` prefix before these commands. ## Testing Test code is located in `test/AllPairs`. The `input` subdirectory contains a sample network and source/target file, along with a network and source/target file to check for the correctness of All Pairs Shortest Path. -The expected output graphs for the sample network is in the `expected` subdirectory. +The expected output graphs for the sample networks are in the `expected` subdirectory. The Docker wrapper can be tested with `pytest -k test_ap.py` from the root of the SPRAS repository. ## Notes -- The all-pairs-shortest-paths code is located locally in SPRAS (since the code is short). It is under docker-wrappers/Allpairs -- samples of an input network and source/target file are located under test/AllPairs/input +- The `all-pairs-shortest-paths.py` code is located locally in SPRAS (since the code is short). It is under `docker-wrappers/AllPairs`. +- Samples of an input network and source/target file are located under test/AllPairs/input. 
diff --git a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py index 369596f3..fc3c7c06 100644 --- a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py +++ b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py @@ -16,10 +16,10 @@ def parse_arguments(): @return arguments """ parser = argparse.ArgumentParser( - description="AllPairs pathway reconstruction" + description="All Pairs Shortest Paths pathway reconstruction" ) parser.add_argument("--network", type=Path, required=True, help="Network file of the form ") - parser.add_argument("--nodes", type=Path, required=True, help="Nodes file of the form ") + parser.add_argument("--nodes", type=Path, required=True, help="Nodes file of the form .") parser.add_argument("--output", type=Path, required=True, help="Output file") return parser.parse_args() @@ -42,15 +42,15 @@ def allpairs(network_file: Path, nodes_file: Path, output_file: Path): with nodes_file.open() as nodes_f: for line in nodes_f: row = line.strip().split() + # Assumes the file is formatted properly with two whitespace-delimited columns if row[1] == 'source': sources.add(row[0]) elif row[1] == 'target': targets.add(row[0]) - # there should be at least one source and one target + # There should be at least one source and one target assert len(sources) > 0, 'There are no sources.' assert len(targets) > 0, 'There are no targets.' - assert len(sources.intersection(targets)) == 0, 'There is at least one source that is also a target.' 
# Read graph & assert all the sources/targets are in network graph = nx.read_weighted_edgelist(network_file) @@ -68,6 +68,7 @@ def allpairs(network_file: Path, nodes_file: Path, output_file: Path): nx.write_edgelist(output, output_file, data=False) print(f"Wrote output file to {str(output_file)}") + def main(): """ Parse arguments and run pathway reconstruction diff --git a/docker-wrappers/LocalNeighborhood/local_neighborhood.py b/docker-wrappers/LocalNeighborhood/local_neighborhood.py index bf26f345..2a2b6096 100644 --- a/docker-wrappers/LocalNeighborhood/local_neighborhood.py +++ b/docker-wrappers/LocalNeighborhood/local_neighborhood.py @@ -29,7 +29,7 @@ def local_neighborhood(network_file: Path, nodes_file: Path, output_file: Path): if not nodes_file.exists(): raise OSError(f"Nodes file {str(nodes_file)} does not exist") if output_file.exists(): - print(f"Output files {str(output_file)} will be overwritten") + print(f"Output file {str(output_file)} will be overwritten") # Create the parent directories for the output file if needed output_file.parent.mkdir(parents=True, exist_ok=True) diff --git a/src/allpairs.py b/src/allpairs.py index 997cc63c..ccfaa1f4 100644 --- a/src/allpairs.py +++ b/src/allpairs.py @@ -86,7 +86,7 @@ def run(nodetypes=None, network=None, output_file=None, singularity=False): #TODO: chang the docker image once pushed to readcompbio container_framework = 'singularity' if singularity else 'docker' out = run_container(container_framework, - 'annaritz/allpairs', + 'reedcompbio/allpairs', command, volumes, work_dir) diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index 43f6b245..b520f329 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -8,7 +8,8 @@ TEST_DIR = 'test/AllPairs/' OUT_DIR = TEST_DIR+'output/' -EXPECTED_DIR = TEST_DIR+'/expected/' ## TODO not currently checked. +EXPECTED_DIR = TEST_DIR+'/expected/' # TODO not currently checked. 
+ class TestAllPairs: """ @@ -49,7 +50,7 @@ def test_allpairs_singularity(self): ) assert out_path.exists() - def test_correctness(self): + def test_allpairs_correctness(self): """ Tests algorithm correctness of all_pairs_shortest_path.py by using AllPairs.run """ @@ -80,6 +81,3 @@ def test_correctness(self): correct_edges.append((node1, node2)) assert output_edges == correct_edges - - - From 8c38e5220e090f829e7c6bfbf88f5fb322b8a206 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 18 Aug 2023 17:10:25 -0500 Subject: [PATCH 24/33] Describe test case --- test/AllPairs/test_ap.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index b520f329..07cdc98c 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -53,6 +53,12 @@ def test_allpairs_singularity(self): def test_allpairs_correctness(self): """ Tests algorithm correctness of all_pairs_shortest_path.py by using AllPairs.run + The shortest paths are: + A-B-C + A-E + B-C + B-A-E + so the union of the unique edges in these paths will be returned as the pathway. 
""" out_path = Path(OUT_DIR+'correctness-out.txt') out_path.unlink(missing_ok=True) From c478a85f052411efcdf69d4f8e14496ef6872205 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 18 Aug 2023 17:16:55 -0500 Subject: [PATCH 25/33] Simplify output file mapping --- src/allpairs.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/allpairs.py b/src/allpairs.py index ccfaa1f4..9e9147da 100644 --- a/src/allpairs.py +++ b/src/allpairs.py @@ -1,5 +1,4 @@ import warnings -from pathlib import Path import pandas as pd @@ -27,6 +26,7 @@ def generate_inputs(data, filename_map): sources_targets = data.request_node_columns(["sources", "targets"]) if sources_targets is None: return False + # TODO may allow this but needs testing both_series = sources_targets.sources & sources_targets.targets for _index,row in sources_targets[both_series].iterrows(): warn_msg = row.NODEID+" has been labeled as both a source and a target." @@ -47,15 +47,16 @@ def generate_inputs(data, filename_map): @staticmethod def run(nodetypes=None, network=None, output_file=None, singularity=False): """ - Run AllPairs with Docker + Run All Pairs Shortest Paths with Docker @param nodetypes: input node types with sources and targets (required) @param network: input network file (required) + @param singularity: if True, run using the Singularity container instead of the Docker container @param output_file: path to the output pathway file (required) """ if not nodetypes or not network or not output_file: - raise ValueError('Required AllPairs arguments are missing') + raise ValueError('Required All Pairs Shortest Paths arguments are missing') - work_dir = '/allpairs' + work_dir = '/apsp' # Each volume is a tuple (src, dest) volumes = list() @@ -66,14 +67,7 @@ def run(nodetypes=None, network=None, output_file=None, singularity=False): bind_path, network_file = prepare_volume(network, work_dir) volumes.append(bind_path) - # AllPairs does not provide an argument to set 
the output directory - # Use its --output argument to set the output file to specify an absolute path and filename - out_dir = Path(output_file).parent - # AllPairs requires that the output directory exist - out_dir.mkdir(parents=True, exist_ok=True) - bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) - volumes.append(bind_path) - mapped_out_file = mapped_out_dir + '/out.txt' # Use posix path inside the container + bind_path, mapped_out_file = prepare_volume(output_file, work_dir) command = ['python', '/AllPairs/all-pairs-shortest-paths.py', @@ -83,7 +77,6 @@ def run(nodetypes=None, network=None, output_file=None, singularity=False): print('Running AllPairs with arguments: {}'.format(' '.join(command)), flush=True) - #TODO: chang the docker image once pushed to readcompbio container_framework = 'singularity' if singularity else 'docker' out = run_container(container_framework, 'reedcompbio/allpairs', @@ -92,9 +85,6 @@ def run(nodetypes=None, network=None, output_file=None, singularity=False): work_dir) print(out) - # Rename the primary output file to match the desired output filename - output_edges = Path(next(out_dir.glob('out.txt'))) - output_edges.rename(output_file) @staticmethod def parse_output(raw_pathway_file, standardized_pathway_file): From 8a7f146d2c4d0826be9de4312ce1949be0cc1c91 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 18 Aug 2023 17:39:51 -0500 Subject: [PATCH 26/33] Fix output file mapping --- src/allpairs.py | 4 ++++ test/AllPairs/test_ap.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/allpairs.py b/src/allpairs.py index 9e9147da..cb305920 100644 --- a/src/allpairs.py +++ b/src/allpairs.py @@ -1,4 +1,5 @@ import warnings +from pathlib import Path import pandas as pd @@ -67,7 +68,10 @@ def run(nodetypes=None, network=None, output_file=None, singularity=False): bind_path, network_file = prepare_volume(network, work_dir) volumes.append(bind_path) + # Create the parent directories for the output 
file if needed + Path(output_file).parent.mkdir(parents=True, exist_ok=True) bind_path, mapped_out_file = prepare_volume(output_file, work_dir) + volumes.append(bind_path) command = ['python', '/AllPairs/all-pairs-shortest-paths.py', diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index 07cdc98c..87797edc 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -8,7 +8,7 @@ TEST_DIR = 'test/AllPairs/' OUT_DIR = TEST_DIR+'output/' -EXPECTED_DIR = TEST_DIR+'/expected/' # TODO not currently checked. +EXPECTED_DIR = TEST_DIR+'expected/' # TODO not currently checked. class TestAllPairs: From 57b0c8ec664a6792f46f1a9019ceb1b62978898e Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 18 Aug 2023 18:06:11 -0500 Subject: [PATCH 27/33] Add test case when source and target are the same --- .../expected/zero-length-expected.txt | 0 test/AllPairs/input/zero-length-network.txt | 6 ++++++ test/AllPairs/input/zero-length-nodetypes.txt | 3 +++ test/AllPairs/test_ap.py | 19 +++++++++++++++++++ 4 files changed, 28 insertions(+) create mode 100644 test/AllPairs/expected/zero-length-expected.txt create mode 100644 test/AllPairs/input/zero-length-network.txt create mode 100644 test/AllPairs/input/zero-length-nodetypes.txt diff --git a/test/AllPairs/expected/zero-length-expected.txt b/test/AllPairs/expected/zero-length-expected.txt new file mode 100644 index 00000000..e69de29b diff --git a/test/AllPairs/input/zero-length-network.txt b/test/AllPairs/input/zero-length-network.txt new file mode 100644 index 00000000..2857ee44 --- /dev/null +++ b/test/AllPairs/input/zero-length-network.txt @@ -0,0 +1,6 @@ +#Node1 Node2 +A B 1 +B C 1 +C D 1 +D E 1 +A E 1 \ No newline at end of file diff --git a/test/AllPairs/input/zero-length-nodetypes.txt b/test/AllPairs/input/zero-length-nodetypes.txt new file mode 100644 index 00000000..f34eb191 --- /dev/null +++ b/test/AllPairs/input/zero-length-nodetypes.txt @@ -0,0 +1,3 @@ +#Node Node type +A source +A target diff 
--git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index 87797edc..5e9e65e3 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -1,3 +1,4 @@ +import filecmp import shutil from pathlib import Path @@ -87,3 +88,21 @@ correct_edges.append((node1, node2)) assert output_edges == correct_edges + + def test_allpairs_zero_length(self): + """ + Tests algorithm correctness of all_pairs_shortest_path.py by using AllPairs.run + The test case has a single source and target that is the same node, so the only path has + zero length. + Therefore, the output pathway has no edges. + """ + out_path = Path(OUT_DIR+'zero-length-out.txt') + out_path.unlink(missing_ok=True) + + AllPairs.run( + nodetypes=TEST_DIR+'input/zero-length-nodetypes.txt', + network=TEST_DIR+'input/zero-length-network.txt', + output_file=OUT_DIR+'zero-length-out.txt' + ) + + assert filecmp.cmp(OUT_DIR+'zero-length-out.txt', EXPECTED_DIR+'zero-length-expected.txt', shallow=False) From 3a18733e4cb52783f7f1f007ddfad33e1f84df13 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 18 Aug 2023 18:07:50 -0500 Subject: [PATCH 28/33] Apply code formatter --- src/allpairs.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/allpairs.py b/src/allpairs.py index cb305920..f655e040 100644 --- a/src/allpairs.py +++ b/src/allpairs.py @@ -8,6 +8,7 @@ __all__ = ['AllPairs'] + class AllPairs(PRM): required_inputs = ['nodetypes', 'network'] @@ -22,35 +23,36 @@ def generate_inputs(data, filename_map): if input_type not in filename_map: raise ValueError("{input_type} filename is missing") - #Get sources and targets for node input file - #Borrowed code from pathlinker.py + # Get sources and targets for node input file + # Borrowed code from pathlinker.py sources_targets = data.request_node_columns(["sources", "targets"]) if sources_targets is None: - return False - # TODO may allow this but needs testing + 
raise ValueError("All Pairs Shortest Paths requires sources and targets") + both_series = sources_targets.sources & sources_targets.targets - for _index,row in sources_targets[both_series].iterrows(): - warn_msg = row.NODEID+" has been labeled as both a source and a target." + for _index, row in sources_targets[both_series].iterrows(): + warn_msg = row.NODEID + " has been labeled as both a source and a target." warnings.warn(warn_msg, stacklevel=2) - #Create nodetype file + # Create nodetype file input_df = sources_targets[["NODEID"]].copy() input_df.columns = ["#Node"] - input_df.loc[sources_targets["sources"] == True,"Node type"]="source" - input_df.loc[sources_targets["targets"] == True,"Node type"]="target" + input_df.loc[sources_targets["sources"] == True, "Node type"] = "source" + input_df.loc[sources_targets["targets"] == True, "Node type"] = "target" - input_df.to_csv(filename_map["nodetypes"],sep="\t",index=False,columns=["#Node","Node type"]) - - #This is pretty memory intensive. We might want to keep the interactome centralized. - data.get_interactome().to_csv(filename_map["network"],sep="\t",index=False,columns=["Interactor1","Interactor2","Weight"],header=["#Interactor1","Interactor2","Weight"]) + input_df.to_csv(filename_map["nodetypes"], sep="\t", index=False, columns=["#Node", "Node type"]) + # This is pretty memory intensive. We might want to keep the interactome centralized. 
+ data.get_interactome().to_csv(filename_map["network"], sep="\t", index=False, + columns=["Interactor1", "Interactor2", "Weight"], + header=["#Interactor1", "Interactor2", "Weight"]) @staticmethod def run(nodetypes=None, network=None, output_file=None, singularity=False): """ Run All Pairs Shortest Paths with Docker - @param nodetypes: input node types with sources and targets (required) - @param network: input network file (required) + @param nodetypes: input node types with sources and targets (required) + @param network: input network file (required) @param singularity: if True, run using the Singularity container instead of the Docker container @param output_file: path to the output pathway file (required) """ @@ -89,7 +91,6 @@ def run(nodetypes=None, network=None, output_file=None, singularity=False): work_dir) print(out) - @staticmethod def parse_output(raw_pathway_file, standardized_pathway_file): """ @@ -97,6 +98,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file): @param raw_pathway_file: pathway file produced by an algorithm's run function @param standardized_pathway_file: the same pathway written in the universal format """ - df = pd.read_csv(raw_pathway_file,sep='\t',header=None) - df.insert(1,'Rank',1) # add a rank column of 1s since the edges are not ranked. + df = pd.read_csv(raw_pathway_file, sep='\t', header=None) + df.insert(1, 'Rank', 1) # add a rank column of 1s since the edges are not ranked. 
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t') From 6d65777a805c7cb1a333dec9d077e229f64fc654 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 18 Aug 2023 18:15:40 -0500 Subject: [PATCH 29/33] Small cleanup --- docker-wrappers/AllPairs/README.md | 1 - src/allpairs.py | 2 +- test/AllPairs/test_ap.py | 8 +++----- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/docker-wrappers/AllPairs/README.md b/docker-wrappers/AllPairs/README.md index a0226ead..03cdc681 100644 --- a/docker-wrappers/AllPairs/README.md +++ b/docker-wrappers/AllPairs/README.md @@ -13,7 +13,6 @@ To inspect the installed Python packages: ``` docker run reedcompbio/allpairs pip list ``` -Windows users may need to add the prefix `winpty` prefix before these commands. ## Testing diff --git a/src/allpairs.py b/src/allpairs.py index f655e040..eb8a2071 100644 --- a/src/allpairs.py +++ b/src/allpairs.py @@ -81,7 +81,7 @@ def run(nodetypes=None, network=None, output_file=None, singularity=False): '--nodes', node_file, '--output', mapped_out_file] - print('Running AllPairs with arguments: {}'.format(' '.join(command)), flush=True) + print('Running All Pairs Shortest Paths with arguments: {}'.format(' '.join(command)), flush=True) container_framework = 'singularity' if singularity else 'docker' out = run_container(container_framework, diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index 5e9e65e3..06bc5809 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -8,8 +8,7 @@ TEST_DIR = 'test/AllPairs/' OUT_DIR = TEST_DIR+'output/' - -EXPECTED_DIR = TEST_DIR+'expected/' # TODO not currently checked. 
+EXPECTED_DIR = TEST_DIR+'expected/' class TestAllPairs: @@ -23,7 +22,7 @@ def test_allpairs(self): AllPairs.run( nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt', network=TEST_DIR+'input/sample-in-net.txt', - output_file=OUT_DIR+'sample-out.txt' + output_file=str(out_path) ) assert out_path.exists() @@ -35,7 +34,6 @@ def test_allpairs_missing(self): network=TEST_DIR + 'input/sample-in-net.txt', output_file=OUT_DIR+'sample-out.txt') - # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') @@ -46,7 +44,7 @@ def test_allpairs_singularity(self): AllPairs.run( nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt', network=TEST_DIR+'input/sample-in-net.txt', - output_file=OUT_DIR+'sample-out.txt', + output_file=str(out_path), singularity=True ) assert out_path.exists() From b115b38498e564b605b9559640ff8ba47a6e76f8 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Sat, 19 Aug 2023 15:53:34 -0500 Subject: [PATCH 30/33] added tab delimiters into main code --- docker-wrappers/AllPairs/all-pairs-shortest-paths.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py index fc3c7c06..290b3f1b 100644 --- a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py +++ b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py @@ -42,7 +42,6 @@ def allpairs(network_file: Path, nodes_file: Path, output_file: Path): with nodes_file.open() as nodes_f: for line in nodes_f: row = line.strip().split() - # Assumes the file is formatted properly with two whitespace-delimited columns if row[1] == 'source': sources.add(row[0]) elif row[1] == 'target': @@ -53,7 +52,7 @@ def allpairs(network_file: Path, nodes_file: Path, output_file: Path): assert len(targets) > 0, 'There are no targets.' 
# Read graph & assert all the sources/targets are in network - graph = nx.read_weighted_edgelist(network_file) + graph = nx.read_weighted_edgelist(network_file, delimiter='\t') assert len(sources.intersection(graph.nodes())) == len(sources), 'At least one source is not in the interactome.' assert len(targets.intersection(graph.nodes())) == len(targets), 'At least one target is not in the interactome.' @@ -65,7 +64,7 @@ def allpairs(network_file: Path, nodes_file: Path, output_file: Path): nx.add_path(output, p) # Write the subgraph as a list of edges. - nx.write_edgelist(output, output_file, data=False) + nx.write_edgelist(output, output_file, data=False, delimiter='\t') print(f"Wrote output file to {str(output_file)}") From 02db2d387b881f48d85ccbf976e10023d3869f1a Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Sun, 20 Aug 2023 07:58:02 -0500 Subject: [PATCH 31/33] Update All Pairs code parsing and test files to tab-separated --- .../AllPairs/all-pairs-shortest-paths.py | 8 +++++--- .../expected/correctness-expected.txt | 6 +++--- .../AllPairs/expected/sample-out-expected.txt | 20 +++++++++---------- test/AllPairs/input/correctness-network.txt | 10 +++++----- test/AllPairs/input/correctness-nodetypes.txt | 8 ++++---- test/AllPairs/input/sample-in-net.txt | 2 +- test/AllPairs/input/sample-in-nodetypes.txt | 2 +- test/AllPairs/input/zero-length-network.txt | 10 +++++----- test/AllPairs/input/zero-length-nodetypes.txt | 4 ++-- 9 files changed, 36 insertions(+), 34 deletions(-) diff --git a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py index 290b3f1b..dd6b835c 100644 --- a/docker-wrappers/AllPairs/all-pairs-shortest-paths.py +++ b/docker-wrappers/AllPairs/all-pairs-shortest-paths.py @@ -18,8 +18,10 @@ def parse_arguments(): parser = argparse.ArgumentParser( description="All Pairs Shortest Paths pathway reconstruction" ) - parser.add_argument("--network", type=Path, required=True, help="Network file of 
the form ") - parser.add_argument("--nodes", type=Path, required=True, help="Nodes file of the form .") + parser.add_argument("--network", type=Path, required=True, help="Network file of the form ." + " Tab-delimited.") + parser.add_argument("--nodes", type=Path, required=True, help="Nodes file of the form . " + "Tab-delimited.") parser.add_argument("--output", type=Path, required=True, help="Output file") return parser.parse_args() @@ -41,7 +43,7 @@ def allpairs(network_file: Path, nodes_file: Path, output_file: Path): targets = set() with nodes_file.open() as nodes_f: for line in nodes_f: - row = line.strip().split() + row = line.strip().split(sep='\t') if row[1] == 'source': sources.add(row[0]) elif row[1] == 'target': diff --git a/test/AllPairs/expected/correctness-expected.txt b/test/AllPairs/expected/correctness-expected.txt index 960048bb..d2d88c3e 100644 --- a/test/AllPairs/expected/correctness-expected.txt +++ b/test/AllPairs/expected/correctness-expected.txt @@ -1,3 +1,3 @@ -A B -A E -B C +A B +A E +B C diff --git a/test/AllPairs/expected/sample-out-expected.txt b/test/AllPairs/expected/sample-out-expected.txt index 62a76492..8985411e 100644 --- a/test/AllPairs/expected/sample-out-expected.txt +++ b/test/AllPairs/expected/sample-out-expected.txt @@ -1,10 +1,10 @@ -S1 A -S1 B -A E -A F -E T1 -T1 F -F T2 -F B -B S2 -S2 T3 +S1 A +S1 B +A E +A F +E T1 +T1 F +F T2 +F B +B S2 +S2 T3 diff --git a/test/AllPairs/input/correctness-network.txt b/test/AllPairs/input/correctness-network.txt index 2857ee44..661980a8 100644 --- a/test/AllPairs/input/correctness-network.txt +++ b/test/AllPairs/input/correctness-network.txt @@ -1,6 +1,6 @@ #Node1 Node2 -A B 1 -B C 1 -C D 1 -D E 1 -A E 1 \ No newline at end of file +A B 1 +B C 1 +C D 1 +D E 1 +A E 1 diff --git a/test/AllPairs/input/correctness-nodetypes.txt b/test/AllPairs/input/correctness-nodetypes.txt index 5a1e231d..f804a182 100644 --- a/test/AllPairs/input/correctness-nodetypes.txt +++ 
b/test/AllPairs/input/correctness-nodetypes.txt @@ -1,5 +1,5 @@ #Node Node type -A source -B source -C target -E target \ No newline at end of file +A source +B source +C target +E target diff --git a/test/AllPairs/input/sample-in-net.txt b/test/AllPairs/input/sample-in-net.txt index 0816fa48..6286f346 100644 --- a/test/AllPairs/input/sample-in-net.txt +++ b/test/AllPairs/input/sample-in-net.txt @@ -18,4 +18,4 @@ G C 0.5 C F 0.5 G F 0.5 G T2 0.5 -G T3 0.5 \ No newline at end of file +G T3 0.5 diff --git a/test/AllPairs/input/sample-in-nodetypes.txt b/test/AllPairs/input/sample-in-nodetypes.txt index 3abac1ca..05f54a1c 100644 --- a/test/AllPairs/input/sample-in-nodetypes.txt +++ b/test/AllPairs/input/sample-in-nodetypes.txt @@ -3,4 +3,4 @@ S1 source S2 source T1 target T2 target -T3 target \ No newline at end of file +T3 target diff --git a/test/AllPairs/input/zero-length-network.txt b/test/AllPairs/input/zero-length-network.txt index 2857ee44..661980a8 100644 --- a/test/AllPairs/input/zero-length-network.txt +++ b/test/AllPairs/input/zero-length-network.txt @@ -1,6 +1,6 @@ #Node1 Node2 -A B 1 -B C 1 -C D 1 -D E 1 -A E 1 \ No newline at end of file +A B 1 +B C 1 +C D 1 +D E 1 +A E 1 diff --git a/test/AllPairs/input/zero-length-nodetypes.txt b/test/AllPairs/input/zero-length-nodetypes.txt index f34eb191..0e36b81d 100644 --- a/test/AllPairs/input/zero-length-nodetypes.txt +++ b/test/AllPairs/input/zero-length-nodetypes.txt @@ -1,3 +1,3 @@ #Node Node type -A source -A target +A source +A target From 354782c8594bfbb98cc2d44c9554ae9ebca44fbf Mon Sep 17 00:00:00 2001 From: ntalluri Date: Mon, 21 Aug 2023 14:04:50 -0500 Subject: [PATCH 32/33] check the tests on github, passes on my end --- test/AllPairs/test_ap.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index 06bc5809..f1c82f85 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -67,7 +67,6 @@ def test_allpairs_correctness(self): 
network=TEST_DIR+'input/correctness-network.txt', output_file=OUT_DIR+'correctness-out.txt' ) - assert out_path.exists() with open(out_path, 'r') as f: From 91b23469fe7b3f1bb587440731ef0223ba442cf2 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Mon, 21 Aug 2023 14:55:53 -0500 Subject: [PATCH 33/33] Put the rank in the correct column --- src/allpairs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allpairs.py b/src/allpairs.py index eb8a2071..10808415 100644 --- a/src/allpairs.py +++ b/src/allpairs.py @@ -99,5 +99,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file): @param standardized_pathway_file: the same pathway written in the universal format """ df = pd.read_csv(raw_pathway_file, sep='\t', header=None) - df.insert(1, 'Rank', 1) # add a rank column of 1s since the edges are not ranked. + df['Rank'] = 1 # add a rank column of 1s since the edges are not ranked. df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')