From dbe57fc3b21faec332479c2d29e081962b1f0b29 Mon Sep 17 00:00:00 2001 From: Livvy Johnson <4711.liv@gmail.com> Date: Tue, 20 Jun 2023 11:05:24 -0500 Subject: [PATCH 01/44] Copied localneighborhood files, ran local_neighborhood.py --- docker-wrappers/LocalNeighborhood/ln-bad-network.txt | 5 +++++ docker-wrappers/LocalNeighborhood/ln-network.txt | 5 +++++ docker-wrappers/LocalNeighborhood/ln-nodes.txt | 2 ++ docker-wrappers/LocalNeighborhood/ln-output.txt | 3 +++ 4 files changed, 15 insertions(+) create mode 100644 docker-wrappers/LocalNeighborhood/ln-bad-network.txt create mode 100644 docker-wrappers/LocalNeighborhood/ln-network.txt create mode 100644 docker-wrappers/LocalNeighborhood/ln-nodes.txt create mode 100644 docker-wrappers/LocalNeighborhood/ln-output.txt diff --git a/docker-wrappers/LocalNeighborhood/ln-bad-network.txt b/docker-wrappers/LocalNeighborhood/ln-bad-network.txt new file mode 100644 index 00000000..970b0e11 --- /dev/null +++ b/docker-wrappers/LocalNeighborhood/ln-bad-network.txt @@ -0,0 +1,5 @@ +A|B|E +C|B +C|D +D|E +A|E diff --git a/docker-wrappers/LocalNeighborhood/ln-network.txt b/docker-wrappers/LocalNeighborhood/ln-network.txt new file mode 100644 index 00000000..5a9b0451 --- /dev/null +++ b/docker-wrappers/LocalNeighborhood/ln-network.txt @@ -0,0 +1,5 @@ +A|B +C|B +C|D +D|E +A|E diff --git a/docker-wrappers/LocalNeighborhood/ln-nodes.txt b/docker-wrappers/LocalNeighborhood/ln-nodes.txt new file mode 100644 index 00000000..35d242ba --- /dev/null +++ b/docker-wrappers/LocalNeighborhood/ln-nodes.txt @@ -0,0 +1,2 @@ +A +B diff --git a/docker-wrappers/LocalNeighborhood/ln-output.txt b/docker-wrappers/LocalNeighborhood/ln-output.txt new file mode 100644 index 00000000..58dc92d9 --- /dev/null +++ b/docker-wrappers/LocalNeighborhood/ln-output.txt @@ -0,0 +1,3 @@ +A|B +C|B +A|E From 9f177d324c5815a2b499c65d35e79f7689e4e4a7 Mon Sep 17 00:00:00 2001 From: Livvy Johnson <4711.liv@gmail.com> Date: Tue, 20 Jun 2023 11:37:59 -0500 Subject: [PATCH 02/44] reset files --- docker-wrappers/LocalNeighborhood/ln-bad-network.txt | 5 ----- docker-wrappers/LocalNeighborhood/ln-network.txt | 5 ----- docker-wrappers/LocalNeighborhood/ln-nodes.txt | 2 -- docker-wrappers/LocalNeighborhood/ln-output.txt | 3 --- 4 files changed, 15 deletions(-) delete mode 100644 docker-wrappers/LocalNeighborhood/ln-bad-network.txt delete mode 100644 docker-wrappers/LocalNeighborhood/ln-network.txt delete mode 100644 docker-wrappers/LocalNeighborhood/ln-nodes.txt delete mode 100644 docker-wrappers/LocalNeighborhood/ln-output.txt diff --git a/docker-wrappers/LocalNeighborhood/ln-bad-network.txt b/docker-wrappers/LocalNeighborhood/ln-bad-network.txt deleted file mode 100644 index 970b0e11..00000000 --- a/docker-wrappers/LocalNeighborhood/ln-bad-network.txt +++ /dev/null @@ -1,5 +0,0 @@ -A|B|E -C|B -C|D -D|E -A|E diff --git a/docker-wrappers/LocalNeighborhood/ln-network.txt b/docker-wrappers/LocalNeighborhood/ln-network.txt deleted file mode 100644 index 5a9b0451..00000000 --- a/docker-wrappers/LocalNeighborhood/ln-network.txt +++ /dev/null @@ -1,5 +0,0 @@ -A|B -C|B -C|D -D|E -A|E diff --git a/docker-wrappers/LocalNeighborhood/ln-nodes.txt b/docker-wrappers/LocalNeighborhood/ln-nodes.txt deleted file mode 100644 index 35d242ba..00000000 --- a/docker-wrappers/LocalNeighborhood/ln-nodes.txt +++ /dev/null @@ -1,2 +0,0 @@ -A -B diff --git a/docker-wrappers/LocalNeighborhood/ln-output.txt b/docker-wrappers/LocalNeighborhood/ln-output.txt deleted file mode 100644 index 58dc92d9..00000000 --- a/docker-wrappers/LocalNeighborhood/ln-output.txt +++ /dev/null @@ -1,3 +0,0 @@ -A|B -C|B -A|E From a716d70444ff093262b5c6a7dd67d0a4ed61997b Mon Sep 17 00:00:00 2001 From: Livvy Johnson <4711.liv@gmail.com> Date: Wed, 12 Jul 2023 11:10:14 -0500 Subject: [PATCH 03/44] Create Dockerfile and README.md --- docker-wrappers/DOMINO/Dockerfile | 6 ++++++ docker-wrappers/DOMINO/README.md | 3 +++ 2 files changed, 9 insertions(+) create mode 100644 docker-wrappers/DOMINO/Dockerfile create mode 100644 docker-wrappers/DOMINO/README.md diff --git a/docker-wrappers/DOMINO/Dockerfile b/docker-wrappers/DOMINO/Dockerfile new file mode 100644 index 00000000..2e61c580 --- /dev/null +++ b/docker-wrappers/DOMINO/Dockerfile @@ -0,0 +1,6 @@ +# DOMINO wrapper +# https://github.com/Shamir-Lab/DOMINO +FROM python:3.7-alpine + +WORKDIR /DOMINO +RUN pip install domino-python diff --git a/docker-wrappers/DOMINO/README.md b/docker-wrappers/DOMINO/README.md new file mode 100644 index 00000000..bc0633e4 --- /dev/null +++ b/docker-wrappers/DOMINO/README.md @@ -0,0 +1,3 @@ +# DOMINO Docker image + +A Docker image for [DOMINO](https://github.com/Shamir-Lab/DOMINO). From d9b90f45f0d4362240f848b739ef99886c692c8c Mon Sep 17 00:00:00 2001 From: Livvy Johnson <4711.liv@gmail.com> Date: Thu, 13 Jul 2023 10:06:39 -0500 Subject: [PATCH 04/44] change python:3.7-alpine to python:3.7 --- docker-wrappers/DOMINO/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-wrappers/DOMINO/Dockerfile b/docker-wrappers/DOMINO/Dockerfile index 2e61c580..a04ae32b 100644 --- a/docker-wrappers/DOMINO/Dockerfile +++ b/docker-wrappers/DOMINO/Dockerfile @@ -1,6 +1,6 @@ # DOMINO wrapper # https://github.com/Shamir-Lab/DOMINO -FROM python:3.7-alpine +FROM python:3.7 WORKDIR /DOMINO RUN pip install domino-python From bbb7f657304f76b44991c303a27a764c02ad9278 Mon Sep 17 00:00:00 2001 From: Johnson Date: Tue, 18 Jul 2023 11:30:39 -0500 Subject: [PATCH 05/44] Copy visualization files into Dockerfile --- docker-wrappers/DOMINO/Dockerfile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docker-wrappers/DOMINO/Dockerfile b/docker-wrappers/DOMINO/Dockerfile index a04ae32b..cb9e277e 100644 --- a/docker-wrappers/DOMINO/Dockerfile +++ b/docker-wrappers/DOMINO/Dockerfile @@ -4,3 +4,10 @@ FROM python:3.7 WORKDIR /DOMINO RUN pip install domino-python + +RUN mkdir -p /DOMINO/src/data +WORKDIR /DOMINO/src/data + +RUN wget https://raw.githubusercontent.com/Shamir-Lab/DOMINO/master/src/data/ensg2gene_symbol.txt && \ + wget https://raw.githubusercontent.com/Shamir-Lab/DOMINO/master/src/data/ensmusg2gene_symbol.txt && \ + wget https://raw.githubusercontent.com/Shamir-Lab/DOMINO/master/src/data/graph.html.format From 50e31f9951b6f343519be057fe6a4ac3c21bb15a Mon Sep 17 00:00:00 2001 From: Johnson Date: Tue, 18 Jul 2023 12:02:39 -0500 Subject: [PATCH 06/44] Correct path for files --- docker-wrappers/DOMINO/Dockerfile | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/docker-wrappers/DOMINO/Dockerfile b/docker-wrappers/DOMINO/Dockerfile index cb9e277e..baa26f3a 100644 --- a/docker-wrappers/DOMINO/Dockerfile +++ b/docker-wrappers/DOMINO/Dockerfile @@ -2,12 +2,9 @@ # https://github.com/Shamir-Lab/DOMINO FROM python:3.7 -WORKDIR /DOMINO RUN pip install domino-python -RUN mkdir -p /DOMINO/src/data -WORKDIR /DOMINO/src/data - -RUN wget https://raw.githubusercontent.com/Shamir-Lab/DOMINO/master/src/data/ensg2gene_symbol.txt && \ +RUN cd /usr/local/lib/python3.7/site-packages/src/data && \ + wget https://raw.githubusercontent.com/Shamir-Lab/DOMINO/master/src/data/ensg2gene_symbol.txt && \ wget https://raw.githubusercontent.com/Shamir-Lab/DOMINO/master/src/data/ensmusg2gene_symbol.txt && \ wget https://raw.githubusercontent.com/Shamir-Lab/DOMINO/master/src/data/graph.html.format From d05d100266aa3c646c8ad5e8db9528a1afaea01b Mon Sep 17 00:00:00 2001 From: Livvy Johnson <58735771+livj4711@users.noreply.github.com> Date: Wed, 19 Jul 2023 13:18:55 -0500 Subject: [PATCH 07/44] Update docker-wrappers/DOMINO/Dockerfile Co-authored-by: Anthony Gitter --- docker-wrappers/DOMINO/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-wrappers/DOMINO/Dockerfile b/docker-wrappers/DOMINO/Dockerfile index baa26f3a..de6da0eb 100644 --- a/docker-wrappers/DOMINO/Dockerfile +++ b/docker-wrappers/DOMINO/Dockerfile @@ -2,7 +2,7 @@ # https://github.com/Shamir-Lab/DOMINO FROM python:3.7 -RUN pip install domino-python +RUN pip install domino-python==0.1.1 RUN cd /usr/local/lib/python3.7/site-packages/src/data && \ wget https://raw.githubusercontent.com/Shamir-Lab/DOMINO/master/src/data/ensg2gene_symbol.txt && \ From ffe81e95b27037bd6c3e319ba0338627d6abc18d Mon Sep 17 00:00:00 2001 From: Livvy Johnson <58735771+livj4711@users.noreply.github.com> Date: Wed, 19 Jul 2023 14:44:37 -0500 Subject: [PATCH 08/44] Update README.md --- docker-wrappers/DOMINO/README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docker-wrappers/DOMINO/README.md b/docker-wrappers/DOMINO/README.md index bc0633e4..a35e803c 100644 --- a/docker-wrappers/DOMINO/README.md +++ b/docker-wrappers/DOMINO/README.md @@ -1,3 +1,16 @@ # DOMINO Docker image -A Docker image for [DOMINO](https://github.com/Shamir-Lab/DOMINO). +A Docker image for [DOMINO](https://github.com/Shamir-Lab/DOMINO) that is available on [DockerHub](https://hub.docker.com/repository/docker/otjohnson/domino). + +To create the Docker image run: +``` +docker build -t otjohnson/domino -f Dockerfile . +``` +from this directory. + +To inspect the installed Python packages: +``` +winpty docker run otjohnson/domino pip list +``` +The `winpty` prefix is only needed on Windows. + From 982f277e49fc747a6895b57955572d467918d9f0 Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Wed, 19 Jul 2023 16:50:41 -0500 Subject: [PATCH 09/44] force visualizations as true and parallelization as 1 thread --- src/domino.py | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 src/domino.py diff --git a/src/domino.py b/src/domino.py new file mode 100644 index 00000000..4f361250 --- /dev/null +++ b/src/domino.py @@ -0,0 +1,168 @@ +from src.prm import PRM +from pathlib import Path +from src.util import prepare_volume, run_container + +import subprocess +import json +import pandas as pd + +__all__ = ['DOMINO'] + +class DOMINO(PRM): + required_inputs = ['network', 'active_genes'] + + @staticmethod + def generate_inputs(data, filename_map): + """ + Access fields from the dataset and write the required input files + @param data: dataset + @param filename_map: a dict mapping file types in the required_inputs to the filename for that type + @return: + """ + for input_type in DOMINO.required_inputs: + if input_type not in filename_map: + raise ValueError(f"{input_type} filename is missing") + + #Get active genes for node input file + if data.contains_node_columns('active'): + #NODEID is always included in the node table + node_df = data.request_node_columns(['active']) + else: + raise ValueError("DOMINO requires active genes") + + #Create active_genes file + node_df.to_csv(filename_map['active_genes'],sep="\t",index=False,columns=['NODEID'], header=False) + + #Create network file + edges_df = data.get_interactome() + edges_df['ppi'] = 'ppi' + edges_df.to_csv(filename_map['network'],sep='\t',index=False,columns=['Interactor1','ppi','Interactor2'],header=['ID_interactor_A','ppi','ID_interactor_B']) + + + @staticmethod + def run(network=None, active_genes=None, output_folder=None, use_cache=true, slices_threshold=None, module_threshold=None, singularity=false): + """ + Run DOMINO with Docker + Let visualization always true, parallelization always 1 + @param network: input network file (required) + @param active_genes: input active genes (required) + @param output_folder: path to the output pathway file (required) + @param use_cache: if True, use auto-generated cache network files (*.pkl) from previous executions with the same network (optional) + @param slices_threshold: the threshold for considering a slice as relevant (optional) + @param module_threshold: the threshold for considering a putative module as final module (optional) + @param singularity: if True, run using the Singularity container instead of the Docker container (optional) + """ + # Assuming defaults are: use_cache=true + + if not network or not active_genes or not output_folder: + raise ValueError('Required DOMINO arguments are missing') + + work_dir = '/spras' + + # Each volume is a tuple (src, dest) + volumes = list() + + bind_path, network_file = prepare_volume(network, work_dir) + volumes.append(bind_path) + + bind_path, node_file = prepare_volume(active_genes, work_dir) + volumes.append(bind_path) + + bind_path, mapped_output_folder = prepare_volume(str(output_folder), work_dir) + volumes.append(bind_path) + + ######## + + bind_path, mapped_slices_file = prepare_volume(str(output_folder), work_dir) + volumes.append(bind_path) + + # Make the slicer command to run within in the container + slicer_command = ['slicer', + '--network_file', network_file, + '--output_file', mapped_slices_file] + + container_framework = 'singularity' if singularity else 'docker' + slicer_out = run_container(container_framework, + 'otjohnson/domino', + slicer_command, + volumes, + work_dir) + print(slicer_out) + + ######## + + + # Makes the Python command to run within in the container + command = ['domino', + '--active_genes_files', node_file, + '--network_file', network_file, + '--slices_file', slices_file, + '--output_folder', mapped_output_folder + '--parallelization, '1' + '--visualization', 'true'] + + # Add optional arguments + if use_cache is not true: + command.extend(['-c', false]) + if slices_threshold is not None: + command.extend(['-sth', str(slices_threshold)]) + if module_threshold is not None: + command.extend(['-mth', str(slices_threshold)]) + + print('Running DOMINO with arguments: {}'.format(' '.join(command)), flush=True) + + # container_framework = 'singularity' if singularity else 'docker' + out = run_container(container_framework, + 'otjohnson/domino', + command, + volumes, + work_dir) + print(out) + + # delete modules.out + # shutil library + # append one html to end of another (argu) + + + @staticmethod + def parse_output(raw_pathway_file, standardized_pathway_file): + """ + Convert a predicted pathway into the universal format + @param raw_pathway_file: pathway file produced by an algorithm's run function + @param standardized_pathway_file: the same pathway written in the universal format + """ + # edges dataframe + # read html file + with open(raw_pathway_file, 'r') as file: + html = file.read() + # for loop over lines of the file + # if line starts with ' let data' + + + # Find the starting index of the line + start_index = html.find('let data = [') + # Find the ending index of the line + end_index = html.find('];', start_index) + 1 # '+ 1' omits the semicolon + + # Extract the line as a string + line = html[start_index:end_index] + # remove beginning of the line to leave the json formatted string + line2 = line.replace('let data = ', '') + + data = json.loads(line2) + + entries = [] + for entry in data: + tmp = entry['data'] + entries.append(tmp) + + df = pd.DataFrame(entries) + newDf = df.loc[:,['source', 'target']].dropna() + + # concatenate modules dataframe to bottom of outerloop data frame + + + newDf['rank'] = 1 # adds in a rank column of 1s because the edges are not ranked + + newDf.to_csv(standardized_pathway_file, header=False, index=False) + From 13e061bab5e99dbb38f62be25c174494e86aff72 Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Fri, 21 Jul 2023 11:50:47 -0500 Subject: [PATCH 10/44] Update domino.py --- src/domino.py | 83 ++++++++++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 44 deletions(-) diff --git a/src/domino.py b/src/domino.py index 4f361250..951ead94 100644 --- a/src/domino.py +++ b/src/domino.py @@ -2,7 +2,6 @@ from pathlib import Path from src.util import prepare_volume, run_container -import subprocess import json import pandas as pd @@ -40,10 +39,10 @@ def generate_inputs(data, filename_map): @staticmethod - def run(network=None, active_genes=None, output_folder=None, use_cache=true, slices_threshold=None, module_threshold=None, singularity=false): + def run(network=None, active_genes=None, output_folder=None, use_cache=True, slices_threshold=None, module_threshold=None, singularity=False): """ Run DOMINO with Docker - Let visualization always true, parallelization always 1 + Let visualization be always true, parallelization be always 1 thread @param network: input network file (required) @param active_genes: input active genes (required) @param output_folder: path to the output pathway file (required) @@ -59,7 +58,7 @@ def run(network=None, active_genes=None, output_folder=None, use_cache=true, sli work_dir = '/spras' - # Each volume is a tuple (src, dest) + # Each volume is a tuple (source, destination) volumes = list() bind_path, network_file = prepare_volume(network, work_dir) @@ -73,10 +72,9 @@ def run(network=None, active_genes=None, output_folder=None, use_cache=true, sli ######## - bind_path, mapped_slices_file = prepare_volume(str(output_folder), work_dir) - volumes.append(bind_path) + mapped_slices_file = mapped_output_folder + '/slices.txt' - # Make the slicer command to run within in the container + # Make the slicer command to run within the container slicer_command = ['slicer', '--network_file', network_file, '--output_file', mapped_slices_file] @@ -92,22 +90,22 @@ def run(network=None, active_genes=None, output_folder=None, use_cache=true, sli ######## - # Makes the Python command to run within in the container + # Make the Python command to run within the container command = ['domino', '--active_genes_files', node_file, '--network_file', network_file, - '--slices_file', slices_file, - '--output_folder', mapped_output_folder - '--parallelization, '1' + '--slices_file', mapped_slices_file, + '--output_folder', mapped_output_folder, + '--parallelization', '1', '--visualization', 'true'] # Add optional arguments - if use_cache is not true: - command.extend(['-c', false]) + if use_cache is not True: + command.extend(['-c', 'false']) if slices_threshold is not None: command.extend(['-sth', str(slices_threshold)]) if module_threshold is not None: - command.extend(['-mth', str(slices_threshold)]) + command.extend(['-mth', str(module_threshold)]) print('Running DOMINO with arguments: {}'.format(' '.join(command)), flush=True) @@ -119,9 +117,17 @@ def run(network=None, active_genes=None, output_folder=None, use_cache=true, sli work_dir) print(out) - # delete modules.out - # shutil library - # append one html to end of another (argu) + # remove output_folder/modules.out + file_to_rem = Path(output_folder + "/modules.out") + file_to_rem.unlink() + + # concatenate each module html file into one big file + htmlfiles = Path(next(output_folder.glob('module_*.html'))) + with open("bigfile.txt", "w") as fo: + for tempfile in htmlfiles: + with open(tempfile,'r') as fi: fo.write(fi.read()) + + # put bigfile.txt in output_folder? @staticmethod @@ -131,38 +137,27 @@ def parse_output(raw_pathway_file, standardized_pathway_file): @param raw_pathway_file: pathway file produced by an algorithm's run function @param standardized_pathway_file: the same pathway written in the universal format """ - # edges dataframe - # read html file - with open(raw_pathway_file, 'r') as file: - html = file.read() - # for loop over lines of the file - # if line starts with ' let data' - + edges = pd.DataFrame() - # Find the starting index of the line - start_index = html.find('let data = [') - # Find the ending index of the line - end_index = html.find('];', start_index) + 1 # '+ 1' omits the semicolon + with open(raw_pathway_file, 'r') as file: + for line in file: + if line.strip().startswith("let data = ["): + line2 = line.replace('let data = ', '') + line3 = line2.replace(';', '') - # Extract the line as a string - line = html[start_index:end_index] - # remove beginning of the line to leave the json formatted string - line2 = line.replace('let data = ', '') + data = json.loads(line3) - data = json.loads(line2) + entries = [] + for entry in data: + tmp = entry['data'] + entries.append(tmp) - entries = [] - for entry in data: - tmp = entry['data'] - entries.append(tmp) + df = pd.DataFrame(entries) + newdf = df.loc[:,['source', 'target']].dropna() - df = pd.DataFrame(entries) - newDf = df.loc[:,['source', 'target']].dropna() - - # concatenate modules dataframe to bottom of outerloop data frame - + edges = pd.concat([edges, newdf], axis=0) - newDf['rank'] = 1 # adds in a rank column of 1s because the edges are not ranked + edges['rank'] = 1 # adds in a rank column of 1s because the edges are not ranked - newDf.to_csv(standardized_pathway_file, header=False, index=False) + edges.to_csv(standardized_pathway_file, header=False, index=False) From 05166c25702e12b272ff89458707249a561c99a7 Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Fri, 21 Jul 2023 18:28:23 -0500 Subject: [PATCH 11/44] Update domino.py --- src/domino.py | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/src/domino.py b/src/domino.py index 951ead94..92d923a5 100644 --- a/src/domino.py +++ b/src/domino.py @@ -3,6 +3,7 @@ from src.util import prepare_volume, run_container import json +import shutil import pandas as pd __all__ = ['DOMINO'] @@ -39,13 +40,13 @@ def generate_inputs(data, filename_map): @staticmethod - def run(network=None, active_genes=None, output_folder=None, use_cache=True, slices_threshold=None, module_threshold=None, singularity=False): + def run(network=None, active_genes=None, output_file=None, use_cache=True, slices_threshold=None, module_threshold=None, singularity=False): """ Run DOMINO with Docker Let visualization be always true, parallelization be always 1 thread @param network: input network file (required) @param active_genes: input active genes (required) - @param output_folder: path to the output pathway file (required) + @param output_file: path to the output pathway file (required) @param use_cache: if True, use auto-generated cache network files (*.pkl) from previous executions with the same network (optional) @param slices_threshold: the threshold for considering a slice as relevant (optional) @param module_threshold: the threshold for considering a putative module as final module (optional) @@ -53,7 +54,7 @@ def run(network=None, active_genes=None, output_folder=None, use_cache=True, sli """ # Assuming defaults are: use_cache=true - if not network or not active_genes or not output_folder: + if not network or not active_genes or not output_file: raise ValueError('Required DOMINO arguments are missing') work_dir = '/spras' @@ -67,17 +68,21 @@ def run(network=None, active_genes=None, output_folder=None, use_cache=True, sli bind_path, node_file = prepare_volume(active_genes, work_dir) volumes.append(bind_path) - bind_path, mapped_output_folder = prepare_volume(str(output_folder), work_dir) + # Use its --output_folder argument to set the output file prefix to specify an absolute path and prefix + out_dir = Path(output_file).parent + out_dir.mkdir(parents=True, exist_ok=True) + bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) volumes.append(bind_path) ######## - mapped_slices_file = mapped_output_folder + '/slices.txt' + bind_path, mapped_slices_dir = prepare_volume('slices_dir', work_dir) + volumes.append(bind_path) + slices_file = mapped_slices_dir + '/slices.txt' - # Make the slicer command to run within the container slicer_command = ['slicer', '--network_file', network_file, - '--output_file', mapped_slices_file] + '--output_file', slices_file] container_framework = 'singularity' if singularity else 'docker' slicer_out = run_container(container_framework, @@ -87,6 +92,7 @@ def run(network=None, active_genes=None, output_folder=None, use_cache=True, sli work_dir) print(slicer_out) + ######## @@ -94,8 +100,8 @@ def run(network=None, active_genes=None, output_folder=None, use_cache=True, sli command = ['domino', '--active_genes_files', node_file, '--network_file', network_file, - '--slices_file', mapped_slices_file, - '--output_folder', mapped_output_folder, + '--slices_file', slices_file, + '--output_folder', mapped_out_dir, '--parallelization', '1', '--visualization', 'true'] @@ -117,17 +123,22 @@ def run(network=None, active_genes=None, output_folder=None, use_cache=True, sli work_dir) print(out) - # remove output_folder/modules.out - file_to_rem = Path(output_folder + "/modules.out") - file_to_rem.unlink() + + ######## + + slices_file.unlink(missing_ok=True) + for domino_output in out_dir.glob('modules.out'): + domino_output.unlink(missing_ok=True) # concatenate each module html file into one big file - htmlfiles = Path(next(output_folder.glob('module_*.html'))) - with open("bigfile.txt", "w") as fo: - for tempfile in htmlfiles: + bigfile = "bigfile.txt" + + with open(bigfile, "w") as fo: + for tempfile in out_dir.glob('module_*.html'): with open(tempfile,'r') as fi: fo.write(fi.read()) + Path(tempfile).unlink(missing_ok=True) - # put bigfile.txt in output_folder? + shutil.move(bigfile, output_file) @staticmethod From 6e9fb5578cad362671dfab252d91849426634c86 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Sat, 22 Jul 2023 22:24:24 -0500 Subject: [PATCH 12/44] Run pre-commit filter formatting --- src/domino.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/domino.py b/src/domino.py index 92d923a5..becd8e64 100644 --- a/src/domino.py +++ b/src/domino.py @@ -1,11 +1,12 @@ -from src.prm import PRM -from pathlib import Path -from src.util import prepare_volume, run_container - import json import shutil +from pathlib import Path + import pandas as pd +from src.prm import PRM +from src.util import prepare_volume, run_container + __all__ = ['DOMINO'] class DOMINO(PRM): From bb63f7afb97c4641871b53943acfc74cab9dd7af Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Mon, 24 Jul 2023 16:40:55 -0500 Subject: [PATCH 13/44] Update domino.py --- src/domino.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/domino.py b/src/domino.py index 92d923a5..a3158a6d 100644 --- a/src/domino.py +++ b/src/domino.py @@ -76,13 +76,15 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice ######## - bind_path, mapped_slices_dir = prepare_volume('slices_dir', work_dir) + bind_path, mapped_slices_dir = prepare_volume('slices.txt', work_dir) volumes.append(bind_path) - slices_file = mapped_slices_dir + '/slices.txt' + # /spras/ADFJGFD/slices.txt slicer_command = ['slicer', '--network_file', network_file, - '--output_file', slices_file] + '--output_file', mapped_slices_dir] + + print('Running slicer with arguments: {}'.format(' '.join(slicer_command)), flush=True) container_framework = 'singularity' if singularity else 'docker' slicer_out = run_container(container_framework, @@ -90,8 +92,6 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice slicer_command, volumes, work_dir) - print(slicer_out) - ######## @@ -100,7 +100,7 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice command = ['domino', '--active_genes_files', node_file, '--network_file', network_file, - '--slices_file', slices_file, + '--slices_file', mapped_slices_dir, '--output_folder', mapped_out_dir, '--parallelization', '1', '--visualization', 'true'] @@ -126,18 +126,19 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice ######## - slices_file.unlink(missing_ok=True) - for domino_output in out_dir.glob('modules.out'): - domino_output.unlink(missing_ok=True) + Path(mapped_slices_dir).unlink(missing_ok=True) + Path(out_dir, 'modules.out').unlink(missing_ok=True) + #for domino_output in out_dir.glob('modules.out'): + # domino_output.unlink(missing_ok=True) # concatenate each module html file into one big file - bigfile = "bigfile.txt" - with open(bigfile, "w") as fo: - for tempfile in out_dir.glob('module_*.html'): - with open(tempfile,'r') as fi: fo.write(fi.read()) - Path(tempfile).unlink(missing_ok=True) + for html_file in out_dir.glob('module_*.html'): + with open(html_file,'r') as fi: + fo.write(fi.read()) + Path(html_file).unlink(missing_ok=True) + Path(out_dir, bigfile) shutil.move(bigfile, output_file) From 5b3caa0ba2c70a2dca8fc662fe434fb05bbe74ce Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Mon, 24 Jul 2023 16:50:42 -0500 Subject: [PATCH 14/44] Add DOMINO to config file --- config/config.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/config/config.yaml b/config/config.yaml index 2bf1e6b4..1db5da8c 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -54,6 +54,7 @@ run2: b: [2] g: [3] + - name: "meo" params: include: true @@ -71,6 +72,14 @@ flow: [1] # The flow must be an int capacity: [1] + - name: "domino" + params: + include: true + directed: false + run1: + use_cache: ["true"] + slices_threshold: [0.3] + modules_threshold: [0.05] # Here we specify which pathways to run and other file location information. # DataLoader.py can currently only load a single dataset From 0b0263bb87d028780826519db039c39413de06d2 Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Mon, 24 Jul 2023 16:52:06 -0500 Subject: [PATCH 15/44] Add DOMINO to runner.py --- src/runner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/runner.py b/src/runner.py index de774adf..7c009c1f 100644 --- a/src/runner.py +++ b/src/runner.py @@ -6,6 +6,7 @@ from src.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1 from src.omicsintegrator2 import OmicsIntegrator2 as omicsintegrator2 from src.pathlinker import PathLinker as pathlinker +from src.domino import DOMINO as domino def run(algorithm, params): From 619f83f44146a9d6baa21aa750784c0b0a4f689a Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Tue, 25 Jul 2023 10:11:06 -0500 Subject: [PATCH 16/44] Add node-prizes.txt to config.yaml --- config/config.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 1db5da8c..139a8142 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -28,14 +28,14 @@ algorithms: - name: "pathlinker" params: - include: true + include: false directed: true run1: k: range(100,201,100) - name: "omicsintegrator1" params: - include: true + include: false directed: false run1: r: [5] @@ -46,7 +46,7 @@ - name: "omicsintegrator2" params: - include: true + include: false directed: false run1: b: [4] @@ -57,7 +57,7 @@ - name: "meo" params: - include: true + include: false directed: true run1: max_path_length: [3] @@ -66,7 +66,7 @@ - name: "mincostflow" params: - include: true + include: false directed: false run1: flow: [1] # The flow must be an int @@ -79,7 +79,7 @@ run1: use_cache: ["true"] slices_threshold: [0.3] - modules_threshold: [0.05] + module_threshold: [0.05] # Here we specify which pathways to run and other file location information. # DataLoader.py can currently only load a single dataset @@ -97,7 +97,7 @@ - label: data1 # Reuse some of the same sources file as 'data0' but different network and targets - node_files: ["sources.txt", "alternative-targets.txt"] + node_files: ["node-prizes.txt", "sources.txt", "alternative-targets.txt"] edge_files: ["alternative-network.txt"] other_files: [] # Relative path from the spras directory From 259973f3942100d7dde0d6fdef3b8ef28b3c862f Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Tue, 25 Jul 2023 10:11:43 -0500 Subject: [PATCH 17/44] Add active column to node-prizes.txt --- input/node-prizes.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/input/node-prizes.txt b/input/node-prizes.txt index 82bb2716..0e1f682d 100644 --- a/input/node-prizes.txt +++ b/input/node-prizes.txt @@ -1,3 +1,3 @@ -NODEID prize -A 2 -C 5.7 +NODEID prize active +A 2 true +C 5.7 true From 5ece69a9ef6a1f6d7d99c89392cf5054a80dd4d2 Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Tue, 25 Jul 2023 10:14:40 -0500 Subject: [PATCH 18/44] Use output_file as the concatenated html file --- src/domino.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/domino.py b/src/domino.py index 9f7f6399..75ce6d1a 100644 --- a/src/domino.py +++ b/src/domino.py @@ -133,15 +133,12 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice # domino_output.unlink(missing_ok=True) # concatenate each module html file into one big file - with open(bigfile, "w") as fo: + with open(output_file, "w") as fo: for html_file in out_dir.glob('module_*.html'): with open(html_file,'r') as fi: fo.write(fi.read()) Path(html_file).unlink(missing_ok=True) - Path(out_dir, bigfile) - shutil.move(bigfile, output_file) - @staticmethod def parse_output(raw_pathway_file, standardized_pathway_file): From a769603e52d88a3b1e8dcece4df2219e6ee32e9c Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Tue, 25 Jul 2023 10:36:50 -0500 Subject: [PATCH 19/44] Add domino to egfr.yaml --- config/egfr.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/config/egfr.yaml b/config/egfr.yaml index f7d34610..23ff250d 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -62,6 +62,18 @@ algorithms: - 3 rand_restarts: - 10 + - + name: domino + params: + directed: false + include: true + run1: + use_cache: + - "true" + slices_threshold: + - 0.3 + module_threshold: + - 0.05 datasets: - data_dir: input From 7672f1726f9f4e297a2702ced67628f7e4bf5e80 Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Tue, 25 Jul 2023 17:01:20 -0500 Subject: [PATCH 20/44] add active column to tps-egfr-prizes.txt --- input/tps-egfr-prizes.txt | 1404 ++++++++++++++++++------------------- 1 file changed, 702 insertions(+), 702 deletions(-) diff --git a/input/tps-egfr-prizes.txt b/input/tps-egfr-prizes.txt index 6870bb1e..71afb4b9 100644 --- a/input/tps-egfr-prizes.txt +++ b/input/tps-egfr-prizes.txt @@ -1,702 +1,702 @@ -NODEID prize sources targets -1433Z_HUMAN 1.041379133 True -41_HUMAN 3.389112802 True -4ET_HUMAN 2.569973509 True -A8K1N6_HUMAN 1.948221966 True -A9CQZ4_HUMAN 0.421460919 True -AAGAB_HUMAN 0.906857382 True -ABCF1_HUMAN 1.662535462 True -ABI1_HUMAN 2.262002188 True -ABI2_HUMAN 6.039545959 True -ABLM1_HUMAN 1.851877252 True -ACACA_HUMAN 1.413801552 True -ACAP2_HUMAN 2.26361378 True -ACINU_HUMAN 5.059742801 True -ACK1_HUMAN 4.634804389 True -ACLY_HUMAN 0.924296287 True -ACTB_HUMAN 6.332977709 True -ADAT1_HUMAN 0.15086641 True -ADCY6_HUMAN 0.213467876 True -ADDA_HUMAN 2.023396633 True -ADNP_HUMAN 1.863304115 True -AFAD_HUMAN 5.746711895 True -AFTIN_HUMAN 1.428578311 True -AHNK_HUMAN 1.03846887 True -AKA10_HUMAN 1.256166574 True -AKA11_HUMAN 0.927725859 True -AKA12_HUMAN 0.839912266 True -AKAP1_HUMAN 1.744860335 True -AKAP2_HUMAN 1.596611866 True -AMOT_HUMAN 1.79256998 True -ANS1A_HUMAN 2.76115098 True -ANXA2_HUMAN 1.709856841 True -AP3D1_HUMAN 4.077699923 True -APC1_HUMAN 0.888837295 True -AR6P4_HUMAN 0.701112743 True -AR6P6_HUMAN 2.695059469 True -ARHG5_HUMAN 7.044363255 True -ARHG7_HUMAN 3.809839832 True -ARHGB_HUMAN 2.260010614 True -ARIP4_HUMAN 0.270475986 True -ARMX3_HUMAN 0.11573305 True -ARP8_HUMAN 1.094787599 True -ASPM_HUMAN 0.369667496 True -AT133_HUMAN 1.627668371 True -AT1A1_HUMAN 2.904315518 True -AT2B1_HUMAN 2.165602139 True -ATRX_HUMAN 0.701149125 True -ATX2L_HUMAN 4.425369048 True -AZI1_HUMAN 1.861521522 True -B2L13_HUMAN 1.614443902 True -B4DGC6_HUMAN 0.752406932 True -B4DM10_HUMAN 0.474391755 True -B4DQA8_HUMAN 0.12336285 True -B4DQQ2_HUMAN 0.509838368 True -B4DSL6_HUMAN 1.401622791 True -B4DZC2_HUMAN 0.736249376 True -BACH_HUMAN 0.409715682 True -BACH2_HUMAN 0.942291628 True -BAD_HUMAN 0.390261342 True -BAG6_HUMAN 0.406028443 True -BAP18_HUMAN 2.420530962 True -BARD1_HUMAN 0.113513875 True -BAZ1B_HUMAN 0.714778368 True -BAZ2A_HUMAN 1.358383434 True -BBX_HUMAN 1.196178614 True -BCAR1_HUMAN 5.368797237 True -BCLF1_HUMAN 3.268307286 True -BCS1_HUMAN 1.435079955 True -BIG3_HUMAN 0.978095454 True -BMS1_HUMAN 1.398231144 True -BORG1_HUMAN 0.8309547 True -BORG4_HUMAN 0.908661259 True -BRAP_HUMAN 0.507219767 True -BRD2_HUMAN 1.88956051 True -BRD3_HUMAN 0.351886484 True -BUD13_HUMAN 1.407446196 True -BZW2_HUMAN 0.610443432 True -C170B_HUMAN 0.887656818 True -C2CD5_HUMAN 0.319586924 True -CA052_HUMAN 1.950626165 True -CA172_HUMAN 0.676879108 True -CAAP1_HUMAN 0.46595922 True -CAF1B_HUMAN 0.356408629 True -CALM_HUMAN 0.977490284 True -CALX_HUMAN 1.062601393 True -CAV1_HUMAN 0.258171175 True -CBL_HUMAN 8.769147064 True -CBLB_HUMAN 7.734379891 True -CCD25_HUMAN 2.010322541 True -CCD43_HUMAN 1.116038006 True -CCD50_HUMAN 3.354177584 True -CCD86_HUMAN 0.857141606 True -CCD97_HUMAN 0.294338791 True -CCDC6_HUMAN 0.152259131 True -CCNK_HUMAN 2.334610339 True -CCNL2_HUMAN 1.006534651 True -CD2AP_HUMAN 1.354067165 True -CD2B2_HUMAN 0.678693026 True -CDC20_HUMAN 0.355921365 True -CDCA3_HUMAN 2.072183324 True -CDCA5_HUMAN 0.07382236 True -CDK1_HUMAN 5.284915059 True -CDK12_HUMAN 1.170814249 True -CDK13_HUMAN 1.185727819 True -CDK16_HUMAN 0.811435393 True -CDK5_HUMAN 4.687685262 True -CDKL5_HUMAN 1.429420778 True -CDS2_HUMAN 0.178665681 True -CDV3_HUMAN 2.77465514 True -CEBPZ_HUMAN 1.979375439 True -CFDP1_HUMAN 2.743835706 True -CG050_HUMAN 0.358271075 True -CH10_HUMAN 3.59178713 True -CHAP1_HUMAN 0.866076722 True -CHD3_HUMAN 0.642111098 True -CHD4_HUMAN 2.634889563 True -CHIP_HUMAN 0.745987428 True -CHM2B_HUMAN 0.269597733 True -CI078_HUMAN 0.190066548 True -CK052_HUMAN 0.798347313 True -CLAP1_HUMAN 3.346476868 True -CLASR_HUMAN 0.7666283 True -CLSPN_HUMAN 1.291446804 True -COF1_HUMAN 0.363525169 True -CPSF7_HUMAN 1.220915762 True -CRK_HUMAN 6.545276815 True -CRKL_HUMAN 8.296206643 True -CRTC2_HUMAN 0.342365761 True -CSKI2_HUMAN 0.149093366 True -CSPP1_HUMAN 1.449349623 True -CSTFT_HUMAN 0.78914943 True -CTF18_HUMAN 0.122926304 True -CTNB1_HUMAN 1.182358691 True -CTND1_HUMAN 2.470367915 True -CTR9_HUMAN 0.413950406 True -CUL4B_HUMAN 1.007958822 True -CX023_HUMAN 0.176910408 True -CYTSA_HUMAN 5.913323257 True -CYTSB_HUMAN 3.608453481 True -DBNL_HUMAN 0.570543941 True -DC1L1_HUMAN 0.794719394 True -DC1L2_HUMAN 0.507681958 True -DCAF8_HUMAN 0.35336542 True -DDA1_HUMAN 0.872125054 True -DDX21_HUMAN 4.016586169 True -DDX24_HUMAN 0.312606522 True -DDX3X_HUMAN 4.579670309 True -DDX3Y_HUMAN 1.733646236 True -DDX41_HUMAN 0.439652682 True -DDX42_HUMAN 0.855584012 True -DDX46_HUMAN 2.012385881 True -DDX54_HUMAN 2.605173004 True -DDX55_HUMAN 2.28381716 True -DEN4B_HUMAN 0.656664862 True -DENR_HUMAN 2.704039342 True -DEP1B_HUMAN 1.645992476 True -DESP_HUMAN 1.924142048 True -DHX29_HUMAN 0.503843977 True -DHX57_HUMAN 0.57639098 True -DIDO1_HUMAN 0.369232207 True -DJC17_HUMAN 1.844249365 True -DKC1_HUMAN 0.32638162 True -DLG3_HUMAN 5.621515746 True -DLG5_HUMAN 0.108395669 True -DNLI1_HUMAN 1.037846993 True -DNLI3_HUMAN 1.349276265 True -DNM1L_HUMAN 2.606851846 True -DNMT1_HUMAN 2.657624489 True -DOCK1_HUMAN 3.511089211 True -DOCK4_HUMAN 2.453505343 True -DREB_HUMAN 5.795930546 True -DYR1B_HUMAN 2.068163368 True -DYRK4_HUMAN 1.949040201 True -E41L2_HUMAN 1.513086229 True -E7EQJ0_HUMAN 0.750779742 True -E7EW20_HUMAN 5.137837856 True -E9PAU2_HUMAN 1.79615932 True -EDC3_HUMAN 0.613533076 True -EDC4_HUMAN 1.26073022 True -EF1A1_HUMAN 3.774750081 True -EF1B_HUMAN 0.768939794 True -EF1D_HUMAN 1.240472409 True -EFNB2_HUMAN 2.222686177 True -EGF_HUMAN 10.0 True -EGFR_HUMAN 6.787874699 True -EGLN1_HUMAN 1.876580206 True -EIF3B_HUMAN 2.048949271 True -EIF3G_HUMAN 0.505239033 True -ELF2_HUMAN 2.091908646 True -EMAL4_HUMAN 3.448264704 True -ENPL_HUMAN 1.609266838 True -EP15R_HUMAN 1.081920179 True -EPHA2_HUMAN 1.265344964 True -EPHA3_HUMAN 4.890258242 True -EPHA7_HUMAN 4.251623959 True -EPN1_HUMAN 2.366437637 True -EPS15_HUMAN 2.17910473 True -ERF_HUMAN 0.297944821 True -ESF1_HUMAN 0.193582968 True -EYA4_HUMAN 0.825473321 True -EZH2_HUMAN 0.596255303 True -F122A_HUMAN 0.165182041 True -F122B_HUMAN 0.57123671 True -F134A_HUMAN 0.357816627 True -F208A_HUMAN 3.98948863 True -FA21A_HUMAN 1.002587043 True -FA21C_HUMAN 0.383121185 True -FA53C_HUMAN 4.110099934 True -FACD2_HUMAN 1.547110919 True -FAK1_HUMAN 2.378937999 True -FARP1_HUMAN 0.870195531 True -FBX4_HUMAN 0.409421111 True -FETUA_HUMAN 0.279664326 True -FGR_HUMAN 3.030993962 True -FIP1_HUMAN 0.483195258 True -FKB15_HUMAN 1.588035249 True -FNBP4_HUMAN 2.572531179 True -FOXK1_HUMAN 0.740087924 True -FRIH_HUMAN 1.041424474 True -FUBP1_HUMAN 1.89204653 True -FUBP2_HUMAN 0.320472268 True -FUND2_HUMAN 4.456793301 True -FYN_HUMAN 3.643807551 True -G3BP1_HUMAN 1.113074454 True -G3V3T3_HUMAN 0.87271031 True -G5E9E7_HUMAN 0.326371155 True -GAB1_HUMAN 8.815306611 True -GAREM_HUMAN 4.841146589 True -GCP60_HUMAN 0.107749682 True -GGA3_HUMAN 2.179726191 True -GIT1_HUMAN 1.245649259 True -GLCI1_HUMAN 0.816382534 True -GNL1_HUMAN 0.340091044 True -GOGA4_HUMAN 0.005260565 True -GOGB1_HUMAN 0.037450193 True -GPBL1_HUMAN 0.277084294 True -GPN1_HUMAN 1.7071206 True -GRB2_HUMAN 5.98414078 True -GRM1A_HUMAN 2.179983191 True -GSK3A_HUMAN 2.264695948 True -GUAA_HUMAN 0.856813419 True -H0YLL2_HUMAN 0.237244044 True -H14_HUMAN 0.480383941 True -H1X_HUMAN 0.887789081 True -H2AY_HUMAN 3.280738704 True -H31T_HUMAN 1.028861148 True -H90B2_HUMAN 1.94461154 True -HACD3_HUMAN 2.33003099 True -HAP28_HUMAN 0.780304258 True -HAUS6_HUMAN 0.306719814 True -HBS1L_HUMAN 1.187973612 True -HDAC1_HUMAN 0.724838077 True -HDAC2_HUMAN 1.214528615 True -HDAC4_HUMAN 0.340649889 True -HDGF_HUMAN 1.718351317 True -HERC2_HUMAN 0.860486302 True -HEXI2_HUMAN 1.775588114 True -HGS_HUMAN 4.113095921 True -HIPK2_HUMAN 2.608491664 True -HIPK3_HUMAN 2.762743619 True -HIRP3_HUMAN 1.817388935 True -HJURP_HUMAN 1.423661608 True -HN1_HUMAN 0.792681263 True -HNRH1_HUMAN 0.74667418 True -HNRL1_HUMAN 2.940462135 True -HNRPC_HUMAN 1.502603096 True -HNRPK_HUMAN 1.390457195 True -HNRPU_HUMAN 1.433013628 True -HOIL1_HUMAN 4.929439605 True -HS90A_HUMAN 1.234298016 True -HS90B_HUMAN 0.670369851 True -HSF1_HUMAN 0.165464024 True -HSP71_HUMAN 0.014872075 True -HTSF1_HUMAN 3.021004994 True -HUWE1_HUMAN 1.276421502 True -I2BP1_HUMAN 1.095783118 True -I2BP2_HUMAN 2.290397526 True -ICK_HUMAN 2.942876566 True -ICLN_HUMAN 1.779946227 True -IF2B1_HUMAN 0.397153593 True -IF2P_HUMAN 1.333839578 True -IF4B_HUMAN 2.066970923 True -IF4G1_HUMAN 2.744061508 True -IGF1R_HUMAN 4.146854185 True -IMA1_HUMAN 0.456177003 True -IMA3_HUMAN 0.865015974 True -INADL_HUMAN 0.482587688 True -IPP2M_HUMAN 0.426838551 True -IRS4_HUMAN 7.547365483 True -ITSN1_HUMAN 0.985601405 True -ITSN2_HUMAN 4.658445396 True -IWS1_HUMAN 4.516567373 True -J3KPV7_HUMAN 0.217488248 True -JIP4_HUMAN 1.037505232 True -JUN_HUMAN 1.610647615 True -K0195_HUMAN 0.204430283 True -KAP2_HUMAN 5.164561338 True -KAT7_HUMAN 0.462376222 True -KCC1A_HUMAN 6.429634296 True -KCD12_HUMAN 0.796035657 True -KCRB_HUMAN 0.266035628 True -KDM1B_HUMAN 1.939001645 True -KDM2B_HUMAN 1.604732401 True -KDM5C_HUMAN 0.328270733 True -KHDR1_HUMAN 0.453682829 True -KI18B_HUMAN 0.357228499 True -KI21A_HUMAN 1.529069466 True -KI67_HUMAN 1.597588736 True -KIF15_HUMAN 0.914514901 True -KIF4A_HUMAN 2.352298296 True -KLC4_HUMAN 1.337802724 True -KMT2D_HUMAN 0.92885412 True -KPRA_HUMAN 4.667448297 True -KPRB_HUMAN 0.298385653 True -KRI1_HUMAN 0.204209274 True -KS6A1_HUMAN 1.994604375 True -KSR1_HUMAN 0.924686582 True -KSYK_HUMAN 0.403657302 True -LA_HUMAN 1.542455639 True -LAP2_HUMAN 0.77671361 True -LAP2A_HUMAN 0.813822943 True -LAP2B_HUMAN 2.6381885 True -LAR4B_HUMAN 0.45722427 True -LARP1_HUMAN 1.689058705 True -LARP4_HUMAN 0.265064757 True -LAS1L_HUMAN 3.145898529 True -LCK_HUMAN 2.206612924 True -LIMA1_HUMAN 0.763857349 True -LMNB2_HUMAN 1.724853743 True -LMO7_HUMAN 0.982815959 True -LR16A_HUMAN 0.859394019 True -LRC47_HUMAN 7.07812261 True -LSR_HUMAN 3.6799322 True -LYRIC_HUMAN 1.395786276 True -M4K5_HUMAN 4.3602067 True -MA7D1_HUMAN 1.322821095 True -MA7D3_HUMAN 2.401222598 True -MAGD2_HUMAN 1.165141493 True -MAGG1_HUMAN 1.169869749 True -MAP1B_HUMAN 1.59034755 True -MARCS_HUMAN 3.995716995 True -MARK2_HUMAN 0.876596207 True -MBB1A_HUMAN 2.667451487 True -MBD1_HUMAN 0.15022552 True -MCAF1_HUMAN 0.614796044 True -MCM2_HUMAN 0.974279327 True -MCM9_HUMAN 0.49423583 True -MCMBP_HUMAN 1.71527119 True -MCRS1_HUMAN 1.490941396 True -MDC1_HUMAN 1.148797569 True -MDN1_HUMAN 0.385563624 True -MED12_HUMAN 1.97343122 True -MED13_HUMAN 0.599258914 True -MED8_HUMAN 0.3817834 True -MEP50_HUMAN 1.156940696 True -MEPCE_HUMAN 1.231798941 True -MFAP1_HUMAN 0.632244168 True -MGMT_HUMAN 0.968000032 True -MIPT3_HUMAN 3.39798557 True -MK01_HUMAN 5.81196008 True -MK03_HUMAN 6.943344309 True -MK09_HUMAN 1.94059493 True -MK13_HUMAN 0.232002612 True -MK14_HUMAN 2.520374067 True -MKL1_HUMAN 3.035774107 True -MKX_HUMAN 2.791836251 True -ML12A_HUMAN 0.368932545 True -MMTA2_HUMAN 1.390958084 True -MP2K7_HUMAN 0.489744023 True -MPP5_HUMAN 0.314407653 True -MPRI_HUMAN 1.817481019 True -MPZL1_HUMAN 1.655056839 True -MRE11_HUMAN 0.738190636 True -MYH10_HUMAN 1.849967195 True -MYH9_HUMAN 0.245434248 True -MYO6_HUMAN 1.685717043 True -NAA10_HUMAN 0.345584286 True -NACA_HUMAN 1.252883357 True -NADAP_HUMAN 0.485711685 True -NASP_HUMAN 0.8415347 True -NCOR2_HUMAN 0.563433691 True -NELFE_HUMAN 1.712972547 True -NHLC2_HUMAN 2.542453901 True -NHRF1_HUMAN 0.648251307 True -NIPA_HUMAN 0.671152123 True -NIPBL_HUMAN 1.286280677 True -NJMU_HUMAN 0.316300452 True -NKAP_HUMAN 2.206998828 True -NMD3_HUMAN 1.48472794 True -NOC2L_HUMAN 2.118994452 True -NOL9_HUMAN 0.500840669 True -NOP2_HUMAN 0.911395195 True -NOVA1_HUMAN 1.305002259 True -NP1L1_HUMAN 2.00383472 True -NPM_HUMAN 3.156791285 True -NRBP_HUMAN 1.283989611 True -NSD1_HUMAN 0.84247315 True -NSF1C_HUMAN 3.173010351 True -NSUN2_HUMAN 0.921274484 True -NUCKS_HUMAN 4.778779482 True -NUCL_HUMAN 3.335905044 True -NUFP2_HUMAN 0.448727418 True -NUMA1_HUMAN 0.570846909 True -NUP50_HUMAN 2.300765388 True -NUP53_HUMAN 0.484320863 True -NUP62_HUMAN 3.180694509 True -NUP98_HUMAN 1.5577858 True -ODPA_HUMAN 0.553174067 True -OPTN_HUMAN 4.349167634 True -OSBP1_HUMAN 2.28296418 True -OTUD4_HUMAN 0.678037528 True -OTUD5_HUMAN 0.840764562 True -OTUL_HUMAN 2.244160234 True -OXSR1_HUMAN 0.253401453 True -P3C2A_HUMAN 0.45615936 True -P53_HUMAN 0.476723613 True -P66B_HUMAN 0.371896181 True -PAIRB_HUMAN 2.51822385 True -PAK4_HUMAN 0.388980094 True -PALLD_HUMAN 0.767217448 True -PANK2_HUMAN 3.95726334 True -PARD3_HUMAN 3.62236319 True -PAXB1_HUMAN 0.781979283 True -PAXI_HUMAN 3.020694941 True -PCBP1_HUMAN 0.749287384 True -PCM1_HUMAN 0.303706831 True -PCNP_HUMAN 0.71088608 True -PDCD4_HUMAN 1.107202476 True -PDIA6_HUMAN 2.107908802 True -PDLI1_HUMAN 4.359793588 True -PDS5A_HUMAN 2.164046142 True -PDS5B_HUMAN 1.295429037 True -PEBB_HUMAN 2.324148001 True -PELP1_HUMAN 0.543722582 True -PGAM1_HUMAN 3.97654265 True -PGRC1_HUMAN 2.517487336 True -PHAG1_HUMAN 1.820006712 True -PHIP_HUMAN 1.103467281 True -PI4KA_HUMAN 1.679414343 True -PIAS1_HUMAN 1.852335178 True -PININ_HUMAN 2.829830316 True -PKN2_HUMAN 0.441608599 True -PKP2_HUMAN 0.362666426 True -PKP4_HUMAN 2.580052488 True -PLCG1_HUMAN 5.474908717 True -PLEC_HUMAN 0.406445028 True -PLSL_HUMAN 2.32930645 True -PNKP_HUMAN 1.487891637 True -PO2F1_HUMAN 1.317231008 True -PODXL_HUMAN 0.704978489 True -POGZ_HUMAN 1.45915113 True -PP1B_HUMAN 3.102240692 True -PP6R3_HUMAN 0.654110399 True -PPHLN_HUMAN 1.093241707 True -PPIG_HUMAN 1.074929697 True -PPIL4_HUMAN 0.068517452 True -PPR3D_HUMAN 1.671767942 True -PR38A_HUMAN 0.557098142 True -PRC2A_HUMAN 1.167535516 True -PRC2C_HUMAN 1.475510695 True -PRCC_HUMAN 0.147506532 True -PRP4B_HUMAN 1.390871947 True -PRR12_HUMAN 0.509774703 True -PRSR2_HUMAN 0.51837717 True -PSA3_HUMAN 0.562193321 True -PSA5_HUMAN 0.417782172 True -PSIP1_HUMAN 1.191299642 True -PSMD1_HUMAN 1.261700313 True -PSME3_HUMAN 3.252629288 True -PSMG1_HUMAN 0.28963643 True -PSRC1_HUMAN 0.325860681 True -PTN11_HUMAN 5.337797072 True -PTN18_HUMAN 4.00318551 True -PTN2_HUMAN 1.070394012 True -PTPRA_HUMAN 1.275276522 True -PTSS2_HUMAN 0.915921435 True -PUM1_HUMAN 1.1897884 True -PUR6_HUMAN 2.062531946 True -PURB_HUMAN 0.680503798 True -PWP1_HUMAN 0.822373246 True -Q0D2I6_HUMAN 0.459458706 True -Q5T6U8_HUMAN 0.777224731 True -Q6DEN2_HUMAN 0.757888678 True -Q7Z3D7_HUMAN 0.553775924 True -Q96SA0_HUMAN 0.93702992 True -Q9UE24_HUMAN 0.318201558 True -RAB12_HUMAN 3.585410828 True -RAD50_HUMAN 1.858772717 True -RAM_HUMAN 0.346897035 True -RANB3_HUMAN 0.643212967 True -RB_HUMAN 0.608046504 True -RBM10_HUMAN 0.401842933 True -RBM15_HUMAN 1.002932067 True -RBM33_HUMAN 4.274338683 True -RBM34_HUMAN 2.396137318 True -RBM39_HUMAN 0.247819991 True -RBM5_HUMAN 0.889070975 True -RBM8A_HUMAN 1.666064214 True -RBMX_HUMAN 0.960523947 True -RBP2_HUMAN 1.904638183 True -RCOR1_HUMAN 0.865162883 True -REC8_HUMAN 1.353853663 True -RECQ4_HUMAN 2.028196064 True -RFC1_HUMAN 0.973589645 True -RFIP1_HUMAN 0.112424755 True -RGPA1_HUMAN 2.869531965 True -RHG17_HUMAN 0.630295518 True -RIF1_HUMAN 0.265084042 True -RL14_HUMAN 2.002775674 True -RL15_HUMAN 1.351011619 True -RL1D1_HUMAN 4.674779734 True -RL35A_HUMAN 0.930517337 True -RLA1_HUMAN 3.285584572 True -RN213_HUMAN 2.851664106 True -ROA2_HUMAN 0.284213814 True -ROCK1_HUMAN 0.315973552 True -RPRD2_HUMAN 3.101583086 True -RRAS2_HUMAN 1.818149966 True -RREB1_HUMAN 3.760139144 True -RRP36_HUMAN 2.691948639 True -RS28_HUMAN 1.199977996 True -RSF1_HUMAN 0.611187852 True -RU17_HUMAN 2.521833179 True -RUSD2_HUMAN 0.540831946 True -S38A2_HUMAN 0.730788351 True -S6A15_HUMAN 0.774576224 True -SAC31_HUMAN 0.94307496 True -SAMD1_HUMAN 0.425212621 True -SC16A_HUMAN 0.672503677 True -SCAFB_HUMAN 0.761820137 True -SCAM3_HUMAN 2.106109234 True -SCFD1_HUMAN 2.346160986 True -SCNM1_HUMAN 0.719029187 True -SCRIB_HUMAN 1.301544279 True -SDA1_HUMAN 0.061541886 True -SDE2_HUMAN 1.679042655 True -SENP3_HUMAN 1.40787111 True -SEPT2_HUMAN 0.411055936 True -SEPT9_HUMAN 0.142671537 True -SF01_HUMAN 2.044259155 True -SF3A1_HUMAN 0.775578196 True -SF3A3_HUMAN 1.194999973 True -SF3B1_HUMAN 1.470648597 True -SFR19_HUMAN 0.627237552 True -SG223_HUMAN 3.780996792 True -SGTA_HUMAN 1.050228151 True -SH24A_HUMAN 1.002132364 True -SHC1_HUMAN 7.546900921 True -SHIP2_HUMAN 8.437838929 True -SIN3A_HUMAN 1.619010324 True -SIR1_HUMAN 0.458227657 True -SKA3_HUMAN 1.084749599 True -SMAG2_HUMAN 0.910735601 True -SMAP_HUMAN 0.59280741 True -SMC3_HUMAN 1.118391998 True -SMCA4_HUMAN 2.527187477 True -SMN_HUMAN 0.556568343 True -SMRC1_HUMAN 0.563609677 True -SMRCD_HUMAN 1.955206456 True -SNIP1_HUMAN 0.863432223 True -SNTB1_HUMAN 0.895092461 True -SNUT1_HUMAN 0.199576654 True -SNUT2_HUMAN 2.282001891 True -SNX15_HUMAN 1.73028212 True -SO4C1_HUMAN 0.465662642 True -SON_HUMAN 1.335093161 True -SPB1_HUMAN 3.108234637 True -SPF45_HUMAN 1.051702 True -SPN90_HUMAN 1.844315095 True -SPTB2_HUMAN 0.471878337 True -SPTN1_HUMAN 1.115737962 True -SPY1_HUMAN 3.42908006 True -SRP14_HUMAN 1.556717997 True -SRP72_HUMAN 0.695449223 True -SRPK2_HUMAN 1.67639317 True -SRRM1_HUMAN 2.451247829 True -SRRM2_HUMAN 3.361997339 True -SRS11_HUMAN 0.979500106 True -SRSF4_HUMAN 1.102898188 True -SRSF6_HUMAN 1.903664076 True -SSRP1_HUMAN 0.676450721 True -ST32C_HUMAN 0.769326404 True -STA5A_HUMAN 6.882885706 True -STAM2_HUMAN 4.090988236 True -STAT3_HUMAN 2.887868462 True -STIP1_HUMAN 1.24173366 True -STK39_HUMAN 1.854711439 True -STML2_HUMAN 0.576888473 True -STMN1_HUMAN 2.192732251 True -STRN3_HUMAN 1.334203499 True -STRN4_HUMAN 1.90930866 True -SUGP1_HUMAN 0.527575252 True -SUV91_HUMAN 0.65144438 True -SVIL_HUMAN 3.613842583 True -SYHM_HUMAN 0.055517045 True -T22D4_HUMAN 0.811436232 True -T2FA_HUMAN 1.25655639 True -TAOK1_HUMAN 2.400859868 True -TB10B_HUMAN 1.274479392 True -TB182_HUMAN 0.703132143 True -TBA1A_HUMAN 5.367873118 True -TBCB_HUMAN 2.921534627 True -TBCD1_HUMAN 0.446451974 True -TBCD4_HUMAN 0.550677716 True -TCAL3_HUMAN 1.163540679 True -TCOF_HUMAN 3.223369409 True -TCP4_HUMAN 2.023142846 True -TE2IP_HUMAN 0.386420205 True -TEBP_HUMAN 0.775708432 True -TELO2_HUMAN 7.735939158 True -TENC1_HUMAN 1.610375987 True -TF3C1_HUMAN 3.429972136 True -TF3C4_HUMAN 0.992339047 True -TFDP1_HUMAN 0.083761469 True -TFP11_HUMAN 1.485256404 True -TFPT_HUMAN 0.557146512 True -TICRR_HUMAN 1.305944158 True -TIF1B_HUMAN 0.320264376 True -TIM_HUMAN 0.863369747 True -TJAP1_HUMAN 1.069817499 True -TM1L1_HUMAN 1.606585284 True -TM1L2_HUMAN 7.141828062 True -TM87A_HUMAN 0.237663573 True -TMX1_HUMAN 0.593573422 True -TNIK_HUMAN 1.271095902 True -TOIP1_HUMAN 1.910066228 True -TOP2A_HUMAN 0.547849196 True -TOP2B_HUMAN 5.468058424 True -TP53B_HUMAN 1.203476942 True -TPC12_HUMAN 0.920673989 True -TPR_HUMAN 1.927839259 True -TR150_HUMAN 1.41920725 True -TRA2A_HUMAN 2.212379915 True -TRAD1_HUMAN 0.673971499 True -TRI18_HUMAN 0.997235085 True -TRI25_HUMAN 1.305017212 True -TTC33_HUMAN 0.514102133 True -TX264_HUMAN 2.141372127 True -TXLNA_HUMAN 0.740262272 True -U520_HUMAN 2.005382024 True -UBE2O_HUMAN 2.807881097 True -UBL7_HUMAN 3.682226141 True -UBP10_HUMAN 1.192686682 True -UBP16_HUMAN 0.507055344 True -UBP24_HUMAN 2.226210731 True -UBP2L_HUMAN 0.641599057 True -UBP8_HUMAN 0.613072889 True -UIMC1_HUMAN 0.439297945 True -UNG_HUMAN 0.244686456 True -UNK_HUMAN 1.012344497 True -VAMP4_HUMAN 1.684655515 True -VINC_HUMAN 2.334430099 True -VPRBP_HUMAN 0.769210238 True -VRK3_HUMAN 0.682514818 True -WAC_HUMAN 0.771007822 True -WAP53_HUMAN 1.781641467 True -WAPL_HUMAN 0.489512544 True -WASL_HUMAN 2.709891864 True -WDHD1_HUMAN 2.984224778 True -WDR4_HUMAN 0.117013281 True -WDR44_HUMAN 0.165462318 True -WDR48_HUMAN 4.855701146 True -WDR55_HUMAN 0.240532253 True -WDR62_HUMAN 0.496230535 True -WDR70_HUMAN 2.055000425 True -WDR75_HUMAN 0.442295245 True -WIPF2_HUMAN 2.036351908 True -WIPI2_HUMAN 0.704303272 True -WIZ_HUMAN 2.423180827 True -WNK1_HUMAN 0.562340064 True -WRIP1_HUMAN 0.328288345 True -XPC_HUMAN 0.139447947 True -XRCC1_HUMAN 3.759734218 True -XRCC6_HUMAN 1.36897206 True -XRN2_HUMAN 1.401650058 True -YAP1_HUMAN 1.50029366 True -YBOX1_HUMAN 1.625078319 True -YBOX3_HUMAN 0.283774141 True -YRDC_HUMAN 0.759456845 True -Z280C_HUMAN 3.957096259 True -Z512B_HUMAN 0.086040691 True -ZBT7A_HUMAN 0.668040882 True -ZC11A_HUMAN 0.155049275 True -ZC3H4_HUMAN 1.420959201 True -ZC3HD_HUMAN 0.113112056 True -ZCH18_HUMAN 3.79962429 True -ZEB1_HUMAN 0.134563631 True -ZFY16_HUMAN 2.208481722 True -ZFY19_HUMAN 0.602219063 True -ZKSC1_HUMAN 2.18084174 True -ZMYM4_HUMAN 1.074991674 True -ZN148_HUMAN 0.618912748 True -ZN318_HUMAN 1.99333318 True -ZN609_HUMAN 2.121117169 True -ZN638_HUMAN 1.63818319 True -ZN703_HUMAN 1.160118644 True -ZN830_HUMAN 1.172929746 True -ZNRF2_HUMAN 1.201348674 True -ZO1_HUMAN 1.998443753 True -ZO2_HUMAN 2.359999053 True -ZRAB2_HUMAN 0.282624466 True -ZYX_HUMAN 1.939922301 True +NODEID prize sources targets active +1433Z_HUMAN 1.041379133 True True +41_HUMAN 3.389112802 True True +4ET_HUMAN 2.569973509 True True +A8K1N6_HUMAN 1.948221966 True True +A9CQZ4_HUMAN 0.421460919 True True +AAGAB_HUMAN 0.906857382 True True +ABCF1_HUMAN 1.662535462 True True +ABI1_HUMAN 2.262002188 True True +ABI2_HUMAN 6.039545959 True True +ABLM1_HUMAN 1.851877252 True True +ACACA_HUMAN 1.413801552 True True +ACAP2_HUMAN 2.26361378 True True +ACINU_HUMAN 5.059742801 True True +ACK1_HUMAN 4.634804389 True True +ACLY_HUMAN 0.924296287 True True +ACTB_HUMAN 6.332977709 True True +ADAT1_HUMAN 0.15086641 True True +ADCY6_HUMAN 0.213467876 True True +ADDA_HUMAN 2.023396633 True True +ADNP_HUMAN 1.863304115 True True +AFAD_HUMAN 5.746711895 True True +AFTIN_HUMAN 1.428578311 True True +AHNK_HUMAN 1.03846887 True True +AKA10_HUMAN 1.256166574 True True +AKA11_HUMAN 0.927725859 True True +AKA12_HUMAN 0.839912266 True True +AKAP1_HUMAN 1.744860335 True True +AKAP2_HUMAN 1.596611866 True True +AMOT_HUMAN 1.79256998 True True +ANS1A_HUMAN 2.76115098 True True +ANXA2_HUMAN 1.709856841 True True +AP3D1_HUMAN 4.077699923 True True +APC1_HUMAN 0.888837295 True True +AR6P4_HUMAN 0.701112743 True True +AR6P6_HUMAN 2.695059469 True True +ARHG5_HUMAN 7.044363255 True True +ARHG7_HUMAN 3.809839832 True True +ARHGB_HUMAN 2.260010614 True True +ARIP4_HUMAN 0.270475986 True True +ARMX3_HUMAN 0.11573305 True True +ARP8_HUMAN 1.094787599 True True +ASPM_HUMAN 0.369667496 True True +AT133_HUMAN 1.627668371 True True +AT1A1_HUMAN 2.904315518 True True +AT2B1_HUMAN 2.165602139 True True +ATRX_HUMAN 0.701149125 True True +ATX2L_HUMAN 4.425369048 True True +AZI1_HUMAN 1.861521522 True True +B2L13_HUMAN 1.614443902 True True +B4DGC6_HUMAN 0.752406932 True True +B4DM10_HUMAN 0.474391755 True True +B4DQA8_HUMAN 0.12336285 True True +B4DQQ2_HUMAN 0.509838368 True True +B4DSL6_HUMAN 1.401622791 True True +B4DZC2_HUMAN 0.736249376 True True +BACH_HUMAN 0.409715682 True True +BACH2_HUMAN 0.942291628 True True +BAD_HUMAN 0.390261342 True True +BAG6_HUMAN 0.406028443 True True +BAP18_HUMAN 2.420530962 True True +BARD1_HUMAN 0.113513875 True True +BAZ1B_HUMAN 0.714778368 True True +BAZ2A_HUMAN 1.358383434 True True +BBX_HUMAN 1.196178614 True True +BCAR1_HUMAN 5.368797237 True True +BCLF1_HUMAN 3.268307286 True True +BCS1_HUMAN 1.435079955 True True +BIG3_HUMAN 0.978095454 True True +BMS1_HUMAN 1.398231144 True True +BORG1_HUMAN 0.8309547 True True +BORG4_HUMAN 0.908661259 True True +BRAP_HUMAN 0.507219767 True True +BRD2_HUMAN 1.88956051 True True +BRD3_HUMAN 0.351886484 True True +BUD13_HUMAN 1.407446196 True True +BZW2_HUMAN 0.610443432 True True +C170B_HUMAN 0.887656818 True True +C2CD5_HUMAN 0.319586924 True True +CA052_HUMAN 1.950626165 True True +CA172_HUMAN 0.676879108 True True +CAAP1_HUMAN 0.46595922 True True +CAF1B_HUMAN 0.356408629 True True +CALM_HUMAN 0.977490284 True True +CALX_HUMAN 1.062601393 True True +CAV1_HUMAN 0.258171175 True True +CBL_HUMAN 8.769147064 True True +CBLB_HUMAN 7.734379891 True True +CCD25_HUMAN 2.010322541 True True +CCD43_HUMAN 1.116038006 True True +CCD50_HUMAN 3.354177584 True True +CCD86_HUMAN 0.857141606 True True +CCD97_HUMAN 0.294338791 True True +CCDC6_HUMAN 0.152259131 True True +CCNK_HUMAN 2.334610339 True True +CCNL2_HUMAN 1.006534651 True True +CD2AP_HUMAN 1.354067165 True True +CD2B2_HUMAN 0.678693026 True True +CDC20_HUMAN 0.355921365 True True +CDCA3_HUMAN 2.072183324 True True +CDCA5_HUMAN 0.07382236 True True +CDK1_HUMAN 5.284915059 True True +CDK12_HUMAN 1.170814249 True True +CDK13_HUMAN 1.185727819 True True +CDK16_HUMAN 0.811435393 True True +CDK5_HUMAN 4.687685262 True True +CDKL5_HUMAN 1.429420778 True True +CDS2_HUMAN 0.178665681 True True +CDV3_HUMAN 2.77465514 True True +CEBPZ_HUMAN 1.979375439 True True +CFDP1_HUMAN 2.743835706 True True +CG050_HUMAN 0.358271075 True True +CH10_HUMAN 3.59178713 True True +CHAP1_HUMAN 0.866076722 True True +CHD3_HUMAN 0.642111098 True True +CHD4_HUMAN 2.634889563 True True +CHIP_HUMAN 0.745987428 True True +CHM2B_HUMAN 0.269597733 True True +CI078_HUMAN 0.190066548 True True +CK052_HUMAN 0.798347313 True True +CLAP1_HUMAN 3.346476868 True True +CLASR_HUMAN 0.7666283 True True +CLSPN_HUMAN 1.291446804 True True +COF1_HUMAN 0.363525169 True True +CPSF7_HUMAN 1.220915762 True True +CRK_HUMAN 6.545276815 True True +CRKL_HUMAN 8.296206643 True True +CRTC2_HUMAN 0.342365761 True True +CSKI2_HUMAN 0.149093366 True True +CSPP1_HUMAN 1.449349623 True True +CSTFT_HUMAN 0.78914943 True True +CTF18_HUMAN 0.122926304 True True +CTNB1_HUMAN 1.182358691 True True +CTND1_HUMAN 2.470367915 True True +CTR9_HUMAN 0.413950406 True True +CUL4B_HUMAN 1.007958822 True True +CX023_HUMAN 0.176910408 True True +CYTSA_HUMAN 5.913323257 True True +CYTSB_HUMAN 3.608453481 True True +DBNL_HUMAN 0.570543941 True True +DC1L1_HUMAN 0.794719394 True True +DC1L2_HUMAN 0.507681958 True True +DCAF8_HUMAN 0.35336542 True True +DDA1_HUMAN 0.872125054 True True +DDX21_HUMAN 4.016586169 True True +DDX24_HUMAN 0.312606522 True True +DDX3X_HUMAN 4.579670309 True True +DDX3Y_HUMAN 1.733646236 True True +DDX41_HUMAN 0.439652682 True True +DDX42_HUMAN 0.855584012 True True +DDX46_HUMAN 2.012385881 True True +DDX54_HUMAN 2.605173004 True True +DDX55_HUMAN 2.28381716 True True +DEN4B_HUMAN 0.656664862 True True +DENR_HUMAN 2.704039342 True True +DEP1B_HUMAN 1.645992476 True True +DESP_HUMAN 1.924142048 True True +DHX29_HUMAN 0.503843977 True True +DHX57_HUMAN 0.57639098 True True +DIDO1_HUMAN 0.369232207 True True +DJC17_HUMAN 1.844249365 True True +DKC1_HUMAN 0.32638162 True True +DLG3_HUMAN 5.621515746 True True +DLG5_HUMAN 0.108395669 True True +DNLI1_HUMAN 1.037846993 True True +DNLI3_HUMAN 1.349276265 True True +DNM1L_HUMAN 2.606851846 True True +DNMT1_HUMAN 2.657624489 True True +DOCK1_HUMAN 3.511089211 True True +DOCK4_HUMAN 2.453505343 True True +DREB_HUMAN 5.795930546 True True +DYR1B_HUMAN 2.068163368 True True +DYRK4_HUMAN 1.949040201 True True +E41L2_HUMAN 1.513086229 True True +E7EQJ0_HUMAN 0.750779742 True True +E7EW20_HUMAN 5.137837856 True True +E9PAU2_HUMAN 1.79615932 True True +EDC3_HUMAN 0.613533076 True True +EDC4_HUMAN 1.26073022 True True +EF1A1_HUMAN 3.774750081 True True +EF1B_HUMAN 0.768939794 True True +EF1D_HUMAN 1.240472409 True True +EFNB2_HUMAN 2.222686177 True True +EGF_HUMAN 10 True True +EGFR_HUMAN 6.787874699 True True +EGLN1_HUMAN 1.876580206 True True +EIF3B_HUMAN 2.048949271 True True +EIF3G_HUMAN 0.505239033 True True +ELF2_HUMAN 2.091908646 True True +EMAL4_HUMAN 3.448264704 True True +ENPL_HUMAN 1.609266838 True True +EP15R_HUMAN 1.081920179 True True +EPHA2_HUMAN 1.265344964 True True +EPHA3_HUMAN 4.890258242 True True +EPHA7_HUMAN 4.251623959 True True +EPN1_HUMAN 2.366437637 True True +EPS15_HUMAN 2.17910473 True True +ERF_HUMAN 0.297944821 True True +ESF1_HUMAN 0.193582968 True True +EYA4_HUMAN 0.825473321 True True +EZH2_HUMAN 0.596255303 True True +F122A_HUMAN 0.165182041 True True +F122B_HUMAN 0.57123671 True True +F134A_HUMAN 0.357816627 True True +F208A_HUMAN 3.98948863 True True +FA21A_HUMAN 1.002587043 True True +FA21C_HUMAN 0.383121185 True True +FA53C_HUMAN 4.110099934 True True +FACD2_HUMAN 1.547110919 True True +FAK1_HUMAN 2.378937999 True True +FARP1_HUMAN 0.870195531 True True +FBX4_HUMAN 0.409421111 True True +FETUA_HUMAN 0.279664326 True True +FGR_HUMAN 3.030993962 True True +FIP1_HUMAN 0.483195258 True True +FKB15_HUMAN 1.588035249 True True +FNBP4_HUMAN 2.572531179 True True +FOXK1_HUMAN 0.740087924 True True +FRIH_HUMAN 1.041424474 True True +FUBP1_HUMAN 1.89204653 True True +FUBP2_HUMAN 0.320472268 True True +FUND2_HUMAN 4.456793301 True True +FYN_HUMAN 3.643807551 True True +G3BP1_HUMAN 1.113074454 True True +G3V3T3_HUMAN 0.87271031 True True +G5E9E7_HUMAN 0.326371155 True True +GAB1_HUMAN 8.815306611 True True +GAREM_HUMAN 4.841146589 True True +GCP60_HUMAN 0.107749682 True True +GGA3_HUMAN 2.179726191 True True +GIT1_HUMAN 1.245649259 True True +GLCI1_HUMAN 0.816382534 True True +GNL1_HUMAN 0.340091044 True True +GOGA4_HUMAN 0.005260565 True True +GOGB1_HUMAN 0.037450193 True True +GPBL1_HUMAN 0.277084294 True True +GPN1_HUMAN 1.7071206 True True +GRB2_HUMAN 5.98414078 True True +GRM1A_HUMAN 2.179983191 True True +GSK3A_HUMAN 2.264695948 True True +GUAA_HUMAN 0.856813419 True True +H0YLL2_HUMAN 0.237244044 True True +H14_HUMAN 0.480383941 True True +H1X_HUMAN 0.887789081 True True +H2AY_HUMAN 3.280738704 True True +H31T_HUMAN 1.028861148 True True +H90B2_HUMAN 1.94461154 True True +HACD3_HUMAN 2.33003099 True True +HAP28_HUMAN 0.780304258 True True +HAUS6_HUMAN 0.306719814 True True +HBS1L_HUMAN 1.187973612 True True +HDAC1_HUMAN 0.724838077 True True +HDAC2_HUMAN 1.214528615 True True +HDAC4_HUMAN 0.340649889 True True +HDGF_HUMAN 1.718351317 True True +HERC2_HUMAN 0.860486302 True True +HEXI2_HUMAN 1.775588114 True True +HGS_HUMAN 4.113095921 True True +HIPK2_HUMAN 2.608491664 True True +HIPK3_HUMAN 2.762743619 True True +HIRP3_HUMAN 1.817388935 True True +HJURP_HUMAN 1.423661608 True True +HN1_HUMAN 0.792681263 True True +HNRH1_HUMAN 0.74667418 True True +HNRL1_HUMAN 2.940462135 True True +HNRPC_HUMAN 1.502603096 True True +HNRPK_HUMAN 1.390457195 True True +HNRPU_HUMAN 1.433013628 True True +HOIL1_HUMAN 4.929439605 True True +HS90A_HUMAN 1.234298016 True True +HS90B_HUMAN 0.670369851 True True +HSF1_HUMAN 0.165464024 True True +HSP71_HUMAN 0.014872075 True True +HTSF1_HUMAN 3.021004994 True True +HUWE1_HUMAN 1.276421502 True True +I2BP1_HUMAN 1.095783118 True True +I2BP2_HUMAN 2.290397526 True True +ICK_HUMAN 2.942876566 True True +ICLN_HUMAN 1.779946227 True True +IF2B1_HUMAN 0.397153593 True True +IF2P_HUMAN 1.333839578 True True +IF4B_HUMAN 2.066970923 True True +IF4G1_HUMAN 2.744061508 True True +IGF1R_HUMAN 4.146854185 True True +IMA1_HUMAN 0.456177003 True True +IMA3_HUMAN 0.865015974 True True +INADL_HUMAN 0.482587688 True True +IPP2M_HUMAN 0.426838551 True True +IRS4_HUMAN 7.547365483 True True +ITSN1_HUMAN 0.985601405 True True +ITSN2_HUMAN 4.658445396 True True +IWS1_HUMAN 4.516567373 True True +J3KPV7_HUMAN 0.217488248 True True +JIP4_HUMAN 1.037505232 True True +JUN_HUMAN 1.610647615 True True +K0195_HUMAN 0.204430283 True True +KAP2_HUMAN 5.164561338 True True +KAT7_HUMAN 0.462376222 True True +KCC1A_HUMAN 6.429634296 True True +KCD12_HUMAN 0.796035657 True True +KCRB_HUMAN 0.266035628 True True +KDM1B_HUMAN 1.939001645 True True +KDM2B_HUMAN 1.604732401 True True +KDM5C_HUMAN 0.328270733 True True +KHDR1_HUMAN 0.453682829 True True +KI18B_HUMAN 0.357228499 True True +KI21A_HUMAN 1.529069466 True True +KI67_HUMAN 1.597588736 True True +KIF15_HUMAN 0.914514901 True True +KIF4A_HUMAN 2.352298296 True True +KLC4_HUMAN 1.337802724 True True +KMT2D_HUMAN 0.92885412 True True +KPRA_HUMAN 4.667448297 True True +KPRB_HUMAN 0.298385653 True True +KRI1_HUMAN 0.204209274 True True +KS6A1_HUMAN 1.994604375 True True +KSR1_HUMAN 0.924686582 True True +KSYK_HUMAN 0.403657302 True True +LA_HUMAN 1.542455639 True True +LAP2_HUMAN 0.77671361 True True +LAP2A_HUMAN 0.813822943 True True +LAP2B_HUMAN 2.6381885 True True +LAR4B_HUMAN 0.45722427 True True +LARP1_HUMAN 1.689058705 True True +LARP4_HUMAN 0.265064757 True True +LAS1L_HUMAN 3.145898529 True True +LCK_HUMAN 2.206612924 True True +LIMA1_HUMAN 0.763857349 True True +LMNB2_HUMAN 1.724853743 True True +LMO7_HUMAN 0.982815959 True True +LR16A_HUMAN 0.859394019 True True +LRC47_HUMAN 7.07812261 True True +LSR_HUMAN 3.6799322 True True +LYRIC_HUMAN 1.395786276 True True +M4K5_HUMAN 4.3602067 True True +MA7D1_HUMAN 1.322821095 True True +MA7D3_HUMAN 2.401222598 True True +MAGD2_HUMAN 1.165141493 True True +MAGG1_HUMAN 1.169869749 True True +MAP1B_HUMAN 1.59034755 True True +MARCS_HUMAN 3.995716995 True True +MARK2_HUMAN 0.876596207 True True +MBB1A_HUMAN 2.667451487 True True +MBD1_HUMAN 0.15022552 True True +MCAF1_HUMAN 0.614796044 True True +MCM2_HUMAN 0.974279327 True True +MCM9_HUMAN 0.49423583 True True +MCMBP_HUMAN 1.71527119 True True +MCRS1_HUMAN 1.490941396 True True +MDC1_HUMAN 1.148797569 True True +MDN1_HUMAN 0.385563624 True True +MED12_HUMAN 1.97343122 True True +MED13_HUMAN 0.599258914 True True +MED8_HUMAN 0.3817834 True True +MEP50_HUMAN 1.156940696 True True +MEPCE_HUMAN 1.231798941 True True +MFAP1_HUMAN 0.632244168 True True +MGMT_HUMAN 0.968000032 True True +MIPT3_HUMAN 3.39798557 True True +MK01_HUMAN 5.81196008 True True +MK03_HUMAN 6.943344309 True True +MK09_HUMAN 1.94059493 True True +MK13_HUMAN 0.232002612 True True +MK14_HUMAN 2.520374067 True True +MKL1_HUMAN 3.035774107 True True +MKX_HUMAN 2.791836251 True True +ML12A_HUMAN 0.368932545 True True +MMTA2_HUMAN 1.390958084 True True +MP2K7_HUMAN 0.489744023 True True +MPP5_HUMAN 0.314407653 True True +MPRI_HUMAN 1.817481019 True True +MPZL1_HUMAN 1.655056839 True True +MRE11_HUMAN 0.738190636 True True +MYH10_HUMAN 1.849967195 True True +MYH9_HUMAN 0.245434248 True True +MYO6_HUMAN 1.685717043 True True +NAA10_HUMAN 0.345584286 True True +NACA_HUMAN 1.252883357 True True +NADAP_HUMAN 0.485711685 True True +NASP_HUMAN 0.8415347 True True +NCOR2_HUMAN 0.563433691 True True +NELFE_HUMAN 1.712972547 True True +NHLC2_HUMAN 2.542453901 True True +NHRF1_HUMAN 0.648251307 True True +NIPA_HUMAN 0.671152123 True True +NIPBL_HUMAN 1.286280677 True True +NJMU_HUMAN 0.316300452 True True +NKAP_HUMAN 2.206998828 True True +NMD3_HUMAN 1.48472794 True True +NOC2L_HUMAN 2.118994452 True True +NOL9_HUMAN 0.500840669 True True +NOP2_HUMAN 0.911395195 True True +NOVA1_HUMAN 1.305002259 True True +NP1L1_HUMAN 2.00383472 True True +NPM_HUMAN 3.156791285 True True +NRBP_HUMAN 1.283989611 True True +NSD1_HUMAN 0.84247315 True True +NSF1C_HUMAN 3.173010351 True True +NSUN2_HUMAN 0.921274484 True True +NUCKS_HUMAN 4.778779482 True True +NUCL_HUMAN 3.335905044 True True +NUFP2_HUMAN 0.448727418 True True +NUMA1_HUMAN 0.570846909 True True +NUP50_HUMAN 2.300765388 True True +NUP53_HUMAN 0.484320863 True True +NUP62_HUMAN 3.180694509 True True +NUP98_HUMAN 1.5577858 True True +ODPA_HUMAN 0.553174067 True True +OPTN_HUMAN 4.349167634 True True +OSBP1_HUMAN 2.28296418 True True +OTUD4_HUMAN 0.678037528 True True +OTUD5_HUMAN 0.840764562 True True +OTUL_HUMAN 2.244160234 True True +OXSR1_HUMAN 0.253401453 True True +P3C2A_HUMAN 0.45615936 True True +P53_HUMAN 0.476723613 True True +P66B_HUMAN 0.371896181 True True +PAIRB_HUMAN 2.51822385 True True +PAK4_HUMAN 0.388980094 True True +PALLD_HUMAN 0.767217448 True True +PANK2_HUMAN 3.95726334 True True +PARD3_HUMAN 3.62236319 True True +PAXB1_HUMAN 0.781979283 True True +PAXI_HUMAN 3.020694941 True True +PCBP1_HUMAN 0.749287384 True True +PCM1_HUMAN 0.303706831 True True +PCNP_HUMAN 0.71088608 True True +PDCD4_HUMAN 1.107202476 True True +PDIA6_HUMAN 2.107908802 True True +PDLI1_HUMAN 4.359793588 True True +PDS5A_HUMAN 2.164046142 True True +PDS5B_HUMAN 1.295429037 True True +PEBB_HUMAN 2.324148001 True True +PELP1_HUMAN 0.543722582 True True +PGAM1_HUMAN 3.97654265 True True +PGRC1_HUMAN 2.517487336 True True +PHAG1_HUMAN 1.820006712 True True +PHIP_HUMAN 1.103467281 True True +PI4KA_HUMAN 1.679414343 True True +PIAS1_HUMAN 1.852335178 True True +PININ_HUMAN 2.829830316 True True +PKN2_HUMAN 0.441608599 True True +PKP2_HUMAN 0.362666426 True True +PKP4_HUMAN 2.580052488 True True +PLCG1_HUMAN 5.474908717 True True +PLEC_HUMAN 0.406445028 True True +PLSL_HUMAN 2.32930645 True True +PNKP_HUMAN 1.487891637 True True +PO2F1_HUMAN 1.317231008 True True +PODXL_HUMAN 0.704978489 True True +POGZ_HUMAN 1.45915113 True True +PP1B_HUMAN 3.102240692 True True +PP6R3_HUMAN 0.654110399 True True +PPHLN_HUMAN 1.093241707 True True +PPIG_HUMAN 1.074929697 True True +PPIL4_HUMAN 0.068517452 True True +PPR3D_HUMAN 1.671767942 True True +PR38A_HUMAN 0.557098142 True True +PRC2A_HUMAN 1.167535516 True True +PRC2C_HUMAN 1.475510695 True True +PRCC_HUMAN 0.147506532 True True +PRP4B_HUMAN 1.390871947 True True +PRR12_HUMAN 0.509774703 True True +PRSR2_HUMAN 0.51837717 True True +PSA3_HUMAN 0.562193321 True True +PSA5_HUMAN 0.417782172 True True +PSIP1_HUMAN 1.191299642 True True +PSMD1_HUMAN 1.261700313 True True +PSME3_HUMAN 3.252629288 True True +PSMG1_HUMAN 0.28963643 True True +PSRC1_HUMAN 0.325860681 True True +PTN11_HUMAN 5.337797072 True True +PTN18_HUMAN 4.00318551 True True +PTN2_HUMAN 1.070394012 True True +PTPRA_HUMAN 1.275276522 True True +PTSS2_HUMAN 0.915921435 True True +PUM1_HUMAN 1.1897884 True True +PUR6_HUMAN 2.062531946 True True +PURB_HUMAN 0.680503798 True True +PWP1_HUMAN 0.822373246 True True +Q0D2I6_HUMAN 0.459458706 True True +Q5T6U8_HUMAN 0.777224731 True True +Q6DEN2_HUMAN 0.757888678 True True +Q7Z3D7_HUMAN 0.553775924 True True +Q96SA0_HUMAN 0.93702992 True True +Q9UE24_HUMAN 0.318201558 True True +RAB12_HUMAN 3.585410828 True True +RAD50_HUMAN 1.858772717 True True +RAM_HUMAN 0.346897035 True True +RANB3_HUMAN 0.643212967 True True +RB_HUMAN 0.608046504 True True +RBM10_HUMAN 0.401842933 True True +RBM15_HUMAN 1.002932067 True True +RBM33_HUMAN 4.274338683 True True +RBM34_HUMAN 2.396137318 True True +RBM39_HUMAN 0.247819991 True True +RBM5_HUMAN 0.889070975 True True +RBM8A_HUMAN 1.666064214 True True +RBMX_HUMAN 0.960523947 True True +RBP2_HUMAN 1.904638183 True True +RCOR1_HUMAN 0.865162883 True True +REC8_HUMAN 1.353853663 True True +RECQ4_HUMAN 2.028196064 True True +RFC1_HUMAN 0.973589645 True True +RFIP1_HUMAN 0.112424755 True True +RGPA1_HUMAN 2.869531965 True True +RHG17_HUMAN 0.630295518 True True +RIF1_HUMAN 0.265084042 True True +RL14_HUMAN 2.002775674 True True +RL15_HUMAN 1.351011619 True True +RL1D1_HUMAN 4.674779734 True True +RL35A_HUMAN 0.930517337 True True +RLA1_HUMAN 3.285584572 True True +RN213_HUMAN 2.851664106 True True +ROA2_HUMAN 0.284213814 True True +ROCK1_HUMAN 0.315973552 True True +RPRD2_HUMAN 3.101583086 True True +RRAS2_HUMAN 1.818149966 True True +RREB1_HUMAN 3.760139144 True True +RRP36_HUMAN 2.691948639 True True +RS28_HUMAN 1.199977996 True True +RSF1_HUMAN 0.611187852 True True +RU17_HUMAN 2.521833179 True True +RUSD2_HUMAN 0.540831946 True True +S38A2_HUMAN 0.730788351 True True +S6A15_HUMAN 0.774576224 True True +SAC31_HUMAN 0.94307496 True True +SAMD1_HUMAN 0.425212621 True True +SC16A_HUMAN 0.672503677 True True +SCAFB_HUMAN 0.761820137 True True +SCAM3_HUMAN 2.106109234 True True +SCFD1_HUMAN 2.346160986 True True +SCNM1_HUMAN 0.719029187 True True +SCRIB_HUMAN 1.301544279 True True +SDA1_HUMAN 0.061541886 True True +SDE2_HUMAN 1.679042655 True True +SENP3_HUMAN 1.40787111 True True +SEPT2_HUMAN 0.411055936 True True +SEPT9_HUMAN 0.142671537 True True +SF01_HUMAN 2.044259155 True True +SF3A1_HUMAN 0.775578196 True True +SF3A3_HUMAN 1.194999973 True True +SF3B1_HUMAN 1.470648597 True True +SFR19_HUMAN 0.627237552 True True +SG223_HUMAN 3.780996792 True True +SGTA_HUMAN 1.050228151 True True +SH24A_HUMAN 1.002132364 True True +SHC1_HUMAN 7.546900921 True True +SHIP2_HUMAN 8.437838929 True True +SIN3A_HUMAN 1.619010324 True True +SIR1_HUMAN 0.458227657 True True +SKA3_HUMAN 1.084749599 True True +SMAG2_HUMAN 0.910735601 True True +SMAP_HUMAN 0.59280741 True True +SMC3_HUMAN 1.118391998 True True +SMCA4_HUMAN 2.527187477 True True +SMN_HUMAN 0.556568343 True True +SMRC1_HUMAN 0.563609677 True True +SMRCD_HUMAN 1.955206456 True True +SNIP1_HUMAN 0.863432223 True True +SNTB1_HUMAN 0.895092461 True True +SNUT1_HUMAN 0.199576654 True True +SNUT2_HUMAN 2.282001891 True True +SNX15_HUMAN 1.73028212 True True +SO4C1_HUMAN 0.465662642 True True +SON_HUMAN 1.335093161 True True +SPB1_HUMAN 3.108234637 True True +SPF45_HUMAN 1.051702 True True +SPN90_HUMAN 1.844315095 True True +SPTB2_HUMAN 0.471878337 True True +SPTN1_HUMAN 1.115737962 True True +SPY1_HUMAN 3.42908006 True True +SRP14_HUMAN 1.556717997 True True +SRP72_HUMAN 0.695449223 True True +SRPK2_HUMAN 1.67639317 True True +SRRM1_HUMAN 2.451247829 True True +SRRM2_HUMAN 3.361997339 True True +SRS11_HUMAN 0.979500106 True True +SRSF4_HUMAN 1.102898188 True True +SRSF6_HUMAN 1.903664076 True True +SSRP1_HUMAN 0.676450721 True True +ST32C_HUMAN 0.769326404 True True +STA5A_HUMAN 6.882885706 True True +STAM2_HUMAN 4.090988236 True True +STAT3_HUMAN 2.887868462 True True +STIP1_HUMAN 1.24173366 True True +STK39_HUMAN 1.854711439 True True +STML2_HUMAN 0.576888473 True True +STMN1_HUMAN 2.192732251 True True +STRN3_HUMAN 1.334203499 True True +STRN4_HUMAN 1.90930866 True True +SUGP1_HUMAN 0.527575252 True True +SUV91_HUMAN 0.65144438 True True +SVIL_HUMAN 3.613842583 True True +SYHM_HUMAN 0.055517045 True True +T22D4_HUMAN 0.811436232 True True +T2FA_HUMAN 1.25655639 True True +TAOK1_HUMAN 2.400859868 True True +TB10B_HUMAN 1.274479392 True True +TB182_HUMAN 0.703132143 True True +TBA1A_HUMAN 5.367873118 True True +TBCB_HUMAN 2.921534627 True True +TBCD1_HUMAN 0.446451974 True True +TBCD4_HUMAN 0.550677716 True True +TCAL3_HUMAN 1.163540679 True True +TCOF_HUMAN 3.223369409 True True +TCP4_HUMAN 2.023142846 True True +TE2IP_HUMAN 0.386420205 True True +TEBP_HUMAN 0.775708432 True True +TELO2_HUMAN 7.735939158 True True +TENC1_HUMAN 1.610375987 True True +TF3C1_HUMAN 3.429972136 True True +TF3C4_HUMAN 0.992339047 True True +TFDP1_HUMAN 0.083761469 True True +TFP11_HUMAN 1.485256404 True True +TFPT_HUMAN 0.557146512 True True +TICRR_HUMAN 1.305944158 True True +TIF1B_HUMAN 0.320264376 True True +TIM_HUMAN 0.863369747 True True +TJAP1_HUMAN 1.069817499 True True +TM1L1_HUMAN 1.606585284 True True +TM1L2_HUMAN 7.141828062 True True +TM87A_HUMAN 0.237663573 True True +TMX1_HUMAN 0.593573422 True True +TNIK_HUMAN 1.271095902 True True +TOIP1_HUMAN 1.910066228 True True +TOP2A_HUMAN 0.547849196 True True +TOP2B_HUMAN 5.468058424 True True +TP53B_HUMAN 1.203476942 True True +TPC12_HUMAN 0.920673989 True True +TPR_HUMAN 1.927839259 True True +TR150_HUMAN 1.41920725 True True +TRA2A_HUMAN 2.212379915 True True +TRAD1_HUMAN 0.673971499 True True +TRI18_HUMAN 0.997235085 True True +TRI25_HUMAN 1.305017212 True True +TTC33_HUMAN 0.514102133 True True +TX264_HUMAN 2.141372127 True True +TXLNA_HUMAN 0.740262272 True True +U520_HUMAN 2.005382024 True True +UBE2O_HUMAN 2.807881097 True True +UBL7_HUMAN 3.682226141 True True +UBP10_HUMAN 1.192686682 True True +UBP16_HUMAN 0.507055344 True True +UBP24_HUMAN 2.226210731 True True +UBP2L_HUMAN 0.641599057 True True +UBP8_HUMAN 0.613072889 True True +UIMC1_HUMAN 0.439297945 True True +UNG_HUMAN 0.244686456 True True +UNK_HUMAN 1.012344497 True True +VAMP4_HUMAN 1.684655515 True True +VINC_HUMAN 2.334430099 True True +VPRBP_HUMAN 0.769210238 True True +VRK3_HUMAN 0.682514818 True True +WAC_HUMAN 0.771007822 True True +WAP53_HUMAN 1.781641467 True True +WAPL_HUMAN 0.489512544 True True +WASL_HUMAN 2.709891864 True True +WDHD1_HUMAN 2.984224778 True True +WDR4_HUMAN 0.117013281 True True +WDR44_HUMAN 0.165462318 True True +WDR48_HUMAN 4.855701146 True True +WDR55_HUMAN 0.240532253 True True +WDR62_HUMAN 0.496230535 True True +WDR70_HUMAN 2.055000425 True True +WDR75_HUMAN 0.442295245 True True +WIPF2_HUMAN 2.036351908 True True +WIPI2_HUMAN 0.704303272 True True +WIZ_HUMAN 2.423180827 True True +WNK1_HUMAN 0.562340064 True True +WRIP1_HUMAN 0.328288345 True True +XPC_HUMAN 0.139447947 True True +XRCC1_HUMAN 3.759734218 True True +XRCC6_HUMAN 1.36897206 True True +XRN2_HUMAN 1.401650058 True True +YAP1_HUMAN 1.50029366 True True +YBOX1_HUMAN 1.625078319 True True +YBOX3_HUMAN 0.283774141 True True +YRDC_HUMAN 0.759456845 True True +Z280C_HUMAN 3.957096259 True True +Z512B_HUMAN 0.086040691 True True +ZBT7A_HUMAN 0.668040882 True True +ZC11A_HUMAN 0.155049275 True True +ZC3H4_HUMAN 1.420959201 True True +ZC3HD_HUMAN 0.113112056 True True +ZCH18_HUMAN 3.79962429 True True +ZEB1_HUMAN 0.134563631 True True +ZFY16_HUMAN 2.208481722 True True +ZFY19_HUMAN 0.602219063 True True +ZKSC1_HUMAN 2.18084174 True True +ZMYM4_HUMAN 1.074991674 True True +ZN148_HUMAN 0.618912748 True True +ZN318_HUMAN 1.99333318 True True +ZN609_HUMAN 2.121117169 True True +ZN638_HUMAN 1.63818319 True True +ZN703_HUMAN 1.160118644 True True +ZN830_HUMAN 1.172929746 True True +ZNRF2_HUMAN 1.201348674 True True +ZO1_HUMAN 1.998443753 True True +ZO2_HUMAN 2.359999053 True True +ZRAB2_HUMAN 0.282624466 True True +ZYX_HUMAN 1.939922301 True True From 9d42677ed8cedfa033ad73c01998f3862fa117fe Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Fri, 28 Jul 2023 11:37:55 -0500 Subject: [PATCH 21/44] transform gene ids in domino.py --- src/domino.py | 67 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 59 insertions(+), 8 deletions(-) diff --git a/src/domino.py b/src/domino.py index 75ce6d1a..2f6b06f2 100644 --- a/src/domino.py +++ b/src/domino.py @@ -9,6 +9,10 @@ __all__ = ['DOMINO'] +PERIOD_SUB = '♥' # U+2665 +ID_PREFIX = 'ENSG0' +ID_SUFFIX = '☺.1' + class DOMINO(PRM): required_inputs = ['network', 'active_genes'] @@ -24,22 +28,34 @@ def generate_inputs(data, filename_map): if input_type not in filename_map: raise ValueError(f"{input_type} filename is missing") - #Get active genes for node input file + # Get active genes for node input file if data.contains_node_columns('active'): #NODEID is always included in the node table node_df = data.request_node_columns(['active']) else: raise ValueError("DOMINO requires active genes") + node_df = node_df[node_df['active'] == True] + + # Replace periods in each node id with PERIOD_SUB and transform with a prefix and suffix + node_df['NODEID'] = node_df['NODEID'].apply(pre_domino_id_transform) + # e.g., ENSG0[node_id♥]☺.1 #Create active_genes file node_df.to_csv(filename_map['active_genes'],sep="\t",index=False,columns=['NODEID'], header=False) + #Create network file edges_df = data.get_interactome() edges_df['ppi'] = 'ppi' + + # Replace periods in each node id with PERIOD_SUB and transform with a prefix and suffix + edges_df['Interactor1'] = edges_df['Interactor1'].apply(pre_domino_id_transform) + edges_df['Interactor2'] = edges_df['Interactor2'].apply(pre_domino_id_transform) + edges_df.to_csv(filename_map['network'],sep='\t',index=False,columns=['Interactor1','ppi','Interactor2'],header=['ID_interactor_A','ppi','ID_interactor_B']) + @staticmethod def run(network=None, active_genes=None, output_file=None, use_cache=True, slices_threshold=None, module_threshold=None, singularity=False): """ @@ -96,7 +112,6 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice ######## - # Make the Python command to run within the container command = ['domino', '--active_genes_files', node_file, @@ -122,22 +137,29 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice command, volumes, work_dir) - print(out) - + #print(out) ######## - Path(mapped_slices_dir).unlink(missing_ok=True) - Path(out_dir, 'modules.out').unlink(missing_ok=True) + # Path(mapped_slices_dir).unlink(missing_ok=True) + # Path(out_dir, 'modules.out').unlink(missing_ok=True) #for domino_output in out_dir.glob('modules.out'): # domino_output.unlink(missing_ok=True) + # domino creates a new folder in mapped_out_dir to output its modules files into + out_modules = mapped_out_dir + '/active_genes' + # concatenate each module html file into one big file with open(output_file, "w") as fo: - for html_file in out_dir.glob('module_*.html'): + for html_file in Path(out_modules).glob('module_*.html'): with open(html_file,'r') as fi: fo.write(fi.read()) - Path(html_file).unlink(missing_ok=True) + # Path(html_file).unlink(missing_ok=True) + + + + + @staticmethod @@ -149,6 +171,8 @@ def parse_output(raw_pathway_file, standardized_pathway_file): """ edges = pd.DataFrame() + print("##############") + print("rawpathways:", raw_pathway_file) with open(raw_pathway_file, 'r') as file: for line in file: if line.strip().startswith("let data = ["): @@ -161,13 +185,40 @@ def parse_output(raw_pathway_file, standardized_pathway_file): for entry in data: tmp = entry['data'] entries.append(tmp) + print("tmp:", tmp) df = pd.DataFrame(entries) + print("df:", df) newdf = df.loc[:,['source', 'target']].dropna() + print("newdf:", newdf) edges = pd.concat([edges, newdf], axis=0) + print("edges:", edges) edges['rank'] = 1 # adds in a rank column of 1s because the edges are not ranked + # Remove the prefix and unicode of suffix only, restore the period + edges['source'] = edges['source'].apply(post_domino_id_transform) + edges['target'] = edges['target'].apply(post_domino_id_transform) + edges.to_csv(standardized_pathway_file, header=False, index=False) + +def pre_domino_id_transform(node_id): + """ + Replace periods with PERIOD_SUB, prepend each node id with ID_PREFIX and append each node with ID_SUFFIX + @param node_id: the node id to transformed + """ + node_id = node_id.replace('.', PERIOD_SUB) + return ID_PREFIX + node_id + ID_SUFFIX + + +def post_domino_id_transform(node_id): + """ + Remove prefix and suffix, replace PERIOD_SUB with . + @param node_id: the node id to transformed + """ + node_id = node_id.str[5:-1] + node_id = node_id.str.replace(PERIOD_SUB, '.') + return node_id + From b6d28354cfd38797fddf36c1fc22864655b0bc9d Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 28 Jul 2023 13:17:10 -0500 Subject: [PATCH 22/44] Use path to output dir outside container --- src/domino.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/domino.py b/src/domino.py index 2f6b06f2..e2011800 100644 --- a/src/domino.py +++ b/src/domino.py @@ -104,7 +104,7 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice print('Running slicer with arguments: {}'.format(' '.join(slicer_command)), flush=True) container_framework = 'singularity' if singularity else 'docker' - slicer_out = run_container(container_framework, + run_container(container_framework, 'otjohnson/domino', slicer_command, volumes, @@ -132,7 +132,7 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice print('Running DOMINO with arguments: {}'.format(' '.join(command)), flush=True) # container_framework = 'singularity' if singularity else 'docker' - out = run_container(container_framework, + run_container(container_framework, 'otjohnson/domino', command, volumes, @@ -146,12 +146,12 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice #for domino_output in out_dir.glob('modules.out'): # domino_output.unlink(missing_ok=True) - # domino creates a new folder in mapped_out_dir to output its modules files into - out_modules = mapped_out_dir + '/active_genes' + # domino creates a new folder in out_dir to output its modules files into + out_modules_dir = Path(out_dir, 'active_genes') # concatenate each module html file into one big file with open(output_file, "w") as fo: - for html_file in Path(out_modules).glob('module_*.html'): + for html_file in out_modules_dir.glob('module_*.html'): with open(html_file,'r') as fi: fo.write(fi.read()) # Path(html_file).unlink(missing_ok=True) From add221fde1f84369a8bf0d7e09022c0bc2b7887a Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 28 Jul 2023 13:18:53 -0500 Subject: [PATCH 23/44] Add back container output print statements --- src/domino.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/domino.py b/src/domino.py index e2011800..440fbcf7 100644 --- a/src/domino.py +++ b/src/domino.py @@ -104,11 +104,12 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice print('Running slicer with arguments: {}'.format(' '.join(slicer_command)), flush=True) container_framework = 'singularity' if singularity else 'docker' - run_container(container_framework, + slicer_out = run_container(container_framework, 'otjohnson/domino', slicer_command, volumes, work_dir) + print(slicer_out) ######## @@ -132,12 +133,12 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice print('Running DOMINO with arguments: {}'.format(' '.join(command)), flush=True) # container_framework = 'singularity' if singularity else 'docker' - run_container(container_framework, + domino_out = run_container(container_framework, 'otjohnson/domino', command, volumes, work_dir) - #print(out) + print(domino_out) ######## From b11eeab2d8131e1f03814d5cd0c0ea70e49e77c6 Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Fri, 28 Jul 2023 15:50:24 -0500 Subject: [PATCH 24/44] fix post_domino_id_transform function in domino.py --- src/domino.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/src/domino.py b/src/domino.py index 440fbcf7..9eff94e9 100644 --- a/src/domino.py +++ b/src/domino.py @@ -147,7 +147,7 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice #for domino_output in out_dir.glob('modules.out'): # domino_output.unlink(missing_ok=True) - # domino creates a new folder in out_dir to output its modules files into + # domino creates a new folder in out_dir to output its modules files into /active_genes out_modules_dir = Path(out_dir, 'active_genes') # concatenate each module html file into one big file @@ -158,11 +158,6 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice # Path(html_file).unlink(missing_ok=True) - - - - - @staticmethod def parse_output(raw_pathway_file, standardized_pathway_file): """ @@ -172,8 +167,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file): """ edges = pd.DataFrame() - print("##############") - print("rawpathways:", raw_pathway_file) with open(raw_pathway_file, 'r') as file: for line in file: if line.strip().startswith("let data = ["): @@ -186,15 +179,11 @@ def parse_output(raw_pathway_file, standardized_pathway_file): for entry in data: tmp = entry['data'] entries.append(tmp) - print("tmp:", tmp) df = pd.DataFrame(entries) - print("df:", df) newdf = df.loc[:,['source', 'target']].dropna() - print("newdf:", newdf) edges = pd.concat([edges, newdf], axis=0) - print("edges:", edges) edges['rank'] = 1 # adds in a rank column of 1s because the edges are not ranked @@ -202,7 +191,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file): edges['source'] = edges['source'].apply(post_domino_id_transform) edges['target'] = edges['target'].apply(post_domino_id_transform) - edges.to_csv(standardized_pathway_file, header=False, index=False) + edges.to_csv(standardized_pathway_file, sep='\t',header=False, index=False) def pre_domino_id_transform(node_id): @@ -211,7 +200,7 @@ def pre_domino_id_transform(node_id): @param node_id: the node id to transformed """ node_id = node_id.replace('.', PERIOD_SUB) - return ID_PREFIX + node_id + ID_SUFFIX + return ID_PREFIX + node_id #+ ID_SUFFIX def post_domino_id_transform(node_id): @@ -219,7 +208,7 @@ def post_domino_id_transform(node_id): Remove prefix and suffix, replace PERIOD_SUB with . @param node_id: the node id to transformed """ - node_id = node_id.str[5:-1] - node_id = node_id.str.replace(PERIOD_SUB, '.') + node_id = node_id[5:-3] + node_id = node_id.replace(PERIOD_SUB, '.') return node_id From 80eaff05b08ba1634c539521494f0ba1fc8428c6 Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Fri, 28 Jul 2023 16:41:58 -0500 Subject: [PATCH 25/44] remove suffix and period replacement in domino.py --- src/domino.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/domino.py b/src/domino.py index 9eff94e9..82e21d1e 100644 --- a/src/domino.py +++ b/src/domino.py @@ -9,9 +9,7 @@ __all__ = ['DOMINO'] -PERIOD_SUB = '♥' # U+2665 ID_PREFIX = 'ENSG0' -ID_SUFFIX = '☺.1' class DOMINO(PRM): required_inputs = ['network', 'active_genes'] @@ -36,19 +34,18 @@ def generate_inputs(data, filename_map): raise ValueError("DOMINO requires active genes") node_df = node_df[node_df['active'] == True] - # Replace periods in each node id with PERIOD_SUB and transform with a prefix and suffix + # transform each node id with a prefix node_df['NODEID'] = node_df['NODEID'].apply(pre_domino_id_transform) - # e.g., ENSG0[node_id♥]☺.1 + # e.g., ENSG0[node_id] #Create active_genes file node_df.to_csv(filename_map['active_genes'],sep="\t",index=False,columns=['NODEID'], header=False) - #Create network file edges_df = data.get_interactome() edges_df['ppi'] = 'ppi' - # Replace periods in each node id with PERIOD_SUB and transform with a prefix and suffix + # transform each node id with a prefix edges_df['Interactor1'] = edges_df['Interactor1'].apply(pre_domino_id_transform) edges_df['Interactor2'] = edges_df['Interactor2'].apply(pre_domino_id_transform) @@ -187,7 +184,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file): edges['rank'] = 1 # adds in a rank column of 1s because the edges are not ranked - # Remove the prefix and unicode of suffix only, restore the period + # Remove the prefix edges['source'] = edges['source'].apply(post_domino_id_transform) edges['target'] = edges['target'].apply(post_domino_id_transform) @@ -196,19 +193,16 @@ def parse_output(raw_pathway_file, standardized_pathway_file): def pre_domino_id_transform(node_id): """ - Replace periods with PERIOD_SUB, prepend each node id with ID_PREFIX and append each node with ID_SUFFIX + Prepend each node id with ID_PREFIX @param node_id: the node id to transformed """ - node_id = node_id.replace('.', PERIOD_SUB) - return ID_PREFIX + node_id #+ ID_SUFFIX + return ID_PREFIX + node_id def post_domino_id_transform(node_id): """ - Remove prefix and suffix, replace PERIOD_SUB with . + Remove prefix @param node_id: the node id to transformed """ - node_id = node_id[5:-3] - node_id = node_id.replace(PERIOD_SUB, '.') + node_id = node_id[len(ID_PREFIX):] return node_id - From 44320ab95af9341e833a8927d71f7c825674ba79 Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Mon, 31 Jul 2023 12:13:51 -0500 Subject: [PATCH 26/44] Add domino to tests --- .github/workflows/test-spras.yml | 10 ++++ test/DOMINO/input/domino-active-genes.txt | 20 +++++++ test/DOMINO/input/domino-network.txt | 37 ++++++++++++ test/DOMINO/input/domino-network.txt.pkl | Bin 0 -> 1386 bytes test/DOMINO/test_domino.py | 66 ++++++++++++++++++++++ 5 files changed, 133 insertions(+) create mode 100644 test/DOMINO/input/domino-active-genes.txt create mode 100644 test/DOMINO/input/domino-network.txt create mode 100644 test/DOMINO/input/domino-network.txt.pkl create mode 100644 test/DOMINO/test_domino.py diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml index 243aeb1d..5313ac20 100644 --- a/.github/workflows/test-spras.yml +++ b/.github/workflows/test-spras.yml @@ -81,6 +81,7 @@ jobs: docker pull reedcompbio/pathlinker:latest docker pull reedcompbio/meo:latest docker pull reedcompbio/mincostflow:latest + docker pull otjohnson/domino:latest - name: Build Omics Integrator 1 Docker image uses: docker/build-push-action@v1 with: @@ -126,6 +127,15 @@ jobs: tags: latest cache_froms: reedcompbio/mincostflow:latest push: false + - name: Build DOMINO Docker image + uses: docker/build-push-action@v1 + with: + path: docker-wrappers/DOMINO/. + dockerfile: docker-wrappers/DOMINO/Dockerfile + repository: otjohnson/domino + tags: latest + cache_froms: otjohnson/domino:latest + push: false # Run pre-commit checks on source files pre-commit: diff --git a/test/DOMINO/input/domino-active-genes.txt b/test/DOMINO/input/domino-active-genes.txt new file mode 100644 index 00000000..8a31334b --- /dev/null +++ b/test/DOMINO/input/domino-active-genes.txt @@ -0,0 +1,20 @@ +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T \ No newline at end of file diff --git a/test/DOMINO/input/domino-network.txt b/test/DOMINO/input/domino-network.txt new file mode 100644 index 00000000..470498a9 --- /dev/null +++ b/test/DOMINO/input/domino-network.txt @@ -0,0 +1,37 @@ +ID_interactor_A ppi ID_interactor_b +A ppi B +A ppi H +A ppi F +B ppi C +B ppi J +C ppi D +C ppi M +C ppi F +D ppi P +E ppi F +E ppi N +E ppi S +F ppi G +F ppi O +G ppi H +H ppi I +I ppi J +I ppi K +K ppi Q +J ppi M +J ppi P +M ppi N +K ppi L +L ppi Z +O ppi P +O ppi T +P ppi Q +Q ppi R +R ppi T +T ppi U +T ppi V +V ppi U +T ppi W +T ppi X +X ppi Y +Y ppi Z \ No newline at end of file diff --git a/test/DOMINO/input/domino-network.txt.pkl b/test/DOMINO/input/domino-network.txt.pkl new file mode 100644 index 0000000000000000000000000000000000000000..b346ce77dd06bbd676311f7fe580bd31683cbf5f GIT binary patch literal 1386 zcmZvc`+rSG6vZz>^(u;1ih93>R#d&JEfon7kwhYolawC!ab(>j?#(*qj7W7$Qmty0 zzuDdgpZN&C%xAuPt(iS{*4(qSrq)dI`BpmfsG~Mp$+9Htm`W>iDEAa2OUIVEO1RoZ z?WIzwkm5?7r*XYe%j5f%THZ<*xTaS1joG}>%*s|Tvn|@9_GYV|yl-5KwrKl1<->i} zsk~F=N_~2^k>zpA=gEpzpRBMnn$4A#^>3Rb^{M0|hy7ILweNlm%fbx>#&%kMCfw+v z%_{eH$Bm}8FMeX&$xXt|g)ytO(uAM)m9mfcs@x*nib7qbyL57!@Qc7#_@!`r;OlG` zeiir{cL;X|?&2=t*MV}p~(5ru$Z8(?3Koj- zcZaiBfTr}=TZ=y&PGHXb0*(F&H19=d$V<>)$Nmz@xKqhk!dp5O7n+}WIihosA)`R aWxmjl<^RH$(K>rrZx+A8#;oeA9j^g*qOQOI literal 0 HcmV?d00001 diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py new file mode 100644 index 00000000..6e6fe490 --- /dev/null +++ b/test/DOMINO/test_domino.py @@ -0,0 +1,66 @@ +import pytest +import shutil +from pathlib import Path +from src.domino import DOMINO + +TEST_DIR = 'test/DOMINO/' +OUT_FILE_DEFAULT = TEST_DIR+'output/domino-output.txt' +OUT_FILE_OPTIONAL = TEST_DIR+'output/domino-output-thresholds.txt' + +class TestDOMINO: + """ + Run test for the DOMINO run function + """ + + def test_domino_required(self): + # Only include required arguments + out_path = Path(OUT_FILE_DEFAULT) + out_path.unlink(missing_ok=True) + DOMINO.run( + network=TEST_DIR+'input/domino-network.txt', + active_genes=TEST_DIR+'input/domino-active-genes.txt', + output_file=OUT_FILE_DEFAULT) + assert out_path.exists() + + def test_domino_optional(self): + # Include optional argument + out_path = Path(OUT_FILE_OPTIONAL) + out_path.unlink(missing_ok=True) + DOMINO.run( + network=TEST_DIR+'input/domino-network.txt', + active_genes=TEST_DIR+'input/domino-active-genes.txt', + output_file=OUT_FILE_OPTIONAL, + use_cache=False, + slices_threshold=0.4, + module_threshold=0.06) + assert out_path.exists() + + def test_domino_missing_active_genes(self): + # Test the expected error is raised when active_genes argument is missing + with pytest.raises(ValueError): + # No active_genes + DOMINO.run( + network=TEST_DIR+'input/domino-network.txt', + output_file=OUT_FILE_DEFAULT) + + def test_domino_missing_network(self): + # Test the expected error is raised when network argument is missing + with pytest.raises(ValueError): + # No network + DOMINO.run( + active_genes=TEST_DIR+'input/domino-active-genes.txt', + output_file=OUT_FILE_DEFAULT) + + # Only run Singularity test if the binary is available on the system + # spython is only available on Unix, but do not explicitly skip non-Unix platforms + @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') + def test_domino_singularity(self): + out_path = Path(OUT_FILE_DEFAULT) + out_path.unlink(missing_ok=True) + # Only include required arguments and run with Singularity + DOMINO.run( + network=TEST_DIR+'input/domino-network.txt', + active_genes=TEST_DIR+'input/domino-active-genes.txt', + output_file=OUT_FILE_DEFAULT, + singularity=True) + assert out_path.exists() From 3ad2096a160965292ca963e3e7f78cdd44bda0d5 Mon Sep 17 00:00:00 2001 From: Livvy Johnson <58735771+livj4711@users.noreply.github.com> Date: Mon, 31 Jul 2023 12:15:25 -0500 Subject: [PATCH 27/44] Delete domino-network.txt.pkl --- test/DOMINO/input/domino-network.txt.pkl | Bin 1386 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 test/DOMINO/input/domino-network.txt.pkl diff --git a/test/DOMINO/input/domino-network.txt.pkl b/test/DOMINO/input/domino-network.txt.pkl deleted file mode 100644 index b346ce77dd06bbd676311f7fe580bd31683cbf5f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1386 zcmZvc`+rSG6vZz>^(u;1ih93>R#d&JEfon7kwhYolawC!ab(>j?#(*qj7W7$Qmty0 zzuDdgpZN&C%xAuPt(iS{*4(qSrq)dI`BpmfsG~Mp$+9Htm`W>iDEAa2OUIVEO1RoZ z?WIzwkm5?7r*XYe%j5f%THZ<*xTaS1joG}>%*s|Tvn|@9_GYV|yl-5KwrKl1<->i} zsk~F=N_~2^k>zpA=gEpzpRBMnn$4A#^>3Rb^{M0|hy7ILweNlm%fbx>#&%kMCfw+v z%_{eH$Bm}8FMeX&$xXt|g)ytO(uAM)m9mfcs@x*nib7qbyL57!@Qc7#_@!`r;OlG` zeiir{cL;X|?&2=t*MV}p~(5ru$Z8(?3Koj- zcZaiBfTr}=TZ=y&PGHXb0*(F&H19=d$V<>)$Nmz@xKqhk!dp5O7n+}WIihosA)`R aWxmjl<^RH$(K>rrZx+A8#;oeA9j^g*qOQOI From 1341802847d7f081118b01bc69a4d5102a383c51 Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Wed, 2 Aug 2023 13:53:08 -0500 Subject: [PATCH 28/44] add parse_output test to test_domino.py --- .../expected_output/domino-parse-output.txt | 32 + test/DOMINO/input/domino-concat-modules.txt | 880 ++++++++++++++++++ test/DOMINO/test_domino.py | 23 + 3 files changed, 935 insertions(+) create mode 100644 test/DOMINO/expected_output/domino-parse-output.txt create mode 100644 test/DOMINO/input/domino-concat-modules.txt diff --git a/test/DOMINO/expected_output/domino-parse-output.txt b/test/DOMINO/expected_output/domino-parse-output.txt new file mode 100644 index 00000000..33d1d204 --- /dev/null +++ b/test/DOMINO/expected_output/domino-parse-output.txt @@ -0,0 +1,32 @@ +ENSG00000122691 ENSG00000138757 1 +ENSG00000122691 ENSG00000109320 1 +ENSG00000134954 ENSG00000077150 1 +ENSG00000077150 ENSG00000107968 1 +ENSG00000077150 ENSG00000157557 1 +ENSG00000077150 ENSG00000109320 1 +ENSG00000173120 ENSG00000173039 1 +ENSG00000173039 ENSG00000109320 1 +ENSG00000168884 ENSG00000109320 1 +ENSG00000109320 ENSG00000282905 1 +ENSG00000109320 ENSG00000104856 1 +ENSG00000109320 ENSG00000146232 1 +ENSG00000109320 ENSG00000166135 1 +ENSG00000109320 ENSG00000170606 1 +ENSG00000100906 ENSG00000166135 1 +ENSG00000100906 ENSG00000198873 1 +ENSG00000100906 ENSG00000173020 1 +ENSG00000162924 ENSG00000170606 1 +ENSG00000187079 ENSG00000177606 1 +ENSG00000177606 ENSG00000168062 1 +ENSG00000177606 ENSG00000182979 1 +ENSG00000177606 ENSG00000050748 1 +ENSG00000177606 ENSG00000109339 1 +ENSG00000177606 ENSG00000170345 1 +ENSG00000177606 ENSG00000175592 1 +ENSG00000177606 ENSG00000085276 1 +ENSG00000171223 ENSG00000100721 1 +ENSG00000171223 ENSG00000075426 1 +ENSG00000171223 ENSG00000085276 1 +ENSG00000130522 ENSG00000175592 1 +ENSG00000175592 ENSG00000128272 1 +ENSG00000128272 ENSG00000162772 1 diff --git a/test/DOMINO/input/domino-concat-modules.txt b/test/DOMINO/input/domino-concat-modules.txt new file mode 100644 index 00000000..47482801 --- /dev/null +++ b/test/DOMINO/input/domino-concat-modules.txt @@ -0,0 +1,880 @@ + + + + + + + + + + + + + + React App + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+
+
+ + + + + + + + + +
idnamemodules
+ +
+
+
+
+
+
+
+
+
+

Total number of genes: 19

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + React App + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+
+
+ + + + + + + + + +
idnamemodules
+ +
+
+
+
+
+
+
+
+
+

Total number of genes: 15

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + + + + diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py index 6e6fe490..a4af3e34 100644 --- a/test/DOMINO/test_domino.py +++ b/test/DOMINO/test_domino.py @@ -6,6 +6,8 @@ TEST_DIR = 'test/DOMINO/' OUT_FILE_DEFAULT = TEST_DIR+'output/domino-output.txt' OUT_FILE_OPTIONAL = TEST_DIR+'output/domino-output-thresholds.txt' +OUT_FILE_PARSE = TEST_DIR+'output/domino-parse-output.txt' +OUT_FILE_PARSE_EXP = TEST_DIR+'expected_output/domino-parse-output.txt' class TestDOMINO: """ @@ -20,6 +22,7 @@ def test_domino_required(self): network=TEST_DIR+'input/domino-network.txt', active_genes=TEST_DIR+'input/domino-active-genes.txt', output_file=OUT_FILE_DEFAULT) + # output_file should be empty assert out_path.exists() def test_domino_optional(self): @@ -33,6 +36,7 @@ def test_domino_optional(self): use_cache=False, slices_threshold=0.4, module_threshold=0.06) + # output_file should be empty assert out_path.exists() def test_domino_missing_active_genes(self): @@ -51,6 +55,25 @@ def test_domino_missing_network(self): active_genes=TEST_DIR+'input/domino-active-genes.txt', output_file=OUT_FILE_DEFAULT) + def test_domino_parse_output(self): + # Show how the complicated file formats work in terms of input and expected output + # Concatenated module_0.html and module_1.html file as input + # Output is an edge dataframe + # Expected output is stored in repo and the test will + # confirm the generated output matches the expected output. + out_path = Path(OUT_FILE_PARSE) + out_path.unlink(missing_ok=True) + out_path_exp = Path(OUT_FILE_PARSE_EXP) + DOMINO.parse_output( + TEST_DIR+'input/domino-concat-modules.txt', + OUT_FILE_PARSE) + # assert out_path.exists() + with open(out_path, 'r') as output_file: + generated_output = output_file.read() + with open(out_path_exp, 'r') as output_file: + expected_output = output_file.read() + assert generated_output == expected_output + # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') From 7bf3095387222819354a60ed41e888cf3b16cf85 Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Fri, 4 Aug 2023 11:19:23 -0500 Subject: [PATCH 29/44] change comparison in parse_output test --- test/DOMINO/test_domino.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py index a4af3e34..7d318aaf 100644 --- a/test/DOMINO/test_domino.py +++ b/test/DOMINO/test_domino.py @@ -1,5 +1,7 @@ import pytest import shutil +import filecmp + from pathlib import Path from src.domino import DOMINO @@ -11,7 +13,12 @@ class TestDOMINO: """ - Run test for the DOMINO run function + Run test for the DOMINO run and parse_output function. + We intentionally omit a DOMINO run correctness test. The output + of DOMINO changes between runs without an option to set a seed for + the algorithm. The variability makes it difficult to compare + generated output to expected output. + """ def test_domino_required(self): @@ -56,23 +63,17 @@ def test_domino_missing_network(self): output_file=OUT_FILE_DEFAULT) def test_domino_parse_output(self): - # Show how the complicated file formats work in terms of input and expected output - # Concatenated module_0.html and module_1.html file as input - # Output is an edge dataframe - # Expected output is stored in repo and the test will - # confirm the generated output matches the expected output. + # Input is the concatenated module_0.html and module_1.html file from + # the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt + # from https://github.com/Shamir-Lab/DOMINO/tree/master/examples + # Confirms the generated output matches the expected output out_path = Path(OUT_FILE_PARSE) out_path.unlink(missing_ok=True) out_path_exp = Path(OUT_FILE_PARSE_EXP) DOMINO.parse_output( TEST_DIR+'input/domino-concat-modules.txt', OUT_FILE_PARSE) - # assert out_path.exists() - with open(out_path, 'r') as output_file: - generated_output = output_file.read() - with open(out_path_exp, 'r') as output_file: - expected_output = output_file.read() - assert generated_output == expected_output + assert filecmp.cmp(OUT_FILE_PARSE, OUT_FILE_PARSE_EXP) # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms From 78b69ebe6d7443e86e35c3bc21ccebaa863d4c05 Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Fri, 4 Aug 2023 11:45:59 -0500 Subject: [PATCH 30/44] change otjohnson/domino to reedcompbio/domino --- .github/workflows/test-spras.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml index 5313ac20..0fc4e9ed 100644 --- a/.github/workflows/test-spras.yml +++ b/.github/workflows/test-spras.yml @@ -81,7 +81,7 @@ jobs: docker pull reedcompbio/pathlinker:latest docker pull reedcompbio/meo:latest docker pull reedcompbio/mincostflow:latest - docker pull otjohnson/domino:latest + docker pull reedcompbio/domino:latest - name: Build Omics Integrator 1 Docker image uses: docker/build-push-action@v1 with: @@ -132,9 +132,9 @@ jobs: with: path: docker-wrappers/DOMINO/. dockerfile: docker-wrappers/DOMINO/Dockerfile - repository: otjohnson/domino + repository: reedcompbio/domino tags: latest - cache_froms: otjohnson/domino:latest + cache_froms: reedcompbio/domino:latest push: false # Run pre-commit checks on source files From aa128fc7b4590bb2d14d2b2e6f7da650d06dc546 Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Fri, 4 Aug 2023 11:48:53 -0500 Subject: [PATCH 31/44] change all algorithms to include: true --- config/config.yaml | 14 +++++++------- config/egfr.yaml | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 139a8142..20e8b1c5 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -28,14 +28,14 @@ algorithms: - name: "pathlinker" params: - include: false + include: true directed: true run1: k: range(100,201,100) - name: "omicsintegrator1" params: - include: false + include: true directed: false run1: r: [5] @@ -46,7 +46,7 @@ - name: "omicsintegrator2" params: - include: false + include: true directed: false run1: b: [4] @@ -57,7 +57,7 @@ - name: "meo" params: - include: false + include: true directed: true run1: max_path_length: [3] @@ -66,7 +66,7 @@ - name: "mincostflow" params: - include: false + include: true directed: false run1: flow: [1] # The flow must be an int @@ -77,7 +77,7 @@ include: true directed: false run1: - use_cache: ["true"] + use_cache: ["false"] slices_threshold: [0.3] module_threshold: [0.05] @@ -126,7 +126,7 @@ include: true # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset ml: - include: true + include: false # specify how many principal components to calculate components: 2 # boolean to show the labels on the pca graph diff --git a/config/egfr.yaml b/config/egfr.yaml index 23ff250d..1571960d 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -39,7 +39,7 @@ algorithms: name: omicsintegrator2 params: directed: false - include: false + include: true run1: b: - 4 @@ -54,7 +54,7 @@ algorithms: name: meo params: directed: true - include: false + include: true run1: local_search: - "Yes" @@ -69,7 +69,7 @@ algorithms: include: true run1: use_cache: - - "true" + - "false" slices_threshold: - 0.3 module_threshold: From 2851baac38d8dc6139f5ed35013002e60d982ba5 Mon Sep 17 00:00:00 2001 From: livj4711 <4711.liv@gmail.com> Date: Fri, 4 Aug 2023 11:53:30 -0500 Subject: [PATCH 32/44] update domino.py --- src/domino.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/domino.py b/src/domino.py index 82e21d1e..a98a6a90 100644 --- a/src/domino.py +++ b/src/domino.py @@ -58,6 +58,7 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice """ Run DOMINO with Docker Let visualization be always true, parallelization be always 1 thread + DOMINO produces multiple output module files. We concatenate these files into one file, which we will extract edges from to produce one dataframe @param network: input network file (required) @param active_genes: input active genes (required) @param output_file: path to the output pathway file (required) @@ -88,8 +89,6 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) volumes.append(bind_path) - ######## - bind_path, mapped_slices_dir = prepare_volume('slices.txt', work_dir) volumes.append(bind_path) # /spras/ADFJGFD/slices.txt @@ -108,8 +107,6 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice work_dir) print(slicer_out) - ######## - # Make the Python command to run within the container command = ['domino', '--active_genes_files', node_file, @@ -137,22 +134,16 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice work_dir) print(domino_out) - ######## - - # Path(mapped_slices_dir).unlink(missing_ok=True) - # Path(out_dir, 'modules.out').unlink(missing_ok=True) - #for domino_output in out_dir.glob('modules.out'): - # domino_output.unlink(missing_ok=True) - - # domino creates a new folder in out_dir to output its modules files into /active_genes + # DOMINO creates a new folder in out_dir to output its modules files into /active_genes out_modules_dir = Path(out_dir, 'active_genes') + #Path(out_modules_dir, 'modules.out').unlink(missing_ok=True) - # concatenate each module html file into one big file + # Concatenate each produced module html file into one big file with open(output_file, "w") as fo: for html_file in out_modules_dir.glob('module_*.html'): with open(html_file,'r') as fi: fo.write(fi.read()) - # Path(html_file).unlink(missing_ok=True) + #Path(html_file).unlink(missing_ok=True) @staticmethod @@ -193,6 +184,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file): def pre_domino_id_transform(node_id): """ + DOMINO algorithm requires module edges have 'ENSG0' string. Prepend each node id with ID_PREFIX @param node_id: the node id to transformed """ From 3fb157cd1506a33dda3de32bdf21e9462ab19b57 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Wed, 9 Aug 2023 10:59:21 -0500 Subject: [PATCH 33/44] Initial formatting changes --- docker-wrappers/DOMINO/README.md | 3 +- src/domino.py | 49 ++++++++++++++++---------------- src/runner.py | 2 +- test/DOMINO/test_domino.py | 20 ++++++------- 4 files changed, 37 insertions(+), 37 deletions(-) diff --git a/docker-wrappers/DOMINO/README.md b/docker-wrappers/DOMINO/README.md index a35e803c..60e1fb67 100644 --- a/docker-wrappers/DOMINO/README.md +++ b/docker-wrappers/DOMINO/README.md @@ -2,6 +2,8 @@ A Docker image for [DOMINO](https://github.com/Shamir-Lab/DOMINO) that is available on [DockerHub](https://hub.docker.com/repository/docker/otjohnson/domino). +DOMINO outputs multiple active modules, which SPRAS combines into a single pathway. + To create the Docker image run: ``` docker build -t otjohnson/domino -f Dockerfile . @@ -13,4 +15,3 @@ To inspect the installed Python packages: winpty docker run otjohnson/domino pip list ``` The `winpty` prefix is only needed on Windows. - diff --git a/src/domino.py b/src/domino.py index a98a6a90..8562f2fd 100644 --- a/src/domino.py +++ b/src/domino.py @@ -1,5 +1,4 @@ import json -import shutil from pathlib import Path import pandas as pd @@ -11,6 +10,7 @@ ID_PREFIX = 'ENSG0' + class DOMINO(PRM): required_inputs = ['network', 'active_genes'] @@ -28,7 +28,7 @@ def generate_inputs(data, filename_map): # Get active genes for node input file if data.contains_node_columns('active'): - #NODEID is always included in the node table + # NODEID is always included in the node table node_df = data.request_node_columns(['active']) else: raise ValueError("DOMINO requires active genes") @@ -38,33 +38,33 @@ def generate_inputs(data, filename_map): node_df['NODEID'] = node_df['NODEID'].apply(pre_domino_id_transform) # e.g., ENSG0[node_id] - #Create active_genes file + # Create active_genes file node_df.to_csv(filename_map['active_genes'],sep="\t",index=False,columns=['NODEID'], header=False) - #Create network file + # Create network file edges_df = data.get_interactome() edges_df['ppi'] = 'ppi' - # transform each node id with a prefix + # Transform each node id with a prefix edges_df['Interactor1'] = edges_df['Interactor1'].apply(pre_domino_id_transform) edges_df['Interactor2'] = edges_df['Interactor2'].apply(pre_domino_id_transform) - edges_df.to_csv(filename_map['network'],sep='\t',index=False,columns=['Interactor1','ppi','Interactor2'],header=['ID_interactor_A','ppi','ID_interactor_B']) - - + edges_df.to_csv(filename_map['network'], sep='\t', index=False, columns=['Interactor1', 'ppi', 'Interactor2'], + header=['ID_interactor_A', 'ppi', 'ID_interactor_B']) @staticmethod def run(network=None, active_genes=None, output_file=None, use_cache=True, slices_threshold=None, module_threshold=None, singularity=False): """ Run DOMINO with Docker Let visualization be always true, parallelization be always 1 thread - DOMINO produces multiple output module files. We concatenate these files into one file, which we will extract edges from to produce one dataframe + DOMINO produces multiple output module files in an HTML format. SPRAS concatenates these files into one file, + which from which it will extract edges from to produce one pathway. @param network: input network file (required) @param active_genes: input active genes (required) @param output_file: path to the output pathway file (required) @param use_cache: if True, use auto-generated cache network files (*.pkl) from previous executions with the same network (optional) - @param slices_threshold: the threshold for considering a slice as relevant (optional) - @param module_threshold: the threshold for considering a putative module as final module (optional) + @param slices_threshold: the p-value threshold for considering a slice as relevant (optional) + @param module_threshold: the p-value threshold for considering a putative module as final module (optional) @param singularity: if True, run using the Singularity container instead of the Docker container (optional) """ # Assuming defaults are: use_cache=true @@ -94,17 +94,17 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice # /spras/ADFJGFD/slices.txt slicer_command = ['slicer', - '--network_file', network_file, - '--output_file', mapped_slices_dir] + '--network_file', network_file, + '--output_file', mapped_slices_dir] print('Running slicer with arguments: {}'.format(' '.join(slicer_command)), flush=True) container_framework = 'singularity' if singularity else 'docker' slicer_out = run_container(container_framework, - 'otjohnson/domino', - slicer_command, - volumes, - work_dir) + 'otjohnson/domino', + slicer_command, + volumes, + work_dir) print(slicer_out) # Make the Python command to run within the container @@ -128,10 +128,10 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice # container_framework = 'singularity' if singularity else 'docker' domino_out = run_container(container_framework, - 'otjohnson/domino', - command, - volumes, - work_dir) + 'otjohnson/domino', + command, + volumes, + work_dir) print(domino_out) # DOMINO creates a new folder in out_dir to output its modules files into /active_genes @@ -145,7 +145,6 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice fo.write(fi.read()) #Path(html_file).unlink(missing_ok=True) - @staticmethod def parse_output(raw_pathway_file, standardized_pathway_file): """ @@ -169,17 +168,17 @@ def parse_output(raw_pathway_file, standardized_pathway_file): entries.append(tmp) df = pd.DataFrame(entries) - newdf = df.loc[:,['source', 'target']].dropna() + newdf = df.loc[:, ['source', 'target']].dropna() edges = pd.concat([edges, newdf], axis=0) - edges['rank'] = 1 # adds in a rank column of 1s because the edges are not ranked + edges['rank'] = 1 # Adds in a rank column of 1s because the edges are not ranked # Remove the prefix edges['source'] = edges['source'].apply(post_domino_id_transform) edges['target'] = edges['target'].apply(post_domino_id_transform) - edges.to_csv(standardized_pathway_file, sep='\t',header=False, index=False) + edges.to_csv(standardized_pathway_file, sep='\t', header=False, index=False) def pre_domino_id_transform(node_id): diff --git a/src/runner.py b/src/runner.py index 7c009c1f..8d6da058 100644 --- a/src/runner.py +++ b/src/runner.py @@ -1,4 +1,5 @@ from src.dataset import Dataset +from src.domino import DOMINO as domino # supported algorithm imports from src.meo import MEO as meo @@ -6,7 +7,6 @@ from src.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1 from src.omicsintegrator2 import OmicsIntegrator2 as omicsintegrator2 from src.pathlinker import PathLinker as pathlinker -from src.domino import DOMINO as domino def run(algorithm, params): diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py index 7d318aaf..1ec1353c 100644 --- a/test/DOMINO/test_domino.py +++ b/test/DOMINO/test_domino.py @@ -1,8 +1,9 @@ -import pytest -import shutil import filecmp - +import shutil from pathlib import Path + +import pytest + from src.domino import DOMINO TEST_DIR = 'test/DOMINO/' @@ -11,14 +12,14 @@ OUT_FILE_PARSE = TEST_DIR+'output/domino-parse-output.txt' OUT_FILE_PARSE_EXP = TEST_DIR+'expected_output/domino-parse-output.txt' + class TestDOMINO: """ - Run test for the DOMINO run and parse_output function. - We intentionally omit a DOMINO run correctness test. The output - of DOMINO changes between runs without an option to set a seed for - the algorithm. The variability makes it difficult to compare + Run test for the DOMINO run and parse_output function. + We intentionally omit a DOMINO run correctness test. The output + of DOMINO changes between runs without an option to set a seed for + the algorithm. The variability makes it difficult to compare generated output to expected output. - """ def test_domino_required(self): @@ -64,12 +65,11 @@ def test_domino_missing_network(self): def test_domino_parse_output(self): # Input is the concatenated module_0.html and module_1.html file from - # the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt + # the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt # from https://github.com/Shamir-Lab/DOMINO/tree/master/examples # Confirms the generated output matches the expected output out_path = Path(OUT_FILE_PARSE) out_path.unlink(missing_ok=True) - out_path_exp = Path(OUT_FILE_PARSE_EXP) DOMINO.parse_output( TEST_DIR+'input/domino-concat-modules.txt', OUT_FILE_PARSE) From 417bbef1e504df6faccc41c04a03c258101c5af1 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 11 Aug 2023 10:07:51 -0500 Subject: [PATCH 34/44] Fix parse outputs for empty pathways Improve documentation --- docker-wrappers/DOMINO/README.md | 4 ++++ src/domino.py | 13 ++++++++----- test/DOMINO/test_domino.py | 1 + 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/docker-wrappers/DOMINO/README.md b/docker-wrappers/DOMINO/README.md index 60e1fb67..617364a5 100644 --- a/docker-wrappers/DOMINO/README.md +++ b/docker-wrappers/DOMINO/README.md @@ -15,3 +15,7 @@ To inspect the installed Python packages: winpty docker run otjohnson/domino pip list ``` The `winpty` prefix is only needed on Windows. + +## TODO +- Resolve upstream ValueError with small inputs https://github.com/Shamir-Lab/DOMINO/issues/11 +- Use cache or reuse slices files from previous runs on the same network diff --git a/src/domino.py b/src/domino.py index 8562f2fd..838252a9 100644 --- a/src/domino.py +++ b/src/domino.py @@ -172,11 +172,14 @@ def parse_output(raw_pathway_file, standardized_pathway_file): edges = pd.concat([edges, newdf], axis=0) - edges['rank'] = 1 # Adds in a rank column of 1s because the edges are not ranked - - # Remove the prefix - edges['source'] = edges['source'].apply(post_domino_id_transform) - edges['target'] = edges['target'].apply(post_domino_id_transform) + # DOMINO produces empty output files in some settings such as when it is run with small input files + # and generates a ValueError + if len(edges) > 0: + edges['rank'] = 1 # Adds in a rank column of 1s because the edges are not ranked + + # Remove the prefix + edges['source'] = edges['source'].apply(post_domino_id_transform) + edges['target'] = edges['target'].apply(post_domino_id_transform) edges.to_csv(standardized_pathway_file, sep='\t', header=False, index=False) diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py index 1ec1353c..4959ceb2 100644 --- a/test/DOMINO/test_domino.py +++ b/test/DOMINO/test_domino.py @@ -20,6 +20,7 @@ class TestDOMINO: of DOMINO changes between runs without an option to set a seed for the algorithm. The variability makes it difficult to compare generated output to expected output. + See https://github.com/Shamir-Lab/DOMINO/issues/5 """ def test_domino_required(self): From bfd81416c8cb9303e7fc61f6a13d06b05e5a0a53 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 11 Aug 2023 10:30:28 -0500 Subject: [PATCH 35/44] Disable cache and move slices file --- config/config.yaml | 11 +++++------ config/egfr.yaml | 8 +++----- src/domino.py | 24 +++++++++++------------- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 20e8b1c5..af7f6341 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -28,14 +28,14 @@ algorithms: - name: "pathlinker" params: - include: true + include: false directed: true run1: k: range(100,201,100) - name: "omicsintegrator1" params: - include: true + include: false directed: false run1: r: [5] @@ -46,7 +46,7 @@ - name: "omicsintegrator2" params: - include: true + include: false directed: false run1: b: [4] @@ -57,7 +57,7 @@ - name: "meo" params: - include: true + include: false directed: true run1: max_path_length: [3] @@ -66,7 +66,7 @@ - name: "mincostflow" params: - include: true + include: false directed: false run1: flow: [1] # The flow must be an int @@ -77,7 +77,6 @@ include: true directed: false run1: - use_cache: ["false"] slices_threshold: [0.3] module_threshold: [0.05] diff --git a/config/egfr.yaml b/config/egfr.yaml index 1571960d..ae6af7eb 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -19,7 +19,7 @@ algorithms: name: omicsintegrator1 params: directed: false - include: true + include: false run1: b: - 0.55 @@ -39,7 +39,7 @@ algorithms: name: omicsintegrator2 params: directed: false - include: true + include: false run1: b: - 4 @@ -54,7 +54,7 @@ algorithms: name: meo params: directed: true - include: true + include: false run1: local_search: - "Yes" @@ -68,8 +68,6 @@ algorithms: directed: false include: true run1: - use_cache: - - "false" slices_threshold: - 0.3 module_threshold: diff --git a/src/domino.py b/src/domino.py index 838252a9..28f94be2 100644 --- a/src/domino.py +++ b/src/domino.py @@ -39,7 +39,7 @@ def generate_inputs(data, filename_map): # e.g., ENSG0[node_id] # Create active_genes file - node_df.to_csv(filename_map['active_genes'],sep="\t",index=False,columns=['NODEID'], header=False) + node_df.to_csv(filename_map['active_genes'], sep="\t", index=False, columns=['NODEID'], header=False) # Create network file edges_df = data.get_interactome() @@ -53,21 +53,19 @@ def generate_inputs(data, filename_map): header=['ID_interactor_A', 'ppi', 'ID_interactor_B']) @staticmethod - def run(network=None, active_genes=None, output_file=None, use_cache=True, slices_threshold=None, module_threshold=None, singularity=False): + def run(network=None, active_genes=None, output_file=None, slices_threshold=None, module_threshold=None, singularity=False): """ - Run DOMINO with Docker - Let visualization be always true, parallelization be always 1 thread + Run DOMINO with Docker. + Let visualization be always true, parallelization be always 1 thread, and use_cache be always false. DOMINO produces multiple output module files in an HTML format. SPRAS concatenates these files into one file, which from which it will extract edges from to produce one pathway. @param network: input network file (required) @param active_genes: input active genes (required) @param output_file: path to the output pathway file (required) - @param use_cache: if True, use auto-generated cache network files (*.pkl) from previous executions with the same network (optional) @param slices_threshold: the p-value threshold for considering a slice as relevant (optional) @param module_threshold: the p-value threshold for considering a putative module as final module (optional) @param singularity: if True, run using the Singularity container instead of the Docker container (optional) """ - # Assuming defaults are: use_cache=true if not network or not active_genes or not output_file: raise ValueError('Required DOMINO arguments are missing') @@ -89,13 +87,14 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) volumes.append(bind_path) - bind_path, mapped_slices_dir = prepare_volume('slices.txt', work_dir) + slices_files = Path(out_dir, 'slices.txt') + bind_path, mapped_slices_file = prepare_volume(str(slices_files), work_dir) volumes.append(bind_path) # /spras/ADFJGFD/slices.txt slicer_command = ['slicer', '--network_file', network_file, - '--output_file', mapped_slices_dir] + '--output_file', mapped_slices_file] print('Running slicer with arguments: {}'.format(' '.join(slicer_command)), flush=True) @@ -111,18 +110,17 @@ def run(network=None, active_genes=None, output_file=None, use_cache=True, slice command = ['domino', '--active_genes_files', node_file, '--network_file', network_file, - '--slices_file', mapped_slices_dir, + '--slices_file', mapped_slices_file, '--output_folder', mapped_out_dir, + '--use_cache', 'false', '--parallelization', '1', '--visualization', 'true'] # Add optional arguments - if use_cache is not True: - command.extend(['-c', 'false']) if slices_threshold is not None: - command.extend(['-sth', str(slices_threshold)]) + command.extend(['--slices_threshold', str(slices_threshold)]) if module_threshold is not None: - command.extend(['-mth', str(module_threshold)]) + command.extend(['--module_threshold', str(module_threshold)]) print('Running DOMINO with arguments: {}'.format(' '.join(command)), flush=True) From bc001033474637a5afff83beca1668768ffd8e6d Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 11 Aug 2023 10:56:35 -0500 Subject: [PATCH 36/44] Remove intermediate files and correct slices arg --- config/config.yaml | 2 +- config/egfr.yaml | 4 ++-- src/domino.py | 29 +++++++++++++++++------------ 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index af7f6341..a16d383c 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -77,7 +77,7 @@ include: true directed: false run1: - slices_threshold: [0.3] + slice_threshold: [0.3] module_threshold: [0.05] # Here we specify which pathways to run and other file location information. diff --git a/config/egfr.yaml b/config/egfr.yaml index ae6af7eb..d4949a26 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -10,7 +10,7 @@ algorithms: name: pathlinker params: directed: true - include: true + include: false run1: k: - 10 @@ -68,7 +68,7 @@ algorithms: directed: false include: true run1: - slices_threshold: + slice_threshold: - 0.3 module_threshold: - 0.05 diff --git a/src/domino.py b/src/domino.py index 28f94be2..a2f23053 100644 --- a/src/domino.py +++ b/src/domino.py @@ -53,7 +53,7 @@ def generate_inputs(data, filename_map): header=['ID_interactor_A', 'ppi', 'ID_interactor_B']) @staticmethod - def run(network=None, active_genes=None, output_file=None, slices_threshold=None, module_threshold=None, singularity=False): + def run(network=None, active_genes=None, output_file=None, slice_threshold=None, module_threshold=None, singularity=False): """ Run DOMINO with Docker. Let visualization be always true, parallelization be always 1 thread, and use_cache be always false. @@ -62,7 +62,7 @@ def run(network=None, active_genes=None, output_file=None, slices_threshold=None @param network: input network file (required) @param active_genes: input active genes (required) @param output_file: path to the output pathway file (required) - @param slices_threshold: the p-value threshold for considering a slice as relevant (optional) + @param slice_threshold: the p-value threshold for considering a slice as relevant (optional) @param module_threshold: the p-value threshold for considering a putative module as final module (optional) @param singularity: if True, run using the Singularity container instead of the Docker container (optional) """ @@ -87,10 +87,9 @@ def run(network=None, active_genes=None, output_file=None, slices_threshold=None bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) volumes.append(bind_path) - slices_files = Path(out_dir, 'slices.txt') - bind_path, mapped_slices_file = prepare_volume(str(slices_files), work_dir) + slices_file = Path(out_dir, 'slices.txt') + bind_path, mapped_slices_file = prepare_volume(str(slices_file), work_dir) volumes.append(bind_path) - # /spras/ADFJGFD/slices.txt slicer_command = ['slicer', '--network_file', network_file, @@ -117,8 +116,9 @@ def run(network=None, active_genes=None, output_file=None, slices_threshold=None '--visualization', 'true'] # Add optional arguments - if slices_threshold is not None: - command.extend(['--slices_threshold', str(slices_threshold)]) + if slice_threshold is not None: + # DOMINO readme has the wrong argument https://github.com/Shamir-Lab/DOMINO/issues/12 + command.extend(['--slice_threshold', str(slice_threshold)]) if module_threshold is not None: command.extend(['--module_threshold', str(module_threshold)]) @@ -133,15 +133,20 @@ def run(network=None, active_genes=None, output_file=None, slices_threshold=None print(domino_out) # DOMINO creates a new folder in out_dir to output its modules files into /active_genes + # The filename is determined by the input active_genes and cannot be configured + # Leave these HTML files for user inspection out_modules_dir = Path(out_dir, 'active_genes') - #Path(out_modules_dir, 'modules.out').unlink(missing_ok=True) - # Concatenate each produced module html file into one big file - with open(output_file, "w") as fo: + # Concatenate each produced module HTML file into one file + with open(output_file, 'w') as fo: for html_file in out_modules_dir.glob('module_*.html'): - with open(html_file,'r') as fi: + with open(html_file, 'r') as fi: fo.write(fi.read()) - #Path(html_file).unlink(missing_ok=True) + + # Clean up DOMINO intermediate and pickle files + Path(slices_file).unlink(missing_ok=True) + Path(out_dir, 'network.slices.pkl').unlink(missing_ok=True) + Path(network + '.pkl').unlink(missing_ok=True) @staticmethod def parse_output(raw_pathway_file, standardized_pathway_file): From 3f8dd54c10fe7fa6f66c3020445bfc35356abadd Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 11 Aug 2023 11:11:05 -0500 Subject: [PATCH 37/44] Switch to reedcompbio Docker image --- src/domino.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/domino.py b/src/domino.py index a2f23053..943376b2 100644 --- a/src/domino.py +++ b/src/domino.py @@ -99,7 +99,7 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, container_framework = 'singularity' if singularity else 'docker' slicer_out = run_container(container_framework, - 'otjohnson/domino', + 'reedcompbio/domino', slicer_command, volumes, work_dir) @@ -124,9 +124,8 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, print('Running DOMINO with arguments: {}'.format(' '.join(command)), flush=True) - # container_framework = 'singularity' if singularity else 'docker' domino_out = run_container(container_framework, - 'otjohnson/domino', + 'reedcompbio/domino', command, volumes, work_dir) From ebe918d3a48b1ea724d8c9207e18e42409e6d9c3 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 11 Aug 2023 11:13:17 -0500 Subject: [PATCH 38/44] Reorganize import --- src/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runner.py b/src/runner.py index 8d6da058..2a63a699 100644 --- a/src/runner.py +++ b/src/runner.py @@ -1,7 +1,7 @@ from src.dataset import Dataset -from src.domino import DOMINO as domino # supported algorithm imports +from src.domino import DOMINO as domino from src.meo import MEO as meo from src.mincostflow import MinCostFlow as mincostflow from src.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1 From d2d52ded369a4997e711faf8dde8420397294463 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 11 Aug 2023 17:52:00 -0500 Subject: [PATCH 39/44] Add ID transform tests and confirm prefix exists --- src/domino.py | 25 ++++++++++++++++--------- test/DOMINO/test_domino.py | 20 +++++++++++++++++--- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/domino.py b/src/domino.py index 943376b2..bab1d0d0 100644 --- a/src/domino.py +++ b/src/domino.py @@ -6,9 +6,10 @@ from src.prm import PRM from src.util import prepare_volume, run_container -__all__ = ['DOMINO'] +__all__ = ['DOMINO', 'pre_domino_id_transform', 'post_domino_id_transform'] ID_PREFIX = 'ENSG0' +ID_PREFIX_LEN = len(ID_PREFIX) class DOMINO(PRM): @@ -143,7 +144,7 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, fo.write(fi.read()) # Clean up DOMINO intermediate and pickle files - Path(slices_file).unlink(missing_ok=True) + slices_file.unlink(missing_ok=True) Path(out_dir, 'network.slices.pkl').unlink(missing_ok=True) Path(network + '.pkl').unlink(missing_ok=True) @@ -188,17 +189,23 @@ def parse_output(raw_pathway_file, standardized_pathway_file): def pre_domino_id_transform(node_id): """ - DOMINO algorithm requires module edges have 'ENSG0' string. - Prepend each node id with ID_PREFIX - @param node_id: the node id to transformed + DOMINO requires module edges to have the 'ENSG0' string as a prefix. + Prepend each node id with this ID_PREFIX. + @param node_id: the node id to transform + @return the node id with the prefix added """ return ID_PREFIX + node_id def post_domino_id_transform(node_id): """ - Remove prefix - @param node_id: the node id to transformed + Remove ID_PREFIX from the beginning of the node id if it is present. + @param node_id: the node id to transform + @return the node id without the prefix, if it was present """ - node_id = node_id[len(ID_PREFIX):] - return node_id + # Use removeprefix if SPRAS ever requires Python >= 3.9 + # https://docs.python.org/3/library/stdtypes.html#str.removeprefix + if node_id.startswith(ID_PREFIX): + return node_id[ID_PREFIX_LEN:] + else: + return node_id diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py index 4959ceb2..c3e121c6 100644 --- a/test/DOMINO/test_domino.py +++ b/test/DOMINO/test_domino.py @@ -4,7 +4,7 @@ import pytest -from src.domino import DOMINO +from src.domino import DOMINO, post_domino_id_transform, pre_domino_id_transform TEST_DIR = 'test/DOMINO/' OUT_FILE_DEFAULT = TEST_DIR+'output/domino-output.txt' @@ -42,8 +42,7 @@ def test_domino_optional(self): network=TEST_DIR+'input/domino-network.txt', active_genes=TEST_DIR+'input/domino-active-genes.txt', output_file=OUT_FILE_OPTIONAL, - use_cache=False, - slices_threshold=0.4, + slice_threshold=0.4, module_threshold=0.06) # output_file should be empty assert out_path.exists() @@ -89,3 +88,18 @@ def test_domino_singularity(self): output_file=OUT_FILE_DEFAULT, singularity=True) assert out_path.exists() + + def test_pre_id_transform(self): + """ + Test the node ID transformation run before DOMINO executes + """ + assert pre_domino_id_transform('123') == 'ENSG0123' + assert pre_domino_id_transform('xyz') == 'ENSG0xyz' + + def test_post_id_transform(self): + """ + Test the node ID transformation run after DOMINO executes + """ + assert post_domino_id_transform('ENSG0123') == '123' + assert post_domino_id_transform('ENSG0xyz') == 'xyz' + assert post_domino_id_transform('123') == '123' From c3f1e07d5cb7377e302a24a098e36b427d38f7e9 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 11 Aug 2023 18:04:29 -0500 Subject: [PATCH 40/44] Check contents when comparing files and replace custom comparison Shallow comparison does not check file contents filecmp.cmp has the same functionality as the util function --- src/util.py | 15 --------------- test/DOMINO/test_domino.py | 2 +- test/LocalNeighborhood/test_ln.py | 5 ++--- test/ml/test_ml.py | 6 +++--- 4 files changed, 6 insertions(+), 22 deletions(-) diff --git a/src/util.py b/src/util.py index c2ce696b..911fbf12 100644 --- a/src/util.py +++ b/src/util.py @@ -361,21 +361,6 @@ def process_config(config): return config, datasets, out_dir, algorithm_params, algorithm_directed, pca_params, hac_params -def compare_files(file1, file2) -> bool: - """ - Compare files by reading the contents into lists. Only recommended for small files. - @param file1: first file to compare - @param file2: second file to compare - @return: True or False - """ - with open(file1) as f1: - contents1 = list(f1) - - with open(file2) as f2: - contents2 = list(f2) - - return contents1 == contents2 - def make_required_dirs(path: str): """ diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py index c3e121c6..20d43054 100644 --- a/test/DOMINO/test_domino.py +++ b/test/DOMINO/test_domino.py @@ -73,7 +73,7 @@ def test_domino_parse_output(self): DOMINO.parse_output( TEST_DIR+'input/domino-concat-modules.txt', OUT_FILE_PARSE) - assert filecmp.cmp(OUT_FILE_PARSE, OUT_FILE_PARSE_EXP) + assert filecmp.cmp(OUT_FILE_PARSE, OUT_FILE_PARSE_EXP, shallow=False) # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms diff --git a/test/LocalNeighborhood/test_ln.py b/test/LocalNeighborhood/test_ln.py index 0bfe9c6d..c7cf2491 100644 --- a/test/LocalNeighborhood/test_ln.py +++ b/test/LocalNeighborhood/test_ln.py @@ -1,10 +1,9 @@ import sys +from filecmp import cmp from pathlib import Path import pytest -from src.util import compare_files - # TODO consider refactoring to simplify the import # Modify the path because of the - in the directory SPRAS_ROOT = Path(__file__).parent.parent.parent.absolute() @@ -26,7 +25,7 @@ def test_ln(self): output_file=OUT_FILE) assert OUT_FILE.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected_output', 'ln-output.txt') - assert compare_files(OUT_FILE, expected_file), 'Output file does not match expected output file' + assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file' """ Run the local neighborhood algorithm with a missing input file diff --git a/test/ml/test_ml.py b/test/ml/test_ml.py index 336b8555..aefd0419 100644 --- a/test/ml/test_ml.py +++ b/test/ml/test_ml.py @@ -23,7 +23,7 @@ def test_summarize_networks(self): INPUT_DIR + 'test-data-longName/longName.txt', INPUT_DIR + 'test-data-longName2/longName2.txt', INPUT_DIR + 'test-data-empty/empty.txt', INPUT_DIR + 'test-data-spaces/spaces.txt']) dataframe.to_csv(OUT_DIR + 'dataframe.csv') - assert filecmp.cmp(OUT_DIR + 'dataframe.csv', EXPECT_DIR + 'expected-dataframe.csv') + assert filecmp.cmp(OUT_DIR + 'dataframe.csv', EXPECT_DIR + 'expected-dataframe.csv', shallow=False) def test_pca(self): dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt']) @@ -40,13 +40,13 @@ def test_hac_horizontal(self): dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt']) ml.hac_horizontal(dataframe, OUT_DIR + 'hac-horizontal.png', OUT_DIR + 'hac-clusters-horizontal.txt') - assert filecmp.cmp(OUT_DIR + 'hac-clusters-horizontal.txt', EXPECT_DIR + 'expected-hac-horizontal-clusters.txt') + assert filecmp.cmp(OUT_DIR + 'hac-clusters-horizontal.txt', EXPECT_DIR + 'expected-hac-horizontal-clusters.txt', shallow=False) def test_hac_vertical(self): dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt']) ml.hac_vertical(dataframe, OUT_DIR + 'hac-vertical.png', OUT_DIR + 'hac-clusters-vertical.txt') - assert filecmp.cmp(OUT_DIR + 'hac-clusters-vertical.txt', EXPECT_DIR + 'expected-hac-vertical-clusters.txt') + assert filecmp.cmp(OUT_DIR + 'hac-clusters-vertical.txt', EXPECT_DIR + 'expected-hac-vertical-clusters.txt', shallow=False) def test_ensemble_network(self): dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt']) From 4a530bf890e39d9dcc28bcbb1ae3d350ad17dfc1 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 11 Aug 2023 20:20:25 -0500 Subject: [PATCH 41/44] Re-enable other algorithms in config files --- config/config.yaml | 10 +++++----- config/egfr.yaml | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index a16d383c..5d96a964 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -28,14 +28,14 @@ algorithms: - name: "pathlinker" params: - include: false + include: true directed: true run1: k: range(100,201,100) - name: "omicsintegrator1" params: - include: false + include: true directed: false run1: r: [5] @@ -46,7 +46,7 @@ - name: "omicsintegrator2" params: - include: false + include: true directed: false run1: b: [4] @@ -57,7 +57,7 @@ - name: "meo" params: - include: false + include: true directed: true run1: max_path_length: [3] @@ -66,7 +66,7 @@ - name: "mincostflow" params: - include: false + include: true directed: false run1: flow: [1] # The flow must be an int diff --git a/config/egfr.yaml b/config/egfr.yaml index d4949a26..8b8834e2 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -10,7 +10,7 @@ algorithms: name: pathlinker params: directed: true - include: false + include: true run1: k: - 10 @@ -19,7 +19,7 @@ algorithms: name: omicsintegrator1 params: directed: false - include: false + include: true run1: b: - 0.55 @@ -39,7 +39,7 @@ algorithms: name: omicsintegrator2 params: directed: false - include: false + include: true run1: b: - 4 @@ -54,7 +54,7 @@ algorithms: name: meo params: directed: true - include: false + include: true run1: local_search: - "Yes" From e942e05536af406f6b0ad8029ce92adcf17073fd Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 11 Aug 2023 20:22:22 -0500 Subject: [PATCH 42/44] Revert more config file settings --- config/config.yaml | 2 +- config/egfr.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 5d96a964..4b817af3 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -125,7 +125,7 @@ include: true # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset ml: - include: false + include: true # specify how many principal components to calculate components: 2 # boolean to show the labels on the pca graph diff --git a/config/egfr.yaml b/config/egfr.yaml index 8b8834e2..a771a602 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -39,7 +39,7 @@ algorithms: name: omicsintegrator2 params: directed: false - include: true + include: false run1: b: - 4 @@ -54,7 +54,7 @@ algorithms: name: meo params: directed: true - include: true + include: false run1: local_search: - "Yes" From 0e78c285604382cfcfc5f4f119b0ced0284e1341 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 11 Aug 2023 20:59:18 -0500 Subject: [PATCH 43/44] Additional documentation and code review --- docker-wrappers/DOMINO/Dockerfile | 1 + docker-wrappers/DOMINO/README.md | 7 +-- input/README.md | 11 ++--- src/domino.py | 74 ++++++++++++++++--------------- test/DOMINO/test_domino.py | 6 +-- 5 files changed, 53 insertions(+), 46 deletions(-) diff --git a/docker-wrappers/DOMINO/Dockerfile b/docker-wrappers/DOMINO/Dockerfile index de6da0eb..9ab193ac 100644 --- a/docker-wrappers/DOMINO/Dockerfile +++ b/docker-wrappers/DOMINO/Dockerfile @@ -4,6 +4,7 @@ FROM python:3.7 RUN pip install domino-python==0.1.1 +# DOMINO requires data files in hard-coded locations RUN cd /usr/local/lib/python3.7/site-packages/src/data && \ wget https://raw.githubusercontent.com/Shamir-Lab/DOMINO/master/src/data/ensg2gene_symbol.txt && \ wget https://raw.githubusercontent.com/Shamir-Lab/DOMINO/master/src/data/ensmusg2gene_symbol.txt && \ diff --git a/docker-wrappers/DOMINO/README.md b/docker-wrappers/DOMINO/README.md index 617364a5..610c73bc 100644 --- a/docker-wrappers/DOMINO/README.md +++ b/docker-wrappers/DOMINO/README.md @@ -1,18 +1,19 @@ # DOMINO Docker image -A Docker image for [DOMINO](https://github.com/Shamir-Lab/DOMINO) that is available on [DockerHub](https://hub.docker.com/repository/docker/otjohnson/domino). +A Docker image for [DOMINO](https://github.com/Shamir-Lab/DOMINO) that is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/domino). DOMINO outputs multiple active modules, which SPRAS combines into a single pathway. +It is [non-deterministic](https://github.com/Shamir-Lab/DOMINO/issues/5) and cannot be made deterministic with a seed. To create the Docker image run: ``` -docker build -t otjohnson/domino -f Dockerfile . +docker build -t reedcompbio/domino -f Dockerfile . ``` from this directory. To inspect the installed Python packages: ``` -winpty docker run otjohnson/domino pip list +winpty docker run reedcompbio/domino pip list ``` The `winpty` prefix is only needed on Windows. diff --git a/input/README.md b/input/README.md index b302610b..be892f1a 100644 --- a/input/README.md +++ b/input/README.md @@ -11,11 +11,11 @@ All other columns specify additional node attributes such as prizes. Any nodes that are listed in a node file but are not present in one or more edges in the edge file will be removed. For example: ``` -NODEID prize sources targets -A 1.0 True -B 3.3 True -C 2.5 True -D 1.9 True True +NODEID prize sources targets active +A 1.0 True True +B 3.3 True True +C 2.5 True True +D 1.9 True True True ``` A secondary format provides only a list of node identifiers and uses the filename as the node attribute, as in the example `sources.txt`. @@ -49,6 +49,7 @@ The files are originally from the [Temporal Pathway Synthesizer (TPS)](https://g They have been lightly modified for SPRAS by lowering one edge weight that was greater than 1, removing a PSEUDONODE prize, adding a prize of 10.0 to EGF_HUMAN, and converting all edges to undirected edges. The only source is EGF_HUMAN. All proteins with phosphorylation-based prizes are also labeled as targets. +All nodes are considered active. If you use any of the input files `tps-egfr-prizes.txt` or `phosphosite-irefindex13.0-uniprot.txt`, reference the publication diff --git a/src/domino.py b/src/domino.py index bab1d0d0..e3d1f1cb 100644 --- a/src/domino.py +++ b/src/domino.py @@ -32,15 +32,14 @@ def generate_inputs(data, filename_map): # NODEID is always included in the node table node_df = data.request_node_columns(['active']) else: - raise ValueError("DOMINO requires active genes") + raise ValueError('DOMINO requires active genes') node_df = node_df[node_df['active'] == True] - # transform each node id with a prefix + # Transform each node id with a prefix node_df['NODEID'] = node_df['NODEID'].apply(pre_domino_id_transform) - # e.g., ENSG0[node_id] # Create active_genes file - node_df.to_csv(filename_map['active_genes'], sep="\t", index=False, columns=['NODEID'], header=False) + node_df.to_csv(filename_map['active_genes'], sep='\t', index=False, columns=['NODEID'], header=False) # Create network file edges_df = data.get_interactome() @@ -58,10 +57,9 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, """ Run DOMINO with Docker. Let visualization be always true, parallelization be always 1 thread, and use_cache be always false. - DOMINO produces multiple output module files in an HTML format. SPRAS concatenates these files into one file, - which from which it will extract edges from to produce one pathway. - @param network: input network file (required) - @param active_genes: input active genes (required) + DOMINO produces multiple output module files in an HTML format. SPRAS concatenates these files into one file. + @param network: input network file (required) + @param active_genes: input active genes (required) @param output_file: path to the output pathway file (required) @param slice_threshold: the p-value threshold for considering a slice as relevant (optional) @param module_threshold: the p-value threshold for considering a putative module as final module (optional) @@ -82,7 +80,6 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, bind_path, node_file = prepare_volume(active_genes, work_dir) volumes.append(bind_path) - # Use its --output_folder argument to set the output file prefix to specify an absolute path and prefix out_dir = Path(output_file).parent out_dir.mkdir(parents=True, exist_ok=True) bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) @@ -92,6 +89,7 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, bind_path, mapped_slices_file = prepare_volume(str(slices_file), work_dir) volumes.append(bind_path) + # Make the Python command to run within the container slicer_command = ['slicer', '--network_file', network_file, '--output_file', mapped_slices_file] @@ -107,7 +105,7 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, print(slicer_out) # Make the Python command to run within the container - command = ['domino', + domino_command = ['domino', '--active_genes_files', node_file, '--network_file', network_file, '--slices_file', mapped_slices_file, @@ -119,20 +117,20 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, # Add optional arguments if slice_threshold is not None: # DOMINO readme has the wrong argument https://github.com/Shamir-Lab/DOMINO/issues/12 - command.extend(['--slice_threshold', str(slice_threshold)]) + domino_command.extend(['--slice_threshold', str(slice_threshold)]) if module_threshold is not None: - command.extend(['--module_threshold', str(module_threshold)]) + domino_command.extend(['--module_threshold', str(module_threshold)]) - print('Running DOMINO with arguments: {}'.format(' '.join(command)), flush=True) + print('Running DOMINO with arguments: {}'.format(' '.join(domino_command)), flush=True) domino_out = run_container(container_framework, 'reedcompbio/domino', - command, + domino_command, volumes, work_dir) print(domino_out) - # DOMINO creates a new folder in out_dir to output its modules files into /active_genes + # DOMINO creates a new folder in out_dir to output its modules HTML files into called active_genes # The filename is determined by the input active_genes and cannot be configured # Leave these HTML files for user inspection out_modules_dir = Path(out_dir, 'active_genes') @@ -151,45 +149,51 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, @staticmethod def parse_output(raw_pathway_file, standardized_pathway_file): """ - Convert a predicted pathway into the universal format - @param raw_pathway_file: pathway file produced by an algorithm's run function - @param standardized_pathway_file: the same pathway written in the universal format + Convert the merged HTML modules into the universal pathway format + @param raw_pathway_file: the merged HTML modules file + @param standardized_pathway_file: the edges from the modules written in the universal format """ - edges = pd.DataFrame() + edges_df = pd.DataFrame() with open(raw_pathway_file, 'r') as file: for line in file: - if line.strip().startswith("let data = ["): - line2 = line.replace('let data = ', '') - line3 = line2.replace(';', '') + clean_line = line.strip() + # The pattern in the HTML that indicates the JSON data + if clean_line.startswith('let data = ['): + clean_line = clean_line.replace('let data = ', '') # Start of the line + clean_line = clean_line.replace(';', '') # End of the line - data = json.loads(line3) + data = json.loads(clean_line) entries = [] + # Iterate over the JSON entries, which contain both node information and edge information for entry in data: - tmp = entry['data'] - entries.append(tmp) + entries.append(entry['data']) - df = pd.DataFrame(entries) - newdf = df.loc[:, ['source', 'target']].dropna() + # Create a dataframe with all the data from the JSON row, keep only the source and target + # columns that indicate edges + # Dropping the other rows eliminates the node information + module_df = pd.DataFrame(entries) + module_df = module_df.loc[:, ['source', 'target']].dropna() - edges = pd.concat([edges, newdf], axis=0) + # Add the edges from this module to the cumulative pathway edges + edges_df = pd.concat([edges_df, module_df], axis=0) # DOMINO produces empty output files in some settings such as when it is run with small input files # and generates a ValueError - if len(edges) > 0: - edges['rank'] = 1 # Adds in a rank column of 1s because the edges are not ranked + if len(edges_df) > 0: + edges_df['rank'] = 1 # Adds in a rank column of 1s because the edges are not ranked # Remove the prefix - edges['source'] = edges['source'].apply(post_domino_id_transform) - edges['target'] = edges['target'].apply(post_domino_id_transform) + edges_df['source'] = edges_df['source'].apply(post_domino_id_transform) + edges_df['target'] = edges_df['target'].apply(post_domino_id_transform) - edges.to_csv(standardized_pathway_file, sep='\t', header=False, index=False) + edges_df.to_csv(standardized_pathway_file, sep='\t', header=False, index=False) def pre_domino_id_transform(node_id): """ - DOMINO requires module edges to have the 'ENSG0' string as a prefix. + DOMINO requires module edges to have the 'ENSG0' string as a prefix for visualization. Prepend each node id with this ID_PREFIX. @param node_id: the node id to transform @return the node id with the prefix added @@ -201,7 +205,7 @@ def post_domino_id_transform(node_id): """ Remove ID_PREFIX from the beginning of the node id if it is present. @param node_id: the node id to transform - @return the node id without the prefix, if it was present + @return the node id without the prefix, if it was present, otherwise the original node id """ # Use removeprefix if SPRAS ever requires Python >= 3.9 # https://docs.python.org/3/library/stdtypes.html#str.removeprefix diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py index 20d43054..a4d69339 100644 --- a/test/DOMINO/test_domino.py +++ b/test/DOMINO/test_domino.py @@ -15,8 +15,8 @@ class TestDOMINO: """ - Run test for the DOMINO run and parse_output function. - We intentionally omit a DOMINO run correctness test. The output + Run tests for the DOMINO run, parse_output, and id processing functions. + Intentionally omits a DOMINO run correctness test. The output of DOMINO changes between runs without an option to set a seed for the algorithm. The variability makes it difficult to compare generated output to expected output. @@ -35,7 +35,7 @@ def test_domino_required(self): assert out_path.exists() def test_domino_optional(self): - # Include optional argument + # Include optional arguments out_path = Path(OUT_FILE_OPTIONAL) out_path.unlink(missing_ok=True) DOMINO.run( From 8fe055c28baf227a26649e22828efd526d690f99 Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Thu, 17 Aug 2023 21:09:47 -0500 Subject: [PATCH 44/44] Clean up whitespace --- src/domino.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/domino.py b/src/domino.py index e3d1f1cb..cbfb1f12 100644 --- a/src/domino.py +++ b/src/domino.py @@ -106,13 +106,13 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, # Make the Python command to run within the container domino_command = ['domino', - '--active_genes_files', node_file, - '--network_file', network_file, - '--slices_file', mapped_slices_file, - '--output_folder', mapped_out_dir, - '--use_cache', 'false', - '--parallelization', '1', - '--visualization', 'true'] + '--active_genes_files', node_file, + '--network_file', network_file, + '--slices_file', mapped_slices_file, + '--output_folder', mapped_out_dir, + '--use_cache', 'false', + '--parallelization', '1', + '--visualization', 'true'] # Add optional arguments if slice_threshold is not None: