diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml index 0f68d83a..54d4125d 100644 --- a/.github/workflows/test-spras.yml +++ b/.github/workflows/test-spras.yml @@ -83,7 +83,7 @@ jobs: docker pull reedcompbio/mincostflow:latest docker pull reedcompbio/allpairs:v2 docker pull reedcompbio/domino:latest - docker pull reedcompbio/py4cytoscape:v2 + docker pull reedcompbio/py4cytoscape:v3 docker pull reedcompbio/spras:v0.1.0 - name: Build Omics Integrator 1 Docker image uses: docker/build-push-action@v1 @@ -154,8 +154,8 @@ jobs: path: docker-wrappers/Cytoscape/. dockerfile: docker-wrappers/Cytoscape/Dockerfile repository: reedcompbio/py4cytoscape - tags: v2 - cache_froms: reedcompbio/py4cytoscape:latest + tags: v3 + cache_froms: reedcompbio/py4cytoscape:v3 push: false - name: Build SPRAS Docker image uses: docker/build-push-action@v1 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a264c5f5..a943cacb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -154,9 +154,10 @@ Use the `run_container` utility function to run the command in the container `|`. -Convert these to be tab-separated vertex pairs followed by a tab and a `1` at the end of every line, which indicates all edges have the same rank. -See the `add_rank_column` function in `src.util.py`. -The output should have the format `<vertex1> <vertex2> 1`. +Convert these to be tab-separated vertex pairs followed by a tab and a `1`, then a tab and a `U`, at the end of every line, which indicates all edges have the same rank and are undirected. +See the `add_rank_column` and `raw_pathway_df` functions in `spras/util.py` and the `reinsert_direction_col_undirected` function in `spras/interactome.py`. +Write the file with a header row (`header=True`) using the column names `['Node1', 'Node2', 'Rank', 'Direction']`. +The output should have the format `<vertex1> <vertex2> 1 U`. ### Step 4: Make the Local Neighborhood wrapper accessible through SPRAS Import the new class `LocalNeighborhood` in `src/runner.py` so the wrapper functions can be accessed. 
diff --git a/doc/output.md b/doc/output.md new file mode 100644 index 00000000..431d67c7 --- /dev/null +++ b/doc/output.md @@ -0,0 +1,17 @@ +## File formats + +### Pathway output format +Output pathway files in the standard SPRAS format include a header row and rows providing attributes for each edge. +The header row is `Node1 Node2 Rank Direction`. +Each row lists the two nodes that are connected with an edge, the rank for that edge, and a directionality column to indicate whether the edge is directed or undirected. +The directionality values are either a 'U' for an undirected edge or a 'D' for a directed edge, where the direction is from Node1 to Node2. +Pathways that do not contain ranked edges can output all 1s in the Rank column. + +For example: +``` +Node1 Node2 Rank Direction +A B 1 D +B C 1 D +B D 2 U +D A 3 U +``` diff --git a/docker-wrappers/Cytoscape/README.md b/docker-wrappers/Cytoscape/README.md index 2d747ffd..c3cb8967 100644 --- a/docker-wrappers/Cytoscape/README.md +++ b/docker-wrappers/Cytoscape/README.md @@ -20,6 +20,7 @@ The Docker wrapper can be tested with `pytest`. ## Versions: - v1: Use supervisord to launch Cytoscape from a Python subprocess, then connect to Cytoscape with py4cytoscape. Only loads undirected pathways. Compatible with Singularity in local testing (Apptainer version 1.2.2-1.el7) but fails in GitHub Actions. - v2: Add support for edge direction column. 
+- v3: Add support for header lines in files ## TODO - Add an auth file for `xvfb-run` diff --git a/docker-wrappers/Cytoscape/cytoscape_util.py b/docker-wrappers/Cytoscape/cytoscape_util.py index dcf110f1..cc8ddd72 100644 --- a/docker-wrappers/Cytoscape/cytoscape_util.py +++ b/docker-wrappers/Cytoscape/cytoscape_util.py @@ -116,7 +116,9 @@ def load_pathways(pathways: List[str], output: str) -> None: suid = p4c.networks.import_network_from_tabular_file( file=path, column_type_list='s,t,x,ea', - delimiters='\t' + delimiters='\t', + first_row_as_column_names=True, + ) p4c.networks.rename_network(name, network=suid) diff --git a/pyproject.toml b/pyproject.toml index 68d10f5c..d19a5988 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "spras" -version = "0.1.0" +version = "0.2.0" description = "Signaling Pathway Reconstruction Analysis Streamliner" authors = [ { name = "Anthony Gitter", email = "gitter@biostat.wisc.edu" }, diff --git a/spras/allpairs.py b/spras/allpairs.py index 1b016534..c1d1bf15 100644 --- a/spras/allpairs.py +++ b/spras/allpairs.py @@ -1,14 +1,13 @@ import warnings from pathlib import Path -import pandas as pd - from spras.containers import prepare_volume, run_container from spras.interactome import ( convert_directed_to_undirected, reinsert_direction_col_undirected, ) from spras.prm import PRM +from spras.util import add_rank_column, raw_pathway_df __all__ = ['AllPairs'] @@ -110,7 +109,9 @@ def parse_output(raw_pathway_file, standardized_pathway_file): @param raw_pathway_file: pathway file produced by an algorithm's run function @param standardized_pathway_file: the same pathway written in the universal format """ - df = pd.read_csv(raw_pathway_file, sep='\t', header=None) - df['Rank'] = 1 # add a rank column of 1s since the edges are not ranked. 
- df = reinsert_direction_col_undirected(df) - df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t') + df = raw_pathway_df(raw_pathway_file, sep='\t', header=None) + if not df.empty: + df = add_rank_column(df) + df = reinsert_direction_col_undirected(df) + df.columns = ['Node1', 'Node2', 'Rank', 'Direction'] + df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t') diff --git a/spras/analysis/cytoscape.py b/spras/analysis/cytoscape.py index beab1de8..379284d9 100644 --- a/spras/analysis/cytoscape.py +++ b/spras/analysis/cytoscape.py @@ -48,7 +48,7 @@ def run_cytoscape(pathways: List[Union[str, PurePath]], output_file: str, contai print('Running Cytoscape with arguments: {}'.format(' '.join(command)), flush=True) - container_suffix = "py4cytoscape:v2" + container_suffix = "py4cytoscape:v3" out = run_container(container_framework, container_suffix, command, diff --git a/spras/analysis/graphspace.py b/spras/analysis/graphspace.py index ba87de6e..bee103f6 100644 --- a/spras/analysis/graphspace.py +++ b/spras/analysis/graphspace.py @@ -77,21 +77,21 @@ def load_graph(path: str) -> Tuple[Union[nx.Graph, nx.DiGraph], bool]: directed = False try: - pathways = pd.read_csv(path, sep="\t", header=None) + pathways = pd.read_csv(path, sep="\t", header=0) except pd.errors.EmptyDataError: print(f"The file {path} is empty.") return G, directed - pathways.columns = ["Interactor1", "Interactor2", "Rank", "Direction"] + mask_u = pathways['Direction'] == 'U' mask_d = pathways['Direction'] == 'D' pathways.drop(columns=["Direction"]) if mask_u.all(): - G = nx.from_pandas_edgelist(pathways, "Interactor1", "Interactor2", ["Rank"]) + G = nx.from_pandas_edgelist(pathways, "Node1", "Node2", ["Rank"]) directed = False elif mask_d.all(): - G = nx.from_pandas_edgelist(pathways, "Interactor1", "Interactor2", ["Rank"], create_using=nx.DiGraph()) + G = nx.from_pandas_edgelist(pathways, "Node1", "Node2", ["Rank"], create_using=nx.DiGraph()) directed = True else: 
print(f"{path} could not be visualized. GraphSpace does not support mixed direction type graphs currently") diff --git a/spras/analysis/ml.py b/spras/analysis/ml.py index a637d7fe..4fa1fd1d 100644 --- a/spras/analysis/ml.py +++ b/spras/analysis/ml.py @@ -41,10 +41,13 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra with open(file, 'r') as f: lines = f.readlines() + if len(lines) > 0: + lines.pop(0) # skip header line + edges = [] for line in lines: parts = line.split('\t') - if len(parts) > 0: # in case of empty line in file + if len(parts) == 4: # empty lines not allowed but empty files are allowed node1 = parts[0] node2 = parts[1] direction = str(parts[3]).strip() @@ -54,8 +57,10 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra elif direction == "D": # node order does matter for directed edges edges.append(DIR_CONST.join([node1, node2])) - else: - ValueError(f"direction is {direction}, rather than U or D") + elif direction != 'Direction': + raise ValueError(f"direction is {direction}, rather than U or D") + elif len(parts) != 0: + raise ValueError(f"In file {file}, expected line {line} to have 4 values, but found {len(parts)} values.") # getting the algorithm name p = PurePath(file) diff --git a/spras/analysis/summary.py b/spras/analysis/summary.py index 9b0d797d..0e4b4b86 100644 --- a/spras/analysis/summary.py +++ b/spras/analysis/summary.py @@ -33,8 +33,12 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame) -> # Iterate through each network file path for file_path in sorted(file_paths): - # Load in the network - nw = nx.read_edgelist(file_path, data=(('weight', float), ('Direction',str))) + + with open(file_path, 'r') as f: + lines = f.readlines()[1:] # skip the header line + + nw = nx.read_edgelist(lines, data=(('weight', float), ('Direction', str))) + # Save the network name, number of nodes, number edges, and number of connected components nw_name = 
str(file_path) number_nodes = nw.number_of_nodes() diff --git a/spras/domino.py b/spras/domino.py index f9f9f146..3170f64d 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -205,8 +205,11 @@ def parse_output(raw_pathway_file, standardized_pathway_file): edges_df['source'] = edges_df['source'].apply(post_domino_id_transform) edges_df['target'] = edges_df['target'].apply(post_domino_id_transform) edges_df = reinsert_direction_col_undirected(edges_df) + edges_df.columns = ['Node1', 'Node2', 'Rank', 'Direction'] + else: + edges_df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction']) - edges_df.to_csv(standardized_pathway_file, sep='\t', header=False, index=False) + edges_df.to_csv(standardized_pathway_file, sep='\t', header=True, index=False) def pre_domino_id_transform(node_id): @@ -225,9 +228,4 @@ def post_domino_id_transform(node_id): @param node_id: the node id to transform @return the node id without the prefix, if it was present, otherwise the original node id """ - # Use removeprefix if SPRAS ever requires Python >= 3.9 - # https://docs.python.org/3/library/stdtypes.html#str.removeprefix - if node_id.startswith(ID_PREFIX): - return node_id[ID_PREFIX_LEN:] - else: - return node_id + return node_id.removeprefix(ID_PREFIX) diff --git a/spras/meo.py b/spras/meo.py index ba962d92..b614d4c4 100644 --- a/spras/meo.py +++ b/spras/meo.py @@ -1,14 +1,12 @@ from pathlib import Path -import pandas as pd - from spras.containers import prepare_volume, run_container from spras.interactome import ( add_directionality_constant, reinsert_direction_col_directed, ) from spras.prm import PRM -from spras.util import add_rank_column +from spras.util import add_rank_column, raw_pathway_df __all__ = ['MEO', 'write_properties'] @@ -181,13 +179,14 @@ def parse_output(raw_pathway_file, standardized_pathway_file): @param standardized_pathway_file: the same pathway written in the universal format """ # Columns Source Type Target Oriented Weight - df = 
pd.read_csv(raw_pathway_file, sep='\t') - # Keep only edges that were assigned an orientation (direction) - df = df.loc[df['Oriented']] - # TODO what should be the edge rank? - # Would need to load the paths output file to rank edges correctly - df = add_rank_column(df) - df = reinsert_direction_col_directed(df) - - df.to_csv(standardized_pathway_file, columns=['Source', 'Target', 'Rank', "Direction"], header=False, - index=False, sep='\t') + df = raw_pathway_df(raw_pathway_file, sep='\t', header=0) + if not df.empty: + # Keep only edges that were assigned an orientation (direction) + df = df.loc[df['Oriented']] + # TODO what should be the edge rank? + # Would need to load the paths output file to rank edges correctly + df = add_rank_column(df) + df = reinsert_direction_col_directed(df) + df.drop(columns=['Type', 'Oriented', 'Weight'], inplace=True) + df.columns = ['Node1', 'Node2', 'Rank', "Direction"] + df.to_csv(standardized_pathway_file, index=False, sep='\t', header=True) diff --git a/spras/mincostflow.py b/spras/mincostflow.py index e3c75708..b0a47cc7 100644 --- a/spras/mincostflow.py +++ b/spras/mincostflow.py @@ -1,14 +1,12 @@ from pathlib import Path -import pandas as pd - from spras.containers import prepare_volume, run_container from spras.interactome import ( convert_undirected_to_directed, reinsert_direction_col_undirected, ) from spras.prm import PRM -from spras.util import add_rank_column +from spras.util import add_rank_column, raw_pathway_df __all__ = ['MinCostFlow'] @@ -150,10 +148,11 @@ def parse_output(raw_pathway_file, standardized_pathway_file): @param standardized_pathway_file: the same pathway written in the universal format """ - df = pd.read_csv(raw_pathway_file, sep='\t', header=None) - df = add_rank_column(df) - # TODO update MinCostFlow version to support mixed graphs - # Currently directed edges in the input will be converted to undirected edges in the output - df = reinsert_direction_col_undirected(df) - 
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t') - + df = raw_pathway_df(raw_pathway_file, sep='\t', header=None) + if not df.empty: + df = add_rank_column(df) + # TODO update MinCostFlow version to support mixed graphs + # Currently directed edges in the input will be converted to undirected edges in the output + df = reinsert_direction_col_undirected(df) + df.columns = ['Node1', 'Node2', 'Rank', "Direction"] + df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t') diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py index e23ea9f9..16469924 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -1,11 +1,9 @@ from pathlib import Path -import pandas as pd - from spras.containers import prepare_volume, run_container from spras.interactome import reinsert_direction_col_mixed from spras.prm import PRM -from spras.util import add_rank_column +from spras.util import add_rank_column, raw_pathway_df __all__ = ['OmicsIntegrator1', 'write_conf'] @@ -191,16 +189,12 @@ def parse_output(raw_pathway_file, standardized_pathway_file): # I'm assuming from having read the documentation that we will be passing in optimalForest.sif # as raw_pathway_file, in which case the format should be edge1 interactiontype edge2. 
# if that assumption is wrong we will need to tweak things - try: - df = pd.read_csv(raw_pathway_file, sep='\t', header=None) - except pd.errors.EmptyDataError: - with open(standardized_pathway_file, 'w'): - pass - return - - df.columns = ["Edge1", "InteractionType", "Edge2"] - df = add_rank_column(df) - df = reinsert_direction_col_mixed(df, "InteractionType", "pd", "pp") - - df.to_csv(standardized_pathway_file, columns=['Edge1', 'Edge2', 'Rank', "Direction"], header=False, index=False, - sep='\t') + df = raw_pathway_df(raw_pathway_file, sep='\t', header=None) + if not df.empty: + df.columns = ["Edge1", "InteractionType", "Edge2"] + df = add_rank_column(df) + df = reinsert_direction_col_mixed(df, "InteractionType", "pd", "pp") + df.drop(columns=['InteractionType'], inplace=True) + df.columns = ['Node1', 'Node2', 'Rank', 'Direction'] + + df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t') diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 473600a8..ed0d5b56 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -149,12 +149,13 @@ def parse_output(raw_pathway_file, standardized_pathway_file): # Omicsintegrator2 returns a single line file if no network is found num_lines = sum(1 for line in open(raw_pathway_file)) if num_lines < 2: - with open(standardized_pathway_file, 'w'): - pass - return - df = pd.read_csv(raw_pathway_file, sep='\t') - df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line - df = df.take([0, 1], axis=1) - df = add_rank_column(df) - df = reinsert_direction_col_undirected(df) - df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t') + df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction']) + else: + df = pd.read_csv(raw_pathway_file, sep='\t', header=0) + df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line + df = df.take([0, 1], axis=1) + df = add_rank_column(df) + 
df = reinsert_direction_col_undirected(df) + df.columns = ['Node1', 'Node2', 'Rank', "Direction"] + + df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t') diff --git a/spras/pathlinker.py b/spras/pathlinker.py index 9af0f93e..59347cb2 100644 --- a/spras/pathlinker.py +++ b/spras/pathlinker.py @@ -1,14 +1,13 @@ import warnings from pathlib import Path -import pandas as pd - from spras.containers import prepare_volume, run_container from spras.interactome import ( convert_undirected_to_directed, reinsert_direction_col_directed, ) from spras.prm import PRM +from spras.util import raw_pathway_df __all__ = ['PathLinker'] @@ -136,7 +135,10 @@ def parse_output(raw_pathway_file, standardized_pathway_file): @param raw_pathway_file: pathway file produced by an algorithm's run function @param standardized_pathway_file: the same pathway written in the universal format """ - # What about multiple raw_pathway_files - df = pd.read_csv(raw_pathway_file, sep='\t').take([0, 1, 2], axis=1) - df = reinsert_direction_col_directed(df) - df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t') + # What about multiple raw_pathway_files? + df = raw_pathway_df(raw_pathway_file, sep='\t', header=0) + if not df.empty: + df = df.take([0, 1, 2], axis=1) + df = reinsert_direction_col_directed(df) + df.columns = ['Node1', 'Node2', 'Rank', "Direction"] + df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t') diff --git a/spras/util.py b/spras/util.py index ea6cd952..2d2a83d2 100644 --- a/spras/util.py +++ b/spras/util.py @@ -42,6 +42,7 @@ def hash_filename(filename: str, length: Optional[int] = None) -> str: """ return hash_params_sha1_base32({'filename': filename}, length) + def make_required_dirs(path: str): """ Create the directory and parent directories required before an output file can be written to the specified path. 
@@ -59,3 +60,19 @@ def add_rank_column(df: pd.DataFrame) -> pd.DataFrame: """ df['Rank'] = 1 return df + + +def raw_pathway_df(raw_pathway_file: str, sep: str = '\t', header: int = None) -> pd.DataFrame: + """ + Creates dataframe from contents in raw pathway file, + otherwise returns an empty dataframe with standard output column names + @param raw_pathway_file: path to raw_pathway_file + @param sep: separator used when loading the dataframe, default tab character + @param header: what row the header is in raw_pathway_file, default None + """ + try: + df = pd.read_csv(raw_pathway_file, sep=sep, header=header) + except pd.errors.EmptyDataError: # read an empty file + df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction']) + + return df diff --git a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt index 03571eae..44944b37 100644 --- a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt +++ b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction ABI1_HUMAN MK01_HUMAN 1 U CBLB_HUMAN EGFR_HUMAN 1 U CBL_HUMAN CD2AP_HUMAN 1 U diff --git a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt index 30d107b0..b2033b57 100644 --- a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt +++ b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction ABI1_HUMAN MK01_HUMAN 1 U CBLB_HUMAN EGFR_HUMAN 1 U CBL_HUMAN CD2AP_HUMAN 1 U diff --git a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt index 065ef6f9..e0adf2fc 100644 --- 
a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt +++ b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction ABI1_HUMAN MK01_HUMAN 1 U CBL_HUMAN CD2AP_HUMAN 1 U CBL_HUMAN CRKL_HUMAN 1 U diff --git a/test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt b/test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt index 899147f8..bc9dfc85 100644 --- a/test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt +++ b/test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction EGF_HUMAN EGFR_HUMAN 1 U EGF_HUMAN S10A4_HUMAN 2 U S10A4_HUMAN MYH9_HUMAN 2 U diff --git a/test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt b/test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt index 3b1ddef5..a1738b00 100644 --- a/test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt +++ b/test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction EGF_HUMAN EGFR_HUMAN 1 U EGF_HUMAN S10A4_HUMAN 2 U S10A4_HUMAN MYH9_HUMAN 2 U diff --git a/test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt b/test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt index 9d65620f..5547a49c 100644 --- a/test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt +++ b/test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt @@ -1,2 +1,3 @@ +Node1 Node2 Rank Direction A B 1 D B C 1 D diff --git a/test/analysis/input/example/data0-omicsintegrator1-params-RQCQ4YN_pathway.txt b/test/analysis/input/example/data0-omicsintegrator1-params-RQCQ4YN_pathway.txt index e2fd8d57..21768464 100644 --- a/test/analysis/input/example/data0-omicsintegrator1-params-RQCQ4YN_pathway.txt +++ b/test/analysis/input/example/data0-omicsintegrator1-params-RQCQ4YN_pathway.txt @@ -1,2 +1,3 @@ +Node1 
Node2 Rank Direction A B 1 U B C 1 U diff --git a/test/analysis/input/example/data0-omicsintegrator1-params-WY4V42C_pathway.txt b/test/analysis/input/example/data0-omicsintegrator1-params-WY4V42C_pathway.txt index e2fd8d57..21768464 100644 --- a/test/analysis/input/example/data0-omicsintegrator1-params-WY4V42C_pathway.txt +++ b/test/analysis/input/example/data0-omicsintegrator1-params-WY4V42C_pathway.txt @@ -1,2 +1,3 @@ +Node1 Node2 Rank Direction A B 1 U B C 1 U diff --git a/test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt b/test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt index 65f6f221..e34eeaff 100644 --- a/test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt +++ b/test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt @@ -1,2 +1,3 @@ +Node1 Node2 Rank Direction B A 1 U B C 1 U diff --git a/test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt b/test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt index 9d65620f..5547a49c 100644 --- a/test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt +++ b/test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt @@ -1,2 +1,3 @@ +Node1 Node2 Rank Direction A B 1 D B C 1 D diff --git a/test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt b/test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt index 9d65620f..5547a49c 100644 --- a/test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt +++ b/test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt @@ -1,2 +1,3 @@ +Node1 Node2 Rank Direction A B 1 D B C 1 D diff --git a/test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt b/test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt index 71ed6ccf..a87a0437 100644 --- a/test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt +++ 
b/test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction A B 1 D B C 1 D A D 1 D diff --git a/test/analysis/input/example/data1-omicsintegrator1-params-JAZWLAK_pathway.txt b/test/analysis/input/example/data1-omicsintegrator1-params-JAZWLAK_pathway.txt index afbe030d..885a8574 100644 --- a/test/analysis/input/example/data1-omicsintegrator1-params-JAZWLAK_pathway.txt +++ b/test/analysis/input/example/data1-omicsintegrator1-params-JAZWLAK_pathway.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction A D 1 U G H 1 U G I 1 U diff --git a/test/analysis/input/example/data1-omicsintegrator1-params-PU62FNV_pathway.txt b/test/analysis/input/example/data1-omicsintegrator1-params-PU62FNV_pathway.txt index afbe030d..885a8574 100644 --- a/test/analysis/input/example/data1-omicsintegrator1-params-PU62FNV_pathway.txt +++ b/test/analysis/input/example/data1-omicsintegrator1-params-PU62FNV_pathway.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction A D 1 U G H 1 U G I 1 U diff --git a/test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt b/test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt index eddad79c..069481df 100644 --- a/test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt +++ b/test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction C D 1 U C F 1 U A D 1 U diff --git a/test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt b/test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt index 92b60b6e..ec070652 100644 --- a/test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt +++ b/test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction A B 1 D B C 1 D A D 2 D diff --git a/test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt 
b/test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt index 92b60b6e..ec070652 100644 --- a/test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt +++ b/test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction A B 1 D B C 1 D A D 2 D diff --git a/test/analysis/input/standardized-ranked.txt b/test/analysis/input/standardized-ranked.txt index c432c386..27f8222f 100644 --- a/test/analysis/input/standardized-ranked.txt +++ b/test/analysis/input/standardized-ranked.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction A B 1 U A C 3 U C D 5 U diff --git a/test/analysis/input/toy/network1.txt b/test/analysis/input/toy/network1.txt index 21847821..bd5bd343 100644 --- a/test/analysis/input/toy/network1.txt +++ b/test/analysis/input/toy/network1.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction A B 1 U C D 1 U E F 1 U diff --git a/test/analysis/input/toy/network2.txt b/test/analysis/input/toy/network2.txt index f7811bc4..7506195d 100644 --- a/test/analysis/input/toy/network2.txt +++ b/test/analysis/input/toy/network2.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction A B 1 U B C 1 U C D 1 U diff --git a/test/analysis/input/toy/network3.txt b/test/analysis/input/toy/network3.txt index cbf42fb5..eaf05c07 100644 --- a/test/analysis/input/toy/network3.txt +++ b/test/analysis/input/toy/network3.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction A B 1 U A C 1 U A D 1 U diff --git a/test/analysis/input/toy/network4.txt b/test/analysis/input/toy/network4.txt index d711ec1a..61ed9a4b 100644 --- a/test/analysis/input/toy/network4.txt +++ b/test/analysis/input/toy/network4.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction A B 1 U B C 1 U D E 1 U diff --git a/test/analysis/input/toy/network5.txt b/test/analysis/input/toy/network5.txt index 5aaf5c0b..3d0eaf8c 100644 --- a/test/analysis/input/toy/network5.txt +++ b/test/analysis/input/toy/network5.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction A B 1 U B C 1 U 
C D 1 U diff --git a/test/ml/expected/expected-dataframe.csv b/test/ml/expected/expected-dataframe.csv index b594efb4..15cff881 100644 --- a/test/ml/expected/expected-dataframe.csv +++ b/test/ml/expected/expected-dataframe.csv @@ -1,4 +1,4 @@ -,test-data-s1,test-data-s2,test-data-s3,test-data-longName,test-data-longName2,test-data-empty,test-data-spaces,test-mixed-direction +,test-data-s1,test-data-s2,test-data-s3,test-data-longName,test-data-longName2,test-data-empty,test-data-spaces,test-data-mixed-direction A---B,1,1,0,0,0,0,0,0 C---D,1,1,0,0,0,0,0,1 E---F,1,1,0,0,0,0,0,1 diff --git a/test/ml/input/test-data-empty-line/emptyLine.txt b/test/ml/input/test-data-empty-line/emptyLine.txt new file mode 100644 index 00000000..144f2487 --- /dev/null +++ b/test/ml/input/test-data-empty-line/emptyLine.txt @@ -0,0 +1,4 @@ +Node1 Node2 Rank Direction +A B 1 U + +E F 1 U \ No newline at end of file diff --git a/test/ml/input/test-data-empty/empty.txt b/test/ml/input/test-data-empty/empty.txt index e69de29b..63fda2b1 100644 --- a/test/ml/input/test-data-empty/empty.txt +++ b/test/ml/input/test-data-empty/empty.txt @@ -0,0 +1 @@ +Node1 Node2 Rank Direction \ No newline at end of file diff --git a/test/ml/input/test-data-longName/longName.txt b/test/ml/input/test-data-longName/longName.txt index aabf41b2..7e120dff 100644 --- a/test/ml/input/test-data-longName/longName.txt +++ b/test/ml/input/test-data-longName/longName.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction node1 node2 1 U node1 node3 1 U node4 node5 1 U diff --git a/test/ml/input/test-data-longName2/longName2.txt b/test/ml/input/test-data-longName2/longName2.txt index 8765175f..35bf0c2e 100644 --- a/test/ml/input/test-data-longName2/longName2.txt +++ b/test/ml/input/test-data-longName2/longName2.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction node3 node2 1 U node1 node3 1 U node5 node4 1 U diff --git a/test/ml/input/test-mixed-direction/mixed-direction.txt b/test/ml/input/test-data-mixed-direction/mixed-direction.txt 
similarity index 53% rename from test/ml/input/test-mixed-direction/mixed-direction.txt rename to test/ml/input/test-data-mixed-direction/mixed-direction.txt index 6463ab3b..f77061a1 100644 --- a/test/ml/input/test-mixed-direction/mixed-direction.txt +++ b/test/ml/input/test-data-mixed-direction/mixed-direction.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction A B 1 D B A 1 D C D 1 U diff --git a/test/ml/input/test-data-s1/s1.txt b/test/ml/input/test-data-s1/s1.txt index 031f4142..a8a52914 100644 --- a/test/ml/input/test-data-s1/s1.txt +++ b/test/ml/input/test-data-s1/s1.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction A B 1 U C D 1 U E F 1 U \ No newline at end of file diff --git a/test/ml/input/test-data-s2/s2.txt b/test/ml/input/test-data-s2/s2.txt index 680bf369..d4e9860b 100644 --- a/test/ml/input/test-data-s2/s2.txt +++ b/test/ml/input/test-data-s2/s2.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction A B 1 U C D 1 U E F 1 U diff --git a/test/ml/input/test-data-s3/s3.txt b/test/ml/input/test-data-s3/s3.txt index d06960f9..6884cfe8 100644 --- a/test/ml/input/test-data-s3/s3.txt +++ b/test/ml/input/test-data-s3/s3.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction L M 1 U M N 1 U O P 1 U diff --git a/test/ml/input/test-data-single/single.txt b/test/ml/input/test-data-single/single.txt index 30397283..822ccb97 100644 --- a/test/ml/input/test-data-single/single.txt +++ b/test/ml/input/test-data-single/single.txt @@ -1 +1,2 @@ +Node1 Node2 Rank Direction L M 1 U diff --git a/test/ml/input/test-data-spaces/spaces.txt b/test/ml/input/test-data-spaces/spaces.txt index 0860d779..3565af81 100644 --- a/test/ml/input/test-data-spaces/spaces.txt +++ b/test/ml/input/test-data-spaces/spaces.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction L M 1 U O P 1 U nodes with spaces in name 1 U \ No newline at end of file diff --git a/test/ml/input/test-data-unexpected-amount-of-values/less.txt b/test/ml/input/test-data-unexpected-amount-of-values/less.txt new file mode 100644 index 
00000000..b6d268a1 --- /dev/null +++ b/test/ml/input/test-data-unexpected-amount-of-values/less.txt @@ -0,0 +1,5 @@ +Node1 Node2 Rank Direction +A B +C D 1 +E +L M 1 U \ No newline at end of file diff --git a/test/ml/input/test-data-unexpected-amount-of-values/more.txt b/test/ml/input/test-data-unexpected-amount-of-values/more.txt new file mode 100644 index 00000000..f43c2d7d --- /dev/null +++ b/test/ml/input/test-data-unexpected-amount-of-values/more.txt @@ -0,0 +1,5 @@ +Node1 Node2 Rank Direction +A B 1 U B B +C D 1 U B B B B +E F 1 U B +L M 1 U \ No newline at end of file diff --git a/test/ml/input/test-data-wrong-direction/wrong-direction.txt b/test/ml/input/test-data-wrong-direction/wrong-direction.txt new file mode 100644 index 00000000..dc45581c --- /dev/null +++ b/test/ml/input/test-data-wrong-direction/wrong-direction.txt @@ -0,0 +1,5 @@ +Node1 Node2 Rank Direction +A B 1 D +B A 1 D +C D 1 B +E F 1 U \ No newline at end of file diff --git a/test/ml/test_ml.py b/test/ml/test_ml.py index 6e8132b2..3010179d 100644 --- a/test/ml/test_ml.py +++ b/test/ml/test_ml.py @@ -22,16 +22,32 @@ def setup_class(cls): def test_summarize_networks(self): dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt', INPUT_DIR + 'test-data-longName/longName.txt', INPUT_DIR + 'test-data-longName2/longName2.txt', - INPUT_DIR + 'test-data-empty/empty.txt', INPUT_DIR + 'test-data-spaces/spaces.txt', INPUT_DIR + 'test-mixed-direction/mixed-direction.txt']) + INPUT_DIR + 'test-data-empty/empty.txt', INPUT_DIR + 'test-data-spaces/spaces.txt', INPUT_DIR + 'test-data-mixed-direction/mixed-direction.txt']) dataframe.to_csv(OUT_DIR + 'dataframe.csv') assert filecmp.cmp(OUT_DIR + 'dataframe.csv', EXPECT_DIR + 'expected-dataframe.csv', shallow=False) + def test_summarize_networks_less_values(self): + with pytest.raises(ValueError): + ml.summarize_networks([INPUT_DIR + 
'test-data-unexpected-amount-of-values/less.txt']) + + def test_summarize_networks_more_values(self): + with pytest.raises(ValueError): + ml.summarize_networks([INPUT_DIR + 'test-data-unexpected-amount-of-values/more.txt']) + + def test_summarize_networks_empty_line(self): + with pytest.raises(ValueError): + ml.summarize_networks([INPUT_DIR + 'test-data-empty-line/emptyLine.txt']) + + def test_summarize_networks_wrong_direction(self): + with pytest.raises(ValueError): + ml.summarize_networks([INPUT_DIR + 'test-data-wrong-direction/wrong-direction.txt']) + def test_summarize_networks_empty(self): - with pytest.raises(ValueError): #raises error if empty dataframe is used for post processing + with pytest.raises(ValueError): # raises error if empty dataframe is used for post processing ml.summarize_networks([INPUT_DIR + 'test-data-empty/empty.txt']) def test_single_line(self): - with pytest.raises(ValueError): #raises error if single line in file s.t. single row in dataframe is used for post processing + with pytest.raises(ValueError): # raises error if single line in file s.t. 
single row in dataframe is used for post processing ml.summarize_networks([INPUT_DIR + 'test-data-single/single.txt']) def test_pca(self): @@ -58,7 +74,7 @@ def test_hac_vertical(self): assert filecmp.cmp(OUT_DIR + 'hac-clusters-vertical.txt', EXPECT_DIR + 'expected-hac-vertical-clusters.txt', shallow=False) def test_ensemble_network(self): - dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt', INPUT_DIR + 'test-mixed-direction/mixed-direction.txt']) + dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt', INPUT_DIR + 'test-data-mixed-direction/mixed-direction.txt']) ml.ensemble_network(dataframe, OUT_DIR + 'ensemble-network.tsv') en = pd.read_table(OUT_DIR + 'ensemble-network.tsv') diff --git a/test/parse-outputs/expected/allpairs-pathway-expected.txt b/test/parse-outputs/expected/allpairs-pathway-expected.txt index ee3c198b..3af52bc6 100644 --- a/test/parse-outputs/expected/allpairs-pathway-expected.txt +++ b/test/parse-outputs/expected/allpairs-pathway-expected.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction S1 A 1 U S1 B 1 U A E 1 U diff --git a/test/parse-outputs/expected/domino-pathway-expected.txt b/test/parse-outputs/expected/domino-pathway-expected.txt index 3fb1c13a..074f1b20 100644 --- a/test/parse-outputs/expected/domino-pathway-expected.txt +++ b/test/parse-outputs/expected/domino-pathway-expected.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction ENSG00000122691 ENSG00000138757 1 U ENSG00000122691 ENSG00000109320 1 U ENSG00000134954 ENSG00000077150 1 U diff --git a/test/parse-outputs/expected/empty-pathway-expected.txt b/test/parse-outputs/expected/empty-pathway-expected.txt new file mode 100644 index 00000000..a1a76651 --- /dev/null +++ b/test/parse-outputs/expected/empty-pathway-expected.txt @@ -0,0 +1 @@ +Node1 Node2 Rank Direction diff --git 
a/test/parse-outputs/expected/meo-pathway-expected.txt b/test/parse-outputs/expected/meo-pathway-expected.txt index 1971d419..6515013f 100644 --- a/test/parse-outputs/expected/meo-pathway-expected.txt +++ b/test/parse-outputs/expected/meo-pathway-expected.txt @@ -1,2 +1,3 @@ +Node1 Node2 Rank Direction GENEA GENEC 1 D GENEC GENEB 1 D diff --git a/test/parse-outputs/expected/mincostflow-pathway-expected.txt b/test/parse-outputs/expected/mincostflow-pathway-expected.txt index cd60214e..b25d172b 100644 --- a/test/parse-outputs/expected/mincostflow-pathway-expected.txt +++ b/test/parse-outputs/expected/mincostflow-pathway-expected.txt @@ -1,2 +1,3 @@ +Node1 Node2 Rank Direction B A 1 U D B 1 U diff --git a/test/parse-outputs/expected/omicsintegrator1-pathway-expected.txt b/test/parse-outputs/expected/omicsintegrator1-pathway-expected.txt index 16f30549..f808bc3a 100644 --- a/test/parse-outputs/expected/omicsintegrator1-pathway-expected.txt +++ b/test/parse-outputs/expected/omicsintegrator1-pathway-expected.txt @@ -1,2 +1,3 @@ +Node1 Node2 Rank Direction A C 1 D C D 1 U diff --git a/test/parse-outputs/expected/omicsintegrator2-pathway-expected.txt b/test/parse-outputs/expected/omicsintegrator2-pathway-expected.txt index 65f6f221..e34eeaff 100644 --- a/test/parse-outputs/expected/omicsintegrator2-pathway-expected.txt +++ b/test/parse-outputs/expected/omicsintegrator2-pathway-expected.txt @@ -1,2 +1,3 @@ +Node1 Node2 Rank Direction B A 1 U B C 1 U diff --git a/test/parse-outputs/expected/pathlinker-pathway-expected.txt b/test/parse-outputs/expected/pathlinker-pathway-expected.txt index 9edabc0c..e490cd91 100644 --- a/test/parse-outputs/expected/pathlinker-pathway-expected.txt +++ b/test/parse-outputs/expected/pathlinker-pathway-expected.txt @@ -1,3 +1,4 @@ +Node1 Node2 Rank Direction S2 T3 1 D A E 2 D S1 A 2 D diff --git a/test/parse-outputs/input/empty-raw-pathway.txt b/test/parse-outputs/input/empty-raw-pathway.txt new file mode 100644 index 00000000..e69de29b diff 
--git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py index 8d8d0933..60763d13 100644 --- a/test/parse-outputs/test_parse_outputs.py +++ b/test/parse-outputs/test_parse_outputs.py @@ -29,3 +29,11 @@ def test_parse_outputs(self): runner.parse_output(algo, test_file, out_file) assert filecmp.cmp(OUTDIR + f"{algo}-pathway.txt", EXPDIR + f"{algo}-pathway-expected.txt", shallow=False) + + def test_empty_file(self): + for algo in algorithms: + test_file = INDIR + f"empty-raw-pathway.txt" + out_file = OUTDIR + f"{algo}-empty-pathway.txt" + + runner.parse_output(algo, test_file, out_file) + assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False)