From 129532c08c36e88e87f9d8bc61f40fe50b053b91 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Mon, 18 Mar 2024 13:49:04 -0500 Subject: [PATCH] clean up new util func, add new test, add to contributing guide --- CONTRIBUTING.md | 4 ++-- spras/omicsintegrator2.py | 4 ++-- spras/util.py | 7 ++++--- test/parse-outputs/expected/empty-pathway-expected.txt | 1 + test/parse-outputs/input/empty-raw-pathway.txt | 0 test/parse-outputs/test_parse_outputs.py | 10 ++++++++++ 6 files changed, 19 insertions(+), 7 deletions(-) create mode 100644 test/parse-outputs/expected/empty-pathway-expected.txt create mode 100644 test/parse-outputs/input/empty-raw-pathway.txt diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2ec87b01..c4452f7a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -155,8 +155,8 @@ Use the `run_container` utility function to run the command in the container `|`. Convert these to be tab-separated vertex pairs followed by a tab and a `1` at the end of every line, which indicates all edges have the same rank. -See the `add_rank_column` function in `src.util.py`. -Make sure header = True when the file is created. +See the `add_rank_column` and `raw_pathway_df` functions in `spras/util.py`. +Make sure header = True with column names: ['Node1', 'Node2', 'Rank', 'Direction'] when the file is created. The output should have the format ` 1 U`. 
### Step 4: Make the Local Neighborhood wrapper accessible through SPRAS diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index b1419258..2d8dee3c 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -152,10 +152,10 @@ def parse_output(raw_pathway_file, standardized_pathway_file): df = pd.DataFrame(columns = ['Node1', 'Node2', 'Rank', 'Direction']) else: df = pd.read_csv(raw_pathway_file, sep='\t', header=0) - df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line + df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line + df = df.take([0, 1], axis=1) df = add_rank_column(df) df = reinsert_direction_col_undirected(df) - df.drop(columns=['cost', 'in_solution'], inplace = True) df.columns = ['Node1', 'Node2', 'Rank', "Direction"] df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t') diff --git a/spras/util.py b/spras/util.py index 94c7ab2f..efa9bc73 100644 --- a/spras/util.py +++ b/spras/util.py @@ -62,9 +62,10 @@ def add_rank_column(df: pd.DataFrame) -> pd.DataFrame: def raw_pathway_df(raw_pathway_file: str, header:int= None) -> pd.DataFrame: """ - creates df from contents in raw pathway file, otherwise returns an empty df - @param raw_pathway_file: the specific path to the raw_pathway_file to read from - @param header: what row the header is, otherwise None + Creates DF from contents in raw pathway file, + otherwise returns an empty DF with standard output column names + @param raw_pathway_file: path to raw_pathway_file + @param header: what row the header is in raw_pathway_file, otherwise None """ try: diff --git a/test/parse-outputs/expected/empty-pathway-expected.txt b/test/parse-outputs/expected/empty-pathway-expected.txt new file mode 100644 index 00000000..a1a76651 --- /dev/null +++ b/test/parse-outputs/expected/empty-pathway-expected.txt @@ -0,0 +1 @@ +Node1 Node2 Rank Direction diff --git 
a/test/parse-outputs/input/empty-raw-pathway.txt b/test/parse-outputs/input/empty-raw-pathway.txt new file mode 100644 index 00000000..e69de29b diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py index 8d8d0933..0f471e5d 100644 --- a/test/parse-outputs/test_parse_outputs.py +++ b/test/parse-outputs/test_parse_outputs.py @@ -1,6 +1,9 @@ import filecmp from pathlib import Path +import pandas as pd +import pytest + from spras import runner INDIR = "test/parse-outputs/input/" @@ -29,3 +32,10 @@ def test_parse_outputs(self): runner.parse_output(algo, test_file, out_file) assert filecmp.cmp(OUTDIR + f"{algo}-pathway.txt", EXPDIR + f"{algo}-pathway-expected.txt", shallow=False) + + def test_empty_file(self): + for algo in algorithms: + test_file = INDIR + f"empty-raw-pathway.txt" + out_file = OUTDIR + f"{algo}-empty-pathway.txt" + runner.parse_output(algo, test_file, out_file) + assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False)