clean up new util func, add new test, add to contributing guide

ntalluri · Mar 18, 2024 · 129532c · 129532c
1 parent 01ad342
commit 129532c
Show file tree

Hide file tree

Showing 6 changed files with 19 additions and 7 deletions.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -155,8 +155,8 @@ Use the `run_container` utility function to run the command in the container `<u
 Implement the `parse_output` function.
 The edges in the Local Neighborhood output have the same format as the input, `<vertex1>|<vertex2>`.
 Convert these to be tab-separated vertex pairs followed by a tab and a `1` at the end of every line, which indicates all edges have the same rank.
-See the `add_rank_column` function in `src.util.py`.
-Make sure header = True when the file is created.
+See the `add_rank_column` and `raw_pathway_df` functions in `spras/util.py`.
+Make sure `header=True` is set and the column names are ['Node1', 'Node2', 'Rank', 'Direction'] when the file is created.
 The output should have the format `<vertex1> <vertex2> 1 U`.
 
 ### Step 4: Make the Local Neighborhood wrapper accessible through SPRAS

diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
@@ -152,10 +152,10 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
             df = pd.DataFrame(columns = ['Node1', 'Node2', 'Rank', 'Direction'])
         else:
             df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
-            df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line
+            df = df[df['in_solution'] == True]  # Check whether this column can be empty before revising this line
+            df = df.take([0, 1], axis=1)
             df = add_rank_column(df)
             df = reinsert_direction_col_undirected(df)
-            df.drop(columns=['cost', 'in_solution'], inplace = True)
             df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
 
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')

diff --git a/spras/util.py b/spras/util.py
@@ -62,9 +62,10 @@ def add_rank_column(df: pd.DataFrame) -> pd.DataFrame:
 
 def raw_pathway_df(raw_pathway_file: str, header:int= None) -> pd.DataFrame:
     """
-    creates df from contents in raw pathway file, otherwise returns an empty df
-    @param raw_pathway_file: the specific path to the raw_pathway_file to read from
-    @param header: what row the header is, otherwise None
+    Creates DF from contents in raw pathway file,
+    otherwise returns an empty DF with standard output column names
+    @param raw_pathway_file: path to raw_pathway_file
+    @param header: what row the header is in raw_pathway_file, otherwise None
 
     """
     try:

diff --git a/test/parse-outputs/expected/empty-pathway-expected.txt b/test/parse-outputs/expected/empty-pathway-expected.txt
@@ -0,0 +1 @@
+Node1	Node2	Rank	Direction
diff --git a/test/parse-outputs/input/empty-raw-pathway.txt b/test/parse-outputs/input/empty-raw-pathway.txt
diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py
@@ -1,6 +1,9 @@
 import filecmp
 from pathlib import Path
 
+import pandas as pd
+import pytest
+
 from spras import runner
 
 INDIR = "test/parse-outputs/input/"
@@ -29,3 +32,10 @@ def test_parse_outputs(self):
 
             runner.parse_output(algo, test_file, out_file)
             assert filecmp.cmp(OUTDIR + f"{algo}-pathway.txt", EXPDIR + f"{algo}-pathway-expected.txt", shallow=False)
+
+    def test_empty_file(self):
+        for algo in algorithms:
+            test_file = INDIR + f"empty-raw-pathway.txt"
+            out_file = OUTDIR + f"{algo}-empty-pathway.txt"
+            runner.parse_output(algo, test_file, out_file)
+            assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False)