From 129532c08c36e88e87f9d8bc61f40fe50b053b91 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Mon, 18 Mar 2024 13:49:04 -0500
Subject: [PATCH] clean up new util func, add new test, add to contributing
 guide

---
 CONTRIBUTING.md                                        |  4 ++--
 spras/omicsintegrator2.py                              |  4 ++--
 spras/util.py                                          |  7 ++++---
 test/parse-outputs/expected/empty-pathway-expected.txt |  1 +
 test/parse-outputs/input/empty-raw-pathway.txt         |  0
 test/parse-outputs/test_parse_outputs.py               | 10 ++++++++++
 6 files changed, 19 insertions(+), 7 deletions(-)
 create mode 100644 test/parse-outputs/expected/empty-pathway-expected.txt
 create mode 100644 test/parse-outputs/input/empty-raw-pathway.txt
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 2ec87b01..c4452f7a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -155,8 +155,8 @@ Use the `run_container` utility function to run the command in the container `<u
 Implement the `parse_output` function.
 The edges in the Local Neighborhood output have the same format as the input, `<vertex1>|<vertex2>`.
 Convert these to be tab-separated vertex pairs followed by a tab and a `1` at the end of every line, which indicates all edges have the same rank.
-See the `add_rank_column` function in `src.util.py`.
-Make sure header = True when the file is created.
+See the `add_rank_column` and `raw_pathway_df` functions in `spras/util.py`.
+Make sure `header=True` is set and the column names are `['Node1', 'Node2', 'Rank', 'Direction']` when the file is created.
 The output should have the format `<vertex1> <vertex2> 1 U`.
 
 ### Step 4: Make the Local Neighborhood wrapper accessible through SPRAS
diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index b1419258..2d8dee3c 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -152,10 +152,10 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
             df = pd.DataFrame(columns = ['Node1', 'Node2', 'Rank', 'Direction'])
         else:
             df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
-            df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line
+            df = df[df['in_solution'] == True]  # Check whether this column can be empty before revising this line
+            df = df.take([0, 1], axis=1)
             df = add_rank_column(df)
             df = reinsert_direction_col_undirected(df)
-            df.drop(columns=['cost', 'in_solution'], inplace = True)
             df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
 
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/util.py b/spras/util.py
index 94c7ab2f..efa9bc73 100644
--- a/spras/util.py
+++ b/spras/util.py
@@ -62,9 +62,10 @@ def add_rank_column(df: pd.DataFrame) -> pd.DataFrame:
 
 def raw_pathway_df(raw_pathway_file: str, header:int= None) -> pd.DataFrame:
     """
-    creates df from contents in raw pathway file, otherwise returns an empty df
-    @param raw_pathway_file: the specific path to the raw_pathway_file to read from
-    @param header: what row the header is, otherwise None
+    Creates DF from contents in raw pathway file,
+    otherwise returns an empty DF with standard output column names
+    @param raw_pathway_file: path to raw_pathway_file
+    @param header: what row the header is in raw_pathway_file, otherwise None
 
     """
     try:
diff --git a/test/parse-outputs/expected/empty-pathway-expected.txt b/test/parse-outputs/expected/empty-pathway-expected.txt
new file mode 100644
index 00000000..a1a76651
--- /dev/null
+++ b/test/parse-outputs/expected/empty-pathway-expected.txt
@@ -0,0 +1 @@
+Node1	Node2	Rank	Direction
diff --git a/test/parse-outputs/input/empty-raw-pathway.txt b/test/parse-outputs/input/empty-raw-pathway.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py
index 8d8d0933..0f471e5d 100644
--- a/test/parse-outputs/test_parse_outputs.py
+++ b/test/parse-outputs/test_parse_outputs.py
@@ -1,6 +1,9 @@
 import filecmp
 from pathlib import Path
 
+import pandas as pd
+import pytest
+
 from spras import runner
 
 INDIR = "test/parse-outputs/input/"
@@ -29,3 +32,10 @@ def test_parse_outputs(self):
 
             runner.parse_output(algo, test_file, out_file)
             assert filecmp.cmp(OUTDIR + f"{algo}-pathway.txt", EXPDIR + f"{algo}-pathway-expected.txt", shallow=False)
+
+    def test_empty_file(self):
+        for algo in algorithms:
+            test_file = INDIR + f"empty-raw-pathway.txt"
+            out_file = OUTDIR + f"{algo}-empty-pathway.txt"
+            runner.parse_output(algo, test_file, out_file)
+            assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False)