Skip to content

Commit

Permalink
first pass at fixing oi2 error
Browse files Browse the repository at this point in the history
  • Loading branch information
ntalluri committed Sep 3, 2024
1 parent cf0b401 commit 059c0fc
Show file tree
Hide file tree
Showing 9 changed files with 66 additions and 19 deletions.
26 changes: 13 additions & 13 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ container_registry:
algorithms:
- name: "pathlinker"
params:
include: true
include: false
run1:
k: range(100,201,100)

- name: "omicsintegrator1"
params:
include: true
include: false
run1:
b: [5, 6]
w: np.linspace(0,5,2)
Expand All @@ -69,26 +69,26 @@ algorithms:

- name: "meo"
params:
include: true
include: false
run1:
max_path_length: [3]
local_search: ["Yes"]
rand_restarts: [10]

- name: "mincostflow"
params:
include: true
include: false
run1:
flow: [1] # The flow must be an int
capacity: [1]

- name: "allpairs"
params:
include: true
include: false

- name: "domino"
params:
include: true
include: false
run1:
slice_threshold: [0.3]
module_threshold: [0.05]
Expand Down Expand Up @@ -149,28 +149,28 @@ reconstruction_settings:
analysis:
# Create one summary per pathway file and a single summary table for all pathways for each dataset
summary:
include: true
include: false
# Create output files for each pathway that can be visualized with GraphSpace
graphspace:
include: true
include: false
# Create Cytoscape session file with all pathway graphs for each dataset
cytoscape:
include: true
include: false
# Machine learning analysis (e.g. clustering) of the pathway output files for each dataset
ml:
# ml analysis per dataset
include: true
include: false
# adds ml analysis per algorithm output
# only runs for algorithms with multiple parameter combinations chosen
aggregate_per_algorithm: true
aggregate_per_algorithm: false
# specify how many principal components to calculate
components: 2
# boolean to show the labels on the pca graph
labels: true
labels: false
# 'ward', 'complete', 'average', 'single'
# if linkage: ward, must use metric: euclidean
linkage: 'ward'
# 'euclidean', 'manhattan', 'cosine'
metric: 'euclidean'
evaluation:
include: true
include: false
18 changes: 13 additions & 5 deletions spras/omicsintegrator2.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,22 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
"""
# Omicsintegrator2 returns a single line file if no network is found
num_lines = sum(1 for line in open(raw_pathway_file))
df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
print(df)
# Omicsintegrator2 can produce corrupted output; these are the expected column names, in the expected order
correct_columns = ['protein1', 'protein2', 'cost', 'in_solution']

if num_lines < 2:
df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])
else:
df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line
df = df.take([0, 1], axis=1)
df = add_rank_column(df)
df = reinsert_direction_col_undirected(df)
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
if (len(df.columns) == len(correct_columns)) and all(df.columns == correct_columns):
df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line
df = df.take([0, 1], axis=1)
df = add_rank_column(df)
df = reinsert_direction_col_undirected(df)
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
else:
df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])

df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Node1 Node2 Rank Direction
3 changes: 3 additions & 0 deletions test/parse-outputs/expected/oi2-expected/oi2-expected.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Node1 Node2 Rank Direction
B A 1 U
B C 1 U
3 changes: 3 additions & 0 deletions test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
protein1 protein2 cost in_solution
B A 0.52 True
B C 0.73 True
1 change: 1 addition & 0 deletions test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
protein1 protein2
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
protein1 protein2 cost
B A 0.52
B C 0.73
3 changes: 3 additions & 0 deletions test/parse-outputs/input/oi2-raw-pathways/oi2-wrong-order.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
protein1 protein2 in_solution cost
B A True 0.52
B C True 0.73
27 changes: 26 additions & 1 deletion test/parse-outputs/test_parse_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
INDIR = "test/parse-outputs/input/"
OUTDIR = "test/parse-outputs/output/"
EXPDIR = "test/parse-outputs/expected/"
RAW_PATHS_INDIR = 'test/parse-outputs/input/oi2-raw-pathways/'
RAW_PATHS_EXPDIR = 'test/parse-outputs/expected/oi2-expected/'

# DOMINO input is the concatenated module_0.html and module_1.html file from
# the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt
# from https://github.com/Shamir-Lab/DOMINO/tree/master/examples

algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino']


class TestParseOutputs:
@classmethod
def setup_class(cls):
Expand All @@ -37,3 +38,27 @@ def test_empty_file(self):

runner.parse_output(algo, test_file, out_file)
assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False)

def test_oi2_correct_parse_output(self):
    """Parse a raw OmicsIntegrator2 file with the expected columns.

    The standardized output must be byte-identical to the expected
    pathway file ``oi2-expected.txt``.
    """
    # Plain string literals: none of these names contain placeholders,
    # so the f-prefix was superfluous.
    test_file = RAW_PATHS_INDIR + "oi2-correct.txt"
    out_file = OUTDIR + "oi2-correct-pathway.txt"
    expected_file = RAW_PATHS_EXPDIR + "oi2-expected.txt"

    runner.parse_output('omicsintegrator2', test_file, out_file)

    # shallow=False forces a content comparison rather than an os.stat() one.
    assert filecmp.cmp(out_file, expected_file, shallow=False)

def test_oi2_empty_parse_output(self):
    """Parse the ``oi2-empty.txt`` raw OmicsIntegrator2 file.

    The standardized output must be byte-identical to the expected empty
    pathway file ``oi2-expected-empty.txt``.
    """
    # Plain string literals: none of these names contain placeholders,
    # so the f-prefix was superfluous.
    test_file = RAW_PATHS_INDIR + "oi2-empty.txt"
    out_file = OUTDIR + "oi2-empty-pathway.txt"
    expected_file = RAW_PATHS_EXPDIR + "oi2-expected-empty.txt"

    runner.parse_output('omicsintegrator2', test_file, out_file)

    # shallow=False forces a content comparison rather than an os.stat() one.
    assert filecmp.cmp(out_file, expected_file, shallow=False)

def test_oi2_miss_insolution_parse_output(self):
    """Parse the ``oi2-miss-insolution.txt`` raw OmicsIntegrator2 file.

    NOTE(review): judging by the fixture name, this input lacks the
    ``in_solution`` column -- confirm against the fixture. The standardized
    output must be byte-identical to ``oi2-expected-empty.txt``.
    """
    # Plain string literals: none of these names contain placeholders,
    # so the f-prefix was superfluous.
    test_file = RAW_PATHS_INDIR + "oi2-miss-insolution.txt"
    out_file = OUTDIR + "oi2-miss-insolution-pathway.txt"
    expected_file = RAW_PATHS_EXPDIR + "oi2-expected-empty.txt"

    runner.parse_output('omicsintegrator2', test_file, out_file)

    # shallow=False forces a content comparison rather than an os.stat() one.
    assert filecmp.cmp(out_file, expected_file, shallow=False)

def test_oi2_wrong_order_parse_output(self):
    """Parse the ``oi2-wrong-order.txt`` raw OmicsIntegrator2 file.

    NOTE(review): judging by the fixture name, this input has its columns
    in an unexpected order -- confirm against the fixture. The standardized
    output must be byte-identical to ``oi2-expected-empty.txt``.
    """
    # Plain string literals: none of these names contain placeholders,
    # so the f-prefix was superfluous.
    test_file = RAW_PATHS_INDIR + "oi2-wrong-order.txt"
    out_file = OUTDIR + "oi2-wrong-order-pathway.txt"
    expected_file = RAW_PATHS_EXPDIR + "oi2-expected-empty.txt"

    runner.parse_output('omicsintegrator2', test_file, out_file)

    # shallow=False forces a content comparison rather than an os.stat() one.
    assert filecmp.cmp(out_file, expected_file, shallow=False)

0 comments on commit 059c0fc

Please sign in to comment.