Skip to content

Commit

Permalink
added in parse_output directionality
Browse files Browse the repository at this point in the history
  • Loading branch information
ntalluri committed Aug 30, 2023
1 parent a214cc8 commit 1e7fef9
Show file tree
Hide file tree
Showing 9 changed files with 117 additions and 42 deletions.
8 changes: 7 additions & 1 deletion src/allpairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import pandas as pd

from src.dataset import convert_directed_to_undirected, readd_direction_col_undirected
from src.prm import PRM
from src.util import prepare_volume, run_container

Expand Down Expand Up @@ -42,8 +43,12 @@ def generate_inputs(data, filename_map):

input_df.to_csv(filename_map["nodetypes"], sep="\t", index=False, columns=["#Node", "Node type"])

# Create network file
edges_df = data.get_interactome()
# Format network file
edges_df = convert_directed_to_undirected(edges_df)
# This is pretty memory intensive. We might want to keep the interactome centralized.
data.get_interactome().to_csv(filename_map["network"], sep="\t", index=False,
edges_df.to_csv(filename_map["network"], sep="\t", index=False,
columns=["Interactor1", "Interactor2", "Weight"],
header=["#Interactor1", "Interactor2", "Weight"])

Expand Down Expand Up @@ -100,4 +105,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
"""
df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
df['Rank'] = 1 # add a rank column of 1s since the edges are not ranked.
df = readd_direction_col_undirected(df, 2)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
52 changes: 52 additions & 0 deletions src/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,55 @@ def add_directionality_seperators(df: pd.DataFrame, col_loc: int, col_name: str,
)

return df

def readd_direction_col_mixed(df: pd.DataFrame, direction_col_loc: int, existing_direction_column: str, dir_sep: str, undir_sep: str) -> pd.DataFrame:
    """
    Re-adds a 'Direction' column that holds a 'U' or 'D' for every row, based on the
    directed/undirected separators found in the existing direction column.
    *the existing direction column must still be present in df when this function is called
    *df is modified in place and the same object is returned

    @param df: input network df that contains directionality
    @param direction_col_loc: the spot in the dataframe to put back the 'Direction' column
    @param existing_direction_column: the name of the existing directionality column
    @param dir_sep: the directed edge separator
    @param undir_sep: the undirected edge separator
    @return the df with the 'Direction' column added back
    @raises ValueError: if any value in the existing direction column is neither dir_sep nor undir_sep
    """
    existing = df[existing_direction_column]

    # Positional boolean masks: unlike label-based df.at[index, ...] writes, these stay
    # correct when the index contains duplicate labels (e.g. after filtering or concat).
    is_undirected = (existing == undir_sep).to_numpy()
    is_directed = (existing == dir_sep).to_numpy()

    # Validate before touching df so a bad value does not leave a half-updated frame behind.
    invalid = ~(is_undirected | is_directed)
    if invalid.any():
        bad_value = existing.loc[invalid].iloc[0]
        raise ValueError(
            f'direction must be a \'{dir_sep}\' or \'{undir_sep}\', but found {bad_value}'
        )

    # Default every row to directed, then overwrite the undirected rows. The undirected
    # check takes precedence, matching the order in which the separators are tested.
    df.insert(direction_col_loc, "Direction", "D")
    df.loc[is_undirected, "Direction"] = "U"

    return df

def readd_direction_col_undirected(df: pd.DataFrame, direction_col_loc: int) -> pd.DataFrame:
    """
    Marks every edge as undirected by inserting a 'Direction' column filled with 'U'.
    The dataframe is modified in place and the same object is returned.

    @param df: network df to which the 'Direction' column is added
    @param direction_col_loc: the column position at which 'Direction' is inserted
    @return the same df, now containing the 'Direction' column
    """
    df.insert(loc=direction_col_loc, column="Direction", value="U")
    return df

def readd_direction_col_directed(df: pd.DataFrame, direction_col_loc: int) -> pd.DataFrame:
    """
    Marks every edge as directed by inserting a 'Direction' column filled with 'D'.
    The dataframe is modified in place and the same object is returned.

    @param df: network df to which the 'Direction' column is added
    @param direction_col_loc: the column position at which 'Direction' is inserted
    @return the same df, now containing the 'Direction' column
    """
    df.insert(loc=direction_col_loc, column="Direction", value="D")
    return df
8 changes: 7 additions & 1 deletion src/domino.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@

import pandas as pd

from src.dataset import add_seperator, convert_directed_to_undirected
from src.dataset import (
add_seperator,
convert_directed_to_undirected,
readd_direction_col_undirected,
)
from src.prm import PRM
from src.util import prepare_volume, run_container

Expand Down Expand Up @@ -199,7 +203,9 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
# Remove the prefix
edges_df['source'] = edges_df['source'].apply(post_domino_id_transform)
edges_df['target'] = edges_df['target'].apply(post_domino_id_transform)
edges_df = readd_direction_col_undirected(edges_df, 3)

print(edges_df)
edges_df.to_csv(standardized_pathway_file, sep='\t', header=False, index=False)


Expand Down
8 changes: 6 additions & 2 deletions src/meo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pandas as pd

from src.dataset import add_directionality_seperators
from src.dataset import add_directionality_seperators, readd_direction_col_mixed
from src.prm import PRM
from src.util import prepare_volume, run_container

Expand Down Expand Up @@ -179,4 +179,8 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
# TODO what should be the edge rank?
# Would need to load the paths output file to rank edges correctly
df.insert(5, 'Rank', 1) # Add a constant rank of 1
df.to_csv(standardized_pathway_file, columns=['Source', 'Target', 'Rank'], header=False, index=False, sep='\t')

# Add the direction column based on the interaction type ('pd' = directed, 'pp' = undirected)
df = readd_direction_col_mixed(df, 6, "Type", "pd", "pp")

df.to_csv(standardized_pathway_file, columns=['Source', 'Target', 'Rank', "Direction"], header=False, index=False, sep='\t')
3 changes: 2 additions & 1 deletion src/mincostflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pandas as pd

from src.dataset import convert_undirected_to_directed
from src.dataset import convert_undirected_to_directed, readd_direction_col_directed
from src.prm import PRM
from src.util import prepare_volume, run_container

Expand Down Expand Up @@ -140,4 +140,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file):

df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
df.insert(2, 'Rank', 1) # adds in a rank column of 1s because the edges are not ranked
df = readd_direction_col_directed(df, 3)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
10 changes: 7 additions & 3 deletions src/omicsintegrator1.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pandas as pd

from src.dataset import readd_direction_col_mixed
from src.prm import PRM
from src.util import prepare_volume, run_container

Expand Down Expand Up @@ -193,6 +194,9 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
with open(standardized_pathway_file, 'w'):
pass
return
df = df.take([0, 2], axis=1)
df[3] = [1 for _ in range(len(df.index))]
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')

df.columns = ["Edge1", "InteractionType", "Edge2"]
df.insert (3, "Rank", 1)
df = readd_direction_col_mixed(df, 4, "InteractionType", "pd", "pp")

df.to_csv(standardized_pathway_file,columns=['Edge1', 'Edge2', 'Rank', "Direction"], header=False, index=False, sep='\t')
3 changes: 2 additions & 1 deletion src/omicsintegrator2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import docker
import pandas as pd

from src.dataset import convert_directed_to_undirected
from src.dataset import convert_directed_to_undirected, readd_direction_col_undirected
from src.prm import PRM
from src.util import prepare_path_docker

Expand Down Expand Up @@ -173,4 +173,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line
df = df.take([0, 1], axis=1)
df[3] = [1 for _ in range(len(df.index))]
df = readd_direction_col_undirected(df, 3)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
3 changes: 2 additions & 1 deletion src/pathlinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pandas as pd

from src.dataset import convert_undirected_to_directed
from src.dataset import convert_undirected_to_directed, readd_direction_col_directed
from src.prm import PRM
from src.util import prepare_volume, run_container

Expand Down Expand Up @@ -137,4 +137,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
# Questions: should there be a header/optional columns?
# What about multiple raw_pathway_files
df = pd.read_csv(raw_pathway_file, sep='\t').take([0, 1, 2], axis=1)
df = readd_direction_col_directed(df, 3)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
64 changes: 32 additions & 32 deletions test/DOMINO/expected_output/domino-parse-output.txt
Original file line number Diff line number Diff line change
@@ -1,32 +1,32 @@
ENSG00000122691 ENSG00000138757 1
ENSG00000122691 ENSG00000109320 1
ENSG00000134954 ENSG00000077150 1
ENSG00000077150 ENSG00000107968 1
ENSG00000077150 ENSG00000157557 1
ENSG00000077150 ENSG00000109320 1
ENSG00000173120 ENSG00000173039 1
ENSG00000173039 ENSG00000109320 1
ENSG00000168884 ENSG00000109320 1
ENSG00000109320 ENSG00000282905 1
ENSG00000109320 ENSG00000104856 1
ENSG00000109320 ENSG00000146232 1
ENSG00000109320 ENSG00000166135 1
ENSG00000109320 ENSG00000170606 1
ENSG00000100906 ENSG00000166135 1
ENSG00000100906 ENSG00000198873 1
ENSG00000100906 ENSG00000173020 1
ENSG00000162924 ENSG00000170606 1
ENSG00000187079 ENSG00000177606 1
ENSG00000177606 ENSG00000168062 1
ENSG00000177606 ENSG00000182979 1
ENSG00000177606 ENSG00000050748 1
ENSG00000177606 ENSG00000109339 1
ENSG00000177606 ENSG00000170345 1
ENSG00000177606 ENSG00000175592 1
ENSG00000177606 ENSG00000085276 1
ENSG00000171223 ENSG00000100721 1
ENSG00000171223 ENSG00000075426 1
ENSG00000171223 ENSG00000085276 1
ENSG00000130522 ENSG00000175592 1
ENSG00000175592 ENSG00000128272 1
ENSG00000128272 ENSG00000162772 1
ENSG00000122691 ENSG00000138757 1 U
ENSG00000122691 ENSG00000109320 1 U
ENSG00000134954 ENSG00000077150 1 U
ENSG00000077150 ENSG00000107968 1 U
ENSG00000077150 ENSG00000157557 1 U
ENSG00000077150 ENSG00000109320 1 U
ENSG00000173120 ENSG00000173039 1 U
ENSG00000173039 ENSG00000109320 1 U
ENSG00000168884 ENSG00000109320 1 U
ENSG00000109320 ENSG00000282905 1 U
ENSG00000109320 ENSG00000104856 1 U
ENSG00000109320 ENSG00000146232 1 U
ENSG00000109320 ENSG00000166135 1 U
ENSG00000109320 ENSG00000170606 1 U
ENSG00000100906 ENSG00000166135 1 U
ENSG00000100906 ENSG00000198873 1 U
ENSG00000100906 ENSG00000173020 1 U
ENSG00000162924 ENSG00000170606 1 U
ENSG00000187079 ENSG00000177606 1 U
ENSG00000177606 ENSG00000168062 1 U
ENSG00000177606 ENSG00000182979 1 U
ENSG00000177606 ENSG00000050748 1 U
ENSG00000177606 ENSG00000109339 1 U
ENSG00000177606 ENSG00000170345 1 U
ENSG00000177606 ENSG00000175592 1 U
ENSG00000177606 ENSG00000085276 1 U
ENSG00000171223 ENSG00000100721 1 U
ENSG00000171223 ENSG00000075426 1 U
ENSG00000171223 ENSG00000085276 1 U
ENSG00000130522 ENSG00000175592 1 U
ENSG00000175592 ENSG00000128272 1 U
ENSG00000128272 ENSG00000162772 1 U

0 comments on commit 1e7fef9

Please sign in to comment.