Skip to content

Commit

Permalink
made updates to code and attempted to add testing for interactome
Browse files Browse the repository at this point in the history
  • Loading branch information
ntalluri committed Sep 4, 2023
1 parent 4fdaadb commit cae057a
Show file tree
Hide file tree
Showing 16 changed files with 260 additions and 164 deletions.
4 changes: 2 additions & 2 deletions src/allpairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pandas as pd

from src.dataset import convert_directed_to_undirected, readd_direction_col_undirected
from src.interactome import convert_directed_to_undirected, readd_direction_col_undirected
from src.prm import PRM
from src.util import prepare_volume, run_container

Expand Down Expand Up @@ -105,5 +105,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
"""
df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
df['Rank'] = 1 # add a rank column of 1s since the edges are not ranked.
df = readd_direction_col_undirected(df, 2)
df = readd_direction_col_undirected(df)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
146 changes: 1 addition & 145 deletions src/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,148 +171,4 @@ def get_other_files(self):
return self.other_files.copy()

def get_interactome(self):
return self.interactome.copy()

def convert_undirected_to_directed(df: pd.DataFrame) -> pd.DataFrame:
    """
    turns a graph into a fully directed graph
    - every undirected ('U') edge becomes a pair of directed edges, one in each
      direction, so the relationship expressed by the undirected edge is preserved
    - edges that are already directed are kept as they are
    *A user must keep the Direction column when using this function
    @param df: input network df of edges, weights, and directionality; must contain
               the 'Interactor1', 'Interactor2' and 'Direction' columns
    @return a new dataframe with no undirected edges in the Direction column; the
            reversed copies of the undirected edges follow the original rows and
            the index is renumbered from 0. The input df is not modified.
    @raise KeyError: if df has no 'Direction' column
    """
    if "Direction" not in df.columns:
        raise KeyError("convert_undirected_to_directed requires a 'Direction' column")

    undirected = df["Direction"] == "U"

    # Work on a copy so the caller's dataframe is never left half-converted.
    directed = df.copy()
    directed.loc[undirected, "Direction"] = "D"

    # A reverse edge is the same row with the two interactor columns swapped.
    # rename() swaps both labels at once; re-selecting the original column
    # list puts the columns back in their original order.
    reversed_edges = directed[undirected].rename(
        columns={"Interactor1": "Interactor2", "Interactor2": "Interactor1"}
    )[list(df.columns)]

    if reversed_edges.empty:
        # Nothing to append; still renumber so the result shape is consistent.
        return directed.reset_index(drop=True)

    return pd.concat([directed, reversed_edges], ignore_index=True)


def convert_directed_to_undirected(df: pd.DataFrame) -> pd.DataFrame:
    """
    turns a graph into a fully undirected graph
    - turns the directed ('D') edges directly into undirected ('U') edges
    - any sense of directionality is lost, so the graph is no longer strictly
      accurate, but the basic relationship between the two connected nodes
      remains intact
    @param df: input network df of edges, weights, and directionality; its
               'Direction' column is updated in place
    @return the same dataframe with no directed edges in the Direction column
    """
    # Single vectorised assignment instead of a Python-level row loop.
    df.loc[df["Direction"] == "D", "Direction"] = "U"
    return df


def add_seperator(df: pd.DataFrame, col_loc: int, col_name: str, sep: str) -> pd.DataFrame:
    """
    inserts a new column holding the same separator value on every row
    @param df: input network df of edges, weights, and directionality (modified in place)
    @param col_loc: the position in the dataframe at which the new column is inserted
    @param col_name: the name of the new column
    @param sep: the separator value written to every row of the new column
    @return the same df with the separator column added
    """
    df.insert(loc=col_loc, column=col_name, value=sep)
    return df


def add_directionality_seperators(df: pd.DataFrame, col_loc: int, col_name: str, dir_sep: str, undir_sep: str) -> pd.DataFrame:
    """
    adds a column of directionality separators for mixed graphs, for algorithms
    whose input format marks direction differently from the universal input
    *user must keep the Direction column when using the function
    @param df: input network df of edges, weights, and directionality (modified in place)
    @param col_loc: the position in the dataframe at which the new column is inserted
    @param col_name: the name of the new column
    @param dir_sep: the value written for directed ('D') edges
    @param undir_sep: the value written for undirected ('U') edges
    @return the same df with the new directionality column added
    @raise KeyError: if df has no 'Direction' column
    @raise ValueError: if the Direction column holds anything other than 'U' or 'D'
    """
    if "Direction" not in df.columns:
        raise KeyError("add_directionality_seperators requires a 'Direction' column")

    direction = df["Direction"]

    # Validate before touching df so a failure never leaves a half-built column.
    unexpected = direction[~direction.isin(["U", "D"])]
    if not unexpected.empty:
        raise ValueError(
            f"direction must be a 'U' or 'D', but found {unexpected.iloc[0]}"
        )

    df.insert(col_loc, col_name, direction.map({"D": dir_sep, "U": undir_sep}))
    return df

def readd_direction_col_mixed(df: pd.DataFrame, direction_col_loc: int, existing_direction_column: str, dir_sep: str, undir_sep: str) -> pd.DataFrame:
    """
    readds a 'Direction' column holding 'U' or 'D', derived from the dir/undir
    separators found in the existing direction column
    *user must keep the existing direction column when using the function
    @param df: input network df that contains directionality (modified in place)
    @param direction_col_loc: the position in the dataframe at which the 'Direction' column is inserted
    @param existing_direction_column: the name of the existing directionality column
    @param dir_sep: the value that marks a directed edge
    @param undir_sep: the value that marks an undirected edge
    @return the same df with the Direction column added back
    @raise ValueError: if the existing column holds anything other than dir_sep or undir_sep
    """
    existing = df[existing_direction_column]

    # Validate before touching df so a failure never leaves a half-built column.
    unexpected = existing[~existing.isin([dir_sep, undir_sep])]
    if not unexpected.empty:
        raise ValueError(
            f"direction must be a '{dir_sep}' or '{undir_sep}', but found {unexpected.iloc[0]}"
        )

    # undir_sep is listed last so it wins if both separators are identical,
    # matching the order in which the two cases were originally tested.
    df.insert(direction_col_loc, "Direction", existing.map({dir_sep: "D", undir_sep: "U"}))
    return df

def readd_direction_col_undirected(df: pd.DataFrame, direction_col_loc: int) -> pd.DataFrame:
    """
    readds a 'Direction' column that marks every edge as undirected ('U')
    @param df: input network df (modified in place)
    @param direction_col_loc: the position in the dataframe at which the 'Direction' column is inserted
    @return the same df with the Direction column added back
    """
    df.insert(loc=direction_col_loc, column="Direction", value="U")
    return df

def readd_direction_col_directed(df: pd.DataFrame, direction_col_loc: int) -> pd.DataFrame:
    """
    readds a 'Direction' column that marks every edge as directed ('D')
    @param df: input network df (modified in place)
    @param direction_col_loc: the position in the dataframe at which the 'Direction' column is inserted
    @return the same df with the Direction column added back
    """
    df.insert(loc=direction_col_loc, column="Direction", value="D")
    return df
return self.interactome.copy(deep = True)
10 changes: 5 additions & 5 deletions src/domino.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

import pandas as pd

from src.dataset import (
add_seperator,
from src.interactome import (
add_constant,
convert_directed_to_undirected,
readd_direction_col_undirected,
)
Expand Down Expand Up @@ -58,8 +58,8 @@ def generate_inputs(data, filename_map):
edges_df = data.get_interactome()

# Format network file
edges_df = convert_directed_to_undirected(edges_df)
edges_df = add_seperator(edges_df, 1, 'ppi', 'ppi')
# edges_df = convert_directed_to_undirected(edges_df)
edges_df = add_constant(edges_df, 'ppi', 'ppi')

# Transform each node id with a prefix
edges_df['Interactor1'] = edges_df['Interactor1'].apply(pre_domino_id_transform)
Expand Down Expand Up @@ -203,7 +203,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
# Remove the prefix
edges_df['source'] = edges_df['source'].apply(post_domino_id_transform)
edges_df['target'] = edges_df['target'].apply(post_domino_id_transform)
edges_df = readd_direction_col_undirected(edges_df, 3)
edges_df = readd_direction_col_undirected(edges_df)

print(edges_df)
edges_df.to_csv(standardized_pathway_file, sep='\t', header=False, index=False)
Expand Down
145 changes: 145 additions & 0 deletions src/interactome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
"""
Author: Neha Talluri
07/19/23
Methods for converting/creating the universal input and universal output
"""

import pandas as pd

def convert_undirected_to_directed(df: pd.DataFrame) -> pd.DataFrame:
    """
    turns a graph into a fully directed graph
    - turns every undirected ('U') edge into a bi-directed edge, i.e. two
      directed edges, one in each direction
    - with bi-directed edges not much information is lost, because the
      relationship expressed by the undirected edge is still preserved
    *A user must keep the Direction column when using this function
    @param df: input network df of edges, weights, and directionality; must contain
               the 'Interactor1', 'Interactor2' and 'Direction' columns
    @return a new dataframe with no undirected edges in the Direction column; the
            reversed copies of the undirected edges follow the original rows and
            the index is renumbered from 0. The input df is not modified.
    """
    was_undirected = df["Direction"] == "U"

    # Copy first: the caller's dataframe must not be partially rewritten while
    # the complete result is returned as a different object.
    edges = df.copy()
    edges.loc[was_undirected, "Direction"] = "D"

    if not was_undirected.any():
        # No reverse edges to add; avoid concatenating an empty frame.
        return edges.reset_index(drop=True)

    forward = edges[was_undirected]
    # Both right-hand sides are read from `forward` before assign() builds the
    # new frame, so this is a true swap of the two interactor columns.
    backward = forward.assign(
        Interactor1=forward["Interactor2"],
        Interactor2=forward["Interactor1"],
    )

    return pd.concat([edges, backward], ignore_index=True)


def convert_directed_to_undirected(df: pd.DataFrame) -> pd.DataFrame:
    """
    turns a graph into a fully undirected graph
    - turns all the edges directly into undirected ('U') edges
    - any sense of directionality is lost, so the graph is no longer strictly
      accurate, but the basic relationship between the two connected nodes
      remains intact
    @param df: input network df of edges, weights, and directionality; its
               'Direction' column is overwritten in place
    @return the same dataframe with every row's Direction set to 'U'
    """
    df["Direction"] = "U"
    return df


def add_constant(df: pd.DataFrame, col_name: str, const: str) -> pd.DataFrame:
    """
    appends a new last column holding the same constant value on every row
    @param df: input network df of edges, weights, and directionality (modified in place)
    @param col_name: the name of the new column
    @param const: the constant value written to every row of the new column
    @return the same df with the constant column added as its last column
    """
    # insert() (rather than df[col_name] = ...) raises if the column already
    # exists instead of silently overwriting it.
    df.insert(len(df.columns), col_name, const)
    return df


def add_directionality_constant(df: pd.DataFrame, col_name: str, dir_const: str, undir_const: str) -> pd.DataFrame:
    """
    appends a column of directionality markers for mixed graphs, for algorithms
    whose input format marks direction differently from the universal input
    *user must keep the Direction column when using the function
    @param df: input network df of edges, weights, and directionality (modified in place)
    @param col_name: the name of the new column
    @param dir_const: the value written for directed ('D') edges
    @param undir_const: the value written for undirected ('U') edges
    @return the same df with the new directionality column added as its last column
    @raise ValueError: if the Direction column holds anything other than 'U' or 'D'
    """
    direction = df["Direction"]

    # Reject unknown values rather than silently labelling them as directed;
    # checking first also keeps df untouched when the input is invalid.
    unexpected = direction[~direction.isin(["U", "D"])]
    if not unexpected.empty:
        raise ValueError(
            f"direction must be a 'U' or 'D', but found {unexpected.iloc[0]}"
        )

    df.insert(len(df.columns), col_name, direction.map({"D": dir_const, "U": undir_const}))
    return df

def readd_direction_col_mixed(df: pd.DataFrame, existing_direction_column: str, dir_const: str, undir_const: str) -> pd.DataFrame:
    """
    readds a 'Direction' column holding 'U' or 'D' at the end of the provided
    dataframe, derived from the dir/undir markers in the existing direction column
    *user must keep the existing direction column when using the function
    @param df: input network df that contains directionality (modified in place)
    @param existing_direction_column: the name of the existing directionality column
    @param dir_const: the value that marks a directed edge
    @param undir_const: the value that marks an undirected edge
    @return the same df with the Direction column added back as its last column
    @raise ValueError: if the existing column holds anything other than dir_const or undir_const
    """
    existing = df[existing_direction_column]

    # Reject unknown markers rather than silently labelling them 'D';
    # checking first also keeps df untouched when the input is invalid.
    unexpected = existing[~existing.isin([dir_const, undir_const])]
    if not unexpected.empty:
        raise ValueError(
            f"direction must be a '{dir_const}' or '{undir_const}', but found {unexpected.iloc[0]}"
        )

    # dir_const is listed last so 'D' wins if both markers are identical,
    # matching the order in which the two masks were originally applied.
    df.insert(len(df.columns), "Direction", existing.map({undir_const: "U", dir_const: "D"}))
    return df

def readd_direction_col_undirected(df: pd.DataFrame) -> pd.DataFrame:
    """
    readds a 'Direction' column that puts a 'U' on every row, at the end of the
    provided dataframe
    @param df: input network df (modified in place)
    @return the same df with the Direction column added back as its last column
    """
    df.insert(len(df.columns), "Direction", "U")
    return df

def readd_direction_col_directed(df: pd.DataFrame) -> pd.DataFrame:
    """
    readds a 'Direction' column that puts a 'D' on every row, at the end of the
    provided dataframe
    @param df: input network df (modified in place)
    @return the same df with the Direction column added back as its last column
    """
    df.insert(len(df.columns), "Direction", "D")
    return df
6 changes: 3 additions & 3 deletions src/meo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pandas as pd

from src.dataset import add_directionality_seperators, readd_direction_col_mixed
from src.interactome import add_directionality_constant, readd_direction_col_mixed
from src.prm import PRM
from src.util import prepare_volume, run_container

Expand Down Expand Up @@ -88,7 +88,7 @@ def generate_inputs(data, filename_map):
edges = data.get_interactome()

# Format network file
edges = add_directionality_seperators(edges, 1, 'EdgeType', '(pd)', '(pp)')
edges = add_directionality_constant(edges, 'EdgeType', '(pd)', '(pp)')

edges.to_csv(filename_map['edges'], sep='\t', index=False, columns=['Interactor1', 'EdgeType', 'Interactor2', 'Weight'], header=False)

Expand Down Expand Up @@ -181,6 +181,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
df.insert(5, 'Rank', 1) # Add a constant rank of 1

# TODO: add direction column
df = readd_direction_col_mixed(df, 6, "Type", "pd", "pp")
df = readd_direction_col_mixed(df, "Type", "pd", "pp")

df.to_csv(standardized_pathway_file, columns=['Source', 'Target', 'Rank', "Direction"], header=False, index=False, sep='\t')
4 changes: 2 additions & 2 deletions src/mincostflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pandas as pd

from src.dataset import convert_undirected_to_directed, readd_direction_col_directed
from src.interactome import convert_undirected_to_directed, readd_direction_col_directed
from src.prm import PRM
from src.util import prepare_volume, run_container

Expand Down Expand Up @@ -140,5 +140,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file):

df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
df.insert(2, 'Rank', 1) # adds in a rank column of 1s because the edges are not ranked
df = readd_direction_col_directed(df, 3)
df = readd_direction_col_directed(df)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
Loading

0 comments on commit cae057a

Please sign in to comment.