Skip to content

Commit

Permalink
cleaned up code and finished interactome test
Browse files Browse the repository at this point in the history
  • Loading branch information
ntalluri committed Sep 5, 2023
1 parent 319a5d3 commit bb55ee0
Show file tree
Hide file tree
Showing 11 changed files with 63 additions and 29 deletions.
5 changes: 4 additions & 1 deletion src/allpairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,11 @@ def generate_inputs(data, filename_map):

# Create network file
edges_df = data.get_interactome()

# Format network file
edges_df = convert_directed_to_undirected(edges_df)
# edges_df = convert_directed_to_undirected(edges_df)
# - technically this can be called but since we don't use the column and based on what the function does, it is not truly needed

# This is pretty memory intensive. We might want to keep the interactome centralized.
edges_df.to_csv(filename_map["network"], sep="\t", index=False,
columns=["Interactor1", "Interactor2", "Weight"],
Expand Down
1 change: 1 addition & 0 deletions src/domino.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def generate_inputs(data, filename_map):

# Format network file
# edges_df = convert_directed_to_undirected(edges_df)
# - technically this can be called but since we don't use the column and based on what the function does, it is not truly needed
edges_df = add_constant(edges_df, 'ppi', 'ppi')

# Transform each node id with a prefix
Expand Down
44 changes: 23 additions & 21 deletions src/interactome.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def convert_undirected_to_directed(df: pd.DataFrame) -> pd.DataFrame:
- turns every undirected edge into a bi-directed edge
- with bi-directed edges, we are not losing too much information because the relationship of the undirected edge is still preserved
*A user must keep the Direction column when using this function
* A user must keep the Direction column when using this function
@param df: input network df of edges, weights, and directionality
@return a dataframe with no undirected edges in Direction column
Expand All @@ -39,6 +39,8 @@ def convert_directed_to_undirected(df: pd.DataFrame) -> pd.DateOffset:
- turns all the directed edges directly into undirected edges
- we will lose any sense of directionality and the graph won't be inherently accurate, but the basic relationship between the two connected nodes will still remain intact.
* A user must keep the Direction column when using this function
@param df: input network df of edges, weights, and directionality
@return a dataframe with no directed edges in Direction column
"""
Expand All @@ -51,17 +53,17 @@ def convert_directed_to_undirected(df: pd.DataFrame) -> pd.DateOffset:
return df


def add_constant(df: pd.DataFrame, col_name: str, const: str) -> pd.DataFrame:
def add_constant(df: pd.DataFrame, new_col_name: str, const: str) -> pd.DataFrame:
"""
adds a seperator somewhere into the input dataframe
adds a constant at the end of the input dataframe that is needed in between columns for a specific algorithm
@param df: input network df of edges, weights, and directionality
@param col_name: the name of the new column
@param sep: some type of seperator needed in the df
@return a df with a new seperator added to every row
@param new_col_name: the name of the new column
@param const: some type of constant needed in the df
@return a df with a new constant added to every row
"""

df.insert(df.shape[1], col_name, const)
df.insert(df.shape[1], new_col_name, const)

print("add_constant")
print(df)
Expand All @@ -71,14 +73,14 @@ def add_constant(df: pd.DataFrame, col_name: str, const: str) -> pd.DataFrame:

def add_directionality_constant(df: pd.DataFrame, col_name: str, dir_const: str, undir_const: str) -> pd.DataFrame:
"""
deals with adding in directionality seperators for mixed graphs that isn't in the universal input
deals with adding in directionality constants for mixed graphs that aren't using the universal input directly
*user must keep the Direction column when using the function
* user must keep the Direction column when using the function
@param df: input network df of edges, weights, and directionality
@param col_name: the name of the new column
@param dir_sep: the directed edge sep
@param undir_sep: the undirected edge sep
@param dir_const: the constant used to mark directed edges
@param undir_const: the constant used to mark undirected edges
@return a df converted to show directionality differently
"""
df.insert(df.shape[1], col_name, dir_const)
Expand All @@ -93,24 +95,24 @@ def add_directionality_constant(df: pd.DataFrame, col_name: str, dir_const: str
def readd_direction_col_mixed(df: pd.DataFrame, existing_direction_column: str, dir_const: str, undir_const: str) -> pd.DataFrame:
"""
readds a 'Direction' column that puts a 'U' or 'D' at the end of provided dataframe
based on the dir/undir seperators in the existing direction column
based on the dir/undir constants in the existing direction column
*user must keep the existing direction column when using the function
@param df: input network df that contains directionality
@param existing_direction_column: the name of the existing directionality column
@param dir_sep: the directed edge sep
@param undir_sep: the undirected edge sep
@return a df with Direction column added back
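@param dir_const: the constant that marks directed edges in the existing direction column
@param undir_const: the constant that marks undirected edges in the existing direction column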
@return a df with universal Direction column added back
"""

df.insert(df.shape[1], "Direction", "D")

mask_undir = df[existing_direction_column] == undir_const
df.loc[mask_undir, "Direction"] = "U"

mask_dir = df[existing_direction_column] == dir_const
df.loc[mask_dir, "Direction"] = "D"
# mask_dir = df[existing_direction_column] == dir_const
# df.loc[mask_dir, "Direction"] = "D"

print("readd_direction_col_mixed")
print(df)
Expand All @@ -119,10 +121,10 @@ def readd_direction_col_mixed(df: pd.DataFrame, existing_direction_column: str,

def readd_direction_col_undirected(df: pd.DataFrame) -> pd.DataFrame:
"""
readds a 'Direction' column that puts a 'U' at the end of the provided dataframe
readds a 'Direction' column that puts a column of 'U's at the end of the provided dataframe
@param df: input network df that contains directionality
@return a df with Direction column added back
@return a df with Direction column of 'U's added back
"""
df.insert(df.shape[1], "Direction", "U")

Expand All @@ -133,10 +135,10 @@ def readd_direction_col_undirected(df: pd.DataFrame) -> pd.DataFrame:

def readd_direction_col_directed(df: pd.DataFrame) -> pd.DataFrame:
"""
readds a 'Direction' column that puts a 'D' at the end of the provided dataframe
readds a 'Direction' column that puts a column of 'D's at the end of the provided dataframe
@param df: input network df that contains directionality
@return a df with Direction column added back
@return a df with Direction column of 'D's added back
"""
df.insert(df.shape[1], "Direction", "D")

Expand Down
1 change: 0 additions & 1 deletion src/meo.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
# Would need to load the paths output file to rank edges correctly
df.insert(5, 'Rank', 1) # Add a constant rank of 1

# TODO: add direction column
df = readd_direction_col_mixed(df, "Type", "pd", "pp")

df.to_csv(standardized_pathway_file, columns=['Source', 'Target', 'Rank', "Direction"], header=False, index=False, sep='\t')
3 changes: 2 additions & 1 deletion src/omicsintegrator2.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def generate_inputs(data, filename_map):

# Format network file
# edges_df = convert_directed_to_undirected(edges_df)
# - technically this can be called but since we don't use the column and based on what the function does, it is not truly needed

#We'll have to update this when we make interactomes more proper, but for now
# assume we always get a weight and turn it into a cost.
Expand Down Expand Up @@ -175,6 +176,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
df = pd.read_csv(raw_pathway_file, sep='\t')
df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line
df = df.take([0, 1], axis=1)
df[3] = [1 for _ in range(len(df.index))]
df['Rank'] = [1 for _ in range(len(df.index))]
df = readd_direction_col_undirected(df)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
File renamed without changes.
File renamed without changes.
6 changes: 6 additions & 0 deletions test/interactome/expected/readd_dir.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
A pp B 0.5 D
B pd C 0.5 D
C pd B 0.5 D
A pp D 0.5 D
C pd D 0.5 D
D pd C 0.5 D
6 changes: 6 additions & 0 deletions test/interactome/expected/readd_mixed.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
A pp B 0.5 U
B pd C 0.5 D
C pd B 0.5 D
A pp D 0.5 U
C pd D 0.5 D
D pd C 0.5 D
6 changes: 6 additions & 0 deletions test/interactome/expected/readd_undir.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
A pp B 0.5 U
B pd C 0.5 U
C pd B 0.5 U
A pp D 0.5 U
C pd D 0.5 U
D pd C 0.5 U
20 changes: 15 additions & 5 deletions test/interactome/test_interactome.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ def setup_class(cls):
def undirected_to_directed(self):
df = pd.read_csv(IN_DIR+ 'test-network.csv', sep='\t')
df = convert_undirected_to_directed(df)
expected_df = pd.read_csv(EXPECTED_DIR+ 'u_to_d.csv', sep='\t')
expected_df = pd.read_csv(EXPECTED_DIR+ 'convert_u_to_d.csv', sep='\t')
assert df.equals(expected_df)

def directed_to_undirected(self):
df = pd.read_csv(IN_DIR+ 'test-network.csv', sep='\t')
df = convert_directed_to_undirected(df)
expected_df = pd.read_csv(EXPECTED_DIR+'d_to_u.csv', sep='\t')
expected_df = pd.read_csv(EXPECTED_DIR+'convert_d_to_u.csv', sep='\t')
assert df.equals(expected_df)

def add_const(self):
Expand All @@ -49,10 +49,20 @@ def add_directionality_const(self):
assert df.equals(expected_df)

def readd_col_mixed(self):
assert True
df = pd.read_csv(IN_DIR+ "test-readd-network.csv", sep ='\t')
df.columns = ["Edge1", "InteractionType", "Edge2", "Weight"]
df = readd_direction_col_mixed(df, "InteractionType", "pd", "pp")
expected_df = pd.read_csv(EXPECTED_DIR+"readd_mixed.csv", sep = '\t')
assert df.equals(expected_df)

def readd_col_undir(self):
assert True
df = pd.read_csv(IN_DIR+ "test-readd-network.csv", sep ='\t')
df = readd_direction_col_undirected(df)
expected_df = pd.read_csv(EXPECTED_DIR+"readd_undir.csv", sep = '\t')
assert df.equals(expected_df)

def readd_col_dir(self):
assert True
df = pd.read_csv(IN_DIR+ "test-readd-network.csv", sep ='\t')
df = readd_direction_col_directed(df)
expected_df = pd.read_csv(EXPECTED_DIR+"readd_dir.csv", sep = '\t')
assert df.equals(expected_df)

0 comments on commit bb55ee0

Please sign in to comment.