Skip to content

Commit

Permalink
Implemented Protquantplot with retention time instead of Intensities
Browse files Browse the repository at this point in the history
  • Loading branch information
RogerAK committed Jul 10, 2024
1 parent 286023d commit 4b940f0
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 18 deletions.
33 changes: 15 additions & 18 deletions protzilla/data_analysis/prot_quant_plot_peptide.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def prot_quant_plot_peptide(
) -> dict:
"""
A function to create a graph visualising protein quantifications across all samples
as a line diagram using retention time and intensity. It's possible to select one proteingroup
as a line diagram using retention time. It's possible to select one proteingroup
that will be displayed in orange and choose a similarity measurement with a similarity score
to get all proteingroups that are similar displayed in another color in this line diagram.
All other proteingroups are displayed in the background as a grey polygon.
Expand All @@ -37,13 +37,10 @@ def prot_quant_plot_peptide(
:return: returns a dictionary containing a list with a plotly figure and/or a list of messages
"""
# Ensure the dataframe includes retention time
if 'Retention time' not in input_df.columns:
raise ValueError("The input dataframe must include a 'Retention time' column.")

wide_df = input_df.interpolate(method='linear', axis=0)
wide_df = long_to_wide_retention_time(wide_df) if is_long_format(wide_df) else wide_df


if protein_group not in wide_df.columns:
raise ValueError("Please select a valid protein group.")
elif similarity_measure == "euclidean distance" and similarity < 0:
Expand All @@ -65,30 +62,30 @@ def prot_quant_plot_peptide(
lower_upper_x = []
lower_upper_y = []

lower_upper_x.append(wide_df['Retention time'].iloc[0])
lower_upper_x.append(wide_df.index[0])
lower_upper_y.append(wide_df.iloc[0].min())

for index, row in wide_df.iterrows():
lower_upper_x.append(row['Retention time'])
lower_upper_x.append(index)
lower_upper_y.append(row.max())

for index, row in reversed(list(wide_df.iterrows())):
lower_upper_x.append(row['Retention time'])
lower_upper_x.append(index)
lower_upper_y.append(row.min())

fig.add_trace(
go.Scatter(
x=lower_upper_x,
y=lower_upper_y,
fill="toself",
name="Intensity Range",
name="Retention time of all protein groups",
line=dict(color="silver"),
)
)

similar_groups = []
for group_to_compare in wide_df.columns:
if group_to_compare not in ['Retention time', protein_group]:
if group_to_compare != protein_group:
if similarity_measure == "euclidean distance":
distance = euclidean_distances(
stats.zscore(wide_df[protein_group]).values.reshape(1, -1),
Expand All @@ -109,7 +106,7 @@ def prot_quant_plot_peptide(
for group in similar_groups:
fig.add_trace(
go.Scatter(
x=wide_df['Retention time'],
x=wide_df.index,
y=wide_df[group],
mode="lines",
name=group[:15] + "..." if len(group) > 15 else group,
Expand All @@ -134,7 +131,7 @@ def prot_quant_plot_peptide(
)
fig.add_trace(
go.Scatter(
x=wide_df['Retention time'],
x=wide_df.index,
y=wide_df[protein_group],
mode="lines",
name=formatted_protein_name,
Expand Down Expand Up @@ -163,22 +160,22 @@ def prot_quant_plot_peptide(
)

fig.update_layout(
title=f"Intensity of {formatted_protein_name} across retention time",
title=f"Retention time of {formatted_protein_name} in all samples",
plot_bgcolor=colors["plot_bgcolor"],
xaxis_gridcolor=colors["gridcolor"],
yaxis_gridcolor=colors["gridcolor"],
xaxis_linecolor=colors["linecolor"],
yaxis_linecolor=colors["linecolor"],
xaxis_title="Retention Time",
yaxis_title="Intensity",
xaxis_title="Sample",
yaxis_title="Retention time",
legend_title="Legend",
xaxis=dict(
tickmode="array",
tickangle=0,
tickvals=sorted(wide_df['Retention time']),
tickvals=wide_df.index,
ticktext=[
f"<span style='font-size: 10px; color:{color_mapping.get(label[0], 'black')}'><b>•</b></span>"
for label in wide_df['Retention time']
for label in wide_df.index
],
),
autosize=True,
Expand All @@ -191,4 +188,4 @@ def prot_quant_plot_peptide(
),
)

return dict(plots=[fig])
return dict(plots=[fig])
25 changes: 25 additions & 0 deletions protzilla/utilities/transform_dfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,31 @@ def long_to_wide(intensity_df: pd.DataFrame, value_name: str = None):
intensity_df = intensity_df.fillna(intensity_df.mean())
return intensity_df

def long_to_wide_retention_time(
    intensity_df: pd.DataFrame, value_name: str = None
) -> pd.DataFrame:
    """
    Transform a long-format dataframe into a wide format that can be more
    easily handled by packages such as sklearn. Each sample gets one row,
    each protein group gets one column, and the cells hold the value column
    (the retention time by default).

    Rows with a missing sample, protein ID or value are ignored. Multiple
    observations of the same (sample, protein ID) pair are averaged. Cells
    that are still empty after pivoting are filled with the mean of their
    protein column.

    :param intensity_df: the long-format dataframe that should be transformed
        into wide format; must contain the columns "Sample", "Protein ID"
        and the value column
    :type intensity_df: pd.DataFrame
    :param value_name: name of the column whose values fill the wide
        dataframe; defaults to "Retention time" when None
    :type value_name: str
    :return: returns dataframe in wide format suitable for use by
        packages such as sklearn
    :rtype: pd.DataFrame
    :raises KeyError: if one of the required columns is missing
    """
    values_name = "Retention time" if value_name is None else value_name
    key_columns = ["Sample", "Protein ID"]

    # Restrict to the columns that are actually pivoted: unrelated columns
    # (e.g. gene names, intensities) must neither break the aggregation below
    # nor cause valid rows to be dropped because of their own missing values.
    long_df = intensity_df[key_columns + [values_name]].dropna()

    # pd.pivot requires unique (index, column) pairs, so collapse duplicated
    # observations of the same protein in the same sample to their mean.
    if long_df.duplicated(subset=key_columns).any():
        long_df = long_df.groupby(key_columns, as_index=False)[values_name].mean()

    wide_df = pd.pivot(
        long_df, index="Sample", columns="Protein ID", values=values_name
    )
    # Proteins not observed in a sample get the protein's mean over all samples.
    return wide_df.fillna(wide_df.mean())

def wide_to_long(wide_df: pd.DataFrame, original_long_df: pd.DataFrame):
"""
Expand Down

0 comments on commit 4b940f0

Please sign in to comment.