Skip to content

Commit

Permalink
Add plot prot quant to data analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
lucatreide committed Nov 22, 2023
1 parent 397bc55 commit 581b2f3
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 24 deletions.
20 changes: 20 additions & 0 deletions protzilla/constants/workflow_meta.json
Original file line number Diff line number Diff line change
Expand Up @@ -1124,6 +1124,26 @@
null,
null
]
},
"protein_group": {
"name": "Protein group: choose highlighted protein group",
"type": "categorical",
"fill": "protein_group_column",
"categories": [],
"default": null
},
"similarity_measure": {
"name": "Similarity Measurement: choose how to compare protein groups",
"type": "categorical",
"categories": ["euclidean distance", "cosine similarity"],
"default": "euclidean_distance"
},
"similarity": {
"name": "Similarity",
"type": "numeric",
"min": -1,
"max": 999,
"default": 1
}
}
},
Expand Down
114 changes: 90 additions & 24 deletions protzilla/data_analysis/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import plotly.express as px
import plotly.graph_objects as go
from django.contrib import messages
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances

from protzilla.utilities.clustergram import Clustergram
from protzilla.utilities.transform_dfs import is_long_format, long_to_wide
Expand Down Expand Up @@ -257,43 +258,108 @@ def clustergram_plot(
return [dict(messages=[dict(level=messages.ERROR, msg=msg)])]


def prot_quant_plot(input_df: pd.DataFrame):
def prot_quant_plot(input_df: pd.DataFrame, protein_group: str, similarity: float, similarity_measure: str):
wide_df = long_to_wide(input_df) if is_long_format(input_df) else input_df

fig = go.Figure()

# Use shortened names or aliases for the legend if necessary
# This is just a placeholder; you'll need to create your own mapping
legend_names = {column: column[:10] + "..." for column in wide_df.columns}

x_values = wide_df.index
for column in wide_df.columns:
fig.add_trace(
go.Scatter(
x=x_values,
y=wide_df[column],
mode="lines+markers",
name=legend_names[column],
hoverinfo="name+y",
)
)
color_mapping = {
'A': 'green',
'C': 'blue',
}

lower_upper_x = []
lower_upper_y = []

lower_upper_x.append(wide_df.index[0])
lower_upper_y.append(wide_df.iloc[0].min())

for index, row in wide_df.iterrows():
lower_upper_x.append(index)
lower_upper_y.append(row.max())

for index, row in reversed(list(wide_df.iterrows())):
lower_upper_x.append(index)
lower_upper_y.append(row.min())

fig.add_trace(go.Scatter(
x=lower_upper_x,
y=lower_upper_y,
fill="toself",
name="Intensity Range",
line=dict(color="silver")
))

similar_groups = []
for group_to_compare in wide_df.columns:
if group_to_compare != protein_group:
similarity_measure_method = euclidean_distances if similarity_measure == "euclidean distance" else cosine_similarity
distance = similarity_measure_method(wide_df[protein_group].values.reshape(1, -1), wide_df[group_to_compare].values.reshape(1, -1))[0][0]
if distance <= similarity:
similar_groups.append(group_to_compare)


for group in similar_groups:
fig.add_trace(go.Scatter(
x=wide_df.index,
y=wide_df[group],
mode="lines",
name=group,
line=dict(color='rgba(102,51,153,0.5)'),
showlegend=len(similar_groups) <= 7
))

if len(similar_groups) > 7:
fig.add_trace(go.Scatter(
x=[None],
y=[None],
mode='lines',
marker=dict(color="rgba(102,51,153,0.5)"),
name='Similar Protein Groups'
))

formatted_protein_name = protein_group[:15] + "..." if len(protein_group) > 15 else protein_group
fig.add_trace(go.Scatter(
x=wide_df.index,
y=wide_df[protein_group],
mode="lines",
name= formatted_protein_name,
line=dict(color='orangered'),
))

fig.add_trace(go.Scatter(
x=[None],
y=[None],
mode='markers',
marker=dict(color="green"),
name='Experimental Group'
))

fig.add_trace(go.Scatter(
x=[None],
y=[None],
mode='markers',
marker=dict(color="blue"),
name='Control Group'
))

fig.update_layout(
title="Quant Prot Plot",
title=f"Intensity of {formatted_protein_name} in all samples",
xaxis_title="Sample",
yaxis_title="Intensity",
legend_title="Protein Groups",
legend_title="Legend",
xaxis=dict(
tickangle=-90,
tickvals=wide_df.index[::2], # Show every other label to reduce clutter
tickmode='array',
tickangle=0,
tickvals=wide_df.index,
ticktext=[
label[:10] + "..." for label in wide_df.index[::2]
], # Shortened label text
f"<span style='font-size: 10px; color:{color_mapping.get(label[0], 'black')}'><b>•</b></span>" for label in wide_df.index
],
),
autosize=True,
margin=dict(l=100, r=300, t=100, b=100), # Adjust margins to fit legend
margin=dict(l=100, r=300, t=100, b=100),
legend=dict(
x=1.05, # Place legend to the right of the plot
x=1.05,
y=1,
bgcolor="rgba(255, 255, 255, 0.5)",
orientation="v",
Expand Down
2 changes: 2 additions & 0 deletions protzilla/run_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ def insert_special_params(param_dict, run):
server = BiomartServer("http://www.ensembl.org/biomart")
database = server.databases["ENSEMBL_MART_ENSEMBL"]
param_dict["categories"] = database.datasets
elif param_dict["fill"] == "protein_group_column":
param_dict["categories"] = run.df["Protein ID"].unique()

if "fill_dynamic" in param_dict:
param_dict["class"] = "dynamic_trigger"
Expand Down
12 changes: 12 additions & 0 deletions user_data/workflows/standard.json
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,18 @@
},
"data_analysis": {
"steps": [
{
"name": "plot",
"method": "prot_quant",
"parameters": {
"input_df": [
"preprocessed_data",
"dataframe"
],
"protein_group": null
},
"output_name": "prot_quant_plot"
},
{
"name": "dimension_reduction",
"method": "umap",
Expand Down

0 comments on commit 581b2f3

Please sign in to comment.