Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

color selection for enrichment analysis bar plot #556

Open
wants to merge 11 commits into
base: dev
Choose a base branch
from
8 changes: 2 additions & 6 deletions protzilla/data_integration/di_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ def GO_enrichment_bar_plot(
top_terms,
cutoff,
value,
gene_sets=[],
gene_sets={},
title="",
colors=PLOT_COLOR_SEQUENCE,
figsize=None,
):
"""
Expand Down Expand Up @@ -73,8 +72,6 @@ def GO_enrichment_bar_plot(
if not gene_sets:
msg = "Please select at least one category to plot."
return dict(messages=[dict(level=logging.ERROR, msg=msg)])
if not isinstance(gene_sets, list):
gene_sets = [gene_sets]
if value not in ["fdr", "p-value"]:
msg = "Invalid value. Value must be either 'fdr' or 'p-value'."
return dict(messages=[dict(level=logging.ERROR, msg=msg)])
Expand Down Expand Up @@ -108,9 +105,8 @@ def GO_enrichment_bar_plot(
elif value == "p-value":
column = "P-value" if restring_input else "Adjusted P-value"

colors = gene_sets.values()

if colors == "" or colors is None or len(colors) == 0:
colors = PLOT_COLOR_SEQUENCE
size_y = top_terms * 0.5 * len(gene_sets)
try:
ax = gseapy.barplot(
Expand Down
1 change: 0 additions & 1 deletion protzilla/methods/data_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,6 @@ class PlotGOEnrichmentBarPlot(PlotStep):
"top_terms",
"cutoff",
"title",
"colors",
"figsize",
]
# TODO: input figsize optional?
Expand Down
16 changes: 8 additions & 8 deletions tests/protzilla/data_integration/test_plots_data_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_enrichment_bar_plot_restring(show_figures, helpers):
top_terms=10,
cutoff=0.05,
value="fdr",
gene_sets=["KEGG", "Process"],
gene_sets={"KEGG" : "#E2A46D", "Process" : "#4A536A"},
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Die ganzen Hex-Strings gerne als Variable zusammengefasst, sodass man die mit einem Mal alle ändern kann.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Vielen Dank für die Idee, aber diese wäre an dieser Stelle nicht passend, da es sich um eine Test-Datei handelt. Die erstellten Plots werden nicht visuell betrachtet. An dieser Stelle ist vielmehr wichtig, dass sich der Datentyp von List zu Dict geändert hat und direkt ersichtlich wird, dass es sich um einen HEX-Farbwert handelt. Bei einem fehlgeschlagenen Test ist somit direkt klar, welche Parameter zu einem Fail geführt haben.

)
if show_figures:
helpers.open_graph_from_base64(bar_base64[0])
Expand All @@ -35,7 +35,7 @@ def test_enrichment_bar_plot_restring(show_figures, helpers):
top_terms=10,
cutoff=0.05,
value="p_value",
gene_sets=["KEGG", "Process"],
gene_sets={"KEGG" : "#E2A46D", "Process" : "#4A536A"},
)
if show_figures:
helpers.open_graph_from_base64(bar_base64[0])
Expand All @@ -50,7 +50,7 @@ def test_enrichment_bar_plot(show_figures, helpers, data_folder_tests):
top_terms=10,
cutoff=0.05,
value="p_value",
gene_sets=["Reactome_2013"],
gene_sets={"Reactome_2013" : "#E2A46D"},
)
if show_figures:
helpers.open_graph_from_base64(bar_base64[0])
Expand All @@ -65,7 +65,7 @@ def test_enrichment_bar_plot_wrong_value(data_folder_tests):
top_terms=10,
cutoff=0.05,
value="fdr",
gene_sets=["Reactome_2013"],
gene_sets={"Reactome_2013" : "#E2A46D"},
)
assert "messages" in current_out
assert any(("FDR is not available" in message["msg"]) for message in current_out["messages"])
Expand All @@ -78,7 +78,7 @@ def test_enrichment_bar_plot_empty_df():
top_terms=10,
cutoff=0.05,
value="p_value",
gene_sets=["Reactome_2013"],
gene_sets={"Reactome_2013" : "#E2A46D"},
)
assert "messages" in current_out
assert any(("No data to plot" in message["msg"]) for message in current_out["messages"])
Expand All @@ -102,7 +102,7 @@ def test_enrichment_bar_plot_wrong_df():
top_terms=10,
cutoff=0.05,
value="p_value",
gene_sets=["KEGG"],
gene_sets={"KEGG" : "#E2A46D"},
)
assert "messages" in current_out
assert any(("Please choose an enrichment result dataframe" in message["msg"]) for message in current_out["messages"])
Expand All @@ -115,7 +115,7 @@ def test_enrichment_bar_plot_cutoff(data_folder_tests):
top_terms=10,
cutoff=0,
value="fdr",
gene_sets=["KEGG", "Process"],
gene_sets={"KEGG" : "#E2A46D", "Process" : "#4A536A"},
)

assert "messages" in current_out
Expand All @@ -129,7 +129,7 @@ def test_enrichment_bar_plot_cutoff(data_folder_tests):
top_terms=10,
cutoff=0,
value="p-value",
gene_sets=["Reactome_2013"],
gene_sets={"Reactome_2013" : "#E2A46D"},
)
assert "messages" in current_out
assert any(("No data to plot when applying cutoff" in message["msg"]) for message in current_out["messages"])
Expand Down
70 changes: 67 additions & 3 deletions ui/runs/forms/custom_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from django.forms.widgets import CheckboxInput, SelectMultiple
from django.utils.html import format_html
from django.utils.safestring import SafeText, mark_safe
from django.template.loader import render_to_string

# Custom widgets

Expand Down Expand Up @@ -48,7 +49,7 @@ def __init__(self, choices: Enum | list, initial=None, *args, **kwargs):
super().__init__(choices=choices, initial=initial, *args, **kwargs)
else:
super().__init__(
choices=[(el.value, el.value) for el in choices],
choices=[(choice.value, choice.value) for choice in choices],
initial=initial,
*args,
**kwargs,
Expand Down Expand Up @@ -76,7 +77,7 @@ def __init__(self, choices: Enum | list, initial=None, *args, **kwargs):
super().__init__(choices=choices, initial=initial, *args, **kwargs)
else:
super().__init__(
choices=[(el.value, el.value) for el in choices],
choices=[(choice.value, choice.value) for choice in choices],
initial=initial,
*args,
**kwargs,
Expand All @@ -85,7 +86,70 @@ def __init__(self, choices: Enum | list, initial=None, *args, **kwargs):
self.widget.attrs.update({"class": "form-select mb-2"})

def clean(self, value: list[str] | None):
return [el for el in value if el != "hidden"] if value else None
return [element for element in value if element != "hidden"] if value else None

class CustomCheckboxMultipleChoiceField(MultipleChoiceField):
    """Multiple-choice field that pairs every selected option with a color.

    The field renders through ``CustomCheckboxSelectMultipleWidget``. The
    submitted value list mixes two kinds of entries under the same field name:
    plain option values (ticked checkboxes) and ``color_<option>_<color>``
    entries (color dropdowns). ``clean`` merges both into a dict.
    """

    def __init__(self, choices: Enum | list, colors: Enum | list, initial=None, *args, **kwargs):
        """Set up the choices and attach the color-selection widget.

        :param choices: either a ready-made list of choices or an enum whose
            member values are used as both value and label
        :param colors: color options handed to the widget for its dropdowns
        :param initial: initial value of the field
        """
        if isinstance(choices, list):
            super().__init__(choices=choices, initial=initial, *args, **kwargs)
        else:
            super().__init__(
                choices=[(choice.value, choice.value) for choice in choices],
                initial=initial,
                *args,
                **kwargs,
            )
        self.widget = CustomCheckboxSelectMultipleWidget()
        # The widget is swapped in after the parent constructor ran, so the
        # choices assigned there went to the discarded widget; hand them over.
        self.widget.choices = self.choices
        self.widget.colors = colors
        self.widget.attrs.update({"class": "form-select mb-2"})

    def clean(self, value: list[str] | None):
        """Merge the selected options and their colors into one mapping.

        :param value: submitted entries; plain option values plus
            ``color_<option>_<color>`` entries
        :return: ``None`` if nothing was submitted, otherwise a dict mapping
            each selected option (in sorted order) to its color. Selected
            options without a color entry are left out.
        """
        if not value:
            return None

        color_prefix = "color_"
        selected = []
        colors = {}

        for element in value:
            if element.startswith(color_prefix):
                # Split on the LAST underscore: option names such as
                # "Reactome_2013" contain underscores themselves.
                # Assumes the color part has no underscore (e.g. a hex code).
                gen_set, separator, color = element[len(color_prefix):].rpartition("_")
                if separator:
                    colors[gen_set] = color
                # Entries without a separator are malformed and skipped.
            else:
                selected.append(element)

        return {
            gen_set: colors[gen_set]
            for gen_set in sorted(selected)
            if gen_set in colors
        }


class CustomCheckboxSelectMultipleWidget(SelectMultiple):
    """Widget rendering each choice as a checkbox next to a color dropdown.

    ``colors`` holds the entries offered in every dropdown; the template
    unpacks each entry into a value and a label.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.colors = []

    def render(self, name, value, attrs=None, renderer=None) -> SafeText:
        """Render the color-selection template for this field.

        A dict value (option -> color) is flattened into the list form the
        template checks against: the option itself followed by its
        ``color_<option>_<color>`` entry. ``attrs`` and ``renderer`` are
        accepted but not used.
        """
        if isinstance(value, dict):
            value = [
                entry
                for gen_set, color in value.items()
                for entry in (gen_set, f"color_{gen_set}_{color}")
            ]

        template_context = {
            "name": name,
            "choices": self.choices,
            "colors": self.colors,
            "values": value,
        }
        html = render_to_string(
            "runs/field_component_color_selection.html",
            context=template_context,
        )
        return mark_safe(html)


class CustomFileField(FileField):
Expand Down
42 changes: 18 additions & 24 deletions ui/runs/forms/data_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
CustomFloatField,
CustomMultipleChoiceField,
CustomNumberField,
CustomCheckboxMultipleChoiceField,
)

PROTEIN_DF = "protein_df"
Expand Down Expand Up @@ -215,7 +216,7 @@ def fill_form(self, run: Run) -> None:
self.toggle_visibility("gene_sets_enrichr", True)
self.fields["gene_sets_enrichr"].choices = fill_helper.to_choices(
gseapy.get_library_name()
) # TODO check whether we need to pass the organism name here
) # check whether we need to pass the organism name here
else:
self.toggle_visibility("gene_sets_path", True)

Expand Down Expand Up @@ -324,7 +325,7 @@ class EnrichmentAnalysisWithGSEAForm(MethodForm):
choices=GeneSetsField,
label="How do you want to provide the gene sets? (reselect to show dynamic fields)",
initial="Choose from Enrichr options"
# Todo: Dynamic parameters
# : Dynamic parameters
)
gene_sets_path = CustomFileField(
label="Upload gene sets with uppercase gene symbols (any of the following file "
Expand All @@ -334,15 +335,15 @@ class EnrichmentAnalysisWithGSEAForm(MethodForm):
"SetName2: [Gene2, Gene3, ...]})",
initial=None,
)
# Todo: gene_sets_enrichr dynamic filling
# : gene_sets_enrichr dynamic filling
gene_sets_enrichr = CustomChoiceField(choices=[], label="Gene sets")
grouping = CustomChoiceField(
choices=[],
label="Grouping from metadata",
initial=None
# Todo: Dynamic parameters
# : Dynamic parameters
)
# Todo: add dynamic filling to group1, group2
# : add dynamic filling to group1, group2
group1 = CustomChoiceField(choices=[], label="Group1", initial=None)

group2 = CustomChoiceField(choices=[], label="Group2", initial=None)
Expand Down Expand Up @@ -402,7 +403,7 @@ def fill_form(self, run: Run) -> None:
self.toggle_visibility("gene_sets_enrichr", True)
self.fields["gene_sets_enrichr"].choices = fill_helper.to_choices(
gseapy.get_library_name()
) # TODO check whether we need to pass the organism name here
) # check whether we need to pass the organism name here
else:
self.toggle_visibility("gene_sets_path", True)

Expand Down Expand Up @@ -446,20 +447,20 @@ def fill_form(self, run: Run) -> None:


class EnrichmentAnalysisWithPrerankedGSEAForm(MethodForm):
# Todo: protein_df
# Todo: ranking_column
# : protein_df
# : ranking_column
ranking_direction = CustomChoiceField(
choices=RankingDirectionField,
label="Sort the ranking column (ascending - smaller values are better, "
"descending - larger values are better)",
initial=RankingDirectionField.ascending,
)
# Todo: gene_mapping
# : gene_mapping
gene_sets_field = CustomChoiceField(
choices=GeneSetsField,
label="How do you want to provide the gene sets? (reselect to show dynamic fields)",
initial=GeneSetsField.choose_from_enrichr_options
# Todo: Dynamic parameters
# : Dynamic parameters
)
gene_sets_path = CustomFileField(
label="Upload gene sets with uppercase gene symbols (any of the following file "
Expand All @@ -469,7 +470,7 @@ class EnrichmentAnalysisWithPrerankedGSEAForm(MethodForm):
"SetName2: [Gene2, Gene3, ...]})",
initial=None,
)
# Todo: gene_sets_enrichr
# : gene_sets_enrichr
min_size = CustomNumberField(
label="Minimum number of genes from gene set also in data", initial=15
)
Expand Down Expand Up @@ -513,7 +514,7 @@ class DatabaseIntegrationByGeneMappingForm(MethodForm):
)
dataframe = CustomChoiceField(
choices=[], label="Step to use"
) # TODO this looks and sounds very generic, be more specific, maybe it needs diffexp step
) # this looks and sounds very generic, be more specific, maybe it needs diffexp step

def fill_form(self, run: Run) -> None:
self.fields["database_names"].choices = fill_helper.to_choices(
Expand All @@ -525,8 +526,8 @@ def fill_form(self, run: Run) -> None:


class DatabaseIntegrationByUniprotForm(MethodForm):
# Todo: uniprot
# Todo: Add dynamic fill for database name and fields
# : uniprot
# : Add dynamic fill for database name and fields
database_name = CustomChoiceField(
choices=[],
label="Uniprot databases (offline)",
Expand All @@ -535,11 +536,12 @@ class DatabaseIntegrationByUniprotForm(MethodForm):


class PlotGOEnrichmentBarPlotForm(MethodForm):
# TODO: input:df fill dynamic with fill_forms
# : input:df fill dynamic with fill_forms
input_df_step_instance = CustomChoiceField(
choices=[], label="Choose dataframe to be plotted"
)
gene_sets = CustomMultipleChoiceField(choices=[], label="Sets to be plotted")
# TODO: after the color naming has been optimised in all files, the line below can be updated to: (color, color) for color in PLOT_COLOR_SEQUENCE
gene_sets = CustomCheckboxMultipleChoiceField(choices=[], colors=[(v, k[4:]) for k, v, in list(mcolors.TABLEAU_COLORS.items())], label="Sets to be plotted")
value = CustomChoiceField(
choices=GOEnrichmentBarPlotValue,
label="Value (bars will be plotted as -log10(value)), fdr only for GO analysis with STRING, p_value is adjusted if available",
Expand All @@ -561,15 +563,7 @@ class PlotGOEnrichmentBarPlotForm(MethodForm):
)
title = CustomCharField(label="Title of the plot (optional)", required=False)

colors = CustomMultipleChoiceField(
choices=[], label="Colors for the plot (optional)"
) # TODO this should not have to be set in fill_form

def fill_form(self, run: Run) -> None:
self.fields["colors"].choices = [
(v, k) for k, v, in mcolors.CSS4_COLORS.items()
]

self.fields["input_df_step_instance"].choices = fill_helper.get_choices(
run, "enrichment_df"
)
Expand Down
27 changes: 27 additions & 0 deletions ui/runs/templates/runs/field_component_color_selection.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{# One row per choice: a checkbox for the option and a dropdown for its color. #}
{# Both inputs share the field name; dropdown values are "color_<option>_<color>". #}
<div class="checkbox-container border p-3 rounded">
    {% for option_value, option_label in choices %}
        <div class="d-flex" style="align-items: center">
            <div class="checkbox-item px-2 w-50">
                <input type="checkbox" id="{{ name }}_{{ option_label }}_checkbox" name="{{ name }}" value="{{ option_value }}" {% if option_value in values %}checked{% endif %}>
                <label for="{{ name }}_{{ option_label }}_checkbox">{{ option_label }}</label>
            </div>
            <div class="ms-auto px-2 w-50">
                <select class="form-select" name="{{ name }}">
                    {% for color, color_label in colors %}
                        {% with color_value="color_"|add:option_value|add:"_"|add:color %}
                            {# The value is quoted so option names with spaces or special characters stay intact. #}
                            <option value="{{ color_value }}"
                                {% if values %}
                                    {% if color_value in values %}selected{% endif %}
                                {% else %}
                                    {# Nothing submitted yet: preselect the n-th color for the n-th option. #}
                                    {% if forloop.parentloop.counter0 == forloop.counter0 %}selected{% endif %}
                                {% endif %}
                            >
                                {{ color_label }}
                            </option>
                        {% endwith %}
                    {% endfor %}
                </select>
            </div>
        </div>
    {% endfor %}
</div>
Loading