From e6c29e256817780e61e5cc8803812670039c4e6d Mon Sep 17 00:00:00 2001
From: JanniRoebbecke <50395935+JanniRoebbecke@users.noreply.github.com>
Date: Wed, 14 Aug 2024 20:57:05 +0200
Subject: [PATCH] Mann whitney test (#518)

* evidence import

* tests

* filter proteins

* filter sampels

* Select Peptides by group or sample for coverage visualisation (#410)

* untested working, needs cleanup

* linter

* remove dupliacte method (merge screw-up)

* add doc strings, fix first few tests (miss. args)

* metadata df now optional param + adjusted asserts, fixed test

* formatting

* conditional metadata input for protein graphs

* import logging levels, formatting

* add grouping tests for peptides, add error handling

* spelling, formatting

* remove unused import, format

* fix error msgs in tests

* fix line length

* Convert has_metadata to property

* Use logger instead of print statement

* An attempt at consistent text-casing

* formatting

---------

Co-authored-by: henninggaertner <Henning.Gaertner@student.hpi.de>

* tests for filter_proteins with peptide file

* tests for filter_proteins with peptide file, better mock dataframe

* output types

* added testing that only filtered Proteins are removed from peptides_df

* tests for filter_samples with peptide file

* extract test that peptide filtering matches protein filtering into Method

* refactor: shortened tests by integrating extracted method and mock data

* exclude peptides

* tests

* minor fixes in test

* minor fixes in test

* fix test that peptide filtering matches protein filtering Method

* fix test that peptide filtering matches protein filtering Method

* fix test that peptide filtering matches protein filtering Method

* fix test that peptide filtering matches protein filtering Method

* match mock protein dataframe to mock peptide dataframe

* extract test that peptide filtering matches protein filtering into Method

* implementation

* tests

* tests improve error logging

* tests git testing

* peptide import added missing column

* implements form, form_mapping, method and the actual functionality (untested)

* merge peptide filtering and outlier_detection

* merge peptide transformation

* implement passing of peptide data between preprocessing steps

* complete implement passing of peptide data between preprocessing steps

* implement overhead for new step evidence import

* enable selecting multiple proteins of interest and implements tests

* Fix EvidenceImport class (now the right method is called)

* implement option to choose protein from list of significant proteins

* implement option to let the system choose the most expressed protein

* include messages for peptide filtering

* remove selecting multiple proteins and add substring search

* add cleaning of protein groups to evidence import

* implement suggested changes: remove default values and correct grammar mistake

* make preprocessing work without peptide_df

* clean up

* add cleaning of protein groups to evidence import

* rename

* clean Protein IDs for peptide_import and name intensity column Intensity

* implement requested changes: make peptide_df input optional for all preprocessing steps, remove peptide_df input in preprocessing steps, where it is not used

* implement requested changes: make peptide_df input optional for all preprocessing steps, remove peptide_df input in preprocessing steps, where it is not used

* unify into one method

* cleanup old names

* cleanup names

* cleanup names

* setup form and method

* small refactor: utilize fill_helper better

* implemented method

* cleanup rename and docstring

* tests and named sample column from "0" to "Sample"

* implemented slow version

* enable selecting multiple proteins

* complete merge

* rename file and fix selecting multiple proteins

* improve efficiency of methods ptms_per_sample and ptms_per_protein_and_sample

* added test for ptms_per_protein_and_sample

* complete merge (fix testes after merge)

* cleanup

* implement differential expression with mann-whitney-test on ptm data

* implement differential expression with mann-whitney-test on protein data

* refactor input and output handling for Mann-Whitney-test on ptm data

* add additional parameter form user and docstrings

* fix typo

* add test for mann whitney on intensity data

* add test for mann whitney on ptm data

* add mann whitney to volcano plot form

* add missing value to output_keys of mann whitney test

* Implement requested changes

---------

Co-authored-by: janni.roebbecke <janni.roebbecke@student.hpi.uni-potsdam.de>
---
 .../differential_expression_mann_whitney.py   | 115 ++++++++++--
 protzilla/methods/data_analysis.py            |  26 +--
 protzilla/utilities/transform_dfs.py          |   2 +-
 .../test_differential_expression.py           | 165 ++++++++++++++++--
 ui/runs/forms/data_analysis.py                |  15 +-
 5 files changed, 277 insertions(+), 46 deletions(-)

diff --git a/protzilla/data_analysis/differential_expression_mann_whitney.py b/protzilla/data_analysis/differential_expression_mann_whitney.py
index 89041261..3ef4f663 100644
--- a/protzilla/data_analysis/differential_expression_mann_whitney.py
+++ b/protzilla/data_analysis/differential_expression_mann_whitney.py
@@ -16,8 +16,34 @@ def mann_whitney_test_on_intensity_data(
         group2: str,
         log_base: str = None,
         alpha=0.05,
-        multiple_testing_correction_method: str = "",
+        multiple_testing_correction_method: str = "Benjamini-Hochberg",
+        p_value_calculation_method: str = "auto"
 ) -> dict:
+    """
+    Perform Mann-Whitney U test on all proteins in the given intensity data frame.
+
+    :param intensity_df: A protein dataframe in typical PROTzilla long format.
+    :param metadata_df: The metadata data frame containing the grouping information.
+    :param grouping: The column name in the metadata data frame that contains the grouping information,
+        that should be used.
+    :param group1: The name of the first group for the Mann-Whitney U test.
+    :param group2: The name of the second group for the Mann-Whitney U test.
+    :param log_base: The base of the logarithm that was used to transform the data.
+    :param alpha: The significance level for the test.
+    :param multiple_testing_correction_method: The method for multiple testing correction.
+    :param p_value_calculation_method: The method for p-value calculation.
+
+    :return: a dict containing
+        - a df differentially_expressed_proteins_df in long format containing all test results
+        - a df significant_proteins_df, containing the proteins of differentially_expressed_column_df,
+            that are significant after multiple testing correction
+        - a df corrected_p_values, containing the p_values after application of multiple testing correction
+        - a df log2_fold_change, containing the log2 fold changes per protein
+        - a df u_statistic_df, containing the u-statistic per protein
+        - a float corrected_alpha, containing the alpha value after application of multiple testing correction
+            (depending on the selected multiple testing correction method corrected_alpha may be equal to alpha)
+        - a list messages (optional), containing messages for the user
+    """
     wide_df = long_to_wide(intensity_df)
 
     outputs = mann_whitney_test_on_columns(
@@ -30,6 +56,7 @@ def mann_whitney_test_on_intensity_data(
         alpha=alpha,
         multiple_testing_correction_method=multiple_testing_correction_method,
         columns_name="Protein ID",
+        p_value_calculation_method=p_value_calculation_method
     )
     differentially_expressed_proteins_df = pd.merge(intensity_df, outputs["differential_expressed_columns_df"], on="Protein ID", how="left")
     differentially_expressed_proteins_df = differentially_expressed_proteins_df.loc[
@@ -50,6 +77,65 @@ def mann_whitney_test_on_intensity_data(
         messages=outputs["messages"],
     )
 
+def mann_whitney_test_on_ptm_data(
+        ptm_df: pd.DataFrame,
+        metadata_df: pd.DataFrame,
+        grouping: str,
+        group1: str,
+        group2: str,
+        alpha=0.05,
+        multiple_testing_correction_method: str = "Benjamini-Hochberg",
+        p_value_calculation_method: str = "auto"
+) -> dict:
+    """
+    Perform Mann-Whitney U test on all PTMs in the given PTM data frame.
+
+    :param ptm_df: The data frame containing the PTM data in columns and a
+        "Sample" column that can be mapped to the metadata, to assign the groups.
+    :param metadata_df: The metadata data frame containing the grouping information.
+    :param grouping: The column name in the metadata data frame that contains the grouping information,
+        that should be used.
+    :param group1: The name of the first group for the Mann-Whitney U test.
+    :param group2: The name of the second group for the Mann-Whitney U test.
+    :param log_base: The base of the logarithm that was used to transform the data.
+    :param alpha: The significance level for the test.
+    :param multiple_testing_correction_method: The method for multiple testing correction.
+    :param p_value_calculation_method: The method for p-value calculation.
+
+    :return: a dict containing
+        - a df differentially_expressed_ptm_df in wide format containing all test results
+        - a df significant_ptm_df, containing the ptm of differentially_expressed_column_df,
+            that are significant after multiple testing correction
+        - a df corrected_p_values, containing the p_values after application of multiple testing correction,
+        - a df log2_fold_change, containing the log2 fold changes per column,
+        - a df t_statistic_df, containing the t-statistic per protein,
+        - a float corrected_alpha, containing the alpha value after application of multiple testing correction (depending on the selected multiple testing correction method corrected_alpha may be equal to alpha),
+        - a list messages, containing messages for the user
+    """
+    output = mann_whitney_test_on_columns(
+        df=ptm_df,
+        metadata_df=metadata_df,
+        grouping=grouping,
+        group1=group1,
+        group2=group2,
+        log_base=None,
+        alpha=alpha,
+        multiple_testing_correction_method=multiple_testing_correction_method,
+        columns_name="PTM",
+        p_value_calculation_method=p_value_calculation_method
+    )
+
+    return dict(
+        differentially_expressed_ptm_df=output["differential_expressed_columns_df"],
+        significant_ptm_df=output["significant_columns_df"],
+        corrected_p_values_df=output["corrected_p_values_df"],
+        u_statistic_df=output["u_statistic_df"],
+        log2_fold_change_df=output["log2_fold_change_df"],
+        corrected_alpha=output["corrected_alpha"],
+        messages=output["messages"],
+    )
+
+
 def mann_whitney_test_on_columns(
         df: pd.DataFrame,
         metadata_df: pd.DataFrame,
@@ -58,25 +144,28 @@ def mann_whitney_test_on_columns(
         group2: str,
         log_base: str = None,
         alpha=0.05,
-        multiple_testing_correction_method: str = "",
+        multiple_testing_correction_method: str = "Benjamini-Hochberg",
         columns_name: str = "Protein ID",
+        p_value_calculation_method: str = "auto"
 ) -> dict:
     """
     Perform Mann-Whitney U test on all columns of the data frame.
 
-    @param df: The data frame containing the data in columns and a
+    :param df: The data frame containing the data in columns and a
     "Sample" column that can be mapped to the metadata, to assign the groups.
-    @param metadata_df: The metadata data frame containing the grouping information.
-    @param grouping: The column name in the metadata data frame that contains the grouping information,
+    :param metadata_df: The metadata data frame containing the grouping information.
+    :param grouping: The column name in the metadata data frame that contains the grouping information,
     that should be used.
-    @param group1: The name of the first group for the Mann-Whitney U test.
-    @param group2: The name of the second group for the Mann-Whitney U test.
-    @param log_base: The base of the logarithm that was used to transform the data.
-    @param alpha: The significance level for the test.
-    @param multiple_testing_correction_method: The method for multiple testing correction.
+    :param group1: The name of the first group for the Mann-Whitney U test.
+    :param group2: The name of the second group for the Mann-Whitney U test.
+    :param log_base: The base of the logarithm that was used to transform the data.
+    :param alpha: The significance level for the test.
+    :param multiple_testing_correction_method: The method for multiple testing correction.
+    :param columns_name: The semantics of the column names. This is used to name the columns in the output data frames.
+    :param p_value_calculation_method: The method for p-value calculation.
 
     :return: a dict containing
-        - a df differentially_expressed_column_df in wide format containing the t-test results
+        - a df differentially_expressed_column_df in wide format containing the test results
         - a df significant_columns_df, containing the columns of differentially_expressed_column_df,
             that are significant after multiple testing correction
         - a df corrected_p_values, containing the p_values after application of multiple testing correction,
@@ -104,7 +193,7 @@ def mann_whitney_test_on_columns(
     for column in data_columns:
         group1_data = df_with_groups[df_with_groups[grouping] == group1][column]
         group2_data = df_with_groups[df_with_groups[grouping] == group2][column]
-        u_statistic, p_value = stats.mannwhitneyu(group1_data, group2_data, alternative="two-sided")
+        u_statistic, p_value = stats.mannwhitneyu(group1_data, group2_data, alternative="two-sided", method=p_value_calculation_method)
 
         if not np.isnan(p_value):
             log2_fold_change = (
@@ -139,7 +228,7 @@ def mann_whitney_test_on_columns(
     )
     u_statistic_df = pd.DataFrame(
         list(zip(valid_columns, u_statistics)),
-        columns=[columns_name, "t_statistic"],
+        columns=[columns_name, "u_statistic"],
     )
 
     combined_df = pd.DataFrame(
diff --git a/protzilla/methods/data_analysis.py b/protzilla/methods/data_analysis.py
index 001a2fca..7ad45ddd 100644
--- a/protzilla/methods/data_analysis.py
+++ b/protzilla/methods/data_analysis.py
@@ -9,9 +9,7 @@
 from protzilla.data_analysis.differential_expression_anova import anova
 from protzilla.data_analysis.differential_expression_linear_model import linear_model
 from protzilla.data_analysis.differential_expression_mann_whitney import (
-    mann_whitney_test_on_columns,
-    mann_whitney_test_on_intensity_data,
-)
+    mann_whitney_test_on_intensity_data, mann_whitney_test_on_ptm_data)
 from protzilla.data_analysis.differential_expression_t_test import t_test
 from protzilla.data_analysis.dimension_reduction import t_sne, umap
 from protzilla.data_analysis.model_evaluation import evaluate_classification_model
@@ -175,11 +173,13 @@ class DifferentialExpressionMannWhitneyOnIntensity(DataAnalysisStep):
         "group2",
         "alpha",
         "multiple_testing_correction_method",
+        "p_value_calculation_method",
     ]
     output_keys = [
         "differentially_expressed_proteins_df",
         "significant_proteins_df",
         "corrected_p_values_df",
+        "u_statistic_df",
         "log2_fold_change_df",
         "corrected_alpha",
     ]
@@ -209,40 +209,32 @@ class DifferentialExpressionMannWhitneyOnPTM(DataAnalysisStep):
     )
 
     input_keys = [
-        "df",
+        "ptm_df",
         "metadata_df",
         "grouping",
         "group1",
         "group2",
         "alpha",
         "multiple_testing_correction_method",
-        "columns_name",
+        "p_value_calculation_method",
     ]
     output_keys = [
         "differentially_expressed_ptm_df",
         "significant_ptm_df",
         "corrected_p_values_df",
+        "u_statistic_df",
         "log2_fold_change_df",
         "corrected_alpha",
     ]
 
     def method(self, inputs: dict) -> dict:
-        return mann_whitney_test_on_columns(**inputs)
+        return mann_whitney_test_on_ptm_data(**inputs)
 
     def insert_dataframes(self, steps: StepManager, inputs) -> dict:
-        inputs["df"] = steps.get_step_output(Step, "ptm_df", inputs["ptm_df"])
-        inputs["columns_name"] = "PTM"
+        inputs["ptm_df"] = steps.get_step_output(Step, "ptm_df", inputs["ptm_df"])
         inputs["metadata_df"] = steps.metadata_df
-        inputs["log_base"] = steps.get_step_input(TransformationLog, "log_base")
         return inputs
-
-    def handle_outputs(self, outputs: dict) -> None:
-        outputs["differentially_expressed_ptm_df"] = outputs.pop(
-            "differential_expressed_columns_df", None
-        )
-        outputs["significant_ptm_df"] = outputs.pop("significant_columns_df", None)
-        super().handle_outputs(outputs)
-
+      
 
 class PlotVolcano(PlotStep):
     display_name = "Volcano Plot"
diff --git a/protzilla/utilities/transform_dfs.py b/protzilla/utilities/transform_dfs.py
index f3605b08..59a83259 100644
--- a/protzilla/utilities/transform_dfs.py
+++ b/protzilla/utilities/transform_dfs.py
@@ -3,7 +3,7 @@
 from protzilla.utilities import default_intensity_column
 
 
-def long_to_wide(intensity_df: pd.DataFrame, value_name: str = None):
+def long_to_wide(intensity_df: pd.DataFrame, value_name: str | None = None):
     """
     This function transforms the dataframe to a wide format that
     can be more easily handled by packages such as sklearn.
diff --git a/tests/protzilla/data_analysis/test_differential_expression.py b/tests/protzilla/data_analysis/test_differential_expression.py
index 43ad6021..15a000e4 100644
--- a/tests/protzilla/data_analysis/test_differential_expression.py
+++ b/tests/protzilla/data_analysis/test_differential_expression.py
@@ -3,6 +3,8 @@
 import pytest
 
 from protzilla.data_analysis.differential_expression import anova, linear_model, t_test
+from protzilla.data_analysis.differential_expression_mann_whitney import mann_whitney_test_on_intensity_data, \
+    mann_whitney_test_on_ptm_data
 from protzilla.data_analysis.plots import create_volcano_plot
 
 
@@ -62,8 +64,8 @@ def diff_expr_test_data():
 
 
 def test_differential_expression_linear_model(
-    diff_expr_test_data,
-    show_figures,
+        diff_expr_test_data,
+        show_figures,
 ):
     test_intensity_df, test_metadata_df = diff_expr_test_data
     test_alpha = 0.05
@@ -107,8 +109,8 @@ def test_differential_expression_linear_model(
     assert p_values_rounded == corrected_p_values
     assert log2fc_rounded == log2_fc
     assert (
-        list(current_out["differentially_expressed_proteins_df"]["Protein ID"].unique())
-        == differentially_expressed_proteins
+            list(current_out["differentially_expressed_proteins_df"]["Protein ID"].unique())
+            == differentially_expressed_proteins
     )
     assert current_out["corrected_alpha"] == test_alpha
 
@@ -160,13 +162,13 @@ def test_differential_expression_student_t_test(diff_expr_test_data, show_figure
 
     assert p_values_rounded == corrected_p_values
     assert (
-        list(current_out["differentially_expressed_proteins_df"]["Protein ID"].unique())
-        == differentially_expressed_proteins
+            list(current_out["differentially_expressed_proteins_df"]["Protein ID"].unique())
+            == differentially_expressed_proteins
     )
     assert current_out["corrected_alpha"] == test_alpha
     assert (
-        list(current_out["significant_proteins_df"]["Protein ID"].unique())
-        == significant_proteins
+            list(current_out["significant_proteins_df"]["Protein ID"].unique())
+            == significant_proteins
     )
 
 
@@ -217,13 +219,13 @@ def test_differential_expression_welch_t_test(diff_expr_test_data, show_figures)
 
     assert p_values_rounded == corrected_p_values
     assert (
-        list(current_out["differentially_expressed_proteins_df"]["Protein ID"].unique())
-        == differentially_expressed_proteins
+            list(current_out["differentially_expressed_proteins_df"]["Protein ID"].unique())
+            == differentially_expressed_proteins
     )
     assert current_out["corrected_alpha"] == test_alpha
     assert (
-        list(current_out["significant_proteins_df"]["Protein ID"].unique())
-        == significant_proteins
+            list(current_out["significant_proteins_df"]["Protein ID"].unique())
+            == significant_proteins
     )
 
 
@@ -386,4 +388,141 @@ def test_differential_expression_anova(show_figures):
         1.0000,
     ]
 
-    assert assertion_p_values == p_values_rounded
\ No newline at end of file
+    assert assertion_p_values == p_values_rounded
+
+
+def test_differential_expression_mann_whitney_on_intensity(
+        diff_expr_test_data,
+        show_figures,
+):
+    test_intensity_df, test_metadata_df = diff_expr_test_data
+    test_alpha = 0.05
+
+    current_input = dict(
+        intensity_df=test_intensity_df,
+        metadata_df=test_metadata_df,
+        grouping="Group",
+        group1="Group1",
+        group2="Group2",
+        multiple_testing_correction_method="Benjamini-Hochberg",
+        alpha=test_alpha,
+        log_base="log2",
+        p_value_calculation_method="auto",
+    )
+    current_out = mann_whitney_test_on_intensity_data(**current_input)
+
+    fig = create_volcano_plot(
+        p_values=current_out["corrected_p_values_df"],
+        log2_fc=current_out["log2_fold_change_df"],
+        alpha=current_out["corrected_alpha"],
+        group1=current_input["group1"],
+        group2=current_input["group2"],
+        fc_threshold=0,
+    )
+
+    if show_figures:
+        fig.show()
+
+    expected_corrected_p_values = [0.2, 0.4916, 1.0, 0.2]
+    expected_u_statistics = [9.0, 7.0, 4.5, 9.0]
+    expected_log2_fc = [-10.1926, -1.0, 0.0, -5.0]
+    expected_differentially_expressed_proteins = ["Protein1", "Protein2", "Protein3", "Protein4"]
+
+    p_values_rounded = [
+        round(x, 4) for x in current_out["corrected_p_values_df"]["corrected_p_value"]
+    ]
+    u_statistics = current_out["u_statistic_df"]["u_statistic"]
+    log2fc_rounded = [
+        round(x, 4) for x in current_out["log2_fold_change_df"]["log2_fold_change"]
+    ]
+
+    assert p_values_rounded == expected_corrected_p_values
+    assert all(u_statistics == expected_u_statistics)
+    assert log2fc_rounded == expected_log2_fc
+    assert (
+            list(current_out["differentially_expressed_proteins_df"]["Protein ID"].unique())
+            == expected_differentially_expressed_proteins
+    )
+    assert current_out["corrected_alpha"] == test_alpha
+
+
+@pytest.fixture
+def ptm_test_data():
+    test_amount_list = (
+        ["Sample1",  1,  1, 10,   1, 100],
+        ["Sample2",  2,  2, 10,   1, 100],
+        ["Sample3",  3,  3, 10,   1, 100],
+        ["Sample4",  4,  4, 10,   1, 100],
+        ["Sample5",  5,  5, 10,   1, 100],
+        ["Sample6",  6,  3, 11, 111, 100],
+        ["Sample7",  7,  4, 12, 222, 100],
+        ["Sample8",  8,  5, 13, 333, 100],
+        ["Sample9",  9,  6, 14, 444, 100],
+        ["Sample10", 10, 7, 15, 555, 100],
+    )
+    test_amount_df = pd.DataFrame(
+        data=test_amount_list,
+        columns=["Sample", "Oxidation", "Acetyl", "GlyGly", "Phospho", "Unmodified"],
+    )
+
+    test_metadata_list = (
+        ["Sample1", "Group1"],
+        ["Sample2", "Group1"],
+        ["Sample3", "Group1"],
+        ["Sample4", "Group1"],
+        ["Sample5", "Group1"],
+        ["Sample6", "Group2"],
+        ["Sample7", "Group2"],
+        ["Sample8", "Group2"],
+        ["Sample9", "Group2"],
+        ["Sample10", "Group2"],
+    )
+    test_metadata_df = pd.DataFrame(
+        data=test_metadata_list,
+        columns=["Sample", "Group"],
+    )
+
+    return test_amount_df, test_metadata_df
+
+
+def test_differential_expression_mann_whitney_on_ptm(
+        ptm_test_data,
+        show_figures,
+):
+    test_amount_df, test_metadata_df = ptm_test_data
+    test_alpha = 0.05
+
+    current_input = dict(
+        ptm_df=test_amount_df,
+        metadata_df=test_metadata_df,
+        grouping="Group",
+        group1="Group1",
+        group2="Group2",
+        multiple_testing_correction_method="Benjamini-Hochberg",
+        alpha=test_alpha,
+        p_value_calculation_method="auto",
+    )
+    current_out = mann_whitney_test_on_ptm_data(**current_input)
+
+    expected_corrected_p_values = [0.0132, 0.1423, 0.0132, 0.0132, 1.00000]
+    expected_u_statistics = [0.0, 4.5, 0.0, 0.0, 12.5]
+    expected_log2_fc = [1.415, 0.737, 0.3785, 8.3794, 0.0]
+
+    expected_significant_ptms = ["Oxidation", "GlyGly", "Phospho"]
+
+    p_values_rounded = [
+        round(x, 4) for x in current_out["corrected_p_values_df"]["corrected_p_value"]
+    ]
+    u_statistics = current_out["u_statistic_df"]["u_statistic"]
+    log2_fc_rounded = [
+        round(x, 4) for x in current_out["log2_fold_change_df"]["log2_fold_change"]
+    ]
+
+    assert p_values_rounded == expected_corrected_p_values
+    assert all(u_statistics == expected_u_statistics)
+    assert log2_fc_rounded == expected_log2_fc
+    assert (
+            list(current_out["significant_ptm_df"]["PTM"].unique())
+            == expected_significant_ptms
+    )
+    assert current_out["corrected_alpha"] == test_alpha
\ No newline at end of file
diff --git a/ui/runs/forms/data_analysis.py b/ui/runs/forms/data_analysis.py
index 1eb6441b..2363832d 100644
--- a/ui/runs/forms/data_analysis.py
+++ b/ui/runs/forms/data_analysis.py
@@ -38,6 +38,12 @@ class MultipleTestingCorrectionMethod(Enum):
     bonferroni = "Bonferroni"
 
 
+class PValueCalculationMethod(Enum):
+    auto = "Auto"
+    exact = "Exact"
+    asymptotic = "Asymptotic"
+
+
 class YesNo(Enum):
     yes = "Yes"
     no = "No"
@@ -341,12 +347,17 @@ class DifferentialExpressionMannWhitneyOnPTMForm(MethodForm):
     alpha = CustomFloatField(
         label="Error rate (alpha)", min_value=0, max_value=1, initial=0.05
     )
+    p_value_calculation_method = CustomChoiceField(
+        choices=PValueCalculationMethod,
+        label="P-value calculation method",
+        initial=PValueCalculationMethod.auto,
+    )
     grouping = CustomChoiceField(choices=[], label="Grouping from metadata")
     group1 = CustomChoiceField(choices=[], label="Group 1")
     group2 = CustomChoiceField(choices=[], label="Group 2")
 
     def fill_form(self, run: Run) -> None:
-        self.fields["df"].choices = fill_helper.to_choices(
+        self.fields["ptm_df"].choices = fill_helper.to_choices(
             run.steps.get_instance_identifiers(PTMsPerSample, "ptm_df")
         )
 
@@ -395,7 +406,7 @@ class PlotVolcanoForm(MethodForm):
     def fill_form(self, run: Run) -> None:
         self.fields["input_dict"].choices = fill_helper.to_choices(
             run.steps.get_instance_identifiers(
-                DifferentialExpressionTTest | DifferentialExpressionLinearModel,
+                DifferentialExpressionTTest | DifferentialExpressionLinearModel | DifferentialExpressionMannWhitneyOnIntensityForm,
                 "differentially_expressed_proteins_df",
             )
         )