From 996a0ec3a147cee9ed69b61e4534988157b0cf70 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 10 Dec 2024 12:18:38 +0100 Subject: [PATCH] Improve the docs for specified terms MT filter. --- docs/tutorial.rst | 2 +- docs/user-guide/analyses/mtc.rst | 44 ++++++++++++------- docs/user-guide/analyses/phenotype-groups.rst | 31 ++++++++++--- src/gpsea/analysis/mtc_filter/_impl.py | 6 +-- 4 files changed, 58 insertions(+), 25 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index b1a1b483..ae774626 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -309,7 +309,7 @@ For general use, we recommend using a combination of a *phenotype MT filter* (:class:`~gpsea.analysis.mtc_filter.PhenotypeMtcFilter`) with a *multiple testing correction*. Phenotype MT filter chooses the HPO terms to test according to several heuristics, which reduce the multiple testing burden and focus the analysis -on the most interesting terms (see :ref:`HPO MT filter <hpo-mtc-filter-strategy>` for more info). +on the most interesting terms (see :ref:`HPO MT filter <hpo-mt-filter>` for more info). Then the multiple testing correction, such as Bonferroni or Benjamini-Hochberg, is used to control the family-wise error rate or the false discovery rate. See :ref:`mtc` for more information. diff --git a/docs/user-guide/analyses/mtc.rst b/docs/user-guide/analyses/mtc.rst index bc232e3e..893b634c 100644 --- a/docs/user-guide/analyses/mtc.rst +++ b/docs/user-guide/analyses/mtc.rst @@ -1,8 +1,8 @@ .. _mtc: -=========================== +########################### Multiple-testing correction -=========================== +########################### ********** Background @@ -38,6 +38,7 @@ it is likely that we will obtain one or more false-positive results. GPSEA offers two approaches to mitigate this problem: multiple-testing correction (MTC) procedures and MT filters to choose the terms to be tested. + .. 
_mtc-correction-procedures: Multiple-testing correction procedures @@ -118,27 +119,38 @@ may "survive" the multiple-testing correction. In the context of GPSEA, we represent the concept of phenotype filtering by :class:`~gpsea.analysis.mtc_filter.PhenotypeMtcFilter`. -We provide three filtering strategies. +We provide three filtering strategies, each of which is a subclass +of :class:`~gpsea.analysis.mtc_filter.PhenotypeMtcFilter` +and can, therefore, be used +as a component of :class:`~gpsea.analysis.pcats.HpoTermAnalysis`, +as shown in :ref:`custom-hpo-analysis`. + +There are three phenotype MT filters: +* Use all terms +* Specified terms +* HPO MT filter -.. _use-all-terms-strategy: -Test all terms --------------- +.. _use-all-terms-mt-filter: -The first MT filtering strategy is the simplest - do not apply any filtering at all. -This will result in testing all terms and we do not recommend this strategy, -but it can be used to disable MT filtering. +Use all terms +------------- + +The first MT filtering strategy is the simplest - it does not apply any filtering, +resulting in testing all terms. +We do not recommend this strategy, but it can be used to disable MT filtering. The strategy is implemented in :class:`~gpsea.analysis.mtc_filter.UseAllTermsMtcFilter`. >>> from gpsea.analysis.mtc_filter import UseAllTermsMtcFilter >>> use_all = UseAllTermsMtcFilter() -.. _specify-terms-strategy: -Specify terms strategy ----------------------- +.. _specified-terms-mt-filter: + +Specified terms +--------------- In presence of a specific hypothesis as to which terms may be different between groups, then you can specify these terms in :class:`~gpsea.analysis.mtc_filter.SpecifiedTermsMtcFilter`. @@ -159,12 +171,12 @@ we pass an iterable (e.g. a tuple) with these two terms as an argument: 2 -.. _hpo-mtc-filter-strategy: +.. 
_hpo-mt-filter: -HPO MT filter strategy ------------------------ +HPO MT filter +------------- -The HPO MT strategy involves making several domain judgments and takes advantage of the HPO structure. +The HPO MT filter involves making several domain judgments and takes advantage of the HPO structure. The strategy needs access to HPO: >>> import hpotk diff --git a/docs/user-guide/analyses/phenotype-groups.rst b/docs/user-guide/analyses/phenotype-groups.rst index 9b5a5999..92b87bab 100644 --- a/docs/user-guide/analyses/phenotype-groups.rst +++ b/docs/user-guide/analyses/phenotype-groups.rst @@ -179,6 +179,9 @@ The function finds 369 HPO terms that annotate at least one individual, including the *indirect* annotations whose presence is implied by the :ref:`true-path-rule`. +.. _phenotype-groups-statistical-analysis: + + Statistical analysis -------------------- @@ -201,6 +204,7 @@ The available MTC procedures are listed in the :ref:`mtc-correction-procedures` We must pick one of these to perform genotype-phenotype analysis. +.. _default-hpo-analysis: Default analysis ^^^^^^^^^^^^^^^^ @@ -212,11 +216,18 @@ The default analysis can be configured with :func:`~gpsea.analysis.pcats.configu >>> from gpsea.analysis.pcats import configure_hpo_term_analysis >>> analysis = configure_hpo_term_analysis(hpo) +At this point, the ``analysis`` is configured to test +a cohort for G/P associations. + + +.. _custom-hpo-analysis: Custom analysis ^^^^^^^^^^^^^^^ -If the defaults do not work, we can configure the analysis manually. +If the default selection of phenotype MT filter and multiple testing correction is not an option, +we can configure the analysis manually. + First, we choose a phenotype MT filter (e.g. :class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`): >>> from gpsea.analysis.mtc_filter import HpoMtcFilter @@ -224,7 +235,7 @@ First, we choose a phenotype MT filter (e.g. :class:`~gpsea.analysis.mtc_filter. .. 
note:: - See the :ref:`mtc-filters` section for more info on the available MT filters. + See the :ref:`mtc-filters` section for info regarding other phenotype MT filters. then a statistical test (e.g. Fisher Exact test): @@ -242,6 +253,10 @@ and we finalize the setup by choosing a MTC procedure >>> mtc_correction = 'fdr_bh' >>> mtc_alpha = 0.05 +.. note:: + + See the :ref:`mtc-correction-procedures` section for a list of available MTC procedure codes. + The final :class:`~gpsea.analysis.pcats.HpoTermAnalysis` is created as: >>> from gpsea.analysis.pcats import HpoTermAnalysis @@ -252,6 +267,8 @@ The final :class:`~gpsea.analysis.pcats.HpoTermAnalysis` is created as: ... mtc_alpha=0.05, ... ) +The ``analysis`` is identical to the one configured in the :ref:`default-hpo-analysis` section. + Analysis ======== @@ -269,8 +286,10 @@ We can now test associations between the genotype groups and the HPO terms: 24 +We tested the ``cohort`` for association between the genotype groups (``gt_predicate``) +and HPO terms (``pheno_predicates``). Thanks to phenotype MT filter, we only tested 24 out of 369 terms. -We can learn more by showing the MT filter report: +The MT filter report shows the filtering details: >>> from gpsea.view import MtcStatsViewer >>> mtc_viewer = MtcStatsViewer() @@ -289,8 +308,10 @@ We can learn more by showing the MT filter report: Genotype phenotype associations =============================== -Last, let's explore the associations. The results include a table with all tested HPO terms -ordered by the corrected p value (Benjamini-Hochberg FDR): +Last, let's explore the associations. + +GPSEA displays the associations between genotypes and HPO terms in a table, +one HPO term per row. The rows are ordered by the corrected p value and nominal p value in descending order. 
>>> from gpsea.view import summarize_hpo_analysis >>> summary_df = summarize_hpo_analysis(hpo, result) diff --git a/src/gpsea/analysis/mtc_filter/_impl.py b/src/gpsea/analysis/mtc_filter/_impl.py index c43a89ad..2166bd73 100644 --- a/src/gpsea/analysis/mtc_filter/_impl.py +++ b/src/gpsea/analysis/mtc_filter/_impl.py @@ -157,7 +157,7 @@ class UseAllTermsMtcFilter(PhenotypeMtcFilter[typing.Any]): """ `UseAllTermsMtcFilter` filters out *no* phenotype terms. - See :ref:`use-all-terms-strategy` section for more info. + See :ref:`use-all-terms-mt-filter` section for more info. """ def filter( @@ -186,7 +186,7 @@ class SpecifiedTermsMtcFilter(PhenotypeMtcFilter[hpotk.TermId]): terms to the constructor of this class, thereby preventing other terms from being tested and reducing the multiple testing burden. - See :ref:`specify-terms-strategy` section for more info. + See :ref:`specified-terms-mt-filter` section for more info. """ NON_SPECIFIED_TERM = PhenotypeMtcResult.fail(code="ST1", reason="Non-specified term") @@ -247,7 +247,7 @@ class HpoMtcFilter(PhenotypeMtcFilter[hpotk.TermId]): `HpoMtcFilter` decides which phenotypes should be tested and which phenotypes are not worth testing. The class leverages a number of heuristics and domain decisions. - See :ref:`hpo-mtc-filter-strategy` section for more info. + See :ref:`hpo-mt-filter` section for more info. We recommend creating an instance using the :func:`default_filter` static factory method. """