From 996a0ec3a147cee9ed69b61e4534988157b0cf70 Mon Sep 17 00:00:00 2001
From: Daniel Danis <daniel.gordon.danis@protonmail.com>
Date: Tue, 10 Dec 2024 12:18:38 +0100
Subject: [PATCH] Improve the docs for specified terms MT filter.

---
 docs/tutorial.rst                             |  2 +-
 docs/user-guide/analyses/mtc.rst              | 44 ++++++++++++-------
 docs/user-guide/analyses/phenotype-groups.rst | 31 ++++++++++---
 src/gpsea/analysis/mtc_filter/_impl.py        |  6 +--
 4 files changed, 58 insertions(+), 25 deletions(-)
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index b1a1b483..ae774626 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -309,7 +309,7 @@ For general use, we recommend using a combination
 of a *phenotype MT filter* (:class:`~gpsea.analysis.mtc_filter.PhenotypeMtcFilter`) with a *multiple testing correction*.
 Phenotype MT filter chooses the HPO terms to test according to several heuristics, which
 reduce the multiple testing burden and focus the analysis
-on the most interesting terms (see :ref:`HPO MT filter <hpo-mtc-filter-strategy>` for more info).
+on the most interesting terms (see :ref:`HPO MT filter <hpo-mt-filter>` for more info).
 Then the multiple testing correction, such as Bonferroni or Benjamini-Hochberg,
 is used to control the family-wise error rate or the false discovery rate.
 See :ref:`mtc` for more information.
diff --git a/docs/user-guide/analyses/mtc.rst b/docs/user-guide/analyses/mtc.rst
index bc232e3e..893b634c 100644
--- a/docs/user-guide/analyses/mtc.rst
+++ b/docs/user-guide/analyses/mtc.rst
@@ -1,8 +1,8 @@
 .. _mtc:
 
-===========================
+###########################
 Multiple-testing correction
-===========================
+###########################
 
 **********
 Background
@@ -38,6 +38,7 @@ it is likely that we will obtain one or more false-positive results.
 GPSEA offers two approaches to mitigate this problem: multiple-testing correction (MTC) procedures
 and MT filters to choose the terms to be tested.
 
+
 .. _mtc-correction-procedures:
 
 Multiple-testing correction procedures
@@ -118,27 +119,38 @@ may "survive" the multiple-testing correction.
 
 In the context of GPSEA, we represent the concept of phenotype filtering 
 by :class:`~gpsea.analysis.mtc_filter.PhenotypeMtcFilter`.
-We provide three filtering strategies.
+We provide three filtering strategies, each of which is a subclass
+of :class:`~gpsea.analysis.mtc_filter.PhenotypeMtcFilter`
+and can, therefore, be used
+as a component of :class:`~gpsea.analysis.pcats.HpoTermAnalysis`,
+as shown in :ref:`custom-hpo-analysis`.
+
+There are three phenotype MT filters:
 
+* Use all terms
+* Specified terms
+* HPO MT filter
 
-.. _use-all-terms-strategy:
 
-Test all terms
---------------
+.. _use-all-terms-mt-filter:
 
-The first MT filtering strategy is the simplest - do not apply any filtering at all.
-This will result in testing all terms and we do not recommend this strategy, 
-but it can be used to disable MT filtering.
+Use all terms
+-------------
+
+The first MT filtering strategy is the simplest - it does not apply any filtering,
+resulting in testing all terms.
+We do not recommend this strategy, but it can be used to disable MT filtering.
 
 The strategy is implemented in :class:`~gpsea.analysis.mtc_filter.UseAllTermsMtcFilter`.
 
 >>> from gpsea.analysis.mtc_filter import UseAllTermsMtcFilter
 >>> use_all = UseAllTermsMtcFilter()
 
-.. _specify-terms-strategy:
 
-Specify terms strategy
-----------------------
+.. _specified-terms-mt-filter:
+
+Specified terms
+---------------
 
 In presence of a specific hypothesis as to which terms may be different between groups, 
 then you can specify these terms in :class:`~gpsea.analysis.mtc_filter.SpecifiedTermsMtcFilter`.
@@ -159,12 +171,12 @@ we pass an iterable (e.g. a tuple) with these two terms as an argument:
 2
 
 
-.. _hpo-mtc-filter-strategy:
+.. _hpo-mt-filter:
 
-HPO MT filter strategy
------------------------
+HPO MT filter
+-------------
 
-The HPO MT strategy involves making several domain judgments and takes advantage of the HPO structure.
+The HPO MT filter involves making several domain judgments and takes advantage of the HPO structure.
 The strategy needs access to HPO:
 
 >>> import hpotk
diff --git a/docs/user-guide/analyses/phenotype-groups.rst b/docs/user-guide/analyses/phenotype-groups.rst
index 9b5a5999..92b87bab 100644
--- a/docs/user-guide/analyses/phenotype-groups.rst
+++ b/docs/user-guide/analyses/phenotype-groups.rst
@@ -179,6 +179,9 @@ The function finds 369 HPO terms that annotate at least one individual,
 including the *indirect* annotations whose presence is implied by the :ref:`true-path-rule`.
 
 
+.. _phenotype-groups-statistical-analysis:
+
+
 Statistical analysis
 --------------------
 
@@ -201,6 +204,7 @@ The available MTC procedures are listed in the :ref:`mtc-correction-procedures`
 
 We must pick one of these to perform genotype-phenotype analysis.
 
+.. _default-hpo-analysis:
 
 Default analysis
 ^^^^^^^^^^^^^^^^
@@ -212,11 +216,18 @@ The default analysis can be configured with :func:`~gpsea.analysis.pcats.configu
 >>> from gpsea.analysis.pcats import configure_hpo_term_analysis
 >>> analysis = configure_hpo_term_analysis(hpo)
 
+At this point, the ``analysis`` is configured to test
+a cohort for G/P associations.
+
+
+.. _custom-hpo-analysis:
 
 Custom analysis
 ^^^^^^^^^^^^^^^
 
-If the defaults do not work, we can configure the analysis manually.
+If the default selection of phenotype MT filter and multiple testing correction is not suitable,
+we can configure the analysis manually.
+
 First, we choose a phenotype MT filter (e.g. :class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`):
 
 >>> from gpsea.analysis.mtc_filter import HpoMtcFilter
@@ -224,7 +235,7 @@ First, we choose a phenotype MT filter (e.g. :class:`~gpsea.analysis.mtc_filter.
 
 .. note::
 
-   See the :ref:`mtc-filters` section for more info on the available MT filters.
+   See the :ref:`mtc-filters` section for info regarding other phenotype MT filters.
 
 then a statistical test (e.g. Fisher Exact test):
 
@@ -242,6 +253,10 @@ and we finalize the setup by choosing a MTC procedure
 >>> mtc_correction = 'fdr_bh'
 >>> mtc_alpha = 0.05
 
+.. note::
+
+   See the :ref:`mtc-correction-procedures` section for a list of available MTC procedure codes.
+
 The final :class:`~gpsea.analysis.pcats.HpoTermAnalysis` is created as:
 
 >>> from gpsea.analysis.pcats import HpoTermAnalysis
@@ -252,6 +267,8 @@ The final :class:`~gpsea.analysis.pcats.HpoTermAnalysis` is created as:
 ...     mtc_alpha=0.05,
 ... )
 
+The ``analysis`` is identical to the one configured in the :ref:`default-hpo-analysis` section.
+
 
 Analysis
 ========
@@ -269,8 +286,10 @@ We can now test associations between the genotype groups and the HPO terms:
 24
 
 
+We tested the ``cohort`` for association between the genotype groups (``gt_predicate``)
+and HPO terms (``pheno_predicates``).
 Thanks to phenotype MT filter, we only tested 24 out of 369 terms.
-We can learn more by showing the MT filter report:
+The MT filter report shows the filtering details:
 
 >>> from gpsea.view import MtcStatsViewer
 >>> mtc_viewer = MtcStatsViewer()
@@ -289,8 +308,10 @@ We can learn more by showing the MT filter report:
 Genotype phenotype associations
 ===============================
 
-Last, let's explore the associations. The results include a table with all tested HPO terms
-ordered by the corrected p value (Benjamini-Hochberg FDR):
+Last, let's explore the associations.
+
+GPSEA displays the associations between genotypes and HPO terms in a table,
+one HPO term per row. The rows are ordered by the corrected p value and nominal p value in descending order.
 
 >>> from gpsea.view import summarize_hpo_analysis
 >>> summary_df = summarize_hpo_analysis(hpo, result)
diff --git a/src/gpsea/analysis/mtc_filter/_impl.py b/src/gpsea/analysis/mtc_filter/_impl.py
index c43a89ad..2166bd73 100644
--- a/src/gpsea/analysis/mtc_filter/_impl.py
+++ b/src/gpsea/analysis/mtc_filter/_impl.py
@@ -157,7 +157,7 @@ class UseAllTermsMtcFilter(PhenotypeMtcFilter[typing.Any]):
     """
     `UseAllTermsMtcFilter` filters out *no* phenotype terms.
 
-    See :ref:`use-all-terms-strategy` section for more info.
+    See :ref:`use-all-terms-mt-filter` section for more info.
     """
 
     def filter(
@@ -186,7 +186,7 @@ class SpecifiedTermsMtcFilter(PhenotypeMtcFilter[hpotk.TermId]):
     terms to the constructor of this class, thereby preventing other terms from
     being tested and reducing the multiple testing burden.
 
-    See :ref:`specify-terms-strategy` section for more info.
+    See :ref:`specified-terms-mt-filter` section for more info.
     """
 
     NON_SPECIFIED_TERM = PhenotypeMtcResult.fail(code="ST1", reason="Non-specified term")
@@ -247,7 +247,7 @@ class HpoMtcFilter(PhenotypeMtcFilter[hpotk.TermId]):
     `HpoMtcFilter` decides which phenotypes should be tested and which phenotypes are not worth testing.
 
     The class leverages a number of heuristics and domain decisions.
-    See :ref:`hpo-mtc-filter-strategy` section for more info.
+    See :ref:`hpo-mt-filter` section for more info.
 
     We recommend creating an instance using the :func:`default_filter` static factory method.
     """