From 0d7bd0e3f7a413b24dcb0edcecfb8bd059d1b47d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Boris=20Cl=C3=A9net?= <117362283+bclenet@users.noreply.github.com>
Date: Wed, 10 Jan 2024 16:55:38 +0100
Subject: [PATCH] New command line tools (#138)

* [BUG] inside unit_tests workflow
* [DOC] runner help
* Creating entry-points for the project
* [DOC] command line tools
* [DOC] command line tools
* Adding a tester command line tool
---
 INSTALL.md                              | 32 +++++++--
 docs/data.md                            | 27 +++----
 docs/description.md                     | 11 +--
 docs/running.md                         | 18 +++--
 docs/status.md                          |  9 ++-
 docs/testing.md                         | 24 ++++++-
 narps_open/data/description/__main__.py | 96 +++++++++++++------------
 narps_open/data/results/__main__.py     | 55 +++++++-------
 narps_open/runner.py                    |  7 +-
 narps_open/tester.py                    | 29 ++++++++
 narps_open/utils/status.py              | 11 ++-
 setup.py                                | 11 ++-
 12 files changed, 224 insertions(+), 106 deletions(-)
 create mode 100644 narps_open/tester.py

diff --git a/INSTALL.md b/INSTALL.md
index be1b5939..9a429f00 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -74,12 +74,36 @@ cd /home/neuro/code/
 pip install .
 ```
 
-Finally, you are able to run pipelines :
+Finally, you are able to use the project's command line tools:
+
+* `narps_open_runner`: run pipelines
+* `narps_open_tester`: run a pipeline and test its results against original ones from the team
+* `narps_description`: get the textual description made by a team
+* `narps_results`: download the original results from teams
+* `narps_open_status`: get status information about the development process of the pipelines
 
 ```bash
-python narps_open/runner.py
-  usage: runner.py [-h] -t TEAM (-r RSUBJECTS | -s SUBJECTS [SUBJECTS ...] | -n NSUBJECTS) [-g | -f] [-c]
+# Run the pipeline for team 2T6S, with 40 subjects
+narps_open_runner -t 2T6S -n 40
+
+# Run the pipeline for team 08MQ, compare results with original ones,
+# and produce a report with correlation values.
+narps_open_tester -t 08MQ
+
+# Get the description of team C88N in markdown formatting
+narps_description -t C88N --md
+
+# Download the results from all teams
+narps_results -a
+
+# Get the pipeline work status information in json formatting
+narps_open_status --json
 ```
 
 > [!NOTE]
-> For further information, read this documentation page [docs/running.md](docs/running.md).
+> For further information about these command line tools, read the corresponding documentation pages.
+> * `narps_open_runner`: [docs/running.md](docs/running.md)
+> * `narps_open_tester`: [docs/testing.md](docs/testing.md#command-line-tool)
+> * `narps_description`: [docs/description.md](docs/description.md)
+> * `narps_results`: [docs/data.md](docs/data.md#results-from-narps-teams)
+> * `narps_open_status`: [docs/status.md](docs/status.md)
diff --git a/docs/data.md b/docs/data.md
index c5b55fba..3a68b32e 100644
--- a/docs/data.md
+++ b/docs/data.md
@@ -67,28 +67,31 @@ for team in teams:
     collection.rectify() # Rectified versions are created
 ```
 
+> [!TIP]
+> In the following examples, use `narps_results` or `python narps_open/data/results` interchangeably to launch the command line tool.
+
 ```bash
 # From the command line
-$ python narps_open/data/results -h
-usage: results [-h] (-t TEAMS [TEAMS ...] | -a) [-r]
+narps_results -h
+  usage: results [-h] (-t TEAMS [TEAMS ...] | -a) [-r]
 
-Get Neurovault collection of results from NARPS teams.
+  Get Neurovault collection of results from NARPS teams.
 
-options:
-  -h, --help            show this help message and exit
-  -t TEAMS [TEAMS ...], --teams TEAMS [TEAMS ...]
-                        a list of team IDs
-  -a, --all             download results from all teams
-  -r, --rectify         rectify the results
+  options:
+    -h, --help            show this help message and exit
+    -t TEAMS [TEAMS ...], --teams TEAMS [TEAMS ...]
+                          a list of team IDs
+    -a, --all             download results from all teams
+    -r, --rectify         rectify the results
 
 # Either download all collections
-python narps_open/utils/results -a
+narps_results -a
 
 # Or select the ones you need
-python narps_open/utils/results -t 2T6S C88N L1A8
+narps_results -t 2T6S C88N L1A8
 
 # Download and rectify the collections
-python narps_open/utils/results -r -t 2T6S C88N L1A8
+narps_results -r -t 2T6S C88N L1A8
 ```
 
 The collections are also available as a single release on Zenodo, which you can download [here](https://zenodo.org/record/3528329/).
 
diff --git a/docs/description.md b/docs/description.md
index ac17f588..82f78097 100644
--- a/docs/description.md
+++ b/docs/description.md
@@ -12,8 +12,11 @@ The class `TeamDescription` of module `narps_open.data.description` acts as a pa
 
 You can use the command-line tool as follows. Option `-t` is for the team ID; option `-d` prints only one of the sub parts of the description, among: `general`, `exclusions`, `preprocessing`, `analysis`, `categorized_for_analysis`, `derived`, and `comments`. Options `--json` and `--md` let you choose the export format, either JSON or Markdown.
 
+> [!TIP]
+> In the following examples, use `narps_description` or `python narps_open/data/description` interchangeably to launch the command line tool.
+
 ```bash
-python narps_open/data/description -h
+narps_description -h
 # usage: __init__.py [-h] -t TEAM [-d {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived,comments}]
 #
 # Get description of a NARPS pipeline.
 #
 # options:
 #   -h, --help            show this help message and exit
 #   -t TEAM, --team TEAM  the team ID
 #   -d {general,exclusions,preprocessing,analysis,categorized_for_analysis,derived,comments}
 #                         the sub dictionary of team description
 #   --json                output team description as JSON
 #   --md                  output team description as Markdown
 
-python narps_open/data/description -t 2T6S --json
+narps_description -t 2T6S --json
 # {
 #     "general.teamID": "2T6S",
 #     "general.NV_collection_link": "https://neurovault.org/collections/4881/",
 # ...
 #     "preprocessing.preprocessing_order": "We used the provided preprocessed data by fMRIPprep 1.1.4 (Esteban, Markiewicz, et al. (2018); Esteban, Blair, et al. (2018); RRID:SCR_016216), which is based on Nipype 1.1.1 (Gorgolewski et al. (2011); Gorgolewski et al. (2018); RRID:SCR_002502) and we additionally conducted a spatial smoothing using the provided preprocessed data set and SPM12. Here, we attach the preprocessing steps described in the provided data set. \nAnatomical data preprocessing\nThe T1-weighted (T1w) image was corrected for intensity non-uniformity (INU) using N4BiasFieldCorrection (Tustison et al. 2010, ANTs 2.2.0), and used as T1w-reference throughout the workflow. The T1w-reference was then skull-stripped using antsBrainExtraction.sh (ANTs 2.2.0), using OASIS as target template. Brain surfaces we
 # ...
-python narps_open/data/description -t 2T6S -d general --json
+narps_description -t 2T6S -d general --json
 # {
 #     "teamID": "2T6S",
 #     "NV_collection_link": "https://neurovault.org/collections/4881/",
 #     "results_comments": "NA",
 #     "preregistered": "No",
 #     "link_preregistration_form": "We did not pre-register our analysis.",
 # ...
 #     "general_comments": "NA"
 # }
 
-python narps_open/data/description -t 2T6S --md
+narps_description -t 2T6S --md
 # # NARPS team description : 2T6S
 # ## General
 # * `teamID` : 2T6S
diff --git a/docs/running.md b/docs/running.md
index eb614eef..b2f7da77 100644
--- a/docs/running.md
+++ b/docs/running.md
@@ -2,10 +2,13 @@
 
 ## Using the runner application
 
-The `narps_open.runner` module allows to run pipelines from the command line :
+The `narps_open.runner` module lets you run pipelines from the command line.
+
+> [!TIP]
+> In the following examples, use `narps_open_runner` or `python narps_open/runner.py` interchangeably to launch the command line tool.
 
 ```bash
-python narps_open/runner.py -h
+narps_open_runner -h
 usage: runner.py [-h] -t TEAM (-r RANDOM | -s SUBJECTS [SUBJECTS ...]) [-g | -f]
 
 Run the pipelines from NARPS.
 
@@ -19,13 +22,14 @@
   -f, --first           run the first levels only (preprocessing + subjects + runs)
   -c, --check           check pipeline outputs (runner is not launched)
 
-python narps_open/runner.py -t 2T6S -s 001 006 020 100
-python narps_open/runner.py -t 2T6S -r 4
-python narps_open/runner.py -t 2T6S -r 4 -f
-python narps_open/runner.py -t 2T6S -r 4 -f -c # Check the output files without launching the runner
+narps_open_runner -t 2T6S -s 001 006 020 100
+narps_open_runner -t 2T6S -r 4
+narps_open_runner -t 2T6S -r 4 -f
+narps_open_runner -t 2T6S -r 4 -f -c # Check the output files without launching the runner
 ```
 
-In this usecase, the paths where to store the outputs and to the dataset are picked by the runner from the [configuration](docs/configuration.md).
+> [!NOTE]
+> In this use case, the runner picks the dataset path and the output locations from the [configuration](docs/configuration.md).
 
 ## Using the `PipelineRunner` object
 
diff --git a/docs/status.md b/docs/status.md
index 28492390..d461b1ea 100644
--- a/docs/status.md
+++ b/docs/status.md
@@ -46,8 +46,11 @@ report.markdown() # Returns a string containing the markdown
 
 You can also use the command-line tool as follows.
 
+> [!TIP]
+> In the following examples, use `narps_open_status` or `python narps_open/utils/status.py` interchangeably to launch the command line tool.
+
 ```bash
-python narps_open/utils/status -h
+narps_open_status -h
 # usage: status.py [-h] [--json | --md]
 #
 # Get a work progress status report for pipelines.
 #
 # options:
 #   -h, --help  show this help message and exit
 #   --json      output the report as JSON
 #   --md        output the report as Markdown
 
-python narps_open/utils/status --json
+narps_open_status --json
 # {
 #     "08MQ": {
 #         "softwares": "FSL",
 #         "fmriprep": "No",
 #         "issues": {},
 #         "excluded": "No",
 #         "reproducibility": 3,
 #         "reproducibility_comment": "",
 #         "issues": [],
 #         "pulls": [],
 #         "status": "2-idle"
 #     },
 # ...
 
-python narps_open/utils/status --md
+narps_open_status --md
 # ...
 # | team_id | status | main software | fmriprep used ? | related issues | related pull requests | excluded from NARPS analysis | reproducibility |
 # | --- |:---:| --- | --- | --- | --- | --- | --- |
diff --git a/docs/testing.md b/docs/testing.md
index 5294ea9b..1ea3b66c 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -2,6 +2,13 @@
 
 :mega: This file describes the test suite and features for the project.
 
+## Test dependencies
+
+Before using the test suite, make sure all the dependencies are installed. After step 5 of the [installation process](docs/install.md), run this command:
+```bash
+pip install .[tests]
+```
+
 ## Static analysis
 
 We use [*pylint*](http://pylint.pycqa.org/en/latest/) to run static code analysis.
@@ -24,7 +31,7 @@ black ./narps_open/runner.py
 
 ## Automatic tests
 
-Use [*pytest*](https://docs.pytest.org/en/6.2.x/contents.html) to run automatic testing and its [*pytest-cov*](https://pytest-cov.readthedocs.io/en/latest/) plugin to control code coverage. Furthermore, [*pytest-helpers-namespace*](https://pypi.org/project/pytest-helpers-namespace/) enables to register helper functions.
+We use [*pytest*](https://docs.pytest.org/en/6.2.x/contents.html) to run automatic testing and its [*pytest-cov*](https://pytest-cov.readthedocs.io/en/latest/) plugin to control code coverage. Furthermore, [*pytest-helpers-namespace*](https://pypi.org/project/pytest-helpers-namespace/) enables registering helper functions.
 
 > The pytest framework makes it easy to write small tests, yet scales to support complex functional testing for applications and libraries.
 
@@ -36,6 +43,21 @@
 Tests can be launched manually or while using CI (Continuous Integration).
 
 * To run tests with a given mark 'mark': `pytest -m 'mark'`
 * To create code coverage data: `coverage run -m pytest ./tests` then `coverage report` to see the code coverage result or `coverage xml` to output a .xml report file
 
+## Command line tool
+
+We created the simple command line tool `narps_open_tester` to help test the outcome of one pipeline.
+
+> [!WARNING]
+> This command must be launched from inside the repository's root directory, because it needs to access the `tests` directory relative to the current working directory.
+
+```bash
+narps_open_tester -t 08MQ
+```
+
+This will run the pipeline for the requested team (here 08MQ) on subsets of subjects (20, 40, 60, 80 and 108). For each subset, the outputs of the pipeline (statistical maps for each of the 9 hypotheses) will be compared with the original results from the team using a Pearson correlation computation. At each step, if one of the correlation scores is below the threshold (see `correlation_thresholds` defined in `narps_open/utils/configuration/testing_config.toml`), the test ends. Otherwise, it proceeds to the next step, i.e. the next subset of subjects.
+
+Once finished, a text file report (`test_pipeline-*.txt`) is created, containing all the computed correlation values. A minimal sketch of this loop is given at the end of this page.
+
 ## Configuration files for testing
 
 * `pytest.ini` is a global configuration file for using pytest (see reference [here](https://docs.pytest.org/en/7.1.x/reference/customize.html)). It allows [registering markers](https://docs.pytest.org/en/7.1.x/example/markers.html) that help to better identify tests. Note that `pytest.ini` could be replaced by data inside `pyproject.toml` in the next versions.
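+The sketch below expands on the `narps_open_tester` loop described above. It is an illustration only, not the actual test-suite code: `run_pipeline_and_correlate` is a hypothetical helper standing in for the pipeline run and the Pearson correlation computation, and the thresholds are assumed to sit at the top level of `testing_config.toml`.
+
+```python
+# Minimal sketch of the narps_open_tester loop (illustration only)
+import tomllib  # standard library, Python >= 3.11
+
+def run_pipeline_and_correlate(team_id: str, nb_subjects: int) -> list[float]:
+    """ Hypothetical helper: run the team's pipeline on nb_subjects subjects,
+        then return the Pearson correlation between each of the 9 hypothesis
+        maps and the corresponding original result from the team. """
+    return [1.0] * 9  # placeholder values
+
+team_id = '08MQ'
+with open('narps_open/utils/configuration/testing_config.toml', 'rb') as file:
+    thresholds = tomllib.load(file)['correlation_thresholds']  # assumed key location
+
+report_lines = []
+for nb_subjects, threshold in zip([20, 40, 60, 80, 108], thresholds):
+    scores = run_pipeline_and_correlate(team_id, nb_subjects)
+    report_lines.append(f'{nb_subjects} subjects: {scores}')
+    if any(score < threshold for score in scores):
+        break  # the test ends at the first subset below threshold
+
+with open(f'test_pipeline-{team_id}.txt', 'w', encoding='utf-8') as report:
+    report.write('\n'.join(report_lines))
+```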
diff --git a/narps_open/data/description/__main__.py b/narps_open/data/description/__main__.py index e538ff4d..b6c9ead3 100644 --- a/narps_open/data/description/__main__.py +++ b/narps_open/data/description/__main__.py @@ -8,49 +8,55 @@ from narps_open.data.description import TeamDescription -# Parse arguments -parser = ArgumentParser(description='Get description of a NARPS pipeline.') -parser.add_argument('-t', '--team', type=str, required=True, - help='the team ID') -parser.add_argument('-d', '--dictionary', type=str, required=False, - choices=[ - 'general', - 'exclusions', - 'preprocessing', - 'analysis', - 'categorized_for_analysis', - 'derived', - 'comments' - ], - help='the sub dictionary of team description') -formats = parser.add_mutually_exclusive_group(required = False) -formats.add_argument('--json', action='store_true', help='output team description as JSON') -formats.add_argument('--md', action='store_true', help='output team description as Markdown') -arguments = parser.parse_args() - -# Initialize a TeamDescription -information = TeamDescription(team_id = arguments.team) - -# Output description -if arguments.md and arguments.dictionary is not None: - print('Sub dictionaries cannot be exported as Markdown yet.') - print('Print the whole description instead.') -elif arguments.md: - print(information.markdown()) -else: - if arguments.dictionary == 'general': - print(dumps(information.general, indent = 4)) - elif arguments.dictionary == 'exclusions': - print(dumps(information.exclusions, indent = 4)) - elif arguments.dictionary == 'preprocessing': - print(dumps(information.preprocessing, indent = 4)) - elif arguments.dictionary == 'analysis': - print(dumps(information.analysis, indent = 4)) - elif arguments.dictionary == 'categorized_for_analysis': - print(dumps(information.categorized_for_analysis, indent = 4)) - elif arguments.dictionary == 'derived': - print(dumps(information.derived, indent = 4)) - elif arguments.dictionary == 'comments': - print(dumps(information.comments, indent = 4)) +def main(): + """ Entry-point for the command line tool narps_description """ + + # Parse arguments + parser = ArgumentParser(description='Get description of a NARPS pipeline.') + parser.add_argument('-t', '--team', type=str, required=True, + help='the team ID') + parser.add_argument('-d', '--dictionary', type=str, required=False, + choices=[ + 'general', + 'exclusions', + 'preprocessing', + 'analysis', + 'categorized_for_analysis', + 'derived', + 'comments' + ], + help='the sub dictionary of team description') + formats = parser.add_mutually_exclusive_group(required = False) + formats.add_argument('--json', action='store_true', help='output team description as JSON') + formats.add_argument('--md', action='store_true', help='output team description as Markdown') + arguments = parser.parse_args() + + # Initialize a TeamDescription + information = TeamDescription(team_id = arguments.team) + + # Output description + if arguments.md and arguments.dictionary is not None: + print('Sub dictionaries cannot be exported as Markdown yet.') + print('Print the whole description instead.') + elif arguments.md: + print(information.markdown()) else: - print(dumps(information, indent = 4)) + if arguments.dictionary == 'general': + print(dumps(information.general, indent = 4)) + elif arguments.dictionary == 'exclusions': + print(dumps(information.exclusions, indent = 4)) + elif arguments.dictionary == 'preprocessing': + print(dumps(information.preprocessing, indent = 4)) + elif arguments.dictionary == 
'analysis': + print(dumps(information.analysis, indent = 4)) + elif arguments.dictionary == 'categorized_for_analysis': + print(dumps(information.categorized_for_analysis, indent = 4)) + elif arguments.dictionary == 'derived': + print(dumps(information.derived, indent = 4)) + elif arguments.dictionary == 'comments': + print(dumps(information.comments, indent = 4)) + else: + print(dumps(information, indent = 4)) + +if __name__ == '__main__': + main() diff --git a/narps_open/data/results/__main__.py b/narps_open/data/results/__main__.py index b9f1d728..88111b87 100644 --- a/narps_open/data/results/__main__.py +++ b/narps_open/data/results/__main__.py @@ -8,27 +8,34 @@ from narps_open.data.results import ResultsCollectionFactory from narps_open.pipelines import implemented_pipelines -# Parse arguments -parser = ArgumentParser(description='Get Neurovault collection of results from NARPS teams.') -group = parser.add_mutually_exclusive_group(required = True) -group.add_argument('-t', '--teams', nargs='+', type=str, action='extend', - help='a list of team IDs') -group.add_argument('-a', '--all', action='store_true', help='download results from all teams') -parser.add_argument('-r', '--rectify', action='store_true', default = False, required = False, - help='rectify the results') -arguments = parser.parse_args() - -factory = ResultsCollectionFactory() - -if arguments.all: - for team_id, _ in implemented_pipelines.items(): - collection = factory.get_collection(team_id) - collection.download() - if arguments.rectify: - collection.rectify() -else: - for team in arguments.teams: - collection = factory.get_collection(team) - collection.download() - if arguments.rectify: - collection.rectify() + +def main(): + """ Entry-point for the command line tool narps_results """ + + # Parse arguments + parser = ArgumentParser(description='Get Neurovault collection of results from NARPS teams.') + group = parser.add_mutually_exclusive_group(required = True) + group.add_argument('-t', '--teams', nargs='+', type=str, action='extend', + help='a list of team IDs') + group.add_argument('-a', '--all', action='store_true', help='download results from all teams') + parser.add_argument('-r', '--rectify', action='store_true', default = False, required = False, + help='rectify the results') + arguments = parser.parse_args() + + factory = ResultsCollectionFactory() + + if arguments.all: + for team_id, _ in implemented_pipelines.items(): + collection = factory.get_collection(team_id) + collection.download() + if arguments.rectify: + collection.rectify() + else: + for team in arguments.teams: + collection = factory.get_collection(team) + collection.download() + if arguments.rectify: + collection.rectify() + +if __name__ == '__main__': + main() diff --git a/narps_open/runner.py b/narps_open/runner.py index 0776c4aa..32c80180 100644 --- a/narps_open/runner.py +++ b/narps_open/runner.py @@ -152,7 +152,8 @@ def get_missing_group_level_outputs(self): return [f for f in files if not isfile(f)] -if __name__ == '__main__': +def main(): + """ Entry-point for the command line tool narps_open_runner """ # Parse arguments parser = ArgumentParser(description='Run the pipelines from NARPS.') @@ -191,7 +192,6 @@ def get_missing_group_level_outputs(self): # Check data if arguments.check: - missing_files = [] print('Missing files for team', arguments.team, 'after running', len(runner.pipeline.subject_list), 'subjects:') if not arguments.group: @@ -202,3 +202,6 @@ def get_missing_group_level_outputs(self): # Start the runner else: 
runner.start(arguments.first, arguments.group)
+
+if __name__ == '__main__':
+    main()
diff --git a/narps_open/tester.py b/narps_open/tester.py
new file mode 100644
index 00000000..1a2cf284
--- /dev/null
+++ b/narps_open/tester.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+# coding: utf-8
+
+""" This module compares pipeline output with original team results """
+
+import sys
+from argparse import ArgumentParser
+
+import pytest
+
+def main():
+    """ Entry-point for the command line tool narps_open_tester """
+
+    # Parse arguments
+    parser = ArgumentParser(description='Test the pipelines from NARPS.')
+    parser.add_argument('-t', '--team', type=str, required=True,
+        help='the team ID')
+    arguments = parser.parse_args()
+
+    sys.exit(pytest.main([
+        '-s',
+        '-q',
+        '-x',
+        f'tests/pipelines/test_team_{arguments.team}.py',
+        '-m',
+        'pipeline_test']))
+
+if __name__ == '__main__':
+    main()
diff --git a/narps_open/utils/status.py b/narps_open/utils/status.py
index 0058b40b..4f80b11f 100644
--- a/narps_open/utils/status.py
+++ b/narps_open/utils/status.py
@@ -22,7 +22,6 @@ def get_opened_issues():
     request_url = 'https://api.github.com/repos/Inria-Empenn/narps_open_pipelines'
     response = get(request_url, timeout = 2)
     response.raise_for_status()
-    nb_issues = response.json()['open_issues']
 
     # Get all opened issues
     request_url = 'https://api.github.com/repos/Inria-Empenn/narps_open_pipelines/issues'
@@ -185,11 +184,14 @@ def markdown(self):
             reproducibility_ranking += ':star:'
             for _ in range(4-team_values['reproducibility']):
                 reproducibility_ranking += ':black_small_square:'
-            output_markdown += f'| {reproducibility_ranking}
<br />{team_values["reproducibility_comment"]} |\n'
+            output_markdown += f'| {reproducibility_ranking}<br />'
+            output_markdown += f'{team_values["reproducibility_comment"]} |\n'
 
     return output_markdown
 
-if __name__ == '__main__':
+def main():
+    """ Entry-point for the command line tool narps_open_status """
+
     # Parse arguments
     parser = ArgumentParser(description='Get a work progress status report for pipelines.')
     formats = parser.add_mutually_exclusive_group(required = False)
@@ -204,3 +206,6 @@
         print(report.markdown())
     else:
         print(report)
+
+if __name__ == '__main__':
+    main()
diff --git a/setup.py b/setup.py
index 2c6c9b06..91a2d63a 100644
--- a/setup.py
+++ b/setup.py
@@ -63,5 +63,14 @@
         ('narps_open/data/description', ['narps_open/data/description/analysis_pipelines_comments.tsv']),
         ('narps_open/data/description', ['narps_open/data/description/analysis_pipelines_derived_descriptions.tsv']),
         ('narps_open/data/description', ['narps_open/data/description/analysis_pipelines_full_descriptions.tsv'])
-    ]
+    ],
+    entry_points = {
+        'console_scripts': [
+            'narps_open_runner = narps_open.runner:main',
+            'narps_open_tester = narps_open.tester:main',
+            'narps_open_status = narps_open.utils.status:main',
+            'narps_description = narps_open.data.description.__main__:main',
+            'narps_results = narps_open.data.results.__main__:main'
+        ]
+    }
 )
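For reference, each `'name = package.module:function'` string under `console_scripts` above makes pip generate an executable called `name` that imports the module and calls the function; this is how the five tools added by this patch reach their `main()` entry points. Below is a minimal sketch of how these entry points can be inspected after installation (`pip install .`). It only uses the standard `importlib.metadata` API; the `group` keyword requires Python 3.10 or later.

```python
# Sketch: list the installed console scripts and resolve their target functions
from importlib.metadata import entry_points

for script in entry_points(group='console_scripts'):
    if script.name.startswith('narps'):
        print(f'{script.name} -> {script.value}')  # e.g. narps_open_runner -> narps_open.runner:main
        command = script.load()  # the main() function the generated executable calls
```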