diff --git a/.all-contributorsrc b/.all-contributorsrc index 180ba8db8..07a6f0f9b 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -34,6 +34,16 @@ "question" ] }, + { + "login": "marco7877", + "name": "Marco Flores-Coronado", + "avatar_url": "https://avatars.githubusercontent.com/u/56403434?v=4", + "profile": "https://github.com/marco7877", + "contributions": [ + "ideas", + "doc" + ] + }, { "login": "javiergcas", "name": "Javier Gonzalez-Castillo", @@ -78,6 +88,28 @@ "question" ] }, + { + "login": "pmolfese", + "name": "Pete Molfese", + "avatar_url": "https://avatars.githubusercontent.com/u/3665743?v=4", + "profile": "https://github.com/pmolfese", + "contributions": [ + "code" + ] + }, + { + "login": "n-reddy", + "name": "Neha Reddy", + "avatar_url": "https://avatars.githubusercontent.com/u/58482773?v=4", + "profile": "https://github.com/n-reddy", + "contributions": [ + "bug", + "doc", + "ideas", + "question", + "review" + ] + }, { "login": "tsalo", "name": "Taylor Salo", @@ -252,4 +284,4 @@ "repoHost": "https://github.com", "skipCi": false, "commitConvention": "angular" -} +} \ No newline at end of file diff --git a/.circleci/config.yml b/.circleci/config.yml index 650ff3788..896d7c212 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -7,112 +7,44 @@ orbs: codecov: codecov/codecov@1.0.5 jobs: - makeenv_37: + makeenv_38: docker: - image: continuumio/miniconda3 working_directory: /tmp/src/tedana steps: - checkout - restore_cache: - key: conda-py37-v2-{{ checksum "setup.cfg" }} + key: conda-py38-v2-{{ checksum "pyproject.toml" }} - run: name: Generate environment command: | - if [ ! -d /opt/conda/envs/tedana_py37 ]; then - conda create -yq -n tedana_py37 python=3.7 - source activate tedana_py37 - pip install .[tests] + if [ ! 
-d /opt/conda/envs/tedana_py38 ]; then + conda create -yq -n tedana_py38 python=3.8 + source activate tedana_py38 + pip install -e .[tests] fi - save_cache: - key: conda-py37-v2-{{ checksum "setup.cfg" }} + key: conda-py38-v2-{{ checksum "pyproject.toml" }} paths: - - /opt/conda/envs/tedana_py37 - - unittest_36: - docker: - - image: continuumio/miniconda3 - working_directory: /tmp/src/tedana - steps: - - checkout - - restore_cache: - key: conda-py36-v2-{{ checksum "setup.cfg" }} - - run: - name: Generate environment - command: | - apt-get update - apt-get install -yqq make - if [ ! -d /opt/conda/envs/tedana_py36 ]; then - conda create -yq -n tedana_py36 python=3.6 - source activate tedana_py36 - pip install .[tests] - fi - - run: - name: Running unit tests - command: | - source activate tedana_py36 - make unittest - mkdir /tmp/src/coverage - mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py36 - - save_cache: - key: conda-py36-v2-{{ checksum "setup.cfg" }} - paths: - - /opt/conda/envs/tedana_py36 - - persist_to_workspace: - root: /tmp - paths: - - src/coverage/.coverage.py36 + - /opt/conda/envs/tedana_py38 - unittest_37: + unittest_38: docker: - image: continuumio/miniconda3 working_directory: /tmp/src/tedana steps: - checkout - restore_cache: - key: conda-py37-v2-{{ checksum "setup.cfg" }} + key: conda-py38-v2-{{ checksum "pyproject.toml" }} - run: name: Running unit tests command: | apt-get update apt-get install -y make - source activate tedana_py37 # depends on makeenv_37 - make unittest - mkdir /tmp/src/coverage - mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py37 - - persist_to_workspace: - root: /tmp - paths: - - src/coverage/.coverage.py37 - - unittest_38: - docker: - - image: continuumio/miniconda3 - working_directory: /tmp/src/tedana - steps: - - checkout - - restore_cache: - key: conda-py38-v2-{{ checksum "setup.cfg" }} - - run: - name: Generate environment - command: | - apt-get update - apt-get install -yqq make - if [ ! 
-d /opt/conda/envs/tedana_py38 ]; then - conda create -yq -n tedana_py38 python=3.8 - source activate tedana_py38 - pip install .[tests] - fi - - run: - name: Running unit tests - command: | - source activate tedana_py38 + source activate tedana_py38 # depends on makeenv_38 make unittest mkdir /tmp/src/coverage mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py38 - - save_cache: - key: conda-py38-v2-{{ checksum "setup.cfg" }} - paths: - - /opt/conda/envs/tedana_py38 - persist_to_workspace: root: /tmp paths: @@ -125,7 +57,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py39-v2-{{ checksum "setup.cfg" }} + key: conda-py39-v2-{{ checksum "pyproject.toml" }} - run: name: Generate environment command: | @@ -144,7 +76,7 @@ jobs: mkdir /tmp/src/coverage mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py39 - save_cache: - key: conda-py39-v2-{{ checksum "setup.cfg" }} + key: conda-py39-v2-{{ checksum "pyproject.toml" }} paths: - /opt/conda/envs/tedana_py39 - persist_to_workspace: @@ -159,7 +91,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py310-v1-{{ checksum "setup.cfg" }} + key: conda-py310-v1-{{ checksum "pyproject.toml" }} - run: name: Generate environment command: | @@ -178,7 +110,7 @@ jobs: mkdir /tmp/src/coverage mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py310 - save_cache: - key: conda-py310-v1-{{ checksum "setup.cfg" }} + key: conda-py310-v1-{{ checksum "pyproject.toml" }} paths: - /opt/conda/envs/tedana_py310 - persist_to_workspace: @@ -186,7 +118,6 @@ jobs: paths: - src/coverage/.coverage.py310 - style_check: docker: - image: continuumio/miniconda3 @@ -194,13 +125,13 @@ jobs: steps: - checkout - restore_cache: - key: conda-py37-v2-{{ checksum "setup.cfg" }} + key: conda-py38-v2-{{ checksum "pyproject.toml" }} - run: name: Style check command: | apt-get update apt-get install -yqq make - source activate tedana_py37 # depends on makeenv37 + source activate tedana_py38 # depends on makeenv38 make lint 
three-echo: @@ -210,14 +141,14 @@ jobs: steps: - checkout - restore_cache: - key: conda-py37-v2-{{ checksum "setup.cfg" }} + key: conda-py38-v2-{{ checksum "pyproject.toml" }} - run: name: Run integration tests no_output_timeout: 40m command: | apt-get update apt-get install -yqq make - source activate tedana_py37 # depends on makeenv_37 + source activate tedana_py38 # depends on makeenv_38 make three-echo mkdir /tmp/src/coverage mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.three-echo @@ -235,14 +166,14 @@ jobs: steps: - checkout - restore_cache: - key: conda-py37-v2-{{ checksum "setup.cfg" }} + key: conda-py38-v2-{{ checksum "pyproject.toml" }} - run: name: Run integration tests no_output_timeout: 40m command: | apt-get update apt-get install -yqq make - source activate tedana_py37 # depends on makeenv_37 + source activate tedana_py38 # depends on makeenv_38 make four-echo mkdir /tmp/src/coverage mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.four-echo @@ -260,14 +191,14 @@ jobs: steps: - checkout - restore_cache: - key: conda-py37-v2-{{ checksum "setup.cfg" }} + key: conda-py38-v2-{{ checksum "pyproject.toml" }} - run: name: Run integration tests no_output_timeout: 40m command: | apt-get update apt-get install -yqq make - source activate tedana_py37 # depends on makeenv_37 + source activate tedana_py38 # depends on makeenv_38 make five-echo mkdir /tmp/src/coverage mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.five-echo @@ -278,6 +209,32 @@ jobs: paths: - src/coverage/.coverage.five-echo + reclassify: + docker: + - image: continuumio/miniconda3 + working_directory: /tmp/src/tedana + steps: + - checkout + - restore_cache: + key: conda-py38-v2-{{ checksum "pyproject.toml" }} + - run: + name: Run integration tests + no_output_timeout: 40m + command: | + apt-get update + apt-get install -yqq make + source activate tedana_py38 # depends on makeenv_38 + make reclassify + mkdir /tmp/src/coverage + mv /tmp/src/tedana/.coverage 
/tmp/src/coverage/.coverage.reclassify + - store_artifacts: + path: /tmp/data + - persist_to_workspace: + root: /tmp + paths: + - src/coverage/.coverage.reclassify + + t2smap: docker: - image: continuumio/miniconda3 @@ -285,14 +242,14 @@ jobs: steps: - checkout - restore_cache: - key: conda-py37-v2-{{ checksum "setup.cfg" }} + key: conda-py38-v2-{{ checksum "pyproject.toml" }} - run: name: Run integration tests no_output_timeout: 40m command: | apt-get update apt-get install -yqq make - source activate tedana_py37 # depends on makeenv_37 + source activate tedana_py38 # depends on makeenv_38 make t2smap mkdir /tmp/src/coverage mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.t2smap @@ -312,13 +269,13 @@ jobs: at: /tmp - checkout - restore_cache: - key: conda-py37-v2-{{ checksum "setup.cfg" }} + key: conda-py38-v2-{{ checksum "pyproject.toml" }} - run: name: Merge coverage files command: | apt-get update apt-get install -yqq curl - source activate tedana_py37 # depends on makeenv37 + source activate tedana_py38 # depends on makeenv38 cd /tmp/src/coverage/ coverage combine coverage xml @@ -331,37 +288,37 @@ workflows: version: 2.1 build_test: jobs: - - makeenv_37 - - unittest_36 - - unittest_37: + - makeenv_38 + - unittest_38: requires: - - makeenv_37 + - makeenv_38 - style_check: requires: - - makeenv_37 + - makeenv_38 - three-echo: requires: - - makeenv_37 + - makeenv_38 - four-echo: requires: - - makeenv_37 + - makeenv_38 - five-echo: requires: - - makeenv_37 + - makeenv_38 + - reclassify: + requires: + - makeenv_38 - t2smap: requires: - - makeenv_37 - - unittest_38 + - makeenv_38 - unittest_39 - unittest_310 - merge_coverage: requires: - - unittest_36 - - unittest_37 - unittest_38 - unittest_39 - unittest_310 - three-echo - four-echo - five-echo + - reclassify - t2smap diff --git a/.codecov.yml b/.codecov.yml index d5fe2b824..ef3135e15 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -16,4 +16,3 @@ coverage: ignore: - "tedana/tests/" - - 
"tedana/_version.py" diff --git a/.git_archival.txt b/.git_archival.txt new file mode 100644 index 000000000..8fb235d70 --- /dev/null +++ b/.git_archival.txt @@ -0,0 +1,4 @@ +node: $Format:%H$ +node-date: $Format:%cI$ +describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ +ref-names: $Format:%D$ diff --git a/.gitattributes b/.gitattributes index d04993667..c42bb4796 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,5 @@ -tedana/_version.py export-subst +.git_archival.txt export-subst + # Set the default behavior, in case people don't have core.autocrlf set. * text=auto diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 561debfc0..a1ab31fca 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -17,7 +17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: '3.7' + python-version: '3.8' - name: Install dependencies run: | python -m pip install --upgrade pip @@ -27,5 +27,5 @@ jobs: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | - python setup.py sdist bdist_wheel + python -m build --sdist --wheel --outdir dist/ . 
twine upload dist/* diff --git a/.gitignore b/.gitignore index d7d4a4a82..a52ae4872 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,10 @@ +# setuptools-scm +tedana/_version.py + .DS_Store docs/generated/ .pytest_cache/ +.testing_data_cache/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/.readthedocs.yml b/.readthedocs.yml index f994a7eb1..3976cddad 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,19 +1,19 @@ -# .readthedocs.yml -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required version: 2 -# Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py +build: + os: ubuntu-22.04 + tools: + python: "3.8" + jobs: + post_checkout: + - git fetch --unshallow + python: - version: 3.7 install: - method: pip path: . extra_requirements: - doc - system_packages: true diff --git a/Dockerfile_dev b/Dockerfile_dev index 7e908ab98..1b48d6cc7 100644 --- a/Dockerfile_dev +++ b/Dockerfile_dev @@ -41,14 +41,14 @@ ENV LANG="C.UTF-8" \ RUN git clone https://github.com/me-ica/tedana.git /tedana -RUN bash -c "conda create -yq --name tedana_py36 python=3.6 pip \ - && source activate tedana_py36 \ +RUN bash -c "conda create -yq --name tedana_env python=3.8 pip \ + && source activate tedana_env \ && pip install /tedana[all] \ && pip install ipython \ && rm -rf ~/.cache/pip/* \ && conda clean --all" -RUN /opt/conda/envs/tedana_py36/bin/ipython profile create \ +RUN /opt/conda/envs/tedana_env/bin/ipython profile create \ && sed -i 's/#c.InteractiveShellApp.extensions = \[\]/ \ c.InteractiveShellApp.extensions = \['\''autoreload'\''\]/g' \ /root/.ipython/profile_default/ipython_config.py @@ -57,7 +57,7 @@ RUN mkdir -p /tedana/dev_tools COPY ["./dev_tools", "/tedana/dev_tools"] -RUN sed -i '$isource activate tedana_py36' $ND_ENTRYPOINT +RUN sed -i '$isource activate tedana_env' $ND_ENTRYPOINT RUN sed -i '$isource /tedana/dev_tools/run_tests.sh' 
$ND_ENTRYPOINT diff --git a/MANIFEST.in b/MANIFEST.in index bfe1a3926..e69de29bb 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +0,0 @@ -include versioneer.py -include tedana/_version.py diff --git a/Makefile b/Makefile index 2c0e026ce..16fead0d9 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ .PHONY: all lint -all_tests: lint unittest three-echo four-echo five-echo t2smap +all_tests: lint unittest three-echo four-echo five-echo reclassify t2smap help: @echo "Please use 'make ' where is one of:" @@ -12,6 +12,7 @@ help: @echo " all_tests to run 'lint', 'unittest', and 'integration'" lint: + @black --check --diff tedana @flake8 tedana unittest: @@ -23,11 +24,11 @@ three-echo: four-echo: @py.test --log-cli-level=INFO --cov-append --cov-report term-missing --cov=tedana -k test_integration_four_echo tedana/tests/test_integration.py -four-echo: - @py.test --cov-append --cov-report term-missing --cov=tedana -k test_integration_four_echo tedana/tests/test_integration.py - five-echo: @py.test --log-cli-level=INFO --cov-append --cov-report term-missing --cov=tedana -k test_integration_five_echo tedana/tests/test_integration.py +reclassify: + @py.test --log-cli-level=INFO --cov-append --cov-report term-missing --cov=tedana -k test_integration_reclassify tedana/tests/test_integration.py + t2smap: @py.test --log-cli-level=INFO --cov-append --cov-report term-missing --cov=tedana -k test_integration_t2smap tedana/tests/test_integration.py diff --git a/README.md b/README.md index 78e170762..cca32a220 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ If you use `tedana`, please cite the following papers, as well as our [most rece ### Use `tedana` with your local Python environment You'll need to set up a working development environment to use `tedana`. 
-To set up a local environment, you will need Python >=3.6 and the following packages will need to be installed: +To set up a local environment, you will need Python >=3.8 and the following packages will need to be installed: * [numpy](http://www.numpy.org/) * [scipy](https://www.scipy.org/) diff --git a/contributions.md b/contributions.md index e7247aa51..eaa2b9b0c 100644 --- a/contributions.md +++ b/contributions.md @@ -1,20 +1,24 @@ # Contributions ## Contributors + We appreciate all of our contributors! Each contributor below has made a statement of how they feel they've contributed to `tedana`. + - [**Logan Dowdle**][logan-dowdle] helps folks get multi-echo data collection going on their scanners, tries to keep up with the increasing number of multi-echo papers, likes making figures that explain what tedana has done to the data, and occasionally adds a new feature (with lots of help!). -- [**Elizabeth DuPre**][elizabeth-dupre] initiated the tedana project in 2017, building on the ME-ICA codebase. She continued to develop the code and began actively creating the community structure as part of the fifth Mozilla Open Leaders cohort (mentored by Kirstie Whitaker). Since her time as interim BDFL, Elizabeth has been involved in most aspects of the project -- although she currently focuses primarily on improving tedana's integration with the broader neuroimaging ecosystem. +- [**Elizabeth DuPre**][elizabeth-dupre] initiated the tedana project in 2017, building on the ME-ICA codebase. She continued to develop the code and began actively creating the community structure as part of the fifth Mozilla Open Leaders cohort (mentored by Kirstie Whitaker). Since her time as BDFL, Elizabeth has been involved in most aspects of the project -- although she currently focuses primarily on improving tedana's integration with the broader neuroimaging ecosystem. 
- [**Javier Gonzalez-Castillo**][javier-gonzalez-castillo] contributed to the development of dimensionality reduction and decomposition algorithms in tedana, as well as to the development of the interactive reports. -- [**Dan Handwerker**][dan-handwerker] helps with project management (people wrangling & documentation), led the organization for the 2019 tedana hackathon, provides conceptual feedback on many aspects of the code, contributes to documentation, and, once in a while, even contributes to the code. -- [**Taylor Salo**][taylor-salo] helps with code-related issues and with engaging new contributors to tedana. -- [**Joshua Teves**][joshua-teves] helps manage issues and pull requests for a variety of both administrative and code-specific tasks. -- [**Eneko Uruñuela**][eneko-urunuela] helps with the development of dimensionality reduction and decomposition algorithms in tedana, with Principal Component Analysis to be more specific, and contributed to the development of the interactive reports. +- [**Dan Handwerker**][dan-handwerker] helps with project management (people wrangling & documentation), led the organization for the 2019 tedana hackathon, provides conceptual feedback on many aspects of the code, contributes to documentation, and contributes to the code, particularly modularization and improvements to the component selection process. +- [**Taylor Salo**][taylor-salo] helps and has contributed to many parts of the code, including modularizing the metric calculation process, and helps with engaging new contributors to tedana. +- [**Joshua Teves**][joshua-teves] made many contributions to the code, including improving stability and modularization, and helped manage issues and pull requests for a variety of both administrative and code-specific tasks.
+- [**Eneko Uruñuela**][eneko-urunuela] helps with the development of dimensionality reduction and decomposition algorithms in tedana, with Principal Component Analysis to be more specific, and contributed to the development of the interactive reports and RICA. - [**Maryam Vaziri-Pashkam**][maryam-vaziri-pashkam] helps with the tedana documentation to make it easier to understand for beginners. ## Funding and Operational Support + Special thanks to the following sources of funding and operational support for `tedana`: + - National Institutes of Mental Health, [Section on Functional Imaging Methods][sfim], for supporting the 2019 `tedana` hackathon. - National Institutes of Health for supporting the 2019 AFNI Code Convergence, where work in the 2019 `tedana` hackathon was continued. - The Mozilla Open Leaders program for support in developing the tedana community structure as part of [the ME-BIDS project](https://medium.com/read-write-participate/brain-imaging-in-the-open-aac7c17bcf69). diff --git a/docs/_static/decision_tree_kundu.png b/docs/_static/decision_tree_kundu.png new file mode 100644 index 000000000..76b646374 Binary files /dev/null and b/docs/_static/decision_tree_kundu.png differ diff --git a/docs/_static/decision_tree_kundu.tex b/docs/_static/decision_tree_kundu.tex new file mode 100644 index 000000000..e2192171c --- /dev/null +++ b/docs/_static/decision_tree_kundu.tex @@ -0,0 +1,157 @@ +\documentclass[border=2pt]{standalone} +\usepackage[utf8]{inputenc} % Required for inserting images +\usepackage{tikz} +\usepackage{helvet} +\usetikzlibrary{shapes.geometric, arrows} +\pagecolor{white} + +%-------------------------defining colorblind friendly colors +% Using pale color scheme in Figure 6 +% by Paul Tol https://personal.sron.nl/~pault/ +\definecolor{cbblue}{HTML}{BBCCEE} +\definecolor{cbcyan}{HTML}{CCEEFF} +\definecolor{cbgreen}{HTML}{CCDDAA} +\definecolor{cbyellow}{HTML}{EEEEBB} +\definecolor{cbred}{HTML}{FFCCCC} +\definecolor{cbgrey}{HTML}{DDDDDD} + 
+% -------------------------defining nodes +\tikzstyle{input} = [trapezium, trapezium left angle =80, trapezium right angle = 100, +minimum width= 3cm, minimum height=0.5cm, text centered, draw=black, fill=cbblue] +\tikzstyle{process} = [rectangle, minimum width = 3cm, minimum height = 1cm, +text centered, , text width=4cm,draw=black, fill=cbgrey] +\tikzstyle{decision} = [diamond, minimum width = 3cm, minimum height = 1cm, +text centered, , text width=3.5cm, draw=black, fill=cbcyan] +\tikzstyle{changeclass} = [rectangle, rounded corners, minimum width=3cm, minimum height=1cm, +text centered, draw = black, fill=cbyellow] +\tikzstyle{reject} = [trapezium, trapezium left angle =80, trapezium right angle = 100, +minimum width= 1cm, minimum height=0.5cm, text centered, draw=black, fill=cbred] +\tikzstyle{accept} = [trapezium, trapezium left angle =80, trapezium right angle = 100, +minimum width= 1cm, minimum height=0.5cm, text centered, draw=black, fill=cbgreen] + +% -------------------------defining connectors +\tikzstyle{arrow} = [thick,->, >=stealth] +\tikzstyle{line} = [thick,-,>=stealth] +\begin{document} + +% ------------------------- tikz image (flow chart) +\begin{tikzpicture}[node distance = 2cm] + +% ------------------------- nodes ------------------------- + +% ----- node: 0 +\node(0)[input, label={90:\textbf{Kundu Decision Tree (Tedana implementation)}}, label={180:$node\ 0$}] {Set all components to unclassified}; +% ----- node: 1 +\node(1)[decision, below of=0,label={180:$node\ 1$}, yshift=-1.5cm]{$\rho$ $>$ $\kappa$}; +\node(rej0)[reject, right of=1, xshift=3cm, align=center]{Unlikely BOLD\\$\rightarrow$ Reject}; +% ----- node: 2 +\node(2)[decision, below of=1,label={180:$node\ 2$} ,label={[align=center] 315: voxel counts for signif fit\\of multi-echo data\\to $T_2$ or $S_0$ decay models}, yshift=-3.5cm]{$n \, FS_0 \, > \, n \, FT_2$ \& $n \,FT_2$ $>$ 0}; +\node(rej1)[reject, right of=2, xshift=3cm, align=center]{Unlikely BOLD\\$\rightarrow$ Reject}; +% 
----- node: 3 +\node(3)[process, below of=2, label={180:$node\ 3$}, label={[align=center] 315: varex: variance explained\\by each component}, yshift=-1.5cm]{Calculate median(varex) across all components}; +% ----- node: 4 +\node(4)[decision, below of=3,label={180:$node\ 4$},label={[align=center] 315:DICE overlap between $T_2$ or $S_0$\\decay models and ICA component\\peak clusters}, yshift=-1.5cm]{dice $FS_0$ $>$ dice $FT_2$ \& varex $>$ median(varex) +}; +\node(rej2)[reject, right of=4, xshift=3cm, align=center]{Unlikely BOLD\\$\rightarrow$ Reject}; +% ----- node: 5 +\node(5)[decision, below of=4,label={180:$node\ 5$}, label={[align=center] 315: $t-statistic$ of $FT_2$ values\\in component peak clusters vs\\peak voxels outside of clusters}, yshift=-3.5cm]{ $0 \, >$ signal-noise \& varex $>$ median(varex)}; +\node(rej3)[reject, right of=5, xshift=3cm, align=center]{Unlikely BOLD\\$\rightarrow$ Reject}; +% ----- node: 6 +\node(6)[process, below of=5, label={180:$node\ 6$}, label={0: Uses all components}, yshift=-1.5cm]{Calculate $\kappa$ elbow}; +% ----- node: 7 +\node(7)[process, below of=6, label={180:$node\ 7$}, yshift=-0.2cm]{Identify and exclude $\leq$3 highest variance unclassified components from some $\rho$ elbow calculations}; +% ----- node: 8 +\node(8)[process, below of=7, label={180:$node\ 8$}, label={[align=center] 0: Uses all components and subset\\of unclassified components}]{Calculate $\rho$ elbow\\(kundu method)}; +% ----- node: 9 +\node(9)[decision, below of=8,label={180:$node\ 9$}, yshift=-1.5cm]{$\kappa \geq \kappa$ elbow}; +\node(rej4)[changeclass, right of=9, xshift=3cm]{Provisional accept}; +% ----- node: 10 +\node(10)[decision, below of=9,label={180:$node\ 10$}, yshift=-3.5cm]{$\rho > \rho$ elbow }; +\node(rej5)[changeclass, right of=10, xshift=3cm]{Unclassified}; +% ----- node: 11 +\node(11)[decision, below of=10, label={180:$node\ 11$}, yshift=-3.5cm]{ \textit{n} classified as $Provisional\ accept < 2$}; +\node(rej6)[input, right of=11, 
xshift=4cm, align=center]{Rerun ICA, metric calcs,\\\& component selection.\\If max restarts reached,\\accept everything\\not already rejected}; +% ----- node: 12 +\node(12)[process, below of=11,label={180:$node\ 12$},label={0: $90^{th}$ percentile threshold}, yshift=-1.7cm]{Calculate upper varex on provisionally accepted components}; +% ----- node: 13 +\node(13)[process, below of=12,label={180:$node\ 13$}, label={0: $25^{th}$ percentile threshold},]{Calculate lower varex on provisionally accepted components}; +% ----- node: 14 +\node(14)[process, below of=13,label={180:$node\ 14$}, label={[align=center] 0:$\lceil 2:3 \rceil$ depending on the\\number of fMRI volumes}]{Calculate extend factor}; +% ----- node: 15 +\node(15)[process, below of=14,label={180:$node\ 15$},label={[align=center] 0: \textit{n} Provisional accept\\$*$ extend factor}]{Calculate max good mean metric rank}; +% ----- node: 16 +\node(16)[process, below of=15, label={180:$node\ 16$}, label={[align=center] 0: $\frac{(max-min \, \kappa) \, \div \kappa}{(max-min \, varex) \div varex}$}]{Calculate $\kappa$ ratio on provisionally accepted components}; +% ----- node: 17 +\node(17)[decision, below of=16,label={180:$node\ 17$},label={315:variance \& mean metric rank are high}, yshift=-2.5cm]{mean metric rank $>$ max good mean metric rank \& varex$>$extend factor * upper varex}; +\node(rej7)[reject, right of=17, xshift=4cm, align=center]{Less likely BOLD\\$\rightarrow$ Reject}; +% ----- node: 18 +\node(18)[decision, below of=17,label={180:$node\ 18$},label={[align=center] 315: Accept if remaining component\\is less likely to be BOLD,\\but varex is low \& not worth\\losing a degree of freedom for}, yshift=-4.5cm]{mean metric rank $>$ \textit{n} max good mean metric rank \& varex $\leq$ lower varex \& $\kappa$ $\leq \, \kappa$ elbow }; +\node(rej8)[accept, right of=18, xshift=4cm, align=center]{Low variance\\$\rightarrow$ Accept}; +% ----- node: 19 +\node(19)[decision, below of=18,label={180:$node\ 19$},label={315:
Nothing unclassified remains}, yshift=-4.5cm]{\textit{n} Unclassified $==0$}; +\node(rej9)[accept, right of=19, xshift=3cm, align=center]{Provisional accept\\$\rightarrow$ Accept}; +% ----- node: 20 +\node(20)[process, below of=19, label={180:$node\ 20$},yshift=-2.0cm, label={[align=center] 315: \textit{n} accepted guess =\\$\frac{\sum(\kappa > \kappa\, elbow\, \&\, \rho > \rho\, elbow)+ \sum(\kappa > \kappa\, elbow)}{2}$}]{Calculate new mean metric ranks and \textit{n} accepted guess on remaining unclassified and provisionally accepted components}; +% ----- node: 21 +\node(21)[decision, below of=20,label={180:$node\ 21$}, yshift=-3.5cm]{new mean metric rank $>$ (\textit{n} accepted guess)/2 \& varex $\kappa$ ratio $>$ 2 extend factor \& varex $>$ 2 upper varex}; +\node(rej10)[reject, right of=21, xshift=4cm, align=center]{Less likely BOLD\\$\rightarrow$ Reject}; +% ----- node: 22 +\node(22)[decision, below of=21,label={180:$node\ 22$}, yshift=-5cm]{new mean metric rank $>$ 0.9*\textit{n} accepted guess \& varex $>$ (lower varex * extend factor)}; +\node(rej11)[reject, right of=22, xshift=4cm, align=center]{Less likely BOLD\\$\rightarrow$ Reject}; +% ----- node: 23 +\node(23)[process, below of=22,label={180:$node\ 23$}, label={[align=center] 315: $25^{th}$ percentile variance explained\\ from remaining non-rejected components},yshift=-2cm]{Calculate new lower varex}; +% ----- node: 24 +\node(24)[decision, below of=23,label={180:$node\ 24$}, yshift=-2.5cm]{new mean metric rank $>$ \textit{n} accepted guess \& varex $>$ new lower varex}; +\node(rej12)[accept, right of= 24, xshift=4cm, align=center]{Accept borderline\\$\rightarrow$Accept}; +% ----- node: 25 +\node(25)[decision, below of=24,label={180:$node\ 25$}, yshift=-4cm]{ $\kappa$ $>$ $\kappa$ elbow \& varex $>$ new lower varex}; +\node(rej13)[accept, right of=25, xshift=3cm, align=center]{Accept borderline\\$\rightarrow$Accept}; +% ----- node: 26 +\node(26)[accept, below of=25,label={180:$node\ 26$},
yshift=-2cm, align=center]{Remaining Unclassified \& Provisional accept\\$\rightarrow$ Likely BOLD $\rightarrow$ Accept}; + +% ------------------------- connections ------------------------- +% draw[x](origin)--node[anchor=position]{text}(destination); +\draw[arrow](0)--(1); +\draw[arrow](1)--(2); +\draw[arrow](2)--(3); +\draw[arrow](3)--(4); +\draw[arrow](4)--(5); +\draw[arrow](5)--(6); +\draw[arrow](6)--(7); +\draw[arrow](7)--(8); +\draw[arrow](8)--(9); +\draw[arrow](9)--(10); +\draw[arrow](10)--(11); +\draw[arrow](11)--(12); +\draw[arrow](12)--(13); +\draw[arrow](13)--(14); +\draw[arrow](14)--(15); +\draw[arrow](15)--(16); +\draw[arrow](16)--(17); +\draw[arrow](17)--(18); +\draw[arrow](18)--(19); +\draw[arrow](19)--(20); +\draw[arrow](20)--(21); +\draw[arrow](21)--(22); +\draw[arrow](22)--(23); +\draw[arrow](23)--(24); +\draw[arrow](24)--(25); +\draw[arrow](25)--(26); +\draw[arrow](1)--node[anchor=south] {yes} (rej0); +\draw[arrow](2)--node[anchor=south] {yes} (rej1); +\draw[arrow](4)--node[anchor=south] {yes} (rej2); +\draw[arrow](5)--node[anchor=south] {yes} (rej3); +\draw[arrow](9)--node[anchor=south] {yes} (rej4); +\draw[arrow](rej4)--(10); +\draw[arrow](10)--node[anchor=south] {yes} (rej5); +\draw[arrow](rej5)--(11); +\draw[arrow](11)--node[anchor=south] {yes} (rej6); +\draw[arrow](17)--node[anchor=south] {yes} (rej7); +\draw[arrow](18)--node[anchor=south] {yes} (rej8); +\draw[arrow](19)--node[anchor=south] {yes} (rej9); +\draw[arrow](21)--node[anchor=south] {yes} (rej10); +\draw[arrow](22)--node[anchor=south] {yes} (rej11); +\draw[arrow](24)--node[anchor=south] {yes} (rej12); +\draw[arrow](25)--node[anchor=south] {yes} (rej13); +\end{tikzpicture} +\end{document} diff --git a/docs/_static/decision_tree_legend.png b/docs/_static/decision_tree_legend.png new file mode 100644 index 000000000..ee000bb28 Binary files /dev/null and b/docs/_static/decision_tree_legend.png differ diff --git a/docs/_static/decision_tree_legend.tex
b/docs/_static/decision_tree_legend.tex new file mode 100644 index 000000000..0fda0384f --- /dev/null +++ b/docs/_static/decision_tree_legend.tex @@ -0,0 +1,57 @@ +\documentclass[border=2pt]{standalone} +\usepackage[utf8]{inputenc} % Required for inserting images +\usepackage{tikz} +\usepackage{helvet} +\usetikzlibrary{shapes.geometric, arrows} +\pagecolor{white} + +%-------------------------defining colorblind friendly colors +% Using pale color scheme in Figure 6 +% by Paul Tol https://personal.sron.nl/~pault/ +\definecolor{cbblue}{HTML}{BBCCEE} +\definecolor{cbcyan}{HTML}{CCEEFF} +\definecolor{cbgreen}{HTML}{CCDDAA} +\definecolor{cbyellow}{HTML}{EEEEBB} +\definecolor{cbred}{HTML}{FFCCCC} +\definecolor{cbgrey}{HTML}{DDDDDD} + +% -------------------------defining nodes +\tikzstyle{input} = [trapezium, trapezium left angle =80, trapezium right angle = 100, +minimum width= 3cm, minimum height=0.5cm, text centered, draw=black, fill=cbblue] +\tikzstyle{process} = [rectangle, minimum width = 3cm, minimum height = 0cm, +text centered, , text width=4cm,draw=black, fill=cbgrey] +\tikzstyle{decision} = [diamond, minimum width = 0cm, minimum height = 0cm, +text centered, , text width=1cm, draw=black, fill=cbcyan] +\tikzstyle{changeclass} = [rectangle, rounded corners, minimum width=3cm, minimum height=1cm, +text centered, draw = black, fill=cbyellow] +\tikzstyle{reject} = [trapezium, trapezium left angle =80, trapezium right angle = 100, +minimum width= 1cm, minimum height=0.5cm, text centered, draw=black, fill=cbred] +\tikzstyle{accept} = [trapezium, trapezium left angle =80, trapezium right angle = 100, +minimum width= 1cm, minimum height=0.5cm, text centered, draw=black, fill=cbgreen] + +% -------------------------defining connectors +\tikzstyle{arrow} = [thick,->, >=stealth] +\tikzstyle{line} = [thick,-,>=stealth] +\begin{document} + +% ------------------------- tikz image (flow chart) +\begin{tikzpicture}[node distance = 1cm] + +% ------------------------- nodes 
------------------------- + +% ----- node: 0 +\node(0)[input, label={90:\textbf{Decision Tree Flow Chart Legend}}] {Initialization or restarting}; +% ----- node: 1 +\node(1)[process, below of=0, align=center, yshift=0.4cm] {Calculation Node}; +% ----- node: 2 +\node(2)[changeclass, below of=1, align=center, yshift=0.15cm] {New intermediate\\classification for a component}; +% ----- node: 3 +\node(3)[decision, right of=1, xshift=2.5cm, align=center] {Decision node}; +% ----- node: 4 +\node(4)[accept, below of=2, align=center, yshift=-0.3cm] {Final "accept" classification\\or node to set final classification.\\Classification tag also shown}; +% ----- node: 5 +\node(5)[reject, below of=4, align=center, yshift=-0.5cm] {Final "reject" classification\\or node to set final classification.\\Classification tag also shown}; + + +\end{tikzpicture} +\end{document} \ No newline at end of file diff --git a/docs/_static/decision_tree_minimal.png b/docs/_static/decision_tree_minimal.png new file mode 100644 index 000000000..f11732c31 Binary files /dev/null and b/docs/_static/decision_tree_minimal.png differ diff --git a/docs/_static/decision_tree_minimal.tex b/docs/_static/decision_tree_minimal.tex new file mode 100644 index 000000000..f8b922e68 --- /dev/null +++ b/docs/_static/decision_tree_minimal.tex @@ -0,0 +1,106 @@ +\documentclass[border=2pt]{standalone} +\usepackage[utf8]{inputenc} % Required for inserting images +\usepackage{tikz} +\usepackage{helvet} +\usetikzlibrary{shapes.geometric, arrows} +\pagecolor{white} + +%-------------------------defining colorblind friendly colors +% Using pale color scheme in Figure 6 +% by Paul Tol https://personal.sron.nl/~pault/ +\definecolor{cbblue}{HTML}{BBCCEE} +\definecolor{cbcyan}{HTML}{CCEEFF} +\definecolor{cbgreen}{HTML}{CCDDAA} +\definecolor{cbyellow}{HTML}{EEEEBB} +\definecolor{cbred}{HTML}{FFCCCC} +\definecolor{cbgrey}{HTML}{DDDDDD} + +% -------------------------defining nodes +\tikzstyle{input} = [trapezium, trapezium left angle 
=80, trapezium right angle = 100, +minimum width= 3cm, minimum height=0.5cm, text centered, draw=black, fill=cbblue] +\tikzstyle{process} = [rectangle, minimum width = 3cm, minimum height = 1cm, +text centered, , text width=4cm,draw=black, fill=cbgrey] +\tikzstyle{decision} = [diamond, minimum width = 3cm, minimum height = 1cm, +text centered, , text width=3cm, draw=black, fill=cbcyan] +\tikzstyle{changeclass} = [rectangle, rounded corners, minimum width=3cm, minimum height=1cm, +text centered, draw = black, fill=cbyellow] +\tikzstyle{reject} = [trapezium, trapezium left angle =80, trapezium right angle = 100, +minimum width= 1cm, minimum height=0.5cm, text centered, draw=black, fill=cbred] +\tikzstyle{accept} = [trapezium, trapezium left angle =80, trapezium right angle = 100, +minimum width= 1cm, minimum height=0.5cm, text centered, draw=black, fill=cbgreen] + +% -------------------------defining connectors +\tikzstyle{arrow} = [thick,->, >=stealth] +\tikzstyle{line} = [thick,-,>=stealth] +\begin{document} + +% ------------------------- tikz image (flow chart) +\begin{tikzpicture}[node distance = 2cm] + +% ------------------------- nodes ------------------------- +% ----- node: 0 +\node(0)[input,label={90:\textbf{Minimal Decision Tree (Tedana implementation)}}, label={180:$node\ 0$}]{Set all components to unclassified}; +% ----- node: 1 +\node(1)[decision, below of=0,label={180:$node\ 1$}, yshift=-1.5cm]{$\rho$ $>$ $\kappa$}; +\node(rej1)[reject, right of=1, xshift=3cm, align=center]{Unlikely BOLD\\$\rightarrow$ Reject}; +% ----- node: 2 +\node(2)[decision, below of=1,label={180:$node\ 2$}, label={[align=center] 315: voxel counts for signif fit\\of multi-echo data\\to $T_2$ or $S_0$ decay models}, yshift=-4.0cm]{$n \, FS_0 \, > \, n \, FT_2$ \& $n \,FT_2$ $>$ 0}; +\node(rej2)[reject, right of=2, xshift=3cm, align=center]{Unlikely BOLD\\$\rightarrow$ Reject}; +% ----- node: 3 +\node(3)[process, below of=2, label={180:$node\ 3$}, label={[align=center] 315: varex: 
variance explained\\by each component}, yshift=-2.0cm]{Calculate median(varex) across all components}; +% ----- node: 4 +\node(4)[decision, below of=3,label={180:$node\ 4$},label={[align=center] 315:DICE overlap between $T_2$ or $S_0$\\decay models and ICA component\\peak clusters}, yshift=-1.5cm]{dice $FS_0$ $>$ dice $FT_2$ \& varex $>$ median(varex) +}; +\node(rej4)[reject, right of=4, xshift=3cm, align=center]{Unlikely BOLD\\$\rightarrow$ Reject}; +% ----- node: 5 +\node(5)[decision, below of=4,label={180:$node\ 5$}, label={[align=center] 315: $t-statistic$ of $FT_2$ values\\in component peak clusters vs\\peak voxels outside of clusters}, yshift=-4.0cm]{ $0 \, >$ signal-noise \& varex $>$ median(varex)}; +\node(rej5)[reject, right of=5, xshift=3cm, align=center]{Unlikely BOLD\\$\rightarrow$ Reject}; +% ----- node: 6 +\node(6)[process, below of=5, label={180:$node\ 6$}, label={0: Uses all components}, yshift=-2.0cm]{Calculate $\kappa$ elbow}; +% ----- node: 7 +\node(7)[process, below of=6, label={180:$node\ 7$}, label={[align=center] 0: Uses all components and subset\\of unclassified components}]{Calculate $\rho$ elbow\\(liberal method)}; +% ----- node: 7 +\node(8)[decision, below of=7,label={180:$node\ 8$}, yshift=-1.5cm]{$\kappa \geq \kappa$ elbow}; +\node(chrej8)[changeclass, below of=8, xshift=-0.5cm, yshift=-2cm]{Provisional reject}; +\node(chacc8)[changeclass, right of=8, xshift=3cm, yshift=0cm]{Provisional accept}; +% ----- node: 8 +\node(9)[decision, below of=chacc8,label={180:$node\ 9$},label={315: Accept even if $\rho < \rho\ elbow$},yshift=-1.5cm]{$\kappa > 2\rho$ }; +\node(acc9)[accept, right of=9, xshift=3cm, align=center]{Likely BOLD\\$\rightarrow$ Accept}; +% ----- node: 9 +\node(10)[decision, below of=chrej8,label={180:$node\ 10$}, yshift=-1.5cm]{ $\rho$ $>$ $\rho$ elbow}; +\node(chrej10)[changeclass, right of=10, xshift=4cm]{Provisional reject}; +% ----- node: 10 +\node(11)[decision, below of=chrej10,label={180:$node\ 11$},label={[align=left] 
335: Will accept the lowest\\variance components until\\1\% of total variance is\\accepted this way}, yshift=-2.1cm]{$if$ component variance $<0.1$};%--check in kundu +\node(acc11)[accept, right of=11, xshift=3cm, align=center]{Low variance\\$\rightarrow$ Accept}; +% ----- node: 11 +\node(12)[accept, below of=10,label={180:$node\ 12$},yshift=-3.5cm, align=center]{Likely BOLD\\Change provisional accept\\$\rightarrow$Accept}; +% ----- node: 12 +\node(13)[reject, below of=11,label={180:$node\ 13$}, yshift=-2.0cm, align=center]{Unlikely BOLD\\Change provisional reject\\$\rightarrow$Reject}; + +% ------------------------- connections ------------------------- +% draw[x](origin)--node[anchor=position]{text}(destination); +\draw[arrow](0)--(1); +\draw[arrow](1)--node[anchor=south, right=0] {no} (2); +\draw[arrow](1)--node[anchor=south] {yes} (rej1); +\draw[arrow](2)--node[anchor=south, right=0] {no} (3); +\draw[arrow](2)--node[anchor=south] {yes} (rej2); +\draw[arrow](3)--(4); +\draw[arrow](4)--node[anchor=south, right=0] {no} (5); +\draw[arrow](4)--node[anchor=south] {yes} (rej4); +\draw[arrow](5)--node[anchor=south, right=0] {no} (6); +\draw[arrow](5)--node[anchor=south] {yes} (rej5); +\draw[arrow](6)--(7); +\draw[arrow](7)--(8); +\draw[arrow](8)--node[anchor=south] {yes} (chacc8); +\draw[arrow](8)--node[anchor=south, right=0] {no} (chrej8); +\draw[arrow](chacc8)--(9); +\draw[arrow](chrej8)--(10); +\draw[arrow](9)--node[anchor=south, right=0] {no} (10); +\draw[arrow](9)--node[anchor=south] {yes} (acc9); +\draw[arrow](10)--node[anchor=south, left=0, align=right] { yes\\still\\provisional accept} (12); +\draw[arrow](10)--node[anchor=south] {yes} (chrej10); +\draw[arrow](chrej10)--(11); +\draw[arrow](11)--node[anchor=south, right=0] {no} (13); +\draw[arrow](11)--node[anchor=south] {yes} (acc11); +\end{tikzpicture} +\end{document} diff --git a/docs/_templates/class.rst b/docs/_templates/class.rst new file mode 100644 index 000000000..0b9ab90cb --- /dev/null +++ 
b/docs/_templates/class.rst @@ -0,0 +1,52 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + :no-members: + :no-inherited-members: + :no-special-members: + + {% block methods %} + {% if methods %} + + .. automethod:: __init__ + + {% if ('__call__' in all_methods) or ('__call__' in inherited_members) %} + + .. automethod:: __call__ + + {% endif %} + + .. rubric:: Methods + + .. autosummary:: + :toctree: + {% for item in all_methods %} + {%- if not item.startswith('_') or item in ['__mul__', '__getitem__', '__len__'] %} + ~{{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + {% for item in inherited_members %} + {%- if item in ['__mul__', '__getitem__', '__len__'] %} + ~{{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + {% endif %} + {% endblock %} + + + {% block attributes %} + {% if attributes %} + .. rubric:: Attributes + + .. autosummary:: + :toctree: + {% for item in all_attributes %} + {%- if not item.startswith('_') %} + ~{{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + {% endif %} + {% endblock %} diff --git a/docs/api.rst b/docs/api.rst index dfe600209..c91211989 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -24,6 +24,7 @@ API :toctree: generated/ tedana.workflows.tedana_workflow + tedana.workflows.ica_reclassify_workflow tedana.workflows.t2smap_workflow @@ -120,11 +121,25 @@ API .. autosummary:: :toctree: generated/ + :template: class.rst + + tedana.selection.component_selector.ComponentSelector + tedana.selection.component_selector.TreeError + :template: function.rst - tedana.selection.manual_selection - tedana.selection.kundu_selection_v2 - tedana.selection.kundu_tedpca + tedana.selection.component_selector.load_config + tedana.selection.component_selector.validate_tree + +.. autosummary:: + :toctree: generated/ + :template: module.rst + + tedana.selection.selection_nodes + tedana.selection.selection_utils + tedana.selection.tedica + tedana.selection.tedpca + .. 
_api_gscontrol_ref: @@ -164,11 +179,16 @@ API :template: class.rst tedana.io.OutputGenerator + tedana.io.InputHarvester + tedana.io.CustomEncoder :template: function.rst tedana.io.load_data + tedana.io.load_json + tedana.io.get_fields tedana.io.new_nii_like + tedana.io.prep_data_for_json tedana.io.add_decomp_prefix tedana.io.denoise_ts tedana.io.split_ts diff --git a/docs/approach.rst b/docs/approach.rst index 97a985f1a..ae59ef0bd 100644 --- a/docs/approach.rst +++ b/docs/approach.rst @@ -338,14 +338,29 @@ classify ICA components as TE-dependent (BOLD signal), TE-independent (non-BOLD noise), or neither (to be ignored). These classifications are saved in **desc-tedana_metrics.tsv**. The actual decision tree is dependent on the component selection algorithm employed. -``tedana`` includes the option `kundu` (which uses hardcoded thresholds -applied to each of the metrics). +``tedana`` includes two options `kundu` and `minimal` (which uses hardcoded thresholds +applied to each of the metrics). `These decision trees are detailed here`_. Components that are classified as noise are projected out of the optimally combined data, yielding a denoised timeseries, which is saved as **desc-optcomDenoised_bold.nii.gz**. .. image:: /_static/a15_denoised_data_timeseries.png +.. _These decision trees are detailed here: included_decision_trees.html + +******************************* +Manual classification with RICA +******************************* + +``RICA`` is a tool for manual ICA classification. Once the .tsv file containing the result of +manual component classification is obtained, it is necessary to `re-run the tedana workflow`_ +passing the manual_classification.tsv file with the --ctab option. To save the output correctly, +make sure that the output directory does not coincide with the input directory. See `this example`_ +presented at MRITogether 2022 for a hands-on tutorial. + +.. 
_re-run the tedana workflow: https://tedana.readthedocs.io/en/stable/usage.html#Arguments%20for%20Rerunning%20the%20Workflow +.. _this example: https://www.youtube.com/live/P4cV-sGeltk?feature=share&t=1347 + ********************************************* Removal of spatially diffuse noise (optional) diff --git a/docs/building_decision_trees.rst b/docs/building_decision_trees.rst new file mode 100644 index 000000000..dce2c6d31 --- /dev/null +++ b/docs/building_decision_trees.rst @@ -0,0 +1,410 @@ +######################################################## +Understanding and building a component selection process +######################################################## + +This guide is designed for users who want to better understand the mechanics +of the component selection process and people who are considering customizing +their own decision tree or contributing to ``tedana`` code. We have tried to +make this accessible, but it is long. If you just want to better understand +what's in the outputs from ``tedana`` start with +:ref:`classification-output-descriptions`. + +``tedana`` involves transforming data into components, currently via ICA, and then +calculating metrics for each component. Each metric has one value per component that +is stored in a ``component_table`` dataframe. This structure is then passed to a +"decision tree" through which a series of binary choices categorize each component +as **accepted** or **rejected**. The time series for the rejected components are +regressed from the data in the `final denoising step`_. + +There are a couple of decision trees that are included by default in ``tedana`` but +users can also build their own. This might be useful if one of the default decision +trees needs to be slightly altered due to the nature of a specific data set, if one has +an idea for a new approach to multi-echo denoising, or if one wants to integrate +non-multi-echo metrics into a single decision tree. + +.. 
note:: + We use two terminologies interchangeably. + The whole process is called "component selection" and much of the code uses + variants of that phrase + (e.g. the :class:`~tedana.selection.component_selector.ComponentSelector` class, + :mod:`~tedana.selection.selection_nodes` for the functions used in selection). + We call the steps for how to classify components a "decision tree" since each + step in the selection process branches components into different intermediate + or final classifications. + +.. _final denoising step: denoising.html + + +.. contents:: :local: + +****************************************** +Expected outputs after component selection +****************************************** + +During processing, everything is stored in a +:class:`~tedana.selection.component_selector.ComponentSelector` called ``selector``. +The elements of that object are then saved to multiple files. +The file key names are used below the full file names in the +:ref:`output-filename-descriptions`. + + +General outputs from component selection +======================================== + +New columns in ``selector.component_table`` and the "ICA metrics tsv" file: + + - classification: + While the decision table is running, there may also be intermediate + classification labels, but the final labels are expected to be + "accepted" or "rejected". There will be a warning if other labels remain. + - classification_tags: + Human readable tags that explain why a classification was reached. + Each component can have no tags (an empty string or n/a), one tag, + or a comma separated list of tags. These tags may be useful parameters + for visualizing and reviewing results + +``selector.cross_component_metrics`` and "ICA cross component metrics json": + A dictionary of metrics that are each a single value calculated across components, + for example, kappa and rho elbows. User or pre-defined scaling factors are + also stored here. 
Any constant that is used in the component classification + processes that isn't pre-defined in the decision tree file should be saved here. + +``selector.component_status_table`` and "ICA status table tsv": + A table where each column lists the classification status of + each component after each node was run. Columns are only added + for runs where component statuses can change. + This is useful for understanding the classification + path of each component through the decision tree + +``selector.tree`` and "ICA decision tree json": + A copy of the inputted decision tree specification with an added "output" field + for each node. The output field (see next section) contains information about + what happened during execution. Of particular note, each output includes a list + of the metrics used within the node, "node_label", which is a (hopefully) human + readable brief description of the node's function and, for nodes where component + classifications can change, "n_false" & "n_true" list how many components + changed classifications. The inputted parameters include "if_true" and "if_false" + which specify what changes for each component. These fields can be used to + construct a visual flow chart or text-based summary of how classifications + changed for each run. + +``selector.tree["used_metrics"]`` and a field in "ICA decision tree json": + A list of the metrics that were used in the decision tree. Everything in + ``used_metrics`` should be in either ``necessary_metrics`` or + ``generated_metrics``. If a used metric isn't in either, a warning message + will appear. This is a useful check that makes sure every metric used was + pre-specified. + +``selector.tree["classification_tags"]`` and a field in "ICA decision tree json": + A list of the pre-specified classification tags that could be used in a decision tree. 
+ Any reporting interface should use this field so that all possible tags are listed + even if a given tag is not used by any component by the end of the selection process. + + +Outputs of each decision tree step +================================== + +"ICA decision tree json" includes all the information from the specified decision tree +for each "node" or function call. For each node, there is an "outputs" subfield with +information from when the tree was executed. +Each outputs field includes: + +- decision_node_idx + The decision tree functions are run as part of an ordered list. + This is the positional index (the location of the function in + the list), starting with index 0. + +- used_metrics + A list of the metrics used in a node of the decision tree + +- used_cross_component_metrics + A list of cross component metrics used in the node of a decision tree + +- node_label + A brief label for what happens in this node that can be used in a decision + tree summary table or flow chart. + +- n_true, n_false + For decision tree (dec) functions, the number of components that were classified + as true or false, respectively, in this decision tree step. + +- calc_cross_comp_metrics + For calculation (calc) functions, cross component metrics that were + calculated in this function. When this is included, each of those + metrics and the calculated values are also distinct keys in 'outputs'. + While the cross component metrics table does not include where each component + was calculated, that information is stored here. + +- added_component_table_metrics + It is possible to add a new metric to the component table during the selection process. + This is useful if a metric is to be calculated on a subset of components based on what + happened during previous steps in the selection process. This is **not** recommended, + but since it was done as part of the original kundu decision tree process defined in + meica it is possible. 
+ + +************************************** +Decision trees distributed with tedana +************************************** + +Two decision trees are distributed with ``tedana``. +These trees are documented in :doc:`included_decision_trees`. +It might be useful to look at these trees while reading how to develop a custom +decision tree. + + +******************************* +Defining a custom decision tree +******************************* + +Decision trees are stored in json files. The default trees are stored as part of +the tedana code repository in `resources/decision_trees`_. The minimal tree, +minimal.json, is a good example highlighting the structure and steps in a tree. It +may be helpful to look at that tree while reading this section. kundu.json replicates +the decision tree used in MEICA version 2.5, the predecessor to tedana. It is more +complex, but also highlights additional possible functionality in decision trees. + +A user can specify another decision tree and link to the tree location when tedana is +executed with the ``--tree`` option. The format is flexible to allow for future +innovations, but be advised that this also allows you to create something with +non-ideal results for the current code. Some criteria will result in an error if +violated, but more will just give a warning. If you are designing or editing a new +tree, look carefully at the warnings. + +A decision tree can include two types of nodes or functions. +All functions are currently in :mod:`~tedana.selection.selection_nodes`. + +- A decision function will use existing metrics and potentially change the + classification of the components based on those metrics. By convention, all + these functions begin with "dec". +- A calculation function will take existing metrics and calculate a value across + components to be used for classification, for example the kappa and rho elbows. + By convention, all these functions begin with "calc". 
+- Nothing prevents a function from both calculating new cross component values and + applying those values in a decision step, but following this convention should + hopefully make decision tree specifications easier to follow and results easier + to interpret. + +.. _resources/decision_trees: https://github.com/ME-ICA/tedana/tree/main/tedana/resources/decision_trees + + +General information fields +========================== + +There are several fields with general information. Some of these store general +information that's useful for reporting results and others store information +that is used to check whether results are plausible & can help avoid mistakes. + +- tree_id + A descriptive name for the tree that will be logged. + +- info + A brief description of the tree for info logging + +- report + A narrative description of the tree that could be used in report logging + +- refs + Publications that should be referenced when this tree is used + +- necessary_metrics + A list of the necessary metrics in the component table that will be used + by the tree. If a metric doesn't exist then this will raise an error instead + of executing a tree. (Depending on future code development, this could + potentially be used to run ``tedana`` by specifying a decision tree and + metrics are calculated based on the contents of this field.) If a necessary + metric isn't used, there will be a warning. + +- generated_metrics + An optional initial field. It lists metrics that are to be calculated as + part of the decision tree's execution. This is used similarly to necessary_metrics + except, since the decision tree starts before these metrics exist, it won't raise + an error when these metrics are not found. One might want to calculate a new metric + if the metric uses only a subset of the components based on previous + classifications. This does make interpretation of results more confusing, but, since + this functionality was part of the kundu decision tree, it is included. 
+ +- intermediate_classifications + A list of intermediate classifications (e.g. "provisionalaccept", + "provisionalreject"). It is very important to pre-specify these because the code + will make sure only the default classifications ("accepted" "rejected" + "unclassified") and intermediate classifications are used in a tree. This prevents + someone from accidentally losing a component due to a spelling error or other + minor variation in a classification label. + +- classification_tags + A list of acceptable classification tags (e.g. "Likely BOLD", "Unlikely BOLD", + "Low variance"). This will both be used to make sure only these tags are used in + the tree and allow programs that interact with the results to see all potential + tags in one place. Note: "Likely BOLD" is a required tag. If tedana is run and + none of the components include the "Likely BOLD" tag, then ICA will be repeated + with a different seed and then the selection process will repeat. + + +Nodes in the decision tree +========================== + +The "nodes" field is an ordered list of elements where each element defines a +node in the decision tree. Each node contains the information to call a function. + +All trees should start with a "manual_classification" node that should set all +component classifications to "unclassified" and have "clear_classification_tags" +set to true. There might be special cases where someone might want to violate +these rules, but depending what else happens in preceding code, other functions +will expect both of these columns to exist. This manual_classification step will +make sure those columns are created and initialized. + +Every possible path through the tree should result in each component being +classified as 'accepted' or 'rejected' by the time the tree is completed. + +There are several key fields for each node: + +- "functionname": The exact function name in :mod:`~tedana.selection.selection_nodes` that will be called. 
+- "parameters": Specifications of all required parameters for the function in functionname +- "kwargs": Specifications for optional parameters for the function in functionname + +The only parameter that is used in all functions is ``decide_comps``, which is used to +identify, based on their classifications, the components a function should be applied +to. It can be a single classification, or a comma separated string of classifications. +In addition to the intermediate and default ("accepted", "rejected", "unclassified") +component classifications, this can be "all" for functions that should be applied to +all components regardless of their classifications. + +Most decision functions also include ``if_true`` and ``if_false``, which specify how to change +the classification of each component based on whether a decision criterion is true +or false. In addition to the default and intermediate classification options, this can +also be "nochange" +(e.g., for components where a>b is true, "reject", and for components where a>b is false, "nochange"). +The optional parameters ``tag_if_true`` and ``tag_if_false`` +define the classification tags to be assigned to components. +Currently, the only exceptions are ``manual_classify`` and ``dec_classification_doesnt_exist``, +which use ``new_classification`` to designate the new component classification and +``tag`` (optional) to designate which classification tag to apply. + +There are several optional parameters (to include within "kwargs") in every decision +tree function: + +- ``custom_node_label``: A brief label for what happens in this node that can be used in + a decision tree summary table or flow chart. If custom_node_label is not defined, + then each function has default descriptive text. +- ``log_extra_report``, ``log_extra_info``: Text for each function call is automatically placed + in the logger output. 
In addition to that text, the text in these strings will + also be included in the logger with the report or info codes respectively. These + might be useful to give a narrative explanation of why a step was parameterized a + certain way. +- ``only_used_metrics``: If true, this function will only return the names of the component + table metrics that will be used when this function is fully run. This can be used to + identify all used metrics before running the decision tree. + +``"_comments"`` can be used to add a longer explanation about what a node is doing. +This will not be logged anywhere except in the tree, but may be useful to help explain the +purpose of a given node. + + +******************************** +Key parts of selection functions +******************************** + +There are several expectations for selection functions that are necessary for them to +properly execute. +In :mod:`~tedana.selection.selection_nodes`, +:func:`~tedana.selection.selection_nodes.manual_classify`, +:func:`~tedana.selection.selection_nodes.dec_left_op_right`, +and :func:`~tedana.selection.selection_nodes.calc_kappa_elbow` +are good examples for how to meet these expectations. + +Create a dictionary called "outputs" that includes key fields that should be recorded. +The following line should be at the end of each function to retain the output info: +``selector.nodes[selector.current_node_idx]["outputs"] = outputs`` + +Additional fields can be used to log function-specific information, but the following +fields are common and may be used by other parts of the code: + +- "decision_node_idx" (required): the ordered index for the current function in the + decision tree. +- "node_label" (required): A descriptive label for what happens in the node. +- "n_true" & "n_false" (required for decision functions): For decision functions, + the number of components labeled true or false within the function call. 
+- "used_metrics" (required if a function uses metrics): The list of metrics used in + the function. This can be hard coded, defined by input parameters, or empty. +- "used_cross_component_metrics" (required if a function uses cross component metrics): + A list of cross component metrics used in the function. This can be hard coded, + defined by input parameters, or empty. +- "calc_cross_comp_metrics" (required for calculation functions): A list of cross + component metrics calculated within the function. The key-value pair for each + calculated metric is also included in "outputs" + +Before any data are touched in the function, there should be an +``if only_used_metrics:`` clause that returns ``used_metrics`` for the function +call. This will be useful to gather all metrics a tree will use without requiring a +specific dataset. + +Existing functions define ``function_name_idx = f"Step {selector.current_node_idx}: [text of function_name]``. +This is used in logging and is cleaner to initialize near the top of each function. + +Each function has code that creates a default node label in ``outputs["node_label"]``. +The default node label may be used in decision tree visualization so it should be +relatively short. Within this section, if there is a user-provided custom_node_label, +that should be used instead. 
+ +Calculation nodes should check if the value they are calculating was already calculated +and output a warning if the function overwrites an existing value + +Code that adds the text ``log_extra_info`` and ``log_extra_report`` into the appropriate +logs (if they are provided by the user) + +After the above information is included, +all functions will call :func:`~tedana.selection.selection_utils.selectcomps2use`, +which returns the components with classifications included in ``decide_comps`` +and then runs :func:`~tedana.selection.selection_utils.confirm_metrics_exist`, +which is an added check to make sure the metrics +used by this function exist in the component table. + +Nearly every function has a clause like: + +.. code-block:: python + + if comps2use is None: + log_decision_tree_step(function_name_idx, comps2use, decide_comps=decide_comps) + outputs["n_true"] = 0 + outputs["n_false"] = 0 + else: + +If there are no components with the classifications in ``decide_comps``, this logs that +there's nothing for the function to be run on, else continue. + +For decision functions, the key variable is ``decision_boolean``, which should be a pandas +dataframe column that is True or False for the components in ``decide_comps`` based on +the function's criteria. +That column is an input to :func:`~tedana.selection.selection_utils.change_comptable_classifications`, +which will update the component_table classifications, update the classification history +in component_status_table, and update the component classification_tags. Components not +in ``decide_comps`` retain their existing classifications and tags. +:func:`~tedana.selection.selection_utils.change_comptable_classifications` +also returns and should assign values to +``outputs["n_true"]`` and ``outputs["n_false"]``. These log how many components were +identified as true or false within each function. 
+ +For calculation functions, the calculated values should be added as a value/key pair to +both ``selector.cross_component_metrics`` and ``outputs``. + +:func:`~tedana.selection.selection_utils.log_decision_tree_step` +puts the relevant info from the function call into the program's output log. + +Every function should end with: + +.. code-block:: python + + selector.nodes[selector.current_node_idx]["outputs"] = outputs + return selector + + functionname.__doc__ = (functionname.__doc__.format(**DECISION_DOCS)) + +This makes sure the outputs from the function are saved in the class structure and the +class structure is returned. The following line should include the function's name and +is used to make sure repeated variable names are compiled correctly for the API +documentation. + +If you have made it this far, congratulations!!! If you follow these steps, you'll be able +to impress your colleagues, friends, and family by designing your very own decision +tree functions. diff --git a/docs/conf.py b/docs/conf.py index e71f38b00..0abb14feb 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,6 +42,7 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ + "sphinx.ext.napoleon", "matplotlib.sphinxext.plot_directive", "sphinx.ext.autodoc", "sphinx.ext.autosummary", @@ -49,7 +50,6 @@ "sphinx.ext.ifconfig", "sphinx.ext.intersphinx", "sphinx.ext.linkcode", - "sphinx.ext.napoleon", "sphinx.ext.todo", "sphinx_copybutton", "sphinxarg.ext", @@ -96,7 +96,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. 
@@ -124,6 +124,7 @@ napoleon_use_param = False napoleon_use_keyword = True napoleon_use_rtype = False +napoleon_custom_sections = ["Generated Files"] # -- Options for HTML output ---------------------------------------------- diff --git a/docs/contributing.rst b/docs/contributing.rst index 5499f8eac..4a82618ff 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -125,7 +125,7 @@ this switch, but it also means that exactly reproducing previous MEICA analyses The other reason is that the core developers have chosen to look forwards rather than maintaining an older code base. -As described in the :ref:`governance` section, ``tedana`` is maintained by a small team of +As described in the `governance`_ section, ``tedana`` is maintained by a small team of volunteers with limited development time. If you'd like to use MEICA as has been previously published the code is available on `bitbucket`_ and freely available under a LGPL2 license. diff --git a/docs/faq.rst b/docs/faq.rst index 3dfa30e5c..8f516ee20 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -47,6 +47,8 @@ Nevertheless, we have some code (thanks to Julio Peraza) that works for version +.. _fMRIPrep: https://fmriprep.readthedocs.io + Warping scanner-space fMRIPrep outputs to standard space ======================================================== @@ -68,42 +70,108 @@ The standard space template in this example is "MNI152NLin2009cAsym", but will d The TEDICA step may fail to converge if TEDPCA is either too strict (i.e., there are too few components) or too lenient (there are too many). -In our experience, this may happen when preprocessing has not been applied to -the data, or when improper steps have been applied to the data (e.g., distortion -correction, rescaling, nuisance regression). +With updates to the ``tedana`` code, this issue is now rare, but it may happen +when preprocessing has not been applied to the data, or when improper steps have +been applied to the data (e.g. 
rescaling, nuisance regression). If you are confident that your data have been preprocessed correctly prior to applying tedana, and you encounter this problem, please submit a question to `NeuroStars`_. +.. _NeuroStars: https://neurostars.org .. _manual classification: ******************************************************************************** -[tedana] I think that some BOLD ICA components have been misclassified as noise. +[tedana] Can I manually reclassify components? ******************************************************************************** -``tedana`` allows users to manually specify accepted components when calling the pipeline. -You can use the ``--manacc`` argument to specify the indices of components to accept. +``ica_reclassify`` allows users to manually alter component classifications. +This can be used both as a command line tool and as part of other interactive +programs, such as `RICA`_. RICA creates a graphical interface that is similar to +the built-in tedana reports that lets users interactively change component +classifications. Both programs will log which component classifications were +manually altered. If one wants to retain the original denoised time series, +make sure to output the denoised time series into a separate directory. +.. _RICA: https://github.com/ME-ICA/rica ************************************************************************************* -[tedana] Why isn't v3.2 of the component selection algorithm supported in ``tedana``? +[tedana] What is the difference between the kundu and minimal decision trees? ************************************************************************************* -There is a lot of solid logic behind the updated version of the TEDICA component -selection algorithm, first added to the original ME-ICA codebase `here`_ by Dr. Prantik Kundu. 
-However, we (the ``tedana`` developers) have encountered certain difficulties -with this method (e.g., misclassified components) and the method itself has yet -to be validated in any papers, posters, etc., which is why we have chosen to archive -the v3.2 code, with the goal of revisiting it when ``tedana`` is more stable. - -Anyone interested in using v3.2 may compile and install an earlier release (<=0.0.4) of ``tedana``. - +The decision tree is the series of conditions through which each component is +classified as accepted or rejected. The kundu tree (`--tree kundu`), used in Prantik +Kundu's MEICA v2.5, is the classification process that has long +been used by ``tedana`` and users have been generally content with the results. The +kundu tree used multiple intersecting metrics and rankings to classify components. +How these steps may interact on specific datasets is opaque. While there is a kappa +(T2*-weighted) elbow threshold and a rho (S0-weighted) elbow threshold, as discussed +in publications, no component is accepted or rejected because of those thresholds. +Users sometimes notice rejected components that clearly should have been accepted. For +example, a component that included a clear T2*-weighted V1 response to a block design +flashing checkerboard was sometimes rejected because the relatively large variance of +that component interacted with a rejection criterion. + +The minimal tree (`--tree minimal`) is designed to be easier to understand and less +likely to reject T2* weighted components. There are a few other criteria, but components +with `kappa>kappa elbow` and `rho=v3.7) so +that this version of the selection process would again be possible to run. + +.. _shared code on bitbucket: https://bitbucket.org/prantikk/me-ica/src/experimental +.. _distributed with AFNI as MEICA v2.5 beta 11: https://github.com/afni/afni/tree/master/src/pkundu +.. _MEICA v3.2: https://github.com/ME-ICA/me-ica/tree/53191a7e8838788acf837fdf7cb3026efadf49ac +.. 
_MEICA v3.3: https://github.com/ME-ICA/me-ica/tree/ME-ICA_v3.3.0 ******************************************************************* diff --git a/docs/included_decision_trees.rst b/docs/included_decision_trees.rst new file mode 100644 index 000000000..156404bcd --- /dev/null +++ b/docs/included_decision_trees.rst @@ -0,0 +1,100 @@ +####################### +Included Decision Trees +####################### + +Two decision trees are currently distributed with ``tedana``. + +``kundu`` is the decision tree that is based on MEICA version 2.5 +and has been included with ``tedana`` since the start of this project. +While multiple publications have used and benefited from this decision +tree, it includes many steps with arbitrary thresholds and, when +components seem misclassified, it's often hard to understand why. + +``minimal`` is a simplified version of that decision tree with fewer +steps and arbitrary thresholds. Minimal is designed to be more stable +and comprehensible, but it has not yet been extensively validated and +parts of the tree may change in response to additional tests on a +wider range of data sets. + +Flowcharts describing the steps in both trees are below. +As documented more in :doc:`building_decision_trees`, the input to each tree +is a table with metrics, like :math:`\kappa` or :math:`\rho`, for each +component. Each step or node in the decision tree either calculates +new values or changes component classifications based on these metrics. +When a component classification changes to ``accept`` or ``reject``, a +``classification_tag`` is also assigned which may help understand why +a component was given a specific classification. + +Each step in the flow chart is labeled with a ``node`` number. +If ``tedana`` is run using one of these trees, those node +numbers will match the numbers in the ``ICA status table`` and the +``ICA decision tree`` that are described in +:ref:`output-filename-descriptions`. 
These node numbers can be used +to see when in the process a component's classification changed. + +.. image:: _static/decision_tree_legend.png + :width: 300 + :alt: Legend for Decision Tree Flow Charts + +.. + Reminder on how to load svg if I can figure out how to correctly generate them + .. raw:: html + + Legend for Decision Tree Flow Charts + + +******************* +Kundu decision tree +******************* + +Nodes 1-5 reject components that are very unlikely to be BOLD. +In nodes 9-10 components where :math:`\kappa` > +:math:`\kappa` elbow and :math:`\rho` < :math:`\rho` +elbow are classified as `provisional accept`. A non-obvious aspect +of this decision tree is that no decision node below this point distinguishes +components that are `provisional accept` from components that are still +`unclassified` and nothing that does not cross the :math:`\kappa` and +:math:`\rho` elbow thresholds is inherently rejected. The number of +`provisional accept` components is used to see if the process should +be restarted (node 11) and calculate other thresholds (nodes 12-16 & 20), +but nothing is directly accepted or rejected based on the elbow thresholds. +Several additional criteria are used to reject components (nodes 17, 21, & 22). +In older versions of ``tedana`` components were classified as `ignored`. +This meant a component was too small/minor to be worth losing a degree of +freedom by rejecting, so it was treated +like the `accepted` components. This was widely confusing to many users so they +are now classified as `accepted` but with classification tags `low variance` +(node 18) or `accept borderline` (nodes 24 & 25). + +.. image:: _static/decision_tree_kundu.png + :width: 400 + :alt: Kundu Decision Tree Flow Chart + +`LaTeX file to generate the kundu decision tree flow chart`_ + +.. 
_LaTeX file to generate the kundu decision tree flow chart: _static/decision_tree_kundu.tex + +********************* +Minimal decision tree +********************* + +The minimal tree starts similarly to the kundu tree by rejecting components +that are very unlikely to be BOLD (nodes 1-5). Then all components where +:math:`\kappa` > :math:`\kappa` elbow and :math:`\rho` < :math:`\rho` elbow +are `provisional accept` and otherwise are `provisional reject` (nodes 8 & 10). +The only exception to this is if :math:`\kappa` > :math:`\kappa` elbow and +:math:`\kappa` > 2* :math:`\rho` then it is `provisional accept` regardless of the +:math:`\rho` elbow under the assumption that there is enough T2* weighted signal that +the component should not be rejected even if it also contains noise (node 9). +If `provisional reject` components have very low variance they are accepted rather +than losing degrees of freedom, but no more than 1% of the total variance can be +accepted this way (node 11). After that point, everything that is +`provisional accept` is accepted (node 12) and everything that is `provisional reject` +is rejected (node 13). + +.. image:: _static/decision_tree_minimal.png + :width: 400 + :alt: Minimal Decision Tree Flow Chart + +`LaTeX file to generate the minimal decision tree flow chart`_ + +.. _LaTeX file to generate the minimal decision tree flow chart: _static/decision_tree_minimal.tex diff --git a/docs/index.rst b/docs/index.rst index 415540fff..cc1f38718 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -177,6 +177,7 @@ tedana is licensed under GNU Lesser General Public License version 2.1. approach outputs faq + building_decision_trees support contributing roadmap @@ -188,6 +189,7 @@ tedana is licensed under GNU Lesser General Public License version 2.1. 
:name: hiddentoc dependence_metrics + included_decision_trees ****************** diff --git a/docs/installation.rst b/docs/installation.rst index 4cba104c9..9ff6bd4ab 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -3,7 +3,7 @@ Installation ############ You'll need to set up a working development environment to use ``tedana``. -To set up a local environment, you will need Python >=3.6 and the following +To set up a local environment, you will need Python >=3.8 and the following packages will need to be installed: - nilearn diff --git a/docs/multi-echo.rst b/docs/multi-echo.rst index 18105b64f..11b581810 100644 --- a/docs/multi-echo.rst +++ b/docs/multi-echo.rst @@ -501,13 +501,16 @@ Videos * A `series of lectures from the OHBM 2017 multi-echo session`_ on multiple facets of multi-echo data analysis * | Multi-echo fMRI lecture from the `2018 NIH FMRI Summer Course`_ by Javier Gonzalez-Castillo | `Slides from 2018 NIH FMRI Summer Course`_ -* An `NIMH Center for Multimodal Neuroimaging video`_ by the Section on Functional Imaging Methods +* NIMH Center for Multimodal Neuroimaging `Advantages of multi-echo fMRI`_ (2019) by Dan Handwerker, Javier Gonzalez-Castillo, and Vinai Roopchansingh +* | MRI Together 2022 Conference Presentations by Eneko Uruñuela + | Tedana: Analysis of echo-time dependent fMRI data (`recording `_, `slides `_) + | A tour of tedana (`tour recording `_, `tour slides `_) .. _educational session from OHBM 2017: https://www.pathlms.com/ohbm/courses/5158/sections/7788/video_presentations/75977 .. _series of lectures from the OHBM 2017 multi-echo session: https://www.pathlms.com/ohbm/courses/5158/sections/7822 .. _2018 NIH FMRI Summer Course: https://fmrif.nimh.nih.gov/course/fmrif_course/2018/14_Javier_20180713 .. _Slides from 2018 NIH FMRI Summer Course: https://fmrif.nimh.nih.gov/COURSE/fmrif_course/2018/content/14_Javier_20180713.pdf -.. _NIMH Center for Multimodal Neuroimaging video: https://youtu.be/G1Ftd2IwF14 +.. 
_Advantages of multi-echo fMRI: https://youtu.be/G1Ftd2IwF14 Multi-echo preprocessing software diff --git a/docs/outputs.rst b/docs/outputs.rst index e686bd100..d0a37d1bd 100644 --- a/docs/outputs.rst +++ b/docs/outputs.rst @@ -4,163 +4,229 @@ Outputs of tedana ################# +When ``tedana`` is run, it outputs many files and an html report to help interpret the +results. This details the contents of all outputted files, explains the terminology +used for describing the outputs of classification, and details the contents of the html +report. -****************************** -Outputs of the tedana workflow -****************************** - -================================================ ===================================================== -Filename Content -================================================ ===================================================== -dataset_description.json Top-level metadata for the workflow. -T2starmap.nii.gz Full estimated T2* 3D map. - Values are in seconds. - The difference between the limited and full maps - is that, for voxels affected by dropout where - only one echo contains good data, the full map uses - the T2* estimate from the first two echoes, while the - limited map has a NaN. -S0map.nii.gz Full S0 3D map. - The difference between the limited and full maps - is that, for voxels affected by dropout where - only one echo contains good data, the full map uses - the S0 estimate from the first two echoes, while the - limited map has a NaN. -desc-optcom_bold.nii.gz Optimally combined time series. -desc-optcomDenoised_bold.nii.gz Denoised optimally combined time series. Recommended - dataset for analysis. -desc-optcomRejected_bold.nii.gz Combined time series from rejected components. -desc-optcomAccepted_bold.nii.gz High-kappa time series. This dataset does not - include thermal noise or low variance components. - Not the recommended dataset for analysis. 
-desc-adaptiveGoodSignal_mask.nii.gz Integer-valued mask used in the workflow, where - each voxel's value corresponds to the number of good - echoes to be used for T2\*/S0 estimation. -desc-PCA_mixing.tsv Mixing matrix (component time series) from PCA - decomposition in a tab-delimited file. Each column is - a different component, and the column name is the - component number. -desc-PCA_decomposition.json Metadata for the PCA decomposition. -desc-PCA_stat-z_components.nii.gz Component weight maps from PCA decomposition. - Each map corresponds to the same component index in - the mixing matrix and component table. - Maps are in z-statistics. -desc-PCA_metrics.tsv TEDPCA component table. A BIDS Derivatives-compatible - TSV file with summary metrics and inclusion/exclusion - information for each component from the PCA - decomposition. -desc-PCA_metrics.json Metadata about the metrics in ``desc-PCA_metrics.tsv``. -desc-ICA_mixing.tsv Mixing matrix (component time series) from ICA - decomposition in a tab-delimited file. Each column is - a different component, and the column name is the - component number. -desc-ICA_components.nii.gz Full ICA coefficient feature set. -desc-ICA_stat-z_components.nii.gz Z-statistic component weight maps from ICA - decomposition. - Values are z-transformed standardized regression - coefficients. Each map corresponds to the same - component index in the mixing matrix and component table. -desc-ICA_decomposition.json Metadata for the ICA decomposition. -desc-tedana_metrics.tsv TEDICA component table. A BIDS Derivatives-compatible - TSV file with summary metrics and inclusion/exclusion - information for each component from the ICA - decomposition. -desc-tedana_metrics.json Metadata about the metrics in - ``desc-tedana_metrics.tsv``. 
-desc-ICAAccepted_components.nii.gz High-kappa ICA coefficient feature set -desc-ICAAcceptedZ_components.nii.gz Z-normalized spatial component maps -report.txt A summary report for the workflow with relevant - citations. -references.bib The BibTeX entries for references cited in - report.txt. -tedana_report.html The interactive HTML report. -================================================ ===================================================== - -If ``verbose`` is set to True: - -============================================================== ===================================================== -Filename Content -============================================================== ===================================================== -desc-limited_T2starmap.nii.gz Limited T2* map/time series. - Values are in seconds. - The difference between the limited and full maps - is that, for voxels affected by dropout where - only one echo contains good data, the full map uses - the S0 estimate from the first two echoes, while the - limited map has a NaN. -desc-limited_S0map.nii.gz Limited S0 map/time series. - The difference between the limited and full maps - is that, for voxels affected by dropout where - only one echo contains good data, the full map uses - the S0 estimate from the first two echoes, while the - limited map has a NaN. -echo-[echo]_desc-[PCA|ICA]_components.nii.gz Echo-wise PCA/ICA component weight maps. -echo-[echo]_desc-[PCA|ICA]R2ModelPredictions_components.nii.gz Component- and voxel-wise R2-model predictions, - separated by echo. -echo-[echo]_desc-[PCA|ICA]S0ModelPredictions_components.nii.gz Component- and voxel-wise S0-model predictions, - separated by echo. -desc-[PCA|ICA]AveragingWeights_components.nii.gz Component-wise averaging weights for metric - calculation. -desc-[PCA|ICA]S0_stat-F_statmap.nii.gz F-statistic map for each component, for the S0 model. -desc-[PCA|ICA]T2_stat-F_statmap.nii.gz F-statistic map for each component, for the T2 model. 
-desc-optcomPCAReduced_bold.nii.gz Optimally combined data after dimensionality - reduction with PCA. This is the input to the ICA. -echo-[echo]_desc-Accepted_bold.nii.gz High-Kappa time series for echo number ``echo`` -echo-[echo]_desc-Rejected_bold.nii.gz Low-Kappa time series for echo number ``echo`` -echo-[echo]_desc-Denoised_bold.nii.gz Denoised time series for echo number ``echo`` -============================================================== ===================================================== - -If ``gscontrol`` includes 'gsr': - -================================================ ===================================================== -Filename Content -================================================ ===================================================== -desc-globalSignal_map.nii.gz Spatial global signal -desc-globalSignal_timeseries.tsv Time series of global signal from optimally combined - data. -desc-optcomWithGlobalSignal_bold.nii.gz Optimally combined time series with global signal - retained. -desc-optcomNoGlobalSignal_bold.nii.gz Optimally combined time series with global signal - removed. 
-================================================ ===================================================== - -If ``gscontrol`` includes 't1c': - -================================================ ===================================================== -Filename Content -================================================ ===================================================== -desc-T1likeEffect_min.nii.gz T1-like effect -desc-optcomAcceptedT1cDenoised_bold.nii.gz T1-corrected high-kappa time series by regression -desc-optcomT1cDenoised_bold.nii.gz T1-corrected denoised time series -desc-TEDICAAcceptedT1cDenoised_components.nii.gz T1-GS corrected high-kappa components -desc-TEDICAT1cDenoised_mixing.tsv T1-GS corrected mixing matrix -================================================ ===================================================== - - -**************** -Component tables -**************** +.. contents:: :local: -TEDPCA and TEDICA use component tables to track relevant metrics, component -classifications, and rationales behind classifications. -The component tables are stored as tsv files for BIDS-compatibility. +.. _output-filename-descriptions: + +***************************** +Output filename descriptions +***************************** + +The output include files for the optimally combined and denoised +data and many additional files to help understand the results and fascilitate +future processing. ``tedana`` allows for multiple file naming conventions. The key labels +and naming options for each convention that can be set using the ``--convention`` option +are in `outputs.json`_. The output of ``tedana`` also includes a file called +``registry.json`` or ``desc-tedana_registry.json`` that includes the keys and the matching +file names for the output. The table below lists both these keys and the default +"BIDS Derivatives" file names. + +.. _outputs.json: https://github.com/ME-ICA/tedana/blob/main/tedana/resources/config/outputs.json + +.. 
_standard-filename-outputs: + +Standard filename outputs +------------------------- -In order to make sense of the rationale codes in the component tables, -consult the tables below. -TEDPCA rationale codes start with a "P", while TEDICA codes start with an "I". +=========================================================================== ===================================================== +Key: Filename Content +=========================================================================== ===================================================== +"registry json": desc-tedana_registry.json Mapping of file name keys to filename locations +"data description json": dataset_description.json Top-level metadata for the workflow. +tedana_report.html The interactive HTML report. +"combined img": desc-optcom_bold.nii.gz Optimally combined time series. +"denoised ts img": desc-optcomDenoised_bold.nii.gz Denoised optimally combined time series. Recommended + dataset for analysis. +"adaptive mask img": desc-adaptiveGoodSignal_mask.nii.gz Integer-valued mask used in the workflow, where + each voxel's value corresponds to the number of good + echoes to be used for T2\*/S0 estimation. Will be + calculated whether original mask estimated within + tedana or user-provided. All voxels with 1 good + echo will be included in outputted time series + but only voxels with at least 3 good echoes will be + used in ICA and metric calculations +"t2star img": T2starmap.nii.gz Full estimated T2* 3D map. + Values are in seconds. If a voxel has at least 1 good + echo then the first two echoes will be used to estimate + a value (an impresise weighting for optimal combination + is better than fully excluding a voxel) +"s0 img": S0map.nii.gz Full S0 3D map. If a voxel has at least 1 good + echo then the first two echoes will be used to estimate + a value +"PCA mixing tsv": desc-PCA_mixing.tsv Mixing matrix (component time series) from PCA + decomposition in a tab-delimited file. 
Each column is + a different component, and the column name is the + component number. +"PCA decomposition json": desc-PCA_decomposition.json Metadata for the PCA decomposition. +"z-scored PCA components img": desc-PCA_stat-z_components.nii.gz Component weight maps from PCA decomposition. + Each map corresponds to the same component index in + the mixing matrix and component table. + Maps are in z-statistics. +"PCA metrics tsv": desc-PCA_metrics.tsv TEDPCA component table. A BIDS Derivatives-compatible + TSV file with summary metrics and inclusion/exclusion + information for each component from the PCA + decomposition. +"PCA metrics json": desc-PCA_metrics.json Metadata about the metrics in ``desc-PCA_metrics.tsv``. +"PCA cross component metrics json": desc-PCACrossComponent_metrics.json Measures calculated across PCA compononents including + values for the full cost function curves for all + AIC, KIC, and MDL cost functions and the number of + components and variance explained for multiple options + Figures for the cost functions and variance explained + are also in + ``./figures//pca_[criteria|variance_explained.png]`` +"ICA mixing tsv": desc-ICA_mixing.tsv Mixing matrix (component time series) from ICA + decomposition in a tab-delimited file. Each column is + a different component, and the column name is the + component number. +"ICA components img": desc-ICA_components.nii.gz Full ICA coefficient feature set. +"z-scored ICA components img": desc-ICA_stat-z_components.nii.gz Z-statistic component weight maps from ICA + decomposition. + Values are z-transformed standardized regression + coefficients. Each map corresponds to the same + component index in the mixing matrix and component table. +"ICA decomposition json": desc-ICA_decomposition.json Metadata for the ICA decomposition. +"ICA metrics tsv": desc-tedana_metrics.tsv TEDICA component table. 
A BIDS Derivatives-compatible + TSV file with summary metrics and inclusion/exclusion + information for each component from the ICA + decomposition. +"ICA metrics json": desc-tedana_metrics.json Metadata about the metrics in + ``desc-tedana_metrics.tsv``. +"ICA cross component metrics json": desc-ICACrossComponent_metrics.json Metric names and values that are each a single number + calculated across components. For example, kappa and + rho elbows. +"ICA decision tree json": desc-ICA_decision_tree A copy of the inputted decision tree specification with + an added "output" field for each node. The output field + contains information about what happened during + execution. +"ICA status table tsv": desc-ICA_status_table.tsv A table where each column lists the classification + status of each component after each node was run. + Columns are only added for runs where component + statuses can change. +"ICA accepted components img": desc-ICAAccepted_components.nii.gz High-kappa ICA coefficient feature set +"z-scored ICA accepted components img": desc-ICAAcceptedZ_components.nii.gz Z-normalized spatial component maps +report.txt A summary report for the workflow with relevant + citations. +"low kappa ts img": desc-optcomRejected_bold.nii.gz Combined time series from rejected components. +"high kappa ts img": desc-optcomAccepted_bold.nii.gz High-kappa time series. This dataset does not + include thermal noise or low variance components. + Not the recommended dataset for analysis. +references.bib The BibTeX entries for references cited in + report.txt. 
+ +=========================================================================== ===================================================== + +If ``verbose`` is set to True +------------------------------ + +============================================================================================= ===================================================== +Key: Filename Content +============================================================================================= ===================================================== +"limited t2star img": desc-limited_T2starmap.nii.gz Limited T2* map/time series. + Values are in seconds. + Unlike the full T2* maps, if only one 1 echo contains + good data the limited map will have NaN +"limited s0 img": desc-limited_S0map.nii.gz Limited S0 map/time series. + Unlike the full S0 maps, if only one 1 echo contains + good data the limited map will have NaN +"whitened img": desc-optcom_whitened_bold The optimally combined data after whitening +"echo weight [PCA|ICA] maps split img": echo-[echo]_desc-[PCA|ICA]_components.nii.gz Echo-wise PCA/ICA component weight maps. +"echo T2 [PCA|ICA] split img": echo-[echo]_desc-[PCA|ICA]T2ModelPredictions_components.nii.gz Component- and voxel-wise R2-model predictions, + separated by echo. +"echo S0 [PCA|ICA] split img": echo-[echo]_desc-[PCA|ICA]S0ModelPredictions_components.nii.gz Component- and voxel-wise S0-model predictions, + separated by echo. +"[PCA|ICA] component weights img": desc-[PCA|ICA]AveragingWeights_components.nii.gz Component-wise averaging weights for metric + calculation. +"[PCA|ICA] component F-S0 img": desc-[PCA|ICA]S0_stat-F_statmap.nii.gz F-statistic map for each component, for the S0 model. +"[PCA|ICA] component F-T2 img": desc-[PCA|ICA]T2_stat-F_statmap.nii.gz F-statistic map for each component, for the T2 model. +"PCA reduced img": desc-optcomPCAReduced_bold.nii.gz Optimally combined data after dimensionality + reduction with PCA. This is the input to the ICA. 
+"high kappa ts split img": echo-[echo]_desc-Accepted_bold.nii.gz High-Kappa time series for echo number ``echo`` +"low kappa ts split img": echo-[echo]_desc-Rejected_bold.nii.gz Low-Kappa time series for echo number ``echo`` +"denoised ts split img": echo-[echo]_desc-Denoised_bold.nii.gz Denoised time series for echo number ``echo`` +============================================================================================= ===================================================== + +If ``tedort`` is True +--------------------- + +======================================================== ===================================================== +Key: Filename Content +======================================================== ===================================================== +"ICA orthogonalized mixing tsv": desc-ICAOrth_mixing.tsv Mixing matrix with rejected components orthogonalized + from accepted components +======================================================== ===================================================== + +If ``gscontrol`` includes 'gsr' +------------------------------- + +================================================================= ===================================================== +Key: Filename Content +================================================================= ===================================================== +"gs img": desc-globalSignal_map.nii.gz Spatial global signal +"global signal time series tsv": desc-globalSignal_timeseries.tsv Time series of global signal from optimally combined + data. +"has gs combined img": desc-optcomWithGlobalSignal_bold.nii.gz Optimally combined time series with global signal + retained. +"removed gs combined img": desc-optcomNoGlobalSignal_bold.nii.gz Optimally combined time series with global signal + removed. 
+================================================================= ===================================================== + +If ``gscontrol`` includes 'mir' +------------------------------- + +(Minimal intensity regression, which may help remove some T1 noise and +was an option in the MEICA v2.5 code, but never fully explained or evaluted in a publication) + +======================================================================================= ===================================================== +Key: Filename Content +======================================================================================= ===================================================== +"t1 like img": desc-T1likeEffect_min.nii.gz T1-like effect +"mir denoised img": desc-optcomMIRDenoised_bold.nii.gz Denoised time series after MIR +"ICA MIR mixing tsv": desc-ICAMIRDenoised_mixing.tsv ICA mixing matrix after MIR +"ICA accepted mir component weights img": desc-ICAAcceptedMIRDenoised_components.nii.gz high-kappa components after MIR +"ICA accepted mir denoised img": desc-optcomAcceptedMIRDenoised_bold.nii.gz high-kappa time series after MIR +======================================================================================= ===================================================== + +.. 
_classification-output-descriptions: + +********************************** +Classification output descriptions +********************************** -=============== ============================================================= -Classification Description -=============== ============================================================= -accepted BOLD-like components included in denoised and high-Kappa data -rejected Non-BOLD components excluded from denoised and high-Kappa data -ignored Low-variance components included in denoised, but excluded - from high-Kappa data -=============== ============================================================= +TEDPCA and TEDICA use component tables to track relevant metrics, component +classifications, and rationales behind classifications. +The component tables and additional information are stored as tsv and json files, +labeled "ICA metrics" and "PCA metrics" in :ref:`standard-filename-outputs`. This section +explains the classification codes in those files in more detail. +:doc:`building_decision_trees` covers the full process, and not just the +descriptions of outputted files. TEDPCA codes -============ +------------ + +In ``tedana`` PCA is used to reduce the number of dimensions (components) in the +dataset. Without this step, the number of components would be one less than +the number of volumes, many of those components would effectively be +Gaussian noise and ICA would not reliably converge. Standard methods for data +reduction use cost functions, like MDL, KIC, and AIC to estimate the variance +that is just noise and remove the lowest variance components under that +threshold. +By default, ``tedana`` uses AIC. +Of those three, AIC is the least aggressive and will retain the most components. + +``Tedana`` includes additional `kundu` and `kundu-stabilize` approaches that +identify and remove components that don't contain T2* or S0 signal and are more +likely to be noise. 
If the `--tedpca kundu` option is used, the PCA_metrics tsv +file will include an accepted vs rejected classification column and also a +rationale column of codes documenting why a PCA component was removed. If MDL, KIC, +or AIC are used then the classification column will exist, but will only +include the accepted components and the rationale column will contain "n/a". +When kundu is used, these are brief explanations of the rationale codes. ===== =============== ======================================================== Code Classification Description @@ -176,29 +242,67 @@ P007 rejected Rho below fmin (only in stabilized PCA decision tree) ===== =============== ======================================================== -TEDICA codes -============ - -===== ================= ======================================================== -Code Classification Description -===== ================= ======================================================== -I001 rejected|accepted Manual classification -I002 rejected Rho greater than Kappa -I003 rejected More significant voxels in S0 model than R2 model -I004 rejected S0 Dice is higher than R2 Dice and high variance - explained -I005 rejected Noise F-value is higher than signal F-value and high - variance explained -I006 ignored No good components found -I007 rejected Mid-Kappa component -I008 ignored Low variance explained -I009 rejected Mid-Kappa artifact type A -I010 rejected Mid-Kappa artifact type B -I011 ignored ign_add0 -I012 ignored ign_add1 -===== ================= ======================================================== - -.. _interactive reports: +ICA Classification Outputs +-------------------------- + +The component table is stored in ``desc-tedana_metrics.tsv`` or +``tedana_metrics.tsv``. +Each row is a component number. +Each column is a metric that is calculated for each component. 
+Short descriptions of each column metric are in the output log, +``tedana_[date_time].tsv``, and the actual metric calculations are in +:mod:`~tedana.metrics.collect`. +The final two columns are ``classification`` and ``classification_tags``. +``classification`` should include **accepted** or **rejected** for every +component and **rejected** components are removed through denoising. +``classification_tags`` provide more information on why +components received a specific classification. +Each component can receive more than one tag. +The following tags are included depending on whether ``--tree`` is "minimal", "kundu", +or if ``ica_reclassify`` is run. + +===================== ================ ======================================== +Tag Included in Tree Explanation +===================== ================ ======================================== +Likely BOLD minimal,kundu Accepted because likely to include some + BOLD signal +Unlikely BOLD minimal,kundu Rejected because likely to include a + lot of non-BOLD signal +Low variance minimal,kundu Accepted because too low variance to + lose a degree-of-freedom by rejecting +Less likely BOLD kundu Rejected based on some edge criteria + based on relative rankings of components +Accept borderline kundu Accepted based on some edge criteria + based on relative rankings of components +No provisional accept kundu Accepted because the kundu tree did + not find any components to consider + accepting, so the conservative "failure" + case is accept everything rather than + rejecting everything +manual reclassify manual_classify Classification based on user input. If + done after automatic selection then + the preceding tag from automatic + selection is retained and this tag + notes the classification was manually + changed +===================== ================ ======================================== + +The decision tree is a list of nodes where the classification of each component +could change. 
The information on which nodes and how classifications changed is +in several places: + +- The information in the output log includes the name of each + node and the count of components that changed classification during execution. +- The same information is stored in the ``ICA decision tree`` json file + (see :ref:`output-filename-descriptions`) in the "output" field for each node. + That information is organized so that it can be used to generate a visual or + text-based summary of what happened when the decision tree was run on a + dataset. +- The ``ICA status table`` lists the classification status of each component + after each node was run. + This is particularly useful when trying to understand how a specific component + ended up receiving its classification. + ********************* ICA Components Report @@ -214,7 +318,7 @@ You can also play around with `our demo`_. Report Structure -================ +---------------- The image below shows a representative report, which has two sections: a) the summary view, and b) the individual component view. @@ -309,7 +413,7 @@ component (selected in the summary view, see below). It includes three different Reports User Interactions -========================= +------------------------- As previously mentioned, all summary plots in the report allow user interactions. While the Kappa/Rho Scatter plot allows full user interaction (see the toolbar that accompanies the plot diff --git a/docs/usage.rst b/docs/usage.rst index 39d76a3ef..8037a80ff 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -7,8 +7,8 @@ Using tedana from the command line #. Acquired echo times (in milliseconds) #. Functional datasets equal to the number of acquired echoes -But you can supply many other options, viewable with ``tedana -h`` or -``t2smap -h``. +But you can supply many other options, viewable with ``tedana -h``, +``ica_reclassify -h``, or ``t2smap -h``. 
For most use cases, we recommend that users call tedana from within existing fMRI preprocessing pipelines such as `fMRIPrep`_ or `afni_proc.py`_. @@ -51,6 +51,22 @@ https://tedana.readthedocs.io/en/latest/outputs.html To examine regions-of-interest with multi-echo data, apply masks after TE Dependent ANAlysis. +.. _ica_reclassify cli: + +*********************************** +Running the ica_reclassify workflow +*********************************** + +``ica_reclassify`` takes the output of ``tedana`` and can be used to manually +reclassify components, re-save denoised classifications following the new +classifications, and log the changes in all relevant output files. The +output files are the same as for ``tedana``: +https://tedana.readthedocs.io/en/latest/outputs.html + +.. argparse:: + :ref: tedana.workflows.ica_reclassify._get_parser + :prog: ica_reclassify + :func: _get_parser .. _t2smap cli: diff --git a/pyproject.toml b/pyproject.toml index d71606108..2e695b542 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,98 @@ [build-system] -requires = ["setuptools==58.2.0", "wheel"] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[project] +name = "tedana" +description = "TE-Dependent Analysis (tedana) of multi-echo functional magnetic resonance imaging (fMRI) data." 
+readme = "README.md" +authors = [{name = "tedana developers"}] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Information Analysis", + "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", +] +license = {file = "LICENSE"} +requires-python = ">=3.8" +dependencies = [ + "bokeh<2.3.0", + "mapca>=0.0.3", + "matplotlib", + "nibabel>=2.5.1", + "nilearn>=0.7", + "numpy>=1.16", + "pandas>=2.0", + "scikit-learn>=0.21", + "scipy>=1.2.0", + "threadpoolctl", + "jinja2==3.0.1", +] +dynamic = ["version"] + +[project.urls] +Homepage = "https://github.com/ME-ICA/tedana" +Documentation = "https://www.tedana.readthedocs.io" +Paper = "https://joss.theoj.org/papers/10.21105/joss.03669" + +[project.optional-dependencies] +doc = [ + "sphinx>=1.5.3", + "sphinx_copybutton", + "sphinx_rtd_theme", + "sphinx-argparse", + "sphinxcontrib-bibtex", +] +tests = [ + "codecov", + "coverage", + "flake8>=3.7", + "flake8-black", + "flake8-isort", + "flake8-pyproject", + "pytest", + "pytest-cov>=4.0.0", + "requests", +] + +# Aliases +all = ["tedana[doc,tests]"] + +[project.scripts] +ica_reclassify = "tedana.workflows.ica_reclassify:_main" +t2smap = "tedana.workflows.t2smap:_main" +tedana = "tedana.workflows.tedana:_main" + +# +# Hatch configurations +# + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.hatch.build.targets.sdist] +exclude = [".git_archival.txt"] # No longer needed in sdist + +[tool.hatch.build.targets.wheel] +packages = ["tedana"] +exclude = [ + "tedana/tests/data", # Large test data directory +] + +## The following two sections configure setuptools_scm in the hatch way + +[tool.hatch.version] +source = "vcs" + +[tool.hatch.build.hooks.vcs] +version-file = "tedana/_version.py" + +# +# Developer tool configurations +# [tool.black] 
line-length = 99 @@ -14,15 +107,46 @@ exclude = ''' | \.github | \.hg | \.pytest_cache + | \.testing_data_cache | _build | build | dist )/ - | get_version.py - | versioneer.py + | tedana/_version.py ) ''' [tool.isort] profile = "black" multi_line_output = 3 + +[tool.flake8] +max-line-length = 99 +exclude = [ + "*build/", + "tedana/_version.py", +] +ignore = ["E203", "E402", "W503"] +per-file-ignores = [ + "*/__init__.py: F401", +] +docstring-convention = "numpy" + +[tool.coverage.run] +branch = true +omit = [ + "*/tests/*", + "*/__init__.py", + "*/conftest.py", + "tedana/_version.py", +] + +[tool.coverage.report] +# Regexes for lines to exclude from consideration +exclude_lines = [ + 'raise NotImplementedError', + 'warnings\.warn', +] + +[tool.pytest] +log_cli = true diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 908a3e800..000000000 --- a/setup.cfg +++ /dev/null @@ -1,88 +0,0 @@ -[metadata] -url = https://github.com/ME-ICA/tedana -license = LGPL 2.1 -author = tedana developers -author_email = emd222@cornell.edu -maintainer = Elizabeth DuPre -maintainer_email = emd222@cornell.edu -description = TE-Dependent Analysis (tedana) of multi-echo functional magnetic resonance imaging (fMRI) data. 
-description-file = README.md -long_description = file:README.md -long_description_content_type = text/markdown -classifiers = - Development Status :: 4 - Beta - Intended Audience :: Science/Research - Topic :: Scientific/Engineering :: Information Analysis - License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL) - Programming Language :: Python :: 3.6 - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - -[options] -python_requires = >= 3.6 -install_requires = - bokeh<2.3.0 - mapca>=0.0.3 - matplotlib - nibabel>=2.5.1 - nilearn>=0.7 - numpy>=1.16 - pandas>=0.24 - scikit-learn>=0.21 - scipy>=1.2.0 - threadpoolctl - jinja2==3.0.1 -packages = find: -include_package_data = False - -[options.extras_require] -doc = - sphinx>=1.5.3 - sphinx_copybutton - sphinx_rtd_theme - sphinx-argparse - sphinxcontrib-bibtex -tests = - codecov - coverage<5.0 - flake8>=3.7 - flake8-black - flake8-isort - pytest - pytest-cov - requests -all = - %(doc)s - %(tests)s - -[options.entry_points] -console_scripts = - t2smap = tedana.workflows.t2smap:_main - tedana = tedana.workflows.tedana:_main - -[options.package_data] -* = - resources/config/* - reporting/data/* - reporting/data/html/* - -[versioneer] -VCS = git -style = pep440 -versionfile_source = tedana/_version.py -versionfile_build = tedana/_version.py -tag_prefix = -parentdir_prefix = - -[flake8] -max-line-length = 99 -exclude=*build/ -ignore = E203,E402,W503 -per-file-ignores = - */__init__.py:F401 -docstring-convention = numpy - -[tool:pytest] -log_cli = true diff --git a/setup.py b/setup.py deleted file mode 100644 index 9c0c61cf8..000000000 --- a/setup.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python -"""tedana setup script.""" -from setuptools import setup - -import versioneer - -if __name__ == "__main__": - setup( - name="tedana", - version=versioneer.get_version(), - 
cmdclass=versioneer.get_cmdclass(), - zip_safe=False, - ) diff --git a/tedana/__about__.py b/tedana/__about__.py new file mode 100644 index 000000000..9403eacc0 --- /dev/null +++ b/tedana/__about__.py @@ -0,0 +1,18 @@ +# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- +# vi: set ft=python sts=4 ts=4 sw=4 et: +"""Base module variables.""" + +try: + from tedana._version import __version__ +except ImportError: + __version__ = "0+unknown" + +__packagename__ = "tedana" +__copyright__ = "Copyright 2023, The ME-ICA Developers" +__credits__ = ( + "Contributors: please check the ``.zenodo.json`` file at the top-level folder" + "of the repository" +) +__url__ = "https://github.com/ME-ICA/tedana" + +DOWNLOAD_URL = f"https://github.com/ME-ICA/{__packagename__}/archive/{__version__}.tar.gz" diff --git a/tedana/__init__.py b/tedana/__init__.py index a26b9ef10..cec68b396 100644 --- a/tedana/__init__.py +++ b/tedana/__init__.py @@ -1,17 +1,17 @@ # -*- coding: utf-8 -*- # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- # vi: set ft=python sts=4 ts=4 sw=4 et: -""" -tedana: A Python package for TE-dependent analysis of multi-echo data. -""" - +"""tedana: A Python package for TE-dependent analysis of multi-echo data.""" import warnings -from ._version import get_versions - -__version__ = get_versions()["version"] +from tedana.__about__ import __copyright__, __credits__, __packagename__, __version__ # cmp is not used, so ignore nipype-generated warnings warnings.filterwarnings("ignore", r"cmp not installed") -del get_versions +__all__ = [ + "__copyright__", + "__credits__", + "__packagename__", + "__version__", +] diff --git a/tedana/_version.py b/tedana/_version.py deleted file mode 100644 index bede1575c..000000000 --- a/tedana/_version.py +++ /dev/null @@ -1,543 +0,0 @@ -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). 
Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). - git_refnames = "$Format:%d$" - git_full = "$Format:%H$" - git_date = "$Format:%ci$" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "pep440" - cfg.tag_prefix = "" - cfg.parentdir_prefix = "" - cfg.versionfile_source = "tedana/_version.py" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): - """Call the given command(s).""" - assert isinstance(commands, 
list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen( - [c] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - ) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. 
When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. 
We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] - if verbose: - print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command( - GITS, - ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix], - cwd=root, - ) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? 
- pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Eexceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. 
- for i in cfg.versionfile_source.split("/"): - root = os.path.dirname(root) - except NameError: - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None, - } - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } diff --git a/tedana/decomposition/pca.py b/tedana/decomposition/pca.py index ce18fe840..dfee16772 100644 --- a/tedana/decomposition/pca.py +++ b/tedana/decomposition/pca.py @@ -159,9 +159,8 @@ def tedpca( - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`. - Nonsignificant variance explained. - Outputs: - - This function writes out several files: + Generated Files + --------------- =========================== ============================================= Default Filename Content diff --git a/tedana/docs.py b/tedana/docs.py new file mode 100644 index 000000000..8154d716a --- /dev/null +++ b/tedana/docs.py @@ -0,0 +1,172 @@ +"""Documentation to be injected into docstrings.""" +import sys + +################################### +# Standard documentation entries +docdict = dict() + +docdict[ + "selector" +] = """ +selector : :obj:`tedana.selection.component_selector.ComponentSelector` + The selector to perform decision tree-based component selection with. +""" + +docdict[ + "if_true" +] = """ +if_true : :obj:`str` + If the condition in this step is True, give the component classification this + label. Use 'nochange' if no label changes are desired. +""" + +docdict[ + "if_false" +] = """ +if_false : :obj:`str` + If the condition in this step is False, give the component classification this + label. 
Use 'nochange' to indicate if no label changes are desired. +""" + +docdict[ + "decide_comps" +] = """ +decide_comps : :obj:`str` or :obj:`list[str]` + What classification(s) to operate on. using default or + intermediate_classification labels. For example: decide_comps='unclassified' + means to operate only on unclassified components. Use 'all' to include all + components. +""" + +docdict[ + "log_extra_report" +] = """ +log_extra_report : :obj:`str` + Additional text to the report log. Default="". +""" + +docdict[ + "log_extra_info" +] = """ +log_extra_info : :obj:`str` + Additional text to the information log. Default="". +""" + +docdict[ + "only_used_metrics" +] = """ +only_used_metrics : :obj:`bool` + If True, only return the component_table metrics that would be used. Default=False. +""" + +docdict[ + "custom_node_label" +] = """ +custom_node_label : :obj:`str` + A short label to describe what happens in this step. If "" then a label is + automatically generated. Default="". +""" + +docdict[ + "tag_if_true" +] = """ +tag_if_true : :obj:`str` + The classification tag to apply if a component is classified True. Default="". +""" + +docdict[ + "tag_if_false" +] = """ +tag_if_false : :obj:`str` + The classification tag to apply if a component is classified False. Default="". +""" + +docdict[ + "selector" +] = """ +selector : :obj:`~tedana.selection.component_selector.ComponentSelector` + If only_used_metrics is False, the updated selector is returned. +""" + +docdict[ + "used_metrics" +] = """ +used_metrics : :obj:`set(str)` + If only_used_metrics is True, the names of the metrics used in the + function are returned. +""" + +docdict_indented = {} + + +def _indentcount_lines(lines): + """Minimum indent for all lines in line list. 
+ + >>> lines = [' one', ' two', ' three'] + >>> _indentcount_lines(lines) + 1 + >>> lines = [] + >>> _indentcount_lines(lines) + 0 + >>> lines = [' one'] + >>> _indentcount_lines(lines) + 1 + >>> _indentcount_lines([' ']) + 0 + + """ + indentno = sys.maxsize + for line in lines: + stripped = line.lstrip() + if stripped: + indentno = min(indentno, len(line) - len(stripped)) + if indentno == sys.maxsize: + return 0 + return indentno + + +def fill_doc(f): + """Fill a docstring with docdict entries. + + Parameters + ---------- + f : callable + The function to fill the docstring of. Will be modified in place. + + Returns + ------- + f : callable + The function, potentially with an updated ``__doc__``. + + """ + docstring = f.__doc__ + if not docstring: + return f + lines = docstring.splitlines() + # Find the minimum indent of the main docstring, after first line + if len(lines) < 2: + icount = 0 + else: + icount = _indentcount_lines(lines[1:]) + # Insert this indent to dictionary docstrings + try: + indented = docdict_indented[icount] + except KeyError: + indent = " " * icount + docdict_indented[icount] = indented = {} + for name, dstr in docdict.items(): + lines = dstr.splitlines() + try: + newlines = [lines[0]] + for line in lines[1:]: + newlines.append(indent + line) + indented[name] = "\n".join(newlines) + except IndexError: + indented[name] = dstr + try: + f.__doc__ = docstring % indented + except (TypeError, ValueError, KeyError) as exp: + funcname = f.__name__ + funcname = docstring.split("\n")[0] if funcname is None else funcname + raise RuntimeError(f"Error documenting {funcname}:\n{str(exp)}") + return f diff --git a/tedana/io.py b/tedana/io.py index d5a82892b..0389ffc67 100644 --- a/tedana/io.py +++ b/tedana/io.py @@ -10,6 +10,7 @@ import os.path as op from copy import deepcopy from string import Formatter +from typing import List import nibabel as nib import numpy as np @@ -23,6 +24,33 @@ LGR = logging.getLogger("GENERAL") RepLGR = 
logging.getLogger("REPORT") +ALLOWED_COMPONENT_DELIMITERS = ( + "\t", + "\n", + " ", + ",", +) + + +class CustomEncoder(json.JSONEncoder): + """Class for converting some types because of JSON serialization and numpy incompatibilities. + + See here: https://stackoverflow.com/q/50916422/2589328 + """ + + def default(self, obj): + # int64 non-serializable but is a numpy output + if isinstance(obj, np.int32) or isinstance(obj, np.int64): + return int(obj) + + # containers that are not serializable + if isinstance(obj, np.ndarray): + return obj.tolist() + if isinstance(obj, set): + return list(obj) + + return super(CustomEncoder, self).default(obj) + class OutputGenerator: """A class for managing tedana outputs. @@ -42,6 +70,8 @@ class OutputGenerator: descriptions. Default is "auto", which uses tedana's default configuration file. make_figures : bool, optional Whether or not to actually make a figures directory + overwrite : bool, optional + Whether to force overwrites of data. Default False. Attributes ---------- @@ -58,8 +88,12 @@ class OutputGenerator: This will correspond to a "figures" subfolder of ``out_dir``. prefix : str Prefix to prepend to output filenames. + overwrite : bool + Whether to force file overwrites. verbose : bool - Whether or not to generate verbose output + Whether or not to generate verbose output. 
+ registry : dict + A registry of all files saved """ def __init__( @@ -70,7 +104,9 @@ def __init__( prefix="", config="auto", make_figures=True, + overwrite=False, verbose=False, + old_registry=None, ): if config == "auto": config = op.join(utils.get_resource_path(), "config", "outputs.json") @@ -89,13 +125,25 @@ def __init__( f"({', '.join(v.keys())})" ) cfg[k] = v[convention] + self.config = cfg self.reference_img = check_niimg(reference_img) self.convention = convention self.out_dir = op.abspath(out_dir) self.figures_dir = op.join(out_dir, "figures") self.prefix = prefix + "_" if prefix != "" else "" + self.overwrite = overwrite self.verbose = verbose + self.registry = {} + if old_registry: + root = old_registry["root"] + rel_root = op.relpath(root, start=self.out_dir) + del old_registry["root"] + for k, v in old_registry.items(): + if isinstance(v, list): + self.registry[k] = [op.join(rel_root, vv) for vv in v] + else: + self.registry[k] = op.join(rel_root, v) if not op.isdir(self.out_dir): LGR.info(f"Generating output directory: {self.out_dir}") @@ -137,6 +185,16 @@ def _determine_extension(self, description, name): return extension + def register_input(self, names): + """Register input filenames. + + Parameters + ---------- + names : list[str] + The list of filenames being input as multi-echo volumes. + """ + self.registry["input img"] = [op.relpath(name, start=self.out_dir) for name in names] + def get_name(self, description, **kwargs): """Generate a file full path to simplify file output. @@ -194,6 +252,13 @@ def save_file(self, data, description, **kwargs): The full file path of the saved file. """ name = self.get_name(description, **kwargs) + if op.exists(name) and not self.overwrite: + raise RuntimeError( + f"File {name} already exists. In order to allow overwrite " + "please use the --overwrite option in the command line or the " + "overwrite parameter in the Python API." 
+ ) + if description.endswith("img"): self.save_img(data, name) elif description.endswith("json"): @@ -201,6 +266,10 @@ def save_file(self, data, description, **kwargs): self.save_json(prepped, name) elif description.endswith("tsv"): self.save_tsv(data, name) + else: + raise ValueError(f"Unsupported file {description}") + + self.registry[description] = op.basename(name) return name @@ -219,8 +288,12 @@ def save_img(self, data, name): Will coerce 64-bit float and int arrays into 32-bit arrays. """ data_type = type(data) - if not isinstance(data, np.ndarray): + if isinstance(data, nib.nifti1.Nifti1Image): + data.to_filename(name) + return + elif not isinstance(data, np.ndarray): raise TypeError(f"Data supplied must of type np.ndarray, not {data_type}.") + if data.ndim not in (1, 2): raise TypeError(f"Data must have number of dimensions in (1, 2), not {data.ndim}") @@ -249,8 +322,9 @@ def save_json(self, data, name): data_type = type(data) if not isinstance(data, dict): raise TypeError(f"data must be a dict, not type {data_type}.") + with open(name, "w") as fo: - json.dump(data, fo, indent=4, sort_keys=True) + json.dump(data, fo, indent=4, sort_keys=True, cls=CustomEncoder) def save_tsv(self, data, name): """Save DataFrame to a tsv file. 
@@ -265,10 +339,54 @@ def save_tsv(self, data, name): data_type = type(data) if not isinstance(data, pd.DataFrame): raise TypeError(f"data must be pd.Data, not type {data_type}.") - if versiontuple(pd.__version__) >= versiontuple("1.5.2"): - data.to_csv(name, sep="\t", lineterminator="\n", na_rep="n/a", index=False) + + # Replace blanks with numpy NaN + deblanked = data.replace("", np.nan) + deblanked.to_csv(name, sep="\t", lineterminator="\n", na_rep="n/a", index=False) + + def save_self(self): + fname = self.save_file(self.registry, "registry json") + return fname + + +class InputHarvester: + """Class for turning a registry file into a lookup table to get previous data.""" + + loaders = { + "json": lambda f: load_json(f), + "tsv": lambda f: pd.read_csv(f, delimiter="\t"), + "img": lambda f: nib.load(f), + } + + def __init__(self, path): + self._full_path = op.abspath(path) + self._base_dir = op.dirname(self._full_path) + self._registry = load_json(self._full_path) + + def get_file_path(self, description): + if description in self._registry.keys(): + return op.join(self._base_dir, self._registry[description]) else: - data.to_csv(name, sep="\t", line_terminator="\n", na_rep="n/a", index=False) + return None + + def get_file_contents(self, description): + """Get file contents. + Notes + ----- + Since we restrict to just these three types, this function should always return. + If more types are added, the loaders dict will need to be updated with an appropriate + loader. 
+ """ + for ftype, loader in InputHarvester.loaders.items(): + if ftype in description: + return loader(self.get_file_path(description)) + + @property + def registry(self): + """The underlying file registry, including the root directory.""" + d = self._registry + d["root"] = self._base_dir + return d def versiontuple(v): @@ -317,8 +435,7 @@ def load_json(path: str) -> dict: def add_decomp_prefix(comp_num, prefix, max_value): - """ - Create component name with leading zeros matching number of components + """Create component name with leading zeros matching number of components. Parameters ---------- @@ -389,8 +506,7 @@ def denoise_ts(data, mmix, mask, comptable): # File Writing Functions def write_split_ts(data, mmix, mask, comptable, io_generator, echo=0): - """ - Splits `data` into denoised / noise / ignored time series and saves to disk + """Split `data` into denoised / noise / ignored time series and save to disk. Parameters ---------- @@ -414,9 +530,8 @@ def write_split_ts(data, mmix, mask, comptable, io_generator, echo=0): varexpl : :obj:`float` Percent variance of data explained by extracted + retained components - Notes - ----- - This function writes out several files: + Generated Files + --------------- ============================ ============================================ Filename Content @@ -454,8 +569,7 @@ def write_split_ts(data, mmix, mask, comptable, io_generator, echo=0): def writeresults(ts, mask, comptable, mmix, n_vols, io_generator): - """ - Denoises `ts` and saves all resulting files to disk + """Denoise `ts` and save all resulting files to disk. 
Parameters ---------- @@ -475,9 +589,8 @@ def writeresults(ts, mask, comptable, mmix, n_vols, io_generator): ref_img : :obj:`str` or img_like Reference image to dictate how outputs are saved to disk - Notes - ----- - This function writes out several files: + Generated Files + --------------- ========================================= ===================================== Filename Content @@ -516,8 +629,7 @@ def writeresults(ts, mask, comptable, mmix, n_vols, io_generator): def writeresults_echoes(catd, mmix, mask, comptable, io_generator): - """ - Saves individually denoised echos to disk + """Save individually denoised echos to disk. Parameters ---------- @@ -534,9 +646,8 @@ def writeresults_echoes(catd, mmix, mask, comptable, io_generator): ref_img : :obj:`str` or img_like Reference image to dictate how outputs are saved to disk - Notes - ----- - This function writes out several files: + Generated Files + --------------- ===================================== =================================== Filename Content @@ -553,7 +664,6 @@ def writeresults_echoes(catd, mmix, mask, comptable, io_generator): -------- tedana.io.write_split_ts: Writes out the files. """ - for i_echo in range(catd.shape[1]): LGR.info("Writing Kappa-filtered echo #{:01d} timeseries".format(i_echo + 1)) write_split_ts(catd[:, i_echo, :], mmix, mask, comptable, io_generator, echo=(i_echo + 1)) @@ -617,8 +727,7 @@ def load_data(data, n_echos=None): # Helper Functions def new_nii_like(ref_img, data, affine=None, copy_header=True): - """ - Coerces `data` into NiftiImage format like `ref_img` + """Coerce `data` into NiftiImage format like `ref_img`. 
Parameters ---------- @@ -636,7 +745,6 @@ def new_nii_like(ref_img, data, affine=None, copy_header=True): nii : :obj:`nibabel.nifti1.Nifti1Image` NiftiImage """ - ref_img = check_niimg(ref_img) newdata = data.reshape(ref_img.shape[:3] + data.shape[1:]) if ".nii" not in ref_img.valid_exts: @@ -651,8 +759,7 @@ def new_nii_like(ref_img, data, affine=None, copy_header=True): def split_ts(data, mmix, mask, comptable): - """ - Splits `data` time series into accepted component time series and remainder + """Split `data` time series into accepted component time series and remainder. Parameters ---------- @@ -690,11 +797,11 @@ def split_ts(data, mmix, mask, comptable): def prep_data_for_json(d) -> dict: - """Attempts to create a JSON serializable dictionary from a data dictionary + """Attempt to create a JSON serializable dictionary from a data dictionary. Parameters ---------- - d: dict + d : dict A dictionary that will be converted into something JSON serializable Raises @@ -729,7 +836,87 @@ def prep_data_for_json(d) -> dict: v = v.tolist() elif isinstance(v, np.int64) or isinstance(v, np.uint64): v = int(v) + # NOTE: add more special cases for type conversions above this # comment line as an elif block d[k] = v return d + + +def str_to_component_list(s: str) -> List[int]: + """Convert a string to a list of component indices. + + Parameters + ---------- + s: str + The string to convert into a list of component indices. + + Returns + ------- + List[int] of component indices. 
+ + Raises + ------ + ValueError, if the string cannot be split by an allowed delimeter + """ + # Strip off newline at end in case we've been given a one-line file + if s[-1] == "\n": + s = s[:-1] + + # Search across all allowed delimiters for a match + for d in ALLOWED_COMPONENT_DELIMITERS: + possible_list = s.split(d) + if len(possible_list) > 1: + # We have a likely hit + # Check to see if extra delimeter at end and get rid of it + if possible_list[-1] == "": + possible_list = possible_list[:-1] + break + elif len(possible_list) == 1 and possible_list[0].isnumeric(): + # We have a likely hit and there is just one component + break + + # Make sure we can actually convert this split list into an integer + # Crash with a sensible error if not + for x in possible_list: + try: + int(x) + except ValueError: + raise ValueError( + "While parsing component list, failed to convert to int." + f' Offending element is "{x}", offending string is "{s}".' + ) + + return [int(x) for x in possible_list] + + +def fname_to_component_list(fname: str) -> List[int]: + """Read a file of component indices. + + Parameters + ---------- + fname: str + The name of the file to read the list of component indices from. + + Returns + ------- + List[int] of component indices. + + Raises + ------ + ValueError, if the string cannot be split by an allowed delimeter or the + csv file cannot be interpreted. 
+ """ + if fname[-3:] == "csv": + contents = pd.read_csv(fname) + columns = contents.columns + if len(columns) == 2 and "0" in columns: + return contents["0"].tolist() + elif len(columns) >= 2 and "Components" in columns: + return contents["Components"].tolist() + else: + raise ValueError(f"Cannot determine a components column in file {fname}") + + with open(fname, "r") as fp: + contents = fp.read() + return str_to_component_list(contents) diff --git a/tedana/metrics/collect.py b/tedana/metrics/collect.py index ada13ffe1..f4c731876 100644 --- a/tedana/metrics/collect.py +++ b/tedana/metrics/collect.py @@ -6,11 +6,10 @@ import pandas as pd from tedana import io, utils +from tedana.metrics import dependence +from tedana.metrics._utils import dependency_resolver, determine_signs, flip_components from tedana.stats import getfbounds -from . import dependence -from ._utils import dependency_resolver, determine_signs, flip_components - LGR = logging.getLogger("GENERAL") RepLGR = logging.getLogger("REPORT") @@ -532,15 +531,7 @@ def get_metadata(comptable): ), }, } - if "original_rationale" in comptable: - metric_metadata["original_rationale"] = { - "LongName": "Original rationale", - "Description": ( - "The reason for the original classification. " - "Please see tedana's documentation for information about " - "possible rationales." - ), - } + if "classification" in comptable: metric_metadata["classification"] = { "LongName": "Component classification", @@ -554,13 +545,20 @@ def get_metadata(comptable): ), }, } + if "classification_tags" in comptable: + metric_metadata["classification_tags"] = { + "LongName": "Component classification tags", + "Description": ( + "A single tag or a comma separated list of tags to describe why a component" + " received its classification" + ), + } if "rationale" in comptable: metric_metadata["rationale"] = { "LongName": "Rationale for component classification", "Description": ( "The reason for the original classification. 
" - "Please see tedana's documentation for information about " - "possible rationales." + "This column label was replaced with classification_tags in late 2022" ), } if "kappa ratio" in comptable: diff --git a/tedana/reporting/dynamic_figures.py b/tedana/reporting/dynamic_figures.py index 87a987eab..60b16d04e 100644 --- a/tedana/reporting/dynamic_figures.py +++ b/tedana/reporting/dynamic_figures.py @@ -124,6 +124,7 @@ def _create_data_struct(comptable_path, color_mapping=color_mapping): color=df["color"], size=df["var_exp_size"], classif=df["classification"], + classtag=df["classification_tags"], angle=df["angle"], ) ) @@ -131,7 +132,7 @@ def _create_data_struct(comptable_path, color_mapping=color_mapping): return cds -def _create_kr_plt(comptable_cds): +def _create_kr_plt(comptable_cds, kappa_elbow=None, rho_elbow=None): """ Create Dymamic Kappa/Rho Scatter Plot @@ -140,6 +141,10 @@ def _create_kr_plt(comptable_cds): comptable_cds: bokeh.models.ColumnDataSource Data structure containing a limited set of columns from the comp_table + kappa_elbow, rho_elbow: :obj:`float` :obj:`int` + The elbow thresholds for kappa and rho to display on the plots + Defaults=None + Returns ------- fig: bokeh.plotting.figure.Figure @@ -152,6 +157,7 @@ def _create_kr_plt(comptable_cds): ("Kappa", "@kappa{0.00}"), ("Rho", "@rho{0.00}"), ("Var. 
Expl.", "@varexp{0.00}%"), + ("Tags", "@classtag"), ] ) fig = plotting.figure( @@ -171,6 +177,50 @@ def _create_kr_plt(comptable_cds): source=comptable_cds, legend_group="classif", ) + + if rho_elbow: + rho_elbow_line = models.Span( + location=rho_elbow, + dimension="width", + line_color="#000033", + line_width=1, + line_alpha=0.75, + line_dash="dashed", + name="rho elbow", + ) + rho_elbow_label = models.Label( + x=300, + y=rho_elbow * 1.02, + x_units="screen", + text="rho elbow", + text_color="#000033", + text_alpha=0.75, + text_font_size="10px", + ) + fig.add_layout(rho_elbow_line) + fig.add_layout(rho_elbow_label) + if kappa_elbow: + kappa_elbow_line = models.Span( + location=kappa_elbow, + dimension="height", + line_color="#000033", + line_width=1, + line_alpha=0.75, + line_dash="dashed", + name="kappa elbow", + ) + kappa_elbow_label = models.Label( + x=kappa_elbow * 1.02, + y=300, + y_units="screen", + text="kappa elbow", + text_color="#000033", + text_alpha=0.75, + text_font_size="10px", + ) + fig.add_layout(kappa_elbow_line) + fig.add_layout(kappa_elbow_label) + fig.xaxis.axis_label = "Kappa" fig.yaxis.axis_label = "Rho" fig.toolbar.logo = None @@ -181,7 +231,7 @@ def _create_kr_plt(comptable_cds): def _create_sorted_plt( - comptable_cds, n_comps, x_var, y_var, title=None, x_label=None, y_label=None + comptable_cds, n_comps, x_var, y_var, title=None, x_label=None, y_label=None, elbow=None ): """ Create dynamic sorted plots @@ -206,6 +256,10 @@ def _create_sorted_plt( y_label: str Y-axis label + elbow: :obj:`float` :obj:`int` + The elbow threshold for kappa or rho to display on the plot + Default=None + Returns ------- fig: bokeh.plotting.figure.Figure @@ -217,6 +271,7 @@ def _create_sorted_plt( ("Kappa", "@kappa{0.00}"), ("Rho", "@rho{0.00}"), ("Var. 
Expl.", "@varexp{0.00}%"), + ("Tags", "@classtag"), ] ) fig = plotting.figure( @@ -236,6 +291,28 @@ def _create_sorted_plt( fig.x_range = models.Range1d(-1, n_comps + 1) fig.toolbar.logo = None + if elbow: + elbow_line = models.Span( + location=elbow, + dimension="width", + line_color="#000033", + line_width=1, + line_alpha=0.75, + line_dash="dashed", + name="elbow", + ) + elbow_label = models.Label( + x=20, + y=elbow * 1.02, + x_units="screen", + text="elbow", + text_color="#000033", + text_alpha=0.75, + text_font_size="10px", + ) + fig.add_layout(elbow_line) + fig.add_layout(elbow_label) + return fig @@ -250,6 +327,7 @@ def _create_varexp_pie_plt(comptable_cds, n_comps): ("Kappa", "@kappa{0.00}"), ("Rho", "@rho{0.00}"), ("Var. Exp.", "@varexp{0.00}%"), + ("Tags", "@classtag"), ], ) fig.wedge( diff --git a/tedana/reporting/html_report.py b/tedana/reporting/html_report.py index 956898e9a..a83fd1809 100644 --- a/tedana/reporting/html_report.py +++ b/tedana/reporting/html_report.py @@ -1,3 +1,4 @@ +import logging import os from os.path import join as opj from pathlib import Path @@ -8,8 +9,11 @@ from bokeh import embed, layouts, models from tedana import __version__ +from tedana.io import load_json from tedana.reporting import dynamic_figures as df +LGR = logging.getLogger("GENERAL") + def _generate_buttons(out_dir): resource_path = Path(__file__).resolve().parent.joinpath("data", "html") @@ -124,15 +128,65 @@ def generate_report(io_generator, tr): comptable_path = io_generator.get_name("ICA metrics tsv") comptable_cds = df._create_data_struct(comptable_path) + # Load the cross component metrics, including the kappa & rho elbows + cross_component_metrics_path = io_generator.get_name("ICA cross component metrics json") + cross_comp_metrics_dict = load_json(cross_component_metrics_path) + + def get_elbow_val(elbow_prefix): + """ + Find cross component metrics that begin with elbow_prefix and output the value + Current prefixes are kappa_elbow_kundu and rho_elbow_kundu. 
This flexability + means anything that begins [kappa/rho]_elbow will be found and used regardless + of the suffix. If more than one metric has the prefix then the alphabetically + first one will be used and a warning will be logged + """ + + elbow_keys = [k for k in cross_comp_metrics_dict.keys() if elbow_prefix in k] + elbow_keys.sort() + if len(elbow_keys) == 0: + LGR.warning( + f"No {elbow_prefix} saved in cross_component_metrics so not displaying in report" + ) + return None + elif len(elbow_keys) == 1: + return cross_comp_metrics_dict[elbow_keys[0]] + else: + printed_key = elbow_keys[0] + unprinted_keys = elbow_keys[1:] + + LGR.warning( + "More than one key saved in cross_component_metrics begins with " + f"{elbow_prefix}. The lines on the plots will be for {printed_key} " + f"NOT {unprinted_keys}" + ) + return cross_comp_metrics_dict[printed_key] + + kappa_elbow = get_elbow_val("kappa_elbow") + rho_elbow = get_elbow_val("rho_elbow") + # Create kappa rho plot - kappa_rho_plot = df._create_kr_plt(comptable_cds) + kappa_rho_plot = df._create_kr_plt(comptable_cds, kappa_elbow=kappa_elbow, rho_elbow=rho_elbow) # Create sorted plots kappa_sorted_plot = df._create_sorted_plt( - comptable_cds, n_comps, "kappa_rank", "kappa", "Kappa Rank", "Kappa" + comptable_cds, + n_comps, + "kappa_rank", + "kappa", + title="Kappa Rank", + x_label="Components sorted by Kappa", + y_label="Kappa", + elbow=kappa_elbow, ) rho_sorted_plot = df._create_sorted_plt( - comptable_cds, n_comps, "rho_rank", "rho", "Rho Rank", "Rho" + comptable_cds, + n_comps, + "rho_rank", + "rho", + title="Rho Rank", + x_label="Components sorted by Rho", + y_label="Rho", + elbow=rho_elbow, ) varexp_pie_plot = df._create_varexp_pie_plt(comptable_cds, n_comps) diff --git a/tedana/reporting/static_figures.py b/tedana/reporting/static_figures.py index 74e24793d..3740f1637 100644 --- a/tedana/reporting/static_figures.py +++ b/tedana/reporting/static_figures.py @@ -192,17 +192,18 @@ def comp_figures(ts, mask, 
comptable, mmix, io_generator, png_cmap): expl_text = "" # Remove trailing ';' from rationale column - comptable["rationale"] = comptable["rationale"].str.rstrip(";") + # comptable["rationale"] = comptable["rationale"].str.rstrip(";") for compnum in comptable.index.values: if comptable.loc[compnum, "classification"] == "accepted": line_color = "g" - expl_text = "accepted" + expl_text = "accepted reason(s): " + str(comptable.loc[compnum, "classification_tags"]) elif comptable.loc[compnum, "classification"] == "rejected": line_color = "r" - expl_text = "rejection reason(s): " + comptable.loc[compnum, "rationale"] + expl_text = "rejected reason(s): " + str(comptable.loc[compnum, "classification_tags"]) + elif comptable.loc[compnum, "classification"] == "ignored": line_color = "k" - expl_text = "ignored reason(s): " + comptable.loc[compnum, "rationale"] + expl_text = "ignored reason(s): " + str(comptable.loc[compnum, "classification_tags"]) else: # Classification not added # If new, this will keep code running diff --git a/tedana/resources/config/outputs.json b/tedana/resources/config/outputs.json index 6019f0a29..622ab3bbe 100644 --- a/tedana/resources/config/outputs.json +++ b/tedana/resources/config/outputs.json @@ -57,7 +57,7 @@ }, "whitened img": { "orig": "ts_OC_whitened", - "bidsv1.5.0": "desc-optcomPCAReduced_bold" + "bidsv1.5.0": "desc-optcom_whitened_bold" }, "echo weight PCA map split img": { "orig": "e{echo}_PCA_comp", @@ -165,7 +165,7 @@ }, "PCA cross component metrics json": { "orig": "pca_cross_component_metrics", - "bidsv1.5.0": "desc-PCA_cross_component_metrics" + "bidsv1.5.0": "desc-PCACrossComponent_metrics" }, "ICA decomposition json": { "orig": "ica_decomposition", @@ -191,6 +191,18 @@ "orig": "ica_metrics", "bidsv1.5.0": "desc-tedana_metrics" }, + "ICA cross component metrics json": { + "orig": "ica_cross_component_metrics", + "bidsv1.5.0": "desc-ICACrossComponent_metrics" + }, + "ICA status table tsv": { + "orig": "ica_status_table", + 
"bidsv1.5.0": "desc-ICA_status_table" + }, + "ICA decision tree json": { + "orig": "ica_decision_tree", + "bidsv1.5.0": "desc-ICA_decision_tree" + }, "global signal time series tsv": { "orig": "global_signal_ts", "bidsv1.5.0": "desc-globalSignal_timeseries" @@ -202,5 +214,9 @@ "ICA orthogonalized mixing tsv": { "orig": "ica_orth_mixing", "bidsv1.5.0": "desc-ICAOrth_mixing" + }, + "registry json": { + "orig": "registry", + "bidsv1.5.0": "desc-tedana_registry" } -} +} \ No newline at end of file diff --git a/tedana/resources/decision_trees/kundu.json b/tedana/resources/decision_trees/kundu.json new file mode 100644 index 000000000..72f0c5770 --- /dev/null +++ b/tedana/resources/decision_trees/kundu.json @@ -0,0 +1,464 @@ +{ + "tree_id": "kundu_MEICA27_decision_tree", + "info": "Following the full decision tree designed by Prantik Kundu", + "report": "This is based on the criteria of the MEICA v2.5 decision tree", + "refs": "Kundu 2013", + "necessary_metrics": [ + "kappa", + "rho", + "countsigFS0", + "countsigFT2", + "dice_FS0", + "dice_FT2", + "signal-noise_t", + "variance explained", + "d_table_score", + "countnoise" + ], + "generated_metrics": [ + "d_table_score_node20", + "varex kappa ratio" + ], + "intermediate_classifications": [ + "provisionalaccept", + "unclass_highvar" + ], + "classification_tags": [ + "Likely BOLD", + "Unlikely BOLD", + "Less likely BOLD", + "Low variance", + "Accept borderline", + "No provisional accept" + ], + "nodes": [ + { + "functionname": "manual_classify", + "parameters": { + "new_classification": "unclassified", + "decide_comps": "all" + }, + "kwargs": { + "log_extra_report": "", + "clear_classification_tags": true, + "dont_warn_reclassify": true + } + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "rejected", + "if_false": "nochange", + "decide_comps": "all", + "op": ">", + "left": "rho", + "right": "kappa" + }, + "kwargs": { + "tag_if_true": "Unlikely BOLD" + }, + "_comment": "Code I002 in 
premodularized tedana" + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "rejected", + "if_false": "nochange", + "decide_comps": "all", + "op": ">", + "left": "countsigFS0", + "right": "countsigFT2" + }, + "kwargs": { + "left2": "countsigFT2", + "op2": ">", + "right2": 0, + "tag_if_true": "Unlikely BOLD" + }, + "_comment": "Code I003 in premodularized tedana" + }, + { + "functionname": "calc_median", + "parameters": { + "decide_comps": "all", + "metric_name": "variance explained", + "median_label": "varex" + } + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "rejected", + "if_false": "nochange", + "decide_comps": "all", + "op": ">", + "left": "dice_FS0", + "right": "dice_FT2" + }, + "kwargs": { + "left2": "variance explained", + "op2": ">", + "right2": "median_varex", + "tag_if_true": "Unlikely BOLD" + }, + "_comment": "Code I004 in premodularized tedana" + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "rejected", + "if_false": "nochange", + "decide_comps": "all", + "op": ">", + "left": 0, + "right": "signal-noise_t" + }, + "kwargs": { + "left2": "variance explained", + "op2": ">", + "right2": "median_varex", + "tag_if_true": "Unlikely BOLD" + }, + "_comment": "Code I005 in premodularized tedana" + }, + { + "functionname": "calc_kappa_elbow", + "parameters": { + "decide_comps": "all" + }, + "kwargs": { + "log_extra_info": "", + "log_extra_report": "" + }, + "_comment": "" + }, + { + "functionname": "dec_reclassify_high_var_comps", + "parameters": { + "decide_comps": "unclassified", + "new_classification": "unclass_highvar" + }, + "kwargs": { + "log_extra_info": "", + "log_extra_report": "" + }, + "_comment": "" + }, + { + "functionname": "calc_rho_elbow", + "parameters": { + "decide_comps": "all" + }, + "kwargs": { + "subset_decide_comps": "unclassified", + "rho_elbow_type": "kundu", + "log_extra_info": "", + "log_extra_report": "" + }, + "_comment": "" + }, + { + 
"functionname": "dec_left_op_right", + "parameters": { + "if_true": "provisionalaccept", + "if_false": "nochange", + "decide_comps": "unclassified", + "op": ">=", + "left": "kappa", + "right": "kappa_elbow_kundu" + }, + "kwargs": { + "log_extra_report": "" + } + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "unclassified", + "if_false": "nochange", + "decide_comps": [ + "provisionalaccept" + ], + "op": ">", + "left": "rho", + "right": "rho_elbow_kundu" + }, + "kwargs": { + "log_extra_report": "" + } + }, + { + "functionname": "dec_classification_doesnt_exist", + "parameters": { + "new_classification": "accepted", + "decide_comps": [ + "provisionalaccept", + "unclassified", + "unclass_highvar" + ], + "class_comp_exists": "provisionalaccept" + }, + "kwargs": { + "at_least_num_exist": 2, + "tag": "No provisional accept", + "log_extra_info": "If nothing is provisionally accepted by this point, then rerun ICA & selection. If max iterations of rerunning done, then accept everything not already rejected", + "log_extra_report": "" + }, + "_comment": "Code I006 in premodularized tedana" + }, + { + "functionname": "calc_varex_thresh", + "parameters": { + "decide_comps": "provisionalaccept", + "thresh_label": "upper", + "percentile_thresh": 90 + }, + "kwargs": {} + }, + { + "functionname": "calc_varex_thresh", + "parameters": { + "decide_comps": "provisionalaccept", + "thresh_label": "lower", + "percentile_thresh": 25 + }, + "kwargs": {} + }, + { + "functionname": "calc_extend_factor", + "parameters": {}, + "kwargs": {}, + "_comment": "This is a scaling number that is used for a few thresholds. 
2 if fewer than 90 fMRI volumes, 3 if more than 110 and linear in-between" + }, + { + "functionname": "calc_max_good_meanmetricrank", + "parameters": { + "decide_comps": "provisionalaccept" + }, + "kwargs": {}, + "_comment": "Number of provisionalaccept components * extend_factor" + }, + { + "functionname": "calc_varex_kappa_ratio", + "parameters": { + "decide_comps": "provisionalaccept" + }, + "kwargs": {}, + "_comment": "This is used to calculate the new 'varex kappa ratio' column in the component_table" + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "rejected", + "if_false": "nochange", + "decide_comps": [ + "provisionalaccept", + "unclassified", + "unclass_highvar" + ], + "op": ">", + "left": "d_table_score", + "right": "max_good_meanmetricrank" + }, + "kwargs": { + "op2": ">", + "left2": "variance explained", + "right2": "varex_upper_thresh", + "right2_scale": "extend_factor", + "log_extra_info": "If variance and d_table_scores are high, then reject", + "tag_if_true": "Less likely BOLD" + }, + "_comment": "Code I007 in premodularized tedana. 
One of several steps that makes it more likely to reject high variance components" + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "accepted", + "if_false": "nochange", + "decide_comps": [ + "provisionalaccept", + "unclassified", + "unclass_highvar" + ], + "op": ">", + "left": "d_table_score", + "right": "max_good_meanmetricrank" + }, + "kwargs": { + "tag_if_true": "Low variance", + "op2": "<=", + "left2": "variance explained", + "right2": "varex_lower_thresh", + "op3": "<=", + "left3": "kappa", + "right3": "kappa_elbow_kundu", + "log_extra_info": "If low variance, accept even if bad kappa & d_table_scores" + }, + "_comment": "Code I008 in premodularized tedana" + }, + { + "functionname": "dec_classification_doesnt_exist", + "parameters": { + "new_classification": "accepted", + "decide_comps": [ + "provisionalaccept", + "unclassified", + "unclass_highvar" + ], + "class_comp_exists": [ + "unclassified", + "unclass_highvar" + ] + }, + "kwargs": { + "tag": "Likely BOLD", + "log_extra_info": "If nothing left is unclassified, then accept all", + "log_extra_report": "" + }, + "_comment": "No code in premodularized tedana" + }, + { + "functionname": "calc_revised_meanmetricrank_guesses", + "parameters": { + "decide_comps": [ + "provisionalaccept", + "unclassified", + "unclass_highvar" + ] + }, + "kwargs": {}, + "_comment": "Add more here" + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "rejected", + "if_false": "nochange", + "decide_comps": [ + "provisionalaccept", + "unclassified", + "unclass_highvar" + ], + "op": ">", + "left": "d_table_score_node20", + "right": "conservative_guess" + }, + "kwargs": { + "tag_if_true": "Less likely BOLD", + "op2": ">", + "left2": "varex kappa ratio", + "right2": "extend_factor", + "right2_scale": 2, + "op3": ">", + "left3": "variance explained", + "right3": "varex_upper_thresh", + "right3_scale": "extend_factor", + "log_extra_info": "Reject if a combination of kappa, variance, 
and other factors are ranked worse than others" + }, + "_comment": "Code I009 in premodularized tedana. Quirky combination 1 of a bunch of metrics that deal with rejecting some edge cases" + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "rejected", + "if_false": "nochange", + "decide_comps": [ + "provisionalaccept", + "unclassified", + "unclass_highvar" + ], + "op": ">", + "left": "d_table_score_node20", + "right": "num_acc_guess" + }, + "kwargs": { + "tag_if_true": "Less likely BOLD", + "right_scale": 0.9, + "op2": ">", + "left2": "variance explained", + "right2": "varex_lower_thresh", + "right2_scale": "extend_factor", + "log_extra_info": "Reject if a combination of variance and ranks of other metrics are worse than others" + }, + "_comment": "Code I010 in premodularized tedana. Quirky combination 2 of a bunch of metrics that deal with rejecting some edge cases" + }, + { + "functionname": "calc_varex_thresh", + "parameters": { + "decide_comps": [ + "provisionalaccept", + "unclassified", + "unclass_highvar" + ], + "thresh_label": "new_lower", + "percentile_thresh": 25 + }, + "kwargs": { + "num_highest_var_comps": "num_acc_guess" + } + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "accepted", + "if_false": "nochange", + "decide_comps": [ + "provisionalaccept", + "unclassified", + "unclass_highvar" + ], + "op": ">", + "left": "d_table_score_node20", + "right": "num_acc_guess" + }, + "kwargs": { + "tag_if_true": "Accept borderline", + "op2": ">", + "left2": "variance explained", + "right2": "varex_new_lower_thresh", + "log_extra_info": "Accept components with a bad d_table_score, but are at the higher end of the remaining variance so more cautious to not remove" + }, + "_comment": "Code I011 in premodularized tedana. Yet another quirky criterion, but this one to keep components. In the original tree, varex_new_lower_thresh would be lower than it is here. 
If there are differences in results, might be worth adding a scaling factor" + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "accepted", + "if_false": "nochange", + "decide_comps": [ + "provisionalaccept", + "unclassified", + "unclass_highvar" + ], + "op": "<=", + "left": "kappa", + "right": "kappa_elbow_kundu" + }, + "kwargs": { + "tag_if_true": "Accept borderline", + "op2": ">", + "left2": "variance explained", + "right2": "varex_new_lower_thresh", + "log_extra_info": "For not already rejected components, accept ones below the kappa elbow, but at the higher end of the remaining variance so more cautious to not remove" + }, + "_comment": "Code I012 in premodularized tedana. Yet another quirky criterion, but this one to keep components. In the original tree, varex_new_lower_thresh might be lower than it is here. If there are differences in results, might be worth adding a scaling factor" + }, + { + "functionname": "manual_classify", + "parameters": { + "new_classification": "accepted", + "decide_comps": [ + "provisionalaccept", + "unclassified", + "unclass_highvar" + ] + }, + "kwargs": { + "log_extra_info": "Anything still provisional (accepted or rejected) should be accepted", + "log_extra_report": "", + "tag": "Likely BOLD" + }, + "_comment": "No code in the premodularized tedana" + } + ] +} \ No newline at end of file diff --git a/tedana/resources/decision_trees/minimal.json b/tedana/resources/decision_trees/minimal.json new file mode 100644 index 000000000..8ce4ff68a --- /dev/null +++ b/tedana/resources/decision_trees/minimal.json @@ -0,0 +1,231 @@ +{ + "tree_id": "minimal_decision_tree_test1", + "info": "Proposed minimal decision tree", + "report": "This is based on the minimal criteria of the original MEICA decision tree without the more aggressive noise removal steps", + "refs": "Kundu 2013; DuPre, Salo, 2021", + "necessary_metrics": [ + "kappa", + "rho", + "countsigFS0", + "countsigFT2", + "dice_FS0", + "dice_FT2", + 
"signal-noise_t", + "variance explained" + ], + "intermediate_classifications": [ + "provisionalaccept", + "provisionalreject" + ], + "classification_tags": [ + "Likely BOLD", + "Unlikely BOLD", + "Low variance" + ], + "nodes": [ + { + "functionname": "manual_classify", + "parameters": { + "new_classification": "unclassified", + "decide_comps": "all" + }, + "kwargs": { + "log_extra_report": "", + "clear_classification_tags": true, + "dont_warn_reclassify": true + } + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "rejected", + "if_false": "nochange", + "decide_comps": "all", + "op": ">", + "left": "rho", + "right": "kappa" + }, + "kwargs": { + "log_extra_report": "", + "tag_if_true": "Unlikely BOLD" + } + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "rejected", + "if_false": "nochange", + "decide_comps": "all", + "op": ">", + "left": "countsigFS0", + "right": "countsigFT2" + }, + "kwargs": { + "left2": "countsigFT2", + "op2": ">", + "right2": 0, + "log_extra_report": "", + "tag_if_true": "Unlikely BOLD" + } + }, + { + "functionname": "calc_median", + "parameters": { + "decide_comps": "all", + "metric_name": "variance explained", + "median_label": "varex" + } + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "rejected", + "if_false": "nochange", + "decide_comps": "all", + "op": ">", + "left": "dice_FS0", + "right": "dice_FT2" + }, + "kwargs": { + "left2": "variance explained", + "op2": ">", + "right2": "median_varex", + "log_extra_report": "", + "tag_if_true": "Unlikely BOLD" + } + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "rejected", + "if_false": "nochange", + "decide_comps": "all", + "op": ">", + "left": 0, + "right": "signal-noise_t" + }, + "kwargs": { + "left2": "variance explained", + "op2": ">", + "right2": "median_varex", + "log_extra_report": "", + "tag_if_true": "Unlikely BOLD" + } + }, + { + "functionname": "calc_kappa_elbow", + 
"parameters": { + "decide_comps": "all" + }, + "kwargs": { + "log_extra_info": "", + "log_extra_report": "" + }, + "_comment": "" + }, + { + "functionname": "calc_rho_elbow", + "parameters": { + "decide_comps": "all" + }, + "kwargs": { + "subset_decide_comps": "unclassified", + "rho_elbow_type": "liberal", + "log_extra_info": "", + "log_extra_report": "" + }, + "_comment": "" + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "provisionalaccept", + "if_false": "provisionalreject", + "decide_comps": "unclassified", + "op": ">=", + "left": "kappa", + "right": "kappa_elbow_kundu" + }, + "kwargs": { + "log_extra_report": "" + } + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "accepted", + "if_false": "nochange", + "decide_comps": "provisionalaccept", + "op": ">", + "left": "kappa", + "right": "rho" + }, + "kwargs": { + "log_extra_info": "If kappa>elbow and kappa>2*rho accept even if rho>elbow", + "log_extra_report": "", + "right_scale": 2, + "tag_if_true": "Likely BOLD" + } + }, + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "provisionalreject", + "if_false": "nochange", + "decide_comps": [ + "provisionalreject", + "provisionalaccept" + ], + "op": ">", + "left": "rho", + "right": "rho_elbow_liberal" + }, + "kwargs": { + "log_extra_report": "" + } + }, + { + "functionname": "dec_variance_lessthan_thresholds", + "parameters": { + "if_true": "accepted", + "if_false": "nochange", + "decide_comps": "provisionalreject" + }, + "kwargs": { + "var_metric": "variance explained", + "log_extra_info": "", + "log_extra_report": "", + "single_comp_threshold": 0.1, + "all_comp_threshold": 1.0, + "tag_if_true": "Low variance" + } + }, + { + "functionname": "manual_classify", + "parameters": { + "new_classification": "accepted", + "decide_comps": "provisionalaccept" + }, + "kwargs": { + "log_extra_info": "", + "log_extra_report": "", + "tag": "Likely BOLD" + } + }, + { + "functionname": 
"manual_classify", + "parameters": { + "new_classification": "rejected", + "decide_comps": [ + "provisionalreject", + "unclassified" + ] + }, + "kwargs": { + "log_extra_info": "", + "log_extra_report": "", + "tag": "Unlikely BOLD" + } + } + ] +} \ No newline at end of file diff --git a/tedana/selection/__init__.py b/tedana/selection/__init__.py index b689de6b0..8a2f3dc5f 100644 --- a/tedana/selection/__init__.py +++ b/tedana/selection/__init__.py @@ -1,7 +1,7 @@ # emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- # ex: set sts=4 ts=4 sw=4 et: -from .tedica import kundu_selection_v2, manual_selection +from .tedica import automatic_selection from .tedpca import kundu_tedpca -__all__ = ["kundu_tedpca", "kundu_selection_v2", "manual_selection"] +__all__ = ["kundu_tedpca", "automatic_selection"] diff --git a/tedana/selection/_utils.py b/tedana/selection/_utils.py deleted file mode 100644 index d5b004447..000000000 --- a/tedana/selection/_utils.py +++ /dev/null @@ -1,119 +0,0 @@ -""" -Utility functions for tedana.selection -""" -import logging - -import numpy as np - -LGR = logging.getLogger("GENERAL") -RepLGR = logging.getLogger("REPORT") - - -def clean_dataframe(comptable): - """ - Reorder columns in component table so "rationale" and "classification" are - last and remove trailing semicolons from rationale column. - """ - cols_at_end = ["classification", "rationale"] - comptable = comptable[ - [c for c in comptable if c not in cols_at_end] + [c for c in cols_at_end if c in comptable] - ] - comptable["rationale"] = comptable["rationale"].str.rstrip(";") - return comptable - - -def getelbow_cons(arr, return_val=False): - """ - Elbow using mean/variance method - conservative - - Parameters - ---------- - arr : (C,) array_like - Metric (e.g., Kappa or Rho) values. - return_val : :obj:`bool`, optional - Return the value of the elbow instead of the index. 
Default: False - - Returns - ------- - :obj:`int` or :obj:`float` - Either the elbow index (if return_val is True) or the values at the - elbow index (if return_val is False) - """ - if arr.ndim != 1: - raise ValueError("Parameter arr should be 1d, not {0}d".format(arr.ndim)) - - if not arr.size: - raise ValueError( - "Empty array detected during elbow calculation. " - "This error happens when getelbow_cons is incorrectly called on no components. " - "If you see this message, please open an issue at " - "https://github.com/ME-ICA/tedana/issues with the full traceback and any data " - "necessary to reproduce this error, so that we create additional data checks to " - "prevent this from happening." - ) - - arr = np.sort(arr)[::-1] - nk = len(arr) - temp1 = [ - (arr[nk - 5 - ii - 1] > arr[nk - 5 - ii : nk].mean() + 2 * arr[nk - 5 - ii : nk].std()) - for ii in range(nk - 5) - ] - ds = np.array(temp1[::-1], dtype=np.int32) - dsum = [] - c_ = 0 - for d_ in ds: - c_ = (c_ + d_) * d_ - dsum.append(c_) - e2 = np.argmax(np.array(dsum)) - elind = np.max([getelbow(arr), e2]) - - if return_val: - return arr[elind] - else: - return elind - - -def getelbow(arr, return_val=False): - """ - Elbow using linear projection method - moderate - - Parameters - ---------- - arr : (C,) array_like - Metric (e.g., Kappa or Rho) values. - return_val : :obj:`bool`, optional - Return the value of the elbow instead of the index. Default: False - - Returns - ------- - :obj:`int` or :obj:`float` - Either the elbow index (if return_val is True) or the values at the - elbow index (if return_val is False) - """ - if arr.ndim != 1: - raise ValueError("Parameter arr should be 1d, not {0}d".format(arr.ndim)) - - if not arr.size: - raise ValueError( - "Empty array detected during elbow calculation. " - "This error happens when getelbow is incorrectly called on no components. 
" - "If you see this message, please open an issue at " - "https://github.com/ME-ICA/tedana/issues with the full traceback and any data " - "necessary to reproduce this error, so that we create additional data checks to " - "prevent this from happening." - ) - - arr = np.sort(arr)[::-1] - n_components = arr.shape[0] - coords = np.array([np.arange(n_components), arr]) - p = coords - coords[:, 0].reshape(2, 1) - b = p[:, -1] - b_hat = np.reshape(b / np.sqrt((b**2).sum()), (2, 1)) - proj_p_b = p - np.dot(b_hat.T, p) * np.tile(b_hat, (1, n_components)) - d = np.sqrt((proj_p_b**2).sum(axis=0)) - k_min_ind = d.argmax() - - if return_val: - return arr[k_min_ind] - else: - return k_min_ind diff --git a/tedana/selection/component_selector.py b/tedana/selection/component_selector.py new file mode 100644 index 000000000..0e82db931 --- /dev/null +++ b/tedana/selection/component_selector.py @@ -0,0 +1,551 @@ +""" +Functions that include workflows to identify and label +TE-dependent and TE-independent components. 
+""" +import inspect +import logging +import os.path as op + +from numpy import asarray + +from tedana.io import load_json +from tedana.selection import selection_nodes +from tedana.selection.selection_utils import ( + clean_dataframe, + confirm_metrics_exist, + log_classification_counts, +) +from tedana.utils import get_resource_path + +LGR = logging.getLogger("GENERAL") +RepLGR = logging.getLogger("REPORT") +RefLGR = logging.getLogger("REFERENCES") + +# These are the names of the json files containing decision +# trees that are stored in the ./resouces/decision_trees/ directory +# A user can run the desision tree either using one of these +# names or by giving the full path to a tree in a different +# location +DEFAULT_TREES = ["minimal", "kundu"] + + +class TreeError(Exception): + """ + Passes errors that are raised when `validate_tree` fails + """ + + pass + + +def load_config(tree): + """Load the json file with the decision tree and validate the fields in the decision tree. + + Parameters + ---------- + tree : :obj:`str` + The named tree or path to a JSON file that defines one + + Returns + ------- + tree : :obj:`dict` + A validated decision tree for the component selection process. + """ + + if tree in DEFAULT_TREES: + fname = op.join(get_resource_path(), "decision_trees", tree + ".json") + else: + fname = tree + + try: + dectree = load_json(fname) + except FileNotFoundError: + raise ValueError( + f"Cannot find tree {tree}. Please check your path or use a " + f"default tree ({DEFAULT_TREES})." + ) + except IsADirectoryError: + raise ValueError( + f"Tree {tree} is a directory. Please supply a JSON file or " + f"default tree ({DEFAULT_TREES})." + ) + + return validate_tree(dectree) + + +def validate_tree(tree): + """Confirm that provided `tree` is a valid decision tree. 
+ + Parameters + ---------- + tree : :obj:`dict` + Ostensible decision tree for the component selection process + + Returns + ------- + tree : :obj:`dict` + Validated decision tree dictionary + + Raises + ------ + TreeError + """ + + # Set the fields that should always be present + err_msg = "" + tree_expected_keys = [ + "tree_id", + "info", + "report", + "refs", + "necessary_metrics", + "intermediate_classifications", + "classification_tags", + "nodes", + ] + defaults = {"selector", "decision_node_idx"} + default_classifications = {"nochange", "accepted", "rejected", "unclassified"} + default_decide_comps = {"all", "accepted", "rejected", "unclassified"} + + # Confirm that the required fields exist + missing_keys = set(tree_expected_keys) - set(tree.keys()) + if missing_keys: + # If there are missing keys, this function may crash before the end. + # End function here with a clear error message rather than adding + # `if assert tree.get()` statements before every section + raise TreeError("\n" + f"Decision tree missing required fields: {missing_keys}") + + # Warn if unused fields exist + unused_keys = set(tree.keys()) - set(tree_expected_keys) - set(["used_metrics"]) + # Make sure some fields don't trigger a warning; hacky, sorry + ok_to_not_use = ( + "reconstruct_from", + "generated_metrics", + ) + for k in ok_to_not_use: + if k in unused_keys: + unused_keys.remove(k) + if unused_keys: + LGR.warning(f"Decision tree includes fields that are not used or logged {unused_keys}") + + # Combine the default classifications with the user inputted classifications + all_classifications = set(tree.get("intermediate_classifications")) | set( + default_classifications + ) + all_decide_comps = set(tree.get("intermediate_classifications")) | set(default_decide_comps) + for i, node in enumerate(tree["nodes"]): + # Make sure each function defined in a node exists + try: + fcn = getattr(selection_nodes, node.get("functionname")) + sig = inspect.signature(fcn) + except 
(AttributeError, TypeError): + err_msg += f"Node {i} has invalid functionname parameter: {node.get('functionname')}\n" + continue + + # Get a functions parameters and compare to parameters defined in the tree + pos = set([p for p, i in sig.parameters.items() if i.default is inspect.Parameter.empty]) + kwargs = set(sig.parameters.keys()) - pos + + missing_pos = pos - set(node.get("parameters").keys()) - defaults + if len(missing_pos) > 0: + err_msg += f"Node {i} is missing required parameter(s): {missing_pos}\n" + + invalid_params = set(node.get("parameters").keys()) - pos + if len(invalid_params) > 0: + err_msg += ( + f"Node {i} has additional, undefined required parameters: {invalid_params}\n" + ) + + # Only if kwargs are inputted, make sure they are all valid + if node.get("kwargs") is not None: + invalid_kwargs = set(node.get("kwargs").keys()) - kwargs + if len(invalid_kwargs) > 0: + err_msg += ( + f"Node {i} has additional, undefined optional parameters (kwargs): " + f"{invalid_kwargs}\n" + ) + + # Gather all the classification labels used in each tree both for + # changing classifications and for decide_comps which defines which + # component classifications to use in each node then make sure these + # classifications are in the predefined list. + # It's important to require a predefined list of classifications + # beccuse spelling inconsistencies cause problems and are hard to + # catch. 
For example if a node is applied to "provisionalaccept" + # nodes, but a previous node classified components as + # "provisionalaccepted" they won't be included and there might not + # be any other warnings + compclass = set() + if "if_true" in node.get("parameters").keys(): + tmp_comp = node["parameters"]["if_true"] + if isinstance(tmp_comp, str): + tmp_comp = [tmp_comp] + compclass = compclass | set(tmp_comp) + if "if_false" in node.get("parameters").keys(): + tmp_comp = node["parameters"]["if_false"] + if isinstance(tmp_comp, str): + tmp_comp = [tmp_comp] + compclass = compclass | set(tmp_comp) + nonstandard_labels = compclass.difference(all_classifications) + if nonstandard_labels: + LGR.warning(f"{compclass} in node {i} of the decision tree includes a classification") + if "decide_comps" in node.get("parameters").keys(): + tmp_comp = node["parameters"]["decide_comps"] + if isinstance(tmp_comp, str): + tmp_comp = [tmp_comp] + compclass = set(tmp_comp) + nonstandard_labels = compclass.difference(all_decide_comps) + if nonstandard_labels: + LGR.warning( + f"{compclass} in node {i} of the decision tree includes a classification " + "label that was not predefined" + ) + + if node.get("kwargs") is not None: + tagset = set() + if "tag_if_true" in node.get("kwargs").keys(): + tagset.update(set([node["kwargs"]["tag_if_true"]])) + if "tag_if_false" in node.get("kwargs").keys(): + tagset.update(set([node["kwargs"]["tag_if_false"]])) + if "tag" in node.get("kwargs").keys(): + tagset.update(set([node["kwargs"]["tag"]])) + undefined_classification_tags = tagset.difference(set(tree.get("classification_tags"))) + if undefined_classification_tags: + LGR.warning( + f"{tagset} in node {i} of the decision tree includes a classification " + "tag that was not predefined" + ) + + if err_msg: + raise TreeError("\n" + err_msg) + + return tree + + +class ComponentSelector: + """Load and classify components based on a specified ``tree``.""" + + def __init__(self, tree, component_table, 
cross_component_metrics={}, status_table=None): + """Initialize the class using the info specified in the json file ``tree``. + + Parameters + ---------- + tree : :obj:`str` + The named tree or path to a JSON file that defines one. + component_table : (C x M) :obj:`pandas.DataFrame` + Component metric table. One row for each component, with a column for + each metric; the index should be the component number. + cross_component_metrics : :obj:`dict` + Metrics that are each a single value calculated across components. + Default is empty dictionary. + status_table : :obj:`pandas.DataFrame` + A table tracking the status of each component at each step. + Pass a status table if running additional steps on a decision + tree that was already executed. Default=None. + + Notes + ----- + Initializing the ``ComponentSelector`` confirms tree is valid and + loads all information in the tree json file into ``ComponentSelector``. + + Adds to the ``ComponentSelector``: + + - component_status_table: empty dataframe or contents of inputted status_table + - cross_component_metrics: empty dict or contents of inputed values + - used_metrics: empty set + + Any parameter that is used by a decision tree node function can be passed + as a parameter in the ``ComponentSelector`` initialization or can be + included in the json file that defines the decision tree. + If a parameter is set in the json file, that will take precedence. + As a style rule, a parameter that is the same regardless of the inputted data should be + defined in the decision tree json file. + A parameter that is dataset-specific should be passed through the initialization function. + Dataset-specific parameters that may need to be passed during initialization include: + + n_echos : :obj:`int` + Number of echos in multi-echo fMRI data. 
+ Required for kundu and minimal trees + n_vols : :obj:`int` + Number of volumes (time points) in the fMRI data + Required for kundu tree + + An example initialization with these options would look like + ``selector = ComponentSelector(tree, comptable, n_echos=n_echos, n_vols=n_vols)`` + """ + + self.tree_name = tree + + self.__dict__.update(cross_component_metrics) + self.cross_component_metrics = cross_component_metrics + + # Construct an un-executed selector + self.component_table = component_table.copy() + + # To run a decision tree, each component needs to have an initial classification + # If the classification column doesn't exist, create it and label all components + # as unclassified + if "classification" not in self.component_table: + self.component_table["classification"] = "unclassified" + + self.tree = load_config(self.tree_name) + tree_config = self.tree + + LGR.info("Performing component selection with " + tree_config["tree_id"]) + LGR.info(tree_config.get("info", "")) + RepLGR.info(tree_config.get("report", "")) + RefLGR.info(tree_config.get("refs", "")) + + self.tree["nodes"] = tree_config["nodes"] + self.necessary_metrics = set(tree_config["necessary_metrics"]) + self.intermediate_classifications = tree_config["intermediate_classifications"] + self.classification_tags = set(tree_config["classification_tags"]) + if "used_metrics" not in self.tree.keys(): + self.tree["used_metrics"] = set() + else: + self.tree["used_metrics"] = set(self.tree["used_metrics"]) + + if status_table is None: + self.component_status_table = self.component_table[ + ["Component", "classification"] + ].copy() + self.component_status_table = self.component_status_table.rename( + columns={"classification": "initialized classification"} + ) + self.start_idx = 0 + else: + # Since a status table exists, we need to skip nodes up to the + # point where the last tree finished + self.start_idx = len(tree_config["nodes"]) + LGR.info(f"Start is {self.start_idx}") + 
self.component_status_table = status_table + + def select(self): + """Apply the decision tree to data. + + Using the validated tree in ``ComponentSelector`` to run the decision + tree functions to calculate cross_component metrics and classify + each component as accepted or rejected. + + Notes + ------- + The selection process uses previously calculated parameters stored in + `component_table` for each ICA component such as Kappa (a T2* weighting metric), + Rho (an S0 weighting metric), and variance explained. If a necessary metric + is not calculated, this will not run. See `tedana.metrics` for more detail on + the calculated metrics + + This can be used on a component_table with no component classifications or to alter + classifications on a component_table that was already run (i.e. for manual + classificaiton changes after visual inspection) + + When this is run, multiple elements in `ComponentSelector` will change including: + + - component_table: ``classification`` column with ``accepted`` or ``rejected`` labels + and ``classification_tags`` column with can hold multiple comma-separated labels + explaining why a classification happened + - cross_component_metrics: Any values that were calculated based on the metric + values across components or by direct user input + - component_status_table: Contains the classification statuses at each node in + the decision tree + - used_metrics: A list of metrics used in the selection process + - nodes: The original tree definition with an added ``outputs`` key listing + everything that changed in each node + - current_node_idx: The total number of nodes run in ``ComponentSelector`` + """ + + if "classification_tags" not in self.component_table.columns: + self.component_table["classification_tags"] = "" + + # this will crash the program with an error message if not all + # necessary_metrics are in the comptable + confirm_metrics_exist( + self.component_table, self.necessary_metrics, function_name=self.tree_name + ) + + # 
for each node in the decision tree + for self.current_node_idx, node in enumerate( + self.tree["nodes"][self.start_idx :], start=self.start_idx + ): + # parse the variables to use with the function + fcn = getattr(selection_nodes, node["functionname"]) + + params = node["parameters"] + + params = self.check_null(params, node["functionname"]) + + if "kwargs" in node: + kwargs = node["kwargs"] + kwargs = self.check_null(kwargs, node["functionname"]) + all_params = {**params, **kwargs} + else: + kwargs = None + all_params = {**params} + + LGR.debug( + f"Step {self.current_node_idx}: Running function {node['functionname']} " + f"with parameters: {all_params}" + ) + # run the decision node function + if kwargs is not None: + self = fcn(self, **params, **kwargs) + else: + self = fcn(self, **params) + + self.tree["used_metrics"].update( + self.tree["nodes"][self.current_node_idx]["outputs"]["used_metrics"] + ) + + # log the current counts for all classification labels + log_classification_counts(self.current_node_idx, self.component_table) + LGR.debug( + f"Step {self.current_node_idx} Full outputs: " + f"{self.tree['nodes'][self.current_node_idx]['outputs']}" + ) + + # move decision columns to end + self.component_table = clean_dataframe(self.component_table) + # warning anything called a necessary metric wasn't used and if + # anything not called a necessary metric was used + self.are_only_necessary_metrics_used() + + self.are_all_components_accepted_or_rejected() + + def add_manual(self, indices, classification): + """Add nodes that will manually classify components. + + Parameters + ---------- + indices : :obj:`list[int]` + The indices to manually classify + classification : :obj:`str` + The classification to set the nodes to (i.e. 
accepted or rejected) + """ + self.tree["nodes"].append( + { + "functionname": "manual_classify", + "parameters": { + "new_classification": classification, + "decide_comps": indices, + }, + "kwargs": { + "dont_warn_reclassify": "true", + "tag": "manual reclassify", + }, + } + ) + + def check_null(self, params, fcn): + """ + Checks that all required parameters for selection node functions are + attributes in the class. Error if any are undefined + + Returns + ------- + params: :obj:`dict` + The keys and values for the inputted parameters + """ + + for key, val in params.items(): + if val is None: + try: + params[key] = getattr(self, key) + except AttributeError: + raise ValueError( + f"Parameter {key} is required in node {fcn}, but not defined. " + f"If {key} is dataset specific, it should be " + "defined in the initialization of " + "ComponentSelector. If it is fixed regardless of dataset, it " + "should be defined in the json file that defines the " + "decision tree." + ) + + return params + + def are_only_necessary_metrics_used(self): + """ + Check if all metrics that are declared as necessary are actually + used and if any used_metrics weren't explicitly declared necessary. + If either of these happen, a warning is added to the logger + """ + necessary_metrics = self.necessary_metrics + not_declared = self.tree["used_metrics"] - necessary_metrics + not_used = necessary_metrics - self.tree["used_metrics"] + if len(not_declared) > 0: + LGR.warning( + f"Decision tree {self.tree_name} used the following metrics that were " + f"not declared as necessary: {not_declared}" + ) + if len(not_used) > 0: + LGR.warning( + f"Decision tree {self.tree_name} did not use the following metrics " + f"that were declared as necessary: {not_used}" + ) + + def are_all_components_accepted_or_rejected(self): + """ + After the tree has finished executing, check if all component + classifications are either "accepted" or "rejected". 
+ If any other component classifications remain, log a warning + """ + component_classifications = set(self.component_table["classification"].to_list()) + nonfinal_classifications = component_classifications.difference({"accepted", "rejected"}) + if nonfinal_classifications: + for nonfinal_class in nonfinal_classifications: + numcomp = asarray(self.component_table["classification"] == nonfinal_class).sum() + LGR.warning( + f"{numcomp} components have a final classification of {nonfinal_class}. " + "At the end of the selection process, all components are expected " + "to be 'accepted' or 'rejected'" + ) + + @property + def n_comps(self): + """The number of components in the component table.""" + return len(self.component_table) + + @property + def likely_bold_comps(self): + """A boolean :obj:`pandas.Series` of components that are tagged "Likely BOLD".""" + likely_bold_comps = self.component_table["classification_tags"].copy() + for idx in range(len(likely_bold_comps)): + if "Likely BOLD" in likely_bold_comps.loc[idx]: + likely_bold_comps.loc[idx] = True + else: + likely_bold_comps.loc[idx] = False + return likely_bold_comps + + @property + def n_likely_bold_comps(self): + """The number of components that are tagged "Likely BOLD".""" + return self.likely_bold_comps.sum() + + @property + def accepted_comps(self): + """A boolean :obj:`pandas.Series` of components that are accepted.""" + return self.component_table["classification"] == "accepted" + + @property + def n_accepted_comps(self): + """The number of components that are accepted.""" + return self.accepted_comps.sum() + + @property + def rejected_comps(self): + """A boolean :obj:`pandas.Series` of components that are rejected.""" + return self.component_table["classification"] == "rejected" + + def to_files(self, io_generator): + """Convert this selector into component files. 
+ + Parameters + ---------- + io_generator : :obj:`tedana.io.OutputGenerator` + The output generator to use for filename generation and saving. + """ + io_generator.save_file(self.component_table, "ICA metrics tsv") + io_generator.save_file( + self.cross_component_metrics, + "ICA cross component metrics json", + ) + io_generator.save_file(self.component_status_table, "ICA status table tsv") + io_generator.save_file(self.tree, "ICA decision tree json") diff --git a/tedana/selection/selection_nodes.py b/tedana/selection/selection_nodes.py new file mode 100644 index 000000000..4a91c22a0 --- /dev/null +++ b/tedana/selection/selection_nodes.py @@ -0,0 +1,1872 @@ +"""Functions that will be used as steps in a decision tree.""" +import logging + +import numpy as np +import pandas as pd +from scipy.stats import scoreatpercentile + +from tedana.docs import fill_doc +from tedana.metrics.dependence import generate_decision_table_score +from tedana.selection.selection_utils import ( + change_comptable_classifications, + confirm_metrics_exist, + get_extend_factor, + kappa_elbow_kundu, + log_decision_tree_step, + rho_elbow_kundu_liberal, + selectcomps2use, +) + +LGR = logging.getLogger("GENERAL") +RepLGR = logging.getLogger("REPORT") +RefLGR = logging.getLogger("REFERENCES") + + +@fill_doc +def manual_classify( + selector, + decide_comps, + new_classification, + clear_classification_tags=False, + log_extra_report="", + log_extra_info="", + custom_node_label="", + only_used_metrics=False, + tag=None, + dont_warn_reclassify=False, +): + """Assign a classification defined in new_classification to the components in decide_comps. + + Parameters + ---------- + %(selector)s + %(decide_comps)s + new_classification : :obj:`str` + Assign all components identified in decide_comps the classification + in new_classification. 
Options are 'unclassified', 'accepted', + 'rejected', or intermediate_classification labels predefined in the + decision tree + clear_classification_tags : :obj:`bool` + If True, reset all values in the 'classification_tags' column to empty + strings. This also can create the classification_tags column if it + does not already exist. If False, do nothing. + tag : :obj:`str` + A classification tag to assign to all components being reclassified. + This should be one of the tags defined by classification_tags in + the decision tree specification + dont_warn_reclassify : :obj:`bool` + By default, if this function changes a component classification from accepted or + rejected to something else, it gives a warning, since those should be terminal + classifications. If this is True, that warning is suppressed. + (Useful if manual_classify is used to reset all labels to unclassified). + Default=False + %(log_extra_info)s + %(log_extra_report)s + %(custom_node_label)s + %(only_used_metrics)s + + Returns + ------- + %(selector)s + %(used_metrics)s + + Note + ---- + This was designed with three use cases in mind: + (1) Set the classifications of all components to unclassified + for the first node of a decision tree. clear_classification_tags=True is + recommended for this use case. + (2) Shift all components between classifications, such as provisionalaccept to accepted for the + penultimate node in the decision tree. + (3) Manually re-classify components by number based on user observations. + + Unlike other decision node functions, ``if_true`` and ``if_false`` are not inputs + since the same classification is assigned to all components listed in ``decide_comps``. 
@fill_doc
def manual_classify(
    selector,
    decide_comps,
    new_classification,
    clear_classification_tags=False,
    log_extra_report="",
    log_extra_info="",
    custom_node_label="",
    only_used_metrics=False,
    tag=None,
    dont_warn_reclassify=False,
):
    """Assign a classification defined in new_classification to the components in decide_comps.

    Parameters
    ----------
    %(selector)s
    %(decide_comps)s
    new_classification : :obj:`str`
        Assign all components identified in decide_comps the classification
        in new_classification. Options are 'unclassified', 'accepted',
        'rejected', or intermediate_classification labels predefined in the
        decision tree
    clear_classification_tags : :obj:`bool`
        If True, reset all values in the 'classification_tags' column to empty
        strings. This also can create the classification_tags column if it
        does not already exist. If False, do nothing.
    tag : :obj:`str`
        A classification tag to assign to all components being reclassified.
        This should be one of the tags defined by classification_tags in
        the decision tree specification
    dont_warn_reclassify : :obj:`bool`
        By default, if this function changes a component classification from accepted or
        rejected to something else, it gives a warning, since those should be terminal
        classifications. If this is True, that warning is suppressed.
        (Useful if manual_classify is used to reset all labels to unclassified).
        Default=False
    %(log_extra_info)s
    %(log_extra_report)s
    %(custom_node_label)s
    %(only_used_metrics)s

    Returns
    -------
    %(selector)s
    %(used_metrics)s

    Note
    ----
    Unlike other decision node functions, ``if_true`` and ``if_false`` are not inputs
    since the same classification is assigned to all components listed in ``decide_comps``.
    """
    # predefine all outputs that should be logged
    outputs = {
        "decision_node_idx": selector.current_node_idx,
        "used_metrics": set(),
        "node_label": None,
        "n_true": None,
        "n_false": None,
    }

    if only_used_metrics:
        return outputs["used_metrics"]

    # All selected components get new_classification; nothing is ever "False" here
    if_true = new_classification
    if_false = "nochange"

    function_name_idx = f"Step {selector.current_node_idx}: manual_classify"
    if custom_node_label:
        outputs["node_label"] = custom_node_label
    else:
        outputs["node_label"] = "Set " + str(decide_comps) + " to " + new_classification

    LGR.info(f"{function_name_idx}: {outputs['node_label']} ")
    if log_extra_info:
        LGR.info(f"{function_name_idx} {log_extra_info}")
    if log_extra_report:
        RepLGR.info(log_extra_report)

    comps2use = selectcomps2use(selector, decide_comps)

    if not comps2use:
        # No components matched decide_comps; log and record zero counts
        log_decision_tree_step(function_name_idx, comps2use, decide_comps=decide_comps)
        outputs["n_true"] = 0
        outputs["n_false"] = 0
    else:
        decision_boolean = pd.Series(True, index=comps2use)
        selector, outputs["n_true"], outputs["n_false"] = change_comptable_classifications(
            selector,
            if_true,
            if_false,
            decision_boolean,
            tag_if_true=tag,
            dont_warn_reclassify=dont_warn_reclassify,
        )

        log_decision_tree_step(
            function_name_idx,
            comps2use,
            n_true=outputs["n_true"],
            n_false=outputs["n_false"],
            if_true=if_true,
            if_false=if_false,
        )

    if clear_classification_tags:
        selector.component_table["classification_tags"] = ""
        LGR.info(function_name_idx + " component classification tags are cleared")

    selector.tree["nodes"][selector.current_node_idx]["outputs"] = outputs

    return selector


@fill_doc
def dec_left_op_right(
    selector,
    if_true,
    if_false,
    decide_comps,
    op,
    left,
    right,
    left_scale=1,
    right_scale=1,
    op2=None,
    left2=None,
    right2=None,
    left2_scale=1,
    right2_scale=1,
    op3=None,
    left3=None,
    right3=None,
    left3_scale=1,
    right3_scale=1,
    log_extra_report="",
    log_extra_info="",
    custom_node_label="",
    only_used_metrics=False,
    tag_if_true=None,
    tag_if_false=None,
):
    """Perform a relational comparison.

    Parameters
    ----------
    %(selector)s
    if_true, if_false : :obj:`str`
        The classification to assign components for which the full conditional
        evaluates True / False respectively ("nochange" leaves a component as-is)
    %(decide_comps)s
    op : :obj:`str`
        Must be one of: ">", ">=", "==", "<=", "<"
        Applied the user defined operator to left op right
    left, right : :obj:`str` or :obj:`float`
        The labels for the two metrics to be used for comparison.
        For example: left='kappa', right='rho' and op='>' means this
        function will test kappa>rho. One of the two can also be a number.
        In that case, a metric would be compared against a fixed threshold.
    left_scale, right_scale : :obj:`float` or :obj:`str`
        Multiply the left or right metrics value by a constant. These can also
        be a string naming a value in cross_component_metrics, since those
        resolve to a single value. This cannot be a component_table column
        label since that would give a different value per component. Default=1
    op2 : :obj:`str`, Default=None
    left2, right2, left3, right3 : :obj:`str` or :obj:`float`, Default=None
    left2_scale, right2_scale, left3_scale, right3_scale : :obj:`float` or :obj:`str`, Default=1
        This function can also calculate the intersection of two or three
        boolean statements. If op2, left2, and right2 are defined then this
        function returns
        (left_scale*left) op (right_scale*right) AND (left2_scale*left2) op2 (right2_scale*right2)
        If the "3" parameters are also defined then it is the intersection of
        all three statements
    %(log_extra_info)s
    %(log_extra_report)s
    %(custom_node_label)s
    %(only_used_metrics)s
    %(tag_if_true)s
    %(tag_if_false)s

    Returns
    -------
    %(selector)s
    %(used_metrics)s

    Note
    ----
    This function is ideally run with one boolean statement at a time so that
    the result of each boolean is logged. Combinations of boolean statements
    only test with "and" and not "or". This is an intentional decision because,
    if a classification changes when A>B or C>D are true, then A>B and C>D
    should be logged separately.
    """
    # predefine all outputs that should be logged
    outputs = {
        "decision_node_idx": selector.current_node_idx,
        "used_metrics": set(),
        "used_cross_component_metrics": set(),
        "node_label": None,
        "n_true": None,
        "n_false": None,
    }

    function_name_idx = f"Step {selector.current_node_idx}: left_op_right"
    # Only select components if the decision tree is being run
    if not only_used_metrics:
        comps2use = selectcomps2use(selector, decide_comps)

    def identify_used_metric(val, isnum=False):
        """
        Parse the left or right values or scalers to see if they are an
        existing used_metric or cross_component_metric.
        If the value is already a number, no parse is needed.

        This is also used on left_scale and right_scale to convert
        a value in cross_component_metrics to a number. Set the isnum
        flag to true for those inputs and this will raise an error
        if a number isn't loaded
        """
        orig_val = val
        if isinstance(val, str):
            if val in selector.component_table.columns:
                outputs["used_metrics"].update([val])
            elif val in selector.cross_component_metrics:
                outputs["used_cross_component_metrics"].update([val])
                val = selector.cross_component_metrics[val]
            # If decision tree is being run, then throw errors or messages
            # if a component doesn't exist. If this is just getting a list
            # of metrics to be used, then don't bring up warnings
            elif not only_used_metrics:
                if not comps2use:
                    LGR.info(
                        f"{function_name_idx}: {val} is neither a metric in "
                        "selector.component_table nor selector.cross_component_metrics, "
                        f"but no components with {decide_comps} remain by this node "
                        "so nothing happens"
                    )
                else:
                    raise ValueError(
                        f"{val} is neither a metric in selector.component_table "
                        "nor selector.cross_component_metrics"
                    )
        if isnum:
            if not isinstance(val, (int, float)):
                raise ValueError(f"{orig_val} must be a number. It is {val}")
        return val

    legal_ops = (">", ">=", "==", "<=", "<")

    def confirm_valid_conditional(left_scale, left_val, right_scale, right_val, op_val):
        """
        Make sure the left_scale, left_val, right_scale, right_val, and
        operator variables combine into a valid conditional statement
        """
        left_val = identify_used_metric(left_val)
        right_val = identify_used_metric(right_val)
        left_scale = identify_used_metric(left_scale, isnum=True)
        right_scale = identify_used_metric(right_scale, isnum=True)

        if op_val not in legal_ops:
            raise ValueError(f"{op_val} is not a binary comparison operator, like > or <")
        return left_scale, left_val, right_scale, right_val

    def operator_scale_descript(val_scale, val):
        """
        Return a string with one element from the mathematical expression,
        used to create a descriptor for visualizing the decision tree.
        Unrounded values are saved and rounding here will not affect results.
        """
        if not isinstance(val, str):
            val = str(round(val, 2))
        if val_scale == 1:
            return val
        return f"{round(val_scale,2)}*{val}"

    left_scale, left, right_scale, right = confirm_valid_conditional(
        left_scale, left, right_scale, right, op
    )
    descript_left = operator_scale_descript(left_scale, left)
    descript_right = operator_scale_descript(right_scale, right)
    is_compound = 0

    # If any of the values for the second boolean statement are set
    if left2 is not None or right2 is not None or op2 is not None:
        # Check if they're all set & use them all or raise an error
        if left2 is not None and right2 is not None and op2 is not None:
            is_compound = 2
            left2_scale, left2, right2_scale, right2 = confirm_valid_conditional(
                left2_scale, left2, right2_scale, right2, op2
            )
            descript_left2 = operator_scale_descript(left2_scale, left2)
            descript_right2 = operator_scale_descript(right2_scale, right2)
        else:
            raise ValueError(
                "left_op_right can check if a first and second boolean "
                "statement are both true. This call includes some but not "
                "all variables to define the second boolean statement "
                f"left2={left2}, right2={right2}, op2={op2}"
            )

    # If any of the values for the third boolean statement are set.
    # Use `is not None` (not truthiness) so that 0 is a legal value,
    # matching the second-statement checks above.
    if left3 is not None or right3 is not None or op3 is not None:
        if is_compound == 0:
            raise ValueError(
                "left_op_right includes parameters for a third conditional "
                "(left3, right3, or op3) statement without setting the "
                "second statement"
            )
        # Check if they're all set & use them all or raise an error
        if left3 is not None and right3 is not None and op3 is not None:
            is_compound = 3
            left3_scale, left3, right3_scale, right3 = confirm_valid_conditional(
                left3_scale, left3, right3_scale, right3, op3
            )
            descript_left3 = operator_scale_descript(left3_scale, left3)
            descript_right3 = operator_scale_descript(right3_scale, right3)
        else:
            raise ValueError(
                "left_op_right can check if three boolean "
                "statements are all true. This call includes some but not "
                "all variables to define the third boolean statement "
                f"left3={left3}, right3={right3}, op3={op3}"
            )

    if only_used_metrics:
        return outputs["used_metrics"]

    if custom_node_label:
        outputs["node_label"] = custom_node_label
    elif is_compound == 0:
        outputs["node_label"] = f"{descript_left}{op}{descript_right}"
    elif is_compound == 2:
        outputs["node_label"] = [
            f"{descript_left}{op}{descript_right} & " f"{descript_left2}{op2}{descript_right2}"
        ]
    elif is_compound == 3:
        outputs["node_label"] = [
            f"{descript_left}{op}{descript_right} & "
            f"{descript_left2}{op2}{descript_right2} & "
            f"{descript_left3}{op3}{descript_right3}"
        ]

    # The function input will be logged before the function call
    LGR.info(f"{function_name_idx}: {if_true} if {outputs['node_label']}, else {if_false}")
    if log_extra_info:
        LGR.info(f"{function_name_idx} {log_extra_info}")
    if log_extra_report:
        RepLGR.info(log_extra_report)

    confirm_metrics_exist(
        selector.component_table, outputs["used_metrics"], function_name=function_name_idx
    )

    def parse_vals(val):
        """Get the metric values for the selected components or relevant constant"""
        if isinstance(val, str):
            return selector.component_table.loc[comps2use, val].copy()
        return val  # should be a fixed number

    if not comps2use:
        outputs["n_true"] = 0
        outputs["n_false"] = 0
        log_decision_tree_step(
            function_name_idx,
            comps2use,
            decide_comps=decide_comps,
            if_true=outputs["n_true"],
            if_false=outputs["n_false"],
        )
    else:
        left1_val = parse_vals(left)  # noqa: F841
        right1_val = parse_vals(right)  # noqa: F841
        decision_boolean = eval(f"(left_scale*left1_val) {op} (right_scale * right1_val)")
        if is_compound >= 2:
            left2_val = parse_vals(left2)  # noqa: F841
            right2_val = parse_vals(right2)  # noqa: F841
            statement1 = decision_boolean.copy()
            statement2 = eval(f"(left2_scale*left2_val) {op2} (right2_scale * right2_val)")
            # logical dot product for compound statement
            decision_boolean = statement1 * statement2
        if is_compound == 3:
            left3_val = parse_vals(left3)  # noqa: F841
            right3_val = parse_vals(right3)  # noqa: F841
            # statement 1 is now the combination of the first two conditional statements
            statement1 = decision_boolean.copy()
            # statement 2 is now the third conditional statement.
            # BUG FIX: this previously interpolated {op2} so the third
            # statement was evaluated with the second statement's operator;
            # it must use op3.
            statement2 = eval(f"(left3_scale*left3_val) {op3} (right3_scale * right3_val)")
            # logical dot product for compound statement
            decision_boolean = statement1 * statement2

        (
            selector,
            outputs["n_true"],
            outputs["n_false"],
        ) = change_comptable_classifications(
            selector,
            if_true,
            if_false,
            decision_boolean,
            tag_if_true=tag_if_true,
            tag_if_false=tag_if_false,
        )

        log_decision_tree_step(
            function_name_idx,
            comps2use,
            n_true=outputs["n_true"],
            n_false=outputs["n_false"],
            if_true=if_true,
            if_false=if_false,
        )

    selector.tree["nodes"][selector.current_node_idx]["outputs"] = outputs

    return selector
@fill_doc
def calc_median(
    selector,
    decide_comps,
    metric_name,
    median_label,
    log_extra_report="",
    log_extra_info="",
    custom_node_label="",
    only_used_metrics=False,
):
    """Calculate the median across components for the metric defined by metric_name.

    Parameters
    ----------
    %(selector)s
    %(decide_comps)s
    metric_name : :obj:`str`
        The name of a column in selector.component_table. The median of
        the values in this column will be calculated
    median_label : :obj:`str`
        The median will be saved in "median_(median_label)"
    %(log_extra_info)s
    %(log_extra_report)s
    %(custom_node_label)s
    %(only_used_metrics)s

    Returns
    -------
    %(selector)s
    %(used_metrics)s
    """
    function_name_idx = f"Step {selector.current_node_idx}: calc_median"
    if not isinstance(median_label, str):
        raise ValueError(
            f"{function_name_idx}: median_label must be a string. It is: {median_label}"
        )
    label_name = f"median_{median_label}"

    if not isinstance(metric_name, str):
        raise ValueError(
            f"{function_name_idx}: metric_name must be a string. It is: {metric_name}"
        )

    outputs = {
        "decision_node_idx": selector.current_node_idx,
        "node_label": None,
        label_name: None,
        "used_metrics": set([metric_name]),
        "calc_cross_comp_metrics": [label_name],
    }

    if only_used_metrics:
        return outputs["used_metrics"]

    if label_name in selector.cross_component_metrics:
        LGR.warning(
            f"{label_name} already calculated. Overwriting previous value in {function_name_idx}"
        )

    if custom_node_label:
        outputs["node_label"] = custom_node_label
    else:
        outputs["node_label"] = f"Median({label_name})"

    LGR.info(f"{function_name_idx}: {outputs['node_label']}")
    if log_extra_info:
        LGR.info(f"{function_name_idx} {log_extra_info}")
    if log_extra_report:
        RepLGR.info(log_extra_report)

    comps2use = selectcomps2use(selector, decide_comps)
    confirm_metrics_exist(
        selector.component_table, outputs["used_metrics"], function_name=function_name_idx
    )

    if not comps2use:
        # Nothing to compute; just log that no components matched decide_comps
        log_decision_tree_step(
            function_name_idx,
            comps2use,
            decide_comps=decide_comps,
        )
    else:
        outputs[label_name] = np.median(selector.component_table.loc[comps2use, metric_name])

        selector.cross_component_metrics[label_name] = outputs[label_name]

        log_decision_tree_step(function_name_idx, comps2use, calc_outputs=outputs)

    selector.tree["nodes"][selector.current_node_idx]["outputs"] = outputs

    return selector


@fill_doc
def calc_kappa_elbow(
    selector,
    decide_comps,
    log_extra_report="",
    log_extra_info="",
    custom_node_label="",
    only_used_metrics=False,
):
    """Calculate elbow for kappa across components.

    Parameters
    ----------
    %(selector)s
    %(decide_comps)s
    %(log_extra_info)s
    %(log_extra_report)s
    %(custom_node_label)s
    %(only_used_metrics)s

    Returns
    -------
    %(selector)s
    %(used_metrics)s

    Note
    ----
    This function is currently hard coded for a specific way to calculate the kappa elbow
    based on the method by Kundu in the MEICA v2.5 code. This uses the minimum of
    a kappa elbow calculation on all components and on a subset of kappa values below
    a significance threshold. To get the same functionality as in MEICA v2.5,
    decide_comps must be 'all'.

    varex_upper_p isn't used for anything in this function, but it is calculated
    on kappa values and is used in rho_elbow_kundu_liberal and
    dec_reclassify_high_var_comps, so it is calculated here and should be
    calculated before those two functions are called.
    """
    outputs = {
        "decision_node_idx": selector.current_node_idx,
        "node_label": None,
        "n_echos": selector.n_echos,
        "used_metrics": set(["kappa"]),
        "calc_cross_comp_metrics": [
            "kappa_elbow_kundu",
            "kappa_allcomps_elbow",
            "kappa_nonsig_elbow",
            "varex_upper_p",
        ],
        "kappa_elbow_kundu": None,
        "kappa_allcomps_elbow": None,
        "kappa_nonsig_elbow": None,
        "varex_upper_p": None,
    }

    if only_used_metrics:
        return outputs["used_metrics"]

    function_name_idx = f"Step {selector.current_node_idx}: calc_kappa_elbow"

    if ("kappa_elbow_kundu" in selector.cross_component_metrics) and (
        "kappa_elbow_kundu" in outputs["calc_cross_comp_metrics"]
    ):
        # Note: added the missing space between the two sentences of this warning
        LGR.warning(
            "kappa_elbow_kundu already calculated. "
            f"Overwriting previous value in {function_name_idx}"
        )

    if "varex_upper_p" in selector.cross_component_metrics:
        LGR.warning(
            f"varex_upper_p already calculated. Overwriting previous value in {function_name_idx}"
        )

    if custom_node_label:
        outputs["node_label"] = custom_node_label
    else:
        outputs["node_label"] = "Calc Kappa Elbow"

    LGR.info(f"{function_name_idx}: {outputs['node_label']}")
    if log_extra_info:
        LGR.info(f"{function_name_idx} {log_extra_info}")
    if log_extra_report:
        RepLGR.info(log_extra_report)

    comps2use = selectcomps2use(selector, decide_comps)
    confirm_metrics_exist(
        selector.component_table, outputs["used_metrics"], function_name=function_name_idx
    )

    if not comps2use:
        log_decision_tree_step(
            function_name_idx,
            comps2use,
            decide_comps=decide_comps,
        )
    else:
        (
            outputs["kappa_elbow_kundu"],
            outputs["kappa_allcomps_elbow"],
            outputs["kappa_nonsig_elbow"],
            outputs["varex_upper_p"],
        ) = kappa_elbow_kundu(selector.component_table, selector.n_echos, comps2use=comps2use)
        selector.cross_component_metrics["kappa_elbow_kundu"] = outputs["kappa_elbow_kundu"]
        selector.cross_component_metrics["kappa_allcomps_elbow"] = outputs["kappa_allcomps_elbow"]
        selector.cross_component_metrics["kappa_nonsig_elbow"] = outputs["kappa_nonsig_elbow"]
        selector.cross_component_metrics["varex_upper_p"] = outputs["varex_upper_p"]

        log_decision_tree_step(function_name_idx, comps2use, calc_outputs=outputs)

    selector.tree["nodes"][selector.current_node_idx]["outputs"] = outputs

    return selector


@fill_doc
def calc_rho_elbow(
    selector,
    decide_comps,
    subset_decide_comps="unclassified",
    rho_elbow_type="kundu",
    log_extra_report="",
    log_extra_info="",
    custom_node_label="",
    only_used_metrics=False,
):
    """Calculate elbow for rho across components.

    Parameters
    ----------
    %(selector)s
    %(decide_comps)s
    subset_decide_comps : :obj:`str`
        This is a string with a single component classification label. For the
        elbow calculation used by Kundu in MEICA v2.5 thresholds are based
        on all components and on unclassified components.
        Default='unclassified'.
    rho_elbow_type : :obj:`str`
        The algorithm used to calculate the rho elbow. Current options are:
        'kundu' and 'liberal' (case-insensitive). Default='kundu'.
    %(log_extra_info)s
    %(log_extra_report)s
    %(custom_node_label)s
    %(only_used_metrics)s

    Returns
    -------
    %(selector)s
    %(used_metrics)s

    Note
    ----
    This script is currently hard coded for a specific way to calculate the rho elbow
    based on the method by Kundu in the MEICA v2.5 code. To get the same functionality
    in MEICA v2.5, decide_comps must be 'all' and subset_decide_comps must be
    'unclassified'. See :obj:`tedana.selection.selection_utils.rho_elbow_kundu_liberal`
    for a more detailed explanation of the difference between the kundu and liberal
    options.
    """
    function_name_idx = f"Step {selector.current_node_idx}: calc_rho_elbow"

    # BUG FIX: the original compared rho_elbow_type == "kundu".lower(), which
    # lower-cases the literal instead of the user input, so e.g. "Kundu" was
    # rejected. Normalize the input once and compare against the literals.
    rho_elbow_type = rho_elbow_type.lower()
    if rho_elbow_type == "kundu":
        elbow_name = "rho_elbow_kundu"
    elif rho_elbow_type == "liberal":
        elbow_name = "rho_elbow_liberal"
    else:
        raise ValueError(
            f"{function_name_idx}: rho_elbow_type must be 'kundu' or 'liberal' "
            f"It is {rho_elbow_type} "
        )

    outputs = {
        "decision_node_idx": selector.current_node_idx,
        "node_label": None,
        "n_echos": selector.n_echos,
        "calc_cross_comp_metrics": [
            elbow_name,
            "rho_allcomps_elbow",
            "rho_unclassified_elbow",
            "elbow_f05",
        ],
        "used_metrics": set(["kappa", "rho", "variance explained"]),
        elbow_name: None,
        "rho_allcomps_elbow": None,
        "rho_unclassified_elbow": None,
        "elbow_f05": None,
    }

    if only_used_metrics:
        return outputs["used_metrics"]

    if (elbow_name in selector.cross_component_metrics) and (
        elbow_name in outputs["calc_cross_comp_metrics"]
    ):
        # Note: added the missing space between the two sentences of this warning
        LGR.warning(
            f"{elbow_name} already calculated. "
            f"Overwriting previous value in {function_name_idx}"
        )

    if custom_node_label:
        outputs["node_label"] = custom_node_label
    else:
        outputs["node_label"] = "Calc Rho Elbow"

    LGR.info(f"{function_name_idx}: {outputs['node_label']}")
    if log_extra_info:
        LGR.info(f"{function_name_idx} {log_extra_info}")
    if log_extra_report:
        RepLGR.info(log_extra_report)

    comps2use = selectcomps2use(selector, decide_comps)
    confirm_metrics_exist(
        selector.component_table, outputs["used_metrics"], function_name=function_name_idx
    )

    subset_comps2use = selectcomps2use(selector, subset_decide_comps)

    if not comps2use:
        log_decision_tree_step(
            function_name_idx,
            comps2use,
            decide_comps=decide_comps,
        )
    else:
        (
            outputs[elbow_name],
            outputs["rho_allcomps_elbow"],
            outputs["rho_unclassified_elbow"],
            outputs["elbow_f05"],
        ) = rho_elbow_kundu_liberal(
            selector.component_table,
            selector.n_echos,
            rho_elbow_type=rho_elbow_type,
            comps2use=comps2use,
            subset_comps2use=subset_comps2use,
        )
        selector.cross_component_metrics[elbow_name] = outputs[elbow_name]
        selector.cross_component_metrics["rho_allcomps_elbow"] = outputs["rho_allcomps_elbow"]
        selector.cross_component_metrics["rho_unclassified_elbow"] = outputs[
            "rho_unclassified_elbow"
        ]
        selector.cross_component_metrics["elbow_f05"] = outputs["elbow_f05"]

        log_decision_tree_step(function_name_idx, comps2use, calc_outputs=outputs)

    selector.tree["nodes"][selector.current_node_idx]["outputs"] = outputs

    return selector


@fill_doc
def dec_classification_doesnt_exist(
    selector,
    new_classification,
    decide_comps,
    class_comp_exists,
    at_least_num_exist=1,
    log_extra_report="",
    log_extra_info="",
    custom_node_label="",
    only_used_metrics=False,
    tag=None,
):
    """
    Change the classification of all components in decide_comps if there are
    fewer than at_least_num_exist components with a classification specified
    in class_comp_exists.

    Parameters
    ----------
    %(selector)s
    new_classification : :obj:`str`
        Assign all components identified in decide_comps the classification
        in new_classification.
    %(decide_comps)s
    class_comp_exists : :obj:`str` or :obj:`list[str]` or :obj:`int` or :obj:`list[int]`
        This has the same structure options as decide_comps. This function tests
        whether any components in decide_comps have the classifications defined in this
        variable.
    at_least_num_exist : :obj:`int`
        Instead of just testing whether a classification exists, test whether at least
        this number of components have that classification. Default=1
    %(log_extra_info)s
    %(log_extra_report)s
    %(custom_node_label)s
    %(only_used_metrics)s
    tag : :obj:`str`
        A classification tag to assign to all components being reclassified.
        This should be one of the tags defined by classification_tags in
        the decision tree specification. Default="".

    Returns
    -------
    %(selector)s
    %(used_metrics)s

    Note
    ----
    This function is useful to end the component selection process early
    even if there are additional nodes. For example, in the original
    kundu tree, if 0 or 1 components are identified with kappa>elbow and
    rho>elbow then, instead of removing everything, it effectively says
    something's wrong and conservatively keeps everything.
    """
    # predefine all outputs that should be logged
    outputs = {
        "decision_node_idx": selector.current_node_idx,
        "used_metrics": set(),
        "used_cross_comp_metrics": set(),
        "node_label": None,
        "n_true": None,
        "n_false": None,
    }

    if only_used_metrics:
        return outputs["used_metrics"]

    function_name_idx = f"Step {selector.current_node_idx}: classification_doesnt_exist"
    if custom_node_label:
        outputs["node_label"] = custom_node_label
    elif at_least_num_exist == 1:
        outputs[
            "node_label"
        ] = f"Change {decide_comps} to {new_classification} if {class_comp_exists} doesn't exist"
    else:
        outputs["node_label"] = (
            f"Change {decide_comps} to {new_classification} if less than "
            f"{at_least_num_exist} components with {class_comp_exists} exist"
        )

    LGR.info(f"{function_name_idx}: {outputs['node_label']}")
    if log_extra_info:
        LGR.info(f"{function_name_idx} {log_extra_info}")
    if log_extra_report:
        RepLGR.info(log_extra_report)

    if_true = new_classification
    if_false = "nochange"

    comps2use = selectcomps2use(selector, decide_comps)

    do_comps_exist = selectcomps2use(selector, class_comp_exists)

    if (not comps2use) or (len(do_comps_exist) >= at_least_num_exist):
        outputs["n_true"] = 0
        # If nothing changes, then assign the number of components in comps2use to n_false
        outputs["n_false"] = len(comps2use)
        log_decision_tree_step(
            function_name_idx,
            comps2use,
            decide_comps=decide_comps,
            if_true=outputs["n_true"],
            if_false=outputs["n_false"],
        )
    else:
        # Fewer than at_least_num_exist components with the tested
        # classification exist, so reclassify everything in decide_comps
        decision_boolean = pd.Series(True, index=comps2use)

        selector, outputs["n_true"], outputs["n_false"] = change_comptable_classifications(
            selector,
            if_true,
            if_false,
            decision_boolean,
            tag_if_true=tag,
        )

        log_decision_tree_step(
            function_name_idx,
            comps2use,
            n_true=outputs["n_true"],
            n_false=outputs["n_false"],
            if_true=if_true,
            if_false=if_false,
        )

    selector.tree["nodes"][selector.current_node_idx]["outputs"] = outputs

    return selector
selector.tree["nodes"][selector.current_node_idx]["outputs"] = outputs + + return selector + + +@fill_doc +def dec_reclassify_high_var_comps( + selector, + new_classification, + decide_comps, + log_extra_report="", + log_extra_info="", + custom_node_label="", + only_used_metrics=False, + tag=None, +): + """ + Identifies and reclassifies a couple components with the largest gaps in variance + + Parameters + ---------- + %(selector)s + new_classification: :obj:`str` + Assign all components identified in decide_comps the classification + in new_classification. + %(decide_comps)s + %(log_extra_info)s + %(log_extra_report)s + %(custom_node_label)s + %(only_used_metrics)s + tag: :obj:`str` + A classification tag to assign to all components being reclassified. + This should be one of the tags defined by classification_tags in + the decision tree specification. Default="". + + Returns + ------- + %(selector)s + %(used_metrics)s + + Note + ---- + This function should not exist, but with the goal of maintaining the results of + the original MEICA decision tree it is necessary, so here it is. + It is a quirky and brittle step that is used to remove a few higher variance + components from the calculation of the rho elbow. In the kundu decision tree, + these components are also excluded from being provisionally accepted if + kappa>kappa_elbow and rho 0) + and (percentile_thresh < 100) + ): + outputs["calc_cross_comp_metrics"] = [varex_name, perc_name] + outputs[perc_name] = percentile_thresh + else: + raise ValueError( + f"percentile_thresh must be a number between 0 & 100. It is: {percentile_thresh}" + ) + + if only_used_metrics: + return outputs["used_metrics"] + + if varex_name in selector.cross_component_metrics: + LGR.warning( + f"{varex_name} already calculated. Overwriting previous value in {function_name_idx}" + ) + + if perc_name in selector.cross_component_metrics: + LGR.warning( + f"{perc_name} already calculated. 
Overwriting previous value in {function_name_idx}" + ) + + comps2use = selectcomps2use(selector, decide_comps) + confirm_metrics_exist( + selector.component_table, outputs["used_metrics"], function_name=function_name_idx + ) + + if num_highest_var_comps is not None: + if isinstance(num_highest_var_comps, str): + if num_highest_var_comps in selector.cross_component_metrics: + num_highest_var_comps = selector.cross_component_metrics[num_highest_var_comps] + elif not comps2use: + # Note: It is possible the comps2use requested for this function + # is not empty, but the comps2use requested to calculate + # {num_highest_var_comps} was empty. Given the way this node is + # used, that's unlikely, but worth a comment. + LGR.info( + f"{function_name_idx}: num_highest_var_comps ( {num_highest_var_comps}) " + "is not in selector.cross_component_metrics, but no components with " + f"{decide_comps} remain by this node so nothing happens" + ) + else: + raise ValueError( + f"{function_name_idx}: num_highest_var_comps ( {num_highest_var_comps}) " + "is not in selector.cross_component_metrics" + ) + if not isinstance(num_highest_var_comps, int) and comps2use: + raise ValueError( + f"{function_name_idx}: num_highest_var_comps ( {num_highest_var_comps}) " + "is used as an array index and should be an integer" + ) + + if custom_node_label: + outputs["node_label"] = custom_node_label + else: + outputs["node_label"] = f"Calc {varex_name}, {percentile_thresh}th percentile threshold" + + LGR.info(f"{function_name_idx}: {outputs['node_label']}") + if log_extra_info: + LGR.info(f"{function_name_idx} {log_extra_info}") + if log_extra_report: + RepLGR.info(log_extra_report) + + if not comps2use: + log_decision_tree_step( + function_name_idx, + comps2use, + decide_comps=decide_comps, + ) + else: + if num_highest_var_comps is None: + outputs[varex_name] = scoreatpercentile( + selector.component_table.loc[comps2use, "variance explained"], percentile_thresh + ) + else: + # Using only the first 
num_highest_var_comps components sorted to include + # lowest variance + if num_highest_var_comps > len(comps2use): + # NOTE: This was originally an error, but the original tedana code has no check + # at all and it looks like sorted_varex[:num_highest_var_comps] does not + # crash and always maxes out at the length of sorted_varex. Since this is + # an edge case, decided to print an info message and change the value even + # if this won't affect functionality + LGR.info( + f"{function_name_idx}: num_highest_var_comps ({num_highest_var_comps}) > " + f"len(comps2use) ({len(comps2use)}). Setting to equal len(comps2use) since " + "selection should not use more components than exist" + ) + num_highest_var_comps = len(comps2use) + + sorted_varex = np.flip( + np.sort((selector.component_table.loc[comps2use, "variance explained"]).to_numpy()) + ) + outputs[varex_name] = scoreatpercentile( + sorted_varex[:num_highest_var_comps], percentile_thresh + ) + + selector.cross_component_metrics[varex_name] = outputs[varex_name] + + log_decision_tree_step(function_name_idx, comps2use, calc_outputs=outputs) + + selector.tree["nodes"][selector.current_node_idx]["outputs"] = outputs + + return selector + + +@fill_doc +def calc_extend_factor( + selector, + log_extra_report="", + log_extra_info="", + custom_node_label="", + only_used_metrics=False, + extend_factor=None, +): + """Calculate the scalar used to set a threshold for d_table_score. + + 2 if fewer than 90 fMRI volumes, 3 if more than 110 and linear in-between + The explanation for the calculation is in + :obj:`tedana.selection.selection_utils.get_extend_factor` + + Parameters + ---------- + %(selector)s + %(decide_comps)s + %(log_extra_info)s + %(log_extra_report)s + %(custom_node_label)s + %(only_used_metrics)s + extend_factor: :obj:`float` + If a number, then use rather than calculating anything. + If None than calculate. 
default=None + + Returns + ------- + %(selector)s + %(used_metrics)s + """ + outputs = { + "used_metrics": set(), + "decision_node_idx": selector.current_node_idx, + "node_label": None, + "extend_factor": None, + "calc_cross_comp_metrics": ["extend_factor"], + } + + if only_used_metrics: + return outputs["used_metrics"] + + function_name_idx = f"Step {selector.current_node_idx}: calc_extend_factor" + + if "extend_factor" in selector.cross_component_metrics: + LGR.warning( + f"extend_factor already calculated. Overwriting previous value in {function_name_idx}" + ) + + if custom_node_label: + outputs["node_label"] = custom_node_label + else: + outputs["node_label"] = "Calc extend_factor" + + if log_extra_info: + LGR.info(f"{function_name_idx} {log_extra_info}") + if log_extra_report: + RepLGR.info(log_extra_report) + + outputs["extend_factor"] = get_extend_factor( + n_vols=selector.cross_component_metrics["n_vols"], extend_factor=extend_factor + ) + + selector.cross_component_metrics["extend_factor"] = outputs["extend_factor"] + + log_decision_tree_step(function_name_idx, -1, calc_outputs=outputs) + + selector.tree["nodes"][selector.current_node_idx]["outputs"] = outputs + + return selector + + +@fill_doc +def calc_max_good_meanmetricrank( + selector, + decide_comps, + metric_suffix=None, + log_extra_report="", + log_extra_info="", + custom_node_label="", + only_used_metrics=False, +): + """Calculate the metric "max_good_meanmetricrank". + + Calculates the max_good_meanmetricrank to use in the kundu decision tree. + This is the number of components selected with decide_comps * the extend_factor + calculated in calc_extend_factor + + Parameters + ---------- + %(selector)s + %(decide_comps)s + metric_suffix: :obj:`str` + By default, this will output a value called "max_good_meanmetricrank" + If this variable is not None or "" then it will output: + "max_good_meanmetricrank_[metric_suffix]". 
Default=None + %(log_extra_info)s + %(log_extra_report)s + %(custom_node_label)s + %(only_used_metrics)s + + Returns + ------- + %(selector)s + %(used_metrics)s + + Note + ---- + "meanmetricrank" is the same as "d_table_score" and is used to set a threshold for + the "d_table" values in the component table. This metric ranks the components based + on 5 metrics and then outputs the mean rank across the 5 metrics. + Thus "meanmetricrank" is a slightly more descriptive name but d_table was used in + earlier versions of this code. It might be worth consistently using the same term, + but this note will hopefully suffice for now. + """ + function_name_idx = f"Step {selector.current_node_idx}: calc_max_good_meanmetricrank" + + if (metric_suffix is not None) and (metric_suffix != "") and isinstance(metric_suffix, str): + metric_name = f"max_good_meanmetricrank_{metric_suffix}" + else: + metric_name = "max_good_meanmetricrank" + + outputs = { + "decision_node_idx": selector.current_node_idx, + "node_label": None, + metric_name: None, + "used_metrics": set(), + "calc_cross_comp_metrics": [metric_name], + } + + if only_used_metrics: + return outputs["used_metrics"] + + if metric_name in selector.cross_component_metrics: + LGR.warning( + "max_good_meanmetricrank already calculated." 
+ f"Overwriting previous value in {function_name_idx}" + ) + + if custom_node_label: + outputs["node_label"] = custom_node_label + else: + outputs["node_label"] = f"Calc {metric_name}" + + if log_extra_info: + LGR.info(f"{function_name_idx} {log_extra_info}") + if log_extra_report: + RepLGR.info(log_extra_report) + + comps2use = selectcomps2use(selector, decide_comps) + confirm_metrics_exist( + selector.component_table, outputs["used_metrics"], function_name=function_name_idx + ) + + if not comps2use: + log_decision_tree_step( + function_name_idx, + comps2use, + decide_comps=decide_comps, + ) + else: + num_prov_accept = len(comps2use) + if "extend_factor" in selector.cross_component_metrics: + extend_factor = selector.cross_component_metrics["extend_factor"] + outputs[metric_name] = extend_factor * num_prov_accept + else: + raise ValueError( + f"extend_factor needs to be in cross_component_metrics for {function_name_idx}" + ) + + selector.cross_component_metrics[metric_name] = outputs[metric_name] + + log_decision_tree_step(function_name_idx, comps2use, calc_outputs=outputs) + + selector.tree["nodes"][selector.current_node_idx]["outputs"] = outputs + + return selector + + +@fill_doc +def calc_varex_kappa_ratio( + selector, + decide_comps, + log_extra_report="", + log_extra_info="", + custom_node_label="", + only_used_metrics=False, +): + """Calculate the cross-component metric "kappa_rate". + + Calculates the cross_component_metric kappa_rate for the components in decide_comps + and then calculate the variance explained / kappa ratio for ALL components + and adds those values to a new column in the component_table titled "varex kappa ratio". + + Parameters + ---------- + %(selector)s + %(decide_comps)s + %(log_extra_info)s + %(log_extra_report)s + %(custom_node_label)s + %(only_used_metrics)s + + Returns + ------- + %(selector)s + %(used_metrics)s + + Note + ---- + These measures are used in the original kundu decision tree. 
+ kappa_rate = (max-min kappa values of selected components)/(max-min variance explained) + varex_k. + varex kappa ratio = kappa_rate * "variance explained"/"kappa" for each component. + Components with larger variance and smaller kappa are more likely to be rejected. + This metric sometimes causes issues with high magnitude BOLD responses + such as the V1 response to a block-design flashing checkerboard + """ + function_name_idx = f"Step {selector.current_node_idx}: calc_varex_kappa_ratio" + + outputs = { + "decision_node_idx": selector.current_node_idx, + "node_label": None, + "kappa_rate": None, + "used_metrics": {"kappa", "variance explained"}, + "calc_cross_comp_metrics": ["kappa_rate"], + "added_component_table_metrics": ["varex kappa ratio"], + } + + if only_used_metrics: + return outputs["used_metrics"] + + if "kappa_rate" in selector.cross_component_metrics: + LGR.warning( + f"kappa_rate already calculated. Overwriting previous value in {function_name_idx}" + ) + + if "varex kappa ratio" in selector.component_table: + raise ValueError( + "'varex kappa ratio' is already a column in the component_table." 
+ f"Recalculating in {function_name_idx} can cause problems since these " + "are only calculated on a subset of components" + ) + + if custom_node_label: + outputs["node_label"] = custom_node_label + else: + outputs["node_label"] = "Calc varex kappa ratio" + + if log_extra_info: + LGR.info(f"{function_name_idx}: {log_extra_info}") + if log_extra_report: + RepLGR.info(log_extra_report) + + comps2use = selectcomps2use(selector, decide_comps) + confirm_metrics_exist( + selector.component_table, outputs["used_metrics"], function_name=function_name_idx + ) + + if not comps2use: + log_decision_tree_step( + function_name_idx, + comps2use, + decide_comps=decide_comps, + ) + else: + kappa_rate = ( + np.nanmax(selector.component_table.loc[comps2use, "kappa"]) + - np.nanmin(selector.component_table.loc[comps2use, "kappa"]) + ) / ( + np.nanmax(selector.component_table.loc[comps2use, "variance explained"]) + - np.nanmin(selector.component_table.loc[comps2use, "variance explained"]) + ) + outputs["kappa_rate"] = kappa_rate + LGR.debug( + f"{function_name_idx} Kappa rate found to be {kappa_rate} from components " + f"{comps2use}" + ) + # NOTE: kappa_rate is calculated on a subset of components while + # "varex kappa ratio" is calculated for all compnents + selector.component_table["varex kappa ratio"] = ( + kappa_rate + * selector.component_table["variance explained"] + / selector.component_table["kappa"] + ) + # Unclear if necessary, but this may clean up a weird issue on passing + # references in a data frame. 
+ # See longer comment in selection_utils.comptable_classification_changer + selector.component_table = selector.component_table.copy() + + selector.cross_component_metrics["kappa_rate"] = outputs["kappa_rate"] + + log_decision_tree_step(function_name_idx, comps2use, calc_outputs=outputs) + + selector.tree["nodes"][selector.current_node_idx]["outputs"] = outputs + + return selector + + +@fill_doc +def calc_revised_meanmetricrank_guesses( + selector, + decide_comps, + restrict_factor=2, + log_extra_report="", + log_extra_info="", + custom_node_label="", + only_used_metrics=False, +): + """Calculate a new d_table_score (meanmetricrank). + + Parameters + ---------- + %(selector)s + %(decide_comps)s + restrict_factor: :obj:`int` or :obj:`float` + A scaling factor to scale between num_acc_guess and conservative_guess. + Default=2. + + %(log_extra_info)s + %(log_extra_report)s + %(custom_node_label)s + %(only_used_metrics)s + + Returns + ------- + %(selector)s + %(used_metrics)s + + Note + ---- + These measures are used in the original kundu decision tree. + Since the d_table_rank is a mean rank across 5 metrics, those ranks + will change when they're calculated on a subset of components. It's + unclear how much the relative magnitudes will change and when the + recalculation will affect results, but this was in the original + kundu tree and will be replicated here to allow for comparisions + + This also hard-codes for kappa_elbow_kundu and rho_elbow_kundu in + the cross component metrics. If someone decides to keep using + this function with other elbow thresholds, the code would need to + be altered to account for that. 
+ + This function also saves the following cross_component_metrics: + - ``num_acc_guess``, a guess on the final number of accepted components, + - ``restrict_factor``, an input to this function used for scaling, + - ``conservative_guess``, a conservative guess of the final number of + accepted components calculated as the ratio of ``num_acc_guess`` to + ``restrict_factor``. + """ + function_name_idx = f"Step {selector.current_node_idx}: calc_revised_meanmetricrank_guesses" + + outputs = { + "decision_node_idx": selector.current_node_idx, + "node_label": None, + "num_acc_guess": None, + "conservative_guess": None, + "restrict_factor": None, + "used_metrics": { + "kappa", + "dice_FT2", + "signal-noise_t", + "countnoise", + "countsigFT2", + "rho", + }, + "used_cross_component_metrics": {"kappa_elbow_kundu", "rho_elbow_kundu"}, + "calc_cross_comp_metrics": ["num_acc_guess", "conservative_guess", "restrict_factor"], + "added_component_table_metrics": [f"d_table_score_node{selector.current_node_idx}"], + } + + if only_used_metrics: + return outputs["used_metrics"] + + if "num_acc_guess" in selector.cross_component_metrics: + LGR.warning( + f"num_acc_guess already calculated. Overwriting previous value in {function_name_idx}" + ) + + if "conservative_guess" in selector.cross_component_metrics: + LGR.warning( + "conservative_guess already calculated. " + f"Overwriting previous value in {function_name_idx}" + ) + + if "restrict_factor" in selector.cross_component_metrics: + LGR.warning( + "restrict_factor already calculated. " + f"Overwriting previous value in {function_name_idx}" + ) + if not isinstance(restrict_factor, (int, float)): + raise ValueError(f"restrict_factor needs to be a number. It is: {restrict_factor}") + + if f"d_table_score_node{selector.current_node_idx}" in selector.component_table: + raise ValueError( + f"d_table_score_node{selector.current_node_idx} is already a column" + f"in the component_table. 
Recalculating in {function_name_idx} can " + "cause problems since these are only calculated on a subset of components" + ) + + comps2use = selectcomps2use(selector, decide_comps) + confirm_metrics_exist( + selector.component_table, outputs["used_metrics"], function_name=function_name_idx + ) + + for xcompmetric in outputs["used_cross_component_metrics"]: + if xcompmetric not in selector.cross_component_metrics: + if not comps2use: + LGR.info( + f"{function_name_idx}: {xcompmetric} is not in " + "selector.cross_component_metrics, but no components with " + f"{decide_comps} remain by this node so nothing happens" + ) + else: + raise ValueError( + f"{xcompmetric} not in cross_component_metrics. " + f"It needs to be calculated before {function_name_idx}" + ) + + if custom_node_label: + outputs["node_label"] = custom_node_label + else: + outputs["node_label"] = "Calc revised d_table_score & num accepted component guesses" + + LGR.info(f"{function_name_idx}: {outputs['node_label']}") + if log_extra_info: + LGR.info(f"{function_name_idx}: {log_extra_info}") + if log_extra_report: + RepLGR.info(log_extra_report) + + comps2use = selectcomps2use(selector, decide_comps) + confirm_metrics_exist( + selector.component_table, outputs["used_metrics"], function_name=function_name_idx + ) + + if not comps2use: + log_decision_tree_step( + function_name_idx, + comps2use, + decide_comps=decide_comps, + ) + else: + outputs["restrict_factor"] = restrict_factor + outputs["num_acc_guess"] = int( + np.mean( + [ + np.sum( + ( + selector.component_table.loc[comps2use, "kappa"] + > selector.cross_component_metrics["kappa_elbow_kundu"] + ) + & ( + selector.component_table.loc[comps2use, "rho"] + < selector.cross_component_metrics["rho_elbow_kundu"] + ) + ), + np.sum( + selector.component_table.loc[comps2use, "kappa"] + > selector.cross_component_metrics["kappa_elbow_kundu"] + ), + ] + ) + ) + outputs["conservative_guess"] = outputs["num_acc_guess"] / outputs["restrict_factor"] + + tmp_kappa = 
selector.component_table.loc[comps2use, "kappa"].to_numpy() + tmp_dice_FT2 = selector.component_table.loc[comps2use, "dice_FT2"].to_numpy() + tmp_signal_m_noise_t = selector.component_table.loc[comps2use, "signal-noise_t"].to_numpy() + tmp_countnoise = selector.component_table.loc[comps2use, "countnoise"].to_numpy() + tmp_countsigFT2 = selector.component_table.loc[comps2use, "countsigFT2"].to_numpy() + tmp_d_table_score = generate_decision_table_score( + tmp_kappa, tmp_dice_FT2, tmp_signal_m_noise_t, tmp_countnoise, tmp_countsigFT2 + ) + selector.component_table[f"d_table_score_node{selector.current_node_idx}"] = np.NaN + selector.component_table.loc[ + comps2use, f"d_table_score_node{selector.current_node_idx}" + ] = tmp_d_table_score + # Unclear if necessary, but this may clean up a weird issue on passing + # references in a data frame. + # See longer comment in selection_utils.comptable_classification_changer + selector.component_table = selector.component_table.copy() + + selector.cross_component_metrics["conservative_guess"] = outputs["conservative_guess"] + selector.cross_component_metrics["num_acc_guess"] = outputs["num_acc_guess"] + selector.cross_component_metrics["restrict_factor"] = outputs["restrict_factor"] + + log_decision_tree_step(function_name_idx, comps2use, calc_outputs=outputs) + + selector.tree["nodes"][selector.current_node_idx]["outputs"] = outputs + + return selector diff --git a/tedana/selection/selection_utils.py b/tedana/selection/selection_utils.py new file mode 100644 index 000000000..c4188c3e9 --- /dev/null +++ b/tedana/selection/selection_utils.py @@ -0,0 +1,820 @@ +""" +Utility functions for tedana.selection +""" + +import logging + +import numpy as np + +from tedana.stats import getfbounds + +LGR = logging.getLogger("GENERAL") +RepLGR = logging.getLogger("REPORT") +RefLGR = logging.getLogger("REFERENCES") + +############################################################## +# Functions that are used for interacting with component_table 
+############################################################## + + +def selectcomps2use(selector, decide_comps): + """Get a list of component numbers that fit the classification types in ``decide_comps``. + + Parameters + ---------- + selector : :obj:`~tedana.selection.component_selector.ComponentSelector` + Only uses the component_table in this object + decide_comps : :obj:`str` or :obj:`list[str]` or :obj:`list[int]` + This is string or a list of strings describing what classifications + of components to operate on, using default or intermediate_classification + labels. For example: decide_comps='unclassified' means to operate only on + unclassified components. The label 'all' will operate on all components + regardess of classification. This can also be used to pass through a list + of component indices to comps2use + + Returns + ------- + comps2use : :obj:`list[int]` + A list of component indices with classifications included in decide_comps + """ + + if "classification" not in selector.component_table: + raise ValueError( + "selector.component_table needs a 'classification' column to run selectcomp2suse" + ) + + if (type(decide_comps) == str) or (type(decide_comps) == int): + decide_comps = [decide_comps] + if (type(decide_comps) == list) and (decide_comps[0] == "all"): + # All components with any string in the classification field + # are set to True + comps2use = list(range(selector.component_table.shape[0])) + + elif (type(decide_comps) == list) and all(isinstance(elem, str) for elem in decide_comps): + comps2use = [] + for didx in range(len(decide_comps)): + newcomps2use = selector.component_table.index[ + selector.component_table["classification"] == decide_comps[didx] + ].tolist() + comps2use = list(set(comps2use + newcomps2use)) + elif (type(decide_comps) == list) and all(type(elem) == int for elem in decide_comps): + # decide_comps is already a string of indices + if len(selector.component_table) <= max(decide_comps): + raise ValueError( + 
"decide_comps for selectcomps2use is selecting for a component with index" + f"{max(decide_comps)} (0 indexing) which is greater than the number " + f"of components: {len(selector.component_table)}" + ) + elif min(decide_comps) < 0: + raise ValueError( + "decide_comps for selectcomps2use is selecting for a component " + f"with index {min(decide_comps)}, which is less than 0" + ) + else: + comps2use = decide_comps + else: + raise ValueError( + "decide_comps in selectcomps2use needs to be a list or a single element " + f"of strings or integers. It is {decide_comps}" + ) + + # If no components are selected, then return None. + # The function that called this can check for None and exit before + # attempting any computations on no data + # if not comps2use: + # comps2use = None + + return comps2use + + +def change_comptable_classifications( + selector, + if_true, + if_false, + decision_boolean, + tag_if_true=None, + tag_if_false=None, + dont_warn_reclassify=False, +): + """ + Given information on whether a decision critereon is true or false for each + component, change or don't change the component classification + + Parameters + ---------- + selector : :obj:`tedana.selection.component_selector.ComponentSelector` + The attributes used are component_table, component_status_table, and + current_node_idx + if_true, if_false : :obj:`str` + If the condition in this step is true or false, give the component + the label in this string. Options are 'accepted', 'rejected', + 'nochange', or intermediate_classification labels predefined in the + decision tree. If 'nochange' then don't change the current component + classification + decision_boolean : :obj:`pd.Series(bool)` + A dataframe column of equal length to component_table where each value + is True or False. + tag_if_true, tag_if_false : :obj:`str` + A string containing a label in classification_tags that will be added to + the classification_tags column in component_table if a component is + classified as true or false. 
default=None + dont_warn_reclassify : :obj:`bool` + If this function changes a component classification from accepted or + rejected to something else, it gives a warning. If this is True, that + warning is suppressed. default=False + + Returns + ------- + selector : :obj:`tedana.selection.component_selector.ComponentSelector` + component_table["classifications"] will reflect any new + classifications. + component_status_table will have a new column titled + "Node current_node_idx" that is a copy of the updated classifications + column. + component_table["classification_tags"] will be updated to include any + new tags. Each tag should appear only once in the string and tags will + be separated by commas. + n_true, n_false : :obj:`int` + The number of True and False components in decision_boolean + + Note + ---- + If a classification is changed away from accepted or rejected and + dont_warn_reclassify is False, then a warning is logged + """ + selector = comptable_classification_changer( + selector, + True, + if_true, + decision_boolean, + tag_if=tag_if_true, + dont_warn_reclassify=dont_warn_reclassify, + ) + selector = comptable_classification_changer( + selector, + False, + if_false, + decision_boolean, + tag_if=tag_if_false, + dont_warn_reclassify=dont_warn_reclassify, + ) + + selector.component_status_table[ + f"Node {selector.current_node_idx}" + ] = selector.component_table["classification"] + + n_true = decision_boolean.sum() + n_false = np.logical_not(decision_boolean).sum() + return selector, n_true, n_false + + +def comptable_classification_changer( + selector, + boolstate, + classify_if, + decision_boolean, + tag_if=None, + dont_warn_reclassify=False, +): + """Implement the component classification changes from ``change_comptable_classifications``. 
+ + Parameters + ---------- + selector : :obj:`tedana.selection.component_selector.ComponentSelector` + The attributes used are component_table, component_status_table, and + current_node_idx + boolstate : :obj:`bool` + Change classifications only for True or False components in + decision_boolean based on this variable + classify_if : :obj:`str` + This should be if_True or if_False to match boolstate. + If the condition in this step is true or false, give the component + the label in this string. Options are 'accepted', 'rejected', + 'nochange', or intermediate_classification labels predefined in the + decision tree. If 'nochange' then don't change the current component + classification + decision_boolean : :obj:`pd.Series(bool)` + A dataframe column of equal length to component_table where each value + is True or False. + tag_if : :obj:`str` + This should be tag_if_true or tag_if_false to match boolstate + A string containing a label in classification_tags that will be added to + the classification_tags column in component_table if a component is + classified as true or false. default=None + dont_warn_reclassify : :obj:`bool` + If this function changes a component classification from accepted or + rejected to something else, it gives a warning. If this is True, that + warning is suppressed. default=False + + Returns + ------- + selector : :obj:`tedana.selection.component_selector.ComponentSelector` + Operates on the True OR False components depending on boolstate + component_table["classifications"] will reflect any new + classifications. + component_status_table will have a new column titled + "Node current_node_idx" that is a copy of the updated classifications + column. + component_table["classification_tags"] will be updated to include any + new tags. Each tag should appear only once in the string and tags will + be separated by commas. 
+ + Warns + ----- + UserWarning + If a classification is changed away from accepted or rejected and + dont_warn_reclassify is False, then a warning is logged + + Note + ---- + This is designed to be run by + :func:`~tedana.selection.selection_utils.change_comptable_classifications`. + This function is run twice, ones for changes to make of a component is + True and again for components that are False. + """ + if classify_if != "nochange": + changeidx = decision_boolean.index[np.asarray(decision_boolean) == boolstate] + if not changeidx.empty: + current_classifications = set( + selector.component_table.loc[changeidx, "classification"].tolist() + ) + if current_classifications.intersection({"accepted", "rejected"}): + if not dont_warn_reclassify: + # don't make a warning if classify_if matches the current classification + # That is reject->reject shouldn't throw a warning + if ( + ("accepted" in current_classifications) and (classify_if != "accepted") + ) or (("rejected" in current_classifications) and (classify_if != "rejected")): + LGR.warning( + f"Step {selector.current_node_idx}: Some classifications are" + " changing away from accepted or rejected. Once a component is " + "accepted or rejected, it shouldn't be reclassified" + ) + selector.component_table.loc[changeidx, "classification"] = classify_if + # NOTE: CAUTION: extremely bizarre pandas behavior violates guarantee + # that df['COLUMN'] matches the df as a a whole in this case. + # We cannot replicate this consistently, but it seems to happen in some + # datasets where decide_comps does not select all components. We strongly + # suspect it has something to do with passing via reference a pandas + # data series. + # We do not understand why, but copying the table and thus removing references + # to past memory locations seems to reliably solve this issue. + # TODO: understand why this happens and avoid the problem without this hack. + # Comment line below to re-introduce original bug. 
For the kundu decision + # tree it happens on node 6 which is the first time decide_comps is for + # a subset of components + selector.component_table = selector.component_table.copy() + + if tag_if is not None: # only run if a tag is provided + for idx in changeidx: + tmpstr = selector.component_table.loc[idx, "classification_tags"] + if tmpstr == "" or isinstance(tmpstr, float): + tmpset = set([tag_if]) + else: + tmpset = set(tmpstr.split(",")) + tmpset.update([tag_if]) + selector.component_table.loc[idx, "classification_tags"] = ",".join( + str(s) for s in tmpset + ) + else: + LGR.info( + f"Step {selector.current_node_idx}: No components fit criterion " + f"{boolstate} to change classification" + ) + return selector + + +def clean_dataframe(component_table): + """ + Reorder columns in component table so that "classification" + and "classification_tags" are last. + + Parameters + ---------- + component_table : (C x M) :obj:`pandas.DataFrame` + Component metric table. One row for each component, with a column for + each metric + + Returns + ------- + component_table : (C x M) :obj:`pandas.DataFrame` + Same data as input, but the final two columns are "classification" + and "classification_tags" + """ + cols_at_end = ["classification", "classification_tags"] + component_table = component_table[ + [c for c in component_table if c not in cols_at_end] + + [c for c in cols_at_end if c in component_table] + ] + + return component_table + + +LGR = logging.getLogger("GENERAL") +RepLGR = logging.getLogger("REPORT") +RefLGR = logging.getLogger("REFERENCES") + +################################################# +# Functions to validate inputs or log information +################################################# + + +def confirm_metrics_exist(component_table, necessary_metrics, function_name=None): + """ + Confirm that all metrics declared in necessary_metrics are + already included in comptable. 
+ + Parameters + ---------- + component_table : (C x M) :obj:`pandas.DataFrame` + Component metric table. One row for each component, with a column for + each metric. The index should be the component number. + necessary_metrics : :obj:`set` + A set of strings of metric names + function_name : :obj:`str` + Text identifying the function name that called this function + + Returns + ------- + metrics_exist : :obj:`bool` + True if all metrics in necessary_metrics are in component_table + + Raises + ------ + ValueError + If metrics_exist is False then raise an error and end the program + + Note + ----- + This doesn't check if there are data in each metric's column, just that + the columns exist. Also, the string in `necessary_metrics` and the + column labels in component_table will only be matched if they're identical. + """ + missing_metrics = necessary_metrics - set(component_table.columns) + metrics_exist = len(missing_metrics) > 0 + if metrics_exist is True: + if function_name is None: + function_name = "unknown function" + + error_msg = ( + f"Necessary metrics for {function_name}: " + f"{necessary_metrics}. " + f"Comptable metrics: {set(component_table.columns)}. " + f"MISSING METRICS: {missing_metrics}." + ) + raise ValueError(error_msg) + + return metrics_exist + + +def log_decision_tree_step( + function_name_idx, + comps2use, + decide_comps=None, + n_true=None, + n_false=None, + if_true=None, + if_false=None, + calc_outputs=None, +): + """Logging text to add after every decision tree calculation + + Parameters + ---------- + function_name_idx : :obj:`str` + The name of the function that should be logged. By convention, this + be "Step current_node_idx: function_name" + comps2use : :obj:`list[int]` or -1 + A list of component indices that should be used by a function. + Only used to report no components found if empty and report + the number of components found if not empty. 
+ Note: ``calc_`` functions that don't use component metrics do not + need to use the component_table and may not require selecting + components. For those functions, set comps2use==-1 to avoid + logging a warning that no components were found. Currently, + this is only used by `calc_extend_factor` + decide_comps : :obj:`str` or :obj:`list[str]` or :obj:`list[int]` + This is string or a list of strings describing what classifications + of components to operate on. Only used in this function to report + its contents if no components with these classifications were found + n_true, n_false : :obj:`int` + The number of components classified as True or False + if_true, if_false : :obj:`str` + If a component is true or false, the classification to assign that + component + calc_outputs : :obj:`dict` + A dictionary with output information from the function. If it contains a key + "calc_cross_comp_metrics" then the value for that key is a list of + cross component metrics (i.e. kappa or rho elbows) that were calculated + within the function. Each of those metrics will also be a key in calc_outputs + and those keys and values will be logged by this function + + Returns + ------- + Information is added to the LGR.info logger. This either logs that + nothing was changed, the number of components classified as true or + false and what they changed to, or the cross component metrics that were + calculated + """ + if not (comps2use == -1) and not comps2use: + LGR.info( + f"{function_name_idx} not applied because no remaining components were " + f"classified as {decide_comps}" + ) + + if if_true or if_false: + LGR.info( + f"{function_name_idx} applied to {len(comps2use)} components. " + f"{n_true} True -> {if_true}. " + f"{n_false} False -> {if_false}." 
+ ) + + if calc_outputs: + if "calc_cross_comp_metrics" in calc_outputs: + calc_summaries = [ + f"{metric_name}={calc_outputs[metric_name]}" + for metric_name in calc_outputs["calc_cross_comp_metrics"] + ] + LGR.info(f"{function_name_idx} calculated: {', '.join(calc_summaries)}") + else: + LGR.warning( + f"{function_name_idx} logged to write out cross_component_metrics, " + "but none were calculated" + ) + + +def log_classification_counts(decision_node_idx, component_table): + """Log the total counts for each component classification in component_table. + + Parameters + ---------- + decision_node_idx : :obj:`int` + The index number for the function in the decision tree that just + finished executing + component_table : (C x M) :obj:`pandas.DataFrame` + Component metric table. One row for each component, with a column for + each metric. Only the "classification" column is usd in this function + + Returns + ------- + The LGR.info logger will add a line like: \ + 'Step 4: Total component classifications: 10 accepted, 5 provisionalreject, 8 rejected' + """ + classification_labels, label_counts = np.unique( + component_table["classification"].values, return_counts=True + ) + label_summaries = [ + f"{label_counts[i]} {label}" for i, label in enumerate(classification_labels) + ] + prelude = f"Step {decision_node_idx}: Total component classifications:" + out_str = f"{prelude} {', '.join(label_summaries)}" + LGR.info(out_str) + + +####################################################### +# Calculations that are used in decision tree functions +####################################################### +def getelbow_cons(arr, return_val=False): + """Elbow using mean/variance method - conservative + + Parameters + ---------- + arr : (C,) array_like + Metric (e.g., Kappa or Rho) values. + return_val : :obj:`bool`, optional + Return the value of the elbow instead of the index. 
Default: False + + Returns + ------- + :obj:`int` or :obj:`float` + Either the elbow index (if return_val is True) or the values at the + elbow index (if return_val is False) + """ + if arr.ndim != 1: + raise ValueError(f"Parameter arr should be 1d, not {arr.ndim}d") + + if not arr.size: + raise ValueError( + "Empty array detected during elbow calculation. " + "This error happens when getelbow_cons is incorrectly called on no components. " + "If you see this message, please open an issue at " + "https://github.com/ME-ICA/tedana/issues with the full traceback and any data " + "necessary to reproduce this error, so that we create additional data checks to " + "prevent this from happening." + ) + + arr = np.sort(arr)[::-1] + nk = len(arr) + temp1 = [ + (arr[nk - 5 - ii - 1] > arr[nk - 5 - ii : nk].mean() + 2 * arr[nk - 5 - ii : nk].std()) + for ii in range(nk - 5) + ] + ds = np.array(temp1[::-1], dtype=np.int32) + dsum = [] + c_ = 0 + for d_ in ds: + c_ = (c_ + d_) * d_ + dsum.append(c_) + + e2 = np.argmax(np.array(dsum)) + elind = np.max([getelbow(arr), e2]) + + if return_val: + return arr[elind] + else: + return elind + + +def getelbow(arr, return_val=False): + """Get elbow using linear projection method - moderate. + + Parameters + ---------- + arr : (C,) array_like + Metric (e.g., Kappa or Rho) values. + return_val : :obj:`bool`, optional + Return the value of the elbow instead of the index. Default: False + + Returns + ------- + :obj:`int` or :obj:`float` + Either the elbow index (if return_val is True) or the values at the + elbow index (if return_val is False) + """ + if arr.ndim != 1: + raise ValueError(f"Parameter arr should be 1d, not {arr.ndim}d") + + if not arr.size: + raise ValueError( + "Empty array detected during elbow calculation. " + "This error happens when getelbow is incorrectly called on no components. 
" + "If you see this message, please open an issue at " + "https://github.com/ME-ICA/tedana/issues with the full traceback and any data " + "necessary to reproduce this error, so that we create additional data checks to " + "prevent this from happening." + ) + + arr = np.sort(arr)[::-1] + n_components = arr.shape[0] + coords = np.array([np.arange(n_components), arr]) + p = coords - coords[:, 0].reshape(2, 1) + b = p[:, -1] + b_hat = np.reshape(b / np.sqrt((b**2).sum()), (2, 1)) + proj_p_b = p - np.dot(b_hat.T, p) * np.tile(b_hat, (1, n_components)) + d = np.sqrt((proj_p_b**2).sum(axis=0)) + k_min_ind = d.argmax() + + if return_val: + return arr[k_min_ind] + else: + return k_min_ind + + +def kappa_elbow_kundu(component_table, n_echos, comps2use=None): + """ + Calculate an elbow for kappa using the approach originally in + Prantik Kundu's MEICA v2.5 code + + Parameters + ---------- + component_table : (C x M) :obj:`pandas.DataFrame` + Component metric table. One row for each component, with a column for + each metric. The index should be the component number. + Only the 'kappa' column is used in this function + n_echos : :obj:`int` + The number of echos in the multi-echo data + comps2use : :obj:`list[int]` + A list of component indices used to calculate the elbow + default=None which means use all components + + Returns + ------- + kappa_elbow : :obj:`float` + The 'elbow' value for kappa values, above which components are considered + more likely to contain T2* weighted signals. 
+ minimum of kappa_allcomps_elbow and kappa_nonsig_elbow + kappa_allcomps_elbow : :obj:`float` + The elbow for kappa values using all components in comps2use + kappa_nonsig_elbow : :obj:`float` + The elbow for kappa values excluding kappa values above a threshold + None if there are fewer than 6 values remaining after thresholding + varex_upper_p : :obj:`float` + This is the median "variance explained" across components with kappa values + greater than the kappa_elbow calculated using all components + + Note + ---- + The kappa elbow calculation in Kundu's original meica code calculates + one elbow using all components' kappa values, one elbow excluding kappa + values above a threshold, and then selects the lower of the two thresholds. + This is replicated by setting comps2use to None or by giving a list that + includes all component numbers. If comps2use includes indices for only a + subset of components then the kappa values from just those components + will be used for both elbow calculations. + + varex_upper_p isn't used for anything in this function, but it is calculated + on kappa values and is used in rho_elbow_kundu_liberal. For several reasons + it made more sense to calculate here. + """ + + # If comps2use is None then set to a list of all component numbers + if not comps2use: + comps2use = list(range(component_table.shape[0])) + kappas2use = component_table.loc[comps2use, "kappa"].to_numpy() + + # low kappa threshold + _, _, f01 = getfbounds(n_echos) + # get kappa values for components below a significance threshold + kappas_nonsig = kappas2use[kappas2use < f01] + + kappa_allcomps_elbow = getelbow(kappas2use, return_val=True) + # How often would an elbow from all Kappa values ever be lower than one from + # a subset of lower values? 
+ # Note: Only use the subset of values if it includes at least 6 data points + # That is enough to calculate an elbow of a curve + # This is an arbitrary threshold not from the original meica and is + # worth reconsidering at some point + if kappas_nonsig.size >= 6: + kappa_nonsig_elbow = getelbow(kappas_nonsig, return_val=True) + + kappa_elbow = np.min((kappa_nonsig_elbow, kappa_allcomps_elbow)) + LGR.info(("Calculating kappa elbow based on min of all and nonsig components.")) + else: + kappa_elbow = kappa_allcomps_elbow + kappa_nonsig_elbow = None + LGR.info(("Calculating kappa elbow based on all components.")) + + # Calculating varex_upper_p + # Upper limit for variance explained is median across components with high + # Kappa values. High Kappa is defined as Kappa above Kappa elbow. + high_kappa_idx = np.squeeze(np.argwhere(kappas2use > kappa_allcomps_elbow)) + # list(kappa_comps2use.index[kappas2use > kappa_allcomps_elbow]) + varex_upper_p = np.median( + component_table.loc[ + high_kappa_idx, + "variance explained", + ] + ) + + return kappa_elbow, kappa_allcomps_elbow, kappa_nonsig_elbow, varex_upper_p + + +def rho_elbow_kundu_liberal( + component_table, n_echos, rho_elbow_type="kundu", comps2use=None, subset_comps2use=-1 +): + """ + Calculate an elbow for rho using the approach originally in + Prantik Kundu's MEICA v2.5 code and with a slightly more + liberal threshold + + Parameters + ---------- + component_table : (C x M) :obj:`pandas.DataFrame` + Component metric table. One row for each component, with a column for + each metric. The index should be the component number. + Only the 'kappa' column is used in this function + n_echos : :obj:`int` + The number of echos in the multi-echo data + rho_elbow_type : :obj:`str` + The algorithm used to calculate the rho elbow. Current options are + 'kundu' and 'liberal'. 
+    comps2use : :obj:`list[int]`
+        A list of component indices used to calculate the elbow
+        default=None which means use all components
+    subset_comps2use : :obj:`list[int]`
+        A list of component indices used to calculate the elbow
+        If None then only calculate a threshold using all components
+        default=-1 which means use only 'unclassified' components
+
+    Returns
+    -------
+    rho_elbow : :obj:`float`
+        The 'elbow' value for rho values, above which components are considered
+        more likely to contain S0 weighted signals
+    rho_allcomps_elbow : :obj:`float`
+        rho elbow calculated using all components in comps2use
+    rho_unclassified_elbow : :obj:`float`
+        rho elbow calculated using all components in subset_comps2use
+        None if subset_comps2use is None
+    elbow_f05 : :obj:`float`
+        A significant threshold based on the number of echoes. Used
+        as part of the mean for rho_elbow_type=='kundu'
+
+    Note
+    ----
+    The rho elbow calculation in Kundu's original meica code calculates
+    one elbow using all components' rho values, one elbow using only
+    unclassified components (excluding 2-3 remaining high variance components),
+    one threshold based on the number of echoes, and takes the mean of those 3 values.
+    To replicate the original code, comps2use should include indices for all components
+    and subset_comps2use should include indices for unclassified components
+
+    Also, in practice, one of these elbows is sometimes extremely low and the
+    mean creates an overly aggressive rho threshold (values >rho_elbow are more
+    likely rejected). The liberal threshold option takes the max of the two
+    elbows based on rho values. The assumption is that the threshold on
+    unclassified components is always lower and can likely be excluded. Both
+    rho elbows are now logged so that it will be possible to confirm this with
+    data & make additional adjustments to this threshold.
+
+    Additionally, the liberal threshold does not exclude 2-3 high variance components
+    from the unclassified threshold.
This was done as a practical matter because + those components are now removed in a separate node, dec_reclassify_high_var_comps, + and adding that separate node to the minimal tree would make it less minimal, but + it also seems reasonable since there was no clear reason why they elbow with them + removed was reliably better than the elbow containing them. More direct comparisons + between these two arbitrary thresholds might be useful at some point. + """ + if rho_elbow_type not in ["kundu", "liberal"]: + raise ValueError( + f"rho_elbow_kundu_liberal: rho_elbow_type must be 'kundu' or 'liberal'" + f"It is {rho_elbow_type} " + ) + + # If comps2use is None then set to a list of all component numbers + if not comps2use: + comps2use = list(range(component_table.shape[0])) + + # If subset_comps2use is -1 then set to a list of all unclassified components + if subset_comps2use == -1: + subset_comps2use = component_table.index[ + component_table["classification"] == "unclassified" + ].tolist() + + # One rho elbow threshold set just on the number of echoes + elbow_f05, _, _ = getfbounds(n_echos) + + # One rho elbow threshold set using all componets in comps2use + rhos_comps2use = component_table.loc[comps2use, "rho"].to_numpy() + rho_allcomps_elbow = getelbow(rhos_comps2use, return_val=True) + + # low kappa threshold + # get kappa values for components below a significance threshold + # kappas_nonsig = kappas2use[kappas2use < f01] + + # Only calculate + if not subset_comps2use: + LGR.warning( + "No unclassified components for rho elbow calculation only elbow based " + "on all components is used" + ) + rho_unclassified_elbow = None + rho_elbow = rho_allcomps_elbow + + else: + rho_unclassified_elbow = getelbow( + component_table.loc[subset_comps2use, "rho"], return_val=True + ) + + if rho_elbow_type == "kundu": + rho_elbow = np.mean((rho_allcomps_elbow, rho_unclassified_elbow, elbow_f05)) + else: # rho_elbow_type == 'liberal' + rho_elbow = np.maximum(rho_allcomps_elbow, 
rho_unclassified_elbow) + + return rho_elbow, rho_allcomps_elbow, rho_unclassified_elbow, elbow_f05 + + +def get_extend_factor(n_vols=None, extend_factor=None): + """ + extend_factor is a scaler used to set a threshold for the d_table_score in + the kundu decision tree. + + It is either defined by the number of volumes in the time series or directly + defined by the user. If it is defined by the user, that takes precedence over + using the number of volumes in a calculation + + Parameters + ---------- + n_vols : :obj:`int` + The number of volumes in an fMRI time series. default=None + In the MEICA code, extend_factor was hard-coded to 2 for data with more + than 100 volumes and 3 for data with less than 100 volumes. + Now is linearly ramped from 2-3 for vols between 90 & 110 + extend_factor : :obj:`float` + The scaler used to set a threshold for d_table_score. default=None + + Returns + ------- + extend_factor : :obj:`float` + + Note + ---- + Either n_vols OR extend_factor is a required input + """ + if extend_factor: + if isinstance(extend_factor, int): + extend_factor = float(extend_factor) + LGR.info(f"extend_factor={extend_factor}, as defined by user") + elif n_vols: + if n_vols < 90: + extend_factor = 3.0 + elif n_vols < 110: + extend_factor = 2.0 + (n_vols - 90) / 20.0 + else: + extend_factor = 2.0 + LGR.info(f"extend_factor={extend_factor}, based on number of fMRI volumes") + else: + error_msg = "get_extend_factor need n_vols or extend_factor as an input" + raise ValueError(error_msg) + + return extend_factor diff --git a/tedana/selection/tedica.py b/tedana/selection/tedica.py index 55b6db4eb..682bb5c61 100644 --- a/tedana/selection/tedica.py +++ b/tedana/selection/tedica.py @@ -3,117 +3,36 @@ """ import logging -import numpy as np -from scipy import stats - from tedana.metrics import collect -from tedana.selection._utils import clean_dataframe, getelbow -from tedana.stats import getfbounds +from tedana.selection.component_selector import ComponentSelector 
LGR = logging.getLogger("GENERAL") RepLGR = logging.getLogger("REPORT") -def manual_selection(comptable, acc=None, rej=None): - """ - Perform manual selection of components. +def automatic_selection(component_table, n_echos, n_vols, tree="kundu", verbose=False): + """Classify components based on component table and decision tree type. Parameters ---------- - comptable : (C x M) :obj:`pandas.DataFrame` - Component metric table, where `C` is components and `M` is metrics - acc : :obj:`list`, optional - List of accepted components. Default is None. - rej : :obj:`list`, optional - List of rejected components. Default is None. - - Returns - ------- - comptable : (C x M) :obj:`pandas.DataFrame` - Component metric table with classification. - metric_metadata : :obj:`dict` - Dictionary with metadata about calculated metrics. - Each entry corresponds to a column in ``comptable``. - """ - LGR.info("Performing manual ICA component selection") - RepLGR.info( - "Next, components were manually classified as " - "BOLD (TE-dependent), non-BOLD (TE-independent), or " - "uncertain (low-variance)." - ) - if ( - "classification" in comptable.columns - and "original_classification" not in comptable.columns - ): - comptable["original_classification"] = comptable["classification"] - comptable["original_rationale"] = comptable["rationale"] - - comptable["classification"] = "accepted" - comptable["rationale"] = "" - - all_comps = comptable.index.values - if acc is not None: - acc = [int(comp) for comp in acc] - - if rej is not None: - rej = [int(comp) for comp in rej] - - if acc is not None and rej is None: - rej = sorted(np.setdiff1d(all_comps, acc)) - elif acc is None and rej is not None: - acc = sorted(np.setdiff1d(all_comps, rej)) - elif acc is None and rej is None: - LGR.info("No manually accepted or rejected components supplied. 
Accepting all components.") - # Accept all components if no manual selection provided - acc = all_comps[:] - rej = [] - - ign = np.setdiff1d(all_comps, np.union1d(acc, rej)) - comptable.loc[acc, "classification"] = "accepted" - comptable.loc[rej, "classification"] = "rejected" - comptable.loc[rej, "rationale"] += "I001;" - comptable.loc[ign, "classification"] = "ignored" - comptable.loc[ign, "rationale"] += "I001;" - - # Move decision columns to end - comptable = clean_dataframe(comptable) - metric_metadata = collect.get_metadata(comptable) - return comptable, metric_metadata - - -def kundu_selection_v2(comptable, n_echos, n_vols): - """ - Classify components as "accepted", "rejected", or "ignored" based on - relevant metrics. - - The selection process uses previously calculated parameters listed in - comptable for each ICA component such as Kappa (a T2* weighting metric), - Rho (an S0 weighting metric), and variance explained. - See `Notes` for additional calculated metrics used to classify each - component into one of the listed groups. - - Parameters - ---------- - comptable : (C x M) :obj:`pandas.DataFrame` - Component metric table. One row for each component, with a column for - each metric. The index should be the component number. + component_table : :obj:`pd.DataFrame` + The component table to classify n_echos : :obj:`int` - Number of echos in original data - n_vols : :obj:`int` - Number of volumes in dataset + The number of echoes in this dataset + tree : :obj:`str` + The type of tree to use for the ComponentSelector object. Default="kundu" + verbose : :obj:`bool` + More verbose logging output if True. Default=False Returns ------- - comptable : :obj:`pandas.DataFrame` - Updated component table with additional metrics and with - classification (accepted, rejected, or ignored) - metric_metadata : :obj:`dict` - Dictionary with metadata about calculated metrics. - Each entry corresponds to a column in ``comptable``. 
+ selector : :obj:`tedana.selection.component_selector.ComponentSelector` + Contains component classifications in a component_table and provenance + and metadata from the component selection process Notes ----- - The selection algorithm used in this function was originated in ME-ICA + If tree=kundu, the selection algorithm used in this function was originated in ME-ICA by Prantik Kundu, and his original implementation is available at: https://github.com/ME-ICA/me-ica/blob/\ b2781dd087ab9de99a2ec3925f04f02ce84f0adc/meica.libs/select_model.py @@ -128,9 +47,14 @@ def kundu_selection_v2(comptable, n_echos, n_vols): components, a hypercommented version of this attempt is available at: https://gist.github.com/emdupre/ca92d52d345d08ee85e104093b81482e + If tree=="minimal", a selection algorithm based on the "kundu" tree will be used. + The differences between the "minimal" and "kundu" trees are described in the `FAQ`_. + References ---------- .. footbibliography:: + + .. _FAQ: faq.html """ LGR.info("Performing ICA component selection with Kundu decision tree v2.5") RepLGR.info( @@ -140,276 +64,13 @@ def kundu_selection_v2(comptable, n_echos, n_vols): "decision tree (v2.5) \\citep{kundu2013integrated}." ) - comptable["classification"] = "accepted" - comptable["rationale"] = "" - - # Set knobs - LOW_PERC = 25 - HIGH_PERC = 90 - if n_vols < 100: - EXTEND_FACTOR = 3 - else: - EXTEND_FACTOR = 2 - RESTRICT_FACTOR = 2 - - # Lists of components - all_comps = np.arange(comptable.shape[0]) - # unclf is a full list that is whittled down over criteria - # since the default classification is "accepted", at the end of the tree - # the remaining elements in unclf are classified as accepted - unclf = all_comps.copy() - - """ - Step 1: Reject anything that's obviously an artifact - a. 
Estimate a null variance - """ - # Rho is higher than Kappa - temp_rej0a = all_comps[(comptable["rho"] > comptable["kappa"])] - comptable.loc[temp_rej0a, "classification"] = "rejected" - comptable.loc[temp_rej0a, "rationale"] += "I002;" - - # Number of significant voxels for S0 model is higher than number for T2 - # model *and* number for T2 model is greater than zero. - temp_rej0b = all_comps[ - ((comptable["countsigFS0"] > comptable["countsigFT2"]) & (comptable["countsigFT2"] > 0)) - ] - comptable.loc[temp_rej0b, "classification"] = "rejected" - comptable.loc[temp_rej0b, "rationale"] += "I003;" - rej = np.union1d(temp_rej0a, temp_rej0b) - - # Dice score for S0 maps is higher than Dice score for T2 maps and variance - # explained is higher than the median across components. - temp_rej1 = all_comps[ - (comptable["dice_FS0"] > comptable["dice_FT2"]) - & (comptable["variance explained"] > np.median(comptable["variance explained"])) - ] - comptable.loc[temp_rej1, "classification"] = "rejected" - comptable.loc[temp_rej1, "rationale"] += "I004;" - rej = np.union1d(temp_rej1, rej) - - # T-value is less than zero (noise has higher F-statistics than signal in - # map) and variance explained is higher than the median across components. - temp_rej2 = unclf[ - (comptable.loc[unclf, "signal-noise_t"] < 0) - & (comptable.loc[unclf, "variance explained"] > np.median(comptable["variance explained"])) - ] - comptable.loc[temp_rej2, "classification"] = "rejected" - comptable.loc[temp_rej2, "rationale"] += "I005;" - rej = np.union1d(temp_rej2, rej) - unclf = np.setdiff1d(unclf, rej) - - # Quit early if no potentially accepted components remain - if len(unclf) == 0: - LGR.warning("No BOLD-like components detected. 
Ignoring all remaining components.") - ign = sorted(np.setdiff1d(all_comps, rej)) - comptable.loc[ign, "classification"] = "ignored" - comptable.loc[ign, "rationale"] += "I006;" - - # Move decision columns to end - comptable = clean_dataframe(comptable) - metric_metadata = collect.get_metadata(comptable) - return comptable, metric_metadata - - """ - Step 2: Make a guess for what the good components are, in order to - estimate good component properties - a. Not outlier variance - b. Kappa>kappa_elbow - c. Rho getelbow(comptable["kappa"], return_val=True), - "variance explained", - ] - ) - - # Sort component table by variance explained and find outlier components by - # change in variance explained from one component to the next. - # Remove variance-explained outliers from list of components to consider - # for acceptance. These components will have another chance to be accepted - # later on. - # NOTE: We're not sure why this is done this way, nor why it's specifically - # done three times. - ncls = unclf.copy() - for i_loop in range(3): - temp_comptable = comptable.loc[ncls].sort_values( - by=["variance explained"], ascending=False - ) - diff_vals = temp_comptable["variance explained"].diff(-1) - diff_vals = diff_vals.fillna(0) - ncls = temp_comptable.loc[diff_vals < varex_upper_p].index.values - - # Compute elbows from other elbows - f05, _, f01 = getfbounds(n_echos) - kappas_nonsig = comptable.loc[comptable["kappa"] < f01, "kappa"] - if not kappas_nonsig.size: - LGR.warning( - "No nonsignificant kappa values detected. " - "Only using elbow calculated from all kappa values." - ) - kappas_nonsig_elbow = np.nan - else: - kappas_nonsig_elbow = getelbow(kappas_nonsig, return_val=True) - - kappas_all_elbow = getelbow(comptable["kappa"], return_val=True) - - # NOTE: Would an elbow from all Kappa values *ever* be lower than one from - # a subset of lower (i.e., nonsignificant) values? 
- kappa_elbow = np.nanmin((kappas_all_elbow, kappas_nonsig_elbow)) - rhos_ncls_elbow = getelbow(comptable.loc[ncls, "rho"], return_val=True) - rhos_all_elbow = getelbow(comptable["rho"], return_val=True) - rho_elbow = np.mean((rhos_ncls_elbow, rhos_all_elbow, f05)) - - # Provisionally accept components based on Kappa and Rho elbows - acc_prov = ncls[ - (comptable.loc[ncls, "kappa"] >= kappa_elbow) & (comptable.loc[ncls, "rho"] < rho_elbow) - ] - - # Quit early if no potentially accepted components remain - if len(acc_prov) <= 1: - LGR.warning("Too few BOLD-like components detected. Ignoring all remaining.") - ign = sorted(np.setdiff1d(all_comps, rej)) - comptable.loc[ign, "classification"] = "ignored" - comptable.loc[ign, "rationale"] += "I006;" - - # Move decision columns to end - comptable = clean_dataframe(comptable) - metric_metadata = collect.get_metadata(comptable) - return comptable, metric_metadata - - # Calculate "rate" for kappa: kappa range divided by variance explained - # range, for potentially accepted components - # NOTE: What is the logic behind this? 
- kappa_rate = ( - np.max(comptable.loc[acc_prov, "kappa"]) - np.min(comptable.loc[acc_prov, "kappa"]) - ) / ( - np.max(comptable.loc[acc_prov, "variance explained"]) - - np.min(comptable.loc[acc_prov, "variance explained"]) - ) - comptable["kappa ratio"] = kappa_rate * comptable["variance explained"] / comptable["kappa"] - - # Calculate bounds for variance explained - varex_lower = stats.scoreatpercentile(comptable.loc[acc_prov, "variance explained"], LOW_PERC) - varex_upper = stats.scoreatpercentile(comptable.loc[acc_prov, "variance explained"], HIGH_PERC) - - """ - Step 3: Get rid of midk components; i.e., those with higher than - max decision score and high variance - """ - max_good_d_score = EXTEND_FACTOR * len(acc_prov) - midk = unclf[ - (comptable.loc[unclf, "d_table_score"] > max_good_d_score) - & (comptable.loc[unclf, "variance explained"] > EXTEND_FACTOR * varex_upper) - ] - comptable.loc[midk, "classification"] = "rejected" - comptable.loc[midk, "rationale"] += "I007;" - unclf = np.setdiff1d(unclf, midk) - acc_prov = np.setdiff1d(acc_prov, midk) - - """ - Step 4: Find components to ignore - """ - # collect high variance unclassified components - # and mix of high/low provisionally accepted - high_varex = np.union1d( - acc_prov, unclf[comptable.loc[unclf, "variance explained"] > varex_lower] - ) - # ignore low variance components - ign = np.setdiff1d(unclf, high_varex) - # but only if they have bad decision scores - ign = np.setdiff1d(ign, ign[comptable.loc[ign, "d_table_score"] < max_good_d_score]) - # and low kappa - ign = np.setdiff1d(ign, ign[comptable.loc[ign, "kappa"] > kappa_elbow]) - comptable.loc[ign, "classification"] = "ignored" - comptable.loc[ign, "rationale"] += "I008;" - unclf = np.setdiff1d(unclf, ign) - - """ - Step 5: Scrub the set if there are components that haven't been rejected or - ignored, but are still not listed in the provisionally accepted group. 
- """ - if len(unclf) > len(acc_prov): - comptable["d_table_score_scrub"] = np.nan - # Recompute the midk steps on the limited set to clean up the tail - d_table_rank = np.vstack( - [ - len(unclf) - stats.rankdata(comptable.loc[unclf, "kappa"]), - len(unclf) - stats.rankdata(comptable.loc[unclf, "dice_FT2"]), - len(unclf) - stats.rankdata(comptable.loc[unclf, "signal-noise_t"]), - stats.rankdata(comptable.loc[unclf, "countnoise"]), - len(unclf) - stats.rankdata(comptable.loc[unclf, "countsigFT2"]), - ] - ).T - comptable.loc[unclf, "d_table_score_scrub"] = d_table_rank.mean(1) - num_acc_guess = int( - np.mean( - [ - np.sum( - (comptable.loc[unclf, "kappa"] > kappa_elbow) - & (comptable.loc[unclf, "rho"] < rho_elbow) - ), - np.sum(comptable.loc[unclf, "kappa"] > kappa_elbow), - ] - ) - ) - - # Rejection candidate based on artifact type A: candartA - conservative_guess = num_acc_guess / RESTRICT_FACTOR - candartA = np.intersect1d( - unclf[comptable.loc[unclf, "d_table_score_scrub"] > conservative_guess], - unclf[comptable.loc[unclf, "kappa ratio"] > EXTEND_FACTOR * 2], - ) - candartA = candartA[ - comptable.loc[candartA, "variance explained"] > varex_upper * EXTEND_FACTOR - ] - comptable.loc[candartA, "classification"] = "rejected" - comptable.loc[candartA, "rationale"] += "I009;" - midk = np.union1d(midk, candartA) - unclf = np.setdiff1d(unclf, midk) - - # Rejection candidate based on artifact type B: candartB - conservative_guess2 = num_acc_guess * HIGH_PERC / 100.0 - candartB = unclf[comptable.loc[unclf, "d_table_score_scrub"] > conservative_guess2] - candartB = candartB[ - comptable.loc[candartB, "variance explained"] > varex_lower * EXTEND_FACTOR - ] - comptable.loc[candartB, "classification"] = "rejected" - comptable.loc[candartB, "rationale"] += "I010;" - midk = np.union1d(midk, candartB) - unclf = np.setdiff1d(unclf, midk) - - # Find components to ignore - # Of the remaining components, ignore ones with higher variance even if their - # decision tree scores 
are poor, instead of rejecting them - sorted_varex = np.flip(np.sort(comptable.loc[unclf, "variance explained"].to_numpy())) - new_varex_lower = stats.scoreatpercentile(sorted_varex[:num_acc_guess], LOW_PERC) - candart = unclf[comptable.loc[unclf, "d_table_score_scrub"] > num_acc_guess] - ign_add0 = candart[comptable.loc[candart, "variance explained"] > new_varex_lower] - ign_add0 = np.setdiff1d(ign_add0, midk) - comptable.loc[ign_add0, "classification"] = "ignored" - comptable.loc[ign_add0, "rationale"] += "I011;" - ign = np.union1d(ign, ign_add0) - unclf = np.setdiff1d(unclf, ign) - - # Ignore low Kappa, high variance explained components - ign_add1 = np.intersect1d( - unclf[comptable.loc[unclf, "kappa"] <= kappa_elbow], - unclf[comptable.loc[unclf, "variance explained"] > new_varex_lower], - ) - ign_add1 = np.setdiff1d(ign_add1, midk) - comptable.loc[ign_add1, "classification"] = "ignored" - comptable.loc[ign_add1, "rationale"] += "I012;" - - # at this point, unclf is equivalent to accepted + component_table["classification_tags"] = "" + xcomp = { + "n_echos": n_echos, + "n_vols": n_vols, + } + selector = ComponentSelector(tree, component_table, cross_component_metrics=xcomp) + selector.select() + selector.metadata = collect.get_metadata(selector.component_table) - # Move decision columns to end - comptable = clean_dataframe(comptable) - metric_metadata = collect.get_metadata(comptable) - return comptable, metric_metadata + return selector diff --git a/tedana/selection/tedpca.py b/tedana/selection/tedpca.py index 143d15572..5a99fea71 100644 --- a/tedana/selection/tedpca.py +++ b/tedana/selection/tedpca.py @@ -7,7 +7,7 @@ from tedana import utils from tedana.metrics import collect -from tedana.selection._utils import clean_dataframe, getelbow, getelbow_cons +from tedana.selection.selection_utils import clean_dataframe, getelbow, getelbow_cons from tedana.stats import getfbounds LGR = logging.getLogger("GENERAL") @@ -17,8 +17,7 @@ def kundu_tedpca(comptable, 
n_echos, kdaw=10.0, rdaw=1.0, stabilize=False): - """ - Select PCA components using Kundu's decision tree approach. + """Select PCA components using Kundu's decision tree approach. Parameters ---------- diff --git a/tedana/tests/data/cornell_three_echo_outputs.txt b/tedana/tests/data/cornell_three_echo_outputs.txt index 5240740ba..821f07b8e 100644 --- a/tedana/tests/data/cornell_three_echo_outputs.txt +++ b/tedana/tests/data/cornell_three_echo_outputs.txt @@ -7,9 +7,13 @@ desc-ICA_components.nii.gz desc-ICA_decomposition.json desc-tedana_metrics.json desc-tedana_metrics.tsv +desc-tedana_registry.json +desc-ICACrossComponent_metrics.json +desc-ICA_status_table.tsv +desc-ICA_decision_tree.json desc-ICA_mixing.tsv desc-ICA_stat-z_components.nii.gz -desc-PCA_cross_component_metrics.json +desc-PCACrossComponent_metrics.json desc-PCA_decomposition.json desc-PCA_metrics.json desc-PCA_metrics.tsv diff --git a/tedana/tests/data/fiu_four_echo_outputs.txt b/tedana/tests/data/fiu_four_echo_outputs.txt index a3a3da3d4..e7fd98823 100644 --- a/tedana/tests/data/fiu_four_echo_outputs.txt +++ b/tedana/tests/data/fiu_four_echo_outputs.txt @@ -10,18 +10,15 @@ desc-ICA_components.nii.gz desc-ICA_decomposition.json desc-tedana_metrics.json desc-tedana_metrics.tsv +desc-tedana_registry.json +desc-ICACrossComponent_metrics.json +desc-ICA_status_table.tsv +desc-ICA_decision_tree.json desc-ICAS0_stat-F_statmap.nii.gz desc-ICAT2_stat-F_statmap.nii.gz desc-ICA_mixing.tsv +desc-ICA_mixing_static.tsv desc-ICA_stat-z_components.nii.gz -desc-PCAAveragingWeights_components.nii.gz -desc-PCAS0_stat-F_statmap.nii.gz -desc-PCAT2_stat-F_statmap.nii.gz -desc-PCA_decomposition.json -desc-PCA_metrics.json -desc-PCA_metrics.tsv -desc-PCA_mixing.tsv -desc-PCA_stat-z_components.nii.gz desc-T1likeEffect_min.nii.gz desc-adaptiveGoodSignal_mask.nii.gz desc-globalSignal_map.nii.gz @@ -33,7 +30,6 @@ desc-optcomAccepted_bold.nii.gz desc-optcomDenoised_bold.nii.gz desc-optcomMIRDenoised_bold.nii.gz 
desc-optcomNoGlobalSignal_bold.nii.gz -desc-optcomPCAReduced_bold.nii.gz desc-optcomRejected_bold.nii.gz desc-optcomWithGlobalSignal_bold.nii.gz desc-optcom_bold.nii.gz @@ -42,36 +38,24 @@ echo-1_desc-Denoised_bold.nii.gz echo-1_desc-ICAT2ModelPredictions_components.nii.gz echo-1_desc-ICAS0ModelPredictions_components.nii.gz echo-1_desc-ICA_components.nii.gz -echo-1_desc-PCAT2ModelPredictions_components.nii.gz -echo-1_desc-PCAS0ModelPredictions_components.nii.gz -echo-1_desc-PCA_components.nii.gz echo-1_desc-Rejected_bold.nii.gz echo-2_desc-Accepted_bold.nii.gz echo-2_desc-Denoised_bold.nii.gz echo-2_desc-ICAT2ModelPredictions_components.nii.gz echo-2_desc-ICAS0ModelPredictions_components.nii.gz echo-2_desc-ICA_components.nii.gz -echo-2_desc-PCAT2ModelPredictions_components.nii.gz -echo-2_desc-PCAS0ModelPredictions_components.nii.gz -echo-2_desc-PCA_components.nii.gz echo-2_desc-Rejected_bold.nii.gz echo-3_desc-Accepted_bold.nii.gz echo-3_desc-Denoised_bold.nii.gz echo-3_desc-ICAT2ModelPredictions_components.nii.gz echo-3_desc-ICAS0ModelPredictions_components.nii.gz echo-3_desc-ICA_components.nii.gz -echo-3_desc-PCAT2ModelPredictions_components.nii.gz -echo-3_desc-PCAS0ModelPredictions_components.nii.gz -echo-3_desc-PCA_components.nii.gz echo-3_desc-Rejected_bold.nii.gz echo-4_desc-Accepted_bold.nii.gz echo-4_desc-Denoised_bold.nii.gz echo-4_desc-ICAT2ModelPredictions_components.nii.gz echo-4_desc-ICAS0ModelPredictions_components.nii.gz echo-4_desc-ICA_components.nii.gz -echo-4_desc-PCAT2ModelPredictions_components.nii.gz -echo-4_desc-PCAS0ModelPredictions_components.nii.gz -echo-4_desc-PCA_components.nii.gz echo-4_desc-Rejected_bold.nii.gz references.bib report.txt diff --git a/tedana/tests/data/nih_five_echo_outputs_t2smap.txt b/tedana/tests/data/nih_five_echo_outputs_t2smap.txt index ce203aebd..ecf1753b1 100644 --- a/tedana/tests/data/nih_five_echo_outputs_t2smap.txt +++ b/tedana/tests/data/nih_five_echo_outputs_t2smap.txt @@ -1,6 +1,7 @@ dataset_description.json 
desc-limited_S0map.nii.gz desc-limited_T2starmap.nii.gz +desc-tedana_registry.json desc-optcom_bold.nii.gz S0map.nii.gz T2starmap.nii.gz diff --git a/tedana/tests/data/nih_five_echo_outputs_verbose.txt b/tedana/tests/data/nih_five_echo_outputs_verbose.txt index 6f1913708..6203c421b 100644 --- a/tedana/tests/data/nih_five_echo_outputs_verbose.txt +++ b/tedana/tests/data/nih_five_echo_outputs_verbose.txt @@ -9,6 +9,10 @@ desc-ICA_components.nii.gz desc-ICA_decomposition.json desc-tedana_metrics.json desc-tedana_metrics.tsv +desc-tedana_registry.json +desc-ICACrossComponent_metrics.json +desc-ICA_status_table.tsv +desc-ICA_decision_tree.json desc-ICAS0_stat-F_statmap.nii.gz desc-ICAT2_stat-F_statmap.nii.gz desc-ICA_mixing.tsv @@ -26,7 +30,7 @@ desc-limited_S0map.nii.gz desc-limited_T2starmap.nii.gz desc-optcomAccepted_bold.nii.gz desc-optcomDenoised_bold.nii.gz -desc-optcomPCAReduced_bold.nii.gz +desc-optcom_whitened_bold.nii.gz desc-optcomRejected_bold.nii.gz desc-optcom_bold.nii.gz echo-1_desc-Accepted_bold.nii.gz diff --git a/tedana/tests/data/reclassify_debug_out.txt b/tedana/tests/data/reclassify_debug_out.txt new file mode 100644 index 000000000..7de495692 --- /dev/null +++ b/tedana/tests/data/reclassify_debug_out.txt @@ -0,0 +1,25 @@ +figures +references.bib +report.txt +sub-testymctestface_betas_OC.nii.gz +sub-testymctestface_betas_hik_OC.nii.gz +sub-testymctestface_betas_hik_OC_MIR.nii.gz +sub-testymctestface_dataset_description.json +sub-testymctestface_dn_ts_OC.nii.gz +sub-testymctestface_dn_ts_OC_MIR.nii.gz +sub-testymctestface_feats_OC2.nii.gz +sub-testymctestface_hik_ts_OC.nii.gz +sub-testymctestface_hik_ts_OC_MIR.nii.gz +sub-testymctestface_ica_components.nii.gz +sub-testymctestface_ica_cross_component_metrics.json +sub-testymctestface_ica_decision_tree.json +sub-testymctestface_ica_decomposition.json +sub-testymctestface_ica_metrics.json +sub-testymctestface_ica_metrics.tsv +sub-testymctestface_ica_mir_mixing.tsv +sub-testymctestface_ica_mixing.tsv 
+sub-testymctestface_ica_orth_mixing.tsv +sub-testymctestface_ica_status_table.tsv +sub-testymctestface_lowk_ts_OC.nii.gz +sub-testymctestface_registry.json +sub-testymctestface_sphis_hik.nii.gz diff --git a/tedana/tests/data/reclassify_no_bold.txt b/tedana/tests/data/reclassify_no_bold.txt new file mode 100644 index 000000000..277e94cc4 --- /dev/null +++ b/tedana/tests/data/reclassify_no_bold.txt @@ -0,0 +1,16 @@ +dataset_description.json +desc-ICACrossComponent_metrics.json +desc-ICA_components.nii.gz +desc-ICA_decision_tree.json +desc-ICA_decomposition.json +desc-ICA_mixing.tsv +desc-ICA_stat-z_components.nii.gz +desc-ICA_status_table.tsv +desc-optcomDenoised_bold.nii.gz +desc-optcomRejected_bold.nii.gz +desc-tedana_metrics.json +desc-tedana_metrics.tsv +desc-tedana_registry.json +figures +references.bib +report.txt diff --git a/tedana/tests/data/reclassify_quiet_out.txt b/tedana/tests/data/reclassify_quiet_out.txt new file mode 100644 index 000000000..2107755b9 --- /dev/null +++ b/tedana/tests/data/reclassify_quiet_out.txt @@ -0,0 +1,93 @@ +dataset_description.json +desc-ICAAccepted_components.nii.gz +desc-ICAAccepted_stat-z_components.nii.gz +desc-ICACrossComponent_metrics.json +desc-ICA_components.nii.gz +desc-ICA_decision_tree.json +desc-ICA_decomposition.json +desc-ICA_mixing.tsv +desc-ICA_stat-z_components.nii.gz +desc-ICA_status_table.tsv +desc-optcomAccepted_bold.nii.gz +desc-optcomDenoised_bold.nii.gz +desc-optcomRejected_bold.nii.gz +desc-tedana_metrics.json +desc-tedana_metrics.tsv +desc-tedana_registry.json +figures +references.bib +report.txt +tedana_report.html +figures/carpet_accepted.svg +figures/carpet_denoised.svg +figures/carpet_optcom.svg +figures/carpet_rejected.svg +figures/comp_000.png +figures/comp_001.png +figures/comp_002.png +figures/comp_003.png +figures/comp_004.png +figures/comp_005.png +figures/comp_006.png +figures/comp_007.png +figures/comp_008.png +figures/comp_009.png +figures/comp_010.png +figures/comp_011.png 
+figures/comp_012.png +figures/comp_013.png +figures/comp_014.png +figures/comp_015.png +figures/comp_016.png +figures/comp_017.png +figures/comp_018.png +figures/comp_019.png +figures/comp_020.png +figures/comp_021.png +figures/comp_022.png +figures/comp_023.png +figures/comp_024.png +figures/comp_025.png +figures/comp_026.png +figures/comp_027.png +figures/comp_028.png +figures/comp_029.png +figures/comp_030.png +figures/comp_031.png +figures/comp_032.png +figures/comp_033.png +figures/comp_034.png +figures/comp_035.png +figures/comp_036.png +figures/comp_037.png +figures/comp_038.png +figures/comp_039.png +figures/comp_040.png +figures/comp_041.png +figures/comp_042.png +figures/comp_043.png +figures/comp_044.png +figures/comp_045.png +figures/comp_046.png +figures/comp_047.png +figures/comp_048.png +figures/comp_049.png +figures/comp_050.png +figures/comp_051.png +figures/comp_052.png +figures/comp_053.png +figures/comp_054.png +figures/comp_055.png +figures/comp_056.png +figures/comp_057.png +figures/comp_058.png +figures/comp_059.png +figures/comp_060.png +figures/comp_061.png +figures/comp_062.png +figures/comp_063.png +figures/comp_064.png +figures/comp_065.png +figures/comp_066.png +figures/comp_067.png +figures/comp_068.png diff --git a/tedana/tests/data/reclassify_run_twice.txt b/tedana/tests/data/reclassify_run_twice.txt new file mode 100644 index 000000000..9609c85ba --- /dev/null +++ b/tedana/tests/data/reclassify_run_twice.txt @@ -0,0 +1,20 @@ +dataset_description.json +desc-ICAAccepted_components.nii.gz +desc-ICAAccepted_stat-z_components.nii.gz +desc-ICACrossComponent_metrics.json +desc-ICA_components.nii.gz +desc-ICA_decision_tree.json +desc-ICA_decomposition.json +desc-ICA_mixing.tsv +desc-ICA_stat-z_components.nii.gz +desc-ICA_status_table.tsv +desc-optcomAccepted_bold.nii.gz +desc-optcomDenoised_bold.nii.gz +desc-optcomRejected_bold.nii.gz +desc-tedana_metrics.json +desc-tedana_metrics.tsv +desc-tedana_registry.json +figures +references.bib 
+report.txt +report_old.txt diff --git a/tedana/tests/data/sample_comptable.tsv b/tedana/tests/data/sample_comptable.tsv new file mode 100644 index 000000000..ad4551417 --- /dev/null +++ b/tedana/tests/data/sample_comptable.tsv @@ -0,0 +1,22 @@ +Component kappa rho variance explained normalized variance explained countsigFT2 countsigFS0 dice_FT2 dice_FS0 countnoise signal-noise_t signal-noise_p d_table_score optimal sign classification classification_tags +ICA_00 11.773633384130344 12.244047372613279 1.8708761636220743 0.0244263937653776 70 281 0.0 0.0 4653 0.0 0.0 19.4 1 rejected Unlikely BOLD +ICA_01 13.469100021727968 10.597359822668041 1.8141851634150163 0.024946211086853073 356 0 0.0 0.0 4427 7.160459030871695 4.424304742815937e-12 16.3 1 rejected None,Unlikely BOLD +ICA_02 22.044384493213173 14.081602067232835 5.0844942012809025 0.05182420614143496 3860 331 0.41309492505916384 0.0 4096 0.0 0.0 7.1 -1 accepted None,Likely BOLD +ICA_03 21.435565425383167 13.08776097105916 6.337651400602204 0.06267167186849244 3501 340 0.4229266174554928 0.0 4154 0.0 0.0 8.7 -1 accepted None,Likely BOLD +ICA_04 21.571783725457784 13.180291764692623 5.410512852983703 0.05415357748457931 3111 379 0.3955332578087069 0.0 4351 0.0 0.0 13.5 1 accepted None,Likely BOLD +ICA_05 20.455815181233913 13.530653271482537 5.103253880526514 0.0484684678593299 2992 122 0.41797072185764766 0.0 4253 4.26379519408162 8.466083940618762e-05 11.6 1 rejected Unlikely BOLD +ICA_06 21.998300075307892 12.342036790070972 4.661405806338223 0.047164097532350104 3647 446 0.4382828993666432 0.0 4331 0.0 0.0 9.5 1 accepted None,Likely BOLD +ICA_07 21.57683009938704 14.067931569329534 5.052657635488493 0.0471509419227379 2995 368 0.38913870632998965 0.0 4288 0.0 0.0 12.7 -1 accepted None,Likely BOLD +ICA_08 20.662379427772397 13.605577967277663 4.899573691276539 0.0487272274448804 3292 486 0.40497951465490073 0.0 4169 2.4101037193307158 0.019728835459480055 10.4 1 accepted None,Likely BOLD +ICA_09 
21.350101713170858 14.433444938178267 5.480380082264091 0.053822797428019244 2934 529 0.40547176429323045 0.0 4240 5.942792150490043 3.1181725670893785e-08 10.4 1 accepted None,Likely BOLD +ICA_10 21.333212633418892 12.977569833261267 4.88266845033625 0.048109815519354025 2757 244 0.48600223964165734 0.0 4271 3.193303025882193 0.0019400513552828209 11.0 1 accepted None,Likely BOLD +ICA_11 22.70767526961776 9.5558503504626 2.106637746364626 0.0290437126768305 3131 0 0.6537842190016103 0.0 3383 18.975490976691813 4.409180770584508e-76 2.2 -1 rejected None,Unlikely BOLD +ICA_12 21.42480737193139 12.523285689527825 4.700042017821479 0.04555941778867682 3216 445 0.46589486858573215 0.0 4349 8.925209841985136 6.428202512989954e-14 8.4 -1 accepted None,Likely BOLD +ICA_13 20.461714898910873 13.372399561878185 4.91288433926911 0.049139412646169 2988 379 0.37319449109842123 0.0 4270 0.0 0.0 14.7 1 accepted None,Likely BOLD +ICA_14 20.831824101283157 14.123062103306893 5.117731849017717 0.052114111704595754 2849 768 0.42624709458251386 0.0 4295 0.0 0.0 13.9 -1 accepted None,Likely BOLD +ICA_15 23.443590881814984 13.43898022294784 5.607029047746701 0.054302339698066616 3847 409 0.4945730351771937 0.0 4266 4.303738107001976 3.587737594462498e-05 4.2 -1 accepted None,Likely BOLD +ICA_16 21.510571197601205 13.279774339508181 4.847709079277335 0.04722499686178239 3042 227 0.4956953642384106 0.0 4275 11.819966876475757 1.8003157916056668e-22 6.6 -1 accepted None,Likely BOLD +ICA_17 21.38843440973818 13.768635078323872 5.040219756607229 0.04728714164872373 2519 50 0.4331983805668016 0.0 4339 5.108828337559395 3.1788748004451597e-06 12.0 1 accepted None,Likely BOLD +ICA_18 23.30080869317963 14.338966963066872 5.690965607643776 0.05575825515462903 4046 541 0.36185133239831696 0.0 4234 3.321412684022962 0.0014954034132828892 7.0 -1 accepted None,Likely BOLD +ICA_19 21.957993034031354 13.929222476750176 6.323103550347913 0.05978202785850936 3480 195 0.44127806563039723 0.0 4174 
7.475122521508307 6.663523274208933e-10 5.0 -1 accepted None,Likely BOLD +ICA_20 22.28373739816742 13.346603659167044 5.0560176777701145 0.04832317590860793 2347 150 0.4419735927727589 0.0 4298 4.609368981883021 2.9109368062960523e-05 9.6 -1 accepted None,Likely BOLD diff --git a/tedana/tests/test_bibtex.py b/tedana/tests/test_bibtex.py new file mode 100644 index 000000000..885593ba7 --- /dev/null +++ b/tedana/tests/test_bibtex.py @@ -0,0 +1,10 @@ +"""Tests for bibtex""" + +from tedana import bibtex + + +def test_warn_no_citation_found(caplog): + citations = ["Nonexistent et al, 0 AD"] + ref_list = [] + bibtex.reduce_references(citations, ref_list) + assert f"Citation {citations[0]} not found." in caplog.text diff --git a/tedana/tests/test_component_selector.py b/tedana/tests/test_component_selector.py new file mode 100644 index 000000000..5f9a3da52 --- /dev/null +++ b/tedana/tests/test_component_selector.py @@ -0,0 +1,330 @@ +"""Tests for the decision tree modularization""" +import glob +import json +import os +import os.path as op + +import pandas as pd +import pytest + +from tedana.selection import component_selector + +THIS_DIR = os.path.dirname(os.path.abspath(__file__)) + +# ---------------------------------------------------------------------- +# Functions Used For Tests +# ---------------------------------------------------------------------- + + +def sample_comptable(): + """Retrieves a sample component table""" + sample_fname = op.join(THIS_DIR, "data", "sample_comptable.tsv") + + return pd.read_csv(sample_fname, delimiter="\t") + + +def dicts_to_test(treechoice): + """ + Outputs decision tree dictionaries to use to test tree validation + + Parameters + ---------- + treechoice: :obj:`str` One of several labels to select which dict to output + Options are: + "valid": A tree that would trigger all warnings, but pass validation + "extra_req_param": A tree with an undefined required parameter for a decision node function + "extra_opt_param": A tree with an 
undefined optional parameter for a decision node function + "missing_req_param": A missing required param in a decision node function + "missing_function": An undefined decision node function + "missing_key": A dict missing one of the required keys (refs) + + Returns + ------- + tree: :ojb:`dict` A dict that can be input into component_selector.validate_tree + """ + + # valid_dict is a simple valid dictionary to test + # It includes a few things that should trigger warnings, but not errors. + valid_dict = { + "tree_id": "valid_simple_tree", + "info": "This is a short valid tree", + "report": "", + "refs": "", + # Warning for an unused key + "unused_key": "There can be added keys that are valid, but aren't used", + "necessary_metrics": ["kappa", "rho"], + "intermediate_classifications": ["random1"], + "classification_tags": ["Random1"], + "nodes": [ + { + "functionname": "dec_left_op_right", + "parameters": { + "if_true": "rejected", + "if_false": "nochange", + "decide_comps": "all", + "op": ">", + "left": "rho", + "right": "kappa", + }, + "kwargs": { + "log_extra_info": "random1 if Kappa", + "left": "kappa", + "right": "rho", + }, + "kwargs": { + "log_extra_info": "random2 if Kappa>Rho", + "log_extra_report": "", + # Warning for an non-predefined classification assigned to a component + "tag_if_true": "random2notpredefined", + }, + }, + { + "functionname": "manual_classify", + "parameters": { + "new_classification": "accepted", + # Warning for an non-predefined classification used to select + # components to operate on + "decide_comps": "random2notpredefined", + }, + "kwargs": { + "log_extra_info": "", + "log_extra_report": "", + # Warning for a tag that wasn't predefined + "tag": "Random2_NotPredefined", + }, + }, + { + "functionname": "manual_classify", + "parameters": { + "new_classification": "rejected", + "decide_comps": "random1", + }, + "kwargs": { + "tag": "Random1", + }, + }, + ], + } + + tree = valid_dict + if treechoice == "valid": + return tree + elif 
treechoice == "extra_req_param": + tree["nodes"][0]["parameters"]["nonexistent_req_param"] = True + elif treechoice == "extra_opt_param": + tree["nodes"][0]["kwargs"]["nonexistent_opt_param"] = True + elif treechoice == "missing_req_param": + tree["nodes"][0]["parameters"].pop("op") + elif treechoice == "missing_function": + tree["nodes"][0]["functionname"] = "not_a_function" + elif treechoice == "missing_key": + tree.pop("refs") + elif treechoice == "null_value": + tree["nodes"][0]["parameters"]["left"] = None + else: + raise Exception(f"{treechoice} is an invalid option for treechoice") + + return tree + + +# ---------------------------------------------------------------------- +# component_selector Tests +# ---------------------------------------------------------------------- + + +# load_config +# ----------- +def test_load_config_fails(): + """Tests for load_config failure modes""" + + # We recast to ValueError in the file not found and directory cases + with pytest.raises(ValueError): + component_selector.load_config("THIS FILE DOES NOT EXIST.txt") + + # Raises IsADirectoryError for a directory + with pytest.raises(ValueError): + component_selector.load_config(".") + + # Note: we defer validation errors for validate_tree even though + # load_config may raise them + + +def test_load_config_succeeds(): + """Tests to make sure load_config succeeds""" + + # The minimal tree should have an id of "minimal_decision_tree_test1" + tree = component_selector.load_config("minimal") + assert tree["tree_id"] == "minimal_decision_tree_test1" + + +def test_minimal(): + """Smoke test for constructor for ComponentSelector using minimal tree""" + xcomp = { + "n_echos": 3, + } + selector = component_selector.ComponentSelector( + "minimal", + sample_comptable(), + cross_component_metrics=xcomp.copy(), + ) + selector.select() + + # rerun without classification_tags column initialized + selector = component_selector.ComponentSelector( + "minimal", + sample_comptable(), + 
cross_component_metrics=xcomp.copy(), + ) + selector.component_table = selector.component_table.drop(columns="classification_tags") + selector.select() + + +# validate_tree +# ------------- + + +def test_validate_tree_succeeds(): + """ + Tests to make sure validate_tree suceeds for all default + decision trees in decision trees + Tested on all default trees in ./tedana/resources/decision_trees + Note: If there is a tree in the default trees directory that + is being developed and not yet valid, it's file name should + include 'invalid' as a prefix + """ + + default_tree_names = glob.glob( + os.path.join(THIS_DIR, "../resources/decision_trees/[!invalid]*.json") + ) + + for tree_name in default_tree_names: + f = open(tree_name) + tree = json.load(f) + assert component_selector.validate_tree(tree) + + # Test a few extra possabilities just using the minimal.json tree + if "/minimal.json" in tree_name: + # Should remove/ignore the "reconstruct_from" key during validation + tree["reconstruct_from"] = "testinput" + # Need to test handling of the tag_if_false kwarg somewhere + tree["nodes"][1]["kwargs"]["tag_if_false"] = "testing tag" + assert component_selector.validate_tree(tree) + + +def test_validate_tree_warnings(): + """ + Tests to make sure validate_tree triggers all warning conditions + but still succeeds + """ + + # A tree that raises all possible warnings in the validator should still be valid + assert component_selector.validate_tree(dicts_to_test("valid")) + + +def test_validate_tree_fails(): + """ + Tests to make sure validate_tree fails for invalid trees + Tests ../resources/decision_trees/invalid*.json and + ./data/ComponentSelection/invalid*.json trees + """ + + # An empty dict should not be valid + with pytest.raises(component_selector.TreeError): + component_selector.validate_tree({}) + + # A tree that is missing a required key should not be valid + with pytest.raises(component_selector.TreeError): + 
component_selector.validate_tree(dicts_to_test("missing_key")) + + # Calling a selection node function that does not exist should not be valid + with pytest.raises(component_selector.TreeError): + component_selector.validate_tree(dicts_to_test("missing_function")) + + # Calling a function with an non-existent required parameter should not be valid + with pytest.raises(component_selector.TreeError): + component_selector.validate_tree(dicts_to_test("extra_req_param")) + + # Calling a function with an non-existent optional parameter should not be valid + with pytest.raises(component_selector.TreeError): + component_selector.validate_tree(dicts_to_test("extra_opt_param")) + + # Calling a function missing a required parameter should not be valid + with pytest.raises(component_selector.TreeError): + component_selector.validate_tree(dicts_to_test("missing_req_param")) + + +def test_check_null_fails(): + """Tests to trigger check_null missing parameter error""" + + selector = component_selector.ComponentSelector("minimal", sample_comptable()) + selector.tree = dicts_to_test("null_value") + + params = selector.tree["nodes"][0]["parameters"] + functionname = selector.tree["nodes"][0]["functionname"] + with pytest.raises(ValueError): + selector.check_null(params, functionname) + + +def test_check_null_succeeds(): + """Tests check_null finds empty parameter in self""" + + # "left" is missing from the function definition in node + # but is found as an initialized cross component metric + xcomp = { + "left": 3, + } + selector = component_selector.ComponentSelector( + "minimal", + sample_comptable(), + cross_component_metrics=xcomp, + ) + selector.tree = dicts_to_test("null_value") + + params = selector.tree["nodes"][0]["parameters"] + functionname = selector.tree["nodes"][0]["functionname"] + selector.check_null(params, functionname) + + +def test_are_only_necessary_metrics_used_warning(): + """Tests a warning that wasn't triggered in other test workflows""" + + selector = 
component_selector.ComponentSelector("minimal", sample_comptable()) + + # warning when an element of necessary_metrics was not in used_metrics + selector.tree["used_metrics"] = set(["A", "B", "C"]) + selector.necessary_metrics = set(["B", "C", "D"]) + selector.are_only_necessary_metrics_used() + + +def test_are_all_components_accepted_or_rejected(): + """Tests warnings are triggered in are_all_components_accepted_or_rejected""" + + selector = component_selector.ComponentSelector("minimal", sample_comptable()) + selector.component_table.loc[7, "classification"] = "intermediate1" + selector.component_table.loc[[1, 3, 5], "classification"] = "intermediate2" + selector.are_all_components_accepted_or_rejected() + + +def test_selector_properties_smoke(): + """Tests to confirm properties match expected results""" + + selector = component_selector.ComponentSelector("minimal", sample_comptable()) + + assert selector.n_comps == 21 + + # Also runs selector.likely_bold_comps and should need to deal with sets in each field + assert selector.n_likely_bold_comps == 17 + + assert selector.n_accepted_comps == 17 + + assert selector.rejected_comps.sum() == 4 diff --git a/tedana/tests/test_integration.py b/tedana/tests/test_integration.py index 4a050deaa..a8de177f4 100644 --- a/tedana/tests/test_integration.py +++ b/tedana/tests/test_integration.py @@ -3,10 +3,15 @@ """ import glob +import json +import logging import os +import os.path as op import re import shutil +import subprocess import tarfile +from datetime import datetime from gzip import GzipFile from io import BytesIO @@ -15,11 +20,18 @@ import requests from pkg_resources import resource_filename +from tedana.io import InputHarvester from tedana.workflows import t2smap as t2smap_cli from tedana.workflows import tedana as tedana_cli +from tedana.workflows.ica_reclassify import ica_reclassify_workflow +# Need to see if a no BOLD warning occurred +LOGGER = logging.getLogger(__name__) +# Added a testing logger to output whether 
or not testing data were downlaoded +TestLGR = logging.getLogger("TESTING") -def check_integration_outputs(fname, outpath): + +def check_integration_outputs(fname, outpath, n_logs=1): """ Checks outputs of integration tests @@ -40,10 +52,11 @@ def check_integration_outputs(fname, outpath): # Checks for log file log_regex = "^tedana_[12][0-9]{3}-[0-9]{2}-[0-9]{2}T[0-9]{2}[0-9]{2}[0-9]{2}.tsv$" logfiles = [out for out in existing if re.match(log_regex, out)] - assert len(logfiles) == 1 + assert len(logfiles) == n_logs - # Removes logfile from list of existing files - existing.remove(logfiles[0]) + # Removes logfiles from list of existing files + for log in logfiles: + existing.remove(log) # Compares remaining files with those expected with open(fname, "r") as f: @@ -52,23 +65,160 @@ def check_integration_outputs(fname, outpath): assert sorted(tocheck) == sorted(existing) -def download_test_data(osf, outpath): +def data_for_testing_info(test_dataset=str): """ - Downloads tar.gz data stored at `osf` and unpacks into `outpath` + Get the path and download link for each dataset used for testing + + Also creates the base directories into which the data and output + directories are written Parameters ---------- - osf : str - URL to OSF file that contains data to be downloaded - outpath : str + test_dataset : str + References one of the datasets to download. It can be: + three-echo + three-echo-reclassify + four-echo + five-echo + + Returns + ------- + test_data_path : str + The path to the local directory where the data will be downloaded + osfID : str + The ID for the OSF file. 
+ Data download link would be https://osf.io/osfID/download + Metadata download link would be https://osf.io/osfID/metadata/?format=datacite-json + """ + + tedana_path = os.path.dirname(tedana_cli.__file__) + base_data_path = os.path.abspath(os.path.join(tedana_path, "../../.testing_data_cache")) + os.makedirs(base_data_path, exist_ok=True) + os.makedirs(os.path.join(base_data_path, "outputs"), exist_ok=True) + if test_dataset == "three-echo": + test_data_path = os.path.join(base_data_path, "three-echo/TED.three-echo") + osfID = "rqhfc" + os.makedirs(os.path.join(base_data_path, "three-echo"), exist_ok=True) + os.makedirs(os.path.join(base_data_path, "outputs/three-echo"), exist_ok=True) + elif test_dataset == "three-echo-reclassify": + test_data_path = os.path.join(base_data_path, "reclassify") + osfID = "f6g45" + os.makedirs(os.path.join(base_data_path, "outputs/reclassify"), exist_ok=True) + elif test_dataset == "four-echo": + test_data_path = os.path.join(base_data_path, "four-echo/TED.four-echo") + osfID = "gnj73" + os.makedirs(os.path.join(base_data_path, "four-echo"), exist_ok=True) + os.makedirs(os.path.join(base_data_path, "outputs/four-echo"), exist_ok=True) + elif test_dataset == "five-echo": + test_data_path = os.path.join(base_data_path, "five-echo/TED.five-echo") + osfID = "9c42e" + os.makedirs(os.path.join(base_data_path, "five-echo"), exist_ok=True) + os.makedirs(os.path.join(base_data_path, "outputs/five-echo"), exist_ok=True) + else: + raise ValueError(f"{test_dataset} is not a valid dataset string for data_for_testing_info") + + return test_data_path, osfID + + +def download_test_data(osfID, test_data_path): + """ + If current data is not already available, downloads tar.gz data + stored at `https://osf.io/osfID/download` + and unpacks into `out_path` + + Parameters + ---------- + osfID : str + The ID for the OSF file. 
+ out_path : str Path to directory where OSF data should be extracted """ - req = requests.get(osf) + try: + datainfo = requests.get(f"https://osf.io/{osfID}/metadata/?format=datacite-json") + except Exception: + if len(os.listdir(test_data_path)) == 0: + raise ConnectionError( + f"Cannot access https://osf.io/{osfID} and testing data " "are not yet downloaded" + ) + else: + TestLGR.warning( + f"Cannot access https://osf.io/{osfID}. " + f"Using local copy of testing data in {test_data_path} " + "but cannot validate that local copy is up-to-date" + ) + return + datainfo.raise_for_status() + metadata = json.loads(datainfo.content) + # 'dates' is a list with all udpates to the file, the last item in the list + # is the most recent and the 'date' field in the list is the date of the last + # update. + osf_filedate = metadata["dates"][-1]["date"] + + # File the file with the most recent date for comparision with + # the lsst updated date for the osf file + if os.path.exists(test_data_path): + filelist = glob.glob(f"{test_data_path}/*") + most_recent_file = max(filelist, key=os.path.getctime) + if os.path.exists(most_recent_file): + local_filedate = os.path.getmtime(most_recent_file) + local_filedate_str = str(datetime.fromtimestamp(local_filedate).date()) + local_data_exists = True + else: + local_data_exists = False + else: + local_data_exists = False + if local_data_exists: + if local_filedate_str == osf_filedate: + TestLGR.info( + f"Downloaded and up-to-date data already in {test_data_path}. Not redownloading" + ) + return + else: + TestLGR.info( + f"Downloaded data in {test_data_path} was last modified on " + f"{local_filedate_str}. Data on https://osf.io/{osfID} " + f" was last updated on {osf_filedate}. 
Deleting and redownloading" + ) + shutil.rmtree(test_data_path) + req = requests.get(f"https://osf.io/{osfID}/download") req.raise_for_status() t = tarfile.open(fileobj=GzipFile(fileobj=BytesIO(req.content))) - os.makedirs(outpath, exist_ok=True) - t.extractall(outpath) + os.makedirs(test_data_path, exist_ok=True) + t.extractall(test_data_path) + + +def reclassify_raw() -> str: + test_data_path, _ = data_for_testing_info("three-echo-reclassify") + return os.path.join(test_data_path, "TED.three-echo") + + +def reclassify_raw_registry() -> str: + return os.path.join(reclassify_raw(), "desc-tedana_registry.json") + + +def guarantee_reclassify_data() -> None: + """Ensures that the reclassify data exists at the expected path and return path.""" + + test_data_path, osfID = data_for_testing_info("three-echo-reclassify") + + # Should now be checking and not downloading for each test so don't see if statement here + # if not os.path.exists(reclassify_raw_registry()): + download_test_data(osfID, test_data_path) + # else: + # Path exists, be sure that everything in registry exists + ioh = InputHarvester(reclassify_raw_registry()) + all_present = True + for _, v in ioh.registry.items(): + if not isinstance(v, list): + if not os.path.exists(os.path.join(reclassify_raw(), v)): + all_present = False + break + if not all_present: + # Something was removed, need to re-download + shutil.rmtree(reclassify_raw()) + guarantee_reclassify_data() + return test_data_path def test_integration_five_echo(skip_integration): @@ -77,18 +227,19 @@ def test_integration_five_echo(skip_integration): if skip_integration: pytest.skip("Skipping five-echo integration test") - out_dir = "/tmp/data/five-echo/TED.five-echo" - out_dir_manual = "/tmp/data/five-echo/TED.five-echo-manual" + test_data_path, osfID = data_for_testing_info("five-echo") + out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/five-echo")) + # out_dir_manual = f"{out_dir}-manual" if os.path.exists(out_dir): 
shutil.rmtree(out_dir) - if os.path.exists(out_dir_manual): - shutil.rmtree(out_dir_manual) + # if os.path.exists(out_dir_manual): + # shutil.rmtree(out_dir_manual) # download data and run the test - download_test_data("https://osf.io/9c42e/download", os.path.dirname(out_dir)) - prepend = "/tmp/data/five-echo/p06.SBJ01_S09_Task11_e" + download_test_data(osfID, test_data_path) + prepend = f"{test_data_path}/p06.SBJ01_S09_Task11_e" suffix = ".sm.nii.gz" datalist = [prepend + str(i + 1) + suffix for i in range(5)] echo_times = [15.4, 29.7, 44.0, 58.3, 72.6] @@ -108,33 +259,6 @@ def test_integration_five_echo(skip_integration): df = pd.read_table(comptable) assert isinstance(df, pd.DataFrame) - # Test re-running, but use the CLI - acc_comps = df.loc[df["classification"] == "ignored"].index.values - acc_comps = [str(c) for c in acc_comps] - mixing = os.path.join(out_dir, "desc-ICA_mixing.tsv") - t2smap = os.path.join(out_dir, "T2starmap.nii.gz") - args = ( - ["-d"] - + datalist - + ["-e"] - + [str(te) for te in echo_times] - + [ - "--out-dir", - out_dir_manual, - "--debug", - "--verbose", - "--manacc", - *acc_comps, - "--ctab", - comptable, - "--mix", - mixing, - "--t2smap", - t2smap, - ] - ) - tedana_cli._main(args) - # compare the generated output files fn = resource_filename("tedana", "tests/data/nih_five_echo_outputs_verbose.txt") check_integration_outputs(fn, out_dir) @@ -146,8 +270,9 @@ def test_integration_four_echo(skip_integration): if skip_integration: pytest.skip("Skipping four-echo integration test") - out_dir = "/tmp/data/four-echo/TED.four-echo" - out_dir_manual = "/tmp/data/four-echo/TED.four-echo-manual" + test_data_path, osfID = data_for_testing_info("four-echo") + out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/four-echo")) + out_dir_manual = f"{out_dir}-manual" if os.path.exists(out_dir): shutil.rmtree(out_dir) @@ -156,13 +281,13 @@ def test_integration_four_echo(skip_integration): shutil.rmtree(out_dir_manual) # download data 
and run the test - download_test_data("https://osf.io/gnj73/download", os.path.dirname(out_dir)) - prepend = "/tmp/data/four-echo/" - prepend += "sub-PILOT_ses-01_task-localizerDetection_run-01_echo-" + download_test_data(osfID, test_data_path) + prepend = f"{test_data_path}/sub-PILOT_ses-01_task-localizerDetection_run-01_echo-" suffix = "_space-sbref_desc-preproc_bold+orig.HEAD" datalist = [prepend + str(i + 1) + suffix for i in range(4)] tedana_cli.tedana_workflow( data=datalist, + mixm=op.join(op.dirname(datalist[0]), "desc-ICA_mixing_static.tsv"), tes=[11.8, 28.04, 44.28, 60.52], out_dir=out_dir, tedpca="kundu-stabilize", @@ -172,26 +297,13 @@ def test_integration_four_echo(skip_integration): verbose=True, ) - # Test re-running with the component table - mixing_matrix = os.path.join(out_dir, "desc-ICA_mixing.tsv") - comptable = os.path.join(out_dir, "desc-tedana_metrics.tsv") - temporary_comptable = os.path.join(out_dir, "temporary_metrics.tsv") - comptable_df = pd.read_table(comptable) - comptable_df.loc[comptable_df["classification"] == "ignored", "classification"] = "accepted" - comptable_df.to_csv(temporary_comptable, sep="\t", index=False) - tedana_cli.tedana_workflow( - data=datalist, - tes=[11.8, 28.04, 44.28, 60.52], + ica_reclassify_workflow( + op.join(out_dir, "desc-tedana_registry.json"), + accept=[1, 2, 3], + reject=[4, 5, 6], out_dir=out_dir_manual, - tedpca="kundu-stabilize", - gscontrol=["gsr", "mir"], - png_cmap="bone", - mixm=mixing_matrix, - ctab=temporary_comptable, - debug=True, - verbose=False, + mir=True, ) - os.remove(temporary_comptable) # compare the generated output files fn = resource_filename("tedana", "tests/data/fiu_four_echo_outputs.txt") @@ -205,8 +317,9 @@ def test_integration_three_echo(skip_integration): if skip_integration: pytest.skip("Skipping three-echo integration test") - out_dir = "/tmp/data/three-echo/TED.three-echo" - out_dir_manual = "/tmp/data/three-echo/TED.three-echo-rerun" + test_data_path, osfID = 
data_for_testing_info("three-echo") + out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/three-echo")) + out_dir_manual = f"{out_dir}-rerun" if os.path.exists(out_dir): shutil.rmtree(out_dir) @@ -215,9 +328,9 @@ def test_integration_three_echo(skip_integration): shutil.rmtree(out_dir_manual) # download data and run the test - download_test_data("https://osf.io/rqhfc/download", os.path.dirname(out_dir)) + download_test_data(osfID, test_data_path) tedana_cli.tedana_workflow( - data="/tmp/data/three-echo/three_echo_Cornell_zcat.nii.gz", + data=f"{test_data_path}/three_echo_Cornell_zcat.nii.gz", tes=[14.5, 38.5, 62.5], out_dir=out_dir, low_mem=True, @@ -227,7 +340,7 @@ def test_integration_three_echo(skip_integration): # Test re-running, but use the CLI args = [ "-d", - "/tmp/data/three-echo/three_echo_Cornell_zcat.nii.gz", + f"{test_data_path}/three_echo_Cornell_zcat.nii.gz", "-e", "14.5", "38.5", @@ -236,8 +349,7 @@ def test_integration_three_echo(skip_integration): out_dir_manual, "--debug", "--verbose", - "--ctab", - os.path.join(out_dir, "desc-tedana_metrics.tsv"), + "-f", "--mix", os.path.join(out_dir, "desc-ICA_mixing.tsv"), ] @@ -248,17 +360,290 @@ def test_integration_three_echo(skip_integration): check_integration_outputs(fn, out_dir) +def test_integration_reclassify_insufficient_args(skip_integration): + if skip_integration: + pytest.skip("Skipping reclassify insufficient args") + + guarantee_reclassify_data() + + args = [ + "ica_reclassify", + reclassify_raw_registry(), + ] + + result = subprocess.run(args, capture_output=True) + assert b"ValueError: Must manually accept or reject" in result.stderr + assert result.returncode != 0 + + +def test_integration_reclassify_quiet_csv(skip_integration): + if skip_integration: + pytest.skip("Skip reclassify quiet csv") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/quiet")) + if os.path.exists(out_dir): + 
shutil.rmtree(out_dir) + + # Make some files that have components to manually accept and reject + to_accept = [i for i in range(3)] + to_reject = [i for i in range(4, 7)] + acc_df = pd.DataFrame(data=to_accept, columns=["Components"]) + rej_df = pd.DataFrame(data=to_reject, columns=["Components"]) + acc_csv_fname = os.path.join(reclassify_raw(), "accept.csv") + rej_csv_fname = os.path.join(reclassify_raw(), "reject.csv") + acc_df.to_csv(acc_csv_fname) + rej_df.to_csv(rej_csv_fname) + + args = [ + "ica_reclassify", + "--manacc", + acc_csv_fname, + "--manrej", + rej_csv_fname, + "--out-dir", + out_dir, + reclassify_raw_registry(), + ] + + results = subprocess.run(args, capture_output=True) + assert results.returncode == 0 + fn = resource_filename("tedana", "tests/data/reclassify_quiet_out.txt") + check_integration_outputs(fn, out_dir) + + +def test_integration_reclassify_quiet_spaces(skip_integration): + if skip_integration: + pytest.skip("Skip reclassify quiet space-delimited integers") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/quiet")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + args = [ + "ica_reclassify", + "--manacc", + "1", + "2", + "3", + "--manrej", + "4", + "5", + "6", + "--out-dir", + out_dir, + reclassify_raw_registry(), + ] + + results = subprocess.run(args, capture_output=True) + assert results.returncode == 0 + fn = resource_filename("tedana", "tests/data/reclassify_quiet_out.txt") + check_integration_outputs(fn, out_dir) + + +def test_integration_reclassify_quiet_string(skip_integration): + if skip_integration: + pytest.skip("Skip reclassify quiet string of integers") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/quiet")) + + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + args = [ + "ica_reclassify", + "--manacc", + "1,2,3", + "--manrej", + "4,5,6,", + 
"--out-dir", + out_dir, + reclassify_raw_registry(), + ] + + results = subprocess.run(args, capture_output=True) + assert results.returncode == 0 + fn = resource_filename("tedana", "tests/data/reclassify_quiet_out.txt") + check_integration_outputs(fn, out_dir) + + +def test_integration_reclassify_debug(skip_integration): + if skip_integration: + pytest.skip("Skip reclassify debug") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/debug")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + args = [ + "ica_reclassify", + "--manacc", + "1", + "2", + "3", + "--prefix", + "sub-testymctestface", + "--convention", + "orig", + "--tedort", + "--mir", + "--no-reports", + "--out-dir", + out_dir, + "--debug", + reclassify_raw_registry(), + ] + + results = subprocess.run(args, capture_output=True) + assert results.returncode == 0 + fn = resource_filename("tedana", "tests/data/reclassify_debug_out.txt") + check_integration_outputs(fn, out_dir) + + +def test_integration_reclassify_both_rej_acc(skip_integration): + if skip_integration: + pytest.skip("Skip reclassify both rejected and accepted") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/both_rej_acc")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + with pytest.raises( + ValueError, + match=r"The following components were both accepted and", + ): + ica_reclassify_workflow( + reclassify_raw_registry(), + accept=[1, 2, 3], + reject=[1, 2, 3], + out_dir=out_dir, + ) + + +def test_integration_reclassify_run_twice(skip_integration): + if skip_integration: + pytest.skip("Skip reclassify both rejected and accepted") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/run_twice")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + ica_reclassify_workflow( + 
reclassify_raw_registry(), + accept=[1, 2, 3], + out_dir=out_dir, + no_reports=True, + ) + ica_reclassify_workflow( + reclassify_raw_registry(), + accept=[1, 2, 3], + out_dir=out_dir, + overwrite=True, + no_reports=True, + ) + fn = resource_filename("tedana", "tests/data/reclassify_run_twice.txt") + check_integration_outputs(fn, out_dir, n_logs=2) + + +def test_integration_reclassify_no_bold(skip_integration, caplog): + if skip_integration: + pytest.skip("Skip reclassify both rejected and accepted") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/no_bold")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + ioh = InputHarvester(reclassify_raw_registry()) + comptable = ioh.get_file_contents("ICA metrics tsv") + to_accept = [i for i in range(len(comptable))] + + ica_reclassify_workflow( + reclassify_raw_registry(), + reject=to_accept, + out_dir=out_dir, + no_reports=True, + ) + assert "No accepted components remaining after manual classification!" in caplog.text + + fn = resource_filename("tedana", "tests/data/reclassify_no_bold.txt") + check_integration_outputs(fn, out_dir) + + +def test_integration_reclassify_accrej_files(skip_integration, caplog): + if skip_integration: + pytest.skip("Skip reclassify both rejected and accepted") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/no_bold")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + ioh = InputHarvester(reclassify_raw_registry()) + comptable = ioh.get_file_contents("ICA metrics tsv") + to_accept = [i for i in range(len(comptable))] + + ica_reclassify_workflow( + reclassify_raw_registry(), + reject=to_accept, + out_dir=out_dir, + no_reports=True, + ) + assert "No accepted components remaining after manual classification!" 
in caplog.text + + fn = resource_filename("tedana", "tests/data/reclassify_no_bold.txt") + check_integration_outputs(fn, out_dir) + + +def test_integration_reclassify_index_failures(skip_integration, caplog): + if skip_integration: + pytest.skip("Skip reclassify index failures") + + test_data_path = guarantee_reclassify_data() + out_dir = os.path.abspath(os.path.join(test_data_path, "../outputs/reclassify/index_failures")) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + + with pytest.raises( + ValueError, + match=r"_parse_manual_list expected a list of integers, but the input is", + ): + ica_reclassify_workflow( + reclassify_raw_registry(), + accept=[1, 2.5, 3], + out_dir=out_dir, + no_reports=True, + ) + + with pytest.raises( + ValueError, + match=r"_parse_manual_list expected integers or a filename, but the input is", + ): + ica_reclassify_workflow( + reclassify_raw_registry(), + accept=[2.5], + out_dir=out_dir, + no_reports=True, + ) + + def test_integration_t2smap(skip_integration): """Integration test of the full t2smap workflow using five-echo test data""" if skip_integration: pytest.skip("Skipping t2smap integration test") - out_dir = "/tmp/data/five-echo/t2smap_five-echo" + test_data_path, osfID = data_for_testing_info("five-echo") + out_dir = os.path.abspath(os.path.join(test_data_path, "../../outputs/t2smap_five-echo")) if os.path.exists(out_dir): shutil.rmtree(out_dir) # download data and run the test - download_test_data("https://osf.io/9c42e/download", os.path.dirname(out_dir)) - prepend = "/tmp/data/five-echo/p06.SBJ01_S09_Task11_e" + download_test_data(osfID, test_data_path) + prepend = f"{test_data_path}/p06.SBJ01_S09_Task11_e" suffix = ".sm.nii.gz" datalist = [prepend + str(i + 1) + suffix for i in range(5)] echo_times = [15.4, 29.7, 44.0, 58.3, 72.6] diff --git a/tedana/tests/test_io.py b/tedana/tests/test_io.py index 5b59758aa..19062babd 100644 --- a/tedana/tests/test_io.py +++ b/tedana/tests/test_io.py @@ -2,6 +2,7 @@ Tests for 
tedana.io """ +import json import os import nibabel as nib @@ -218,3 +219,81 @@ def test_prep_data_for_json(): } new_d = me.prep_data_for_json(d) assert isinstance(new_d["dictionary"]["array"], list) + + +def test_str_to_component_list(): + """ + Tests for converting a string to a component list + """ + int_list_1 = [1] + int_list_2 = [1, 4, 5] + test_list_1 = [str(x) for x in int_list_1] + test_list_2 = [str(x) for x in int_list_2] + delims_to_test = ( + "\t", + "\n", + " ", + ",", + ) + for d in delims_to_test: + test_data = d.join(test_list_1) + assert me.str_to_component_list(test_data) == int_list_1 + test_data = d.join(test_list_2) + assert me.str_to_component_list(test_data) == int_list_2 + + # Test that one-line, one-element works + assert me.str_to_component_list("1\n") == [1] + # Test that one-line, multi-element works + assert me.str_to_component_list("1,1\n") == [1, 1] + # Test that extra delimeter is ignored + assert me.str_to_component_list("1,1,") == [1, 1] + + with pytest.raises(ValueError, match=r"While parsing component"): + me.str_to_component_list("1,2\t") + + +def test_fname_to_component_list(): + test_data = [1, 2, 3] + temp_csv_fname = os.path.join(data_dir, "test.csv") + df = pd.DataFrame(data=test_data) + df.to_csv(path_or_buf=temp_csv_fname) + result = me.fname_to_component_list(temp_csv_fname) + os.remove(temp_csv_fname) + assert result == test_data + + temp_txt_fname = os.path.join(data_dir, "test.txt") + with open(temp_txt_fname, "w") as fp: + fp.write("1,1,") + + result = me.fname_to_component_list(temp_txt_fname) + os.remove(temp_txt_fname) + assert result == [1, 1] + + +def test_CustomEncoder(): + """ + Test the encoder we use for JSON incompatibilities + """ + # np int64 + test_data = {"data": np.int64(4)} + encoded = json.dumps(test_data, cls=me.CustomEncoder) + decoded = json.loads(encoded) + assert test_data == decoded + + # np array + test_data = {"data": np.asarray([1, 2, 3])} + encoded = json.dumps(test_data, 
cls=me.CustomEncoder) + decoded = json.loads(encoded) + assert np.array_equal(test_data["data"], decoded["data"]) + + # set should become list + test_data = {"data": set(["cat", "dog", "fish"])} + encoded = json.dumps(test_data, cls=me.CustomEncoder) + decoded = json.loads(encoded) + assert list(test_data["data"]) == decoded["data"] + + # no special cases should use standard encoder + test_data = {"pet": "dog"} + encoded = json.dumps(test_data, cls=me.CustomEncoder) + decoded = json.loads(encoded) + assert test_data == decoded diff --git a/tedana/tests/test_selection.py b/tedana/tests/test_selection.py deleted file mode 100644 index 8bed1eb19..000000000 --- a/tedana/tests/test_selection.py +++ /dev/null @@ -1,34 +0,0 @@ -""" -Tests for tedana.selection -""" - -import numpy as np -import pandas as pd - -from tedana import selection - - -def test_manual_selection(): - """ - Check that manual_selection runs correctly for different combinations of - accepted and rejected components. - """ - comptable = pd.DataFrame(index=np.arange(100)) - comptable, metric_metadata = selection.manual_selection(comptable, acc=[1, 3, 5]) - assert comptable.loc[comptable.classification == "accepted"].shape[0] == 3 - assert comptable.loc[comptable.classification == "rejected"].shape[0] == ( - comptable.shape[0] - 3 - ) - - comptable, metric_metadata = selection.manual_selection(comptable, rej=[1, 3, 5]) - assert comptable.loc[comptable.classification == "rejected"].shape[0] == 3 - assert comptable.loc[comptable.classification == "accepted"].shape[0] == ( - comptable.shape[0] - 3 - ) - - comptable, metric_metadata = selection.manual_selection( - comptable, acc=[0, 2, 4], rej=[1, 3, 5] - ) - assert comptable.loc[comptable.classification == "accepted"].shape[0] == 3 - assert comptable.loc[comptable.classification == "rejected"].shape[0] == 3 - assert comptable.loc[comptable.classification == "ignored"].shape[0] == comptable.shape[0] - 6 diff --git a/tedana/tests/test_selection_nodes.py 
b/tedana/tests/test_selection_nodes.py new file mode 100644 index 000000000..909af2b21 --- /dev/null +++ b/tedana/tests/test_selection_nodes.py @@ -0,0 +1,1304 @@ +"""Tests for the tedana.selection.selection_nodes module.""" +import os + +import pytest + +from tedana.selection import selection_nodes +from tedana.tests.test_selection_utils import sample_selector + +THIS_DIR = os.path.dirname(os.path.abspath(__file__)) + + +def test_manual_classify_smoke(): + """Smoke tests for all options in manual_classify""" + + selector = sample_selector(options="provclass") + + decide_comps = "provisional accept" + new_classification = "accepted" + + # Outputs just the metrics used in this function (nothing in this case) + used_metrics = selection_nodes.manual_classify( + selector, decide_comps, new_classification, only_used_metrics=True + ) + assert used_metrics == set() + + # Standard execution where components are changed from "provisional accept" to "accepted" + # And all extra logging code is run + selector = selection_nodes.manual_classify( + selector, + decide_comps, + new_classification, + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + tag="test tag", + ) + # There should be 4 selected components and component_status_table should + # have a new column "Node 0" + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 4 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_false"] == 0 + assert f"Node {selector.current_node_idx}" in selector.component_status_table + + # No components with "NotALabel" classification so nothing selected and no + # Node 1 column not created in component_status_table + selector.current_node_idx = 1 + selector = selection_nodes.manual_classify(selector, "NotAClassification", new_classification) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 0 + assert f"Node {selector.current_node_idx}" not in 
selector.component_status_table + + # Changing components from "rejected" to "accepted" and suppressing warning + selector.current_node_idx = 2 + selector = selection_nodes.manual_classify( + selector, + "rejected", + new_classification, + clear_classification_tags=True, + log_extra_report="report log", + log_extra_info="info log", + tag="test tag", + dont_warn_reclassify=True, + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 4 + assert f"Node {selector.current_node_idx}" in selector.component_status_table + + +def test_dec_left_op_right_succeeds(): + """tests for successful calls to dec_left_op_right""" + + selector = sample_selector(options="provclass") + + decide_comps = "provisional accept" + + # Outputs just the metrics used in this function {"kappa", "rho"} + used_metrics = selection_nodes.dec_left_op_right( + selector, "accepted", "rejected", decide_comps, ">", "kappa", "rho", only_used_metrics=True + ) + assert len(used_metrics - {"kappa", "rho"}) == 0 + + # Standard execution where components with kappa>rho are changed from + # "provisional accept" to "accepted" + # And all extra logging code and options are run + # left and right are both component_table_metrics + selector = selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + ">", + "kappa", + "rho", + left_scale=0.9, + right_scale=1.4, + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + tag_if_true="test true tag", + tag_if_false="test false tag", + ) + # scales are set to make sure 3 components are true and 1 is false using + # the sample component table + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 3 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_false"] == 1 + assert f"Node {selector.current_node_idx}" in selector.component_status_table + + # No components with "NotALabel" classification so nothing selected and no 
+ # Node 1 column is created in component_status_table + selector.current_node_idx = 1 + selector = selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + "NotAClassification", + ">", + "kappa", + "rho", + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 0 + assert f"Node {selector.current_node_idx}" not in selector.component_status_table + + # Re-initializing selector so that it has components classificated as + # "provisional accept" again + selector = sample_selector(options="provclass") + # Test when left is a component_table_metric, & right is a cross_component_metric + selector = selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + ">", + "kappa", + "test_elbow", + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 3 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_false"] == 1 + assert f"Node {selector.current_node_idx}" in selector.component_status_table + + # right is a component_table_metric, left is a cross_component_metric + # left also has a left_scale that's a cross component metric + selector = sample_selector(options="provclass") + selector.cross_component_metrics["new_cc_metric"] = 1.02 + selector = selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + ">", + "test_elbow", + "kappa", + left_scale="new_cc_metric", + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 1 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_false"] == 3 + assert f"Node {selector.current_node_idx}" in selector.component_status_table + + # left component_table_metric, right is a constant integer value + selector = sample_selector(options="provclass") + selector = selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + ">", + "kappa", + 21, + ) + assert 
selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 3 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_false"] == 1 + assert f"Node {selector.current_node_idx}" in selector.component_status_table + + # right component_table_metric, left is a constant float value + selector = sample_selector(options="provclass") + selector = selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + ">", + 21.0, + "kappa", + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 1 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_false"] == 3 + assert f"Node {selector.current_node_idx}" in selector.component_status_table + + # Testing combination of two statements. kappa>21 AND rho<14 + selector = sample_selector(options="provclass") + selector = selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + "<", + 21.0, + "kappa", + left2="rho", + op2="<", + right2=14, + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 2 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_false"] == 2 + assert f"Node {selector.current_node_idx}" in selector.component_status_table + + # Testing combination of three statements. 
kappa>21 AND rho<14 AND 'variance explained'<5 + selector = sample_selector(options="provclass") + selector = selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + "<", + 21.0, + "kappa", + left2="rho", + op2="<", + right2=14, + left3="variance explained", + op3="<", + right3=5, + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 1 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_false"] == 3 + assert f"Node {selector.current_node_idx}" in selector.component_status_table + + +def test_dec_left_op_right_fails(): + """tests for calls to dec_left_op_right that raise errors""" + + selector = sample_selector(options="provclass") + decide_comps = "provisional accept" + + # Raise error for left value that is not a metric + selector = sample_selector(options="provclass") + with pytest.raises(ValueError): + selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + ">", + "NotAMetric", + 21, + ) + + # Raise error for right value that is not a metric + selector = sample_selector(options="provclass") + with pytest.raises(ValueError): + selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + ">", + 21, + "NotAMetric", + ) + + # Raise error for invalid operator + selector = sample_selector(options="provclass") + with pytest.raises(ValueError): + selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + "><", + "kappa", + 21, + ) + + # Raise error for right_scale that is not a number + selector = sample_selector(options="provclass") + with pytest.raises(ValueError): + selector = selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + ">", + 21.0, + "kappa", + right_scale="NotANumber", + ) + + # Raise error for right_scale that a column in the component_table + # which isn't allowed since the scale value needs to resolve to a + # a fixed 
number and not a different number for each component + selector = sample_selector(options="provclass") + with pytest.raises(ValueError): + selector = selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + ">", + 21.0, + "kappa", + right_scale="rho", + ) + + # Raise error if some but not all parameters for the second conditional statement are defined + # In this case, op2 is not defined + selector = sample_selector(options="provclass") + with pytest.raises(ValueError): + selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + ">", + "kappa", + 21, + left2="rho", + right2=14, + ) + + # Raise error for invalid operator for op2 + selector = sample_selector(options="provclass") + with pytest.raises(ValueError): + selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + ">", + "kappa", + 21, + left2="rho", + op2="<>", + right2=14, + ) + + # Raise error if some but not all parameters for the third conditional statement are defined + # In this case, op3 is not defined + selector = sample_selector(options="provclass") + with pytest.raises(ValueError): + selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + ">", + "kappa", + 21, + left2="rho", + right2=14, + op2="<", + left3="variance explained", + right3=5, + ) + + # Raise error if there's a third conditional statement but not a second statement + selector = sample_selector(options="provclass") + with pytest.raises(ValueError): + selection_nodes.dec_left_op_right( + selector, + "accepted", + "rejected", + decide_comps, + ">", + "kappa", + 21, + left3="variance explained", + right3=5, + op3="<", + ) + + +def test_dec_variance_lessthan_thresholds_smoke(): + """Smoke tests for dec_variance_lessthan_thresholds""" + + selector = sample_selector(options="provclass") + decide_comps = "provisional accept" + + # Outputs just the metrics used in this function {"variance explained"} + 
used_metrics = selection_nodes.dec_variance_lessthan_thresholds( + selector, "accepted", "rejected", decide_comps, only_used_metrics=True + ) + assert len(used_metrics - {"variance explained"}) == 0 + + # Standard execution where with all extra logging code and options changed from defaults + selector = selection_nodes.dec_variance_lessthan_thresholds( + selector, + "accepted", + "rejected", + decide_comps, + var_metric="normalized variance explained", + single_comp_threshold=0.05, + all_comp_threshold=0.09, + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + tag_if_true="test true tag", + tag_if_false="test false tag", + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 1 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_false"] == 3 + assert f"Node {selector.current_node_idx}" in selector.component_status_table + + # No components with "NotALabel" classification so nothing selected and no + # Node 1 column not created in component_status_table + selector.current_node_idx = 1 + selector = selection_nodes.dec_variance_lessthan_thresholds( + selector, "accepted", "rejected", "NotAClassification" + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 0 + assert f"Node {selector.current_node_idx}" not in selector.component_status_table + + # Running without specifying logging text generates internal text + selector = sample_selector(options="provclass") + selector = selection_nodes.dec_variance_lessthan_thresholds( + selector, "accepted", "rejected", decide_comps + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_false"] == 4 + assert f"Node {selector.current_node_idx}" in selector.component_status_table + + +def test_calc_kappa_elbow(): + """Smoke tests for calc_kappa_elbow""" + + selector = sample_selector() + 
decide_comps = "all" + + # Outputs just the metrics used in this function + used_metrics = selection_nodes.calc_kappa_elbow(selector, decide_comps, only_used_metrics=True) + assert len(used_metrics - {"kappa"}) == 0 + + # Standard call to this function. + selector = selection_nodes.calc_kappa_elbow( + selector, + decide_comps, + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + calc_cross_comp_metrics = { + "kappa_elbow_kundu", + "kappa_allcomps_elbow", + "kappa_nonsig_elbow", + "varex_upper_p", + } + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["kappa_elbow_kundu"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["kappa_allcomps_elbow"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["kappa_nonsig_elbow"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["varex_upper_p"] > 0 + + # Using a subset of components for decide_comps. 
+ selector = selection_nodes.calc_kappa_elbow( + selector, + decide_comps="accepted", + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + calc_cross_comp_metrics = { + "kappa_elbow_kundu", + "kappa_allcomps_elbow", + "kappa_nonsig_elbow", + "varex_upper_p", + } + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["kappa_elbow_kundu"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["kappa_allcomps_elbow"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["kappa_nonsig_elbow"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["varex_upper_p"] > 0 + + # No components with "NotALabel" classification so nothing selected + selector = sample_selector() + decide_comps = "NotALabel" + + # Outputs just the metrics used in this function + selector = selection_nodes.calc_kappa_elbow(selector, decide_comps) + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["kappa_elbow_kundu"] is None + ) + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["kappa_allcomps_elbow"] + is None + ) + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["kappa_nonsig_elbow"] is None + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["varex_upper_p"] is None + + +def test_calc_rho_elbow(): + """Smoke tests for calc_rho_elbow""" + + selector = sample_selector(options="unclass") + decide_comps = "all" + + # Outputs just the metrics used in this function + used_metrics = selection_nodes.calc_rho_elbow(selector, decide_comps, only_used_metrics=True) + assert len(used_metrics - {"kappa", 
"rho", "variance explained"}) == 0 + + # Standard call to this function. + selector = selection_nodes.calc_rho_elbow( + selector, + decide_comps, + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + calc_cross_comp_metrics = { + "rho_elbow_kundu", + "rho_allcomps_elbow", + "rho_unclassified_elbow", + "elbow_f05", + } + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["rho_elbow_kundu"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["rho_allcomps_elbow"] > 0 + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["rho_unclassified_elbow"] > 0 + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["elbow_f05"] > 0 + + # Standard call to this function using rho_elbow_type="liberal" + selector = selection_nodes.calc_rho_elbow( + selector, + decide_comps, + rho_elbow_type="liberal", + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + calc_cross_comp_metrics = { + "rho_elbow_liberal", + "rho_allcomps_elbow", + "rho_unclassified_elbow", + "elbow_f05", + } + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["rho_elbow_liberal"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["rho_allcomps_elbow"] > 0 + assert ( + 
selector.tree["nodes"][selector.current_node_idx]["outputs"]["rho_unclassified_elbow"] > 0 + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["elbow_f05"] > 0 + + # Using a subset of components for decide_comps. + selector = selection_nodes.calc_rho_elbow( + selector, + decide_comps=["accepted", "unclassified"], + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + calc_cross_comp_metrics = { + "rho_elbow_kundu", + "rho_allcomps_elbow", + "rho_unclassified_elbow", + "elbow_f05", + } + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["rho_elbow_kundu"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["rho_allcomps_elbow"] > 0 + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["rho_unclassified_elbow"] > 0 + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["elbow_f05"] > 0 + + with pytest.raises(ValueError): + selection_nodes.calc_rho_elbow(selector, decide_comps, rho_elbow_type="perfect") + + # No components with "NotALabel" classification so nothing selected + selector = sample_selector() + decide_comps = "NotALabel" + + # Outputs just the metrics used in this function + selector = selection_nodes.calc_rho_elbow(selector, decide_comps) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["rho_elbow_kundu"] is None + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["rho_allcomps_elbow"] is None + ) + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["rho_unclassified_elbow"] + is None + ) + assert 
selector.tree["nodes"][selector.current_node_idx]["outputs"]["elbow_f05"] is None + + +def test_calc_median_smoke(): + """Smoke tests for calc_median""" + + selector = sample_selector() + decide_comps = "all" + + # Outputs just the metrics used in this function {"variance explained"} + used_metrics = selection_nodes.calc_median( + selector, + decide_comps, + metric_name="variance explained", + median_label="varex", + only_used_metrics=True, + ) + assert len(used_metrics - set(["variance explained"])) == 0 + + # Standard call to this function. + selector = selection_nodes.calc_median( + selector, + decide_comps, + metric_name="variance explained", + median_label="varex", + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + calc_cross_comp_metrics = {"median_varex"} + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["median_varex"] > 0 + + # repeating standard call and should make a warning because metric_varex already exists + selector = selection_nodes.calc_median( + selector, decide_comps, metric_name="variance explained", median_label="varex" + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["median_varex"] > 0 + + # Log without running if no components of decide_comps are in the component table + selector = sample_selector() + selector = selection_nodes.calc_median( + selector, + decide_comps="NotAClassification", + metric_name="variance explained", + median_label="varex", + ) + assert 
selector.tree["nodes"][selector.current_node_idx]["outputs"]["median_varex"] is None + + # Crashes because median_label is not a string + with pytest.raises(ValueError): + selector = selection_nodes.calc_median( + selector, + decide_comps, + metric_name="variance explained", + median_label=5, + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + + # Crashes because median_name is not a string + with pytest.raises(ValueError): + selector = selection_nodes.calc_median( + selector, + decide_comps, + metric_name=5, + median_label="varex", + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + + +def test_dec_classification_doesnt_exist_smoke(): + """Smoke tests for dec_classification_doesnt_exist""" + + selector = sample_selector(options="unclass") + decide_comps = ["unclassified", "provisional accept"] + + # Outputs just the metrics used in this function {"variance explained"} + used_metrics = selection_nodes.dec_classification_doesnt_exist( + selector, + "rejected", + decide_comps, + class_comp_exists="provisional accept", + only_used_metrics=True, + ) + assert len(used_metrics) == 0 + + # Standard execution where with all extra logging code and options changed from defaults + selector = selection_nodes.dec_classification_doesnt_exist( + selector, + "accepted", + decide_comps, + at_least_num_exist=1, + class_comp_exists="provisional accept", + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + tag="test true tag", + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 0 + # Lists the number of components in decide_comps in n_false + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_false"] == 17 + # During normal execution, it will find provionally accepted components + # and do nothing so another node isn't created + assert f"Node {selector.current_node_idx}" not in 
selector.component_status_table + + # No components with "NotALabel" classification so nothing selected and no + # Node 1 column not created in component_status_table + # Running without specifying logging text generates internal text + selector.current_node_idx = 1 + selector = selection_nodes.dec_classification_doesnt_exist( + selector, + "accepted", + "NotAClassification", + class_comp_exists="provisional accept", + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 0 + assert f"Node {selector.current_node_idx}" not in selector.component_status_table + + # Other normal state is to change classifications when there are + # no components with class_comp_exists. Since the component_table + # initialized with sample_selector as not "provisional reject" + # components, using that for class_comp_exists + selector = sample_selector() + decide_comps = "accepted" + selector = selection_nodes.dec_classification_doesnt_exist( + selector, + "changed accepted", + decide_comps, + class_comp_exists="provisional reject", + tag="test true tag", + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 17 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_false"] == 0 + assert f"Node {selector.current_node_idx}" in selector.component_status_table + + # Standard execution with at_least_num_exist=5 which should trigger the + # components don't exist output + selector = sample_selector(options="unclass") + selector = selection_nodes.dec_classification_doesnt_exist( + selector, + "accepted", + decide_comps=["unclassified", "provisional accept"], + at_least_num_exist=5, + class_comp_exists="provisional accept", + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + tag="test true tag", + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 17 
+ # Lists the number of components in decide_comps in n_false + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_false"] == 0 + assert f"Node {selector.current_node_idx}" in selector.component_status_table + + +def test_dec_reclassify_high_var_comps(): + """tests for dec_reclassify_high_var_comps""" + + selector = sample_selector(options="unclass") + decide_comps = "unclassified" + + # Outputs just the metrics used in this function {"variance explained"} + used_metrics = selection_nodes.dec_reclassify_high_var_comps( + selector, + "unclass_highvar", + decide_comps, + only_used_metrics=True, + ) + assert len(used_metrics - set(["variance explained"])) == 0 + + # Raises an error since varex_upper_p not in cross_component_metrics + # & there are components in decide_comps + with pytest.raises(ValueError): + selection_nodes.dec_reclassify_high_var_comps( + selector, + "unclass_highvar", + decide_comps, + ) + + # varex_upper_p not in cross_component_metrics, + # but doesn't raise an error because no components in decide_comps + selection_nodes.dec_reclassify_high_var_comps( + selector, + "unclass_highvar", + "NotAClassification", + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 0 + assert f"Node {selector.current_node_idx}" not in selector.component_status_table + + # Add varex_upper_p to cross component_metrics to run normal test + selector = sample_selector(options="unclass") + selector.cross_component_metrics["varex_upper_p"] = 0.97 + + # Standard execution where with all extra logging code and options changed from defaults + selection_nodes.dec_reclassify_high_var_comps( + selector, + "unclass_highvar", + decide_comps, + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + tag="test true tag", + ) + # Lists the number of components in decide_comps in n_true or n_false + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 3 + assert 
selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_false"] == 10 + assert f"Node {selector.current_node_idx}" in selector.component_status_table + + # No components with "NotALabel" classification so nothing selected and no + # Node 1 column is created in component_status_table + selector.current_node_idx = 1 + selector = selection_nodes.dec_reclassify_high_var_comps( + selector, "unclass_highvar", "NotAClassification" + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["n_true"] == 0 + assert f"Node {selector.current_node_idx}" not in selector.component_status_table + + +def test_calc_varex_thresh_smoke(): + """Smoke tests for calc_varex_thresh""" + + # Standard use of this function requires some components to be "provisional accept" + selector = sample_selector() + decide_comps = "all" + + # Outputs just the metrics used in this function {"variance explained"} + used_metrics = selection_nodes.calc_varex_thresh( + selector, decide_comps, thresh_label="upper", percentile_thresh=90, only_used_metrics=True + ) + assert len(used_metrics - set(["variance explained"])) == 0 + + # Standard call to this function. 
+ selector = selection_nodes.calc_varex_thresh( + selector, + decide_comps, + thresh_label="upper", + percentile_thresh=90, + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + calc_cross_comp_metrics = {"varex_upper_thresh", "upper_perc"} + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["varex_upper_thresh"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["upper_perc"] == 90 + + # Standard call , but thresh_label is "" + selector = selection_nodes.calc_varex_thresh( + selector, + decide_comps, + thresh_label="", + percentile_thresh=90, + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + calc_cross_comp_metrics = {"varex_thresh", "perc"} + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["varex_thresh"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["perc"] == 90 + + # Standard call using num_highest_var_comps as an integer + selector = selection_nodes.calc_varex_thresh( + selector, + decide_comps, + thresh_label="new_lower", + percentile_thresh=25, + num_highest_var_comps=8, + ) + calc_cross_comp_metrics = {"varex_new_lower_thresh", "new_lower_perc"} + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # 
Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["varex_new_lower_thresh"] > 0 + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["new_lower_perc"] == 25 + + # Standard call using num_highest_var_comps as a value in cross_component_metrics + selector.cross_component_metrics["num_acc_guess"] = 10 + selector = selection_nodes.calc_varex_thresh( + selector, + decide_comps, + thresh_label="new_lower", + percentile_thresh=25, + num_highest_var_comps="num_acc_guess", + ) + calc_cross_comp_metrics = {"varex_new_lower_thresh", "new_lower_perc"} + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["varex_new_lower_thresh"] > 0 + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["new_lower_perc"] == 25 + + # Raise error if num_highest_var_comps is a string, but not in cross_component_metrics + with pytest.raises(ValueError): + selector = selection_nodes.calc_varex_thresh( + selector, + decide_comps, + thresh_label="new_lower", + percentile_thresh=25, + num_highest_var_comps="NotACrossCompMetric", + ) + + # Do not raise error if num_highest_var_comps is a string & not in cross_component_metrics, + # but decide_comps doesn't select any components + selector = selection_nodes.calc_varex_thresh( + selector, + decide_comps="NoComponents", + thresh_label="new_lower", + percentile_thresh=25, + num_highest_var_comps="NotACrossCompMetric", + ) + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["varex_new_lower_thresh"] + is 
None + ) + # percentile_thresh doesn't depend on components and is assigned + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["new_lower_perc"] == 25 + + # Raise error if num_highest_var_comps is not an integer + with pytest.raises(ValueError): + selector = selection_nodes.calc_varex_thresh( + selector, + decide_comps, + thresh_label="new_lower", + percentile_thresh=25, + num_highest_var_comps=9.5, + ) + + # Still run num_highest_var_comps is larger than the number of selected components + # NOTE: To match original functionaly this will run but add an info message + # and set num_highest_var_comps to the number of selected components + # + selector = selection_nodes.calc_varex_thresh( + selector, + decide_comps, + thresh_label="new_lower", + percentile_thresh=25, + num_highest_var_comps=55, + ) + calc_cross_comp_metrics = {"varex_new_lower_thresh", "new_lower_perc"} + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["varex_new_lower_thresh"] > 0 + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["new_lower_perc"] == 25 + + # Run warning logging code to see if any of the cross_component_metrics + # already exists and would be over-written + selector = sample_selector(options="provclass") + selector.cross_component_metrics["varex_upper_thresh"] = 1 + selector.cross_component_metrics["upper_perc"] = 1 + decide_comps = "provisional accept" + selector = selection_nodes.calc_varex_thresh( + selector, + decide_comps, + thresh_label="upper", + percentile_thresh=90, + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + assert len(output_calc_cross_comp_metrics - 
calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["varex_upper_thresh"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["upper_perc"] == 90 + + # Raise error if percentile_thresh isn't a number + selector = sample_selector(options="provclass") + with pytest.raises(ValueError): + selector = selection_nodes.calc_varex_thresh( + selector, decide_comps, thresh_label="upper", percentile_thresh="NotANumber" + ) + + # Raise error if percentile_thresh isn't a number between 0 & 100 + selector = sample_selector(options="provclass") + with pytest.raises(ValueError): + selector = selection_nodes.calc_varex_thresh( + selector, decide_comps, thresh_label="upper", percentile_thresh=101 + ) + + # Log without running if no components of decide_comps are in the component table + selector = sample_selector() + selector = selection_nodes.calc_varex_thresh( + selector, decide_comps="NotAClassification", thresh_label="upper", percentile_thresh=90 + ) + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["varex_upper_thresh"] is None + ) + # percentile_thresh doesn't depend on components and is assigned + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["upper_perc"] == 90 + + +def test_calc_extend_factor_smoke(): + """Smoke tests for calc_extend_factor""" + + selector = sample_selector() + + # Outputs just the metrics used in this function {""} + used_metrics = selection_nodes.calc_extend_factor(selector, only_used_metrics=True) + assert used_metrics == set() + + # Standard call to this function. 
+ selector = selection_nodes.calc_extend_factor( + selector, + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + calc_cross_comp_metrics = {"extend_factor"} + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["extend_factor"] > 0 + + # Run warning logging code for if any of the cross_component_metrics + # already existed and would be over-written + selector = sample_selector() + selector.cross_component_metrics["extend_factor"] = 1.0 + selector = selection_nodes.calc_extend_factor(selector) + + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["extend_factor"] > 0 + + # Run with extend_factor defined as an input + selector = sample_selector() + selector = selection_nodes.calc_extend_factor(selector, extend_factor=1.2) + + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["extend_factor"] == 1.2 + + +def test_calc_max_good_meanmetricrank_smoke(): + """Smoke tests for calc_max_good_meanmetricrank""" + + # Standard use of this function requires some components to be "provisional accept" + selector = sample_selector("provclass") + # This function requires "extend_factor" to already be defined + selector.cross_component_metrics["extend_factor"] = 2.0 + + # Outputs just the metrics used in this function {""} + used_metrics = selection_nodes.calc_max_good_meanmetricrank( + selector, "provisional accept", only_used_metrics=True + ) + assert used_metrics == set() + + # Standard call to this function. 
+ selector = selection_nodes.calc_max_good_meanmetricrank( + selector, + "provisional accept", + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + calc_cross_comp_metrics = {"max_good_meanmetricrank"} + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["max_good_meanmetricrank"] > 0 + ) + + # Standard call to this function with a user defined metric_suffix + selector = sample_selector("provclass") + selector.cross_component_metrics["extend_factor"] = 2.0 + selector = selection_nodes.calc_max_good_meanmetricrank( + selector, "provisional accept", metric_suffix="testsfx" + ) + calc_cross_comp_metrics = {"max_good_meanmetricrank_testsfx"} + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"][ + "max_good_meanmetricrank_testsfx" + ] + > 0 + ) + + # Run warning logging code for if any of the cross_component_metrics + # already existed and would be over-written + selector = sample_selector("provclass") + selector.cross_component_metrics["max_good_meanmetricrank"] = 10 + selector.cross_component_metrics["extend_factor"] = 2.0 + + selector = selection_nodes.calc_max_good_meanmetricrank(selector, "provisional accept") + calc_cross_comp_metrics = {"max_good_meanmetricrank"} + output_calc_cross_comp_metrics = set( + 
selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["max_good_meanmetricrank"] > 0 + ) + + # Raise an error if "extend_factor" isn't pre-defined + selector = sample_selector("provclass") + with pytest.raises(ValueError): + selector = selection_nodes.calc_max_good_meanmetricrank(selector, "provisional accept") + + # Log without running if no components of decide_comps are in the component table + selector = sample_selector() + selector.cross_component_metrics["extend_factor"] = 2.0 + + selector = selection_nodes.calc_max_good_meanmetricrank(selector, "NotAClassification") + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["max_good_meanmetricrank"] + is None + ) + + +def test_calc_varex_kappa_ratio_smoke(): + """Smoke tests for calc_varex_kappa_ratio""" + + # Standard use of this function requires some components to be "provisional accept" + selector = sample_selector("provclass") + + # Outputs just the metrics used in this function {""} + used_metrics = selection_nodes.calc_varex_kappa_ratio( + selector, "provisional accept", only_used_metrics=True + ) + assert used_metrics == {"kappa", "variance explained"} + + # Standard call to this function. 
+ selector = selection_nodes.calc_varex_kappa_ratio( + selector, + "provisional accept", + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + calc_cross_comp_metrics = {"kappa_rate"} + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["kappa_rate"] > 0 + + # Run warning logging code for if any of the cross_component_metrics + # already existed and would be over-written + selector = sample_selector("provclass") + selector.cross_component_metrics["kappa_rate"] = 10 + selector = selection_nodes.calc_varex_kappa_ratio(selector, "provisional accept") + + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["kappa_rate"] > 0 + + # Log without running if no components of decide_comps are in the component table + selector = sample_selector() + selector = selection_nodes.calc_varex_kappa_ratio(selector, "NotAClassification") + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["kappa_rate"] is None + + # Raise error if "varex kappa ratio" is already in component_table + selector = sample_selector("provclass") + selector.component_table["varex kappa ratio"] = 42 + with pytest.raises(ValueError): + selector = selection_nodes.calc_varex_kappa_ratio(selector, "provisional accept") + + +def test_calc_revised_meanmetricrank_guesses_smoke(): + """Smoke tests for calc_revised_meanmetricrank_guesses""" + + # Standard use of this function requires some components to be "provisional accept" + selector = sample_selector("provclass") + selector.cross_component_metrics["kappa_elbow_kundu"] = 19.1 + 
selector.cross_component_metrics["rho_elbow_kundu"] = 15.2 + + # Outputs just the metrics used in this function {""} + used_metrics = selection_nodes.calc_revised_meanmetricrank_guesses( + selector, + ["provisional accept", "provisional reject", "unclassified"], + only_used_metrics=True, + ) + assert used_metrics == { + "kappa", + "dice_FT2", + "signal-noise_t", + "countnoise", + "countsigFT2", + "rho", + } + + # Standard call to this function. + selector = selection_nodes.calc_revised_meanmetricrank_guesses( + selector, + ["provisional accept", "provisional reject", "unclassified"], + log_extra_report="report log", + log_extra_info="info log", + custom_node_label="custom label", + ) + calc_cross_comp_metrics = {"num_acc_guess", "conservative_guess", "restrict_factor"} + output_calc_cross_comp_metrics = set( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["calc_cross_comp_metrics"] + ) + # Confirming the intended metrics are added to outputs and they have non-zero values + assert len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["num_acc_guess"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["conservative_guess"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["restrict_factor"] == 2 + + # Run warning logging code for if any of the cross_component_metrics + # already existed and would be over-written + selector = sample_selector("provclass") + selector.cross_component_metrics["kappa_elbow_kundu"] = 19.1 + selector.cross_component_metrics["rho_elbow_kundu"] = 15.2 + selector.cross_component_metrics["num_acc_guess"] = 10 + selector.cross_component_metrics["conservative_guess"] = 10 + selector.cross_component_metrics["restrict_factor"] = 5 + selector = selection_nodes.calc_revised_meanmetricrank_guesses( + selector, ["provisional accept", "provisional reject", "unclassified"] + ) + + assert 
len(output_calc_cross_comp_metrics - calc_cross_comp_metrics) == 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["num_acc_guess"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["conservative_guess"] > 0 + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["restrict_factor"] == 2 + + # Log without running if no components of decide_comps are in the component table + selector = sample_selector() + selector.cross_component_metrics["kappa_elbow_kundu"] = 19.1 + selector.cross_component_metrics["rho_elbow_kundu"] = 15.2 + selector = selection_nodes.calc_revised_meanmetricrank_guesses(selector, "NotAClassification") + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["num_acc_guess"] is None + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["conservative_guess"] is None + ) + + # Raise error if "d_table_score_node0" is already in component_table + selector = sample_selector("provclass") + selector.cross_component_metrics["kappa_elbow_kundu"] = 19.1 + selector.cross_component_metrics["rho_elbow_kundu"] = 15.2 + selector.component_table["d_table_score_node0"] = 42 + with pytest.raises(ValueError): + selector = selection_nodes.calc_revised_meanmetricrank_guesses( + selector, ["provisional accept", "provisional reject", "unclassified"] + ) + + # Raise error if restrict_factor isn't a number + selector = sample_selector("provclass") + selector.cross_component_metrics["kappa_elbow_kundu"] = 19.1 + selector.cross_component_metrics["rho_elbow_kundu"] = 15.2 + with pytest.raises(ValueError): + selector = selection_nodes.calc_revised_meanmetricrank_guesses( + selector, + ["provisional accept", "provisional reject", "unclassified"], + restrict_factor="2", + ) + + # Raise error if kappa_elbow_kundu isn't in cross_component_metrics + selector = sample_selector("provclass") + selector.cross_component_metrics["rho_elbow_kundu"] = 15.2 + with pytest.raises(ValueError): + 
selector = selection_nodes.calc_revised_meanmetricrank_guesses( + selector, ["provisional accept", "provisional reject", "unclassified"] + ) + + # Do not raise error if kappa_elbow_kundu isn't in cross_component_metrics + # and there are no components in decide_comps + selector = sample_selector("provclass") + selector.cross_component_metrics["rho_elbow_kundu"] = 15.2 + + selector = selection_nodes.calc_revised_meanmetricrank_guesses( + selector, decide_comps="NoComponents" + ) + assert selector.tree["nodes"][selector.current_node_idx]["outputs"]["num_acc_guess"] is None + assert ( + selector.tree["nodes"][selector.current_node_idx]["outputs"]["conservative_guess"] is None + ) diff --git a/tedana/tests/test_selection_utils.py b/tedana/tests/test_selection_utils.py index d3c51aa8b..2f25baea8 100644 --- a/tedana/tests/test_selection_utils.py +++ b/tedana/tests/test_selection_utils.py @@ -1,45 +1,479 @@ -"""Tests for the tedana.selection._utils module.""" +"""Tests for the tedana.selection.selection_utils module.""" +import os + import numpy as np +import pandas as pd import pytest -from tedana.selection import _utils +from tedana.selection import selection_utils +from tedana.selection.component_selector import ComponentSelector + +THIS_DIR = os.path.dirname(os.path.abspath(__file__)) + + +def sample_component_table(options=None): + """ + Retrieves a sample component table + + Options: Different strings will also the contents of the component table + 'provclass': Change the classifications to "provisional accept" for 4 components + 'unclass': Change 4 classifications to "provisional accept", 2 to accepted, + 2 to rejected, and the rest to "unclassified" + """ + + sample_fname = os.path.join(THIS_DIR, "data", "sample_comptable.tsv") + component_table = pd.read_csv(sample_fname, delimiter="\t") + component_table["classification_tags"] = "" + if options == "unclass": + component_table["classification"] = "unclassified" + component_table.loc[[16, 18], "classification"] = 
"accepted" + component_table.loc[[11, 13], "classification"] = "rejected" + + if (options == "provclass") or (options == "unclass"): + component_table.loc[[2, 4, 6, 8], "classification"] = "provisional accept" + return component_table + + +def sample_selector(options=None): + """ + Retrieves a sample component table and initializes + a selector using that component table and the minimal tree + + options: Different strings will alter the selector + 'provclass': Change the classifications to "provisional accept" for 4 components + 'unclass': Change 4 classifications to "provisional accept" and the rest to "unclassified" + + """ + + tree = "minimal" + + component_table = sample_component_table(options=options) + + xcomp = { + "n_echos": 3, + "n_vols": 201, + "test_elbow": 21, + } + selector = ComponentSelector(tree, component_table, cross_component_metrics=xcomp) + selector.current_node_idx = 0 + + return selector + + +############################################################## +# Functions that are used for interacting with component_table +############################################################## + + +def test_selectcomps2use_succeeds(): + """ + Tests to make sure selectcomps2use runs with full range of inputs. 
+ Include tests to make sure the correct number of components are selected + from the pre-defined sample_comptable.tsv component table + """ + selector = sample_selector() + + decide_comps_options = [ + "rejected", + ["accepted"], + "all", + ["accepted", "rejected"], + 4, + [2, 6, 4], + "NotALabel", + ] + # Given the pre-defined comptable in sample_table_selector, these + # are the expected number of components that should be selected + # for each of the above decide_comps_options + decide_comps_lengths = [4, 17, 21, 21, 1, 3, 0] + + for idx, decide_comps in enumerate(decide_comps_options): + comps2use = selection_utils.selectcomps2use(selector, decide_comps) + assert len(comps2use) == decide_comps_lengths[idx], ( + f"selectcomps2use test should select {decide_comps_lengths[idx]} with " + f"decide_comps={decide_comps}, but it selected {len(comps2use)}" + ) + + +def test_selectcomps2use_fails(): + """Tests for selectcomps2use failure modes""" + selector = sample_selector() + + decide_comps_options = [ + 18.2, # no floats + [11.2, 13.1], # no list of floats + ["accepted", 4], # needs to be either int or string, not both + [4, 3, -1, 9], # no index should be < 0 + [2, 4, 6, 21], # no index should be > number of 0 indexed components + 22, # no index should be > number of 0 indexed components + ] + for decide_comps in decide_comps_options: + with pytest.raises(ValueError): + selection_utils.selectcomps2use(selector, decide_comps) + + selector.component_table = selector.component_table.drop(columns="classification") + with pytest.raises(ValueError): + selection_utils.selectcomps2use(selector, "all") + + +def test_comptable_classification_changer_succeeds(): + """ + All conditions where comptable_classification_changer should run + Note: This confirms the function runs, but not that outputs are accurate + Also tests conditions where the warning logger is used, but doesn't + check the logger + """ + + def validate_changes(expected_classification): + # check every element 
that was supposed to change, did change + changeidx = decision_boolean.index[np.asarray(decision_boolean) == boolstate] + new_vals = selector.component_table.loc[changeidx, "classification"] + for val in new_vals: + assert val == expected_classification + + # Change if true + selector = sample_selector(options="provclass") + decision_boolean = selector.component_table["classification"] == "provisional accept" + boolstate = True + selector = selection_utils.comptable_classification_changer( + selector, boolstate, "accepted", decision_boolean, tag_if="testing_tag" + ) + validate_changes("accepted") + + # Run nochange condition + selector = sample_selector(options="provclass") + decision_boolean = selector.component_table["classification"] == "provisional accept" + selector = selection_utils.comptable_classification_changer( + selector, boolstate, "nochange", decision_boolean, tag_if="testing_tag" + ) + validate_changes("provisional accept") + + # Change if false + selector = sample_selector(options="provclass") + decision_boolean = selector.component_table["classification"] != "provisional accept" + boolstate = False + selector = selection_utils.comptable_classification_changer( + selector, boolstate, "rejected", decision_boolean, tag_if="testing_tag1, testing_tag2" + ) + validate_changes("rejected") + + # Change from accepted to rejected, which should output a warning + # (test if the warning appears?) 
+ selector = sample_selector(options="provclass") + decision_boolean = selector.component_table["classification"] == "accepted" + boolstate = True + selector = selection_utils.comptable_classification_changer( + selector, boolstate, "rejected", decision_boolean, tag_if="testing_tag" + ) + validate_changes("rejected") + + # Change from rejected to accepted and suppress warning + selector = sample_selector(options="provclass") + decision_boolean = selector.component_table["classification"] == "rejected" + boolstate = True + selector = selection_utils.comptable_classification_changer( + selector, + boolstate, + "accepted", + decision_boolean, + tag_if="testing_tag", + dont_warn_reclassify=True, + ) + validate_changes("accepted") + + +def test_change_comptable_classifications_succeeds(): + """All conditions where change_comptable_classifications should run""" + + selector = sample_selector(options="provclass") + + # Given the rho values in the sample table, decision_boolean should have + # 2 True and 2 False values + comps2use = selection_utils.selectcomps2use(selector, "provisional accept") + rho = selector.component_table.loc[comps2use, "rho"] + decision_boolean = rho < 13.5 + + selector, n_true, n_false = selection_utils.change_comptable_classifications( + selector, + "accepted", + "nochange", + decision_boolean, + tag_if_true="testing_tag1", + tag_if_false="testing_tag2", + ) + + assert n_true == 2 + assert n_false == 2 + # check every element that was supposed to change, did change + changeidx = decision_boolean.index[np.asarray(decision_boolean) == True] # noqa: E712 + new_vals = selector.component_table.loc[changeidx, "classification"] + for val in new_vals: + assert val == "accepted" + + +def test_clean_dataframe_smoke(): + """A smoke test for the clean_dataframe function""" + component_table = sample_component_table(options="comptable") + selection_utils.clean_dataframe(component_table) + + +################################################# +# Functions to 
validate inputs or log information +################################################# + + +def test_confirm_metrics_exist_succeeds(): + """tests confirm_metrics_exist run with correct inputs""" + component_table = sample_component_table(options="comptable") + + # Testing for metrics that exist with 1 or 2 necessary metrics in a set + # Returns True if an undefined metric exists so using "assert not" + assert not selection_utils.confirm_metrics_exist(component_table, {"kappa"}) + assert not selection_utils.confirm_metrics_exist(component_table, {"kappa", "rho"}) + + +def test_confirm_metrics_exist_fails(): + """tests confirm_metrics_exist for failure conditions""" + + component_table = sample_component_table(options="comptable") + + # Should fail with and error would have default or pre-defined file name + with pytest.raises(ValueError): + selection_utils.confirm_metrics_exist(component_table, {"kappa", "quack"}) + with pytest.raises(ValueError): + selection_utils.confirm_metrics_exist( + component_table, {"kappa", "mooo"}, function_name="farm" + ) + + +def test_log_decision_tree_step_smoke(): + """A smoke test for log_decision_tree_step""" + + selector = sample_selector() + + # Standard run for logging classification changes + comps2use = selection_utils.selectcomps2use(selector, "reject") + selection_utils.log_decision_tree_step( + "Step 0: test_function_name", + comps2use, + decide_comps="reject", + n_true=5, + n_false=2, + if_true="accept", + if_false="reject", + ) + + # Standard use for logging cross_component_metric calculation + outputs = { + "calc_cross_comp_metrics": [ + "kappa_elbow_kundu", + "rho_elbow_kundu", + ], + "kappa_elbow_kundu": 45, + "rho_elbow_kundu": 12, + } + selection_utils.log_decision_tree_step( + "Step 0: test_function_name", comps2use, calc_outputs=outputs + ) + + # Puts a warning in the logger if outputs doesn't have a cross_component_metrics field + outputs = { + "kappa_elbow_kundu": 45, + "rho_elbow_kundu": 12, + } + 
selection_utils.log_decision_tree_step( + "Step 0: test_function_name", comps2use, calc_outputs=outputs + ) + + # Logging no components found with a specified classification + comps2use = selection_utils.selectcomps2use(selector, "NotALabel") + selection_utils.log_decision_tree_step( + "Step 0: test_function_name", + comps2use, + decide_comps="NotALabel", + n_true=5, + n_false=2, + if_true="accept", + if_false="reject", + ) + + +def test_log_classification_counts_smoke(): + """A smoke test for log_classification_counts""" + + component_table = sample_component_table(options="comptable") + + selection_utils.log_classification_counts(5, component_table) + + +####################################################### +# Calculations that are used in decision tree functions +####################################################### def test_getelbow_smoke(): """A smoke test for the getelbow function.""" arr = np.random.random(100) - idx = _utils.getelbow(arr) + idx = selection_utils.getelbow(arr) assert isinstance(idx, np.int32) or isinstance(idx, np.int64) - val = _utils.getelbow(arr, return_val=True) + val = selection_utils.getelbow(arr, return_val=True) assert isinstance(val, float) # Running an empty array should raise a ValueError arr = np.array([]) with pytest.raises(ValueError): - _utils.getelbow(arr) + selection_utils.getelbow(arr) # Running a 2D array should raise a ValueError arr = np.random.random((100, 100)) with pytest.raises(ValueError): - _utils.getelbow(arr) + selection_utils.getelbow(arr) -def test_getelbow_cons(): +def test_getelbow_cons_smoke(): """A smoke test for the getelbow_cons function.""" arr = np.random.random(100) - idx = _utils.getelbow_cons(arr) + idx = selection_utils.getelbow_cons(arr) assert isinstance(idx, np.int32) or isinstance(idx, np.int64) - val = _utils.getelbow_cons(arr, return_val=True) + val = selection_utils.getelbow_cons(arr, return_val=True) assert isinstance(val, float) # Running an empty array should raise a ValueError arr = 
np.array([]) with pytest.raises(ValueError): - _utils.getelbow_cons(arr) + selection_utils.getelbow_cons(arr) # Running a 2D array should raise a ValueError arr = np.random.random((100, 100)) with pytest.raises(ValueError): - _utils.getelbow_cons(arr) + selection_utils.getelbow_cons(arr) + + +def test_kappa_elbow_kundu_smoke(): + """A smoke test for the kappa_elbow_kundu function""" + + component_table = sample_component_table() + + # Normal execution. With n_echoes==5 a few components will be excluded for the nonsig threshold + ( + kappa_elbow_kundu, + kappa_allcomps_elbow, + kappa_nonsig_elbow, + varex_upper_p, + ) = selection_utils.kappa_elbow_kundu(component_table, n_echos=5) + assert isinstance(kappa_elbow_kundu, float) + assert isinstance(kappa_allcomps_elbow, float) + assert isinstance(kappa_nonsig_elbow, float) + assert isinstance(varex_upper_p, float) + + # For the sample component_table, when n_echos=6, there are fewer than 5 components + # that are greater than an f01 threshold and a different condition in kappa_elbow_kundu is run + ( + kappa_elbow_kundu, + kappa_allcomps_elbow, + kappa_nonsig_elbow, + varex_upper_p, + ) = selection_utils.kappa_elbow_kundu(component_table, n_echos=6) + assert isinstance(kappa_elbow_kundu, float) + assert isinstance(kappa_allcomps_elbow, float) + assert isinstance(kappa_nonsig_elbow, type(None)) + assert isinstance(varex_upper_p, float) + + # Run using only a subset of components + ( + kappa_elbow_kundu, + kappa_allcomps_elbow, + kappa_nonsig_elbow, + varex_upper_p, + ) = selection_utils.kappa_elbow_kundu( + component_table, + n_echos=5, + comps2use=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 20], + ) + assert isinstance(kappa_elbow_kundu, float) + assert isinstance(kappa_allcomps_elbow, float) + assert isinstance(kappa_nonsig_elbow, float) + assert isinstance(varex_upper_p, float) + + +def test_rho_elbow_kundu_liberal_smoke(): + """A smoke test for the rho_elbow_kundu_liberal function""" + + 
component_table = sample_component_table(options="unclass") + # Normal execution with default kundu threshold + ( + rho_elbow_kundu, + rho_allcomps_elbow, + rho_unclassified_elbow, + elbow_f05, + ) = selection_utils.rho_elbow_kundu_liberal(component_table, n_echos=3) + assert isinstance(rho_elbow_kundu, float) + assert isinstance(rho_allcomps_elbow, float) + assert isinstance(rho_unclassified_elbow, float) + assert isinstance(elbow_f05, float) + + # Normal execution with liberal threshold + ( + rho_elbow_kundu, + rho_allcomps_elbow, + rho_unclassified_elbow, + elbow_f05, + ) = selection_utils.rho_elbow_kundu_liberal( + component_table, n_echos=3, rho_elbow_type="liberal" + ) + assert isinstance(rho_elbow_kundu, float) + assert isinstance(rho_allcomps_elbow, float) + assert isinstance(rho_unclassified_elbow, float) + assert isinstance(elbow_f05, float) + + # Run using only a subset of components + ( + rho_elbow_kundu, + rho_allcomps_elbow, + rho_unclassified_elbow, + elbow_f05, + ) = selection_utils.rho_elbow_kundu_liberal( + component_table, + n_echos=3, + rho_elbow_type="kundu", + comps2use=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 20], + subset_comps2use=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 18, 20], + ) + assert isinstance(rho_elbow_kundu, float) + assert isinstance(rho_allcomps_elbow, float) + assert isinstance(rho_unclassified_elbow, float) + assert isinstance(elbow_f05, float) + + # Run with no unclassified components and thus subset_comps2use is empty + component_table = sample_component_table() + ( + rho_elbow_kundu, + rho_allcomps_elbow, + rho_unclassified_elbow, + elbow_f05, + ) = selection_utils.rho_elbow_kundu_liberal(component_table, n_echos=3) + assert isinstance(rho_elbow_kundu, float) + assert isinstance(rho_allcomps_elbow, float) + assert isinstance(rho_unclassified_elbow, type(None)) + assert isinstance(elbow_f05, float) + + with pytest.raises(ValueError): + selection_utils.rho_elbow_kundu_liberal( + component_table, 
n_echos=3, rho_elbow_type="perfect" + ) + + +def test_get_extend_factor_smoke(): + """A smoke test for get_extend_factor""" + + val = selection_utils.get_extend_factor(extend_factor=int(10)) + assert isinstance(val, float) + + for n_vols in [80, 100, 120]: + val = selection_utils.get_extend_factor(n_vols=n_vols) + assert isinstance(val, float) + + with pytest.raises(ValueError): + selection_utils.get_extend_factor() diff --git a/tedana/utils.py b/tedana/utils.py index 0248f9f2a..8a4b7e44a 100644 --- a/tedana/utils.py +++ b/tedana/utils.py @@ -76,6 +76,12 @@ def make_adaptive_mask(data, mask=None, getsum=False, threshold=1): # get 33rd %ile of `first_echo` and find corresponding index # NOTE: percentile is arbitrary + # TODO: "interpolation" param changed to "method" in numpy 1.22.0 + # confirm method="higher" is the same as interpolation="higher" + # Current minimum version for numpy in tedana is 1.16 where + # there is no "method" parameter. Either wait until we bump + # our minimum numpy version to 1.22 or add a version check + # or try/catch statement. perc = np.percentile(first_echo, 33, interpolation="higher") perc_val = echo_means[:, 0] == perc diff --git a/tedana/workflows/__init__.py b/tedana/workflows/__init__.py index c83313efd..571a2d567 100644 --- a/tedana/workflows/__init__.py +++ b/tedana/workflows/__init__.py @@ -1,9 +1,9 @@ # emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- # ex: set sts=4 ts=4 sw=4 et: - +from .ica_reclassify import ica_reclassify_workflow from .t2smap import t2smap_workflow # Overrides submodules with their functions. 
from .tedana import tedana_workflow -__all__ = ["tedana_workflow", "t2smap_workflow"] +__all__ = ["tedana_workflow", "t2smap_workflow", "ica_reclassify_workflow"] diff --git a/tedana/workflows/ica_reclassify.py b/tedana/workflows/ica_reclassify.py new file mode 100644 index 000000000..209764778 --- /dev/null +++ b/tedana/workflows/ica_reclassify.py @@ -0,0 +1,544 @@ +""" +Run the reclassification workflow for a previous tedana run +""" +import argparse +import datetime +import logging +import os +import os.path as op +from glob import glob + +import numpy as np +import pandas as pd + +import tedana.gscontrol as gsc +from tedana import __version__, io, reporting, selection, utils +from tedana.bibtex import get_description_references +from tedana.io import ( + ALLOWED_COMPONENT_DELIMITERS, + fname_to_component_list, + str_to_component_list, +) + +LGR = logging.getLogger("GENERAL") +RepLGR = logging.getLogger("REPORT") + + +def _get_parser(): + """ + Parses command line inputs for tedana + + Returns + ------- + parser.parse_args() : argparse dict + """ + + from tedana import __version__ + + verstr = "ica_reclassify v{}".format(__version__) + + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + # Argument parser follow template provided by RalphyZ + # https://stackoverflow.com/a/43456577 + optional = parser._action_groups.pop() + required = parser.add_argument_group("Required Arguments") + required.add_argument( + "registry", + help="File registry from a previous tedana run", + ) + optional.add_argument( + "--manacc", + dest="manual_accept", + nargs="+", + help=( + "Component indices to accept (zero-indexed)." + "Supply as a comma-delimited liist with no spaces, " + "as a csv file, or as a text file with an allowed " + f"delimiter {repr(ALLOWED_COMPONENT_DELIMITERS)}." + ), + default=[], + ) + optional.add_argument( + "--manrej", + dest="manual_reject", + nargs="+", + help=( + "Component indices to accept (zero-indexed)." 
+ "Supply as a comma-delimited liist with no spaces, " + "as a csv file, or as a text file with an allowed " + f"delimiter {repr(ALLOWED_COMPONENT_DELIMITERS)}." + ), + default=[], + ) + optional.add_argument( + "--config", + dest="config", + help="File naming configuration.", + default="auto", + ) + optional.add_argument( + "--out-dir", + dest="out_dir", + type=str, + metavar="PATH", + help="Output directory.", + default=".", + ) + optional.add_argument( + "--prefix", dest="prefix", type=str, help="Prefix for filenames generated.", default="" + ) + optional.add_argument( + "--convention", + dest="convention", + action="store", + choices=["orig", "bids"], + help=("Filenaming convention. bids will use the latest BIDS derivatives version."), + default="bids", + ) + optional.add_argument( + "--tedort", + dest="tedort", + action="store_true", + help=("Orthogonalize rejected components w.r.t. accepted components prior to denoising."), + default=False, + ) + optional.add_argument( + "--mir", + dest="mir", + action="store_true", + help="Run minimum image regression.", + default=False, + ) + optional.add_argument( + "--no-reports", + dest="no_reports", + action="store_true", + help=( + "Creates a figures folder with static component " + "maps, timecourse plots and other diagnostic " + "images and displays these in an interactive " + "reporting framework" + ), + default=False, + ) + optional.add_argument( + "--png-cmap", dest="png_cmap", type=str, help="Colormap for figures", default="coolwarm" + ) + optional.add_argument( + "--debug", + dest="debug", + action="store_true", + help=( + "Logs in the terminal will have increased " + "verbosity, and will also be written into " + "a .tsv file in the output directory." 
+ ), + default=False, + ) + optional.add_argument( + "--overwrite", + "-f", + dest="overwrite", + action="store_true", + help="Force overwriting of files.", + ) + optional.add_argument( + "--quiet", dest="quiet", help=argparse.SUPPRESS, action="store_true", default=False + ) + optional.add_argument("-v", "--version", action="version", version=verstr) + + parser._action_groups.append(optional) + return parser + + +def _main(argv=None): + """ica_reclassify entry point""" + + args = _get_parser().parse_args(argv) + + # Run ica_reclassify_workflow + ica_reclassify_workflow( + args.registry, + accept=args.manual_accept, + reject=args.manual_reject, + out_dir=args.out_dir, + config=args.config, + prefix=args.prefix, + convention=args.convention, + tedort=args.tedort, + mir=args.mir, + no_reports=args.no_reports, + png_cmap=args.png_cmap, + overwrite=args.overwrite, + debug=args.debug, + quiet=args.quiet, + ) + + +def _parse_manual_list(manual_list): + """ + Parse the list of components to accept or reject into a list of integers + + Parameters + ---------- + manual_list: :obj:`str` :obj:`list[str]` or [] or None + String of integers separated by spaces, commas, or tabs + A file name for a file that contains integers + + Returns + ------- + manual_nums: :obj:`list[int]` + A list of integers or an empty list. 
+ + Note + ---- + Do not need to check if integers are less than 0 or greater than the total + number of components here, because it is later checked in selectcomps2use + and a descriptive error message will appear there + """ + if not manual_list: + manual_nums = [] + elif len(manual_list) > 1: + # Assume that this is a list of integers, but raise error if not + manual_nums = [] + for x in manual_list: + if float(x) == int(x): + manual_nums.append(int(x)) + else: + raise ValueError( + "_parse_manual_list expected a list of integers, " + f"but the input is {manual_list}" + ) + elif op.exists(op.expanduser(str(manual_list[0]).strip(" "))): + # filename was given + manual_nums = fname_to_component_list(op.expanduser(str(manual_list[0]).strip(" "))) + elif type(manual_list[0]) == str: + # arbitrary string was given, length of list is 1 + manual_nums = str_to_component_list(manual_list[0]) + elif type(manual_list[0]) == int: + # Is a single integer and should remain a list with a single integer + manual_nums = manual_list + else: + raise ValueError( + f"_parse_manual_list expected integers or a filename, but the input is {manual_list}" + ) + + return manual_nums + + +def ica_reclassify_workflow( + registry, + accept=[], + reject=[], + out_dir=".", + config="auto", + convention="bids", + prefix="", + tedort=False, + mir=False, + no_reports=False, + png_cmap="coolwarm", + overwrite=False, + debug=False, + quiet=False, +): + """ + Run the post-tedana manual classification workflow. + + Please remember to cite [1]_. + + Parameters + ---------- + registry: :obj:`str` + The previously run registry as a JSON file. + accept: :obj: `list` + A list of integer values of components to accept in this workflow. + reject: :obj: `list` + A list of integer values of components to reject in this workflow. + out_dir : :obj:`str`, optional + Output directory. + tedort : :obj:`bool`, optional + Orthogonalize rejected components w.r.t. accepted ones prior to + denoising. Default is False. 
+ mir : :obj:`bool`, optional + Run minimum image regression after denoising. Default is False. + no_reports : obj:'bool', optional + Do not generate .html reports and .png plots. Default is false such + that reports are generated. + png_cmap : obj:'str', optional + Name of a matplotlib colormap to be used when generating figures. + Cannot be used with --no-png. Default is 'coolwarm'. + debug : :obj:`bool`, optional + Whether to run in debugging mode or not. Default is False. + overwrite : :obj:`bool`, optional + Whether to force file overwrites. Default is False. + quiet : :obj:`bool`, optional + If True, suppresses logging/printing of messages. Default is False. + + Notes + ----- + This workflow writes out several files. For a complete list of the files + generated by this workflow, please visit + https://tedana.readthedocs.io/en/latest/outputs.html + + References + ---------- + .. [1] DuPre, E. M., Salo, T., Ahmed, Z., Bandettini, P. A., Bottenhorn, K. L., + Caballero-Gaudes, C., Dowdle, L. T., Gonzalez-Castillo, J., Heunis, S., + Kundu, P., Laird, A. R., Markello, R., Markiewicz, C. J., Moia, S., + Staden, I., Teves, J. B., Uruñuela, E., Vaziri-Pashkam, M., + Whitaker, K., & Handwerker, D. A. (2021). + TE-dependent analysis of multi-echo fMRI with tedana. + Journal of Open Source Software, 6(66), 3669. doi:10.21105/joss.03669. 
+ """ + out_dir = op.abspath(out_dir) + if not op.isdir(out_dir): + os.mkdir(out_dir) + + # If accept and reject are a list of integers, they stay the same + # If they are a filename, load numbers of from + # If they are a string of values, convert to a list of ints + accept = _parse_manual_list(accept) + reject = _parse_manual_list(reject) + + # Check that there is no overlap in accepted/rejected components + if accept: + acc = set(accept) + else: + acc = () + if reject: + rej = set(reject) + else: + rej = () + + if (not accept) and (not reject): + # TODO: remove + print(accept) + print(reject) + raise ValueError("Must manually accept or reject at least one component") + + in_both = [] + for a in acc: + if a in rej: + in_both.append(a) + + if len(in_both) != 0: + raise ValueError("The following components were both accepted and rejected: " f"{in_both}") + + # boilerplate + basename = "report" + extension = "txt" + repname = op.join(out_dir, (basename + "." + extension)) + bibtex_file = op.join(out_dir, "references.bib") + repex = op.join(out_dir, (basename + "*")) + previousreps = glob(repex) + previousreps.sort(reverse=True) + for f in previousreps: + previousparts = op.splitext(f) + newname = previousparts[0] + "_old" + previousparts[1] + os.rename(f, newname) + + # create logfile name + basename = "tedana_" + extension = "tsv" + start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H%M%S") + logname = op.join(out_dir, (basename + start_time + "." 
+ extension)) + utils.setup_loggers(logname=logname, repname=repname, quiet=quiet, debug=debug) + + LGR.info("Using output directory: {}".format(out_dir)) + + ioh = io.InputHarvester(registry) + comptable = ioh.get_file_contents("ICA metrics tsv") + xcomp = ioh.get_file_contents("ICA cross component metrics json") + status_table = ioh.get_file_contents("ICA status table tsv") + previous_tree_fname = ioh.get_file_path("ICA decision tree json") + mmix = np.asarray(ioh.get_file_contents("ICA mixing tsv")) + adaptive_mask = ioh.get_file_contents("adaptive mask img") + # If global signal was removed in the previous run, we can assume that + # the user wants to use that file again. If not, use the default of + # optimally combined data. + gskey = "removed gs combined img" + if ioh.get_file_path(gskey): + data_oc = ioh.get_file_contents(gskey) + used_gs = True + else: + data_oc = ioh.get_file_contents("combined img") + used_gs = False + + io_generator = io.OutputGenerator( + data_oc, + convention=convention, + prefix=prefix, + config=config, + overwrite=overwrite, + verbose=False, + out_dir=out_dir, + old_registry=ioh.registry, + ) + + # Make a new selector with the added files + selector = selection.component_selector.ComponentSelector( + previous_tree_fname, + comptable, + cross_component_metrics=xcomp, + status_table=status_table, + ) + + if accept: + selector.add_manual(accept, "accepted") + + if reject: + selector.add_manual(reject, "rejected") + + selector.select() + comptable = selector.component_table + + # NOTE: most of these will be identical to previous, but this makes + # things easier for programs which will view the data after running. 
+ # First, make the output generator + comp_names = comptable["Component"].values + mixing_df = pd.DataFrame(data=mmix, columns=comp_names) + to_copy = [ + "z-scored ICA components img", + "ICA mixing tsv", + "ICA decomposition json", + "ICA metrics json", + ] + if used_gs: + to_copy.append(gskey) + to_copy.append("has gs combined img") + + for tc in to_copy: + print(tc) + io_generator.save_file(ioh.get_file_contents(tc), tc) + + # Save component selector and tree + selector.to_files(io_generator) + + if selector.n_accepted_comps == 0: + LGR.warning( + "No accepted components remaining after manual classification! " + "Please check data and results!" + ) + + mmix_orig = mmix.copy() + # TODO: make this a function + if tedort: + comps_accepted = selector.accepted_comps + comps_rejected = selector.rejected_comps + acc_ts = mmix[:, comps_accepted] + rej_ts = mmix[:, comps_rejected] + betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0] + pred_rej_ts = np.dot(acc_ts, betas) + resid = rej_ts - pred_rej_ts + mmix[:, comps_rejected] = resid + comp_names = [ + io.add_decomp_prefix(comp, prefix="ica", max_value=comptable.index.max()) + for comp in range(selector.n_comps) + ] + mixing_df = pd.DataFrame(data=mmix, columns=comp_names) + io_generator.save_file(mixing_df, "ICA orthogonalized mixing tsv") + RepLGR.info( + "Rejected components' time series were then " + "orthogonalized with respect to accepted components' time " + "series." + ) + + n_vols = data_oc.shape[3] + img_t_r = io_generator.reference_img.header.get_zooms()[-1] + adaptive_mask = utils.reshape_niimg(adaptive_mask) + mask_denoise = adaptive_mask >= 1 + data_oc = utils.reshape_niimg(data_oc) + + # TODO: make a better result-writing function + # #############################################!!!! + # TODO: make a better time series creation function + # - get_ts_fit_tag(include=[], exclude=[]) + # - get_ts_regress/residual_tag(include=[], exclude=[]) + # How to handle [acc/rej] + tag ? 
+ io.writeresults( + data_oc, + mask=mask_denoise, + comptable=comptable, + mmix=mmix, + n_vols=n_vols, + io_generator=io_generator, + ) + + if mir: + io_generator.overwrite = True + gsc.minimum_image_regression(data_oc, mmix, mask_denoise, comptable, io_generator) + io_generator.overwrite = False + + # Write out BIDS-compatible description file + derivative_metadata = { + "Name": "tedana Outputs", + "BIDSVersion": "1.5.0", + "DatasetType": "derivative", + "GeneratedBy": [ + { + "Name": "ica_reclassify", + "Version": __version__, + "Description": ( + "A denoising pipeline for the identification and removal " + "of non-BOLD noise from multi-echo fMRI data." + ), + "CodeURL": "https://github.com/ME-ICA/tedana", + } + ], + } + io_generator.save_file(derivative_metadata, "data description json") + + with open(repname, "r") as fo: + report = [line.rstrip() for line in fo.readlines()] + report = " ".join(report) + with open(repname, "w") as fo: + fo.write(report) + + # Collect BibTeX entries for cited papers + references = get_description_references(report) + + with open(bibtex_file, "w") as fo: + fo.write(references) + + if not no_reports: + LGR.info("Making figures folder with static component maps and timecourse plots.") + + dn_ts, hikts, lowkts = io.denoise_ts(data_oc, mmix, mask_denoise, comptable) + + # Figure out which control methods were used + gscontrol = [] + if used_gs: + gscontrol.append("gsr") + if mir: + gscontrol.append("mir") + gscontrol = None if gscontrol is [] else gscontrol + + reporting.static_figures.carpet_plot( + optcom_ts=data_oc, + denoised_ts=dn_ts, + hikts=hikts, + lowkts=lowkts, + mask=mask_denoise, + io_generator=io_generator, + gscontrol=gscontrol, + ) + reporting.static_figures.comp_figures( + data_oc, + mask=mask_denoise, + comptable=comptable, + mmix=mmix_orig, + io_generator=io_generator, + png_cmap=png_cmap, + ) + + LGR.info("Generating dynamic report") + reporting.generate_report(io_generator, tr=img_t_r) + + io_generator.save_self() 
+ LGR.info("Workflow completed") + utils.teardown_loggers() + + +if __name__ == "__main__": + _main() diff --git a/tedana/workflows/t2smap.py b/tedana/workflows/t2smap.py index 0711f969b..67cf39e57 100644 --- a/tedana/workflows/t2smap.py +++ b/tedana/workflows/t2smap.py @@ -25,8 +25,8 @@ def _get_parser(): ------- parser.parse_args() : argparse dict """ - parser = argparse.ArgumentParser() - # Argument parser follow templtate provided by RalphyZ + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + # Argument parser follow template provided by RalphyZ # https://stackoverflow.com/a/43456577 optional = parser._action_groups.pop() required = parser.add_argument_group("Required Arguments") @@ -90,12 +90,14 @@ def _get_parser(): dest="fittype", action="store", choices=["loglin", "curvefit"], - help="Desired Fitting Method" - '"loglin" means that a linear model is fit' - " to the log of the data, default" - '"curvefit" means that a more computationally' - "demanding monoexponential model is fit" - "to the raw data", + help=( + "Desired T2*/S0 fitting method. " + '"loglin" means that a linear model is fit ' + "to the log of the data. " + '"curvefit" means that a more computationally ' + "demanding monoexponential model is fit " + "to the raw data. " + ), default="loglin", ) optional.add_argument( @@ -117,7 +119,7 @@ def _get_parser(): dest="combmode", action="store", choices=["t2s", "paid"], - help=("Combination scheme for TEs: t2s (Posse 1999, default), paid (Poser)"), + help=("Combination scheme for TEs: t2s (Posse 1999), paid (Poser)"), default="t2s", ) optional.add_argument( @@ -130,7 +132,7 @@ def _get_parser(): "threadpoolctl to set the parameter outside " "of the workflow function. Higher numbers of " "threads tend to slow down performance on " - "typical datasets. Default is 1." + "typical datasets." 
), default=1, ) @@ -321,6 +323,7 @@ def t2smap_workflow( ], } io_generator.save_file(derivative_metadata, "data description json") + io_generator.save_self() LGR.info("Workflow completed") utils.teardown_loggers() diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index 3a2bd15c6..a60e3235f 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -8,7 +8,6 @@ import os import os.path as op import shutil -import sys from glob import glob import numpy as np @@ -48,8 +47,8 @@ def _get_parser(): from tedana import __version__ verstr = "tedana v{}".format(__version__) - parser = argparse.ArgumentParser() - # Argument parser follow templtate provided by RalphyZ + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + # Argument parser follow template provided by RalphyZ # https://stackoverflow.com/a/43456577 optional = parser._action_groups.pop() required = parser.add_argument_group("Required Arguments") @@ -123,7 +122,6 @@ def _get_parser(): '"curvefit" means that a more computationally ' "demanding monoexponential model is fit " "to the raw data. " - 'Default is "loglin".' ), default="loglin", ) @@ -132,7 +130,7 @@ def _get_parser(): dest="combmode", action="store", choices=["t2s"], - help=("Combination scheme for TEs: t2s (Posse 1999, default)"), + help=("Combination scheme for TEs: t2s (Posse 1999)"), default="t2s", ) optional.add_argument( @@ -144,15 +142,29 @@ def _get_parser(): "PCA decomposition with the mdl, kic and aic options " "is based on a Moving Average (stationary Gaussian) " "process and are ordered from most to least aggressive. " + "'kundu' or 'kundu-stabilize' are selection methods that " + "were distributed with MEICA. 
" "Users may also provide a float from 0 to 1, " "in which case components will be selected based on the " "cumulative variance explained or an integer greater than 1" - "in which case the specificed number of components will be" + "in which case the specificed number of components will be " "selected." - "Default='aic'." ), + choices=["mdl", "kic", "aic"], default="aic", ) + optional.add_argument( + "--tree", + dest="tree", + help=( + "Decision tree to use. You may use a " + "packaged tree (kundu, minimal) or supply a JSON " + "file which matches the decision tree file " + "specification. Minimal still being tested with more" + "details in docs" + ), + default="kundu", + ) optional.add_argument( "--seed", dest="fixed_seed", @@ -163,7 +175,6 @@ def _get_parser(): "algorithm. Set to an integer value for " "reproducible ICA results. Set to -1 for " "varying results across ICA calls. " - "Default=42." ), default=42, ) @@ -204,12 +215,12 @@ def _get_parser(): nargs="+", help=( "Perform additional denoising to remove " - "spatially diffuse noise. Default is None. " + "spatially diffuse noise. " "This argument can be single value or a space " "delimited list" ), choices=["mir", "gsr"], - default=None, + default="", ) optional.add_argument( "--no-reports", @@ -254,7 +265,7 @@ def _get_parser(): "threadpoolctl to set the parameter outside " "of the workflow function. Higher numbers of " "threads tend to slow down performance on " - "typical datasets. Default is 1." + "typical datasets." 
), default=1, ) @@ -270,13 +281,6 @@ def _get_parser(): default=False, ) optional.add_argument( - "--quiet", dest="quiet", help=argparse.SUPPRESS, action="store_true", default=False - ) - optional.add_argument("-v", "--version", action="version", version=verstr) - parser._action_groups.append(optional) - - rerungrp = parser.add_argument_group("Arguments for Rerunning the Workflow") - rerungrp.add_argument( "--t2smap", dest="t2smap", metavar="FILE", @@ -284,7 +288,7 @@ def _get_parser(): help=("Precalculated T2* map in the same space as the input data."), default=None, ) - rerungrp.add_argument( + optional.add_argument( "--mix", dest="mixm", metavar="FILE", @@ -292,27 +296,20 @@ def _get_parser(): help=("File containing mixing matrix. If not provided, ME-PCA & ME-ICA is done."), default=None, ) - rerungrp.add_argument( - "--ctab", - dest="ctab", - metavar="FILE", - type=lambda x: is_valid_file(parser, x), - help=( - "File containing a component table from which " - "to extract pre-computed classifications. " - "Requires --mix." - ), - default=None, + + optional.add_argument( + "--quiet", dest="quiet", help=argparse.SUPPRESS, action="store_true", default=False ) - rerungrp.add_argument( - "--manacc", - dest="manacc", - metavar="INT", - type=int, - nargs="+", - help=("List of manually accepted components. 
Requires --ctab and --mix."), - default=None, + parser.add_argument( + "--overwrite", + "-f", + dest="overwrite", + action="store_true", + help="Force overwriting of files.", + default=False, ) + optional.add_argument("-v", "--version", action="version", version=verstr) + parser._action_groups.append(optional) return parser @@ -326,6 +323,7 @@ def tedana_workflow( prefix="", fittype="loglin", combmode="t2s", + tree="kundu", tedpca="aic", fixed_seed=42, maxit=500, @@ -338,10 +336,9 @@ def tedana_workflow( low_mem=False, debug=False, quiet=False, + overwrite=False, t2smap=None, mixm=None, - ctab=None, - manacc=None, ): """ Run the "canonical" TE-Dependent ANAlysis workflow. @@ -355,6 +352,9 @@ def tedana_workflow( list of echo-specific files, in ascending order. tes : :obj:`list` List of echo times associated with data in milliseconds. + + Other Parameters + ---------------- out_dir : :obj:`str`, optional Output directory. mask : :obj:`str` or None, optional @@ -362,6 +362,12 @@ def tedana_workflow( spatially aligned with `data`. If an explicit mask is not provided, then Nilearn's compute_epi_mask function will be used to derive a mask from the first echo's data. + convention : {'bids', 'orig'}, optional + Filenaming convention. bids uses the latest BIDS derivatives version (1.5.0). + Default is 'bids'. + prefix : :obj:`str` or None, optional + Prefix for filenames generated. + Default is "" fittype : {'loglin', 'curvefit'}, optional Monoexponential fitting method. 'loglin' uses the the default linear fit to the log of the data. 'curvefit' uses a monoexponential fit to @@ -369,61 +375,59 @@ def tedana_workflow( Default is 'loglin'. combmode : {'t2s'}, optional Combination scheme for TEs: 't2s' (Posse 1999, default). + tree : {'kundu', 'minimal', 'json file'}, optional + Decision tree to use for component selection. Can be a + packaged tree (kundu, minimal) or a user-supplied JSON file that + matches the decision tree file specification. 
Minimal is intented + to be a simpler process that is a bit more conservative, but it + accepts and rejects some distinct components compared to kundu. + Testing to better understand the effects of the differences is ongoing. + Default is 'kundu'. tedpca : {'mdl', 'aic', 'kic', 'kundu', 'kundu-stabilize', float}, optional Method with which to select components in TEDPCA. If a float is provided, then it is assumed to represent percentage of variance explained (0-1) to retain from PCA. Default is 'aic'. + fixed_seed : :obj:`int`, optional + Value passed to ``mdp.numx_rand.seed()``. + Set to a positive integer value for reproducible ICA results; + otherwise, set to -1 for varying results across calls. + maxit : :obj:`int`, optional + Maximum number of iterations for ICA. Default is 500. + maxrestart : :obj:`int`, optional + Maximum number of attempts for ICA. If ICA fails to converge, the + fixed seed will be updated and ICA will be run again. If convergence + is achieved before maxrestart attempts, ICA will finish early. + Default is 10. tedort : :obj:`bool`, optional Orthogonalize rejected components w.r.t. accepted ones prior to denoising. Default is False. gscontrol : {None, 'mir', 'gsr'} or :obj:`list`, optional Perform additional denoising to remove spatially diffuse noise. Default is None. - verbose : :obj:`bool`, optional - Generate intermediate and additional files. Default is False. no_reports : obj:'bool', optional Do not generate .html reports and .png plots. Default is false such that reports are generated. png_cmap : obj:'str', optional Name of a matplotlib colormap to be used when generating figures. Cannot be used with --no-png. Default is 'coolwarm'. + verbose : :obj:`bool`, optional + Generate intermediate and additional files. Default is False. + low_mem : :obj:`bool`, optional + Enables low-memory processing, including the use of IncrementalPCA. + May increase workflow duration. Default is False. 
+ debug : :obj:`bool`, optional + Whether to run in debugging mode or not. Default is False. t2smap : :obj:`str`, optional Precalculated T2* map in the same space as the input data. Values in the map must be in seconds. mixm : :obj:`str` or None, optional File containing mixing matrix, to be used when re-running the workflow. If not provided, ME-PCA and ME-ICA are done. Default is None. - ctab : :obj:`str` or None, optional - File containing component table from which to extract pre-computed - classifications, to be used with 'mixm' when re-running the workflow. - Default is None. - manacc : :obj:`list` of :obj:`int` or None, optional - List of manually accepted components. Can be a list of the components - numbers or None. - If provided, this parameter requires ``mixm`` and ``ctab`` to be provided as well. - Default is None. - - Other Parameters - ---------------- - fixed_seed : :obj:`int`, optional - Value passed to ``mdp.numx_rand.seed()``. - Set to a positive integer value for reproducible ICA results; - otherwise, set to -1 for varying results across calls. - maxit : :obj:`int`, optional - Maximum number of iterations for ICA. Default is 500. - maxrestart : :obj:`int`, optional - Maximum number of attempts for ICA. If ICA fails to converge, the - fixed seed will be updated and ICA will be run again. If convergence - is achieved before maxrestart attempts, ICA will finish early. - Default is 10. - low_mem : :obj:`bool`, optional - Enables low-memory processing, including the use of IncrementalPCA. - May increase workflow duration. Default is False. - debug : :obj:`bool`, optional - Whether to run in debugging mode or not. Default is False. quiet : :obj:`bool`, optional If True, suppresses logging/printing of messages. Default is False. + overwrite : :obj:`bool`, optional + If True, force overwriting of files. Default is False. 
Notes ----- @@ -473,17 +477,27 @@ def tedana_workflow( # a float on [0, 1] or an int >= 1 tedpca = check_tedpca_value(tedpca, is_parser=False) + # For z-catted files, make sure it's a list of size 1 + if isinstance(data, str): + data = [data] + LGR.info("Loading input data: {}".format([f for f in data])) catd, ref_img = io.load_data(data, n_echos=n_echos) + io_generator = io.OutputGenerator( ref_img, convention=convention, out_dir=out_dir, prefix=prefix, config="auto", + overwrite=overwrite, verbose=verbose, ) + # Record inputs to OutputGenerator + # TODO: turn this into an IOManager since this isn't really output + io_generator.register_input(data) + n_samp, n_echos, n_vols = catd.shape LGR.debug("Resulting data shape: {}".format(catd.shape)) @@ -510,26 +524,6 @@ def tedana_workflow( elif mixm is not None: raise IOError("Argument 'mixm' must be an existing file.") - if ctab is not None and op.isfile(ctab): - ctab = op.abspath(ctab) - # Allow users to re-run on same folder - metrics_name = io_generator.get_name("ICA metrics tsv") - if ctab != metrics_name: - shutil.copyfile(ctab, metrics_name) - shutil.copyfile(ctab, op.join(io_generator.out_dir, op.basename(ctab))) - elif ctab is not None: - raise IOError("Argument 'ctab' must be an existing file.") - - if ctab and not mixm: - LGR.warning("Argument 'ctab' requires argument 'mixm'.") - ctab = None - elif manacc is not None and (not mixm or not ctab): - LGR.warning("Argument 'manacc' requires arguments 'mixm' and 'ctab'.") - manacc = None - elif manacc is not None: - # coerce to list of integers - manacc = [int(m) for m in manacc] - if t2smap is not None and op.isfile(t2smap): t2smap_file = io_generator.get_name("t2star img") t2smap = op.abspath(t2smap) @@ -680,67 +674,74 @@ def tedana_workflow( "ICA", metrics=required_metrics, ) - comptable, metric_metadata = selection.kundu_selection_v2(comptable, n_echos, n_vols) - - n_bold_comps = comptable[comptable.classification == "accepted"].shape[0] - if (n_restarts < 
maxrestart) and (n_bold_comps == 0): + ica_selector = selection.automatic_selection(comptable, n_echos, n_vols, tree=tree) + n_likely_bold_comps = ica_selector.n_likely_bold_comps + if (n_restarts < maxrestart) and (n_likely_bold_comps == 0): LGR.warning("No BOLD components found. Re-attempting ICA.") - elif n_bold_comps == 0: + elif n_likely_bold_comps == 0: LGR.warning("No BOLD components found, but maximum number of restarts reached.") keep_restarting = False else: keep_restarting = False + # If we're going to restart, temporarily allow force overwrite + if keep_restarting: + io_generator.overwrite = True RepLGR.disabled = True # Disable the report to avoid duplicate text RepLGR.disabled = False # Re-enable the report after the while loop is escaped + io_generator.overwrite = overwrite # Re-enable original overwrite behavior else: LGR.info("Using supplied mixing matrix from ICA") mixing_file = io_generator.get_name("ICA mixing tsv") mmix = pd.read_table(mixing_file).values - if ctab is None: - required_metrics = [ - "kappa", - "rho", - "countnoise", - "countsigFT2", - "countsigFS0", - "dice_FT2", - "dice_FS0", - "signal-noise_t", - "variance explained", - "normalized variance explained", - "d_table_score", - ] - comptable = metrics.collect.generate_metrics( - catd, - data_oc, - mmix, - masksum_clf, - tes, - io_generator, - "ICA", - metrics=required_metrics, - ) - comptable, metric_metadata = selection.kundu_selection_v2(comptable, n_echos, n_vols) - else: - LGR.info("Using supplied component table for classification") - comptable = pd.read_table(ctab) - # Change rationale value of rows with NaN to empty strings - comptable.loc[comptable.rationale.isna(), "rationale"] = "" - - if manacc is not None: - comptable, metric_metadata = selection.manual_selection(comptable, acc=manacc) + required_metrics = [ + "kappa", + "rho", + "countnoise", + "countsigFT2", + "countsigFS0", + "dice_FT2", + "dice_FS0", + "signal-noise_t", + "variance explained", + "normalized variance 
explained", + "d_table_score", + ] + comptable = metrics.collect.generate_metrics( + catd, + data_oc, + mmix, + masksum_clf, + tes, + io_generator, + "ICA", + metrics=required_metrics, + ) + ica_selector = selection.automatic_selection( + comptable, + n_echos, + n_vols, + tree=tree, + ) - # Write out ICA files. + # TODO The ICA mixing matrix should be written out after it is created + # It is currently being writen after component selection is done + # and rewritten if an existing mixing matrix is given as an input comp_names = comptable["Component"].values mixing_df = pd.DataFrame(data=mmix, columns=comp_names) - io_generator.save_file(mixing_df, "ICA mixing tsv") + if not op.exists(io_generator.get_name("ICA mixing tsv")): + io_generator.save_file(mixing_df, "ICA mixing tsv") + else: # Make sure the relative path to the supplied mixing matrix is saved in the registry + io_generator.registry["ICA mixing tsv"] = op.basename( + io_generator.get_name("ICA mixing tsv") + ) betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask_denoise), mask_denoise) io_generator.save_file(betas_oc, "z-scored ICA components img") - # Save component table and associated json - io_generator.save_file(comptable, "ICA metrics tsv") + # Save component selector and tree + ica_selector.to_files(io_generator) + # Save metrics and metadata metric_metadata = metrics.collect.get_metadata(comptable) io_generator.save_file(metric_metadata, "ICA metrics json") @@ -754,25 +755,27 @@ def tedana_workflow( "Description": "ICA fit to dimensionally-reduced optimally combined data.", "Method": "tedana", } - with open(io_generator.get_name("ICA decomposition json"), "w") as fo: - json.dump(decomp_metadata, fo, sort_keys=True, indent=4) + io_generator.save_file(decomp_metadata, "ICA decomposition json") - if comptable[comptable.classification == "accepted"].shape[0] == 0: + if ica_selector.n_likely_bold_comps == 0: LGR.warning("No BOLD components detected! 
Please check data and results!") + # TODO: un-hack separate comptable + comptable = ica_selector.component_table + mmix_orig = mmix.copy() if tedort: - acc_idx = comptable.loc[~comptable.classification.str.contains("rejected")].index.values - rej_idx = comptable.loc[comptable.classification.str.contains("rejected")].index.values - acc_ts = mmix[:, acc_idx] - rej_ts = mmix[:, rej_idx] + comps_accepted = ica_selector.accepted_comps + comps_rejected = ica_selector.rejected_comps + acc_ts = mmix[:, comps_accepted] + rej_ts = mmix[:, comps_rejected] betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0] pred_rej_ts = np.dot(acc_ts, betas) resid = rej_ts - pred_rej_ts - mmix[:, rej_idx] = resid + mmix[:, comps_rejected] = resid comp_names = [ io.add_decomp_prefix(comp, prefix="ICA", max_value=comptable.index.max()) - for comp in comptable.index.values + for comp in range(ica_selector.n_comps) ] mixing_df = pd.DataFrame(data=mmix, columns=comp_names) @@ -798,6 +801,9 @@ def tedana_workflow( if verbose: io.writeresults_echoes(catd, mmix, mask_denoise, comptable, io_generator) + # Write out registry of outputs + io_generator.save_self() + # Write out BIDS-compatible description file derivative_metadata = { "Name": "tedana Outputs", @@ -867,15 +873,8 @@ def tedana_workflow( png_cmap=png_cmap, ) - if sys.version_info.major == 3 and sys.version_info.minor < 6: - warn_msg = ( - "Reports requested but Python version is less than " - "3.6.0. Dynamic reports will not be generated." - ) - LGR.warn(warn_msg) - else: - LGR.info("Generating dynamic report") - reporting.generate_report(io_generator, tr=img_t_r) + LGR.info("Generating dynamic report") + reporting.generate_report(io_generator, tr=img_t_r) LGR.info("Workflow completed") utils.teardown_loggers() diff --git a/versioneer.py b/versioneer.py deleted file mode 100644 index 64fea1c89..000000000 --- a/versioneer.py +++ /dev/null @@ -1,1822 +0,0 @@ - -# Version: 0.18 - -"""The Versioneer - like a rocketeer, but for versions. 
- -The Versioneer -============== - -* like a rocketeer, but for versions! -* https://github.com/warner/python-versioneer -* Brian Warner -* License: Public Domain -* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy -* [![Latest Version] -(https://pypip.in/version/versioneer/badge.svg?style=flat) -](https://pypi.python.org/pypi/versioneer/) -* [![Build Status] -(https://travis-ci.org/warner/python-versioneer.png?branch=master) -](https://travis-ci.org/warner/python-versioneer) - -This is a tool for managing a recorded version number in distutils-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. - - -## Quick Install - -* `pip install versioneer` to somewhere to your $PATH -* add a `[versioneer]` section to your setup.cfg (see below) -* run `versioneer install` in your source tree, commit the results - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. 
Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes. - -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. - -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. 
- -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. - -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. - creating tarballs with a version string of "unknown". - -Some variants are more useful than others. Including `full-revisionid` in a -bug report should allow developers to reconstruct the exact code being tested -(or indicate the presence of local changes that should be shared with the -developers). `version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. 
- -The installer adds the following text to your `__init__.py` to place a basic -version in `YOURPROJECT.__version__`: - - from ._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Styles - -The setup.cfg `style=` configuration controls how the VCS information is -rendered into a version string. - -The default style, "pep440", produces a PEP440-compliant string, equal to the -un-prefixed tag name for actual releases, and containing an additional "local -version" section with more detail for in-between builds. For Git, this is -TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags ---dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the -tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and -that this commit is two revisions ("+2") beyond the "0.11" tag. For released -software (exactly equal to a known tag), the identifier will only contain the -stripped tag, e.g. "0.11". - -Other styles are available. See [details.md](details.md) in the Versioneer -source tree for descriptions. - -## Debugging - -Versioneer tries to avoid fatal errors: if something goes wrong, it will tend -to return a version of "0+unknown". To investigate the problem, run `setup.py -version`, which will run the version-lookup code in a verbose mode, and will -display the full contents of `get_versions()` (including the `error` string, -which may help identify what went wrong). - -## Known Limitations - -Some situations are known to cause problems for Versioneer. This details the -most significant ones. More can be found on Github -[issues page](https://github.com/warner/python-versioneer/issues). - -### Subprojects - -Versioneer has limited support for source trees in which `setup.py` is not in -the root directory (e.g. `setup.py` and `.git/` are *not* siblings). 
The are -two common reasons why `setup.py` might not be in the root: - -* Source trees which contain multiple subprojects, such as - [Buildbot](https://github.com/buildbot/buildbot), which contains both - "master" and "slave" subprojects, each with their own `setup.py`, - `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI - distributions (and upload multiple independently-installable tarballs). -* Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other langauges) in subdirectories. - -Versioneer will look for `.git` in parent directories, and most operations -should get the right version string. However `pip` and `setuptools` have bugs -and implementation details which frequently cause `pip install .` from a -subproject directory to fail to find a correct version string (so it usually -defaults to `0+unknown`). - -`pip install --editable .` should work correctly. `setup.py install` might -work too. - -Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in -some later version. - -[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking -this issue. The discussion in -[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the -issue from the Versioneer side in more detail. -[pip PR#3176](https://github.com/pypa/pip/pull/3176) and -[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve -pip to let Versioneer work correctly. - -Versioneer-0.16 and earlier only looked for a `.git` directory next to the -`setup.cfg`, so subprojects were completely unsupported with those releases. - -### Editable installs with setuptools <= 18.5 - -`setup.py develop` and `pip install --editable .` allow you to install a -project into a virtualenv once, then continue editing the source code (and -test) without re-installing after every change. 
- -"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a -convenient way to specify executable scripts that should be installed along -with the python package. - -These both work as expected when using modern setuptools. When using -setuptools-18.5 or earlier, however, certain operations will cause -`pkg_resources.DistributionNotFound` errors when running the entrypoint -script, which must be resolved by re-installing the package. This happens -when the install happens with one version, then the egg_info data is -regenerated while a different version is checked out. Many setup.py commands -cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into -a different virtualenv), so this can be surprising. - -[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes -this one, but upgrading to a newer version of setuptools should probably -resolve it. - -### Unicode version strings - -While Versioneer works (and is continually tested) with both Python 2 and -Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. -Newer releases probably generate unicode version strings on py2. It's not -clear that this is wrong, but it may be surprising for applications when then -write these strings to a network connection or include them in bytes-oriented -APIs like cryptographic checksums. - -[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates -this question. - - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg`, if necessary, to include any new configuration settings - indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. 
-* re-run `versioneer install` in your source tree, to replace - `SRC/_version.py` -* commit any changed files - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - - -## License - -To make Versioneer easier to embed, all its code is dedicated to the public -domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the Creative Commons "Public Domain -Dedication" license (CC0-1.0), as described in -https://creativecommons.org/publicdomain/zero/1.0/ . - -""" - -from __future__ import print_function -try: - import configparser -except ImportError: - import ConfigParser as configparser -import errno -import json -import os -import re -import subprocess -import sys - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_root(): - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . 
- """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ("Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND').") - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. - me = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(me)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir: - print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(me), versioneer_py)) - except NameError: - pass - return root - - -def get_config_from_root(root): - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise EnvironmentError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . 
- setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: - parser.readfp(f) - VCS = parser.get("versioneer", "VCS") # mandatory - - def get(parser, name): - if parser.has_option("versioneer", name): - return parser.get("versioneer", name) - return None - cfg = VersioneerConfig() - cfg.VCS = VCS - cfg.style = get(parser, "style") or "" - cfg.versionfile_source = get(parser, "versionfile_source") - cfg.versionfile_build = get(parser, "versionfile_build") - cfg.tag_prefix = get(parser, "tag_prefix") - if cfg.tag_prefix in ("''", '""'): - cfg.tag_prefix = "" - cfg.parentdir_prefix = get(parser, "parentdir_prefix") - cfg.verbose = get(parser, "verbose") - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if 
sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -LONG_VERSION_PY['git'] = ''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = 
stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. 
The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%%s*" %% tag_prefix], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], - cwd=root)[0].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%%d" %% pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Eexceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. 
- for i in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. 
However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. 
"2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse 
describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(manifest_in, versionfile_source, ipy): - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] - if ipy: - files.append(ipy) - try: - me = __file__ - if me.endswith(".pyc") or me.endswith(".pyo"): - me = os.path.splitext(me)[0] + ".py" - versioneer_file = os.path.relpath(me) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - f = open(".gitattributes", "r") - for line in f.readlines(): - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - f.close() - except EnvironmentError: - pass - if not present: - f = open(".gitattributes", "a+") - f.write("%s export-subst\n" % versionfile_source) - f.close() - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.18) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. 
- -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename): - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except EnvironmentError: - raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename, versions): - """Write the given version number to the given _version.py file.""" - os.unlink(filename) - contents = json.dumps(versions, sort_keys=True, - indent=1, separators=(",", ": ")) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Eexceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose=False): - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. 
- """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose - assert cfg.versionfile_source is not None, \ - "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. - - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - 
print("unable to compute version") - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, "error": "unable to compute version", - "date": None} - - -def get_version(): - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(): - """Get the custom setuptools/distutils subclasses used by Versioneer.""" - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. 
- # Also see https://github.com/warner/python-versioneer/issues/52 - - cmds = {} - - # we add "version" to both distutils and setuptools - from distutils.core import Command - - class cmd_version(Command): - description = "report generated version string" - user_options = [] - boolean_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - cmds["version"] = cmd_version - - # we override "build_py" in both distutils and setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. - # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? - # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? 
- - # we override different "build_py" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py - else: - from distutils.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_py"] = cmd_build_py - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. - # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... - - class cmd_build_exe(_build_exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if 'py2exe' in sys.modules: # py2exe enabled? 
- try: - from py2exe.distutils_buildexe import py2exe as _py2exe # py3 - except ImportError: - from py2exe.build_exe import py2exe as _py2exe # py2 - - class cmd_py2exe(_py2exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["py2exe"] = cmd_py2exe - - # we override different "sdist" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.sdist import sdist as _sdist - else: - from distutils.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self): - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, - self._versioneer_generated_versions) - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. 
You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. - -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -INIT_PY_SNIPPET = """ -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - - -def do_setup(): - """Main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except (EnvironmentError, configparser.NoSectionError, - configparser.NoOptionError) as e: - if isinstance(e, (EnvironmentError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", - file=sys.stderr) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), - "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - 
except EnvironmentError: - old = "" - if INIT_PY_SNIPPET not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(INIT_PY_SNIPPET) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. - manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except EnvironmentError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print(" appending versionfile_source ('%s') to MANIFEST.in" % - cfg.versionfile_source) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. 
- do_vcs_install(manifest_in, cfg.versionfile_source, ipy) - return 0 - - -def scan_setup_py(): - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py", "r") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1)