diff --git a/.circleci/config.yml b/.circleci/config.yml index 3cc6d7eab..587a51a33 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -533,9 +533,9 @@ jobs: - data-v2- - restore_cache: keys: - - bold-v5-{{ .Branch }} - - bold-v5-master - - bold-v5- + - bold-v6-{{ .Branch }} + - bold-v6-master + - bold-v6- - run: name: Remove old, cached configs @@ -570,7 +570,7 @@ jobs: path: /tmp/bold/misc - save_cache: - key: bold-v5-{{ .Branch }} + key: bold-v6-{{ .Branch }} paths: - /tmp/bold/work diff --git a/.maint/requirements.txt b/.maint/requirements.txt new file mode 100644 index 000000000..0fe377d22 --- /dev/null +++ b/.maint/requirements.txt @@ -0,0 +1,3 @@ +click +fuzzywuzzy +python-Levenshtein \ No newline at end of file diff --git a/.maint/update_authors.py b/.maint/update_authors.py index c16dca814..6a24712aa 100755 --- a/.maint/update_authors.py +++ b/.maint/update_authors.py @@ -119,9 +119,14 @@ def get_git_lines(fname='line-contributors.txt'): lines = contrib_file.read_text().splitlines() git_line_summary_path = shutil.which('git-line-summary') + if not git_line_summary_path: + git_line_summary_path = "git summary --dedup-by-email".split(" ") + else: + git_line_summary_path = [git_line_summary_path] + if not lines and git_line_summary_path: print('Running git-line-summary on repo') - lines = sp.check_output([git_line_summary_path]).decode().splitlines() + lines = sp.check_output(git_line_summary_path).decode().splitlines() lines = [line for line in lines if 'Not Committed Yet' not in line] contrib_file.write_text('\n'.join(lines)) diff --git a/.zenodo.json b/.zenodo.json index a3722bebb..a7e96e386 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -12,11 +12,6 @@ "affiliation": "Department of Psychology, Stanford University, CA, USA", "name": "Christopher J. Markiewicz" }, - { - "orcid": "0000-0001-7159-1387", - "affiliation": "Quantivly Inc., Somerville, MA, USA", - "name": "Zvi Baratz" - }, { "orcid": "0000-0003-3715-7012", "affiliation": "Department of Neuroimaging, Institute of Psychiatry, Psychology and Neuroscience, King's College London, London, UK", @@ -34,6 +29,12 @@ } ], "contributors": [ + { + "orcid": "0000-0001-7159-1387", + "affiliation": "Quantivly Inc., Somerville, MA, USA", + "name": "Zvi Baratz", + "type": "Researcher" + }, { "affiliation": "The University of Washington eScience Institute, WA, USA", "name": "Teresa Gomez", @@ -132,6 +133,12 @@ "name": "Taylor Salo", "type": "Researcher" }, + { + "orcid": "0000-0002-9661-1396", + "affiliation": "Brigham and Women's Hospital, Mass General Brigham, Harvard Medical School, MA, USA", + "name": "Jon Haitz Legarreta Gorro\u00f1o", + "type": "Researcher" + }, { "affiliation": "Max Planck Institute for Human Development, Berlin, Germany", "name": "Michael Krause", diff --git a/CHANGES.rst b/CHANGES.rst index 71fa3d89b..61e488aea 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,17 +1,35 @@ -24.0.0 (April 17, 2024) -======================= -Initial major release of 2024, featuring the **extraction of IQMs from DWI data** -for first time in *MRIQC*'s timeline. +24.0.2 (August 26, 2024) +======================== +A patch release with bugfixes and enhancements. -.. 
admonition:: Author list for papers based on *MRIQC* 23.0 series
+CHANGES
+-------
+
+* FIX: Pin latest *NiReports* release (24.0.2) addressing ``fMRIPlot`` issues by @oesteban (`#1342 <https://github.com/nipreps/mriqc/pull/1342>`__)
+* FIX: Edge artifacts in first and last slices due to interpolation by @oesteban (`#1338 <https://github.com/nipreps/mriqc/pull/1338>`__)
+* FIX: Normalize bids-filters' modality keys to be lowercase by @oesteban (`#1332 <https://github.com/nipreps/mriqc/pull/1332>`__)
+* ENH: Add license NOTICE to start banner by @oesteban (`#1343 <https://github.com/nipreps/mriqc/pull/1343>`__)
+* ENH: Enable writing crashfiles in compressed-pickle format by @oesteban (`#1339 <https://github.com/nipreps/mriqc/pull/1339>`__)
+* ENH: Use ``orjson`` to serialize JSON, addressing *Numpy* serialization issues by @oesteban (`#1337 <https://github.com/nipreps/mriqc/pull/1337>`__)
+* ENH: Handle WebAPI timeouts more gently by @oesteban (`#1336 <https://github.com/nipreps/mriqc/pull/1336>`__)
+
+
+24.0.1 (August 20, 2024)
+========================
+A patch release with a large number of bugfixes (mostly focusing on memory issues), maintenance
+activities, and, as a major optimization, metadata crawling that now runs before *Nipype* kicks in.
+
+With thanks to @jhlegarreta for his first contribution in `#1293 <https://github.com/nipreps/mriqc/pull/1293>`__.
+
+.. admonition:: Author list for papers based on *MRIQC* 24.0 series

   As described in the `Contributor Guidelines `__,
-   anyone listed as developer or contributor may write and submit manuscripts
+   anyone listed as a developer or contributor may write and submit manuscripts
   about *MRIQC*. To do so, please move the author(s) name(s) to the
   front of the following list:

-   Christopher J. Markiewicz \ :sup:`1`\ ; Zvi Baratz \ :sup:`2`\ ; Eilidh MacNicol \ :sup:`3`\ ; Céline Provins \ :sup:`4`\ ; Teresa Gomez \ :sup:`5`\ ; Dylan Nielson \ :sup:`6`\ ; Jan Varada \ :sup:`7`\ ; Ross W. Blair \ :sup:`1`\ ; Dimitri Papadopoulos Orfanos \ :sup:`8`\ ; William Triplett \ :sup:`9`\ ; Mathias Goncalves \ :sup:`1`\ ; Nikita Beliy \ :sup:`10`\ ; John A. Lee \ :sup:`11`\ ; Patrick Sadil \ :sup:`12`\ ; Ursula A. Tooley \ :sup:`13`\ ; Yibei Chen \ :sup:`14`\ ; James D. Kent \ :sup:`15`\ ; Yaroslav O. Halchenko \ :sup:`16`\ ; Bennet Fauber \ :sup:`17`\ ; Taylor Salo \ :sup:`18`\ ; Michael Krause \ :sup:`19`\ ; Pablo Velasco \ :sup:`20`\ ; Thomas Nichols \ :sup:`21`\ ; Adam Huffman \ :sup:`22`\ ; Elodie Savary \ :sup:`4`\ ; Johannes Achtzehn \ :sup:`23`\ ; Joke Durnez \ :sup:`1`\ ; Satrajit S. Ghosh \ :sup:`24`\ ; Adam C. Raikes \ :sup:`25`\ ; Asier Erramuzpe \ :sup:`26`\ ; Benjamin Kay \ :sup:`27`\ ; Daniel Birman \ :sup:`1`\ ; McKenzie P. Hagen \ :sup:`28`\ ; Michael Dayan \ :sup:`29`\ ; Michael G. Clark \ :sup:`30`\ ; Rafael Garcia-Dias \ :sup:`31`\ ; Adam G. Thomas \ :sup:`32`\ ; Russell A. Poldrack \ :sup:`1`\ ; Ariel Rokem \ :sup:`5`\ ; Oscar Esteban \ :sup:`4`\ .
+   Christopher J. Markiewicz \ :sup:`1`\ ; Zvi Baratz \ :sup:`2`\ ; Eilidh MacNicol \ :sup:`3`\ ; Céline Provins \ :sup:`4`\ ; Teresa Gomez \ :sup:`5`\ ; Dylan Nielson \ :sup:`6`\ ; Ross W. Blair \ :sup:`1`\ ; Jan Varada \ :sup:`7`\ ; Dimitri Papadopoulos Orfanos \ :sup:`8`\ ; William Triplett \ :sup:`9`\ ; Mathias Goncalves \ :sup:`1`\ ; Nikita Beliy \ :sup:`10`\ ; John A. Lee \ :sup:`11`\ ; Yibei Chen \ :sup:`12`\ ; Ursula A. Tooley \ :sup:`13`\ ; Patrick Sadil \ :sup:`14`\ ; Yaroslav O. Halchenko \ :sup:`15`\ ; James D. Kent \ :sup:`16`\ ; Taylor Salo \ :sup:`17`\ ; Bennet Fauber \ :sup:`18`\ ; Thomas Nichols \ :sup:`19`\ ; Pablo Velasco \ :sup:`20`\ ; Michael Krause \ :sup:`21`\ ; Jon Haitz Legarreta Gorroño \ :sup:`22`\ ; Satrajit S. Ghosh \ :sup:`23`\ ; Joke Durnez \ :sup:`1`\ ; Johannes Achtzehn \ :sup:`24`\ ; Elodie Savary \ :sup:`4`\ ; Adam Huffman \ :sup:`25`\ ; Rafael Garcia-Dias \ :sup:`26`\ ; Michael G.
Clark \ :sup:`27`\ ; Michael Dayan \ :sup:`28`\ ; McKenzie P. Hagen \ :sup:`29`\ ; Daniel Birman \ :sup:`1`\ ; Benjamin Kay \ :sup:`30`\ ; Asier Erramuzpe \ :sup:`31`\ ; Adam C. Raikes \ :sup:`32`\ ; Adam G. Thomas \ :sup:`33`\ ; Russell A. Poldrack \ :sup:`1`\ ; Ariel Rokem \ :sup:`5`\ ; Oscar Esteban \ :sup:`4`\ . Affiliations: @@ -26,27 +44,63 @@ for first time in *MRIQC*'s timeline. 9. University of Florida: Gainesville, Florida, US 10. CRC ULiege, Liege, Belgium 11. Quansight, Dublin, Ireland - 12. Johns Hopkins Bloomberg School of Public Health, MD, USA + 12. McGovern Institute for Brain Research, Massachusetts Institute of Technology, Cambridge, USA 13. Department of Neuroscience, University of Pennsylvania, PA, USA - 14. McGovern Institute for Brain Research, Massachusetts Institute of Technology, Cambridge, USA - 15. Department of Psychology, University of Texas at Austin, TX, USA - 16. Psychological and Brain Sciences Department, Dartmouth College, NH, USA - 17. University of Michigan, Ann Arbor, USA - 18. Department of Psychology, Florida International University, FL, USA - 19. Max Planck Institute for Human Development, Berlin, Germany + 14. Johns Hopkins Bloomberg School of Public Health, MD, USA + 15. Psychological and Brain Sciences Department, Dartmouth College, NH, USA + 16. Department of Psychology, University of Texas at Austin, TX, USA + 17. Department of Psychology, Florida International University, FL, USA + 18. University of Michigan, Ann Arbor, USA + 19. Oxford Big Data Institute, University of Oxford, Oxford, GB 20. Center for Brain Imaging, New York University, NY, USA - 21. Oxford Big Data Institute, University of Oxford, Oxford, GB - 22. Department of Physics, Imperial College London, London, UK - 23. Charité Berlin, Berlin, Germany - 24. McGovern Institute for Brain Research, MIT, MA, USA; and Department of Otolaryngology, Harvard Medical School, MA, USA - 25. Center for Innovation in Brain Science, University of Arizona, Tucson, AZ, USA - 26. Computational Neuroimaging Lab, BioCruces Health Research Institute - 27. Washington University School of Medicine, St.Louis, MO, USA - 28. Psychology Department, University of Washington, Seattle, WA, USA - 29. International Committee of the Red Cross - ICRC, Geneva, Switzerland - 30. National Institutes of Health, USA - 31. Institute of Psychiatry, Psychology & Neuroscience, King's College London, London, UK - 32. Data Science and Sharing Team, National Institute of Mental Health, Bethesda, MD, USA + 21. Max Planck Institute for Human Development, Berlin, Germany + 22. Brigham and Women's Hospital, Mass General Brigham, Harvard Medical School, MA, USA + 23. McGovern Institute for Brain Research, MIT, MA, USA; and Department of Otolaryngology, Harvard Medical School, MA, USA + 24. Charité Berlin, Berlin, Germany + 25. Department of Physics, Imperial College London, London, UK + 26. Institute of Psychiatry, Psychology & Neuroscience, King's College London, London, UK + 27. National Institutes of Health, USA + 28. International Committee of the Red Cross - ICRC, Geneva, Switzerland + 29. Psychology Department, University of Washington, Seattle, WA, USA + 30. Washington University School of Medicine, St.Louis, MO, USA + 31. Computational Neuroimaging Lab, BioCruces Health Research Institute + 32. Center for Innovation in Brain Science, University of Arizona, Tucson, AZ, USA + 33. 
Data Science and Sharing Team, National Institute of Mental Health, Bethesda, MD, USA + +CHANGES +------- + +* FIX: Multiecho fMRI crashing with 'unhashable type' errors by @oesteban in https://github.com/nipreps/mriqc/pull/1295 +* FIX: Set ``n_procs`` instead of ``num_threads`` on node ``apply_hmc`` by @oesteban in https://github.com/nipreps/mriqc/pull/1309 +* FIX: Address memory issues by limiting ``BigPlot``'s parallelization. by @oesteban in https://github.com/nipreps/mriqc/pull/1320 +* FIX: Address memory issues in the DWI pipeline by @oesteban in https://github.com/nipreps/mriqc/pull/1323 +* FIX: Limit IQMs' node number of processes and, therefore, memory by @oesteban in https://github.com/nipreps/mriqc/pull/1325 +* FIX: Resolve numeric overflow in drift estimation node by @oesteban in https://github.com/nipreps/mriqc/pull/1324 +* FIX: Revise bugfix #1324 by @oesteban in https://github.com/nipreps/mriqc/pull/1327 +* FIX: Remove unreachable code within DWI pipeline by @oesteban in https://github.com/nipreps/mriqc/pull/1328 +* ENH: Allow moving the cache folder with an environment variable by @oesteban in https://github.com/nipreps/mriqc/pull/1285 +* ENH: Flatten multi-echo lists in circumstances that they fail by @oesteban in https://github.com/nipreps/mriqc/pull/1286 +* ENH: Added type hints to config module by @zvi-quantivly in https://github.com/nipreps/mriqc/pull/1288 +* ENH: Add test for the CLI parser by @jhlegarreta in https://github.com/nipreps/mriqc/pull/1293 +* ENH: Add CLI entry point test by @jhlegarreta in https://github.com/nipreps/mriqc/pull/1294 +* ENH: Add a development Dockerfile for testing local changes to the repo. by @rwblair in https://github.com/nipreps/mriqc/pull/1299 +* ENH: Crawl dataset's metadata only once and before Nipype's workflow by @oesteban in https://github.com/nipreps/mriqc/pull/1317 +* ENH(dMRI): Deal gracefully with small CC masks by @oesteban in https://github.com/nipreps/mriqc/pull/1311 +* ENH: Leverage new spun-off apply interface by @oesteban in https://github.com/nipreps/mriqc/pull/1313 +* MAINT: Removed personal information from maintainers and updated in contributors by @zvi-quantivly in https://github.com/nipreps/mriqc/pull/1289 +* MAINT: Add JHLegarreta to the contributors list by @jhlegarreta in https://github.com/nipreps/mriqc/pull/1301 +* MAINT: Flexibilize pandas pinned version by @oesteban in https://github.com/nipreps/mriqc/pull/1310 +* MAINT: Remove *Pandas*'s ``FutureWarning`` by @oesteban in https://github.com/nipreps/mriqc/pull/1326 +* DOC: Add description of ``summary_fg`` to the documentation by @celprov in https://github.com/nipreps/mriqc/pull/1306 +* STY: Apply ruff/flake8-implicit-str-concat rule ISC001 by @DimitriPapadopoulos in https://github.com/nipreps/mriqc/pull/1296 +* STY: Format *Jupyter notebooks* by @oesteban in https://github.com/nipreps/mriqc/pull/1321 + +**Full Changelog**: https://github.com/nipreps/mriqc/compare/24.0.0...24.0.1 + +24.0.0 (April 17, 2024) +======================= +Initial major release of 2024, featuring the **extraction of IQMs from DWI data** +for the first time in *MRIQC*'s timeline. 
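An illustrative aside on the numeric-overflow fix (`#1324` under 24.0.1 above): the sketch below
does not reproduce *MRIQC*'s actual drift-estimation code; it only demonstrates the class of bug
being addressed, in which accumulating over a narrow integer dtype wraps around silently in
*NumPy*:

.. code-block:: python

    # Illustrative sketch -- NOT the code from nipreps/mriqc#1324.
    # Accumulating over a narrow integer dtype wraps around silently.
    import numpy as np

    # A plausible EPI frame stored as int16, as scanners commonly produce.
    frame = np.full((64, 64, 32), 2000, dtype=np.int16)

    # Forcing an int16 accumulator overflows: 64 * 64 * 32 * 2000 = 262_144_000,
    # which wraps modulo 2**16 down to 0 here.
    print(frame.sum(dtype=np.int16))

    # Widening the accumulator (NumPy's default for small-int sums) is safe.
    print(frame.sum(dtype=np.int64))  # 262144000

The merged fix may differ in detail; the takeaway is simply that casting to a wider dtype before
reducing avoids the silent wrap-around.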
CHANGES ------- diff --git a/MANIFEST.in b/MANIFEST.in index a0aadf832..979112822 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -9,6 +9,8 @@ recursive-exclude test/ * exclude .* exclude Dockerfile +exclude Dockerfile_devel +exclude Makefile #data recursive-include mriqc/data * diff --git a/NOTICE b/NOTICE deleted file mode 100644 index a7e7c982c..000000000 --- a/NOTICE +++ /dev/null @@ -1,17 +0,0 @@ -MRIQC -Copyright 2021 The NiPreps Developers. - -This product includes software developed by -the NiPreps Community (https://nipreps.org/). - -Portions of this software were developed at the Department of -Psychology at Stanford University, Stanford, CA, US. - -This software contains code ultimately derived from the -PCP Quality Assessment Protocol (QAP; -http://preprocessed-connectomes-project.org/quality-assessment-protocol) -by C. Craddock, S. Giavasis, D. Clark, Z. Shezhad, and J. Pellman. - -This software is also distributed as a Docker container image. -The bootstrapping file for the image ("Dockerfile") is licensed -under the MIT License. diff --git a/NOTICE b/NOTICE new file mode 120000 index 000000000..233b6c360 --- /dev/null +++ b/NOTICE @@ -0,0 +1 @@ +mriqc/data/NOTICE \ No newline at end of file diff --git a/README.rst b/README.rst index af2402bae..ad9b3cc97 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ mriqc: image quality metrics for quality assessment of MRI ========================================================== -|DOI| |Zenodo| |Package| |Pythons| |DevStatus| |License| |Documentation| |CircleCI| +|DOI| |Zenodo| |Package| |Pythons| |DevStatus| |License| |Documentation| |CircleCI| |EOSS| MRIQC extracts no-reference IQMs (image quality metrics) from structural (T1w and T2w) and functional MRI (magnetic resonance imaging) @@ -126,3 +126,6 @@ brain connectivity using MRI*” (grant number :target: http://mriqc.readthedocs.io/en/latest/?badge=latest .. |CircleCI| image:: https://circleci.com/gh/nipreps/mriqc/tree/master.svg?style=shield :target: https://circleci.com/gh/nipreps/mriqc/tree/master +.. 
|EOSS| image:: https://chanzuckerberg.github.io/open-science/badges/CZI-EOSS.svg + :target: https://czi.co/EOSS + :alt: CZI's Essential Open Source Software for Science diff --git a/docs/notebooks/MRIQC Web API.ipynb b/docs/notebooks/MRIQC Web API.ipynb index bc64528c1..b0e6cfcd9 100644 --- a/docs/notebooks/MRIQC Web API.ipynb +++ b/docs/notebooks/MRIQC Web API.ipynb @@ -21,12 +21,13 @@ "source": [ "import pandas as pd\n", "from json import load\n", - "import urllib.request, json \n", + "import urllib.request, json\n", "from pandas.io.json import json_normalize\n", "import seaborn as sns\n", "import pylab as plt\n", "import multiprocessing as mp\n", "import numpy as np\n", + "\n", "%matplotlib inline" ] }, @@ -52,23 +53,22 @@ " url_root = 'https://mriqc.nimh.nih.gov/api/v1/{modality}?{query}'\n", " page = 1\n", " dfs = []\n", - " \n", + "\n", " if versions is None:\n", " versions = ['*']\n", "\n", " for version in versions:\n", " while True:\n", " query = []\n", - " \n", + "\n", " if software is not None:\n", " query.append('\"provenance.software\":\"%s\"' % software)\n", - " \n", + "\n", " if version != '*':\n", " query.append('\"provenance.version\":\"%s\"' % version)\n", - " \n", + "\n", " page_url = url_root.format(\n", - " modality=modality,\n", - " query='where={%s}&page=%d' % (','.join(query), page)\n", + " modality=modality, query='where={%s}&page=%d' % (','.join(query), page)\n", " )\n", " with urllib.request.urlopen(page_url) as url:\n", " data = json.loads(url.read().decode())\n", @@ -87,13 +87,13 @@ " Distribution plot of a given measure\n", " \"\"\"\n", " sns.distplot(data, ax=ax, label=label)\n", - " \n", + "\n", " if xlabel is not None:\n", " ax.set_xlabel(xlabel)\n", - " \n", + "\n", " if min is None:\n", " min = np.percentile(data, 0.5)\n", - " \n", + "\n", " if max is None:\n", " max = np.percentile(data, 99.5)\n", " ax.set_xlim((min, max))" @@ -165,7 +165,7 @@ "ax.set_title('Number of T1w records in database')\n", "ax.legend()\n", "\n", - "plt.savefig(\"fig03a-0.svg\", bbox_inches='tight', transparent=False, pad_inches=0)" + "plt.savefig('fig03a-0.svg', bbox_inches='tight', transparent=False, pad_inches=0)" ] }, { @@ -195,7 +195,7 @@ "ax.plot(dates_bold_u, list(range(1, len(dates_bold_u) + 1)), label='unique')\n", "ax.set_title('Number of BOLD records in database')\n", "ax.legend()\n", - "plt.savefig(\"fig03a-1.svg\", bbox_inches='tight', transparent=False, pad_inches=0)" + "plt.savefig('fig03a-1.svg', bbox_inches='tight', transparent=False, pad_inches=0)" ] }, { @@ -221,8 +221,17 @@ } ], "source": [ - "print(','.join([l for l in df_t1w.columns \n", - " if not l.startswith('_') and not l.startswith('bids_meta') and not l.startswith('provenance')]))" + "print(\n", + " ','.join(\n", + " [\n", + " l\n", + " for l in df_t1w.columns\n", + " if not l.startswith('_')\n", + " and not l.startswith('bids_meta')\n", + " and not l.startswith('provenance')\n", + " ]\n", + " )\n", + ")" ] }, { @@ -242,14 +251,18 @@ } ], "source": [ - "f, ax = plt.subplots(1, 5, figsize=(25,5))\n", + "f, ax = plt.subplots(1, 5, figsize=(25, 5))\n", "plot_measure(df_t1w_unique.cjv, xlabel='Coefficient of joint variation (CJV)', ax=ax[0])\n", "plot_measure(df_t1w_unique.cnr, xlabel='Contrast-to-noise ratio (CNR)', ax=ax[1])\n", - "plot_measure(df_t1w_unique.snr_wm, xlabel='Signal-to-noise ratio estimated on the white-matter (SNR)', ax=ax[2])\n", + "plot_measure(\n", + " df_t1w_unique.snr_wm,\n", + " xlabel='Signal-to-noise ratio estimated on the white-matter (SNR)',\n", + " ax=ax[2],\n", + ")\n", 
"plot_measure(df_t1w_unique.fwhm_avg, xlabel='Smoothness (FWHM)', ax=ax[3])\n", "plot_measure(df_t1w_unique.wm2max, xlabel='WM-to-max intensity ratio (WM2MAX)', ax=ax[4])\n", "plt.suptitle('Distributions of some IQMs extracted from T1-weighted MRI')\n", - "plt.savefig(\"fig03b-0.svg\", bbox_inches='tight', transparent=False, pad_inches=0)" + "plt.savefig('fig03b-0.svg', bbox_inches='tight', transparent=False, pad_inches=0)" ] }, { @@ -275,8 +288,17 @@ } ], "source": [ - "print(','.join([l for l in df_bold.columns \n", - " if not l.startswith('_') and not l.startswith('bids_meta') and not l.startswith('provenance')]))" + "print(\n", + " ','.join(\n", + " [\n", + " l\n", + " for l in df_bold.columns\n", + " if not l.startswith('_')\n", + " and not l.startswith('bids_meta')\n", + " and not l.startswith('provenance')\n", + " ]\n", + " )\n", + ")" ] }, { @@ -296,8 +318,13 @@ } ], "source": [ - "f, ax = plt.subplots(1, 5, figsize=(25,5))\n", - "plot_measure(df_bold_unique[df_bold_unique.fd_mean < 10].fd_mean, xlabel='Framewise Displacement (mm)', max=2, ax=ax[0])\n", + "f, ax = plt.subplots(1, 5, figsize=(25, 5))\n", + "plot_measure(\n", + " df_bold_unique[df_bold_unique.fd_mean < 10].fd_mean,\n", + " xlabel='Framewise Displacement (mm)',\n", + " max=2,\n", + " ax=ax[0],\n", + ")\n", "plot_measure(df_bold_unique[df_bold_unique.dvars_nstd < 100].dvars_nstd, xlabel='DVARS', ax=ax[1])\n", "plot_measure(df_bold_unique.gcor, xlabel='Global correlation', ax=ax[2])\n", "plot_measure(df_bold_unique.gsr_x, label='x-axis', ax=ax[3])\n", @@ -305,7 +332,9 @@ "ax[3].legend()\n", "plot_measure(df_bold_unique.tsnr, xlabel='Temporal SNR (tSNR)', ax=ax[4])\n", "plt.suptitle('Distributions of some IQMs extracted from BOLD fMRI')\n", - "plt.savefig(\"fig03b-1.png\", bbox_inches='tight', transparent=False, pad_inches=0, facecolor='white')" + "plt.savefig(\n", + " 'fig03b-1.png', bbox_inches='tight', transparent=False, pad_inches=0, facecolor='white'\n", + ")" ] } ], diff --git a/docs/notebooks/Paper-v1.0.ipynb b/docs/notebooks/Paper-v1.0.ipynb index c46707c1d..5fbda5fee 100644 --- a/docs/notebooks/Paper-v1.0.ipynb +++ b/docs/notebooks/Paper-v1.0.ipynb @@ -17,7 +17,7 @@ "import numpy as np\n", "import seaborn as sn\n", "\n", - "sn.set(style=\"whitegrid\")" + "sn.set(style='whitegrid')" ] }, { @@ -29,6 +29,7 @@ "outputs": [], "source": [ "from mriqc.classifier import data as mcd\n", + "\n", "abide, _ = mcd.read_dataset(x_path, y_path, rate_label='rater_1')\n", "sites = list(sorted(set(abide.site.values.ravel())))\n", "\n", @@ -38,18 +39,27 @@ "\n", "for site in sites:\n", " subabide = abide.loc[abide.site.str.contains(site)]\n", - " \n", - " medians = np.median(subabide[['size_x', 'size_y', 'size_z', 'spacing_x', 'spacing_y', 'spacing_z']],\n", - " axis=0)\n", - " \n", - " mins = np.abs(medians - np.min(\n", - " subabide[['size_x', 'size_y', 'size_z', 'spacing_x', 'spacing_y', 'spacing_z']], axis=0))\n", "\n", - " maxs = np.abs(medians - np.max(\n", - " subabide[['size_x', 'size_y', 'size_z', 'spacing_x', 'spacing_y', 'spacing_z']], axis=0))\n", + " medians = np.median(\n", + " subabide[['size_x', 'size_y', 'size_z', 'spacing_x', 'spacing_y', 'spacing_z']], axis=0\n", + " )\n", + "\n", + " mins = np.abs(\n", + " medians\n", + " - np.min(\n", + " subabide[['size_x', 'size_y', 'size_z', 'spacing_x', 'spacing_y', 'spacing_z']], axis=0\n", + " )\n", + " )\n", + "\n", + " maxs = np.abs(\n", + " medians\n", + " - np.max(\n", + " subabide[['size_x', 'size_y', 'size_z', 'spacing_x', 'spacing_y', 'spacing_z']], axis=0\n", + " 
)\n", + " )\n", "\n", " ranges = np.max(np.vstack((maxs, mins)), axis=0)\n", - " \n", + "\n", " print(\n", " fmt.format(\n", " site=site,\n", @@ -57,8 +67,8 @@ " sr=tuple(ranges[:3].astype(int)),\n", " sp=tuple(medians[3:]),\n", " spr=tuple(ranges[3:]),\n", - "\n", - "))" + " )\n", + " )" ] }, { @@ -69,7 +79,7 @@ }, "outputs": [], "source": [ - "#data_path = '/home/oesteban/Google Drive/mriqc'\n", + "# data_path = '/home/oesteban/Google Drive/mriqc'\n", "data_path = '/home/oesteban/tmp/mriqc-ml-tests-2/'\n", "out_path = data_path\n", "loso = pd.read_csv(op.join(data_path, 'cv_loso_inner.csv'), index_col=False)\n", @@ -78,6 +88,7 @@ "kfold_outer = pd.read_csv(op.join(data_path, 'cv_kfold_outer.csv'), index_col=False)\n", "loso_outer = pd.read_csv(op.join(data_path, 'cv_loso_outer.csv'), index_col=False)\n", "\n", + "\n", "def gen_newparams(dataframe):\n", " thisdf = dataframe.copy()\n", " thisdf['zscored_str'] = ['nzs'] * len(thisdf['zscored'])\n", @@ -86,6 +97,7 @@ " del thisdf['zscored_str']\n", " return thisdf\n", "\n", + "\n", "loso = gen_newparams(loso)\n", "kfold = gen_newparams(kfold)" ] @@ -108,41 +120,55 @@ "for i, split_cv in enumerate([loso, kfold]):\n", " best_models[spstr[i]] = {}\n", " splitcols = [col for col in split_cv.columns.ravel() if col.startswith('split0')]\n", - " for clf in ['svc_linear-nzs', 'svc_rbf-nzs', 'rfc-nzs', 'svc_linear-zs', 'svc_rbf-zs', 'rfc-zs']:\n", + " for clf in [\n", + " 'svc_linear-nzs',\n", + " 'svc_rbf-nzs',\n", + " 'rfc-nzs',\n", + " 'svc_linear-zs',\n", + " 'svc_rbf-zs',\n", + " 'rfc-zs',\n", + " ]:\n", " thismodeldf = split_cv.loc[split_cv.params.str.contains(clf)]\n", " max_auc = thismodeldf.mean_auc.max()\n", " best = thismodeldf.loc[thismodeldf.mean_auc >= max_auc]\n", " best_list = best.params.values.ravel().tolist()\n", - " \n", + "\n", " if len(best_list) == 1:\n", " best_models[spstr[i]][clf] = best_list[0]\n", " else:\n", - " overall_means = [thismodeldf.loc[thismodeldf.params.str.contains(pset), 'mean_auc'].mean()\n", - " for pset in best_list]\n", + " overall_means = [\n", + " thismodeldf.loc[thismodeldf.params.str.contains(pset), 'mean_auc'].mean()\n", + " for pset in best_list\n", + " ]\n", " overall_max = np.max(overall_means)\n", " if sum([val >= overall_max for val in overall_means]) == 1:\n", " best_models[spstr[i]][clf] = best_list[np.argmax(overall_means)]\n", " else:\n", " best_models[spstr[i]][clf] = best_list[0]\n", - " \n", + "\n", "newdict = {'AUC': [], 'Classifier': [], 'Split scheme': []}\n", "\n", - "modelnames = {'rfc-nzs': 'RFC-nzs', 'rfc-zs': 'RFC-zs',\n", - " 'svc_linear-nzs': 'SVC_lin-nzs', 'svc_linear-zs': 'SVC_lin-zs',\n", - " 'svc_rbf-nzs': 'SVC_rbf-nzs', 'svc_rbf-zs': 'SVC_rbf-zs'}\n", + "modelnames = {\n", + " 'rfc-nzs': 'RFC-nzs',\n", + " 'rfc-zs': 'RFC-zs',\n", + " 'svc_linear-nzs': 'SVC_lin-nzs',\n", + " 'svc_linear-zs': 'SVC_lin-zs',\n", + " 'svc_rbf-nzs': 'SVC_rbf-nzs',\n", + " 'svc_rbf-zs': 'SVC_rbf-zs',\n", + "}\n", "\n", "for key, val in list(best_models['LoSo'].items()):\n", " scores = loso.loc[loso.params.str.contains(val), 'mean_auc'].values.ravel().tolist()\n", " nscores = len(scores)\n", - " \n", + "\n", " newdict['AUC'] += scores\n", " newdict['Classifier'] += [modelnames[key]] * nscores\n", " newdict['Split scheme'] += ['LoSo (16 folds)'] * nscores\n", - " \n", + "\n", "for key, val in list(best_models['10-fold'].items()):\n", " scores = kfold.loc[kfold.params.str.contains(val), 'mean_auc'].values.ravel().tolist()\n", " nscores = len(scores)\n", - " \n", + "\n", " newdict['AUC'] += scores\n", 
" newdict['Classifier'] += [modelnames[key]] * nscores\n", " newdict['Split scheme'] += ['10-fold'] * nscores\n", @@ -158,70 +184,120 @@ }, "outputs": [], "source": [ - "def plot_cv_outer(data, score='auc', zscored=0, ax=None, ds030_score=None,\n", - " split_type='LoSo', color='dodgerblue'):\n", - " \n", + "def plot_cv_outer(\n", + " data, score='auc', zscored=0, ax=None, ds030_score=None, split_type='LoSo', color='dodgerblue'\n", + "):\n", " if ax is None:\n", " ax = plt.gca()\n", - " \n", + "\n", " outer_score = data.loc[data[score].notnull(), [score, 'zscored']]\n", - " sn.distplot(outer_score.loc[outer_score.zscored==zscored, score],\n", - " hist=True, norm_hist=True, ax=ax, color=color, label=split_type)\n", + " sn.distplot(\n", + " outer_score.loc[outer_score.zscored == zscored, score],\n", + " hist=True,\n", + " norm_hist=True,\n", + " ax=ax,\n", + " color=color,\n", + " label=split_type,\n", + " )\n", " ax.set_xlim([0.4, 1.0])\n", " ax.grid(False)\n", " ax.set_yticklabels([])\n", - " \n", - " mean = outer_score.loc[outer_score.zscored==zscored, score].mean()\n", - " std = outer_score.loc[outer_score.zscored==zscored, score].std()\n", + "\n", + " mean = outer_score.loc[outer_score.zscored == zscored, score].mean()\n", + " std = outer_score.loc[outer_score.zscored == zscored, score].std()\n", "\n", " mean_coord = draw_line(mean, ax=ax, color=color, lw=2.0, marker='o', extend=True)\n", - " \n", + "\n", " ymax = ax.get_ylim()[1]\n", " draw_line(mean - std, ax=ax, color=color, extend=True)\n", " draw_line(mean + std, ax=ax, color=color, extend=True)\n", - " \n", - " \n", + "\n", " ax.annotate(\n", - " '$\\mu$=%0.3f' % mean, xy=(mean_coord[0], 0.75*ymax), xytext=(-35, 30),\n", - " textcoords='offset points', va='center', color='w', size=14,\n", + " '$\\mu$=%0.3f' % mean,\n", + " xy=(mean_coord[0], 0.75 * ymax),\n", + " xytext=(-35, 30),\n", + " textcoords='offset points',\n", + " va='center',\n", + " color='w',\n", + " size=14,\n", " bbox=dict(boxstyle='round', fc=color, ec='none', color='none', lw=0),\n", " arrowprops=dict(\n", - " arrowstyle='wedge,tail_width=0.8', lw=0, patchA=None, patchB=None,\n", - " fc=color, ec='none', relpos=(0.5, 0.5)))\n", - " sigmay = 0.70*ymax\n", - " ax.annotate(s='', xy=(mean - std, sigmay), xytext=(mean + std, sigmay), arrowprops=dict(arrowstyle='<->'))\n", + " arrowstyle='wedge,tail_width=0.8',\n", + " lw=0,\n", + " patchA=None,\n", + " patchB=None,\n", + " fc=color,\n", + " ec='none',\n", + " relpos=(0.5, 0.5),\n", + " ),\n", + " )\n", + " sigmay = 0.70 * ymax\n", + " ax.annotate(\n", + " s='',\n", + " xy=(mean - std, sigmay),\n", + " xytext=(mean + std, sigmay),\n", + " arrowprops=dict(arrowstyle='<->'),\n", + " )\n", " ax.annotate(\n", - " '$2\\sigma$=%0.3f' % (2 * std), xy=(mean_coord[0], 0.70*ymax), xytext=(-25, -12),\n", - " textcoords='offset points', va='center', color='k', size=12,\n", - " bbox=dict(boxstyle='round', fc='w', ec='none', color='none', alpha=.7, lw=0))\n", - " \n", + " '$2\\sigma$=%0.3f' % (2 * std),\n", + " xy=(mean_coord[0], 0.70 * ymax),\n", + " xytext=(-25, -12),\n", + " textcoords='offset points',\n", + " va='center',\n", + " color='k',\n", + " size=12,\n", + " bbox=dict(boxstyle='round', fc='w', ec='none', color='none', alpha=0.7, lw=0),\n", + " )\n", + "\n", " if ds030_score is not None:\n", " ds030_coord = draw_line(ds030_score, ax=ax, color='k', marker='o')\n", " ax.annotate(\n", - " 'DS030', xy=ds030_coord, xytext=(-100, 0),\n", - " textcoords='offset points', va='center', color='w', size=16,\n", + " 'DS030',\n", + " 
xy=ds030_coord,\n", + " xytext=(-100, 0),\n", + " textcoords='offset points',\n", + " va='center',\n", + " color='w',\n", + " size=16,\n", " bbox=dict(boxstyle='round', fc=color, ec='none', color='none', lw=0),\n", " arrowprops=dict(\n", - " arrowstyle='wedge,tail_width=0.8', lw=0, patchA=None, patchB=None,\n", - " fc=color, ec='none', relpos=(0.5, 0.5)))\n", - " \n", - " \n", - "def draw_line(score, ax=None, color='k', marker=None, lw=.7, extend=False):\n", + " arrowstyle='wedge,tail_width=0.8',\n", + " lw=0,\n", + " patchA=None,\n", + " patchB=None,\n", + " fc=color,\n", + " ec='none',\n", + " relpos=(0.5, 0.5),\n", + " ),\n", + " )\n", + "\n", + "\n", + "def draw_line(score, ax=None, color='k', marker=None, lw=0.7, extend=False):\n", " if ax is None:\n", " ax = plt.gca()\n", - " \n", + "\n", " if score > 1.0:\n", " score = 1.0\n", - " \n", + "\n", " coords = [score, -1]\n", " pdf_points = ax.lines[0].get_data()\n", " coords[1] = np.interp([coords[0]], pdf_points[0], pdf_points[1])\n", - " \n", + "\n", " if extend:\n", - " ax.axvline(coords[0], ymin=coords[1] / ax.get_ylim()[1], ymax=0.75, color='gray', lw=.7)\n", - " \n", - " ax.axvline(coords[0], ymin=coords[1] / ax.get_ylim()[1], ymax=0, color=color, marker=marker, markevery=2,\n", - " markeredgewidth=1.5, markerfacecolor='w', markeredgecolor=color, lw=lw)\n", + " ax.axvline(coords[0], ymin=coords[1] / ax.get_ylim()[1], ymax=0.75, color='gray', lw=0.7)\n", + "\n", + " ax.axvline(\n", + " coords[0],\n", + " ymin=coords[1] / ax.get_ylim()[1],\n", + " ymax=0,\n", + " color=color,\n", + " marker=marker,\n", + " markevery=2,\n", + " markeredgewidth=1.5,\n", + " markerfacecolor='w',\n", + " markeredgecolor=color,\n", + " lw=lw,\n", + " )\n", "\n", " return coords" ] @@ -234,43 +310,67 @@ }, "outputs": [], "source": [ - "sn.set(style=\"whitegrid\")\n", + "sn.set(style='whitegrid')\n", "\n", - "fig = plt.figure(figsize=(20, 8)) \n", - "ax1 = plt.subplot2grid((2,4), (0,0), colspan=2, rowspan=2)\n", + "fig = plt.figure(figsize=(20, 8))\n", + "ax1 = plt.subplot2grid((2, 4), (0, 0), colspan=2, rowspan=2)\n", "\n", - "sn.violinplot(x='Classifier', y='AUC', hue='Split scheme', data=newdf, split=True,\n", - " palette=['dodgerblue', 'darkorange'], ax=ax1)\n", + "sn.violinplot(\n", + " x='Classifier',\n", + " y='AUC',\n", + " hue='Split scheme',\n", + " data=newdf,\n", + " split=True,\n", + " palette=['dodgerblue', 'darkorange'],\n", + " ax=ax1,\n", + ")\n", "ax1.set_ylim([0.70, 1.0])\n", "ax1.set_ylabel('AUC')\n", "ax1.set_xlabel('Model')\n", "ax1.set_title('Model selection - Inner loop of nested cross-validation')\n", "\n", - "ax2 = plt.subplot2grid((2,4), (0, 2))\n", + "ax2 = plt.subplot2grid((2, 4), (0, 2))\n", "plot_cv_outer(kfold_outer, zscored=0, score='auc', ax=ax2, ds030_score=0.695, split_type='10-fold')\n", "ax2.set_xlabel('')\n", "ax2.legend()\n", "ax2.set_title('Evaluation - Outer loop of nested cross-validation')\n", "ax2.title.set_position([1.1, 1.0])\n", "\n", - "ax3 = plt.subplot2grid((2,4), (1, 2))\n", - "plot_cv_outer(loso_outer, zscored=0, score='auc', ax=ax3, ds030_score=0.695, color='darkorange', split_type='LoSo (17 folds)')\n", + "ax3 = plt.subplot2grid((2, 4), (1, 2))\n", + "plot_cv_outer(\n", + " loso_outer,\n", + " zscored=0,\n", + " score='auc',\n", + " ax=ax3,\n", + " ds030_score=0.695,\n", + " color='darkorange',\n", + " split_type='LoSo (17 folds)',\n", + ")\n", "ax3.legend()\n", "ax3.set_xlabel('AUC')\n", "\n", - "ax4 = plt.subplot2grid((2,4), (0, 3))\n", - "plot_cv_outer(kfold_outer, zscored=0, score='acc', ax=ax4, 
ds030_score=0.7283, split_type='10-fold')\n", + "ax4 = plt.subplot2grid((2, 4), (0, 3))\n", + "plot_cv_outer(\n", + " kfold_outer, zscored=0, score='acc', ax=ax4, ds030_score=0.7283, split_type='10-fold'\n", + ")\n", "ax4.set_xlabel('')\n", "ax4.legend()\n", "\n", - "ax5 = plt.subplot2grid((2,4), (1, 3))\n", - "plot_cv_outer(loso_outer, zscored=0, score='acc', ax=ax5, ds030_score=0.7283, color='darkorange', split_type='LoSo (17 folds)')\n", + "ax5 = plt.subplot2grid((2, 4), (1, 3))\n", + "plot_cv_outer(\n", + " loso_outer,\n", + " zscored=0,\n", + " score='acc',\n", + " ax=ax5,\n", + " ds030_score=0.7283,\n", + " color='darkorange',\n", + " split_type='LoSo (17 folds)',\n", + ")\n", "ax5.legend()\n", "ax5.set_xlabel('Accuracy')\n", "\n", "\n", - "fig.savefig(op.join(out_path, 'crossvalidation.pdf'),\n", - " bbox_inches='tight', pad_inches=0, dpi=300)" + "fig.savefig(op.join(out_path, 'crossvalidation.pdf'), bbox_inches='tight', pad_inches=0, dpi=300)" ] }, { @@ -282,8 +382,10 @@ "outputs": [], "source": [ "zscoreddf = loso_outer.loc[loso_outer.zscored == 0, ['auc', 'acc', 'site']]\n", - "palette = sn.color_palette(\"cubehelix\", len(set(zscoreddf.site)))\n", - "sn.pairplot(zscoreddf.loc[zscoreddf.auc.notnull(), ['auc', 'acc', 'site']], hue='site', palette=palette)" + "palette = sn.color_palette('cubehelix', len(set(zscoreddf.site)))\n", + "sn.pairplot(\n", + " zscoreddf.loc[zscoreddf.auc.notnull(), ['auc', 'acc', 'site']], hue='site', palette=palette\n", + ")" ] }, { @@ -295,11 +397,11 @@ "outputs": [], "source": [ "sites = sorted(list(set(loso_outer.site.ravel().tolist())))\n", - "palette = sn.color_palette(\"husl\", len(sites))\n", + "palette = sn.color_palette('husl', len(sites))\n", "fig = plt.figure()\n", "for i, site in enumerate(sites):\n", " sitedf = loso_outer.loc[loso_outer.site == site]\n", - " accdf = sitedf.loc[sitedf.zscored==0]\n", + " accdf = sitedf.loc[sitedf.zscored == 0]\n", " sn.distplot(accdf.acc.values.ravel(), bins=20, kde=0, label=site, color=palette[i])\n", "\n", "fig.gca().legend()\n", diff --git a/docs/notebooks/Paper-v2.0.ipynb b/docs/notebooks/Paper-v2.0.ipynb index 8df1256c2..02f2c03e5 100644 --- a/docs/notebooks/Paper-v2.0.ipynb +++ b/docs/notebooks/Paper-v2.0.ipynb @@ -86,7 +86,8 @@ "mviz.figure1(\n", " op.join(abide_path, 'sub-50137', 'anat', 'sub-50137_T1w.nii.gz'),\n", " op.join(abide_path, 'sub-50110', 'anat', 'sub-50110_T1w.nii.gz'),\n", - " out_file)" + " out_file,\n", + ")" ] }, { @@ -121,47 +122,141 @@ "from mriqc.classifier.sklearn import preprocessing as mcsp\n", "\n", "# Concatenate ABIDE & DS030\n", - "fulldata = combine_datasets([\n", - " (x_path, y_path, 'ABIDE'),\n", - " (ds030_x_path, ds030_y_path, 'DS030'),\n", - "])\n", + "fulldata = combine_datasets(\n", + " [\n", + " (x_path, y_path, 'ABIDE'),\n", + " (ds030_x_path, ds030_y_path, 'DS030'),\n", + " ]\n", + ")\n", "\n", "# Names of all features\n", - "features =[\n", - " 'cjv', 'cnr', 'efc', 'fber',\n", - " 'fwhm_avg', 'fwhm_x', 'fwhm_y', 'fwhm_z',\n", - " 'icvs_csf', 'icvs_gm', 'icvs_wm',\n", - " 'inu_med', 'inu_range', \n", - " 'qi_1', 'qi_2',\n", - " 'rpve_csf', 'rpve_gm', 'rpve_wm',\n", - " 'size_x', 'size_y', 'size_z',\n", - " 'snr_csf', 'snr_gm', 'snr_total', 'snr_wm',\n", - " 'snrd_csf', 'snrd_gm', 'snrd_total', 'snrd_wm',\n", - " 'spacing_x', 'spacing_y', 'spacing_z',\n", - " 'summary_bg_k', 'summary_bg_mad', 'summary_bg_mean', 'summary_bg_median', 'summary_bg_n', 'summary_bg_p05', 'summary_bg_p95', 'summary_bg_stdv',\n", - " 'summary_csf_k', 'summary_csf_mad', 'summary_csf_mean', 
'summary_csf_median', 'summary_csf_n', 'summary_csf_p05', 'summary_csf_p95', 'summary_csf_stdv',\n", - " 'summary_gm_k', 'summary_gm_mad', 'summary_gm_mean', 'summary_gm_median', 'summary_gm_n', 'summary_gm_p05', 'summary_gm_p95', 'summary_gm_stdv',\n", - " 'summary_wm_k', 'summary_wm_mad', 'summary_wm_mean', 'summary_wm_median', 'summary_wm_n', 'summary_wm_p05', 'summary_wm_p95', 'summary_wm_stdv',\n", - " 'tpm_overlap_csf', 'tpm_overlap_gm', 'tpm_overlap_wm',\n", - " 'wm2max'\n", + "features = [\n", + " 'cjv',\n", + " 'cnr',\n", + " 'efc',\n", + " 'fber',\n", + " 'fwhm_avg',\n", + " 'fwhm_x',\n", + " 'fwhm_y',\n", + " 'fwhm_z',\n", + " 'icvs_csf',\n", + " 'icvs_gm',\n", + " 'icvs_wm',\n", + " 'inu_med',\n", + " 'inu_range',\n", + " 'qi_1',\n", + " 'qi_2',\n", + " 'rpve_csf',\n", + " 'rpve_gm',\n", + " 'rpve_wm',\n", + " 'size_x',\n", + " 'size_y',\n", + " 'size_z',\n", + " 'snr_csf',\n", + " 'snr_gm',\n", + " 'snr_total',\n", + " 'snr_wm',\n", + " 'snrd_csf',\n", + " 'snrd_gm',\n", + " 'snrd_total',\n", + " 'snrd_wm',\n", + " 'spacing_x',\n", + " 'spacing_y',\n", + " 'spacing_z',\n", + " 'summary_bg_k',\n", + " 'summary_bg_mad',\n", + " 'summary_bg_mean',\n", + " 'summary_bg_median',\n", + " 'summary_bg_n',\n", + " 'summary_bg_p05',\n", + " 'summary_bg_p95',\n", + " 'summary_bg_stdv',\n", + " 'summary_csf_k',\n", + " 'summary_csf_mad',\n", + " 'summary_csf_mean',\n", + " 'summary_csf_median',\n", + " 'summary_csf_n',\n", + " 'summary_csf_p05',\n", + " 'summary_csf_p95',\n", + " 'summary_csf_stdv',\n", + " 'summary_gm_k',\n", + " 'summary_gm_mad',\n", + " 'summary_gm_mean',\n", + " 'summary_gm_median',\n", + " 'summary_gm_n',\n", + " 'summary_gm_p05',\n", + " 'summary_gm_p95',\n", + " 'summary_gm_stdv',\n", + " 'summary_wm_k',\n", + " 'summary_wm_mad',\n", + " 'summary_wm_mean',\n", + " 'summary_wm_median',\n", + " 'summary_wm_n',\n", + " 'summary_wm_p05',\n", + " 'summary_wm_p95',\n", + " 'summary_wm_stdv',\n", + " 'tpm_overlap_csf',\n", + " 'tpm_overlap_gm',\n", + " 'tpm_overlap_wm',\n", + " 'wm2max',\n", "]\n", "\n", "# Names of features that can be normalized\n", "coi = [\n", - " 'cjv', 'cnr', 'efc', 'fber', 'fwhm_avg', 'fwhm_x', 'fwhm_y', 'fwhm_z',\n", - " 'snr_csf', 'snr_gm', 'snr_total', 'snr_wm', 'snrd_csf', 'snrd_gm', 'snrd_total', 'snrd_wm',\n", - " 'summary_csf_mad', 'summary_csf_mean', 'summary_csf_median', 'summary_csf_p05', 'summary_csf_p95', 'summary_csf_stdv', 'summary_gm_k', 'summary_gm_mad', 'summary_gm_mean', 'summary_gm_median', 'summary_gm_p05', 'summary_gm_p95', 'summary_gm_stdv', 'summary_wm_k', 'summary_wm_mad', 'summary_wm_mean', 'summary_wm_median', 'summary_wm_p05', 'summary_wm_p95', 'summary_wm_stdv'\n", + " 'cjv',\n", + " 'cnr',\n", + " 'efc',\n", + " 'fber',\n", + " 'fwhm_avg',\n", + " 'fwhm_x',\n", + " 'fwhm_y',\n", + " 'fwhm_z',\n", + " 'snr_csf',\n", + " 'snr_gm',\n", + " 'snr_total',\n", + " 'snr_wm',\n", + " 'snrd_csf',\n", + " 'snrd_gm',\n", + " 'snrd_total',\n", + " 'snrd_wm',\n", + " 'summary_csf_mad',\n", + " 'summary_csf_mean',\n", + " 'summary_csf_median',\n", + " 'summary_csf_p05',\n", + " 'summary_csf_p95',\n", + " 'summary_csf_stdv',\n", + " 'summary_gm_k',\n", + " 'summary_gm_mad',\n", + " 'summary_gm_mean',\n", + " 'summary_gm_median',\n", + " 'summary_gm_p05',\n", + " 'summary_gm_p95',\n", + " 'summary_gm_stdv',\n", + " 'summary_wm_k',\n", + " 'summary_wm_mad',\n", + " 'summary_wm_mean',\n", + " 'summary_wm_median',\n", + " 'summary_wm_p05',\n", + " 'summary_wm_p95',\n", + " 'summary_wm_stdv',\n", "]\n", "\n", "# Plot batches\n", - "fig = 
mviz.plot_batches(fulldata, cols=list(reversed(coi)),\n", - " out_file=op.join(outputs_path, 'figures/fig02-batches-a.pdf'))\n", + "fig = mviz.plot_batches(\n", + " fulldata,\n", + " cols=list(reversed(coi)),\n", + " out_file=op.join(outputs_path, 'figures/fig02-batches-a.pdf'),\n", + ")\n", "\n", "# Apply new site-wise scaler\n", "scaler = mcsp.BatchRobustScaler(by='site', columns=coi)\n", "scaled = scaler.fit_transform(fulldata)\n", - "fig = mviz.plot_batches(scaled, cols=coi, site_labels='right',\n", - " out_file=op.join(outputs_path, 'figures/fig02-batches-b.pdf'))" + "fig = mviz.plot_batches(\n", + " scaled,\n", + " cols=coi,\n", + " site_labels='right',\n", + " out_file=op.join(outputs_path, 'figures/fig02-batches-b.pdf'),\n", + ")" ] }, { @@ -180,10 +275,13 @@ "outputs": [], "source": [ "from sklearn.metrics import cohen_kappa_score\n", + "\n", "overlap = mdata[np.all(~np.isnan(mdata[['rater_1', 'rater_2']]), axis=1)]\n", "y1 = overlap.rater_1.values.ravel().tolist()\n", "y2 = overlap.rater_2.values.ravel().tolist()\n", - "fig = mviz.inter_rater_variability(y1, y2, out_file=op.join(outputs_path, 'figures', 'fig02-irv.pdf'))\n", + "fig = mviz.inter_rater_variability(\n", + " y1, y2, out_file=op.join(outputs_path, 'figures', 'fig02-irv.pdf')\n", + ")\n", "\n", "print(\"Cohen's Kappa %f\" % cohen_kappa_score(y1, y2))\n", "\n", @@ -192,7 +290,7 @@ "\n", "y2 = overlap.rater_2.values.ravel()\n", "y2[y2 == 0] = 1\n", - "print(\"Cohen's Kappa (binarized): %f\" % cohen_kappa_score(y1, y2))\n" + "print(\"Cohen's Kappa (binarized): %f\" % cohen_kappa_score(y1, y2))" ] }, { @@ -212,16 +310,72 @@ "source": [ "import matplotlib.pyplot as plt\n", "import seaborn as sn\n", - "rfc_acc=[0.842, 0.815, 0.648, 0.609, 0.789, 0.761, 0.893, 0.833, 0.842, 0.767, 0.806, 0.850, 0.878, 0.798, 0.559, 0.881, 0.375]\n", - "svc_lin_acc=[0.947, 0.667, 0.870, 0.734, 0.754, 0.701, 0.750, 0.639, 0.877, 0.767, 0.500, 0.475, 0.837, 0.768, 0.717, 0.050, 0.429]\n", - "svc_rbf_acc=[0.947, 0.852, 0.500, 0.578, 0.772, 0.712, 0.821, 0.583, 0.912, 0.767, 0.500, 0.450, 0.837, 0.778, 0.441, 0.950, 0.339]\n", "\n", - "df = pd.DataFrame({\n", - " 'site': list(range(len(sites))) * 3,\n", - " 'accuracy': rfc_acc + svc_lin_acc + svc_rbf_acc,\n", - " 'Model': ['RFC'] * len(sites) + ['SVC_lin'] * len(sites) + ['SVC_rbf'] * len(sites)\n", - " \n", - "})\n", + "rfc_acc = [\n", + " 0.842,\n", + " 0.815,\n", + " 0.648,\n", + " 0.609,\n", + " 0.789,\n", + " 0.761,\n", + " 0.893,\n", + " 0.833,\n", + " 0.842,\n", + " 0.767,\n", + " 0.806,\n", + " 0.850,\n", + " 0.878,\n", + " 0.798,\n", + " 0.559,\n", + " 0.881,\n", + " 0.375,\n", + "]\n", + "svc_lin_acc = [\n", + " 0.947,\n", + " 0.667,\n", + " 0.870,\n", + " 0.734,\n", + " 0.754,\n", + " 0.701,\n", + " 0.750,\n", + " 0.639,\n", + " 0.877,\n", + " 0.767,\n", + " 0.500,\n", + " 0.475,\n", + " 0.837,\n", + " 0.768,\n", + " 0.717,\n", + " 0.050,\n", + " 0.429,\n", + "]\n", + "svc_rbf_acc = [\n", + " 0.947,\n", + " 0.852,\n", + " 0.500,\n", + " 0.578,\n", + " 0.772,\n", + " 0.712,\n", + " 0.821,\n", + " 0.583,\n", + " 0.912,\n", + " 0.767,\n", + " 0.500,\n", + " 0.450,\n", + " 0.837,\n", + " 0.778,\n", + " 0.441,\n", + " 0.950,\n", + " 0.339,\n", + "]\n", + "\n", + "df = pd.DataFrame(\n", + " {\n", + " 'site': list(range(len(sites))) * 3,\n", + " 'accuracy': rfc_acc + svc_lin_acc + svc_rbf_acc,\n", + " 'Model': ['RFC'] * len(sites) + ['SVC_lin'] * len(sites) + ['SVC_rbf'] * len(sites),\n", + " }\n", + ")\n", "\n", "\n", "x = np.arange(len(sites))\n", @@ -237,9 +391,11 @@ "\n", "fig = 
plt.figure(figsize=(10, 3))\n", "ax2 = plt.subplot2grid((1, 4), (0, 3))\n", - "plot = sn.violinplot(data=df, x='Model', y=\"accuracy\", ax=ax2, palette=colors, bw=.1, linewidth=.7)\n", + "plot = sn.violinplot(\n", + " data=df, x='Model', y='accuracy', ax=ax2, palette=colors, bw=0.1, linewidth=0.7\n", + ")\n", "for i in range(dim):\n", - " ax2.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=.8)\n", + " ax2.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=0.8)\n", "# ax2.axhline(np.percentile(allvals[i], 50), ls='--', color=colors[i], lw=.8)\n", "# sn.swarmplot(x=\"model\", y=\"accuracy\", data=df, color=\"w\", alpha=.5, ax=ax2);\n", "ax2.yaxis.tick_right()\n", @@ -247,14 +403,13 @@ "ax2.set_xticklabels(ax2.get_xticklabels(), rotation=40)\n", "ax2.set_ylim([0.0, 1.0])\n", "\n", - "ax1 = plt.subplot2grid((1, 4), (0, 0), colspan=3) \n", + "ax1 = plt.subplot2grid((1, 4), (0, 0), colspan=3)\n", "for i in range(dim):\n", " y = [d[i] for d in data]\n", - " b = ax1.bar(x + i * dimw, y, dimw, bottom=0.001, color=colors[i], alpha=.6)\n", + " b = ax1.bar(x + i * dimw, y, dimw, bottom=0.001, color=colors[i], alpha=0.6)\n", " print(np.average(allvals[i]), np.std(allvals[i]))\n", - " ax1.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=.8)\n", - " \n", - " \n", + " ax1.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=0.8)\n", + "\n", "\n", "plt.xlim([-0.2, 16.75])\n", "plt.grid(False)\n", @@ -272,16 +427,71 @@ }, "outputs": [], "source": [ - "rfc_roc_auc=[0.597, 0.380, 0.857, 0.610, 0.698, 0.692, 0.963, 0.898, 0.772, 0.596, 0.873, 0.729, 0.784, 0.860, 0.751, 0.900, 0.489]\n", - "svc_lin_roc_auc=[0.583, 0.304, 0.943, 0.668, 0.691, 0.754, 1.000, 0.778, 0.847, 0.590, 0.857, 0.604, 0.604, 0.838, 0.447, 0.650, 0.501]\n", - "svc_rbf_roc_auc=[0.681, 0.217, 0.827, 0.553, 0.738, 0.616, 0.889, 0.813, 0.845, 0.658, 0.779, 0.493, 0.726, 0.510, 0.544, 0.500, 0.447]\n", + "rfc_roc_auc = [\n", + " 0.597,\n", + " 0.380,\n", + " 0.857,\n", + " 0.610,\n", + " 0.698,\n", + " 0.692,\n", + " 0.963,\n", + " 0.898,\n", + " 0.772,\n", + " 0.596,\n", + " 0.873,\n", + " 0.729,\n", + " 0.784,\n", + " 0.860,\n", + " 0.751,\n", + " 0.900,\n", + " 0.489,\n", + "]\n", + "svc_lin_roc_auc = [\n", + " 0.583,\n", + " 0.304,\n", + " 0.943,\n", + " 0.668,\n", + " 0.691,\n", + " 0.754,\n", + " 1.000,\n", + " 0.778,\n", + " 0.847,\n", + " 0.590,\n", + " 0.857,\n", + " 0.604,\n", + " 0.604,\n", + " 0.838,\n", + " 0.447,\n", + " 0.650,\n", + " 0.501,\n", + "]\n", + "svc_rbf_roc_auc = [\n", + " 0.681,\n", + " 0.217,\n", + " 0.827,\n", + " 0.553,\n", + " 0.738,\n", + " 0.616,\n", + " 0.889,\n", + " 0.813,\n", + " 0.845,\n", + " 0.658,\n", + " 0.779,\n", + " 0.493,\n", + " 0.726,\n", + " 0.510,\n", + " 0.544,\n", + " 0.500,\n", + " 0.447,\n", + "]\n", "\n", - "df = pd.DataFrame({\n", - " 'site': list(range(len(sites))) * 3,\n", - " 'auc': rfc_roc_auc + svc_lin_roc_auc + svc_rbf_roc_auc,\n", - " 'Model': ['RFC'] * len(sites) + ['SVC_lin'] * len(sites) + ['SVC_rbf'] * len(sites)\n", - " \n", - "})\n", + "df = pd.DataFrame(\n", + " {\n", + " 'site': list(range(len(sites))) * 3,\n", + " 'auc': rfc_roc_auc + svc_lin_roc_auc + svc_rbf_roc_auc,\n", + " 'Model': ['RFC'] * len(sites) + ['SVC_lin'] * len(sites) + ['SVC_rbf'] * len(sites),\n", + " }\n", + ")\n", "\n", "x = np.arange(len(sites))\n", "data = list(zip(rfc_roc_auc, svc_lin_roc_auc, svc_rbf_roc_auc))\n", @@ -296,23 +506,22 @@ "\n", "fig = plt.figure(figsize=(10, 3))\n", "ax2 = plt.subplot2grid((1, 4), (0, 3))\n", - "plot = sn.violinplot(data=df, 
x='Model', y=\"auc\", ax=ax2, palette=colors, bw=.1, linewidth=.7)\n", + "plot = sn.violinplot(data=df, x='Model', y='auc', ax=ax2, palette=colors, bw=0.1, linewidth=0.7)\n", "for i in range(dim):\n", - " ax2.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=.8)\n", + " ax2.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=0.8)\n", "\n", "ax2.yaxis.tick_right()\n", "ax2.set_ylabel('')\n", "ax2.set_xticklabels(ax2.get_xticklabels(), rotation=40)\n", "ax2.set_ylim([0.0, 1.0])\n", "\n", - "ax1 = plt.subplot2grid((1, 4), (0, 0), colspan=3) \n", + "ax1 = plt.subplot2grid((1, 4), (0, 0), colspan=3)\n", "for i in range(dim):\n", " y = [d[i] for d in data]\n", - " b = ax1.bar(x + i * dimw, y, dimw, bottom=0.001, color=colors[i], alpha=.6)\n", + " b = ax1.bar(x + i * dimw, y, dimw, bottom=0.001, color=colors[i], alpha=0.6)\n", " print(np.average(allvals[i]), np.std(allvals[i]))\n", - " ax1.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=.8)\n", - " \n", - " \n", + " ax1.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=0.8)\n", + "\n", "\n", "plt.xlim([-0.2, 16.75])\n", "plt.grid(False)\n", @@ -343,15 +552,21 @@ "source": [ "from sklearn.metrics import confusion_matrix\n", "\n", - "pred_file = op.abspath(op.join(\n", - " '..', 'mriqc/data/csv',\n", - " 'mclf_run-20170724-191452_mod-rfc_ver-0.9.7-rc8_class-2_cv-loso_data-test_pred.csv'))\n", + "pred_file = op.abspath(\n", + " op.join(\n", + " '..',\n", + " 'mriqc/data/csv',\n", + " 'mclf_run-20170724-191452_mod-rfc_ver-0.9.7-rc8_class-2_cv-loso_data-test_pred.csv',\n", + " )\n", + ")\n", "\n", "pred_y = pd.read_csv(pred_file)\n", "true_y = pd.read_csv(ds030_y_path)\n", "true_y.rater_1 *= -1\n", "true_y.rater_1[true_y.rater_1 < 0] = 0\n", - "print(confusion_matrix(true_y.rater_1.tolist(), pred_y.pred_y.values.ravel().tolist(), labels=[0, 1]))" + "print(\n", + " confusion_matrix(true_y.rater_1.tolist(), pred_y.pred_y.values.ravel().tolist(), labels=[0, 1])\n", + ")" ] }, { @@ -371,27 +586,76 @@ "source": [ "import seaborn as sn\n", "from sklearn.externals.joblib import load as loadpkl\n", - "sn.set_style(\"white\")\n", + "\n", + "sn.set_style('white')\n", "\n", "# Get the RFC\n", - "estimator = loadpkl(pkgrf('mriqc', 'data/mclf_run-20170724-191452_mod-rfc_ver-0.9.7-rc8_class-2_cv-loso_data-train_estimator.pklz'))\n", + "estimator = loadpkl(\n", + " pkgrf(\n", + " 'mriqc',\n", + " 'data/mclf_run-20170724-191452_mod-rfc_ver-0.9.7-rc8_class-2_cv-loso_data-train_estimator.pklz',\n", + " )\n", + ")\n", "forest = estimator.named_steps['rfc']\n", "\n", "# Features selected in cross-validation\n", "features = [\n", - " \"cjv\", \"cnr\", \"efc\", \"fber\", \"fwhm_avg\", \"fwhm_x\", \"fwhm_y\", \"fwhm_z\", \"icvs_csf\", \"icvs_gm\", \"icvs_wm\",\n", - " \"qi_1\", \"qi_2\", \"rpve_csf\", \"rpve_gm\", \"rpve_wm\", \"snr_csf\", \"snr_gm\", \"snr_total\", \"snr_wm\", \"snrd_csf\",\n", - " \"snrd_gm\", \"snrd_total\", \"snrd_wm\", \"summary_bg_k\", \"summary_bg_stdv\", \"summary_csf_k\", \"summary_csf_mad\",\n", - " \"summary_csf_mean\", \"summary_csf_median\", \"summary_csf_p05\", \"summary_csf_p95\", \"summary_csf_stdv\",\n", - " \"summary_gm_k\", \"summary_gm_mad\", \"summary_gm_mean\", \"summary_gm_median\", \"summary_gm_p05\", \"summary_gm_p95\",\n", - " \"summary_gm_stdv\", \"summary_wm_k\", \"summary_wm_mad\", \"summary_wm_mean\", \"summary_wm_median\", \"summary_wm_p05\",\n", - " \"summary_wm_p95\", \"summary_wm_stdv\", \"tpm_overlap_csf\", \"tpm_overlap_gm\", \"tpm_overlap_wm\"]\n", + " 'cjv',\n", + " 'cnr',\n", 
+ " 'efc',\n", + " 'fber',\n", + " 'fwhm_avg',\n", + " 'fwhm_x',\n", + " 'fwhm_y',\n", + " 'fwhm_z',\n", + " 'icvs_csf',\n", + " 'icvs_gm',\n", + " 'icvs_wm',\n", + " 'qi_1',\n", + " 'qi_2',\n", + " 'rpve_csf',\n", + " 'rpve_gm',\n", + " 'rpve_wm',\n", + " 'snr_csf',\n", + " 'snr_gm',\n", + " 'snr_total',\n", + " 'snr_wm',\n", + " 'snrd_csf',\n", + " 'snrd_gm',\n", + " 'snrd_total',\n", + " 'snrd_wm',\n", + " 'summary_bg_k',\n", + " 'summary_bg_stdv',\n", + " 'summary_csf_k',\n", + " 'summary_csf_mad',\n", + " 'summary_csf_mean',\n", + " 'summary_csf_median',\n", + " 'summary_csf_p05',\n", + " 'summary_csf_p95',\n", + " 'summary_csf_stdv',\n", + " 'summary_gm_k',\n", + " 'summary_gm_mad',\n", + " 'summary_gm_mean',\n", + " 'summary_gm_median',\n", + " 'summary_gm_p05',\n", + " 'summary_gm_p95',\n", + " 'summary_gm_stdv',\n", + " 'summary_wm_k',\n", + " 'summary_wm_mad',\n", + " 'summary_wm_mean',\n", + " 'summary_wm_median',\n", + " 'summary_wm_p05',\n", + " 'summary_wm_p95',\n", + " 'summary_wm_stdv',\n", + " 'tpm_overlap_csf',\n", + " 'tpm_overlap_gm',\n", + " 'tpm_overlap_wm',\n", + "]\n", "\n", "nft = len(features)\n", "\n", "forest = estimator.named_steps['rfc']\n", - "importances = np.median([tree.feature_importances_ for tree in forest.estimators_],\n", - " axis=0)\n", + "importances = np.median([tree.feature_importances_ for tree in forest.estimators_], axis=0)\n", "# importances = np.median(, axis=0)\n", "indices = np.argsort(importances)[::-1]\n", "\n", @@ -410,8 +674,12 @@ "plt.gca().set_xticklabels([features[i] for i in indices], rotation=90)\n", "plt.xlim([-1, nft])\n", "plt.show()\n", - "fig.savefig(op.join(outputs_path, 'figures', 'fig06-exp2-fi.pdf'),\n", - " bbox_inches='tight', pad_inches=0, dpi=300)" + "fig.savefig(\n", + " op.join(outputs_path, 'figures', 'fig06-exp2-fi.pdf'),\n", + " bbox_inches='tight',\n", + " pad_inches=0,\n", + " dpi=300,\n", + ")" ] }, { @@ -429,17 +697,63 @@ }, "outputs": [], "source": [ - "fn = ['10225', '10235', '10316', '10339', '10365', '10376',\n", - " '10429', '10460', '10506', '10527', '10530', '10624',\n", - " '10696', '10891', '10948', '10968', '10977', '11050',\n", - " '11052', '11142', '11143', '11149', '50004', '50005',\n", - " '50008', '50010', '50016', '50027', '50029', '50033',\n", - " '50034', '50036', '50043', '50047', '50049', '50053',\n", - " '50054', '50055', '50085', '60006', '60010', '60012',\n", - " '60014', '60016', '60021', '60046', '60052', '60072',\n", - " '60073', '60084', '60087', '70051', '70060', '70072']\n", - "fp = ['10280', '10455', '10523', '11112', '50020', '50048',\n", - " '50052', '50061', '50073', '60077']" + "fn = [\n", + " '10225',\n", + " '10235',\n", + " '10316',\n", + " '10339',\n", + " '10365',\n", + " '10376',\n", + " '10429',\n", + " '10460',\n", + " '10506',\n", + " '10527',\n", + " '10530',\n", + " '10624',\n", + " '10696',\n", + " '10891',\n", + " '10948',\n", + " '10968',\n", + " '10977',\n", + " '11050',\n", + " '11052',\n", + " '11142',\n", + " '11143',\n", + " '11149',\n", + " '50004',\n", + " '50005',\n", + " '50008',\n", + " '50010',\n", + " '50016',\n", + " '50027',\n", + " '50029',\n", + " '50033',\n", + " '50034',\n", + " '50036',\n", + " '50043',\n", + " '50047',\n", + " '50049',\n", + " '50053',\n", + " '50054',\n", + " '50055',\n", + " '50085',\n", + " '60006',\n", + " '60010',\n", + " '60012',\n", + " '60014',\n", + " '60016',\n", + " '60021',\n", + " '60046',\n", + " '60052',\n", + " '60072',\n", + " '60073',\n", + " '60084',\n", + " '60087',\n", + " '70051',\n", + " '70060',\n", + " 
'70072',\n", + "]\n", + "fp = ['10280', '10455', '10523', '11112', '50020', '50048', '50052', '50061', '50073', '60077']" ] }, { @@ -450,12 +764,7 @@ }, "outputs": [], "source": [ - "fn_clear = [\n", - " ('10316', 98),\n", - " ('10968', 122),\n", - " ('11050', 110),\n", - " ('11149', 111)\n", - "]" + "fn_clear = [('10316', 98), ('10968', 122), ('11050', 110), ('11149', 111)]" ] }, { @@ -469,14 +778,18 @@ "import matplotlib.pyplot as plt\n", "from mriqc.viz.utils import plot_slice\n", "import nibabel as nb\n", + "\n", "for im, z in fn_clear:\n", " image_path = op.join(ds030_path, 'sub-%s' % im, 'anat', 'sub-%s_T1w.nii.gz' % im)\n", " imdata = nb.load(image_path).get_data()\n", - " \n", + "\n", " fig, ax = plt.subplots()\n", " plot_slice(imdata[..., z], annotate=True)\n", - " fig.savefig(op.join(outputs_path, 'figures', 'fig-06_sub-%s_slice-%03d.svg' % (im, z)),\n", - " dpi=300, bbox_inches='tight')\n", + " fig.savefig(\n", + " op.join(outputs_path, 'figures', 'fig-06_sub-%s_slice-%03d.svg' % (im, z)),\n", + " dpi=300,\n", + " bbox_inches='tight',\n", + " )\n", " plt.clf()\n", " plt.close()" ] @@ -496,11 +809,14 @@ "for im, z in fp_clear:\n", " image_path = op.join(ds030_path, 'sub-%s' % im, 'anat', 'sub-%s_T1w.nii.gz' % im)\n", " imdata = nb.load(image_path).get_data()\n", - " \n", + "\n", " fig, ax = plt.subplots()\n", " plot_slice(imdata[..., z], annotate=True)\n", - " fig.savefig(op.join(outputs_path, 'figures', 'fig-06_sub-%s_slice-%03d.svg' % (im, z)),\n", - " dpi=300, bbox_inches='tight')\n", + " fig.savefig(\n", + " op.join(outputs_path, 'figures', 'fig-06_sub-%s_slice-%03d.svg' % (im, z)),\n", + " dpi=300,\n", + " bbox_inches='tight',\n", + " )\n", " plt.clf()\n", " plt.close()" ] diff --git a/docs/notebooks/SpikesPlotter.ipynb b/docs/notebooks/SpikesPlotter.ipynb index 7ab764d2c..dc9f48595 100644 --- a/docs/notebooks/SpikesPlotter.ipynb +++ b/docs/notebooks/SpikesPlotter.ipynb @@ -22,15 +22,25 @@ "outputs": [], "source": [ "import os.path as op\n", + "\n", "wf_path = '/home/oesteban/tmp/mriqc-newcircle/work/workflow_enumerator/funcMRIQC/'\n", "\n", - "in_fft = op.join(wf_path, 'ComputeIQMs/_in_file_..home..oesteban..Data..example_artifacts_dataset..sub-ben01'\n", - " '..func..sub-ben01_task-unknown_bold.nii.gz/SpikesFinderFFT/sub-ben01_task-unknown_bold_zsfft.nii.gz')\n", + "in_fft = op.join(\n", + " wf_path,\n", + " 'ComputeIQMs/_in_file_..home..oesteban..Data..example_artifacts_dataset..sub-ben01'\n", + " '..func..sub-ben01_task-unknown_bold.nii.gz/SpikesFinderFFT/sub-ben01_task-unknown_bold_zsfft.nii.gz',\n", + ")\n", "\n", - "in_file = op.join(wf_path, '_in_file_..home..oesteban..Data..example_artifacts_dataset..sub-ben01..func..sub-ben01_'\n", - " 'task-unknown_bold.nii.gz/reorient_and_discard/sub-ben01_task-unknown_bold.nii.gz')\n", - "in_spikes = op.join(wf_path, 'ComputeIQMs/_in_file_..home..oesteban..Data..example_artifacts_dataset..sub-ben01..func..'\n", - " 'sub-ben01_task-unknown_bold.nii.gz/SpikesFinderFFT/sub-ben01_task-unknown_bold_spikes.tsv')" + "in_file = op.join(\n", + " wf_path,\n", + " '_in_file_..home..oesteban..Data..example_artifacts_dataset..sub-ben01..func..sub-ben01_'\n", + " 'task-unknown_bold.nii.gz/reorient_and_discard/sub-ben01_task-unknown_bold.nii.gz',\n", + ")\n", + "in_spikes = op.join(\n", + " wf_path,\n", + " 'ComputeIQMs/_in_file_..home..oesteban..Data..example_artifacts_dataset..sub-ben01..func..'\n", + " 'sub-ben01_task-unknown_bold.nii.gz/SpikesFinderFFT/sub-ben01_task-unknown_bold_spikes.tsv',\n", + ")" ] }, { @@ -39,13 +49,22 @@ 
"metadata": {}, "outputs": [], "source": [ - "in_fft = op.join(wf_path, 'ComputeIQMs/_in_file_..home..oesteban..Data..circle-tests..sub-ds205s09'\n", - " '..func..sub-ds205s09_task-view_acq-LR_run-02_bold.nii.gz/SpikesFinderFFT/sub-ds205s09_task-view_acq-LR_run-02_bold_zsfft.nii.gz')\n", + "in_fft = op.join(\n", + " wf_path,\n", + " 'ComputeIQMs/_in_file_..home..oesteban..Data..circle-tests..sub-ds205s09'\n", + " '..func..sub-ds205s09_task-view_acq-LR_run-02_bold.nii.gz/SpikesFinderFFT/sub-ds205s09_task-view_acq-LR_run-02_bold_zsfft.nii.gz',\n", + ")\n", "\n", - "in_file = op.join(wf_path, '_in_file_..home..oesteban..Data..circle-tests..sub-ds205s09..func..sub-ds205s09_'\n", - " 'task-view_acq-LR_run-02_bold.nii.gz/reorient_and_discard/sub-ds205s09_task-view_acq-LR_run-02_bold.nii.gz')\n", - "in_spikes = op.join(wf_path, 'ComputeIQMs/_in_file_..home..oesteban..Data..circle-tests..sub-ds205s09'\n", - " '..func..sub-ds205s09_task-view_acq-LR_run-02_bold.nii.gz/SpikesFinderFFT/sub-ds205s09_task-view_acq-LR_run-02_bold_spikes.tsv')" + "in_file = op.join(\n", + " wf_path,\n", + " '_in_file_..home..oesteban..Data..circle-tests..sub-ds205s09..func..sub-ds205s09_'\n", + " 'task-view_acq-LR_run-02_bold.nii.gz/reorient_and_discard/sub-ds205s09_task-view_acq-LR_run-02_bold.nii.gz',\n", + ")\n", + "in_spikes = op.join(\n", + " wf_path,\n", + " 'ComputeIQMs/_in_file_..home..oesteban..Data..circle-tests..sub-ds205s09'\n", + " '..func..sub-ds205s09_task-view_acq-LR_run-02_bold.nii.gz/SpikesFinderFFT/sub-ds205s09_task-view_acq-LR_run-02_bold_spikes.tsv',\n", + ")" ] }, { @@ -95,8 +114,8 @@ "\n", "data_path = '/home/oesteban/Data/ABIDE/sub-50465/anat/sub-50465_T1w.nii.gz'\n", "data_path = '/home/oesteban/tmp/mriqc-newcircle/work/workflow_enumerator/funcMRIQC/_in_file_..home..oesteban..Data..example_artifacts_dataset..sub-ben04..func..sub-ben04_task-unknown_bold.nii.gz/compute_tsnr/stdev.nii.gz'\n", - "#data_path = '/home/oesteban/Data/rewardBeastBIDS2/sub-119/func/sub-119_task-rest_sbref.nii.gz'\n", - "#data_path ='/home/oesteban/tmp/mriqc-newcircle/work/workflow_enumerator/funcMRIQC/_in_file_..home..oesteban..Data..circle-tests..sub-ds003s03..func..sub-ds003s03_task-rhymejudgment_bold.nii.gz/compute_tsnr/stdev.nii.gz'" + "# data_path = '/home/oesteban/Data/rewardBeastBIDS2/sub-119/func/sub-119_task-rest_sbref.nii.gz'\n", + "# data_path ='/home/oesteban/tmp/mriqc-newcircle/work/workflow_enumerator/funcMRIQC/_in_file_..home..oesteban..Data..circle-tests..sub-ds003s03..func..sub-ds003s03_task-rhymejudgment_bold.nii.gz/compute_tsnr/stdev.nii.gz'" ] }, { @@ -106,6 +125,7 @@ "outputs": [], "source": [ "from mriqc.viz import utils as mvu\n", + "\n", "mvu.plot_mosaic(data_path, cmap='viridis')" ] }, diff --git a/docs/notebooks/Supplemental Materials.ipynb b/docs/notebooks/Supplemental Materials.ipynb index 7399c4587..0758f3489 100644 --- a/docs/notebooks/Supplemental Materials.ipynb +++ b/docs/notebooks/Supplemental Materials.ipynb @@ -27,6 +27,7 @@ "import pandas as pd\n", "from mriqc.viz import misc as mviz\n", "from pkg_resources import resource_filename as pkgrf\n", + "\n", "outputs_path = '../../mriqc-data/'" ] }, @@ -58,10 +59,12 @@ "outputs": [], "source": [ "fig = mviz.raters_variability_plot(\n", - " mdata, raters=['rater_1', 'rater_2', 'rater_3'], \n", + " mdata,\n", + " raters=['rater_1', 'rater_2', 'rater_3'],\n", " rater_names=['Rater 1', 'Rater 2A', 'Rater 2B'],\n", " out_file=op.join(outputs_path, 'figures', 'suppl-fig02.pdf'),\n", - " only_overlap=False)" + " only_overlap=False,\n", + ")" ] }, { 
@@ -71,12 +74,17 @@ "outputs": [], "source": [ "from sklearn.metrics import cohen_kappa_score\n", + "\n", "overlap = mdata[np.all(~np.isnan(mdata[['rater_2', 'rater_3']]), axis=1)]\n", "y1 = overlap.rater_2.values.ravel().tolist()\n", "y2 = overlap.rater_3.values.ravel().tolist()\n", "\n", - "fig = mviz.inter_rater_variability(y1, y2, raters=['Protocol A', 'Protocol B'],\n", - " out_file=op.join(outputs_path, 'figures', 'suppl-intrarv.pdf'))\n", + "fig = mviz.inter_rater_variability(\n", + " y1,\n", + " y2,\n", + " raters=['Protocol A', 'Protocol B'],\n", + " out_file=op.join(outputs_path, 'figures', 'suppl-intrarv.pdf'),\n", + ")\n", "\n", "print(\"Cohen's Kappa %f\" % cohen_kappa_score(y1, y2))\n", "\n", diff --git a/docs/notebooks/finding_spikes.ipynb b/docs/notebooks/finding_spikes.ipynb index a89fc657e..a2e688673 100644 --- a/docs/notebooks/finding_spikes.ipynb +++ b/docs/notebooks/finding_spikes.ipynb @@ -16,6 +16,7 @@ "import seaborn as sns\n", "from nilearn.plotting import plot_epi, plot_anat, plot_roi\n", "import numpy as np\n", + "\n", "%matplotlib inline" ] }, @@ -38,7 +39,7 @@ }, "outputs": [], "source": [ - "in_file = \"data/sub-ben01_task-unknown_bold.nii.gz\"" + "in_file = 'data/sub-ben01_task-unknown_bold.nii.gz'" ] }, { @@ -72,7 +73,7 @@ }, "outputs": [], "source": [ - "#mask_nii = compute_epi_mask(in_4d_nii)\n", + "# mask_nii = compute_epi_mask(in_4d_nii)\n", "\n", "mask_data = compute_mask(mean_nii.get_data())\n", "mask_nii = new_img_like(mean_nii, mask_data)\n", @@ -106,14 +107,14 @@ "from scipy import ndimage\n", "\n", "# Input here is a binarized and intersected mask data from previous section\n", - "dil_mask = ndimage.binary_dilation(mask_nii.get_data(), iterations=int(mask_nii.shape[longest_axis]/8))\n", + "dil_mask = ndimage.binary_dilation(\n", + " mask_nii.get_data(), iterations=int(mask_nii.shape[longest_axis] / 8)\n", + ")\n", "\n", "# Now, we visualize the same using `plot_roi` with data being converted to Nifti\n", "# image. 
In all new image like, reference image is the same but second argument\n", "# varies with data specific\n", - "dil_mask_nii = new_img_like(\n", - " mask_nii,\n", - " dil_mask.astype(np.int))\n", + "dil_mask_nii = new_img_like(mask_nii, dil_mask.astype(np.int))\n", "plot_roi(dil_mask_nii, mean_nii)" ] }, @@ -150,10 +151,10 @@ }, "outputs": [], "source": [ - "rep = [1,1,1]\n", + "rep = [1, 1, 1]\n", "rep[longest_axis] = mask_nii.shape[longest_axis]\n", "new_mask_3d = np.logical_not(np.tile(new_mask_2d, rep))\n", - "#new_mask_3d = " + "# new_mask_3d =" ] }, { @@ -164,9 +165,7 @@ }, "outputs": [], "source": [ - "new_mask_nii = new_img_like(\n", - " mask_nii,\n", - " new_mask_3d.astype(np.int))\n", + "new_mask_nii = new_img_like(mask_nii, new_mask_3d.astype(np.int))\n", "plot_roi(new_mask_nii, mean_nii)" ] }, @@ -182,7 +181,7 @@ "\n", "data4d = in_4d_nii.get_data()\n", "for slice_i in range(in_4d_nii.shape[2]):\n", - " slice_data = data4d[:,:,slice_i,:][new_mask_3d[:,:,slice_i]].mean(axis=0)\n", + " slice_data = data4d[:, :, slice_i, :][new_mask_3d[:, :, slice_i]].mean(axis=0)\n", " slice_data = zscore(slice_data)\n", " plt.plot(slice_data)" ] @@ -195,7 +194,7 @@ }, "outputs": [], "source": [ - "data4d[:,:,slice_i,:][new_mask_3d[:,:,slice_i]].shape" + "data4d[:, :, slice_i, :][new_mask_3d[:, :, slice_i]].shape" ] }, { @@ -236,47 +235,45 @@ " mask_nii = new_img_like(mean_nii, mask_data)\n", "\n", " plot_roi(mask_nii, mean_nii)\n", - " \n", + "\n", " a = np.where(mask_nii.get_data() != 0)\n", " bbox = np.max(a[0]) - np.min(a[0]), np.max(a[1]) - np.min(a[1]), np.max(a[2]) - np.min(a[2])\n", " print(bbox)\n", " print(np.argmax(bbox))\n", " longest_axis = np.argmax(bbox)\n", - " \n", + "\n", " from scipy import ndimage\n", "\n", " # Input here is a binarized and intersected mask data from previous section\n", - " dil_mask = ndimage.binary_dilation(mask_nii.get_data(), iterations=int(mask_nii.shape[longest_axis]/8))\n", + " dil_mask = ndimage.binary_dilation(\n", + " mask_nii.get_data(), iterations=int(mask_nii.shape[longest_axis] / 8)\n", + " )\n", "\n", " # Now, we visualize the same using `plot_roi` with data being converted to Nifti\n", " # image. 
In all new image like, reference image is the same but second argument\n", " # varies with data specific\n", - " dil_mask_nii = new_img_like(\n", - " mask_nii,\n", - " dil_mask.astype(np.int))\n", + " dil_mask_nii = new_img_like(mask_nii, dil_mask.astype(np.int))\n", " plot_roi(dil_mask_nii, mean_nii)\n", - " \n", + "\n", " rep = list(mask_nii.shape)\n", " rep[longest_axis] = -1\n", " new_mask_2d = dil_mask.max(axis=longest_axis).reshape(rep)\n", " new_mask_2d.shape\n", - " \n", - " rep = [1,1,1]\n", + "\n", + " rep = [1, 1, 1]\n", " rep[longest_axis] = mask_nii.shape[longest_axis]\n", " new_mask_3d = np.logical_not(np.tile(new_mask_2d, rep))\n", - " \n", - " new_mask_nii = new_img_like(\n", - " mask_nii,\n", - " new_mask_3d.astype(np.int))\n", + "\n", + " new_mask_nii = new_img_like(mask_nii, new_mask_3d.astype(np.int))\n", " plot_roi(new_mask_nii, mean_nii)\n", - " \n", + "\n", " from scipy.stats.mstats import zscore\n", "\n", - " data4d = in_4d_nii.get_data()[:,:,:,skip:]\n", + " data4d = in_4d_nii.get_data()[:, :, :, skip:]\n", " plt.figure()\n", " for slice_i in range(in_4d_nii.shape[2]):\n", - " slice_data = data4d[:,:,slice_i,:][new_mask_3d[:,:,slice_i]].mean(axis=0)\n", - " #slice_data = zscore(slice_data)\n", + " slice_data = data4d[:, :, slice_i, :][new_mask_3d[:, :, slice_i]].mean(axis=0)\n", + " # slice_data = zscore(slice_data)\n", " plt.plot(slice_data)\n", " plt.title(in_file)" ] @@ -289,7 +286,7 @@ }, "outputs": [], "source": [ - "plot_spikes(\"D:/example_artifacts_dataset/sub-ben01/func/sub-ben01_task-unknown_bold.nii.gz\")" + "plot_spikes('D:/example_artifacts_dataset/sub-ben01/func/sub-ben01_task-unknown_bold.nii.gz')" ] }, { @@ -312,7 +309,7 @@ }, "outputs": [], "source": [ - "for file in glob(\"D:/*/sub-*/func/sub-*_task-*_bold.nii.gz\"):\n", + "for file in glob('D:/*/sub-*/func/sub-*_task-*_bold.nii.gz'):\n", " plot_spikes(file)" ] }, @@ -335,7 +332,7 @@ }, "outputs": [], "source": [ - "glob(\"D:/*/sub-*/func/sub-*_task-*_bold.nii.gz\")" + "glob('D:/*/sub-*/func/sub-*_task-*_bold.nii.gz')" ] } ], diff --git a/mriqc/cli/parser.py b/mriqc/cli/parser.py index 6d24b8fa5..db0611a1e 100644 --- a/mriqc/cli/parser.py +++ b/mriqc/cli/parser.py @@ -338,6 +338,14 @@ def _bids_filter(value): type=Path, help='Nipype plugin configuration file.', ) + g_outputs.add_argument( + '--crashfile-format', + action='store', + default='txt', + choices=['txt', 'pklz'], + type=str, + help='Nipype crashfile format', + ) g_outputs.add_argument( '--no-sub', default=False, @@ -479,7 +487,6 @@ def _bids_filter(value): def parse_args(args=None, namespace=None): """Parse args and run further checks on the command line.""" - from contextlib import suppress from json import loads from logging import DEBUG, FileHandler from pathlib import Path @@ -487,9 +494,10 @@ def parse_args(args=None, namespace=None): from niworkflows.utils.bids import DEFAULT_BIDS_QUERIES, collect_data - from mriqc import __version__ + from mriqc import __version__, data from mriqc._warnings import DATE_FMT, LOGGER_FMT, _LogFormatter from mriqc.messages import PARTICIPANT_START + from mriqc.utils.misc import initialize_meta_and_data parser = _build_parser() opts = parser.parse_args(args, namespace) @@ -517,6 +525,7 @@ def parse_args(args=None, namespace=None): f' * BIDS filters-file: {opts.bids_filter_file.absolute()}.', ) + notice_path = data.load.readable('NOTICE') config.loggers.cli.log( 26, PARTICIPANT_START.format( @@ -524,6 +533,9 @@ def parse_args(args=None, namespace=None): bids_dir=opts.bids_dir, 
             output_dir=opts.output_dir,
             analysis_level=opts.analysis_level,
+            notice='\n '.join(
+                ['NOTICE'] + notice_path.read_text().splitlines(keepends=False)[1:]
+            ),
             extra_messages='\n'.join(extra_messages),
         ),
     )
@@ -543,7 +555,9 @@

     # Load BIDS filters
     if opts.bids_filter_file:
-        config.execution.bids_filters = loads(opts.bids_filter_file.read_text())
+        config.execution.bids_filters = {
+            k.lower(): v for k, v in loads(opts.bids_filter_file.read_text()).items()
+        }

     bids_dir = config.execution.bids_dir
     output_dir = config.execution.output_dir
@@ -554,10 +568,8 @@
     if output_dir == bids_dir:
         parser.error(
             'The selected output folder is the same as the input BIDS folder. '
-            'Please modify the output path (suggestion: %s).'
-            % bids_dir
-            / 'derivatives'
-            / ('mriqc-%s' % version.split('+')[0])
+            'Please modify the output path '
+            f"(suggestion: {bids_dir / 'derivatives' / ('mriqc-' + version.split('+')[0])})."
         )

     if bids_dir in work_dir.parents:
@@ -642,11 +655,7 @@
             f'MRIQC is unable to process the following modalities: {", ".join(unknown_mods)}.'
         )

-    # Estimate the biggest file size / leave 1GB if some file does not exist (datalad)
-    with suppress(FileNotFoundError):
-        config.workflow.biggest_file_gb = _get_biggest_file_size_gb(
-            config.workflow.inputs.values()
-        )
+    initialize_meta_and_data()

     # set specifics for alternative populations
     if opts.species.lower() != 'human':
@@ -660,17 +669,3 @@
         config.workflow.fd_radius = 7.5
         # block uploads for the moment; can be reversed before wider release
         config.execution.no_sub = True
-
-
-def _get_biggest_file_size_gb(files):
-    """Identify the largest file size (allows multi-echo groups)."""
-
-    import os
-
-    sizes = []
-    for file in files:
-        if isinstance(file, (list, tuple)):
-            sizes.append(_get_biggest_file_size_gb(file))
-        else:
-            sizes.append(os.path.getsize(file))
-    return max(sizes) / (1024**3)
diff --git a/mriqc/cli/run.py b/mriqc/cli/run.py
index c8839f060..cf43d4752 100644
--- a/mriqc/cli/run.py
+++ b/mriqc/cli/run.py
@@ -266,7 +266,10 @@ def main(argv=None):
             )
         ),
     )
-    config.to_filename(config.execution.log_dir / f'config-{config.execution.run_uuid}.toml')
+    config.to_filename(
+        config.execution.log_dir / f'config-{config.execution.run_uuid}.toml',
+        store_inputs=False,  # Inputs are no longer needed at this point
+    )
     sys.exit(exitcode)
diff --git a/mriqc/config.py b/mriqc/config.py
index d57521815..d26968f54 100644
--- a/mriqc/config.py
+++ b/mriqc/config.py
@@ -91,6 +91,7 @@
 from __future__ import annotations

 import os
+import pickle
 import sys
 from contextlib import suppress
 from pathlib import Path
@@ -576,8 +577,8 @@ class workflow(_Config):

     analysis_level: list[str] = ['participant']
     """Level of analysis."""
-    biggest_file_gb: int = 1
-    """Size of largest file in GB."""
+    biggest_file_gb: dict[str, float] | int = 1
+    """Dictionary holding the size of the largest file in GB (per modality)."""
     deoblique: bool = False
     """Deoblique the functional scans during head motion correction preprocessing."""
     despike: bool = False
@@ -590,6 +591,12 @@ class workflow(_Config):
     """Turn on FFT based spike detector (slow)."""
     inputs: list[str | os.PathLike] | None = None
     """List of files to be processed with MRIQC."""
+    inputs_entities: dict[str, list[dict]] | None = None
+    """Dictionary of entity lists corresponding to inputs (keyed by modality)."""
+    inputs_metadata: dict[str, list[dict | list[dict]]] | None = None
+    """Dictionary of metadata lists corresponding to inputs (keyed by modality)."""
+    inputs_path: 
Path | None = None + """Path to a pickle file with the input paths and metadata.""" min_len_dwi: int = 7 """ Minimum DWI length to be considered a "processable" dataset @@ -602,6 +609,21 @@ class workflow(_Config): template_id: str = 'MNI152NLin2009cAsym' """TemplateFlow ID of template used for the anatomical processing.""" + _hidden: tuple[str, ...] = ('inputs', 'inputs_entities', 'inputs_metadata') + + @classmethod + def init(cls) -> None: + if cls.inputs_path is None: + cls.inputs_path = execution.work_dir / f'inputs-{execution.run_uuid}.pkl' + + if cls.inputs_path.exists(): + with open(cls.inputs_path, 'rb') as handle: + _inputs = pickle.load(handle) + + cls.inputs = _inputs['paths'] + cls.inputs_metadata = _inputs['metadata'] + cls.inputs_entities = _inputs['entities'] + class loggers: """Keep loggers easily accessible (see :py:func:`init`).""" @@ -727,7 +749,10 @@ def dumps() -> str: return dumps(get()) -def to_filename(filename: str | os.PathLike | None = None) -> Path: +def to_filename( + filename: str | os.PathLike | None = None, + store_inputs: bool = True, +) -> Path: """Write settings to file.""" if filename: @@ -738,6 +763,21 @@ def to_filename(filename: str | os.PathLike | None = None) -> Path: settings.file_path.parent.mkdir(exist_ok=True, parents=True) settings.file_path.write_text(dumps()) loggers.cli.debug(f'Saved MRIQC config file: {settings.file_path}.') + + if store_inputs: + if workflow.inputs_path is None: + workflow.inputs_path = execution.work_dir / f'inputs-{execution.run_uuid}.pkl' + + # Pickle inputs + with open(workflow.inputs_path, 'wb') as handle: + inputs_dict = { + 'paths': workflow.inputs, + 'metadata': workflow.inputs_metadata, + 'entities': workflow.inputs_entities, + } + pickle.dump(inputs_dict, handle, protocol=pickle.HIGHEST_PROTOCOL) + + loggers.cli.debug(f'Saved MRIQC inputs file: {workflow.inputs_path}.') return settings.file_path diff --git a/mriqc/data/NOTICE b/mriqc/data/NOTICE new file mode 100644 index 000000000..939b4ca4b --- /dev/null +++ b/mriqc/data/NOTICE @@ -0,0 +1,17 @@ +MRIQC +Copyright © The NiPreps Developers. + +This product includes software developed by +the NiPreps Community (https://nipreps.org/). + +Portions of this software were developed at the Department of +Psychology at Stanford University, Stanford, CA, US. + +This software contains code ultimately derived from the +PCP Quality Assessment Protocol (QAP; +http://preprocessed-connectomes-project.org/quality-assessment-protocol) +by C. Craddock, S. Giavasis, D. Clark, Z. Shezhad, and J. Pellman. + +This software is also distributed as a Docker container image. +The bootstrapping file for the image ("Dockerfile") is licensed +under the MIT License. diff --git a/mriqc/data/__init__.py b/mriqc/data/__init__.py index e69de29bb..f4aed61d5 100644 --- a/mriqc/data/__init__.py +++ b/mriqc/data/__init__.py @@ -0,0 +1,37 @@ +# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- +# vi: set ft=python sts=4 ts=4 sw=4 et: +# +# Copyright 2024 The NiPreps Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# We support and encourage derived works from this project, please read +# about our expectations at +# +# https://www.nipreps.org/community/licensing/ +# +""" +MRIQC data files + +.. autofunction:: load + +.. automethod:: load.readable + +.. automethod:: load.as_path + +.. automethod:: load.cached +""" + +from acres import Loader + +load = Loader(__package__) diff --git a/mriqc/data/testdata/group_bold.tsv b/mriqc/data/testdata/group_bold.tsv index 7506e9995..5f9135581 100644 --- a/mriqc/data/testdata/group_bold.tsv +++ b/mriqc/data/testdata/group_bold.tsv @@ -1,10 +1,10 @@ bids_name aor aqi dummy_trs dvars_nstd dvars_std dvars_vstd efc fber fd_mean fd_num fd_perc fwhm_avg fwhm_x fwhm_y fwhm_z gcor gsr_x gsr_y size_t size_x size_y size_z snr spacing_tr spacing_x spacing_y spacing_z summary_bg_k summary_bg_mad summary_bg_mean summary_bg_median summary_bg_n summary_bg_p05 summary_bg_p95 summary_bg_stdv summary_fg_k summary_fg_mad summary_fg_mean summary_fg_median summary_fg_n summary_fg_p05 summary_fg_p95 summary_fg_stdv tsnr -sub-ds205s03_task-functionallocalizer_run-01_bold 0.0038873170731707324 0.007676269512195123 0 23.556566426419753 1.111688350987654 1.037835274938272 0.5480388108408939 946.6738891601562 0.26118389619646404 32 39.02439024390244 2.460305 2.3309525 2.749775 2.3001875 0.0680646 -0.009326859377324581 0.05734974890947342 82 53 53 27 4.452167473312103 2.200000047683716 4.0 4.0 4.0 32.7759659672557 7.720380258975168 49.68507569151857 24.975608825683594 55688.0 2.341463327407837 179.5365753173828 79.87659545341923 1.3897322764135298 138.87641036027622 767.1765602872674 768.451171875 20155.0 461.5243835449219 1042.731689453125 172.5973051795346 60.897049377672374 -sub-ds205s03_task-view_run-01_bold 0.004324782608695651 0.008262664456521738 0 21.120018382857136 1.0831709981318685 1.0370256236263742 0.5459090166346154 1002.2031860351562 0.16520736608810047 18 19.565217391304348 2.342805 2.234215 2.613075 2.181125 0.0235103 -0.0050179013051092625 0.04793250188231468 92 53 53 27 4.4829699247170405 2.200000047683716 4.0 4.0 4.0 29.71988008857921 7.139052847581331 46.66574967004908 24.7608699798584 55362.0 0.032608695328235626 164.9891357421875 71.66784333938988 1.596677545134333 133.64368600192023 779.8773281882851 783.8695678710938 20481.0 457.5434875488281 1054.61962890625 174.85070038268285 62.58223795099184 -sub-ds205s03_task-view_run-02_bold 0.0012682608695652171 0.007114478586956522 0 33.47404192791209 1.3371308202197805 0.9764774098901093 0.5494999624875053 955.73681640625 0.22854716046970905 44 47.82608695652174 2.4598116666666665 2.3444425 2.7437 2.2912925 0.359354 -0.009432817809283733 0.052887145429849625 92 53 53 27 4.419438774661141 2.200000047683716 4.0 4.0 4.0 29.03249120482654 7.7192010499810335 49.54353785439428 24.934783935546875 55415.0 0.20652174949645996 181.78260803222656 80.45599936028407 1.4009872052560288 139.07454009400536 768.1427012059421 770.8587036132812 20428.0 459.5326232910156 1046.8260498046875 174.42029972293017 45.2559957196936 -sub-ds205s07_task-functionallocalizer_run-01_bold 0.006487361111111111 0.006664996805555556 0 26.95528650943662 1.4143280252112678 1.3278985984507037 0.5345065899755163 1009.7532348632812 0.1550487344317312 15 20.833333333333332 2.2598908333333334 2.16087 2.40925 2.2095525 0.0386767 -0.005986093543469906 0.03482493385672569 72 53 53 27 4.598492679701605 2.200000047683716 4.0 4.0 4.0 35.90517987432788 
6.32165175453679 43.5521938439157 22.56944465637207 56450.0 0.0 157.3333282470703 77.35003442698779 1.656221989802022 134.60795658544046 716.0395720200634 717.1805419921875 19393.0 440.0694580078125 951.6666870117188 155.95589707318132 73.35539675597101 -sub-ds205s07_task-view_run-01_bold 0.005007608695652174 0.010561140108695653 0 26.932940011208803 1.2896941338461543 1.168551503516483 0.5321868933927048 1023.4715576171875 0.1520993052375479 15 16.304347826086957 2.266245 2.184035 2.4189475 2.1957525 0.0478984 -0.006112692411988974 0.03277275711297989 92 53 53 27 4.595802249004055 2.200000047683716 4.0 4.0 4.0 33.63529478261993 6.2365988314754315 42.79862974194479 22.380435943603516 56588.0 0.3804347813129425 153.56521606445312 75.85331035278197 1.585165284213823 135.14239559316104 713.6245609062549 715.9891357421875 19255.0 437.3913269042969 947.5869750976562 155.78793525432897 63.332828449318185 -sub-ds205s07_task-view_run-02_bold 0.0027991304347826083 0.008415782065217392 0 27.396022419560428 1.345632817142857 1.2136871409890106 0.5333849667366366 1016.1077880859375 0.15451170465747024 18 19.565217391304348 2.271710833333333 2.1726275 2.433375 2.20913 0.0505144 -0.005830460228025913 0.03532366082072258 92 53 53 27 4.598664236595992 2.200000047683716 4.0 4.0 4.0 36.078683930448996 6.575017673595487 43.39332640278381 22.565217971801758 56589.0 1.22826087474823 155.56521606445312 75.49560044499137 1.6540993322199018 136.17376514793506 717.751513360383 719.2989501953125 19254.0 443.8260803222656 952.5978393554688 156.41069531029106 64.89047080953605 -sub-ds205s09_task-view_acq-LR_run-01_bold 0.003159041095890411 0.010028379999999998 0 20.396152721388898 1.090989740416667 1.0059027894444443 0.5048353890125871 1243.320556640625 0.3258302280827085 45 61.64383561643836 2.0852608333333333 2.0588275 2.2995125 1.8974425 0.0461463 -0.0019858605228364468 0.021903924643993378 73 53 53 27 5.292682271102816 2.200000047683716 4.0 4.0 4.0 33.98009379704732 5.949264906571304 39.07313696894552 22.956466674804688 58369.0 1.0936830043792725 132.37034606933594 62.99641049623399 2.7398312528413937 115.58686871143783 791.6591133225957 809.4622802734375 17474.0 483.6707458496094 994.6715698242188 152.93552051211557 66.51667785644531 -sub-ds205s09_task-view_acq-LR_run-02_bold 0.005322328767123288 0.012026137260273973 0 33.41256753611111 1.7174510266666663 1.7695175424999998 0.5052536160578726 1231.6690673828125 0.3345943377548133 47 64.38356164383562 2.0866866666666666 2.0591175 2.3078075 1.893135 0.0393612 -0.0014911222970113158 0.021568207070231438 73 53 53 27 5.293464753553317 2.200000047683716 4.0 4.0 4.0 33.28261057706237 6.199152583273201 39.20170865624716 23.102066040039062 58361.0 1.1750245094299316 131.959716796875 62.4980652302799 2.7186948010130694 115.65690863557867 792.8781950379683 810.7703247070312 17482.0 484.3922119140625 996.912109375 153.16001398016576 59.753475189208984 -sub-ds205s09_task-view_acq-RL_run-01_bold 0.015073150684931508 0.015836369041095893 0 34.205267455972226 1.4370176472222222 1.2820233763888884 0.5044353080190683 1274.420654296875 0.5699059645824405 53 72.6027397260274 2.08056 2.068795 2.288455 1.88443 0.0560156 -0.0013055995805189013 0.018262900412082672 73 53 53 27 5.229556365898414 2.200000047683716 4.0 4.0 4.0 31.57766555712935 5.840893620522815 38.89056185642749 22.814455032348633 58350.0 3.888171911239624 132.03933715820312 61.89358205002823 2.599912401497959 117.24294190485038 794.526159305144 814.4539184570312 17493.0 471.96441650390625 999.9562377929688 
155.73608576273267 52.76657485961914 +sub-ds205s03_task-functionallocalizer_run-01_bold 0.006865975609756098 0.01166174756097561 0 25.30130857259259 1.10068032382716 1.0422690707407405 0.5578 899.5551 0.26118389619646404 32 39.02439024390244 2.43764 2.3422275 2.692425 2.2782675 0.0529285 -0.012628363445401192 0.055389221757650375 82 53 53 27 4.266440439452425 2.200000047683716 4.0 4.0 4.0 35.3128 6.4186 50.9335 25.0 54995.0 19.0 173.0 77.1159 1.403 141.8145 759.3992 764.0 20848.0 428.0 1042.0 179.0677 53.9325022965204 +sub-ds205s03_task-view_run-01_bold 0.0055223913043478245 0.013993418152173915 0 21.883337964175826 1.0119163635164834 0.9670224290109894 0.5552 948.5766 0.16520736608810047 18 19.565217391304348 2.3244433333333334 2.2259025 2.592175 2.1552525 0.0183055 -0.008478646166622639 0.04804554581642151 92 53 53 27 4.254223958782439 2.200000047683716 4.0 4.0 4.0 34.6153 6.4461 47.5813 25.0 54686.0 19.0 157.0 67.2752 1.6046 136.8667 771.5826 779.0 21157.0 419.0 1054.0 183.1078 52.836231373017654 +sub-ds205s03_task-view_run-02_bold 0.0006475 0.01044403706521739 0 24.326057035714285 0.9611447131868133 0.8934934421978024 0.5573 902.4783 0.22854716046970905 44 47.82608695652174 2.3925758333333333 2.3057225 2.642175 2.22983 0.329435 -0.011322595179080963 0.053197648376226425 92 53 53 27 4.259133013458754 2.200000047683716 4.0 4.0 4.0 35.3124 7.4936 49.9201 26.0 54896.0 19.0 170.0 73.8075 1.461 140.8794 762.1778 767.0 20947.0 428.0 1044.0 180.0793 42.17791306972504 +sub-ds205s07_task-functionallocalizer_run-01_bold 0.00972736111111111 0.008194284305555556 0 22.879393483661968 1.1479378432394367 1.0417766205633798 0.541 962.3474 0.1550487344317312 15 20.833333333333332 2.2521649999999998 2.1681675 2.39802 2.1903075 0.0291203 -0.007456877268850803 0.03371790051460266 72 53 53 27 4.285941229350169 2.200000047683716 4.0 4.0 4.0 40.9527 6.1157 42.9513 23.0 55908.0 17.0 145.0 67.5711 1.7787 137.3466 708.532 714.0 19935.0 387.0 951.0 166.587 64.71407310082577 +sub-ds205s07_task-view_run-01_bold 0.00585054347826087 0.015204289673913043 0 22.382260123296703 0.9959491859340658 0.9369370696703297 0.5386 972.983 0.1520993052375479 15 16.304347826086957 2.2514825000000003 2.174985 2.38338 2.1960825 0.0282586 -0.006972935516387224 0.032188184559345245 92 53 53 27 4.313751528981998 2.200000047683716 4.0 4.0 4.0 41.5907 6.156 41.8577 23.0 56005.0 17.0 139.0 65.5813 1.8801 136.4316 706.7871 713.0 19838.0 394.0 946.0 165.2812 50.42334433761425 +sub-ds205s07_task-view_run-02_bold 0.0019870652173913047 0.014770980326086956 0 22.60577535087912 1.0123932324175822 0.9372968517582421 0.5404 965.007 0.15451170465747024 18 19.565217391304348 2.2389508333333334 2.1558375 2.3787425 2.1822725 0.0275085 -0.007005929946899414 0.03411094844341278 92 53 53 27 4.365633569269246 2.200000047683716 4.0 4.0 4.0 38.59 6.1883 42.5556 23.0 55958.0 17.0 143.0 65.669 1.7675 138.1076 711.3101 717.0 19885.0 400.0 952.0 164.2332 54.05953362467699 +sub-ds205s09_task-view_acq-LR_run-01_bold 0.005607945205479453 0.02232912082191781 0 23.14330828208334 1.043644975 0.9879293138888892 0.5131 1196.7195 0.3258302280827085 45 61.64383561643836 2.0490825 2.0376875 2.24239 1.86717 0.0367789 -0.004032289143651724 0.020073510706424713 73 53 53 27 5.010099661656575 2.200000047683716 4.0 4.0 4.0 42.9673 5.1654 39.4401 23.0 57775.0 18.0 124.0 58.7775 2.6347 119.7322 783.8782 805.0 18068.0 448.0 993.0 160.671 54.77862358093262 +sub-ds205s09_task-view_acq-LR_run-02_bold 0.006578767123287671 0.023950454794520546 0 35.81903538416666 1.5608953040277778 
1.6171545502777782 0.5136 1186.884 0.3345943377548133 47 64.38356164383562 2.054238333333333 2.038455 2.2515525 1.8727075 0.0322103 -0.0038295735139399767 0.020550260320305824 73 53 53 27 5.029289310984106 2.200000047683716 4.0 4.0 4.0 44.2484 5.2449 39.9385 23.0 57802.0 18.0 126.0 59.851 2.6451 118.8398 785.5312 807.0 18041.0 451.0 994.0 160.4556 50.310420989990234 +sub-ds205s09_task-view_acq-RL_run-01_bold 0.04132 0.05151956945205479 0 34.941844608194444 1.1900752381944446 1.11412176375 0.5122 1214.6777 0.5699059645824405 53 72.6027397260274 2.103911666666667 2.0650525 2.28995 1.9567325 0.0259984 -0.003200419247150421 0.01928347535431385 73 53 53 27 5.178320854531057 2.200000047683716 4.0 4.0 4.0 42.1191 5.0721 39.7567 23.0 57912.0 18.0 126.0 60.2804 2.5073 116.7328 789.3764 810.0 17931.0 460.0 993.0 156.417 36.2927360534668 diff --git a/mriqc/interfaces/bids.py b/mriqc/interfaces/bids.py index 1d5232c00..178ed8681 100644 --- a/mriqc/interfaces/bids.py +++ b/mriqc/interfaces/bids.py @@ -23,7 +23,7 @@ import re from pathlib import Path -import simplejson as json +import orjson as json from nipype.interfaces.base import ( BaseInterfaceInputSpec, DynamicTraitedSpec, @@ -42,15 +42,19 @@ class IQMFileSinkInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): in_file = Str(mandatory=True, desc='path of input file') - subject_id = Str(mandatory=True, desc='the subject id') modality = Str(mandatory=True, desc='the qc type') + entities = traits.Dict(desc='entities corresponding to the input') + subject_id = Str(desc='the subject id') session_id = traits.Either(None, Str, usedefault=True) task_id = traits.Either(None, Str, usedefault=True) acq_id = traits.Either(None, Str, usedefault=True) rec_id = traits.Either(None, Str, usedefault=True) run_id = traits.Either(None, traits.Int, usedefault=True) dataset = Str(desc='dataset identifier') - dismiss_entities = traits.List(['part'], usedefault=True) + dismiss_entities = traits.List( + ['datatype', 'part', 'echo', 'extension', 'suffix'], + usedefault=True, + ) metadata = traits.Dict() provenance = traits.Dict() @@ -156,7 +160,7 @@ def _run_interface(self, runtime): ) # Fill in the "bids_meta" key - id_dict = {} + id_dict = self.inputs.entities if isdefined(self.inputs.entities) else {} for comp in BIDS_COMP: comp_val = getattr(self.inputs, comp, None) if isdefined(comp_val) and comp_val is not None: @@ -183,16 +187,17 @@ def _run_interface(self, runtime): self._out_dict['provenance'] = {} self._out_dict['provenance'].update(prov_dict) - with open(out_file, 'w') as f: - f.write( - json.dumps( - self._out_dict, - sort_keys=True, - indent=2, - ensure_ascii=False, - ) + Path(out_file).write_bytes( + json.dumps( + self._out_dict, + option=( + json.OPT_SORT_KEYS + | json.OPT_INDENT_2 + | json.OPT_APPEND_NEWLINE + | json.OPT_SERIALIZE_NUMPY + ), ) - + ) return runtime diff --git a/mriqc/interfaces/diffusion.py b/mriqc/interfaces/diffusion.py index c24c39cd7..b6714a7d7 100644 --- a/mriqc/interfaces/diffusion.py +++ b/mriqc/interfaces/diffusion.py @@ -663,9 +663,7 @@ def _run_interface(self, runtime): ) full_img = nb.load(self.inputs.full_epi) full_img.__class__( - (full_img.get_fdata() * fitted[np.newaxis, np.newaxis, np.newaxis, :]).astype( - full_img.header.get_data_dtype() - ), + full_img.get_fdata() * fitted[np.newaxis, np.newaxis, np.newaxis, :], full_img.affine, full_img.header, ).to_filename(self._results['out_full_file']) diff --git a/mriqc/interfaces/functional.py b/mriqc/interfaces/functional.py index f510c481d..7d321ad75 100644 --- 
a/mriqc/interfaces/functional.py +++ b/mriqc/interfaces/functional.py @@ -226,26 +226,12 @@ class Spikes(SimpleInterface): def _run_interface(self, runtime): func_nii = nb.load(self.inputs.in_file) - func_data = func_nii.get_fdata() + func_data = func_nii.get_fdata(dtype='float32') func_shape = func_data.shape ntsteps = func_shape[-1] tr = func_nii.header.get_zooms()[-1] nskip = self.inputs.skip_frames - if self.inputs.detrend: - from nilearn.signal import clean - - data = func_data.reshape(-1, ntsteps) - clean_data = clean(data[:, nskip:].T, t_r=tr, standardize=False).T - new_shape = ( - func_shape[0], - func_shape[1], - func_shape[2], - clean_data.shape[-1], - ) - func_data = np.zeros(func_shape) - func_data[..., nskip:] = clean_data.reshape(new_shape) - mask_data = np.bool_(nb.load(self.inputs.in_mask).dataobj) mask_data[..., :nskip] = 0 mask_data = np.stack([mask_data] * ntsteps, axis=-1) @@ -256,6 +242,11 @@ def _run_interface(self, runtime): mask_data[..., : self.inputs.skip_frames] = 1 brain = np.ma.array(func_data, mask=(mask_data == 1)) + if self.inputs.detrend: + from nilearn.signal import clean + + brain = clean(brain[:, nskip:].T, t_r=tr, standardize=False).T + if self.inputs.no_zscore: ts_z = find_peaks(brain) total_spikes = [] @@ -361,7 +352,7 @@ def _run_interface(self, runtime): # DVARS dvars = pd.read_csv( self.inputs.dvars, - delim_whitespace=True, + sep=r'\s+', skiprows=1, # column names have spaces header=None, names=['dvars_std', 'dvars_nstd', 'dvars_vstd'], @@ -369,16 +360,14 @@ def _run_interface(self, runtime): dvars.index = pd.RangeIndex(1, timeseries.index.max() + 1) # FD - fd = pd.read_csv( - self.inputs.fd, delim_whitespace=True, header=0, names=['framewise_displacement'] - ) + fd = pd.read_csv(self.inputs.fd, sep=r'\s+', header=0, names=['framewise_displacement']) fd.index = pd.RangeIndex(1, timeseries.index.max() + 1) # AQI - aqi = pd.read_csv(self.inputs.quality, delim_whitespace=True, header=None, names=['aqi']) + aqi = pd.read_csv(self.inputs.quality, sep=r'\s+', header=None, names=['aqi']) # Outliers - aor = pd.read_csv(self.inputs.outliers, delim_whitespace=True, header=None, names=['aor']) + aor = pd.read_csv(self.inputs.outliers, sep=r'\s+', header=None, names=['aor']) timeseries = pd.concat((timeseries, dvars, fd, aqi, aor), axis=1) diff --git a/mriqc/interfaces/webapi.py b/mriqc/interfaces/webapi.py index d50915cd1..1ee0b2cde 100644 --- a/mriqc/interfaces/webapi.py +++ b/mriqc/interfaces/webapi.py @@ -20,6 +20,9 @@ # # https://www.nipreps.org/community/licensing/ # +from pathlib import Path + +import orjson from nipype.interfaces.base import ( BaseInterfaceInputSpec, Bunch, @@ -116,10 +119,16 @@ class UploadIQMsInputSpec(BaseInterfaceInputSpec): auth_token = Str(mandatory=True, desc='authentication token') email = Str(desc='set sender email') strict = traits.Bool(False, usedefault=True, desc='crash if upload was not successful') + modality = Str( + 'undefined', + usedefault=True, + desc='override modality field if provided through metadata', + ) class UploadIQMsOutputSpec(TraitedSpec): api_id = traits.Either(None, traits.Str, desc='Id for report returned by the web api') + payload_file = File(desc='Submitted payload (only for debugging)') class UploadIQMs(SimpleInterface): @@ -138,12 +147,25 @@ def _run_interface(self, runtime): self._results['api_id'] = None - response = upload_qc_metrics( + response, payload = upload_qc_metrics( self.inputs.in_iqms, endpoint=self.inputs.endpoint, auth_token=self.inputs.auth_token, email=email, + 
modality=self.inputs.modality,
+        )
+
+        payload_str = orjson.dumps(
+            payload,
+            option=(
+                orjson.OPT_SORT_KEYS
+                | orjson.OPT_INDENT_2
+                | orjson.OPT_APPEND_NEWLINE
+                | orjson.OPT_SERIALIZE_NUMPY
+            ),
         )
+        Path('payload.json').write_bytes(payload_str)
+        self._results['payload_file'] = str(Path('payload.json').absolute())

         try:
             self._results['api_id'] = response.json()['_id']
@@ -151,7 +173,9 @@
             # response did not give us an ID
             errmsg = (
                 'QC metrics upload failed to create an ID for the record '
-                f'uplOADED. rEsponse from server follows: {response.text}'
+                f'uploaded. Response from server follows: {response.text}'
+                '\n\nPayload:\n'
+                f'{payload_str.decode()}'
             )
             config.loggers.interface.warning(errmsg)
@@ -159,13 +183,20 @@
             config.loggers.interface.info(messages.QC_UPLOAD_COMPLETE)
             return runtime

-        errmsg = 'QC metrics failed to upload. Status %d: %s' % (
-            response.status_code,
-            response.text,
+        errmsg = '\n'.join(
+            [
+                'Unsuccessful upload.',
+                f'Server response status {response.status_code}:',
+                response.text,
+                '',
+                '',
+                'Payload:',
+                payload_str.decode(),
+            ]
         )
         config.loggers.interface.warning(errmsg)
         if self.inputs.strict:
-            raise RuntimeError(response.text)
+            raise RuntimeError(errmsg)

         return runtime
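A note on the serialization above: orjson.dumps() returns bytes rather than str, which is why the payload is persisted with write_bytes() and decoded before being interpolated into log messages, and OPT_SERIALIZE_NUMPY lets NumPy scalars and arrays in the IQM payload pass through without a custom encoder. A minimal sketch of that behavior (illustrative keys and values only):

    import numpy as np
    import orjson

    payload = {'snr_total': np.float64(12.3), 'size': np.array([64, 64, 32])}

    # NumPy types serialize natively when OPT_SERIALIZE_NUMPY is set...
    blob = orjson.dumps(payload, option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_INDENT_2)

    # ...and the result is bytes, so decode before building log strings
    print(blob.decode())
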
@@ -175,6 +206,7 @@
 def upload_qc_metrics(
     in_iqms,
     endpoint=None,
     email=None,
     auth_token=None,
+    modality=None,
 ):
     """
     Upload qc metrics to remote repository.
@@ -191,8 +223,6 @@

     """
     from copy import deepcopy
-    from json import dumps, loads
-    from pathlib import Path

     import requests
@@ -201,37 +231,47 @@
         errmsg = 'Unknown API endpoint' if not endpoint else 'Authentication failed.'
-        return Bunch(status_code=1, text=errmsg)
+        return Bunch(status_code=1, text=errmsg), None

-    in_data = loads(Path(in_iqms).read_text())
+    in_data = orjson.loads(Path(in_iqms).read_bytes())

     # Extract metadata and provenance
     meta = in_data.pop('bids_meta')
-
-    # For compatibility with WebAPI. Should be rolled back to int
-    if meta.get('run_id', None) is not None:
-        meta['run_id'] = '%d' % meta.get('run_id')
-
     prov = in_data.pop('provenance')

     # At this point, data should contain only IQMs
     data = deepcopy(in_data)

     # Check modality
-    modality = meta.get('modality', 'None')
+    modality = meta.get('modality', None) or meta.get('suffix', None) or modality
     if modality not in ('T1w', 'bold', 'T2w'):
         errmsg = (
             'Submitting to MRIQCWebAPI: image modality should be "bold", "T1w", or "T2w", '
-            '(found "%s")' % modality
+            f'(found "{modality}")'
         )
-        return Bunch(status_code=1, text=errmsg)
+        return Bunch(status_code=1, text=errmsg), None

     # Filter metadata values that aren't in whitelist
     data['bids_meta'] = {k: meta[k] for k in META_WHITELIST if k in meta}
+
+    # Check for fields with appended _id
+    bids_meta_names = {k: k.replace('_id', '') for k in META_WHITELIST if k.endswith('_id')}
+    data['bids_meta'].update({k: meta[v] for k, v in bids_meta_names.items() if v in meta})
+
+    # For compatibility with WebAPI. Should be rolled back to int
+    if (run_id := data['bids_meta'].get('run_id', None)) is not None:
+        data['bids_meta']['run_id'] = f'{run_id}'
+
+    # One more chance for spelled-out BIDS entity acquisition
+    if (acq_id := meta.get('acquisition', None)) is not None:
+        data['bids_meta']['acq_id'] = acq_id
+
     # Filter provenance values that aren't in whitelist
     data['provenance'] = {k: prov[k] for k in PROV_WHITELIST if k in prov}

     # Hash fields that may contain personal information
     data['bids_meta'] = _hashfields(data['bids_meta'])
+    data['bids_meta']['modality'] = modality
+
     if email:
         data['provenance']['email'] = email

@@ -239,19 +279,25 @@
     start_message = messages.QC_UPLOAD_START.format(url=endpoint)
     config.loggers.interface.info(start_message)
+
+    errmsg = None
     try:
         # if the modality is bold, call "bold" endpoint
         response = requests.post(
             f'{endpoint}/{modality}',
             headers=headers,
-            data=dumps(data),
+            data=orjson.dumps(data, option=orjson.OPT_SERIALIZE_NUMPY),
             timeout=15,
         )
     except requests.ConnectionError as err:
-        errmsg = 'QC metrics failed to upload due to connection error shown below:\n%s' % err
-        return Bunch(status_code=1, text=errmsg)
+        errmsg = ('Error uploading IQMs: Connection error:', f'{err}')
+    except requests.exceptions.ReadTimeout as err:
+        errmsg = (f'Error uploading IQMs: Server {endpoint} is down.', f'{err}')
+
+    if errmsg is not None:
+        response = Bunch(status_code=1, text='\n'.join(errmsg))

-    return response
+    return response, data


 def _hashfields(data):
diff --git a/mriqc/messages.py b/mriqc/messages.py
index b9d05e653..f02d8c5d8 100644
--- a/mriqc/messages.py
+++ b/mriqc/messages.py
@@ -38,6 +38,11 @@
   ------------------------------------------------------------------
     Running MRIQC version {version}
   ----------------------------------------------------------------
+
+  {notice}
+
+  ----------------------------------------------------------------
+
   * BIDS dataset path: {bids_dir}.
   * Output folder: {output_dir}.
   * Analysis levels: {analysis_level}.
diff --git a/mriqc/qc/diffusion.py b/mriqc/qc/diffusion.py
index 90cd19b9e..c77dbe56b 100644
--- a/mriqc/qc/diffusion.py
+++ b/mriqc/qc/diffusion.py
@@ -97,9 +97,18 @@

 from __future__ import annotations

+from contextlib import suppress
+from warnings import warn
+
 import numpy as np
 from statsmodels.robust.scale import mad

+MIN_NUM_CC_MASK = 5
+
+
+class ExtremeValueWarning(UserWarning):
+    """A warning type for dubious metric values."""
+

 def noise_b0(
     in_b0: np.ndarray,
@@ -198,18 +207,30 @@ def cc_snr(
         * The first element is the worst-case SNR (float).
         * The second element is the best-case SNR (float).

+        The SNR estimates are zero when there are not enough voxels to compute
+        them (which may occur if the number of orientations in the file is very low).
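+
+        A simplified sketch of the b=0 (``shell0``) estimate, assuming a
+        precomputed corpus callosum mask and synthetic data (the robust spread
+        of the signal within the mask is taken as the noise level)::
+
+            import numpy as np
+            from statsmodels.robust.scale import mad
+
+            rng = np.random.default_rng(2024)
+            in_b0 = rng.normal(1000.0, 50.0, size=(10, 10, 10))  # toy b=0 volume
+            cc_mask = np.zeros((10, 10, 10), dtype=bool)
+            cc_mask[4:6, 4:6, 4:6] = True  # toy corpus callosum mask
+
+            snr_shell0 = in_b0[cc_mask].mean() / mad(in_b0[cc_mask])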
+ """ cc_mask = cc_mask > 0 # Ensure it's a boolean mask - std_signal = mad(in_b0[cc_mask]) + b_values = np.rint(b_values).astype(np.uint16) + n_shells = len(b_values) - cc_snr_estimates = {} + if (nvox_cc := cc_mask.sum()) < MIN_NUM_CC_MASK: + warn(f'CC mask is too small ({nvox_cc} voxels)', ExtremeValueWarning, stacklevel=1) + cc_snr_estimates = {'shell0': 0} + cc_snr_estimates = cc_snr_estimates | { + f'shell{shell_index:d}_worst': 0 for shell_index in range(1, n_shells + 1) + } + cc_snr_estimates = cc_snr_estimates | { + f'shell{shell_index:d}_best': 0 for shell_index in range(1, n_shells + 1) + } + return cc_snr_estimates, 0 + std_signal = mad(in_b0[cc_mask]) xyz = np.eye(3) - b_values = np.rint(b_values).astype(np.uint16) - n_shells = len(b_values) - + cc_snr_estimates = {} cc_snr_estimates['shell0'] = round(float(in_b0[cc_mask].mean() / std_signal), decimals) # Shell-wise calculation @@ -221,12 +242,16 @@ def cc_snr( axis_Y = np.argmin(np.sum((bvecs - xyz[1, :]) ** 2, axis=-1)) axis_Z = np.argmin(np.sum((bvecs - xyz[2, :]) ** 2, axis=-1)) - data_X = shell_data[..., axis_X] - data_Y = shell_data[..., axis_Y] - data_Z = shell_data[..., axis_Z] + mean_signal_worst = 0 + with suppress(IndexError): + data_X = shell_data[..., axis_X] + mean_signal_worst = np.mean(data_X) - mean_signal_worst = np.mean(data_X) - mean_signal_best = 0.5 * (np.mean(data_Y) + np.mean(data_Z)) + mean_signal_best = 0 + with suppress(IndexError): + data_Y = shell_data[..., axis_Y] + data_Z = shell_data[..., axis_Z] + mean_signal_best = 0.5 * (np.mean(data_Y) + np.mean(data_Z)) cc_snr_estimates[f'shell{shell_index:d}_worst'] = round( float(np.mean(mean_signal_worst / std_signal)), decimals diff --git a/mriqc/utils/misc.py b/mriqc/utils/misc.py index 269b9b670..4c4bf4033 100644 --- a/mriqc/utils/misc.py +++ b/mriqc/utils/misc.py @@ -22,11 +22,19 @@ # """Helper functions.""" +from __future__ import annotations + +import asyncio import json from collections import OrderedDict from collections.abc import Iterable +from functools import partial +from os import cpu_count from pathlib import Path +from typing import Callable, TypeVar +import nibabel as nb +import numpy as np import pandas as pd try: @@ -34,6 +42,8 @@ except ImportError: from collections.abc import MutableMapping +R = TypeVar('R') + IMTYPES = { 'T1w': 'anat', 'T2w': 'anat', @@ -59,6 +69,12 @@ """ +async def worker(job: Callable[[], R], semaphore) -> R: + async with semaphore: + loop = asyncio.get_running_loop() + return await loop.run_in_executor(None, job) + + def reorder_csv(csv_file, out_file=None): """ Put subject, session and scan in front of csv file @@ -168,7 +184,7 @@ def generate_pred(derivatives_dir, output_dir, mod): # Drop duplicates dataframe.drop_duplicates(bdits_cols, keep='last', inplace=True) - out_csv = Path(output_dir) / ('%s_predicted_qa_csv' % mod) + out_csv = Path(output_dir) / f'{mod}_predicted_qa_csv' dataframe[bdits_cols + ['mriqc_pred']].to_csv(str(out_csv), index=False) return out_csv @@ -179,7 +195,7 @@ def generate_tsv(output_dir, mod): """ # If some were found, generate the CSV file and group report - out_tsv = output_dir / ('group_%s.tsv' % mod) + out_tsv = output_dir / (f'group_{mod}.tsv') jsonfiles = list(output_dir.glob(f'sub-*/**/{IMTYPES[mod]}/sub-*_{mod}.json')) if not jsonfiles: return None, out_tsv @@ -249,7 +265,7 @@ def _flatten_list(xs): def _datalad_get(input_list, nprocs=None): from mriqc import config - if not config.execution.bids_dir_datalad: + if not config.execution.bids_dir_datalad or not 
config.execution.datalad_get:
         return

     # Delay datalad import until we're sure we'll need it
@@ -273,3 +289,268 @@ def _datalad_get(input_list, nprocs=None):
             config.nipype.nprocs,
         ),
     )
+
+
+def _file_meta_and_size(
+    files: list | str,
+    volmin: int | None = 1,
+    volmax: int | None = None,
+):
+    """
+    Extract metadata, BIDS entities, size, and validity of one input (or multi-echo set).
+
+    Parameters
+    ----------
+    files : :obj:`list`
+        List of :obj:`os.pathlike` or sublist of :obj:`os.pathlike` (multi-echo case)
+        of files to be extracted.
+    volmin : :obj:`int`
+        Minimum number of volumes that inputs must have.
+    volmax : :obj:`int`
+        Maximum number of volumes that inputs must have.
+
+    Returns
+    -------
+    :obj:`tuple`
+        A tuple (metadata, entities, sizes, valid) of items containing the different
+        aspects extracted from the input(s).
+
+    """
+
+    import os
+
+    from mriqc import config
+
+    multifile = isinstance(files, (list, tuple))
+    if multifile:
+        metadata = []
+        _bids_list = []
+        _size_list = []
+        _valid_list = []
+
+        for filename in files:
+            metadata_i, entities_i, sizes_i, valid_i = _file_meta_and_size(
+                filename,
+                volmin=volmin,
+                volmax=volmax,
+            )
+
+            # Add to output lists
+            metadata.append(metadata_i)
+            _bids_list.append(entities_i)
+            _size_list.append(sizes_i)
+            _valid_list.append(valid_i)
+
+        valid = all(_valid_list) and len({_m['NumberOfVolumes'] for _m in metadata}) == 1
+        return metadata, _merge_entities(_bids_list), np.sum(_size_list), valid
+
+    metadata = config.execution.layout.get_metadata(files)
+    entities = config.execution.layout.parse_file_entities(files)
+    size = os.path.getsize(files) / (1024**3)
+
+    metadata['FileSize'] = size
+    metadata['FileSizeUnits'] = 'GB'
+
+    try:
+        nii = nb.load(files)
+        nifti_len = nii.shape[3]
+    except nb.filebasedimages.ImageFileError:
+        nifti_len = None
+    except IndexError:  # shape has only 3 elements
+        nifti_len = 1 if nii.dataobj.ndim == 3 else -1
+
+    # A file that could not be loaded cannot be valid
+    valid = nifti_len is not None
+    if valid and volmin is not None:
+        valid = nifti_len >= volmin
+
+    if valid and volmax is not None:
+        valid = nifti_len <= volmax
+
+    metadata['NumberOfVolumes'] = nifti_len
+
+    return metadata, entities, size, valid
+
+
+async def _extract_meta_and_size(
+    filelist: list,
+    volmin: int | None = 1,
+    volmax: int | None = None,
+    max_concurrent: int = min(cpu_count(), 12),
+) -> tuple[list, list, list, list]:
+    """
+    Extract corresponding metadata and file size in GB.
+
+    Parameters
+    ----------
+    filelist : :obj:`list`
+        List of :obj:`os.pathlike` or sublist of :obj:`os.pathlike` (multi-echo case)
+        of files to be extracted.
+    volmin : :obj:`int`
+        Minimum number of volumes that inputs must have.
+    volmax : :obj:`int`
+        Maximum number of volumes that inputs must have.
+    max_concurrent : :obj:`int`
+        Maximum number of concurrent coroutines (files or multi-echo sets).
+
+    Returns
+    -------
+    :obj:`tuple`
+        A tuple (metadata, entities, sizes, valid) of lists containing the different
+        aspects extracted from inputs.
+
+    """
+
+    semaphore = asyncio.Semaphore(max_concurrent)
+    tasks = []
+    for filename in filelist:
+        tasks.append(
+            asyncio.create_task(
+                worker(
+                    partial(
+                        _file_meta_and_size,
+                        filename,
+                        volmin=volmin,
+                        volmax=volmax,
+                    ),
+                    semaphore,
+                )
+            )
+        )
+
+    # Gather guarantees the order of the output
+    metadata, entities, sizes, valid = list(zip(*await asyncio.gather(*tasks)))
+    return metadata, entities, sizes, valid
+
+
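The worker() helper paired with an asyncio.Semaphore (defined near the top of this module) is a general recipe for running blocking jobs in the default thread pool while capping how many run at once. Stripped of the MRIQC specifics, and with len() standing in for the real extraction job, the pattern reduces to the following sketch:

    import asyncio
    from functools import partial

    async def bounded(job, semaphore):
        # Hold one semaphore slot for the duration of the blocking job
        async with semaphore:
            loop = asyncio.get_running_loop()
            # None selects the default ThreadPoolExecutor; job takes no arguments
            return await loop.run_in_executor(None, job)

    async def run_all(items, max_concurrent=4):
        semaphore = asyncio.Semaphore(max_concurrent)
        tasks = [
            asyncio.create_task(bounded(partial(len, item), semaphore))
            for item in items
        ]
        # gather() returns results in submission order, not completion order
        return await asyncio.gather(*tasks)

    print(asyncio.run(run_all(['sub-01_T1w.nii.gz', 'sub-02_T1w.nii.gz'])))
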
+def initialize_meta_and_data(
+    max_concurrent: int = min(cpu_count(), 12),
+) -> None:
+    """
+    Mine data and metadata corresponding to the dataset.
+
+    Get files if datalad enabled and extract the necessary metadata.
+
+    Parameters
+    ----------
+    max_concurrent : :obj:`int`
+        Maximum number of concurrent coroutines (files or multi-echo sets).
+
+    Returns
+    -------
+    :obj:`None`
+
+    """
+    from mriqc import config
+
+    # Datalad-get all files
+    dataset = config.workflow.inputs.values()
+    _datalad_get(dataset)
+
+    # Extract metadata and filesize
+    config.workflow.inputs_metadata = {}
+    config.workflow.inputs_entities = {}
+    config.workflow.biggest_file_gb = {}
+    for mod, input_list in config.workflow.inputs.items():
+        config.loggers.cli.log(
+            25,
+            f'Extracting metadata and entities for {len(input_list)} input runs '
+            f"of modality '{mod}'...",
+        )
+
+        # Some modalities require a minimum number of volumes
+        volmin = None
+        if mod == 'bold':
+            volmin = config.workflow.min_len_bold
+        elif mod == 'dwi':
+            volmin = config.workflow.min_len_dwi
+
+        # Some modalities require a maximum number of volumes
+        volmax = None
+        if mod in ('T1w', 'T2w'):
+            volmax = 1
+
+        # Run extraction in an asyncio coroutine loop
+        metadata, entities, size, valid = asyncio.run(
+            _extract_meta_and_size(
+                input_list,
+                max_concurrent=max_concurrent,
+                volmin=volmin,
+                volmax=volmax,
+            )
+        )
+
+        # Identify nonconformant files that need to be dropped (and drop them)
+        if num_dropped := len(input_list) - np.sum(valid):
+            config.loggers.workflow.warning(
+                f'{num_dropped} input runs cannot be processed (too short or too long)'
+            )
+
+            filtered_results = [
+                _v[:-1]
+                for _v in zip(input_list, metadata, entities, size, valid)
+                if _v[-1] is True
+            ]
+            input_list, metadata, entities, size = list(zip(*filtered_results))
+            config.workflow.inputs[mod] = input_list
+
+        # Finalizing (write to config so that values are propagated)
+        _max_size = np.max(size)
+        config.workflow.inputs_metadata[mod] = metadata
+        config.workflow.inputs_entities[mod] = entities
+        config.workflow.biggest_file_gb[mod] = float(_max_size)  # Cast so the value serializes into the config file
+
+        config.loggers.cli.log(
+            25,
+            f"File size ('{mod}'): {_max_size:.2f}|{np.mean(size):.2f} GB [maximum|average].",
+        )
+
+
+def _merge_entities(
+    entities: list,
+) -> dict:
+    """
+    Merge a list of dictionaries with entities dropping those with nonuniform values.
+
+    Examples
+    --------
+    >>> _merge_entities([
+    ...     {'subject': '001', 'session': '001'},
+    ...     {'subject': '001', 'session': '002'},
+    ... ])
+    {'subject': '001'}
+
+    >>> _merge_entities([
+    ...     {'subject': '001', 'session': '002'},
+    ...     {'subject': '001', 'session': '002'},
+    ... ])
+    {'subject': '001', 'session': '002'}
+
+    >>> _merge_entities([
+    ...     {'subject': '001', 'session': '002'},
+    ...     {'subject': '001', 'session': '002', 'run': 1},
+    ... ])
+    {'subject': '001', 'session': '002'}
+
+    >>> _merge_entities([
+    ...     {'subject': '001', 'session': '002'},
+    ...     {'subject': '001', 'run': 1},
+    ... 
]) + {'subject': '001'} + + """ + out_entities = {} + + bids_keys = set(entities[0].keys()) + for entities_i in entities[1:]: + bids_keys.intersection_update(entities_i.keys()) + + # Preserve ordering + bids_keys = [_b for _b in entities[0].keys() if _b in bids_keys] + + for key in bids_keys: + values = {_entities[key] for _entities in entities} + if len(values) == 1: + out_entities[key] = values.pop() + + return out_entities diff --git a/mriqc/workflows/anatomical/base.py b/mriqc/workflows/anatomical/base.py index 74c5fc656..a697a7b4d 100644 --- a/mriqc/workflows/anatomical/base.py +++ b/mriqc/workflows/anatomical/base.py @@ -52,6 +52,8 @@ """ +from itertools import chain + from nipype.interfaces import utility as niu from nipype.pipeline import engine as pe from niworkflows.interfaces.fixes import FixHeaderApplyTransforms as ApplyTransforms @@ -88,30 +90,52 @@ def anat_qc_workflow(name='anatMRIQC'): """ from mriqc.workflows.shared import synthstrip_wf - dataset = config.workflow.inputs.get('t1w', []) + config.workflow.inputs.get('t2w', []) - + # Enable if necessary + # mem_gb = max( + # config.workflow.biggest_file_gb['t1w'], + # config.workflow.biggest_file_gb['t2w'], + # ) + dataset = list( + chain( + config.workflow.inputs.get('t1w', []), + config.workflow.inputs.get('t2w', []), + ) + ) + metadata = list( + chain( + config.workflow.inputs_metadata.get('t1w', []), + config.workflow.inputs_metadata.get('t2w', []), + ) + ) + entities = list( + chain( + config.workflow.inputs_entities.get('t1w', []), + config.workflow.inputs_entities.get('t2w', []), + ) + ) message = BUILDING_WORKFLOW.format( modality='anatomical', - detail=( - f'for {len(dataset)} NIfTI files.' - if len(dataset) > 2 - else f"({' and '.join('<%s>' % v for v in dataset)})." - ), + detail=f'for {len(dataset)} NIfTI files.', ) config.loggers.workflow.info(message) - if config.execution.datalad_get: - from mriqc.utils.misc import _datalad_get - - _datalad_get(dataset) - # Initialize workflow workflow = pe.Workflow(name=name) # Define workflow, inputs and outputs # 0. 
Get data - inputnode = pe.Node(niu.IdentityInterface(fields=['in_file']), name='inputnode') - inputnode.iterables = [('in_file', dataset)] + inputnode = pe.Node( + niu.IdentityInterface( + fields=['in_file', 'metadata', 'entities'], + ), + name='inputnode', + ) + inputnode.synchronize = True # Do not test combinations of iterables + inputnode.iterables = [ + ('in_file', dataset), + ('metadata', metadata), + ('entities', entities), + ] outputnode = pe.Node(niu.IdentityInterface(fields=['out_json']), name='outputnode') @@ -146,7 +170,9 @@ def anat_qc_workflow(name='anatMRIQC'): ('in_file', 'inputnode.name_source'), ]), (inputnode, to_ras, [('in_file', 'in_file')]), - (inputnode, iqmswf, [('in_file', 'inputnode.in_file')]), + (inputnode, iqmswf, [('in_file', 'inputnode.in_file'), + ('metadata', 'inputnode.metadata'), + ('entities', 'inputnode.entities')]), (inputnode, norm, [(('in_file', _get_mod), 'inputnode.modality')]), (to_ras, skull_stripping, [('out_file', 'inputnode.in_files')]), (skull_stripping, hmsk, [ @@ -403,7 +429,6 @@ def compute_iqms(name='ComputeIQMs'): wf = compute_iqms() """ - from niworkflows.interfaces.bids import ReadSidecarJSON from mriqc.interfaces.anatomical import Harmonize from mriqc.workflows.utils import _tofloat @@ -413,6 +438,8 @@ def compute_iqms(name='ComputeIQMs'): niu.IdentityInterface( fields=[ 'in_file', + 'metadata', + 'entities', 'in_ras', 'brainmask', 'airmask', @@ -424,7 +451,6 @@ def compute_iqms(name='ComputeIQMs'): 'inu_corrected', 'in_inu', 'pvms', - 'metadata', 'std_tpms', ] ), @@ -435,9 +461,6 @@ def compute_iqms(name='ComputeIQMs'): name='outputnode', ) - # Extract metadata - meta = pe.Node(ReadSidecarJSON(index_db=config.execution.bids_database_dir), name='metadata') - # Add provenance addprov = pe.Node(AddProvenance(), name='provenance', run_without_submitting=True) @@ -472,17 +495,12 @@ def _getwm(inlist): # fmt: off workflow.connect([ - (inputnode, meta, [('in_file', 'in_file')]), - (inputnode, datasink, [('in_file', 'in_file'), - (('in_file', _get_mod), 'modality')]), + (inputnode, datasink, [ + ('in_file', 'in_file'), + (('in_file', _get_mod), 'modality'), + ('metadata', 'metadata'), + ('entities', 'entities')]), (inputnode, addprov, [(('in_file', _get_mod), 'modality')]), - (meta, datasink, [('subject', 'subject_id'), - ('session', 'session_id'), - ('task', 'task_id'), - ('acquisition', 'acq_id'), - ('reconstruction', 'rec_id'), - ('run', 'run_id'), - ('out_dict', 'metadata')]), (inputnode, addprov, [('in_file', 'in_file'), ('airmask', 'air_msk'), ('rotmask', 'rot_msk')]), diff --git a/mriqc/workflows/diffusion/base.py b/mriqc/workflows/diffusion/base.py index 11573409a..0053dc0d8 100644 --- a/mriqc/workflows/diffusion/base.py +++ b/mriqc/workflows/diffusion/base.py @@ -43,9 +43,6 @@ This workflow is orchestrated by :py:func:`dmri_qc_workflow`. """ -from pathlib import Path - -import numpy as np from nipype.interfaces import utility as niu from nipype.pipeline import engine as pe @@ -87,45 +84,32 @@ def dmri_qc_workflow(name='dwiMRIQC'): from mriqc.messages import BUILDING_WORKFLOW from mriqc.workflows.shared import synthstrip_wf as dmri_bmsk_workflow - workflow = pe.Workflow(name=name) - - dataset = config.workflow.inputs.get('dwi', []) - - full_data = [] - - for dwi_path in dataset: - bval = config.execution.layout.get_bval(dwi_path) - if bval and Path(bval).exists() and len(np.loadtxt(bval)) > config.workflow.min_len_dwi: - full_data.append(dwi_path) - else: - config.loggers.workflow.warn( - f'Dismissing {dwi_path} for processing. 
diff --git a/mriqc/workflows/diffusion/base.py b/mriqc/workflows/diffusion/base.py
index 11573409a..0053dc0d8 100644
--- a/mriqc/workflows/diffusion/base.py
+++ b/mriqc/workflows/diffusion/base.py
@@ -43,9 +43,6 @@
 This workflow is orchestrated by :py:func:`dmri_qc_workflow`.
 """

-from pathlib import Path
-
-import numpy as np
 from nipype.interfaces import utility as niu
 from nipype.pipeline import engine as pe

@@ -87,45 +84,32 @@ def dmri_qc_workflow(name='dwiMRIQC'):
     from mriqc.messages import BUILDING_WORKFLOW
     from mriqc.workflows.shared import synthstrip_wf as dmri_bmsk_workflow

-    workflow = pe.Workflow(name=name)
-
-    dataset = config.workflow.inputs.get('dwi', [])
-
-    full_data = []
-
-    for dwi_path in dataset:
-        bval = config.execution.layout.get_bval(dwi_path)
-        if bval and Path(bval).exists() and len(np.loadtxt(bval)) > config.workflow.min_len_dwi:
-            full_data.append(dwi_path)
-        else:
-            config.loggers.workflow.warn(
-                f'Dismissing {dwi_path} for processing. b-values are missing or '
-                'insufficient in number to execute the workflow.'
-            )
-
-    if set(dataset) - set(full_data):
-        config.workflow.inputs['dwi'] = full_data
-        config.to_filename()
-
+    # Enable if necessary
+    # mem_gb = config.workflow.biggest_file_gb['dwi']
+    dataset = config.workflow.inputs['dwi']
+    metadata = config.workflow.inputs_metadata['dwi']
+    entities = config.workflow.inputs_entities['dwi']
     message = BUILDING_WORKFLOW.format(
         modality='diffusion',
-        detail=(
-            f'for {len(full_data)} NIfTI files.'
-            if len(full_data) > 2
-            else f"({' and '.join('<%s>' % v for v in full_data)})."
-        ),
+        detail=f'for {len(dataset)} NIfTI files.',
     )
     config.loggers.workflow.info(message)

-    if config.execution.datalad_get:
-        from mriqc.utils.misc import _datalad_get
-
-        _datalad_get(full_data)
-
     # Define workflow, inputs and outputs
     # 0. Get data, put it in RAS orientation
-    inputnode = pe.Node(niu.IdentityInterface(fields=['in_file']), name='inputnode')
-    inputnode.iterables = [('in_file', full_data)]
+    workflow = pe.Workflow(name=name)
+    inputnode = pe.Node(
+        niu.IdentityInterface(
+            fields=['in_file', 'metadata', 'entities'],
+        ),
+        name='inputnode',
+    )
+    inputnode.synchronize = True  # Do not test combinations of iterables
+    inputnode.iterables = [
+        ('in_file', dataset),
+        ('metadata', metadata),
+        ('entities', entities),
+    ]

     sanitize = pe.Node(
         SanitizeImage(
@@ -147,21 +131,21 @@ def dmri_qc_workflow(name='dwiMRIQC'):
     get_lowb = pe.Node(
         ExtractOrientations(),
         name='get_lowb',
-        n_procs=max(1, config.nipype.omp_nthreads // 2),
+        n_procs=max(1, config.nipype.nprocs // 2),
     )

     # Generate B0 reference
     dwi_ref = pe.Node(
         RobustAverage(mc_method=None),
         name='dwi_ref',
-        n_procs=max(1, config.nipype.omp_nthreads // 2),
+        n_procs=max(1, config.nipype.nprocs // 2),
     )

     hmc_b0 = pe.Node(
         Volreg(args='-Fourier -twopass', zpad=4, outputtype='NIFTI_GZ'),
         name='hmc_b0',
         mem_gb=3.0,
-        n_procs=config.nipype.omp_nthreads,
+        n_procs=config.nipype.nprocs,
     )

     # Calculate brainmask
@@ -180,13 +164,13 @@ def dmri_qc_workflow(name='dwiMRIQC'):
     averages = pe.MapNode(
         WeightedStat(),
         name='averages',
-        n_procs=max(1, config.nipype.omp_nthreads // 2),
+        n_procs=max(1, config.nipype.nprocs // 2),
         iterfield=['in_weights'],
     )
     stddev = pe.MapNode(
         WeightedStat(stat='std'),
         name='stddev',
-        n_procs=max(1, config.nipype.omp_nthreads // 2),
+        n_procs=max(1, config.nipype.nprocs // 2),
         iterfield=['in_weights'],
     )

@@ -196,38 +180,38 @@ def dmri_qc_workflow(name='dwiMRIQC'):
             nthreads=config.nipype.omp_nthreads,
         ),
         name='dwidenoise',
-        n_procs=config.nipype.omp_nthreads,
+        n_procs=config.nipype.nprocs,
     )

     drift = pe.Node(
         CorrectSignalDrift(),
         name='drift',
-        n_procs=max(1, config.nipype.omp_nthreads // 2),
+        n_procs=max(1, config.nipype.nprocs // 2),
     )

     sp_mask = pe.Node(
         SpikingVoxelsMask(),
         name='sp_mask',
-        n_procs=max(1, config.nipype.omp_nthreads // 2),
+        n_procs=max(1, config.nipype.nprocs // 2),
     )

     # Fit DTI/DKI model
     dwimodel = pe.Node(
         DiffusionModel(),
         name='dwimodel',
-        n_procs=max(1, config.nipype.omp_nthreads // 2),
+        n_procs=config.nipype.nprocs,
     )

     # Calculate CC mask
     cc_mask = pe.Node(
         CCSegmentation(),
         name='cc_mask',
-        n_procs=max(1, config.nipype.omp_nthreads // 2),
+        n_procs=max(1, config.nipype.nprocs // 2),
     )

     # Run PIESNO noise estimation
     piesno = pe.Node(
         PIESNO(),
         name='piesno',
-        n_procs=max(1, config.nipype.omp_nthreads // 2),
+        n_procs=max(1, config.nipype.nprocs // 2),
     )

     # EPI to MNI registration
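The n_procs edits above move from the per-node thread count to the scheduler-wide nprocs total, reserving a fraction of it per node. The arithmetic, sketched with an invented worker count:

# max() keeps the request valid on single-CPU runners; ceiling division
# (used later in this patch) never rounds down to zero.
nprocs = 8
half = max(1, nprocs // 2)     # -> 4; still 1 when nprocs == 1
quarter = (nprocs + 3) // 4    # -> 2; 1 for any nprocs from 1 to 4

Because Nipype's MultiProc scheduler counts a node's n_procs against the available slots, a node requesting more slots forces fewer such nodes to run concurrently, which is the memory-throttling effect these changes appear to be after.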
@@ -245,7 +229,11 @@ def dmri_qc_workflow(name='dwiMRIQC'):
         (inputnode, dwi_report_wf, [
             ('in_file', 'inputnode.name_source'),
         ]),
-        (inputnode, iqms_wf, [('in_file', 'inputnode.in_file')]),
+        (inputnode, iqms_wf, [
+            ('in_file', 'inputnode.in_file'),
+            ('metadata', 'inputnode.metadata'),
+            ('entities', 'inputnode.entities'),
+        ]),
         (inputnode, sanitize, [('in_file', 'in_file')]),
         (sanitize, dwi_ref, [('out_file', 'in_file')]),
         (sanitize, sp_mask, [('out_file', 'in_file')]),
@@ -335,7 +323,6 @@ def compute_iqms(name='ComputeIQMs'):
         wf = compute_iqms()

     """
-    from niworkflows.interfaces.bids import ReadSidecarJSON

     from mriqc.interfaces import IQMFileSink
     from mriqc.interfaces.diffusion import DiffusionQC
@@ -348,6 +335,8 @@ def compute_iqms(name='ComputeIQMs'):
         niu.IdentityInterface(
             fields=[
                 'in_file',
+                'metadata',
+                'entities',
                 'in_shells',
                 'n_shells',
                 'b_values_file',
@@ -377,7 +366,6 @@ def compute_iqms(name='ComputeIQMs'):
         niu.IdentityInterface(
             fields=[
                 'out_file',
-                'meta_sidecar',
                 'noise_floor',
             ]
         ),
@@ -389,8 +377,6 @@ def compute_iqms(name='ComputeIQMs'):
         name='estimate_sigma',
     )

-    meta = pe.Node(ReadSidecarJSON(index_db=config.execution.bids_database_dir), name='metadata')
-
     measures = pe.Node(DiffusionQC(), name='measures')

     addprov = pe.Node(
@@ -413,10 +399,11 @@ def compute_iqms(name='ComputeIQMs'):
     # fmt: off
     workflow.connect([
         (inputnode, datasink, [('in_file', 'in_file'),
+                               ('entities', 'entities'),
+                               (('metadata', _filter_metadata), 'metadata'),
                                ('n_shells', 'NumberOfShells'),
                                ('b_values_shells', 'bValuesEstimation'),
                                (('b_values_file', _bvals_report), 'bValues')]),
-        (inputnode, meta, [('in_file', 'in_file')]),
         (inputnode, measures, [('in_file', 'in_file'),
                                ('b_values_file', 'in_bval_file'),
                                ('b_values_shells', 'in_shells_bval'),
@@ -439,15 +426,7 @@ def compute_iqms(name='ComputeIQMs'):
                                ('piesno_sigma', 'piesno_sigma')]),
         (inputnode, addprov, [('in_file', 'in_file')]),
         (addprov, datasink, [('out_prov', 'provenance')]),
-        (meta, datasink, [('subject', 'subject_id'),
-                          ('session', 'session_id'),
-                          ('task', 'task_id'),
-                          ('acquisition', 'acq_id'),
-                          ('reconstruction', 'rec_id'),
-                          ('run', 'run_id'),
-                          (('out_dict', _filter_metadata), 'metadata')]),
         (datasink, outputnode, [('out_file', 'out_file')]),
-        (meta, outputnode, [('out_dict', 'meta_sidecar')]),
         (measures, datasink, [('out_qc', 'root')]),
         (inputnode, estimate_sigma, [('in_noise', 'in_file'),
                                      ('brain_mask', 'mask')]),
@@ -504,7 +483,7 @@ def hmc_workflow(name='dMRI_HMC'):
         Volreg(args='-Fourier -twopass', zpad=4, outputtype='NIFTI_GZ'),
         name='motion_correct',
         mem_gb=3.0,
-        n_procs=config.nipype.omp_nthreads,
+        n_procs=config.nipype.nprocs,
     )

     bvec_rot = pe.Node(RotateVectors(), name='bvec_rot')
@@ -676,23 +655,23 @@ def epi_mni_align(name='SpatialNormalization'):


 def _mean(inlist):
-    import numpy as np
+    from numpy import mean

-    return np.mean(inlist)
+    return mean(inlist)


 def _parse_tqual(in_file):
-    import numpy as np
+    from numpy import mean

     with open(in_file) as fin:
         lines = fin.readlines()

-    return np.mean([float(line.strip()) for line in lines if not line.startswith('++')])
+    return mean([float(line.strip()) for line in lines if not line.startswith('++')])


 def _parse_tout(in_file):
-    import numpy as np
+    from numpy import loadtxt

-    data = np.loadtxt(in_file)  # pylint: disable=no-member
+    data = loadtxt(in_file)  # pylint: disable=no-member

     return data.mean()

@@ -701,9 +680,9 @@ def _tolist(value):


 def _get_bvals(bmatrix):
-    import numpy as np
+    from numpy import squeeze

-    return np.squeeze(bmatrix[:, -1]).tolist()
+    return squeeze(bmatrix[:, -1]).tolist()


 def _first(inlist):
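A note on the import-style change above: these helpers run as Nipype Function interfaces, which re-execute only the function's source in a fresh namespace on the worker, so imports must live inside the function body. A minimal sketch of wrapping such a helper (the node itself is hypothetical, not from the patch):

from nipype.interfaces import utility as niu
from nipype.pipeline import engine as pe


def _mean(inlist):
    # Imported here because only the function body travels to the worker.
    from numpy import mean

    return mean(inlist)


mean_node = pe.Node(niu.Function(function=_mean), name='mean')
mean_node.inputs.inlist = [1.0, 2.0, 3.0]  # result is exposed as the 'out' output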
@@ -722,11 +701,11 @@ def _all_but_first(inlist):

 def _estimate_sigma(in_file, mask):
     import nibabel as nb
-    import numpy as np
+    from numpy import median

     msk = nb.load(mask).get_fdata() > 0.5
     return round(
-        float(np.median(nb.load(in_file).get_fdata()[msk])),
+        float(median(nb.load(in_file).get_fdata()[msk])),
         6,
     )
diff --git a/mriqc/workflows/diffusion/output.py b/mriqc/workflows/diffusion/output.py
index 87d073a7c..d68470862 100644
--- a/mriqc/workflows/diffusion/output.py
+++ b/mriqc/workflows/diffusion/output.py
@@ -81,10 +81,12 @@ def init_dwi_report_wf(name='dwi_report_wf'):
     mosaic_fa = pe.Node(
         PlotMosaic(cmap='Greys_r'),
         name='mosaic_fa',
+        n_procs=max(1, config.nipype.nprocs // 2),
     )
     mosaic_md = pe.Node(
         PlotMosaic(cmap='Greys_r'),
         name='mosaic_md',
+        n_procs=max(1, config.nipype.nprocs // 2),
     )

     mosaic_snr = pe.MapNode(
@@ -97,6 +99,7 @@ def init_dwi_report_wf(name='dwi_report_wf'):
         ),
         name='mosaic_snr',
         iterfield=['before', 'after'],
+        n_procs=max(1, config.nipype.nprocs // 2),
     )

     mosaic_noise = pe.MapNode(
@@ -106,6 +109,7 @@ def init_dwi_report_wf(name='dwi_report_wf'):
         ),
         name='mosaic_noise',
         iterfield=['in_file'],
+        n_procs=max(1, config.nipype.nprocs // 2),
     )

     if config.workflow.species.lower() in ('rat', 'mouse'):
@@ -187,6 +191,7 @@ def _gen_entity(inlist):
     plot_heatmap = pe.Node(
         DWIHeatmap(scalarmap_label='Shell-wise Fractional Anisotropy (FA)'),
         name='plot_heatmap',
+        n_procs=config.nipype.nprocs,
     )
     ds_report_hm = pe.Node(
         DerivativesDataSink(
@@ -211,160 +216,6 @@ def _gen_entity(inlist):
     ])
     # fmt: on

-    if True:
-        return workflow
-
-    # Generate crown mask
-    # Create the crown mask
-    dilated_mask = pe.Node(BinaryDilation(), name='dilated_mask')
-    subtract_mask = pe.Node(BinarySubtraction(), name='subtract_mask')
-    parcels = pe.Node(niu.Function(function=_carpet_parcellation), name='parcels')
-
-    bigplot = pe.Node(FMRISummary(), name='BigPlot', mem_gb=mem_gb * 3.5)
-
-    ds_report_carpet = pe.Node(
-        DerivativesDataSink(
-            base_directory=reportlets_dir,
-            desc='carpet',
-            datatype='figures',
-        ),
-        name='ds_report_carpet',
-        run_without_submitting=True,
-    )
-
-    # fmt: off
-    workflow.connect([
-        # (inputnode, rnode, [("in_iqms", "in_iqms")]),
-        (inputnode, bigplot, [('hmc_epi', 'in_func'),
-                              ('hmc_fd', 'fd'),
-                              ('fd_thres', 'fd_thres'),
-                              ('in_dvars', 'dvars'),
-                              ('outliers', 'outliers'),
-                              (('meta_sidecar', _get_tr), 'tr')]),
-        (inputnode, parcels, [('epi_parc', 'segmentation')]),
-        (inputnode, dilated_mask, [('brain_mask', 'in_mask')]),
-        (inputnode, subtract_mask, [('brain_mask', 'in_subtract')]),
-        (dilated_mask, subtract_mask, [('out_mask', 'in_base')]),
-        (subtract_mask, parcels, [('out_mask', 'crown_mask')]),
-        (parcels, bigplot, [('out', 'in_segm')]),
-        (inputnode, ds_report_carpet, [('name_source', 'source_file')]),
-        (bigplot, ds_report_carpet, [('out_file', 'in_file')]),
-    ])
-    # fmt: on
-
-    if config.workflow.fft_spikes_detector:
-        mosaic_spikes = pe.Node(
-            PlotSpikes(
-                out_file='plot_spikes.svg',
-                cmap='viridis',
-                title='High-Frequency spikes',
-            ),
-            name='PlotSpikes',
-        )
-
-        ds_report_spikes = pe.Node(
-            DerivativesDataSink(
-                base_directory=reportlets_dir,
-                desc='spikes',
-                datatype='figures',
-            ),
-            name='ds_report_spikes',
-            run_without_submitting=True,
-        )
-
-        # fmt: off
-        workflow.connect([
-            (inputnode, ds_report_spikes, [('name_source', 'source_file')]),
-            (inputnode, mosaic_spikes, [('in_ras', 'in_file'),
-                                        ('in_spikes', 'in_spikes'),
-                                        ('in_fft', 'in_fft')]),
-            (mosaic_spikes, ds_report_spikes, [('out_file', 'in_file')]),
-        ])
-        # fmt: on
-
-    if not verbose:
-        return workflow
-
-    # Verbose-reporting goes here
-    from nireports.interfaces import PlotContours
-
-    mosaic_zoom = pe.Node(
-        PlotMosaic(
-            cmap='Greys_r',
-        ),
-        name='PlotMosaicZoomed',
-    )
-
-    plot_bmask = pe.Node(
-        PlotContours(
-            display_mode='y' if config.workflow.species.lower() in ('rat', 'mouse') else 'z',
-            levels=[0.5],
-            colors=['r'],
-            cut_coords=10,
-            out_file='bmask',
-        ),
-        name='PlotBrainmask',
-    )
-
-    ds_report_zoomed = pe.Node(
-        DerivativesDataSink(
-            base_directory=reportlets_dir,
-            desc='zoomed',
-            datatype='figures',
-        ),
-        name='ds_report_zoomed',
-        run_without_submitting=True,
-    )
-
-    ds_report_background = pe.Node(
-        DerivativesDataSink(
-            base_directory=reportlets_dir,
-            desc='background',
-            datatype='figures',
-        ),
-        name='ds_report_background',
-        run_without_submitting=True,
-    )
-
-    ds_report_bmask = pe.Node(
-        DerivativesDataSink(
-            base_directory=reportlets_dir,
-            desc='brainmask',
-            datatype='figures',
-        ),
-        name='ds_report_bmask',
-        run_without_submitting=True,
-    )
-
-    ds_report_norm = pe.Node(
-        DerivativesDataSink(
-            base_directory=reportlets_dir,
-            desc='norm',
-            datatype='figures',
-        ),
-        name='ds_report_norm',
-        run_without_submitting=True,
-    )
-
-    # fmt: off
-    workflow.connect([
-        (inputnode, ds_report_norm, [('mni_report', 'in_file'),
-                                     ('name_source', 'source_file')]),
-        (inputnode, plot_bmask, [('epi_mean', 'in_file'),
-                                 ('brain_mask', 'in_contours')]),
-        (inputnode, mosaic_zoom, [('epi_mean', 'in_file'),
-                                  ('brain_mask', 'bbox_mask_file')]),
-        (inputnode, mosaic_noise, [('epi_mean', 'in_file')]),
-        (inputnode, ds_report_zoomed, [('name_source', 'source_file')]),
-        (inputnode, ds_report_background, [('name_source', 'source_file')]),
-        (inputnode, ds_report_bmask, [('name_source', 'source_file')]),
-        (mosaic_zoom, ds_report_zoomed, [('out_file', 'in_file')]),
-        (mosaic_noise, ds_report_background, [('out_file', 'in_file')]),
-        (plot_bmask, ds_report_bmask, [('out_file', 'in_file')]),
-    ])
-    # fmt: on
-
     return workflow
""" -from collections.abc import Iterable - -import nibabel as nb from nipype.interfaces import utility as niu from nipype.pipeline import engine as pe from niworkflows.utils.connections import pop_file as _pop @@ -69,75 +66,43 @@ def fmri_qc_workflow(name='funcMRIQC'): """ from nipype.algorithms.confounds import TSNR, NonSteadyStateDetector from nipype.interfaces.afni import TStat - from niworkflows.interfaces.bids import ReadSidecarJSON from niworkflows.interfaces.header import SanitizeImage from mriqc.interfaces.functional import SelectEcho from mriqc.messages import BUILDING_WORKFLOW - from mriqc.utils.misc import _flatten_list as flatten - - workflow = pe.Workflow(name=name) - mem_gb = config.workflow.biggest_file_gb - - dataset = config.workflow.inputs.get('bold', []) - - if config.execution.datalad_get: - from mriqc.utils.misc import _datalad_get - - _datalad_get(dataset) - - full_files = [] - for bold_path in dataset: - try: - bold_len = nb.load( - bold_path[0] - if isinstance(bold_path, Iterable) and not isinstance(bold_path, (str, bytes)) - else bold_path - ).shape[3] - except nb.filebasedimages.ImageFileError: - bold_len = config.workflow.min_len_bold - except IndexError: # shape has only 3 elements - bold_len = 0 - if bold_len >= config.workflow.min_len_bold: - full_files.append(bold_path) - else: - config.loggers.workflow.warn( - f'Dismissing {bold_path} for processing: insufficient number of ' - f'timepoints ({bold_len}) to execute the workflow.' - ) + mem_gb = config.workflow.biggest_file_gb['bold'] + dataset = config.workflow.inputs['bold'] + metadata = config.workflow.inputs_metadata['bold'] + entities = config.workflow.inputs_entities['bold'] message = BUILDING_WORKFLOW.format( modality='functional', - detail=( - f'for {len(full_files)} BOLD runs.' - if len(full_files) > 2 - else f"({' and '.join('<%s>' % v for v in dataset)})." - ), + detail=f'for {len(dataset)} BOLD runs.', ) config.loggers.workflow.info(message) - if set(flatten(dataset)) - set(flatten(full_files)): - config.workflow.inputs['bold'] = full_files - config.to_filename() - # Define workflow, inputs and outputs # 0. 
-    inputnode = pe.Node(niu.IdentityInterface(fields=['in_file']), name='inputnode')
-    inputnode.iterables = [('in_file', full_files)]
+    workflow = pe.Workflow(name=name)
+    inputnode = pe.Node(
+        niu.IdentityInterface(
+            fields=['in_file', 'metadata', 'entities'],
+        ),
+        name='inputnode',
+    )
+    inputnode.synchronize = True  # Do not test combinations of iterables
+    inputnode.iterables = [
+        ('in_file', dataset),
+        ('metadata', metadata),
+        ('entities', entities),
+    ]

     outputnode = pe.Node(
         niu.IdentityInterface(fields=['qc', 'mosaic', 'out_group', 'out_dvars', 'out_fd']),
         name='outputnode',
     )

-    # Get metadata
-    meta = pe.MapNode(
-        ReadSidecarJSON(index_db=config.execution.bids_database_dir),
-        name='metadata',
-        iterfield=['in_file'],
-    )
-
     pick_echo = pe.Node(SelectEcho(), name='pick_echo')

     non_steady_state_detector = pe.Node(NonSteadyStateDetector(), name='non_steady_state_detector')
@@ -184,10 +149,9 @@ def fmri_qc_workflow(name='funcMRIQC'):

     # fmt: off
     workflow.connect([
-        (inputnode, meta, [('in_file', 'in_file')]),
-        (inputnode, pick_echo, [('in_file', 'in_files')]),
+        (inputnode, pick_echo, [('in_file', 'in_files'),
+                                ('metadata', 'metadata')]),
         (inputnode, sanitize, [('in_file', 'in_file')]),
-        (meta, pick_echo, [('out_dict', 'metadata')]),
         (pick_echo, non_steady_state_detector, [('out_file', 'in_file')]),
         (non_steady_state_detector, sanitize, [('n_volumes_to_discard', 'n_volumes_to_discard')]),
         (sanitize, hmcwf, [('out_file', 'inputnode.in_file')]),
@@ -195,14 +159,9 @@ def fmri_qc_workflow(name='funcMRIQC'):
         (hmcwf, tsnr, [('outputnode.out_file', 'in_file')]),
         (mean, ema, [(('out_file', _pop), 'inputnode.epi_mean')]),
         # Feed IQMs computation
-        (meta, iqmswf, [('out_dict', 'inputnode.metadata'),
-                        ('subject', 'inputnode.subject'),
-                        ('session', 'inputnode.session'),
-                        ('task', 'inputnode.task'),
-                        ('acquisition', 'inputnode.acquisition'),
-                        ('reconstruction', 'inputnode.reconstruction'),
-                        ('run', 'inputnode.run')]),
-        (inputnode, iqmswf, [('in_file', 'inputnode.in_file')]),
+        (inputnode, iqmswf, [('in_file', 'inputnode.in_file'),
+                             ('metadata', 'inputnode.metadata'),
+                             ('entities', 'inputnode.entities')]),
         (sanitize, iqmswf, [('out_file', 'inputnode.in_ras')]),
         (mean, iqmswf, [('out_file', 'inputnode.epi_mean')]),
         (hmcwf, iqmswf, [('outputnode.out_file', 'inputnode.hmc_epi'),
@@ -213,6 +172,7 @@ def fmri_qc_workflow(name='funcMRIQC'):
         # Feed reportlet generation
         (inputnode, func_report_wf, [
             ('in_file', 'inputnode.name_source'),
+            ('metadata', 'inputnode.meta_sidecar'),
         ]),
         (sanitize, func_report_wf, [('out_file', 'inputnode.in_ras')]),
         (mean, func_report_wf, [('out_file', 'inputnode.epi_mean')]),
@@ -230,7 +190,6 @@ def fmri_qc_workflow(name='funcMRIQC'):
             ('outputnode.out_dvars', 'inputnode.in_dvars'),
             ('outputnode.outliers', 'inputnode.outliers'),
         ]),
-        (meta, func_report_wf, [('out_dict', 'inputnode.meta_sidecar')]),
         (hmcwf, outputnode, [('outputnode.out_fd', 'out_fd')]),
     ])
     # fmt: on
@@ -321,13 +280,15 @@ def compute_iqms(name='ComputeIQMs'):
     from mriqc.interfaces.transitional import GCOR
     from mriqc.workflows.utils import _tofloat, get_fwhmx

-    mem_gb = config.workflow.biggest_file_gb
+    mem_gb = config.workflow.biggest_file_gb['bold']

     workflow = pe.Workflow(name=name)
     inputnode = pe.Node(
         niu.IdentityInterface(
             fields=[
                 'in_file',
+                'metadata',
+                'entities',
                 'in_ras',
                 'epi_mean',
                 'brainmask',
@@ -335,15 +296,8 @@ def compute_iqms(name='ComputeIQMs'):
                 'hmc_epi',
                 'hmc_fd',
                 'fd_thres',
                 'in_tsnr',
-                'metadata',
                 'mpars',
                 'exclude_index',
-                'subject',
-                'session',
-                'task',
-                'acquisition',
-                'reconstruction',
-                'run',
             ]
         ),
         name='inputnode',
@@ -397,6 +351,7 @@ def compute_iqms(name='ComputeIQMs'):
         FunctionalQC(),
         name='measures',
         mem_gb=mem_gb * 3,
+        n_procs=max(1, config.nipype.nprocs // 2),
         iterfield=['in_epi', 'in_hmc', 'in_tsnr', 'in_dvars', 'in_fwhm'],
     )

@@ -467,12 +422,7 @@ def compute_iqms(name='ComputeIQMs'):
         (inputnode, addprov, [('in_file', 'in_file')]),
         (inputnode, datasink, [('in_file', 'in_file'),
                                ('exclude_index', 'dummy_trs'),
-                               (('subject', _pop), 'subject_id'),
-                               (('session', _pop), 'session_id'),
-                               (('task', _pop), 'task_id'),
-                               (('acquisition', _pop), 'acq_id'),
-                               (('reconstruction', _pop), 'rec_id'),
-                               (('run', _pop), 'run_id'),
+                               ('entities', 'entities'),
                                ('metadata', 'metadata')]),
         (addprov, datasink, [('out_prov', 'provenance')]),
         (outliers, datasink, [(('out_file', _parse_tout), 'aor')]),
@@ -557,7 +507,7 @@ def hmc(name='fMRI_HMC', omp_nthreads=None):
     from nipype.algorithms.confounds import FramewiseDisplacement
     from nipype.interfaces.afni import Despike, Refit, Volreg

-    mem_gb = config.workflow.biggest_file_gb
+    mem_gb = config.workflow.biggest_file_gb['bold']

     workflow = pe.Workflow(name=name)

@@ -586,12 +536,16 @@ def hmc(name='fMRI_HMC', omp_nthreads=None):

     # Apply transforms to other echos
     apply_hmc = pe.MapNode(
-        niu.Function(function=_apply_transforms, input_names=['in_file', 'in_xfm']),
+        niu.Function(
+            function=_apply_transforms,
+            input_names=['in_file', 'in_xfm', 'max_concurrent'],
+        ),
         name='apply_hmc',
         iterfield=['in_file'],
         # NiTransforms is a memory hog, so ensure only one process is running at a time
         n_procs=config.environment.cpu_count,
     )
+    apply_hmc.inputs.max_concurrent = 4

     # fmt: off
     workflow.connect([
@@ -815,14 +769,22 @@ def _parse_tout(in_file):
     return data.mean()


-def _apply_transforms(in_file, in_xfm):
+def _apply_transforms(in_file, in_xfm, max_concurrent):
     from pathlib import Path

     from nitransforms.linear import load
+    from nitransforms.resampling import apply

     from mriqc.utils.bids import derive_bids_fname

-    realigned = load(in_xfm, fmt='afni', reference=in_file, moving=in_file).apply(in_file)
+    realigned = apply(
+        load(in_xfm, fmt='afni', reference=in_file, moving=in_file),
+        in_file,
+        dtype_width=4,
+        serialize_nvols=2,
+        max_concurrent=max_concurrent,
+        mode='reflect',
+    )
     out_file = derive_bids_fname(
         in_file,
         entity='desc-realigned',
diff --git a/mriqc/workflows/functional/output.py b/mriqc/workflows/functional/output.py
index 7ee734e30..df7318bef 100644
--- a/mriqc/workflows/functional/output.py
+++ b/mriqc/workflows/functional/output.py
@@ -49,7 +49,7 @@ def init_func_report_wf(name='func_report_wf'):
     # from mriqc.interfaces.reports import IndividualReport

     verbose = config.execution.verbose_reports
-    mem_gb = config.workflow.biggest_file_gb
+    mem_gb = config.workflow.biggest_file_gb['bold']
     reportlets_dir = config.execution.work_dir / 'reportlets'

     workflow = pe.Workflow(name=name)
@@ -97,6 +97,7 @@ def init_func_report_wf(name='func_report_wf'):
         name='SpikesFinderBgMask',
         mem_gb=mem_gb * 2.5,
         iterfield=['in_file', 'in_mask'],
+        n_procs=(config.nipype.nprocs + 3) // 4,  # spikes is a memory hog
     )

     # Generate crown mask
@@ -110,6 +111,7 @@ def init_func_report_wf(name='func_report_wf'):
         name='BigPlot',
         mem_gb=mem_gb * 3.5,
         iterfield=['in_func', 'dvars', 'outliers', 'in_spikes_bg'],
+        n_procs=(config.nipype.nprocs + 3) // 4,  # Big plot is a memory hog
     )

     # fmt: off
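For the `_apply_transforms` rewrite above, a hedged usage sketch of nitransforms' function-style resampling (paths invented; assumes nitransforms >= 24.0, as pinned below), using the same parameters the patch chooses:

from nitransforms.linear import load
from nitransforms.resampling import apply

xfm = load('hmc.aff12.1D', fmt='afni', reference='bold.nii.gz', moving='bold.nii.gz')
realigned = apply(
    xfm,
    'bold.nii.gz',
    dtype_width=4,       # emit 32-bit floats instead of 64-bit
    serialize_nvols=2,   # resample a couple of volumes per task ...
    max_concurrent=4,    # ... with at most four tasks in flight
    mode='reflect',      # reflect at the edges rather than zero-fill
)
realigned.to_filename('bold_desc-realigned.nii.gz')

Serializing over volumes is presumably what bounds the resampling memory footprint here, since the method-call form it replaces offered no such knobs.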
diff --git a/pyproject.toml b/pyproject.toml
index 75eb85a4b..53c618558 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,7 @@ classifiers = [
     "Programming Language :: Python :: 3.12",
 ]
 dependencies = [
+    "acres",
     "dipy",
     'importlib_resources; python_version < "3.9"',
     # jinja2 imports deprecated function removed in 2.1
     "markupsafe ~= 2.0.1",
@@ -26,10 +27,11 @@ dependencies = [
     "nibabel >= 3.0.1",
     "nilearn >= 0.5.1",
     "nipype ~= 1.4",
-    "nireports ~= 23.1",
-    "nitransforms ~= 23.0",
+    "nireports ~= 24.0.2",
+    "nitransforms ~= 24.0",
     "niworkflows ~=1.10.1",
     "numpy ~=1.20",
+    "orjson",
     "pandas",
     "pybids >= 0.15.6",
     "PyYAML",
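A note on the new orjson dependency (the motivation stated here is an inference, not something pyproject.toml records): orjson serializes NumPy scalars and arrays natively, where the standard json module raises TypeError. A minimal sketch with an invented IQM dictionary:

import numpy as np
import orjson

iqms = {'snr_total': np.float32(12.5), 'shape': np.array([64, 64, 36])}
print(orjson.dumps(iqms, option=orjson.OPT_SERIALIZE_NUMPY))
# b'{"snr_total":12.5,"shape":[64,64,36]}'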