diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..5d67802 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,28 @@ +[run] +branch = True +omit = */tests/* + _*.py + plotting.py + +[report] +; Regexes for lines to exclude from consideration +exclude_also = + ; Don't complain about missing debug-only code: + def __repr__ + if self\.debug + + ; Don't complain if tests don't hit defensive assertion code: + raise AssertionError + raise NotImplementedError + + ; Don't complain if non-runnable code isn't run: + if 0: + if __name__ == .__main__.: + + ; Don't complain about abstract methods, they aren't run: + @(abc\.)?abstractmethod + +ignore_errors = True + +[html] +directory = htmlcov diff --git a/.editorconfig b/.editorconfig index d4a2c44..66de818 100644 --- a/.editorconfig +++ b/.editorconfig @@ -19,3 +19,6 @@ insert_final_newline = false [Makefile] indent_style = tab + +[*.py] +max_line_length = 80 diff --git a/.github/workflows/codacy.yml b/.github/workflows/codacy.yml index 67dfc76..ce958e9 100644 --- a/.github/workflows/codacy.yml +++ b/.github/workflows/codacy.yml @@ -30,7 +30,7 @@ jobs: permissions: contents: read # for actions/checkout to fetch code security-events: write # for github/codeql-action/upload-sarif to upload SARIF results - actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status + actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status name: Codacy Security Scan runs-on: ubuntu-latest steps: diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml new file mode 100644 index 0000000..ced3824 --- /dev/null +++ b/.github/workflows/continuous-integration.yml @@ -0,0 +1,59 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: 
https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: tests + +on: + push: + branches: + - master + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [ "3.8" ] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/sinhaharsh/protocol.git#egg=protocol + pip install git+https://github.com/sinhaharsh/MRdataset.git#egg=MRdataset + if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi + pip install . + - name: Lint with flake8 + run: | + make lint + # stop the build if there are Python syntax errors or undefined names + # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + # flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pytest +# - name: Coverage +# run: | +# coverage run --rcfile=.coveragerc --source mrQA -m pytest +# coverage report -m +# coverage xml +# - name: Run codacy-coverage-reporter +# uses: codacy/codacy-coverage-reporter-action@v1 +# with: +# # project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} +# # or +# api-token: ${{ secrets.CODACY_API_TOKEN }} +# organization-provider: gh +# username: sinhaharsh +# project-name: mrQA +# coverage-reports: coverage.xml +# # or a comma-separated list for multiple reports +# # coverage-reports: , diff --git a/.gitignore b/.gitignore index feebfba..bb053a0 100644 --- a/.gitignore +++ b/.gitignore @@ -104,3 +104,10 @@ ENV/ .vscode/ .idea/ /update_today.txt + +# codacy +*.sarif + +# mri protocol +*.xml +*.secrets diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index a5ab97d..0000000 --- a/.travis.yml +++ /dev/null @@ -1,28 +0,0 @@ -# Config file for automatic testing at travis-ci.com - -language: python -python: - - 3.8 - - 3.7 - - 3.6 - -# Command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors -install: pip install -U tox-travis - -# Command to run tests, e.g. 
python setup.py test -script: tox - -# Assuming you have installed the travis-ci CLI tool, after you -# create the Github repo and add it to Travis, run the -# following command to finish PyPI deployment setup: -# $ travis encrypt --add deploy.password -deploy: - provider: pypi - distributions: sdist bdist_wheel - user: sinhaharsh - password: - secure: PLEASE_REPLACE_ME - on: - tags: true - repo: sinhaharsh/mrQA - python: 3.8 diff --git a/MANIFEST.in b/MANIFEST.in index dae47eb..45a6a35 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,7 +4,11 @@ include HISTORY.rst include LICENSE include README.rst -recursive-include tests * +graft mrQA/tests +prune mrQA/tests/htmlcov +prune mrQA/tests/.hypothesis +graft mrQA/scripts +graft mrQA/resources recursive-exclude * __pycache__ recursive-exclude * *.py[co] diff --git a/Makefile b/Makefile index bbfffac..76eb8a9 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ clean-test: ## remove test and coverage artifacts rm -fr .pytest_cache lint/flake8: ## check style with flake8 - flake8 mrQA tests + flake8 mrQA lint: lint/flake8 ## check style @@ -59,18 +59,19 @@ test-all: ## run tests on every Python version with tox tox coverage: ## check code coverage quickly with the default Python - coverage run --source mrQA -m pytest + coverage run --rcfile=.coveragerc --source mrQA -m pytest coverage report -m coverage html + coverage xml $(BROWSER) htmlcov/index.html +act: + act --secret-file .secrets + docs: ## generate Sphinx HTML documentation, including API docs - rm -f docs/mrQA.rst - rm -f docs/modules.rst - sphinx-apidoc -o docs/ mrQA $(MAKE) -C docs clean $(MAKE) -C docs html - $(BROWSER) docs/_build/html/index.html + $(BROWSER) docs/build/html/index.html servedocs: docs ## compile the docs watching for changes watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 
@@ -85,3 +86,11 @@ dist: clean ## builds source and wheel package install: clean ## install the package to the active Python's site-packages python setup.py install + +merge: + git switch mrds-issue-12 + git push + git switch master + git merge mrds-issue-12 + git push origin master + git switch mrds-issue-12 diff --git a/README.rst b/README.rst index 3bc95cb..9d4bf4a 100644 --- a/README.rst +++ b/README.rst @@ -4,8 +4,12 @@ mrQA : automatic protocol compliance checks on MR datasets .. image:: https://img.shields.io/pypi/v/mrQA.svg :target: https://pypi.python.org/pypi/mrQA -.. image:: https://img.shields.io/travis/Open-Minds-Lab/mrQA.svg - :target: https://travis-ci.com/Open-Minds-Lab/mrQA +.. image:: https://app.codacy.com/project/badge/Grade/8cd263e1eaa0480d8fac50eba0094401 + :target: https://app.codacy.com/gh/sinhaharsh/mrQA/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade + +.. image:: https://github.com/sinhaharsh/mrQA/actions/workflows/continuous-integration.yml/badge.svg + :target: https://github.com/sinhaharsh/mrQA/actions/workflows/continuous-integration.yml + .. image:: https://raw.githubusercontent.com/jupyter/design/master/logos/Badges/nbviewer_badge.svg :target: https://nbviewer.org/github/Open-Minds-Lab/mrQA/blob/master/examples/usage.ipynb diff --git a/docs/cli.rst b/docs/cli.rst new file mode 100644 index 0000000..b018c49 --- /dev/null +++ b/docs/cli.rst @@ -0,0 +1,7 @@ +Command Line +============ + +.. argparse:: + :module: mrQA.cli + :func: get_parser + :prog: mrQA diff --git a/docs/conf.py b/docs/conf.py index bf5fb78..0a7f4a9 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -34,7 +34,8 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', - 'sphinx.ext.napoleon'] + 'sphinx.ext.napoleon', + 'sphinxarg.ext'] # Add any paths that contain templates here, relative to this directory. 
templates_path = ['_templates'] diff --git a/docs/usage.rst b/docs/usage.rst index c5b4a39..acd0279 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -4,15 +4,23 @@ CLI usage A protocol compliance report can be generated directly from the command line interface. The following is an example of generating a protocol compliance report -For a DICOM dataset:: +For a DICOM dataset + +.. code:: bash mrqa --data-source /path/to/dataset --format dicom --name my_dataset -For a BIDS dataset:: +For a BIDS dataset + +.. code:: bash mrqa --data-source /path/to/dataset --format bids --name my_dataset +.. toctree:: + :maxdepth: 1 + + cli API Tutorial ------------ diff --git a/examples/example-protocol.json b/examples/example-protocol.json new file mode 100644 index 0000000..786d675 --- /dev/null +++ b/examples/example-protocol.json @@ -0,0 +1,2675 @@ +{ + "wpc-6106_22": { + "ncanda-localizer-v1": { + "name": "ncanda-localizer-v1 *", + "Id": "152425e7-a021-486e-9e5d-57a2a7bcd58e", + "header_property": "TA: 0:15 PM: REF Voxel size: 0.6\u00d70.6\u00d710.0 mmPAT: Off Rel. SNR: 1.00 : fl ", + "Properties": { + "Prio recon": "Off", + "Load images to viewer": "On", + "Inline movie": "Off", + "Auto store images": "On", + "Load images to stamp segments": "On", + "Load images to graphic segments": "On", + "Auto open inline display": "Off", + "Auto close inline display": "Off", + "Start measurement without further preparation": "On", + "Wait for user to start": "Off", + "Start measurements": "Single measurement" + }, + "Routine": { + "Slice group": 3, + "Slices": 3, + "Dist. factor": 50, + "Position": "Isocenter", + "Orientation": "Coronal", + "Phase enc. 
dir.": "R >> L", + "AutoAlign": "---", + "Phase oversampling": 0, + "FoV read": 300, + "FoV phase": 100.0, + "Slice thickness": 10.0, + "TR": 8.6, + "TE": 4.0, + "Averages": 1, + "Concatenations": 9, + "Filter": "Elliptical filter", + "Coil elements": "HE1-4;NE1,2" + }, + "Contrast - Common": { + "TR": 8.6, + "TE": 4.0, + "TD": 0, + "MTC": "Off", + "Magn. preparation": "None", + "Flip angle": 20, + "Fat suppr.": "None", + "Water suppr.": "None", + "SWI": "Off" + }, + "Contrast - Dynamic": { + "Averages": 1, + "Averaging mode": "Short term", + "Reconstruction": "Magnitude", + "Measurements": 1, + "Multiple series": "Each measurement" + }, + "Resolution - Common": { + "FoV read": 300, + "FoV phase": 100.0, + "Slice thickness": 10.0, + "Base resolution": 256, + "Phase resolution": 70, + "Phase partial Fourier": "Off", + "Interpolation": "On" + }, + "Resolution - iPAT": { + "PAT mode": "None" + }, + "Resolution - Filter Image": { + "Image Filter": "Off", + "Distortion Corr.": "Off", + "Prescan Normalize": "Off", + "Normalize": "Off", + "B1 filter": "Off" + }, + "Resolution - Filter Rawdata": { + "Raw filter": "Off", + "Elliptical filter": "On" + }, + "Geometry - Common": { + "Slice group": 3, + "Slices": 3, + "Dist. factor": 50, + "Position": "Isocenter", + "Orientation": "Coronal", + "Phase enc. dir.": "R >> L", + "FoV read": 300, + "FoV phase": 100.0, + "Slice thickness": 10.0, + "TR": 8.6, + "Multi-slice mode": "Sequential", + "Series": "Interleaved", + "Concatenations": 9 + }, + "Geometry - AutoAlign": { + "Slice group": 3, + "Position": "Isocenter", + "Orientation": "Coronal", + "Phase enc. 
dir.": "R >> L", + "AutoAlign": "---", + "Initial Position": "Isocenter", + "L": 0.0, + "P": 0.0, + "H": 0.0, + "Initial Rotation": 0.0, + "Initial Orientation": "Sagittal" + }, + "Geometry - Saturation": { + "Saturation mode": "Standard", + "Fat suppr.": "None", + "Water suppr.": "None", + "Special sat.": "None" + }, + "System - Miscellaneous": { + "Positioning mode": "REF", + "Table position": 0, + "MSMA": "S - C - T", + "Sagittal": "R >> L", + "Coronal": "A >> P", + "Transversal": "F >> H", + "Coil Combine Mode": "Adaptive Combine", + "Save uncombined": "Off", + "Matrix Optimization": "Off", + "AutoAlign": "---", + "Coil Select Mode": "Default" + }, + "System - Adjustments": { + "B0 Shim mode": "Tune up", + "B1 Shim mode": "TrueForm", + "Adjust with body coil": "Off", + "Confirm freq. adjustment": "Off", + "Assume Dominant Fat": "Off", + "Assume Silicone": "Off", + "Adjustment Tolerance": "Auto" + }, + "System - Adjust Volume": { + "Position": "Isocenter", + "Orientation": "Transversal", + "Rotation": 0.0, + "A >> P": 263, + "R >> L": 350, + "F >> H": 350, + "Reset": "Off" + }, + "System - pTx Volumes": { + "B1 Shim mode": "TrueForm", + "Excitation": "Slice-sel." + }, + "System - Tx/Rx": { + "Frequency 1H": "123.160045 MHz", + "Correction factor": 1, + "Gain": "High", + "Img. Scale Cor.": 1.0, + "Reset": "Off", + "? Ref. amplitude 1H": "0.000 V" + }, + "Physio - Signal1": { + "1st Signal/Mode": "None", + "TR": 8.6, + "Concatenations": 9, + "Segments": 1 + }, + "Physio - Cardiac": { + "Tagging": "None", + "Magn. preparation": "None", + "Fat suppr.": "None", + "Dark blood": "Off", + "FoV read": 300, + "FoV phase": 100.0, + "Phase resolution": 70 + }, + "Physio - PACE": { + "Resp. 
control": "Off", + "Concatenations": 9 + }, + "Inline - Common": { + "Subtract": "Off", + "Measurements": 1, + "StdDev": "Off", + "Liver registration": "Off", + "Save original images": "On" + }, + "Inline - MIP": { + "MIP-Sag": "Off", + "MIP-Cor": "Off", + "MIP-Tra": "Off", + "MIP-Time": "Off", + "Save original images": "On" + }, + "Inline - Soft Tissue": { + "Wash - In": "Off", + "Wash - Out": "Off", + "TTP": "Off", + "PEI": "Off", + "MIP - time": "Off", + "Measurements": 1 + }, + "Inline - Composing": { + "Distortion Corr.": "Off" + }, + "Sequence - Part 1": { + "Introduction": "On", + "Dimension": "2D", + "Phase stabilisation": "Off", + "Asymmetric echo": "Allowed", + "Contrasts": 1, + "Flow comp.": "No", + "Multi-slice mode": "Sequential", + "Bandwidth": 320 + }, + "Sequence - Part 2": { + "Segments": 1, + "Acoustic noise reduction": "None", + "RF pulse type": "Normal", + "Gradient mode": "Normal", + "Excitation": "Slice-sel.", + "RF spoiling": "On" + }, + "Sequence - Assistant": { + "Mode": "Off", + "Allowed delay": "0 s" + } + }, + "ncanda-t2fse-v1": { + "name": "ncanda-t2fse-v1 *", + "Id": "71b69d4c-a115-4b00-96f8-d7a3674b4620", + "header_property": "TA: 4:21 PM: FIX Voxel size: 0.5\u00d70.5\u00d71.2 mmPAT: 2 Rel. SNR: 1.00 : spc ", + "Properties": { + "Prio recon": "Off", + "Load images to viewer": "On", + "Inline movie": "Off", + "Auto store images": "On", + "Load images to stamp segments": "Off", + "Load images to graphic segments": "Off", + "Auto open inline display": "Off", + "Auto close inline display": "Off", + "Start measurement without further preparation": "Off", + "Wait for user to start": "On", + "Start measurements": "Single measurement" + }, + "Routine": { + "Slab group": 1, + "Slabs": 1, + "Position": "L2.2 A23.3 F19.0", + "Orientation": "Sagittal", + "Phase enc. 
dir.": "A >> P", + "AutoAlign": "---", + "Phase oversampling": 0, + "Slice oversampling": 0.0, + "Slices per slab": 160, + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 1.2, + "TR": 3200, + "TE": 404, + "Averages": 1.0, + "Concatenations": 1, + "Filter": "Raw filter, Prescan Normalize", + "Coil elements": "HE1-4;NE1,2" + }, + "Contrast - Common": { + "TR": 3200, + "TE": 404, + "MTC": "Off", + "Magn. preparation": "None", + "Fat suppr.": "None", + "Blood suppr.": "Off", + "Restore magn.": "Off" + }, + "Contrast - Dynamic": { + "Averages": 1.0, + "Reconstruction": "Magnitude", + "Measurements": 1, + "Multiple series": "Each measurement" + }, + "Resolution - Common": { + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 1.2, + "Base resolution": 256, + "Phase resolution": 101, + "Slice resolution": 100, + "Phase partial Fourier": "Allowed", + "Slice partial Fourier": "Off", + "Interpolation": "On" + }, + "Resolution - iPAT": { + "PAT mode": "GRAPPA", + "Accel. factor PE": 2, + "Ref. lines PE": 24, + "Accel. factor 3D": 1, + "Reference scan mode": "Integrated" + }, + "Resolution - Filter Image": { + "Image Filter": "Off", + "Distortion Corr.": "Off", + "Prescan Normalize": "On", + "Unfiltered images": "Off", + "Normalize": "Off", + "B1 filter": "Off" + }, + "Resolution - Filter Rawdata": { + "Raw filter": "On", + "Elliptical filter": "Off" + }, + "Geometry - Common": { + "Slab group": 1, + "Slabs": 1, + "Position": "L2.2 A23.3 F19.0", + "Orientation": "Sagittal", + "Phase enc. dir.": "A >> P", + "Slice oversampling": 0.0, + "Slices per slab": 160, + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 1.2, + "TR": 3200, + "Series": "Interleaved", + "Concatenations": 1 + }, + "Geometry - AutoAlign": { + "Slab group": 1, + "Position": "L2.2 A23.3 F19.0", + "Orientation": "Sagittal", + "Phase enc. 
dir.": "A >> P", + "AutoAlign": "---", + "Initial Position": "L2.2 A23.3 F19.0", + "L": 2.2, + "A": 23.3, + "F": 19.0, + "Initial Rotation": 0.0, + "Initial Orientation": "Sagittal" + }, + "Geometry - Saturation": { + "Fat suppr.": "None", + "Restore magn.": "Off", + "Special sat.": "None" + }, + "System - Miscellaneous": { + "Positioning mode": "FIX", + "Table position": 0, + "MSMA": "S - C - T", + "Sagittal": "R >> L", + "Coronal": "A >> P", + "Transversal": "F >> H", + "Coil Combine Mode": "Adaptive Combine", + "Save uncombined": "Off", + "Matrix Optimization": "Off", + "AutoAlign": "---", + "Coil Select Mode": "On - AutoCoilSelect" + }, + "System - Adjustments": { + "B0 Shim mode": "Standard", + "B1 Shim mode": "TrueForm", + "Adjust with body coil": "Off", + "Confirm freq. adjustment": "Off", + "Assume Dominant Fat": "Off", + "Assume Silicone": "Off", + "Adjustment Tolerance": "Auto" + }, + "System - Adjust Volume": { + "Position": "L2.2 A23.3 F19.0", + "Orientation": "Sagittal", + "Rotation": 0.0, + "A >> P": 240, + "F >> H": 240, + "R >> L": 192, + "Reset": "Off" + }, + "System - pTx Volumes": { + "B1 Shim mode": "TrueForm", + "Excitation": "Non-sel." + }, + "System - Tx/Rx": { + "Frequency 1H": "123.160045 MHz", + "Correction factor": 1, + "Gain": "High", + "Img. Scale Cor.": 1.0, + "Reset": "Off", + "? Ref. amplitude 1H": "0.000 V" + }, + "Physio - Signal1": { + "1st Signal/Mode": "None", + "Trigger delay": 0, + "TR": 3200, + "Concatenations": 1 + }, + "Physio - Cardiac": { + "Magn. preparation": "None", + "Fat suppr.": "None", + "Dark blood": "Off", + "FoV read": 240, + "FoV phase": 100.0, + "Phase resolution": 101 + }, + "Physio - PACE": { + "Resp. 
control": "Off", + "Concatenations": 1 + }, + "Inline - Common": { + "Subtract": "Off", + "Measurements": 1, + "StdDev": "Off", + "Save original images": "On" + }, + "Inline - MIP": { + "MIP-Sag": "Off", + "MIP-Cor": "Off", + "MIP-Tra": "Off", + "MIP-Time": "Off", + "Save original images": "On" + }, + "Inline - Composing": { + "Distortion Corr.": "Off" + }, + "Sequence - Part 1": { + "Introduction": "On", + "Dimension": "3D", + "Elliptical scanning": "Off", + "Reordering": "Linear", + "Flow comp.": "No", + "Echo spacing": 3.54, + "Adiabatic-mode": "Off", + "Bandwidth": 751 + }, + "Sequence - Part 2": { + "Echo train duration": 899, + "RF pulse type": "Normal", + "Gradient mode": "Fast", + "Excitation": "Non-sel.", + "Flip angle mode": "T2 var", + "Turbo factor": 282 + }, + "Sequence - Assistant": { + "Allowed delay": "30 s" + } + }, + "ncanda-mprage-v1": { + "name": "ncanda-mprage-v1 *", + "Id": "41a635b2-04bc-4671-a380-87f7326913fe", + "header_property": "TA: 8:08 PM: FIX Voxel size: 0.9\u00d70.9\u00d71.2 mmPAT: Off Rel. SNR: 1.00 : tfl ", + "Properties": { + "Prio recon": "Off", + "Load images to viewer": "On", + "Inline movie": "Off", + "Auto store images": "On", + "Load images to stamp segments": "Off", + "Load images to graphic segments": "Off", + "Auto open inline display": "Off", + "Auto close inline display": "Off", + "Start measurement without further preparation": "Off", + "Wait for user to start": "On", + "Start measurements": "Single measurement" + }, + "Routine": { + "Slab group": 1, + "Slabs": 1, + "Dist. factor": 50, + "Position": "L2.2 A23.3 F19.0", + "Orientation": "Sagittal", + "Phase enc. 
dir.": "A >> P", + "AutoAlign": "---", + "Phase oversampling": 0, + "Slice oversampling": 20.0, + "Slices per slab": 160, + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 1.2, + "TR": 1900.0, + "TE": 2.92, + "Averages": 1, + "Concatenations": 1, + "Filter": "Prescan Normalize", + "Coil elements": "HE1-4;NE1,2" + }, + "Contrast - Common": { + "TR": 1900.0, + "TE": 2.92, + "Magn. preparation": "Non-sel. IR", + "TI": 900, + "Flip angle": 9, + "Fat suppr.": "None", + "Water suppr.": "None" + }, + "Contrast - Dynamic": { + "Averages": 1, + "Averaging mode": "Long term", + "Reconstruction": "Magnitude", + "Measurements": 1, + "Multiple series": "Each measurement" + }, + "Resolution - Common": { + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 1.2, + "Base resolution": 256, + "Phase resolution": 100, + "Slice resolution": 100, + "Phase partial Fourier": "Off", + "Slice partial Fourier": "Off", + "Interpolation": "Off" + }, + "Resolution - iPAT": { + "PAT mode": "None" + }, + "Resolution - Filter Image": { + "Image Filter": "Off", + "Distortion Corr.": "Off", + "Prescan Normalize": "On", + "Unfiltered images": "Off", + "Normalize": "Off", + "B1 filter": "Off" + }, + "Resolution - Filter Rawdata": { + "Raw filter": "Off", + "Elliptical filter": "Off" + }, + "Geometry - Common": { + "Slab group": 1, + "Slabs": 1, + "Dist. factor": 50, + "Position": "L2.2 A23.3 F19.0", + "Orientation": "Sagittal", + "Phase enc. dir.": "A >> P", + "Slice oversampling": 20.0, + "Slices per slab": 160, + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 1.2, + "TR": 1900.0, + "Multi-slice mode": "Single shot", + "Series": "Ascending", + "Concatenations": 1 + }, + "Geometry - AutoAlign": { + "Slab group": 1, + "Position": "L2.2 A23.3 F19.0", + "Orientation": "Sagittal", + "Phase enc. 
dir.": "A >> P", + "AutoAlign": "---", + "Initial Position": "L2.2 A23.3 F19.0", + "L": 2.2, + "A": 23.3, + "F": 19.0, + "Initial Rotation": 0.0, + "Initial Orientation": "Sagittal" + }, + "System - Miscellaneous": { + "Positioning mode": "FIX", + "Table position": 0, + "MSMA": "S - C - T", + "Sagittal": "R >> L", + "Coronal": "A >> P", + "Transversal": "F >> H", + "Coil Combine Mode": "Adaptive Combine", + "Save uncombined": "Off", + "Matrix Optimization": "Off", + "AutoAlign": "---", + "Coil Select Mode": "On - AutoCoilSelect" + }, + "System - Adjustments": { + "B0 Shim mode": "Standard", + "B1 Shim mode": "TrueForm", + "Adjust with body coil": "On", + "Confirm freq. adjustment": "Off", + "Assume Dominant Fat": "Off", + "Assume Silicone": "Off", + "Adjustment Tolerance": "Auto" + }, + "System - Adjust Volume": { + "Position": "L2.2 A23.3 F19.0", + "Orientation": "Sagittal", + "Rotation": 0.0, + "A >> P": 240, + "F >> H": 240, + "R >> L": 192, + "Reset": "Off" + }, + "System - pTx Volumes": { + "B1 Shim mode": "TrueForm", + "Excitation": "Non-sel." + }, + "System - Tx/Rx": { + "Frequency 1H": "123.160045 MHz", + "Correction factor": 1, + "Gain": "Low", + "Img. Scale Cor.": 1.0, + "Reset": "Off", + "? Ref. amplitude 1H": "0.000 V" + }, + "Physio - Signal1": { + "1st Signal/Mode": "None", + "TR": 1900.0, + "Concatenations": 1 + }, + "Physio - Cardiac": { + "Magn. preparation": "Non-sel. IR", + "TI": 900, + "Fat suppr.": "None", + "Dark blood": "Off", + "FoV read": 240, + "FoV phase": 100.0, + "Phase resolution": 100 + }, + "Physio - PACE": { + "Resp. 
control": "Off", + "Concatenations": 1 + }, + "Inline - Common": { + "Subtract": "Off", + "Measurements": 1, + "StdDev": "Off", + "Save original images": "On" + }, + "Inline - MIP": { + "MIP-Sag": "Off", + "MIP-Cor": "Off", + "MIP-Tra": "Off", + "MIP-Time": "Off", + "Save original images": "On" + }, + "Inline - Composing": { + "Distortion Corr.": "Off" + }, + "Sequence - Part 1": { + "Introduction": "On", + "Dimension": "3D", + "Elliptical scanning": "Off", + "Reordering": "Linear", + "Asymmetric echo": "Allowed", + "Flow comp.": "No", + "Multi-slice mode": "Single shot", + "Echo spacing": 8.7, + "Bandwidth": 140 + }, + "Sequence - Part 2": { + "RF pulse type": "Normal", + "Gradient mode": "Normal", + "Excitation": "Non-sel.", + "RF spoiling": "On", + "Incr. Gradient spoiling": "Off", + "Turbo factor": 192 + }, + "Sequence - Assistant": { + "Mode": "Off" + } + }, + "ncanda-dti6b500pepolar-v1": { + "name": "ncanda-dti6b500pepolar-v1 *", + "Id": "169569d5-55eb-40e1-9ca1-25ac8bfe21ff", + "header_property": "TA: 1:22 PM: FIX Voxel size: 2.5\u00d72.5\u00d72.5 mmPAT: 2 Rel. SNR: 1.00 : epse ", + "Properties": { + "Prio recon": "Off", + "Load images to viewer": "On", + "Inline movie": "Off", + "Auto store images": "On", + "Load images to stamp segments": "Off", + "Load images to graphic segments": "Off", + "Auto open inline display": "Off", + "Auto close inline display": "Off", + "Start measurement without further preparation": "Off", + "Wait for user to start": "On", + "Start measurements": "Single measurement" + }, + "Routine": { + "Slice group": 1, + "Slices": 64, + "Dist. factor": 0, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. 
dir.": "A >> P", + "AutoAlign": "---", + "Phase oversampling": 0, + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 2.5, + "TR": 8000, + "TE": 89.0, + "Averages": 1, + "Concatenations": 1, + "Filter": "Raw filter", + "Coil elements": "HE1-4" + }, + "Contrast - Common": { + "TR": 8000, + "TE": 89.0, + "MTC": "Off", + "Magn. preparation": "None", + "Fat suppr.": "Fat sat.", + "Fat sat. mode": "Weak" + }, + "Contrast - Dynamic": { + "Averages": 1, + "Averaging mode": "Long term", + "Reconstruction": "Magnitude", + "Measurements": 1, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Resolution - Common": { + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 2.5, + "Base resolution": 96, + "Phase resolution": 100, + "Phase partial Fourier": "Off", + "Interpolation": "Off" + }, + "Resolution - iPAT": { + "Accel. mode": "GRAPPA", + "Accel. factor PE": 2, + "Ref. lines PE": 38, + "Reference scan mode": "EPI/separate" + }, + "Resolution - Filter Image": { + "Distortion Corr.": "Off", + "Prescan Normalize": "Off", + "Dynamic Field Corr.": "Off" + }, + "Resolution - Filter Rawdata": { + "Raw filter": "On", + "Elliptical filter": "Off" + }, + "Geometry - Common": { + "Slice group": 1, + "Slices": 64, + "Dist. factor": 0, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. dir.": "A >> P", + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 2.5, + "TR": 8000, + "Multi-slice mode": "Interleaved", + "Series": "Interleaved", + "Concatenations": 1 + }, + "Geometry - AutoAlign": { + "Slice group": 1, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. dir.": "A >> P", + "AutoAlign": "---", + "Initial Position": "L0.7 A21.9 F2.6", + "L": 0.7, + "A": 21.9, + "F": 2.6, + "Initial Rotation": 0.0, + "Initial Orientation": "Transversal" + }, + "Geometry - Saturation": { + "Fat suppr.": "Fat sat.", + "Fat sat. 
mode": "Weak", + "Special sat.": "None" + }, + "System - Miscellaneous": { + "Positioning mode": "FIX", + "Table position": 0, + "MSMA": "S - C - T", + "Sagittal": "R >> L", + "Coronal": "A >> P", + "Transversal": "F >> H", + "Coil Combine Mode": "Adaptive Combine", + "Matrix Optimization": "Off", + "AutoAlign": "---", + "Coil Select Mode": "On - AutoCoilSelect" + }, + "System - Adjustments": { + "B0 Shim mode": "Advanced", + "B1 Shim mode": "TrueForm", + "Adjust with body coil": "Off", + "Confirm freq. adjustment": "Off", + "Assume Dominant Fat": "Off", + "Assume Silicone": "Off", + "Adjustment Tolerance": "Auto" + }, + "System - Adjust Volume": { + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Rotation": 0.0, + "A >> P": 240, + "R >> L": 240, + "F >> H": 160, + "Reset": "Off" + }, + "System - pTx Volumes": { + "B1 Shim mode": "TrueForm", + "Excitation": "Standard" + }, + "System - Tx/Rx": { + "Frequency 1H": "123.160045 MHz", + "Correction factor": 1, + "Gain": "High", + "Img. Scale Cor.": 1.0, + "Reset": "Off", + "? Ref. amplitude 1H": "0.000 V" + }, + "Physio - Signal1": { + "1st Signal/Mode": "None", + "TR": 8000, + "Concatenations": 1 + }, + "Physio - PACE": { + "Resp. control": "Off", + "Concatenations": 1 + }, + "Diff - Neuro": { + "Diffusion mode": "MDDW", + "Diff. directions": 6, + "Diffusion Scheme": "Bipolar", + "Diff. weightings": 2, + "b-value 1": 1, + "b-value 2": 1, + "Diff. weighted images": "On", + "Trace weighted images": "On", + "ADC maps": "Off", + "FA maps": "Off", + "Mosaic": "On", + "Tensor": "Off", + "Noise level": 40 + }, + "Diff - Body": { + "Diffusion mode": "MDDW", + "Diff. directions": 6, + "Diffusion Scheme": "Bipolar", + "Diff. weightings": 2, + "b-value 1": 1, + "b-value 2": 1, + "Diff. 
weighted images": "On", + "Trace weighted images": "On", + "ADC maps": "Off", + "Exponential ADC Maps": "Off", + "FA maps": "Off", + "Invert Gray Scale": "Off", + "Calculated Image": "Off", + "b-Value >=": "0 s/mm\u00b2", + "Noise level": 40 + }, + "Diff - Composing": { + "Distortion Corr.": "Off" + }, + "Sequence - Part 1": { + "Introduction": "On", + "Optimization": "None", + "Multi-slice mode": "Interleaved", + "Free echo spacing": "Off", + "Echo spacing": 0.62, + "Bandwidth": 1860 + }, + "Sequence - Part 2": { + "EPI factor": 96, + "RF pulse type": "Normal", + "Gradient mode": "Fast", + "Excitation": "Standard" + } + }, + "ncanda-dti60b1000-v1": { + "name": "ncanda-dti60b1000-v1 *", + "Id": "e7a4335c-8a0a-42fa-8ba5-e18fa22dc0d1", + "header_property": "TA: 8:42 PM: FIX Voxel size: 2.5\u00d72.5\u00d72.5 mmPAT: 2 Rel. SNR: 1.00 : epse ", + "Properties": { + "Prio recon": "Off", + "Load images to viewer": "On", + "Inline movie": "Off", + "Auto store images": "On", + "Load images to stamp segments": "Off", + "Load images to graphic segments": "Off", + "Auto open inline display": "Off", + "Auto close inline display": "Off", + "Start measurement without further preparation": "Off", + "Wait for user to start": "On", + "Start measurements": "Single measurement" + }, + "Routine": { + "Slice group": 1, + "Slices": 64, + "Dist. factor": 0, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. dir.": "P >> A", + "AutoAlign": "---", + "Phase oversampling": 0, + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 2.5, + "TR": 8000, + "TE": 89.0, + "Averages": 1, + "Concatenations": 1, + "Filter": "Raw filter", + "Coil elements": "HE1-4" + }, + "Contrast - Common": { + "TR": 8000, + "TE": 89.0, + "MTC": "Off", + "Magn. preparation": "None", + "Fat suppr.": "Fat sat.", + "Fat sat. 
mode": "Weak" + }, + "Contrast - Dynamic": { + "Averages": 1, + "Averaging mode": "Long term", + "Reconstruction": "Magnitude", + "Measurements": 1, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Resolution - Common": { + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 2.5, + "Base resolution": 96, + "Phase resolution": 100, + "Phase partial Fourier": "Off", + "Interpolation": "Off" + }, + "Resolution - iPAT": { + "Accel. mode": "GRAPPA", + "Accel. factor PE": 2, + "Ref. lines PE": 38, + "Reference scan mode": "EPI/separate" + }, + "Resolution - Filter Image": { + "Distortion Corr.": "Off", + "Prescan Normalize": "Off", + "Dynamic Field Corr.": "Off" + }, + "Resolution - Filter Rawdata": { + "Raw filter": "On", + "Elliptical filter": "Off" + }, + "Geometry - Common": { + "Slice group": 1, + "Slices": 64, + "Dist. factor": 0, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. dir.": "P >> A", + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 2.5, + "TR": 8000, + "Multi-slice mode": "Interleaved", + "Series": "Interleaved", + "Concatenations": 1 + }, + "Geometry - AutoAlign": { + "Slice group": 1, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. dir.": "P >> A", + "AutoAlign": "---", + "Initial Position": "L0.7 A21.9 F2.6", + "L": 0.7, + "A": 21.9, + "F": 2.6, + "Initial Rotation": -180.0, + "Initial Orientation": "Transversal" + }, + "Geometry - Saturation": { + "Fat suppr.": "Fat sat.", + "Fat sat. 
mode": "Weak", + "Special sat.": "None" + }, + "System - Miscellaneous": { + "Positioning mode": "FIX", + "Table position": 0, + "MSMA": "S - C - T", + "Sagittal": "R >> L", + "Coronal": "A >> P", + "Transversal": "F >> H", + "Coil Combine Mode": "Adaptive Combine", + "Matrix Optimization": "Off", + "AutoAlign": "---", + "Coil Select Mode": "On - AutoCoilSelect" + }, + "System - Adjustments": { + "B0 Shim mode": "Advanced", + "B1 Shim mode": "TrueForm", + "Adjust with body coil": "Off", + "Confirm freq. adjustment": "Off", + "Assume Dominant Fat": "Off", + "Assume Silicone": "Off", + "Adjustment Tolerance": "Auto" + }, + "System - Adjust Volume": { + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Rotation": 180.0, + "A >> P": 240, + "R >> L": 240, + "F >> H": 160, + "Reset": "Off" + }, + "System - pTx Volumes": { + "B1 Shim mode": "TrueForm", + "Excitation": "Standard" + }, + "System - Tx/Rx": { + "Frequency 1H": "123.160045 MHz", + "Correction factor": 1, + "Gain": "High", + "Img. Scale Cor.": 1.0, + "Reset": "Off", + "? Ref. amplitude 1H": "0.000 V" + }, + "Physio - Signal1": { + "1st Signal/Mode": "None", + "TR": 8000, + "Concatenations": 1 + }, + "Physio - PACE": { + "Resp. control": "Off", + "Concatenations": 1 + }, + "Diff - Neuro": { + "Diffusion mode": "Free", + "Diff. directions": 61, + "Diffusion Scheme": "Bipolar", + "Diff. weightings": 2, + "b-value 1": 1, + "b-value 2": 1, + "Diff. weighted images": "On", + "Trace weighted images": "On", + "ADC maps": "On", + "FA maps": "On", + "Mosaic": "On", + "Tensor": "On", + "Noise level": 40 + }, + "Diff - Body": { + "Diffusion mode": "Free", + "Diff. directions": 61, + "Diffusion Scheme": "Bipolar", + "Diff. weightings": 2, + "b-value 1": 1, + "b-value 2": 1, + "Diff. 
weighted images": "On", + "Trace weighted images": "On", + "ADC maps": "On", + "Exponential ADC Maps": "Off", + "FA maps": "On", + "Invert Gray Scale": "Off", + "Calculated Image": "Off", + "b-Value >=": "0 s/mm\u00b2", + "Noise level": 40 + }, + "Diff - Composing": { + "Distortion Corr.": "Off" + }, + "Sequence - Part 1": { + "Introduction": "On", + "Optimization": "None", + "Multi-slice mode": "Interleaved", + "Free echo spacing": "Off", + "Echo spacing": 0.62, + "Bandwidth": 1860 + }, + "Sequence - Part 2": { + "EPI factor": 96, + "RF pulse type": "Normal", + "Gradient mode": "Fast", + "Excitation": "Standard" + } + }, + "ncanda-dti30b400-v1": { + "name": "ncanda-dti30b400-v1 *", + "Id": "5ea94598-904e-4b51-8c79-6238a9e2beb6", + "header_property": "TA: 4:42 PM: FIX Voxel size: 2.5\u00d72.5\u00d72.5 mmPAT: 2 Rel. SNR: 1.00 : epse ", + "Properties": { + "Prio recon": "Off", + "Load images to viewer": "On", + "Inline movie": "Off", + "Auto store images": "On", + "Load images to stamp segments": "Off", + "Load images to graphic segments": "Off", + "Auto open inline display": "Off", + "Auto close inline display": "Off", + "Start measurement without further preparation": "Off", + "Wait for user to start": "On", + "Start measurements": "Single measurement" + }, + "Routine": { + "Slice group": 1, + "Slices": 64, + "Dist. factor": 0, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. dir.": "P >> A", + "AutoAlign": "---", + "Phase oversampling": 0, + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 2.5, + "TR": 8000, + "TE": 89.0, + "Averages": 1, + "Concatenations": 1, + "Filter": "Raw filter", + "Coil elements": "HE1-4" + }, + "Contrast - Common": { + "TR": 8000, + "TE": 89.0, + "MTC": "Off", + "Magn. preparation": "None", + "Fat suppr.": "Fat sat.", + "Fat sat. 
mode": "Weak" + }, + "Contrast - Dynamic": { + "Averages": 1, + "Averaging mode": "Long term", + "Reconstruction": "Magnitude", + "Measurements": 1, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Resolution - Common": { + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 2.5, + "Base resolution": 96, + "Phase resolution": 100, + "Phase partial Fourier": "Off", + "Interpolation": "Off" + }, + "Resolution - iPAT": { + "Accel. mode": "GRAPPA", + "Accel. factor PE": 2, + "Ref. lines PE": 38, + "Reference scan mode": "EPI/separate" + }, + "Resolution - Filter Image": { + "Distortion Corr.": "Off", + "Prescan Normalize": "Off", + "Dynamic Field Corr.": "Off" + }, + "Resolution - Filter Rawdata": { + "Raw filter": "On", + "Elliptical filter": "Off" + }, + "Geometry - Common": { + "Slice group": 1, + "Slices": 64, + "Dist. factor": 0, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. dir.": "P >> A", + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 2.5, + "TR": 8000, + "Multi-slice mode": "Interleaved", + "Series": "Interleaved", + "Concatenations": 1 + }, + "Geometry - AutoAlign": { + "Slice group": 1, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. dir.": "P >> A", + "AutoAlign": "---", + "Initial Position": "L0.7 A21.9 F2.6", + "L": 0.7, + "A": 21.9, + "F": 2.6, + "Initial Rotation": -180.0, + "Initial Orientation": "Transversal" + }, + "Geometry - Saturation": { + "Fat suppr.": "Fat sat.", + "Fat sat. 
mode": "Weak", + "Special sat.": "None" + }, + "System - Miscellaneous": { + "Positioning mode": "FIX", + "Table position": 0, + "MSMA": "S - C - T", + "Sagittal": "R >> L", + "Coronal": "A >> P", + "Transversal": "F >> H", + "Coil Combine Mode": "Adaptive Combine", + "Matrix Optimization": "Off", + "AutoAlign": "---", + "Coil Select Mode": "On - AutoCoilSelect" + }, + "System - Adjustments": { + "B0 Shim mode": "Advanced", + "B1 Shim mode": "TrueForm", + "Adjust with body coil": "Off", + "Confirm freq. adjustment": "Off", + "Assume Dominant Fat": "Off", + "Assume Silicone": "Off", + "Adjustment Tolerance": "Auto" + }, + "System - Adjust Volume": { + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Rotation": 180.0, + "A >> P": 240, + "R >> L": 240, + "F >> H": 160, + "Reset": "Off" + }, + "System - pTx Volumes": { + "B1 Shim mode": "TrueForm", + "Excitation": "Standard" + }, + "System - Tx/Rx": { + "Frequency 1H": "123.160045 MHz", + "Correction factor": 1, + "Gain": "High", + "Img. Scale Cor.": 1.0, + "Reset": "Off", + "? Ref. amplitude 1H": "0.000 V" + }, + "Physio - Signal1": { + "1st Signal/Mode": "None", + "TR": 8000, + "Concatenations": 1 + }, + "Physio - PACE": { + "Resp. control": "Off", + "Concatenations": 1 + }, + "Diff - Neuro": { + "Diffusion mode": "Free", + "Diff. directions": 31, + "Diffusion Scheme": "Bipolar", + "Diff. weightings": 2, + "b-value 1": 1, + "b-value 2": 1, + "Diff. weighted images": "On", + "Trace weighted images": "On", + "ADC maps": "On", + "FA maps": "On", + "Mosaic": "On", + "Tensor": "On", + "Noise level": 40 + }, + "Diff - Body": { + "Diffusion mode": "Free", + "Diff. directions": 31, + "Diffusion Scheme": "Bipolar", + "Diff. weightings": 2, + "b-value 1": 1, + "b-value 2": 1, + "Diff. 
weighted images": "On", + "Trace weighted images": "On", + "ADC maps": "On", + "Exponential ADC Maps": "Off", + "FA maps": "On", + "Invert Gray Scale": "Off", + "Calculated Image": "Off", + "b-Value >=": "0 s/mm\u00b2", + "Noise level": 40 + }, + "Diff - Composing": { + "Distortion Corr.": "Off" + }, + "Sequence - Part 1": { + "Introduction": "On", + "Optimization": "None", + "Multi-slice mode": "Interleaved", + "Free echo spacing": "Off", + "Echo spacing": 0.62, + "Bandwidth": 1860 + }, + "Sequence - Part 2": { + "EPI factor": 96, + "RF pulse type": "Normal", + "Gradient mode": "Fast", + "Excitation": "Standard" + } + }, + "ncanda-grefieldmap-v1": { + "name": "ncanda-grefieldmap-v1 *", + "Id": "085ebe03-a8f3-4906-a572-d443167d1ff8", + "header_property": "TA: 2:26 PM: FIX Voxel size: 2.5\u00d72.5\u00d72.5 mmRel. SNR: 1.00 : fm_r ", + "Properties": { + "Prio recon": "Off", + "Load images to viewer": "On", + "Inline movie": "Off", + "Auto store images": "On", + "Load images to stamp segments": "Off", + "Load images to graphic segments": "Off", + "Auto open inline display": "Off", + "Auto close inline display": "Off", + "Start measurement without further preparation": "Off", + "Wait for user to start": "On", + "Start measurements": "Single measurement" + }, + "Routine": { + "Slice group": 1, + "Slices": 64, + "Dist. factor": 0, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. 
dir.": "P >> A", + "AutoAlign": "---", + "Phase oversampling": 0, + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 2.5, + "TR": 752.0, + "TE 1": 5.68, + "TE 2": 8.14, + "Averages": 1, + "Concatenations": 1, + "Filter": "None", + "Coil elements": "HE1-4" + }, + "Contrast - Common": { + "TR": 752.0, + "TE 1": 5.68, + "TE 2": 8.14, + "MTC": "Off", + "Flip angle": 60, + "Fat suppr.": "None" + }, + "Contrast - Dynamic": { + "Averages": 1, + "Averaging mode": "Long term", + "Reconstruction": "Magn./Phase", + "Measurements": 1, + "Multiple series": "Off" + }, + "Resolution - Common": { + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 2.5, + "Base resolution": 96, + "Phase resolution": 100, + "Phase partial Fourier": "Off", + "Interpolation": "Off" + }, + "Resolution - Filter Image": { + "Image Filter": "Off", + "Distortion Corr.": "Off", + "Prescan Normalize": "Off", + "Normalize": "Off", + "B1 filter": "Off" + }, + "Resolution - Filter Rawdata": { + "Raw filter": "Off", + "Elliptical filter": "Off" + }, + "Geometry - Common": { + "Slice group": 1, + "Slices": 64, + "Dist. factor": 0, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. dir.": "P >> A", + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 2.5, + "TR": 752.0, + "Multi-slice mode": "Interleaved", + "Series": "Interleaved", + "Concatenations": 1 + }, + "Geometry - AutoAlign": { + "Slice group": 1, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. 
dir.": "P >> A", + "AutoAlign": "---", + "Initial Position": "L0.7 A21.9 F2.6", + "L": 0.7, + "A": 21.9, + "F": 2.6, + "Initial Rotation": -180.0, + "Initial Orientation": "Transversal" + }, + "Geometry - Saturation": { + "Fat suppr.": "None", + "Special sat.": "None" + }, + "System - Miscellaneous": { + "Positioning mode": "FIX", + "Table position": 0, + "MSMA": "S - C - T", + "Sagittal": "R >> L", + "Coronal": "A >> P", + "Transversal": "F >> H", + "Coil Combine Mode": "Sum of Squares", + "Save uncombined": "Off", + "Matrix Optimization": "Off", + "AutoAlign": "---", + "Coil Select Mode": "On - AutoCoilSelect" + }, + "System - Adjustments": { + "B0 Shim mode": "Advanced", + "B1 Shim mode": "TrueForm", + "Adjust with body coil": "Off", + "Confirm freq. adjustment": "Off", + "Assume Dominant Fat": "Off", + "Assume Silicone": "Off", + "Adjustment Tolerance": "Auto" + }, + "System - Adjust Volume": { + "! Position": "L0.7 A21.9 F2.6", + "! Orientation": "Transversal", + "! Rotation": 180.0, + "! A >> P": 240, + "! R >> L": 240, + "! F >> H": 160, + "Reset": "Off" + }, + "System - pTx Volumes": { + "B1 Shim mode": "TrueForm" + }, + "System - Tx/Rx": { + "Frequency 1H": "123.160045 MHz", + "Correction factor": 1, + "Gain": "High", + "Img. Scale Cor.": 1.0, + "Reset": "Off", + "? Ref. amplitude 1H": "0.000 V" + }, + "Sequence - Part 1": { + "Introduction": "Off", + "Dimension": "2D", + "Asymmetric echo": "Off", + "Contrasts": 2, + "Flow comp.": "Yes", + "Multi-slice mode": "Interleaved", + "Bandwidth": 266 + }, + "Sequence - Part 2": { + "RF pulse type": "Normal", + "Gradient mode": "Normal", + "RF spoiling": "On" + }, + "Sequence - Assistant": { + "Mode": "Off" + } + }, + "ncanda-rsfmri-v1": { + "name": "ncanda-rsfmri-v1 *", + "Id": "44597b9a-f741-4998-8716-43937802e15c", + "header_property": "TA: 12:13 PM: FIX Voxel size: 3.8\u00d73.8\u00d75.0 mmPAT: 2 Rel. 
SNR: 1.00 : epfid ", + "Properties": { + "Prio recon": "Off", + "Load images to viewer": "On", + "Inline movie": "Off", + "Auto store images": "On", + "Load images to stamp segments": "Off", + "Load images to graphic segments": "Off", + "Auto open inline display": "Off", + "Auto close inline display": "Off", + "Start measurement without further preparation": "Off", + "Wait for user to start": "On", + "Start measurements": "Single measurement" + }, + "Routine": { + "Slice group": 1, + "Slices": 32, + "Dist. factor": 0, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. dir.": "P >> A", + "AutoAlign": "---", + "Phase oversampling": 0, + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 5.0, + "TR": 2200, + "TE": 30.0, + "Averages": 1, + "Concatenations": 1, + "Filter": "Prescan Normalize", + "Coil elements": "HE1-4" + }, + "Contrast - Common": { + "TR": 2200, + "TE": 30.0, + "MTC": "Off", + "Flip angle": 79, + "Fat suppr.": "Fat sat." + }, + "Contrast - Dynamic": { + "Averages": 1, + "Averaging mode": "Long term", + "Reconstruction": "Magnitude", + "Measurements": 330, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Resolution - Common": { + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 5.0, + "Base resolution": 64, + "Phase resolution": 100, + "Phase partial Fourier": "Off", + "Interpolation": "Off" + }, + "Resolution - iPAT": { + "Accel. mode": "GRAPPA", + "Accel. factor PE": 2, + "Ref. lines PE": 24, + "Reference scan mode": "EPI/separate" + }, + "Resolution - Filter Image": { + "Distortion Corr.": "Off", + "Prescan Normalize": "On" + }, + "Resolution - Filter Rawdata": { + "Raw filter": "Off", + "Elliptical filter": "Off", + "Hamming": "Off" + }, + "Geometry - Common": { + "Slice group": 1, + "Slices": 32, + "Dist. factor": 0, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. 
dir.": "P >> A", + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 5.0, + "TR": 2200, + "Multi-slice mode": "Interleaved", + "Series": "Interleaved", + "Concatenations": 1 + }, + "Geometry - AutoAlign": { + "Slice group": 1, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. dir.": "P >> A", + "AutoAlign": "---", + "Initial Position": "L0.7 A21.9 F2.6", + "L": 0.7, + "A": 21.9, + "F": 2.6, + "Initial Rotation": -180.0, + "Initial Orientation": "Transversal" + }, + "Geometry - Saturation": { + "Fat suppr.": "Fat sat.", + "Special sat.": "None" + }, + "System - Miscellaneous": { + "Positioning mode": "FIX", + "Table position": 0, + "MSMA": "S - C - T", + "Sagittal": "R >> L", + "Coronal": "A >> P", + "Transversal": "F >> H", + "Coil Combine Mode": "Sum of Squares", + "Matrix Optimization": "Off", + "AutoAlign": "---", + "Coil Select Mode": "On - AutoCoilSelect" + }, + "System - Adjustments": { + "B0 Shim mode": "Advanced", + "B1 Shim mode": "TrueForm", + "Adjust with body coil": "Off", + "Confirm freq. adjustment": "Off", + "Assume Dominant Fat": "Off", + "Assume Silicone": "Off", + "Adjustment Tolerance": "Auto" + }, + "System - Adjust Volume": { + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Rotation": 180.0, + "A >> P": 240, + "R >> L": 240, + "F >> H": 160, + "Reset": "Off" + }, + "System - pTx Volumes": { + "B1 Shim mode": "TrueForm", + "Excitation": "Standard" + }, + "System - Tx/Rx": { + "Frequency 1H": "123.160045 MHz", + "Correction factor": 1, + "Gain": "High", + "Img. Scale Cor.": 1.0, + "Reset": "Off", + "? Ref. amplitude 1H": "0.000 V" + }, + "Physio - Signal1": { + "1st Signal/Mode": "None", + "TR": 2200, + "Concatenations": 1 + }, + "BOLD": { + "GLM Statistics": "Off", + "Dynamic t-maps": "Off", + "Ignore meas. at start": 0, + "Ignore after transition": 0, + "Model transition states": "On", + "Temp. 
highpass filter": "On", + "Threshold": 4.0, + "Paradigm size": 3, + "Meas[1]": "Baseline", + "Meas[2]": "Baseline", + "Meas[3]": "Active", + "Motion correction": "On", + "Spatial filter": "Off", + "Measurements": 330, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Sequence - Part 1": { + "Introduction": "Off", + "Multi-slice mode": "Interleaved", + "Free echo spacing": "Off", + "Echo spacing": 0.55, + "Bandwidth": 2170 + }, + "Sequence - Part 2": { + "EPI factor": 64, + "RF pulse type": "Normal", + "Gradient mode": "Fast*", + "Excitation": "Standard" + } + }, + "ncanda-fmri-ringrewards-v1": { + "name": "ncanda-fmri-ringrewards-v1 *", + "Id": "ce4f59a6-862b-4957-a986-170f2c19f731", + "header_property": "TA: 5:12 PM: FIX Voxel size: 3.1\u00d73.1\u00d73.2 mmPAT: 2 Rel. SNR: 1.00 : epfid ", + "Properties": { + "Prio recon": "Off", + "Load images to viewer": "On", + "Inline movie": "Off", + "Auto store images": "On", + "Load images to stamp segments": "Off", + "Load images to graphic segments": "Off", + "Auto open inline display": "Off", + "Auto close inline display": "Off", + "Start measurement without further preparation": "Off", + "Wait for user to start": "On", + "Start measurements": "Single measurement" + }, + "Routine": { + "Slice group": 1, + "Slices": 29, + "Dist. factor": 0, + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Phase enc. dir.": "A >> P", + "AutoAlign": "---", + "Phase oversampling": 0, + "FoV read": 200, + "FoV phase": 100.0, + "Slice thickness": 3.2, + "TR": 1520, + "TE": 28.0, + "Averages": 1, + "Concatenations": 1, + "Filter": "Prescan Normalize", + "Coil elements": "HE1-4" + }, + "Contrast - Common": { + "TR": 1520, + "TE": 28.0, + "MTC": "Off", + "Flip angle": 73, + "Fat suppr.": "Fat sat." 
+ }, + "Contrast - Dynamic": { + "Averages": 1, + "Averaging mode": "Long term", + "Reconstruction": "Magnitude", + "Measurements": 202, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Resolution - Common": { + "FoV read": 200, + "FoV phase": 100.0, + "Slice thickness": 3.2, + "Base resolution": 64, + "Phase resolution": 100, + "Phase partial Fourier": "Off", + "Interpolation": "Off" + }, + "Resolution - iPAT": { + "Accel. mode": "GRAPPA", + "Accel. factor PE": 2, + "Ref. lines PE": 24, + "Reference scan mode": "EPI/separate" + }, + "Resolution - Filter Image": { + "Distortion Corr.": "Off", + "Prescan Normalize": "On" + }, + "Resolution - Filter Rawdata": { + "Raw filter": "Off", + "Elliptical filter": "Off", + "Hamming": "Off" + }, + "Geometry - Common": { + "Slice group": 1, + "Slices": 29, + "Dist. factor": 0, + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Phase enc. dir.": "A >> P", + "FoV read": 200, + "FoV phase": 100.0, + "Slice thickness": 3.2, + "TR": 1520, + "Multi-slice mode": "Interleaved", + "Series": "Interleaved", + "Concatenations": 1 + }, + "Geometry - AutoAlign": { + "Slice group": 1, + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Phase enc. dir.": "A >> P", + "AutoAlign": "---", + "Initial Position": "L0.0 A12.6 H18.8", + "L": 0.0, + "A": 12.6, + "H": 18.8, + "Initial Rotation": 0.0, + "Initial Orientation": "T > C", + "T > C": -2.3, + "> S": 0.0 + }, + "Geometry - Saturation": { + "Fat suppr.": "Fat sat.", + "Special sat.": "None" + }, + "System - Miscellaneous": { + "Positioning mode": "FIX", + "Table position": 0, + "MSMA": "S - C - T", + "Sagittal": "R >> L", + "Coronal": "P >> A", + "Transversal": "H >> F", + "Coil Combine Mode": "Sum of Squares", + "Matrix Optimization": "Off", + "AutoAlign": "---", + "Coil Select Mode": "On - AutoCoilSelect" + }, + "System - Adjustments": { + "B0 Shim mode": "Advanced", + "B1 Shim mode": "TrueForm", + "Adjust with body coil": "Off", + "Confirm freq. 
adjustment": "Off", + "Assume Dominant Fat": "Off", + "Assume Silicone": "Off", + "Adjustment Tolerance": "Auto" + }, + "System - Adjust Volume": { + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Rotation": 0.0, + "A >> P": 200, + "R >> L": 200, + "F >> H": 93, + "Reset": "Off" + }, + "System - pTx Volumes": { + "B1 Shim mode": "TrueForm", + "Excitation": "Standard" + }, + "System - Tx/Rx": { + "Frequency 1H": "123.160045 MHz", + "Correction factor": 1, + "Gain": "High", + "Img. Scale Cor.": 1.0, + "Reset": "Off", + "? Ref. amplitude 1H": "0.000 V" + }, + "Physio - Signal1": { + "1st Signal/Mode": "None", + "TR": 1520, + "Concatenations": 1 + }, + "BOLD": { + "GLM Statistics": "Off", + "Dynamic t-maps": "Off", + "Ignore meas. at start": 0, + "Ignore after transition": 0, + "Model transition states": "On", + "Temp. highpass filter": "On", + "Threshold": 4.0, + "Paradigm size": 30, + "Meas[1]": "Active", + "Meas[2]": "Active", + "Meas[3]": "Active", + "Meas[4]": "Active", + "Meas[5]": "Active", + "Meas[6]": "Active", + "Meas[7]": "Active", + "Meas[8]": "Active", + "Meas[9]": "Active", + "Meas[10]": "Active", + "Meas[11]": "Active", + "Meas[12]": "Active", + "Meas[13]": "Active", + "Meas[14]": "Active", + "Meas[15]": "Active", + "Meas[16]": "Baseline", + "Meas[17]": "Baseline", + "Meas[18]": "Baseline", + "Meas[19]": "Baseline", + "Meas[20]": "Baseline", + "Meas[21]": "Baseline", + "Meas[22]": "Baseline", + "Meas[23]": "Baseline", + "Meas[24]": "Baseline", + "Meas[25]": "Baseline", + "Meas[26]": "Baseline", + "Meas[27]": "Baseline", + "Meas[28]": "Baseline", + "Meas[29]": "Baseline", + "Meas[30]": "Baseline", + "Motion correction": "On", + "Spatial filter": "Off", + "Measurements": 202, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Sequence - Part 1": { + "Introduction": "Off", + "Multi-slice mode": "Interleaved", + "Free echo spacing": "Off", + "Echo spacing": 0.55, + "Bandwidth": 2170 + }, + "Sequence - Part 2": { + "EPI factor": 64, + 
"RF pulse type": "Normal", + "Gradient mode": "Fast*", + "Excitation": "Standard" + } + }, + "ncanda-fmri-ringrewards-v2": { + "name": "ncanda-fmri-ringrewards-v2 *", + "Id": "351278bb-2e68-4d31-a815-57f3c2c5b7f7", + "header_property": "TA: 5:12 PM: FIX Voxel size: 3.1\u00d73.1\u00d73.2 mmPAT: 2 Rel. SNR: 1.00 : epfid ", + "Properties": { + "Prio recon": "Off", + "Load images to viewer": "On", + "Inline movie": "Off", + "Auto store images": "On", + "Load images to stamp segments": "Off", + "Load images to graphic segments": "Off", + "Auto open inline display": "Off", + "Auto close inline display": "Off", + "Start measurement without further preparation": "Off", + "Wait for user to start": "On", + "Start measurements": "Single measurement" + }, + "Routine": { + "Slice group": 1, + "Slices": 29, + "Dist. factor": 0, + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Phase enc. dir.": "A >> P", + "AutoAlign": "---", + "Phase oversampling": 0, + "FoV read": 200, + "FoV phase": 100.0, + "Slice thickness": 3.2, + "TR": 1520, + "TE": 28.0, + "Averages": 1, + "Concatenations": 1, + "Filter": "Prescan Normalize", + "Coil elements": "HE1-4" + }, + "Contrast - Common": { + "TR": 1520, + "TE": 28.0, + "MTC": "Off", + "Flip angle": 73, + "Fat suppr.": "Fat sat." + }, + "Contrast - Dynamic": { + "Averages": 1, + "Averaging mode": "Long term", + "Reconstruction": "Magnitude", + "Measurements": 202, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Resolution - Common": { + "FoV read": 200, + "FoV phase": 100.0, + "Slice thickness": 3.2, + "Base resolution": 64, + "Phase resolution": 100, + "Phase partial Fourier": "Off", + "Interpolation": "Off" + }, + "Resolution - iPAT": { + "Accel. mode": "GRAPPA", + "Accel. factor PE": 2, + "Ref. 
lines PE": 24, + "Reference scan mode": "EPI/separate" + }, + "Resolution - Filter Image": { + "Distortion Corr.": "Off", + "Prescan Normalize": "On" + }, + "Resolution - Filter Rawdata": { + "Raw filter": "Off", + "Elliptical filter": "Off", + "Hamming": "Off" + }, + "Geometry - Common": { + "Slice group": 1, + "Slices": 29, + "Dist. factor": 0, + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Phase enc. dir.": "A >> P", + "FoV read": 200, + "FoV phase": 100.0, + "Slice thickness": 3.2, + "TR": 1520, + "Multi-slice mode": "Interleaved", + "Series": "Interleaved", + "Concatenations": 1 + }, + "Geometry - AutoAlign": { + "Slice group": 1, + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Phase enc. dir.": "A >> P", + "AutoAlign": "---", + "Initial Position": "L0.0 A12.6 H18.8", + "L": 0.0, + "A": 12.6, + "H": 18.8, + "Initial Rotation": 0.0, + "Initial Orientation": "T > C", + "T > C": -2.3, + "> S": 0.0 + }, + "Geometry - Saturation": { + "Fat suppr.": "Fat sat.", + "Special sat.": "None" + }, + "System - Miscellaneous": { + "Positioning mode": "FIX", + "Table position": 0, + "MSMA": "S - C - T", + "Sagittal": "R >> L", + "Coronal": "P >> A", + "Transversal": "H >> F", + "Coil Combine Mode": "Sum of Squares", + "Matrix Optimization": "Off", + "AutoAlign": "---", + "Coil Select Mode": "On - AutoCoilSelect" + }, + "System - Adjustments": { + "B0 Shim mode": "Advanced", + "B1 Shim mode": "TrueForm", + "Adjust with body coil": "Off", + "Confirm freq. 
adjustment": "Off", + "Assume Dominant Fat": "Off", + "Assume Silicone": "Off", + "Adjustment Tolerance": "Auto" + }, + "System - Adjust Volume": { + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Rotation": 0.0, + "A >> P": 200, + "R >> L": 200, + "F >> H": 93, + "Reset": "Off" + }, + "System - pTx Volumes": { + "B1 Shim mode": "TrueForm", + "Excitation": "Standard" + }, + "System - Tx/Rx": { + "Frequency 1H": "123.160045 MHz", + "Correction factor": 1, + "Gain": "High", + "Img. Scale Cor.": 1.0, + "Reset": "Off", + "? Ref. amplitude 1H": "0.000 V" + }, + "Physio - Signal1": { + "1st Signal/Mode": "None", + "TR": 1520, + "Concatenations": 1 + }, + "BOLD": { + "GLM Statistics": "Off", + "Dynamic t-maps": "Off", + "Ignore meas. at start": 0, + "Ignore after transition": 0, + "Model transition states": "On", + "Temp. highpass filter": "On", + "Threshold": 4.0, + "Paradigm size": 30, + "Meas[1]": "Active", + "Meas[2]": "Active", + "Meas[3]": "Active", + "Meas[4]": "Active", + "Meas[5]": "Active", + "Meas[6]": "Active", + "Meas[7]": "Active", + "Meas[8]": "Active", + "Meas[9]": "Active", + "Meas[10]": "Active", + "Meas[11]": "Active", + "Meas[12]": "Active", + "Meas[13]": "Active", + "Meas[14]": "Active", + "Meas[15]": "Active", + "Meas[16]": "Baseline", + "Meas[17]": "Baseline", + "Meas[18]": "Baseline", + "Meas[19]": "Baseline", + "Meas[20]": "Baseline", + "Meas[21]": "Baseline", + "Meas[22]": "Baseline", + "Meas[23]": "Baseline", + "Meas[24]": "Baseline", + "Meas[25]": "Baseline", + "Meas[26]": "Baseline", + "Meas[27]": "Baseline", + "Meas[28]": "Baseline", + "Meas[29]": "Baseline", + "Meas[30]": "Baseline", + "Motion correction": "On", + "Spatial filter": "Off", + "Measurements": 202, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Sequence - Part 1": { + "Introduction": "Off", + "Multi-slice mode": "Interleaved", + "Free echo spacing": "Off", + "Echo spacing": 0.55, + "Bandwidth": 2170 + }, + "Sequence - Part 2": { + "EPI factor": 64, + 
"RF pulse type": "Normal", + "Gradient mode": "Fast*", + "Excitation": "Standard" + } + }, + "ncanda-fmri-ringrewards-v3": { + "name": "ncanda-fmri-ringrewards-v3 *", + "Id": "60970f18-de0a-4925-893c-c03bd70e923f", + "header_property": "TA: 5:12 PM: FIX Voxel size: 3.1\u00d73.1\u00d73.2 mmPAT: 2 Rel. SNR: 1.00 : epfid ", + "Properties": { + "Prio recon": "Off", + "Load images to viewer": "On", + "Inline movie": "Off", + "Auto store images": "On", + "Load images to stamp segments": "Off", + "Load images to graphic segments": "Off", + "Auto open inline display": "Off", + "Auto close inline display": "Off", + "Start measurement without further preparation": "Off", + "Wait for user to start": "On", + "Start measurements": "Single measurement" + }, + "Routine": { + "Slice group": 1, + "Slices": 29, + "Dist. factor": 0, + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Phase enc. dir.": "A >> P", + "AutoAlign": "---", + "Phase oversampling": 0, + "FoV read": 200, + "FoV phase": 100.0, + "Slice thickness": 3.2, + "TR": 1520, + "TE": 28.0, + "Averages": 1, + "Concatenations": 1, + "Filter": "Prescan Normalize", + "Coil elements": "HE1-4" + }, + "Contrast - Common": { + "TR": 1520, + "TE": 28.0, + "MTC": "Off", + "Flip angle": 73, + "Fat suppr.": "Fat sat." + }, + "Contrast - Dynamic": { + "Averages": 1, + "Averaging mode": "Long term", + "Reconstruction": "Magnitude", + "Measurements": 202, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Resolution - Common": { + "FoV read": 200, + "FoV phase": 100.0, + "Slice thickness": 3.2, + "Base resolution": 64, + "Phase resolution": 100, + "Phase partial Fourier": "Off", + "Interpolation": "Off" + }, + "Resolution - iPAT": { + "Accel. mode": "GRAPPA", + "Accel. factor PE": 2, + "Ref. 
lines PE": 24, + "Reference scan mode": "EPI/separate" + }, + "Resolution - Filter Image": { + "Distortion Corr.": "Off", + "Prescan Normalize": "On" + }, + "Resolution - Filter Rawdata": { + "Raw filter": "Off", + "Elliptical filter": "Off", + "Hamming": "Off" + }, + "Geometry - Common": { + "Slice group": 1, + "Slices": 29, + "Dist. factor": 0, + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Phase enc. dir.": "A >> P", + "FoV read": 200, + "FoV phase": 100.0, + "Slice thickness": 3.2, + "TR": 1520, + "Multi-slice mode": "Interleaved", + "Series": "Interleaved", + "Concatenations": 1 + }, + "Geometry - AutoAlign": { + "Slice group": 1, + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Phase enc. dir.": "A >> P", + "AutoAlign": "---", + "Initial Position": "L0.0 A12.6 H18.8", + "L": 0.0, + "A": 12.6, + "H": 18.8, + "Initial Rotation": 0.0, + "Initial Orientation": "T > C", + "T > C": -2.3, + "> S": 0.0 + }, + "Geometry - Saturation": { + "Fat suppr.": "Fat sat.", + "Special sat.": "None" + }, + "System - Miscellaneous": { + "Positioning mode": "FIX", + "Table position": 0, + "MSMA": "S - C - T", + "Sagittal": "R >> L", + "Coronal": "P >> A", + "Transversal": "H >> F", + "Coil Combine Mode": "Sum of Squares", + "Matrix Optimization": "Off", + "AutoAlign": "---", + "Coil Select Mode": "On - AutoCoilSelect" + }, + "System - Adjustments": { + "B0 Shim mode": "Advanced", + "B1 Shim mode": "TrueForm", + "Adjust with body coil": "Off", + "Confirm freq. 
adjustment": "Off", + "Assume Dominant Fat": "Off", + "Assume Silicone": "Off", + "Adjustment Tolerance": "Auto" + }, + "System - Adjust Volume": { + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Rotation": 0.0, + "A >> P": 200, + "R >> L": 200, + "F >> H": 93, + "Reset": "Off" + }, + "System - pTx Volumes": { + "B1 Shim mode": "TrueForm", + "Excitation": "Standard" + }, + "System - Tx/Rx": { + "Frequency 1H": "123.160045 MHz", + "Correction factor": 1, + "Gain": "High", + "Img. Scale Cor.": 1.0, + "Reset": "Off", + "? Ref. amplitude 1H": "0.000 V" + }, + "Physio - Signal1": { + "1st Signal/Mode": "None", + "TR": 1520, + "Concatenations": 1 + }, + "BOLD": { + "GLM Statistics": "Off", + "Dynamic t-maps": "Off", + "Ignore meas. at start": 0, + "Ignore after transition": 0, + "Model transition states": "On", + "Temp. highpass filter": "On", + "Threshold": 4.0, + "Paradigm size": 30, + "Meas[1]": "Active", + "Meas[2]": "Active", + "Meas[3]": "Active", + "Meas[4]": "Active", + "Meas[5]": "Active", + "Meas[6]": "Active", + "Meas[7]": "Active", + "Meas[8]": "Active", + "Meas[9]": "Active", + "Meas[10]": "Active", + "Meas[11]": "Active", + "Meas[12]": "Active", + "Meas[13]": "Active", + "Meas[14]": "Active", + "Meas[15]": "Active", + "Meas[16]": "Baseline", + "Meas[17]": "Baseline", + "Meas[18]": "Baseline", + "Meas[19]": "Baseline", + "Meas[20]": "Baseline", + "Meas[21]": "Baseline", + "Meas[22]": "Baseline", + "Meas[23]": "Baseline", + "Meas[24]": "Baseline", + "Meas[25]": "Baseline", + "Meas[26]": "Baseline", + "Meas[27]": "Baseline", + "Meas[28]": "Baseline", + "Meas[29]": "Baseline", + "Meas[30]": "Baseline", + "Motion correction": "On", + "Spatial filter": "Off", + "Measurements": 202, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Sequence - Part 1": { + "Introduction": "Off", + "Multi-slice mode": "Interleaved", + "Free echo spacing": "Off", + "Echo spacing": 0.55, + "Bandwidth": 2170 + }, + "Sequence - Part 2": { + "EPI factor": 64, + 
"RF pulse type": "Normal", + "Gradient mode": "Fast*", + "Excitation": "Standard" + } + }, + "ncanda-fmri-ringrewards-v4": { + "name": "ncanda-fmri-ringrewards-v4 *", + "Id": "33428fda-9313-4dd2-868a-31f924864840", + "header_property": "TA: 5:12 PM: FIX Voxel size: 3.1\u00d73.1\u00d73.2 mmPAT: 2 Rel. SNR: 1.00 : epfid ", + "Properties": { + "Prio recon": "Off", + "Load images to viewer": "On", + "Inline movie": "Off", + "Auto store images": "On", + "Load images to stamp segments": "Off", + "Load images to graphic segments": "Off", + "Auto open inline display": "Off", + "Auto close inline display": "Off", + "Start measurement without further preparation": "Off", + "Wait for user to start": "On", + "Start measurements": "Single measurement" + }, + "Routine": { + "Slice group": 1, + "Slices": 29, + "Dist. factor": 0, + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Phase enc. dir.": "A >> P", + "AutoAlign": "---", + "Phase oversampling": 0, + "FoV read": 200, + "FoV phase": 100.0, + "Slice thickness": 3.2, + "TR": 1520, + "TE": 28.0, + "Averages": 1, + "Concatenations": 1, + "Filter": "Prescan Normalize", + "Coil elements": "HE1-4" + }, + "Contrast - Common": { + "TR": 1520, + "TE": 28.0, + "MTC": "Off", + "Flip angle": 73, + "Fat suppr.": "Fat sat." + }, + "Contrast - Dynamic": { + "Averages": 1, + "Averaging mode": "Long term", + "Reconstruction": "Magnitude", + "Measurements": 202, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Resolution - Common": { + "FoV read": 200, + "FoV phase": 100.0, + "Slice thickness": 3.2, + "Base resolution": 64, + "Phase resolution": 100, + "Phase partial Fourier": "Off", + "Interpolation": "Off" + }, + "Resolution - iPAT": { + "Accel. mode": "GRAPPA", + "Accel. factor PE": 2, + "Ref. 
lines PE": 24, + "Reference scan mode": "EPI/separate" + }, + "Resolution - Filter Image": { + "Distortion Corr.": "Off", + "Prescan Normalize": "On" + }, + "Resolution - Filter Rawdata": { + "Raw filter": "Off", + "Elliptical filter": "Off", + "Hamming": "Off" + }, + "Geometry - Common": { + "Slice group": 1, + "Slices": 29, + "Dist. factor": 0, + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Phase enc. dir.": "A >> P", + "FoV read": 200, + "FoV phase": 100.0, + "Slice thickness": 3.2, + "TR": 1520, + "Multi-slice mode": "Interleaved", + "Series": "Interleaved", + "Concatenations": 1 + }, + "Geometry - AutoAlign": { + "Slice group": 1, + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Phase enc. dir.": "A >> P", + "AutoAlign": "---", + "Initial Position": "L0.0 A12.6 H18.8", + "L": 0.0, + "A": 12.6, + "H": 18.8, + "Initial Rotation": 0.0, + "Initial Orientation": "T > C", + "T > C": -2.3, + "> S": 0.0 + }, + "Geometry - Saturation": { + "Fat suppr.": "Fat sat.", + "Special sat.": "None" + }, + "System - Miscellaneous": { + "Positioning mode": "FIX", + "Table position": 0, + "MSMA": "S - C - T", + "Sagittal": "R >> L", + "Coronal": "P >> A", + "Transversal": "H >> F", + "Coil Combine Mode": "Sum of Squares", + "Matrix Optimization": "Off", + "AutoAlign": "---", + "Coil Select Mode": "On - AutoCoilSelect" + }, + "System - Adjustments": { + "B0 Shim mode": "Advanced", + "B1 Shim mode": "TrueForm", + "Adjust with body coil": "Off", + "Confirm freq. 
adjustment": "Off", + "Assume Dominant Fat": "Off", + "Assume Silicone": "Off", + "Adjustment Tolerance": "Auto" + }, + "System - Adjust Volume": { + "Position": "L0.0 A12.6 H18.8", + "Orientation": "T > C-2.3", + "Rotation": 0.0, + "A >> P": 200, + "R >> L": 200, + "F >> H": 93, + "Reset": "Off" + }, + "System - pTx Volumes": { + "B1 Shim mode": "TrueForm", + "Excitation": "Standard" + }, + "System - Tx/Rx": { + "Frequency 1H": "123.160045 MHz", + "Correction factor": 1, + "Gain": "High", + "Img. Scale Cor.": 1.0, + "Reset": "Off", + "? Ref. amplitude 1H": "0.000 V" + }, + "Physio - Signal1": { + "1st Signal/Mode": "None", + "TR": 1520, + "Concatenations": 1 + }, + "BOLD": { + "GLM Statistics": "Off", + "Dynamic t-maps": "Off", + "Ignore meas. at start": 0, + "Ignore after transition": 0, + "Model transition states": "On", + "Temp. highpass filter": "On", + "Threshold": 4.0, + "Paradigm size": 30, + "Meas[1]": "Active", + "Meas[2]": "Active", + "Meas[3]": "Active", + "Meas[4]": "Active", + "Meas[5]": "Active", + "Meas[6]": "Active", + "Meas[7]": "Active", + "Meas[8]": "Active", + "Meas[9]": "Active", + "Meas[10]": "Active", + "Meas[11]": "Active", + "Meas[12]": "Active", + "Meas[13]": "Active", + "Meas[14]": "Active", + "Meas[15]": "Active", + "Meas[16]": "Baseline", + "Meas[17]": "Baseline", + "Meas[18]": "Baseline", + "Meas[19]": "Baseline", + "Meas[20]": "Baseline", + "Meas[21]": "Baseline", + "Meas[22]": "Baseline", + "Meas[23]": "Baseline", + "Meas[24]": "Baseline", + "Meas[25]": "Baseline", + "Meas[26]": "Baseline", + "Meas[27]": "Baseline", + "Meas[28]": "Baseline", + "Meas[29]": "Baseline", + "Meas[30]": "Baseline", + "Motion correction": "On", + "Spatial filter": "Off", + "Measurements": 202, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Sequence - Part 1": { + "Introduction": "Off", + "Multi-slice mode": "Interleaved", + "Free echo spacing": "Off", + "Echo spacing": 0.55, + "Bandwidth": 2170 + }, + "Sequence - Part 2": { + "EPI factor": 64, + 
"RF pulse type": "Normal", + "Gradient mode": "Fast*", + "Excitation": "Standard" + } + }, + "ncanda-alcpic-v1": { + "name": "ncanda-alcpic-v1 *", + "Id": "93dd8d6a-f858-491b-b515-b26004eb1f0b", + "header_property": "TA: 12:06 PM: FIX Voxel size: 3.8\u00d73.8\u00d75.0 mmPAT: 2 Rel. SNR: 1.00 : epfid ", + "Properties": { + "Prio recon": "Off", + "Load images to viewer": "On", + "Inline movie": "Off", + "Auto store images": "On", + "Load images to stamp segments": "Off", + "Load images to graphic segments": "Off", + "Auto open inline display": "Off", + "Auto close inline display": "Off", + "Start measurement without further preparation": "Off", + "Wait for user to start": "On", + "Start measurements": "Single measurement" + }, + "Routine": { + "Slice group": 1, + "Slices": 32, + "Dist. factor": 0, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. dir.": "P >> A", + "AutoAlign": "---", + "Phase oversampling": 0, + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 5.0, + "TR": 2000, + "TE": 30.0, + "Averages": 1, + "Concatenations": 1, + "Filter": "Prescan Normalize", + "Coil elements": "HE1-4" + }, + "Contrast - Common": { + "TR": 2000, + "TE": 30.0, + "MTC": "Off", + "Flip angle": 79, + "Fat suppr.": "Fat sat." + }, + "Contrast - Dynamic": { + "Averages": 1, + "Averaging mode": "Long term", + "Reconstruction": "Magnitude", + "Measurements": 360, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Resolution - Common": { + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 5.0, + "Base resolution": 64, + "Phase resolution": 100, + "Phase partial Fourier": "Off", + "Interpolation": "Off" + }, + "Resolution - iPAT": { + "Accel. mode": "GRAPPA", + "Accel. factor PE": 2, + "Ref. 
lines PE": 24, + "Reference scan mode": "EPI/separate" + }, + "Resolution - Filter Image": { + "Distortion Corr.": "Off", + "Prescan Normalize": "On" + }, + "Resolution - Filter Rawdata": { + "Raw filter": "Off", + "Elliptical filter": "Off", + "Hamming": "Off" + }, + "Geometry - Common": { + "Slice group": 1, + "Slices": 32, + "Dist. factor": 0, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. dir.": "P >> A", + "FoV read": 240, + "FoV phase": 100.0, + "Slice thickness": 5.0, + "TR": 2000, + "Multi-slice mode": "Interleaved", + "Series": "Interleaved", + "Concatenations": 1 + }, + "Geometry - AutoAlign": { + "Slice group": 1, + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Phase enc. dir.": "P >> A", + "AutoAlign": "---", + "Initial Position": "L0.7 A21.9 F2.6", + "L": 0.7, + "A": 21.9, + "F": 2.6, + "Initial Rotation": -180.0, + "Initial Orientation": "Transversal" + }, + "Geometry - Saturation": { + "Fat suppr.": "Fat sat.", + "Special sat.": "None" + }, + "System - Miscellaneous": { + "Positioning mode": "FIX", + "Table position": 0, + "MSMA": "S - C - T", + "Sagittal": "R >> L", + "Coronal": "A >> P", + "Transversal": "F >> H", + "Coil Combine Mode": "Sum of Squares", + "Matrix Optimization": "Off", + "AutoAlign": "---", + "Coil Select Mode": "On - AutoCoilSelect" + }, + "System - Adjustments": { + "B0 Shim mode": "Advanced", + "B1 Shim mode": "TrueForm", + "Adjust with body coil": "Off", + "Confirm freq. adjustment": "Off", + "Assume Dominant Fat": "Off", + "Assume Silicone": "Off", + "Adjustment Tolerance": "Auto" + }, + "System - Adjust Volume": { + "Position": "L0.7 A21.9 F2.6", + "Orientation": "Transversal", + "Rotation": 180.0, + "A >> P": 240, + "R >> L": 240, + "F >> H": 160, + "Reset": "Off" + }, + "System - pTx Volumes": { + "B1 Shim mode": "TrueForm", + "Excitation": "Standard" + }, + "System - Tx/Rx": { + "Frequency 1H": "123.160045 MHz", + "Correction factor": 1, + "Gain": "High", + "Img. 
Scale Cor.": 1.0, + "Reset": "Off", + "? Ref. amplitude 1H": "0.000 V" + }, + "Physio - Signal1": { + "1st Signal/Mode": "None", + "TR": 2000, + "Concatenations": 1 + }, + "BOLD": { + "GLM Statistics": "Off", + "Dynamic t-maps": "Off", + "Ignore meas. at start": 0, + "Ignore after transition": 0, + "Model transition states": "On", + "Temp. highpass filter": "On", + "Threshold": 4.0, + "Paradigm size": 3, + "Meas[1]": "Baseline", + "Meas[2]": "Baseline", + "Meas[3]": "Active", + "Motion correction": "On", + "Spatial filter": "Off", + "Measurements": 360, + "Delay in TR": 0, + "Multiple series": "Off" + }, + "Sequence - Part 1": { + "Introduction": "Off", + "Multi-slice mode": "Interleaved", + "Free echo spacing": "Off", + "Echo spacing": 0.55, + "Bandwidth": 2170 + }, + "Sequence - Part 2": { + "EPI factor": 64, + "RF pulse type": "Normal", + "Gradient mode": "Fast*", + "Excitation": "Standard" + } + } + } +} diff --git a/examples/monitor_project.py b/examples/monitor_project.py index 164815a..30edc13 100644 --- a/examples/monitor_project.py +++ b/examples/monitor_project.py @@ -1,13 +1,10 @@ import argparse -import os.path -import sys +import multiprocessing as mp from pathlib import Path -from mrQA import monitor -from mrQA.utils import txt2list -from MRdataset.log import logger -from MRdataset.utils import valid_dirs -from MRdataset.config import DatasetEmptyException +from MRdataset import DatasetEmptyException, valid_dirs, load_mr_dataset +from mrQA import monitor, logger, check_compliance +from mrQA.utils import txt2list, filter_epi_fmap_pairs def main(): @@ -21,36 +18,126 @@ def main(): optional = parser.add_argument_group('optional arguments') # Add help - required.add_argument('-d', '--datasets-txt', type=str, required=True, - help='A txt file which contains a list of projects' + required.add_argument('-d', '--data-root', type=str, required=True, + help='A folder which contains projects' 'to process') + optional.add_argument('-t', '--task', type=str, + 
help='specify the task to be performed, one of' + ' [monitor, compile]', default='monitor') + optional.add_argument('-a', '--audit', type=str, + help='specify the audit type if compiling reports. ' + 'Choose one of [hz, vt]. Required if task is ' + 'compile', + default='vt') optional.add_argument('-o', '--output-dir', type=str, default='/home/mrqa/mrqa_reports/', help='specify the directory where the report' - ' would be saved. By default, the --data_source ' - 'directory will be used to save reports') + ' would be saved') + optional.add_argument('-x', '--exclude-fpath', type=str, + help='A txt file containing a' + 'list of folders to be skipped while' + 'monitoring') + required.add_argument('--config', type=str, + help='path to config file') args = parser.parse_args() - if Path(args.datasets_txt).exists(): - datasets_path = txt2list(args.datasets_txt) + if Path(args.data_root).exists(): + data_root = Path(args.data_root) + non_empty_folders = [] + for folder in data_root.iterdir(): + if folder.is_dir() and any(folder.iterdir()): + non_empty_folders.append(folder) else: - raise ValueError("Need a valid path to a txt file, which consists of " - f"names of projects to process. " - f"Got {args.datasets_txt}") - dirs = valid_dirs(datasets_path) - - for folder_path in dirs: - name = Path(folder_path).stem - print(f"\nProcessing {name}\n") - output_folder = Path(args.output_dir) / name - try: - monitor(name=name, - data_source=folder_path, - output_dir=output_folder, - decimals=2, - ) - except DatasetEmptyException as e: - logger.warning(f'{e}: Folder {name} has no DICOM files.') + raise ValueError("Need a valid path to a folder, which consists of " + f"projects to process. 
" + f"Got {args.data_root}") + + dirs = valid_dirs(non_empty_folders) + + if len(non_empty_folders) < 2: + dirs = [dirs] + if args.exclude_fpath is not None: + if not Path(args.exclude_fpath).exists(): + raise FileNotFoundError("Need a valid filepath to the exclude list") + exclude_filepath = Path(args.exclude_fpath).resolve() + skip_list = [Path(i).resolve() for i in txt2list(exclude_filepath)] + + for fpath in dirs: + if Path(fpath).resolve() in skip_list: + dirs.remove(fpath) + if args.task == 'monitor': + pool = mp.Pool(processes=10) + arguments = [(f, args.output_dir, args.config) for f in dirs] + pool.starmap(run, arguments) + elif args.task == 'compile': + compile_reports(args.data_root, args.output_dir, args.config, + args.audit) + else: + raise NotImplementedError(f"Task {args.task} not implemented. Choose " + "one of [monitor, compile]") + + +def run(folder_path, output_dir, config_path): + name = Path(folder_path).stem + print(f"\nProcessing {name}\n") + output_folder = Path(output_dir) / name + try: + monitor(name=name, + data_source=folder_path, + output_dir=output_folder, + decimals=2, + verbose=False, + ds_format='dicom', + tolerance=0, + config_path=config_path, + ) + except DatasetEmptyException as e: + logger.warning(f'{e}: Folder {name} has no DICOM files.') + + +def compile_reports(folder_path, output_dir, config_path, audit='vt'): + output_dir = Path(output_dir) + complete_log = [] + # Look for all mrds.pkl file in the output_dir. 
For ex, mrqa_reports + # Collect mrds.pkl files for all projects + mrds_files = list(Path(folder_path).rglob('*.mrds.pkl')) + if not mrds_files: + raise FileNotFoundError(f"No .mrds.pkl files found in {folder_path}") + + for mrds in mrds_files: + ds = load_mr_dataset(mrds) + # TODO : check compliance, but maybe its better is to save + # compliance results which can be re-used here + hz, vt = check_compliance( + ds, + output_dir=output_dir / 'compiled_reports', + config_path=config_path, + ) + if audit == 'hz': + non_compliant_ds = hz['non_compliant'] + filter_fn = None + nc_params = ['ReceiveCoilActiveElements'] + supplementary_params = ['BodyPartExamined'] + elif audit == 'vt': + non_compliant_ds = vt['non_compliant'] + nc_params = ['ShimSetting', 'PixelSpacing'] + supplementary_params = [] + # TODO: discuss what parameters can be compared between anatomical + # and functional scans + # after checking compliance just look for epi-fmap pairs for now + filter_fn = filter_epi_fmap_pairs + else: + raise ValueError(f"Invalid audit type {audit}. 
Choose one of " + f"[hz, vt]") + + nc_log = non_compliant_ds.generate_nc_log( + parameters=nc_params, + suppl_params=supplementary_params, + filter_fn=filter_fn, + output_dir=output_dir, + audit=audit, + verbosity=4) + if __name__ == "__main__": diff --git a/examples/mri-config-abcd.json b/examples/mri-config-abcd.json new file mode 100644 index 0000000..81254af --- /dev/null +++ b/examples/mri-config-abcd.json @@ -0,0 +1,107 @@ +{ + "begin": "03_12_2024", + "end": "03_12_2000", + "include_sequence": { + "phantom": false, + "nifti_header": false, + "moco": false, + "sbref": false, + "derived": false + }, + "use_echonumbers": true, + "vertical_audit": { + "stratify_by": null, + "sequences": [ + [ + "ABCD-DTI_SIEMENS_mosaic_original_baseline_year_1_arm_1", + "ABCD-Diffusion-FM-AP_SIEMENS_original_baseline_year_1_arm_1" + ], + [ + "ABCD-DTI_SIEMENS_mosaic_original_baseline_year_1_arm_1", + "ABCD-Diffusion-FM-PA_SIEMENS_original_baseline_year_1_arm_1" + ], + [ + "ABCD-fMRI-FM-AP_SIEMENS_original_baseline_year_1_arm_1", + "ABCD-rsfMRI_SIEMENS_mosaic_original_baseline_year_1_arm_1" + ], + [ + "ABCD-fMRI-FM-PA_SIEMENS_original_baseline_year_1_arm_1", + "ABCD-rsfMRI_SIEMENS_mosaic_original_baseline_year_1_arm_1" + ], + [ + "ABCD-fMRI-FM_GE_original_baseline_year_1_arm_1", + "ABCD-rsfMRI_GE_original_baseline_year_1_arm_1" + ], + [ + "ABCD-DTI_PHILIPS_original_baseline_year_1_arm_1", + "ABCD-Diffusion-FM-AP_PHILIPS_original_baseline_year_1_arm_1" + ], + [ + "ABCD-DTI_PHILIPS_original_baseline_year_1_arm_1", + "ABCD-Diffusion-FM-PA_PHILIPS_original_baseline_year_1_arm_1" + ], + [ + "ABCD-fMRI-FM-AP_PHILIPS_original_baseline_year_1_arm_1", + "ABCD-rsfMRI_PHILIPS_original_baseline_year_1_arm_1" + ], + [ + "ABCD-fMRI-FM-PA_PHILIPS_original_baseline_year_1_arm_1", + "ABCD-rsfMRI_PHILIPS_original_baseline_year_1_arm_1" + ] + ], + "include_parameters": [ + "FieldOfView", + "PixelSpacing", + "PhaseEncodingDirection", + "ShimMode", + "ShimSetting" + ] + }, + "horizontal_audit": { + 
"stratify_by": null, + "skip_sequences": [ + "coil_error", + "qa", + "fmri" + ], + "include_parameters": [ + "PixelSpacing", + "Rows", + "Columns", + "AcquisitionMatrix", + "Manufacturer", + "MagneticFieldStrength", + "ScanningSequence", + "SequenceVariant", + "SequenceName", + "NonLinearGradientCorrection", + "MRAcquisitionType", + "PhaseEncodingDirection", + "EchoTime", + "InversionTime", + "DwellTime", + "RepetitionTime", + "FlipAngle", + "BodyPartExamined", + "EchoTrainLength", + "PixelBandwidth", + "PhaseEncodingSteps", + "EchoNumber", + "SliceThickness", + "PercentPhaseFOV", + "PercentSampling", + "VariableFlipAngleFlag", + "ImageOrientationPatient", + "NumberOfAverages", + "ShimMode", + "FieldOfView" + ] + }, + "plots": { + "include_parameters": [ + "ManufacturerAndModel", + "ManufacturerAndVersion", + "Site" + ] + } +} diff --git a/examples/mri-config-full.json b/examples/mri-config-full.json new file mode 100644 index 0000000..e746748 --- /dev/null +++ b/examples/mri-config-full.json @@ -0,0 +1,66 @@ +{ + "begin": "03_12_2024", + "end": "03_12_2000", + "include_sequence": { + "phantom": false, + "nifti_header": false, + "moco": false, + "sbref": false, + "derived": false + }, + "use_echonumbers": true, + "exclude_subjects": [ + "210098", + "210078" + ], + "vertical_audit": { + "stratify_by": "series_number", + "include_parameters": [ + "Rows", + "Columns", + "AcquisitionMatrix", + "PixelSpacing", + "PhaseEncodingDirection", + "ShimMode", + "ShimSetting" + ] + }, + "horizontal_audit": { + "stratify_by": "series_number", + "include_parameters": [ + "PixelSpacing", + "Rows", + "Columns", + "AcquisitionMatrix", + "Manufacturer", + "ManufacturersModelName", + "SoftwareVersions", + "MagneticFieldStrength", + "ReceiveCoilActiveElements", + "ScanningSequence", + "SequenceVariant", + "ScanOptions", + "SequenceName", + "NonLinearGradientCorrection", + "MRAcquisitionType", + "PhaseEncodingDirection", + "EchoTime", + "InversionTime", + "DwellTime", + "RepetitionTime", 
+ "FlipAngle", + "BodyPartExamined", + "EchoTrainLength", + "PixelBandwidth", + "PhaseEncodingSteps", + "EchoNumber", + "SliceThickness", + "PercentPhaseFOV", + "PercentSampling", + "VariableFlipAngleFlag", + "ImageOrientationPatient", + "NumberOfAverages", + "ShimMode" + ] + } +} diff --git a/examples/mri-config-project.json b/examples/mri-config-project.json new file mode 100644 index 0000000..f39aba6 --- /dev/null +++ b/examples/mri-config-project.json @@ -0,0 +1,60 @@ +{ + "begin": "03_12_2024", + "end": "03_12_2000", + "include_sequence": { + "phantom": false, + "nifti_header": false, + "moco": false, + "sbref": false, + "derived": false + }, + "use_echonumbers": true, + "vertical_audit": { + "stratify_by": "series_number", + "include_parameters": [ + "AcquisitionMatrix", + "PixelSpacing", + "PhaseEncodingDirection", + "ShimMode", + "ShimSetting" + ] + }, + "horizontal_audit": { + "stratify_by": "series_number", + "include_parameters": [ + "PixelSpacing", + "Rows", + "Columns", + "AcquisitionMatrix", + "Manufacturer", + "ManufacturersModelName", + "SoftwareVersions", + "MagneticFieldStrength", + "ReceiveCoilActiveElements", + "ScanningSequence", + "SequenceVariant", + "ScanOptions", + "SequenceName", + "NonLinearGradientCorrection", + "MRAcquisitionType", + "PhaseEncodingDirection", + "EchoTime", + "InversionTime", + "DwellTime", + "RepetitionTime", + "FlipAngle", + "BodyPartExamined", + "EchoTrainLength", + "PixelBandwidth", + "PhaseEncodingSteps", + "SliceThickness", + "PercentPhaseFOV", + "PercentSampling", + "VariableFlipAngleFlag", + "ImageOrientationPatient", + "NumberOfAverages", + "ShimMode", + "FieldOfView" + ] + } +} diff --git a/examples/mri-config.json b/examples/mri-config.json new file mode 100644 index 0000000..92f3ab1 --- /dev/null +++ b/examples/mri-config.json @@ -0,0 +1,75 @@ +{ + "begin": "2014-03-12T13:37:27+00:00", + "end": "2017-03-12T13:37:27+00:00", + "include_sequence": { + "phantom": false, + "nifti_header": false, + "moco": false, + 
"sbref": false, + "derived": false + }, + "use_echonumbers": true, + "horizontal_audit": { + "stratify_by": "series_number", + "include_parameters": [ + "EchoTrainLength", + "ParallelAcquisitionTechnique", + "MagneticFieldStrength", + "MRAcquisitionType", + "MultiSliceMode", + "PhasePolarity", + "PhaseEncodingSteps", + "PixelBandwidth", + "ScanningSequence", + "SequenceVariant", + "RepetitionTime", + "EchoTime", + "FlipAngle", + "PhaseEncodingDirection", + "ShimMode", + "Rows", + "Columns", + "AcquisitionMatrix" + ] + }, + "vertical_audit": { + "stratify_by": "series_number", + "sequences": [ + [ + "ncanda-rsfmri-v1", + "ncanda-grefieldmap-v1" + ], + [ + "ncanda-dti30b400-v1", + "ncanda-grefieldmap-v1" + ], + [ + "ncanda-dti60b1000-v1", + "ncanda-grefieldmap-v1" + ] + ], + "include_parameters": [ + "Rows", + "Columns", + "AcquisitionMatrix", + "PhaseEncodingDirection", + "ShimMode", + "ShimSetting" + ] + }, + "plots": { + "include_parameters": [ + "ContentDate", + "PatientSex", + "PatientAge", + "PatientWeight", + "OperatorsName", + "InstitutionName", + "Manufacturer" + ] + }, + "exclude_subjects": [ + "210098", + "210078" + ] +} diff --git a/examples/plot.py b/examples/plot.py new file mode 100644 index 0000000..f12cfe2 --- /dev/null +++ b/examples/plot.py @@ -0,0 +1,12 @@ +import pickle +from mrQA.project import plot_patterns + +dict_ = pickle.load(open( + '/home/sinhah/scan_data/vertical_abcd_mrqa_files/abcd-fmap-baseline-non-recommended_hz.adt.pkl', 'rb') +) + +config_path = '/home/sinhah/github/mrQA/examples/mri-config-abcd.json' + +plot_patterns(non_compliant_ds=dict_['non_compliant'], + complete_ds=dict_['complete_ds'], + config_path=config_path) diff --git a/examples/process_abcd_T1w.py b/examples/process_abcd_T1w.py index 9d0888b..6b2b702 100644 --- a/examples/process_abcd_T1w.py +++ b/examples/process_abcd_T1w.py @@ -39,7 +39,7 @@ def main(): if args.task == 'create_script': # Create scripts but do not submit jobs create_script(data_source=DATA_ROOT, - 
subjects_per_job=50, + folders_per_job=50, conda_env='mrqa', conda_dist='miniconda3', hpc=True, diff --git a/examples/process_abcd_local.py b/examples/process_abcd_local.py index 614d337..ad91d68 100644 --- a/examples/process_abcd_local.py +++ b/examples/process_abcd_local.py @@ -4,7 +4,6 @@ from MRdataset import load_mr_dataset from MRdataset.config import MRDS_EXT - from mrQA import check_compliance from mrQA.run_merge import check_and_merge from mrQA.run_parallel import create_script, submit_job @@ -29,25 +28,33 @@ def main(): # Required arguments required.add_argument('-t', '--task', type=str, required=False, - help='[submit_job|merge|report]', - default='submit_job') - + help='[create_script|submit_job|merge|report]', + default='report') + optional.add_argument('-ref', '--ref-protocol-path', type=str, + help='XML file containing desired protocol. If not ' + 'provided, the protocol will be inferred from ' + 'the dataset.') + required.add_argument('--config', type=str, + help='path to config file', + default='/home/sinhah/github/mrQA/examples/mri-config-abcd.json') # Parse arguments args = parser.parse_args() # Set constants - DATA_ROOT = Path('/media/sinhah/extremessd/ABCD-375/dicom-baseline') + DATA_ROOT = Path('/home/sinhah/scan_data/vertical_abcd') OUTPUT_DIR = DATA_ROOT.parent / (DATA_ROOT.stem + '_mrqa_files') # OUTPUT_DIR.mkdir(parents=True, exist_ok=True) - name = 'abcd-375' + name = 'abcd-vertical' # Choose a task, one of [debug|submit_job|merge|report] if args.task == 'create_script': # note that it will generate scripts only create_script(data_source=DATA_ROOT, - subjects_per_job=50, + folders_per_job=5, conda_env='mrcheck', conda_dist='anaconda3', hpc=False, + config_path=args.config, + output_dir=OUTPUT_DIR, ) elif args.task == 'submit_job': # Generate slurm scripts and submit jobs, for local parallel processing @@ -68,10 +75,13 @@ def main(): ) elif args.task == 'report': # Generate a report for the merged dataset - dataset = 
load_mr_dataset(OUTPUT_DIR / (name + MRDS_EXT), ds_format='dicom') + dataset = load_mr_dataset(OUTPUT_DIR / (name + MRDS_EXT)) check_compliance(dataset=dataset, - output_dir=OUTPUT_DIR/'reports', - decimals=1) + output_dir=OUTPUT_DIR, + decimals=2, + tolerance=0, + config_path=args.config, + reference_path=args.ref_protocol_path,) else: # Invalid task raise NotImplementedError(f"Expected one of [submit_job|merge|report], " diff --git a/mrQA/__compliance__.py b/mrQA/__compliance__.py index 42f8ceb..19b76d6 100644 --- a/mrQA/__compliance__.py +++ b/mrQA/__compliance__.py @@ -1,7 +1,7 @@ from sys import version_info if version_info.major > 2: - from mrQA import cli + from mrQA.cli import cli else: raise NotImplementedError('Protocol Compliance requires Python 3 or higher.' 'Upgrade to Python 3+ or use environments.') @@ -9,7 +9,7 @@ def main(): """Entry point.""" - cli.main() + cli() if __name__ == '__main__': diff --git a/mrQA/__init__.py b/mrQA/__init__.py index c8a4fbb..bda5021 100644 --- a/mrQA/__init__.py +++ b/mrQA/__init__.py @@ -2,10 +2,17 @@ __author__ = """Harsh Sinha""" __email__ = 'harsh.sinha@pitt.edu' -__version__ = '0.1.0' +# __version__ = '0.1.0' -from mrQA.project import check_compliance -from mrQA.monitor import monitor +import logging -from . import _version -__version__ = _version.get_versions()['version'] +from mrQA.config import configure_logger + +logger = logging.getLogger(__name__) +logger = configure_logger(logger, output_dir=None, mode='w') + +from mrQA.monitor import monitor # noqa +from mrQA.project import check_compliance # noqa +from . 
import _version # noqa + +__version__ = _version.get_versions()['version'] # noqa diff --git a/mrQA/_version.py b/mrQA/_version.py index 663e817..87f472d 100644 --- a/mrQA/_version.py +++ b/mrQA/_version.py @@ -198,7 +198,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and + # filter_fn out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = {r for r in refs if re.search(r'\d', r)} if verbose: diff --git a/mrQA/base.py b/mrQA/base.py new file mode 100644 index 0000000..9151a24 --- /dev/null +++ b/mrQA/base.py @@ -0,0 +1,673 @@ +import json +import tempfile +from abc import ABC, abstractmethod +from datetime import timedelta +from typing import List + +from MRdataset import valid_dirs +from MRdataset.base import BaseDataset +from bokeh.palettes import turbo, d3 +from protocol import BaseSequence + + +class CompliantDataset(BaseDataset): + """ + Container to manage properties of sequences that are compliant with the + reference protocol. It is a subclass of BaseDataset, and inherits all + its properties and methods. + + Parameters + ---------- + name: str + Name of the dataset + data_source: Path | List | str + Path to the dataset + ds_format: str + Format of the dataset, one of ['dicom'] + """ + + def __init__(self, name=None, data_source=None, ds_format=None): + # BaseDataset checks if data_source is valid, and if not, + # it raises an error. + # It is very likely that data is processed by MRdataset on a + # different machine, and the processed data is then transferred to + # another machine for audit. In such cases, the data_source of + # original dataset will be invalid on the machine where audit is + # performed. 
Hence, we set data_source in super() to temp dir. + try: + data_source = valid_dirs(data_source) + except (OSError, ValueError): + data_source = tempfile.gettempdir() + + super().__init__(name=name, data_source=data_source, + ds_format=ds_format) + + # If the sequence name was modified, then we need to keep track of + # the original sequence name as well. For example, if the sequence + # name was modified from T1w to T1w_modified, then we need to keep + # track of the original sequence name T1w as well. Why is modification + # of sequence name required? For example, if the sequence name is + # same, but the sequence is acquired twice, then we need to modify + # the sequence name to distinguish between the two sequences. + self._org2mod_seq_names = {} + self._mod2org_seq_names = {} + + def get_modified_seq_name(self, seq_name): + """Get the modified sequence name""" + return self._org2mod_seq_names[seq_name] + + def _get_original_seq_name(self, seq_name): + """Get the original sequence name""" + return self._mod2org_seq_names[seq_name] + + def set_modified_seq_name(self, original, modified): + """Set the modified sequence name""" + self._org2mod_seq_names[original] = modified + self._mod2org_seq_names[modified] = original + + def load(self): + pass + + +class UndeterminedDataset(BaseDataset): + """ + Container to manage properties of sequences whose reference protocol could + not be determined. Reasons could be: + 1. No reference protocol was found + 2. Multiple reference protocols were found + 3. Reference protocol was not valid + + Parameters + ---------- + name: str + Name of the dataset + data_source: Path | List | str + Path to the dataset + ds_format: str + Format of the dataset, one of ['dicom'] + """ + + def __init__(self, name=None, data_source=None, ds_format=None): + # BaseDataset checks if data_source is valid, and if not, + # it raises an error. 
+ # It is very likely that data is processed by MRdataset on a + # different machine, and the processed data is then transferred to + # another machine for audit. In such cases, the data_source of + # original dataset will be invalid on the machine where audit is + # performed. Hence, we set data_source in super() to None. + try: + data_source = valid_dirs(data_source) + except (OSError, ValueError): + data_source = tempfile.gettempdir() + + super().__init__(name=name, data_source=data_source, + ds_format=ds_format) + + # If the sequence name was modified, then we need to keep track of + # the original sequence name as well. For example, if the sequence + # name was modified from T1w to T1w_modified, then we need to keep + # track of the original sequence name T1w as well. Why is modification + # of sequence name required? For example, if the sequence name is + # same, but the sequence is acquired twice, then we need to modify + # the sequence name to distinguish between the two sequences. + + self._org2mod_seq_names = {} + self._mod2org_seq_names = {} + + def get_modified_seq_name(self, seq_name): + """Get the modified sequence name""" + return self._org2mod_seq_names[seq_name] + + def get_original_seq_name(self, seq_name): + """Get the original sequence name""" + return self._mod2org_seq_names[seq_name] + + def set_modified_seq_name(self, original, modified): + """Set the modified sequence name""" + self._org2mod_seq_names[original] = modified + self._mod2org_seq_names[modified] = original + + def load(self): + pass + + +class NonCompliantDataset(BaseDataset): + """ + Container to manage properties of sequences that are non-compliant with the + reference protocol. It is a subclass of BaseDataset, and inherits all + its properties and methods. 
+ + Parameters + ---------- + name: str + Name of the dataset + data_source: Path | List | str + Path to the dataset + ds_format: str + Format of the dataset, one of ['dicom'] + """ + + def __init__(self, name=None, data_source=None, ds_format=None): + # BaseDataset checks if data_source is valid, and if not, + # it raises an error. + # It is very likely that data is processed by MRdataset on a + # different machine, and the processed data is then transferred to + # another machine for audit. In such cases, the data_source of + # original dataset will be invalid on the machine where audit is + # performed. Hence, we set data_source in super() to None. + try: + data_source = valid_dirs(data_source) + except (OSError, ValueError): + data_source = tempfile.gettempdir() + + super().__init__(name=name, data_source=data_source, + ds_format=ds_format) + + # Dictionary to store all non-compliant parameters + self._nc_flat_map = {} + self._nc_tree_map = {} + self._nc_params_map = {} + + # Set to store all sequence pairs that were checked for vertical audit + self._vt_sequences = set() + + # If the sequence name was modified, then we need to keep track of + # the original sequence name as well. For example, if the sequence + # name was modified from T1w to T1w_modified, then we need to keep + # track of the original sequence name T1w as well. Why is modification + # of sequence name required? For example, if the sequence name is + # same, but the sequence is acquired twice, then we need to modify + # the sequence name to distinguish between the two sequences. 
+ self._org2mod_seq_names = {} + self._mod2org_seq_names = {} + + def get_modified_seq_name(self, seq_name): + """Get the modified sequence name""" + try: + return self._org2mod_seq_names[seq_name] + except KeyError: + return seq_name + + def get_original_seq_name(self, seq_name): + """Get the original sequence name""" + try: + return self._mod2org_seq_names[seq_name] + except KeyError: + return seq_name + + def set_modified_seq_name(self, original, modified): + """Set the modified sequence name""" + self._org2mod_seq_names[original] = modified + self._mod2org_seq_names[modified] = original + + def get_vt_sequences(self) -> List: + """ + Returns a list of all sequences that were checked for vertical + audit. + """ + return list(self._vt_sequences) + + def add_sequence_pair_names(self, list_seqs): + """ + Add a sequence to the list of sequences that were checked for + vertical audit. + """ + self._vt_sequences.add(list_seqs) + + def _is_scanned_before(self, date, seq): + # Provide an option to include those subjects that were + # scanned after the given date + content_date = seq['ContentDate'].get_value() + # Suppose date for report generation is 2023-11-21 01:00:00 am + # However content date doesn't have time information, so it is + # 2023-11-21 00:00:00 am. Now, if we compare the two dates, date for + # report generation will always be greater than content date, + # even though the scan could have been performed on the same day. + # Hence, we add 1 day to content date, so that the two dates + # can be compared. + + # A better option is to use content time, but not all scanners + # provide content time. Hence, we use content date + 1 day. This means + # that the scan will be skipped only if it was performed at least + # 1 day before the date of report generation. 
+ if date >= content_date + timedelta(days=1): + return True + return False + + def generate_hz_log(self, parameters, suppl_params, filter_fn=None, + verbosity=1, date=None): + sequences = self.get_sequence_ids() + nc_log = {} + for seq_id in sequences: + for param_name in parameters: + for param_tupl, sub, path, seq in self.get_nc_param_values( + seq_id, param_name): + if param_name not in nc_log: # empty + nc_log[param_name] = [] + + if self._is_scanned_before(date, seq): + continue + nc_dict = self._populate_nc_dict(param_tuple=param_tupl, + sub=sub, path=path, + seq=seq, seq_ids=seq_id, + suppl_params=suppl_params, + verbosity=verbosity) + nc_log[param_name].append(nc_dict) + return nc_log + + def _populate_nc_dict(self, param_tuple, seq_ids, sub, path, seq, + suppl_params, verbosity): + + nc_dict = {} + nc_dict['date'] = str(seq['ContentDate'].get_value().date()) + nc_dict['subject'] = sub + nc_dict['sequence_name'] = seq_ids + + # if additional parameters have to be included in the log + if suppl_params: + for i in suppl_params: + nc_dict[i] = seq[i].get_value() + + if verbosity > 1: + nc_dict['values'] = [p.get_value() for p in param_tuple] + if verbosity > 2: + nc_dict['path'] = str(path) + return nc_dict + + def generate_nc_log(self, parameters, filter_fn=None, output_dir=None, + suppl_params=None, audit='vt', verbosity=1, date=None): + """ + Generate a log of all non-compliant parameters in the dataset. 
+ Apart from returning the log, it also dumps the log as a json file + """ + nc_log = {} + if audit == 'hz': + nc_log = self.generate_hz_log(parameters, suppl_params, + filter_fn, verbosity, date=date) + filename = self.name + '_hz_log.json' + elif audit == 'vt': + nc_log = self.generate_vt_log(parameters, suppl_params, + filter_fn, verbosity, date=date) + filename = self.name + '_vt_log.json' + if audit not in ['vt', 'hz']: + raise ValueError('Expected one of [vt, hz], got {}'.format(audit)) + + # if output_dir is provided, dump it as a json file + if nc_log and output_dir is not None: + with open(output_dir / filename, 'w') as f: + json.dump(nc_log, f, indent=4) + + return nc_log + + def generate_vt_log(self, parameters, suppl_params, filter_fn=None, + verbosity=1, date=None): + + nc_log = {} + sequence_pairs = self.get_vt_sequences() + + # Don't create the log for all sequence pairs. For example, we only + # want to highlight the issues in field-map and epi sequences. + for pair in filter(filter_fn, sequence_pairs): + for param_name in parameters: + for param_tuple, sub, path, seq in self.get_vt_param_values( + pair, param_name): + if param_name not in nc_log: # empty + nc_log[param_name] = [] + + # Provide a date to include those subjects that were + # scanned after the given date + if self._is_scanned_before(date, seq): + continue + + nc_dict = self._populate_nc_dict(param_tuple=param_tuple, + sub=sub, path=path, + seq=seq, seq_ids=pair, + suppl_params=suppl_params, + verbosity=verbosity) + nc_log[param_name].append(nc_dict) + return nc_log + + def get_nc_param_ids(self, seq_id): + """ + Returns a list of all non-compliant parameter names for a given + sequence id. + + Parameters + ---------- + seq_id: str + Name of the sequence, e.g. T1w, T2w etc. 
+ """ + if seq_id not in self._nc_params_map: + return [] + else: + return list(self._nc_params_map[seq_id]) + + def get_nc_param_values(self, seq_id, param_name, ref_seq=None): + """ + Returns a list of all non-compliant parameter values for a given + sequence id and parameter name. + + Parameters + ---------- + seq_id: str + Name of the sequence, e.g. rs-fMRI etc. + param_name: str + Name of the parameter, e.g. RepetitionTime, EchoTime etc. + ref_seq: str + Name of the reference sequence, e.g. field-map + + Returns + ------- + Iterator + All non-compliant parameter values + + .. note:: It is recommended to also use the name of sequence used as + the reference protocol. For horizontal audit, + this is not essential, as each sequence is compared against its own + reference protocol. However, in case of vertical audit, it is + essential to provide the name of the sequence used as the + reference protocol. + For example, if field map and the rs-fMRI sequence are compared, + then the seq_id can be rs-fMRI, and ref_seq can be field map. + This will return only those values that are non-compliant with + the field map sequence. If ref_seq is provided, it returns only + those values that are non-compliant with the reference protocol. 
+ + """ + if ref_seq is None: + ref_seq = '__NOT_SPECIFIED__' + if param_name in self._nc_params_map[seq_id]: + if seq_id in self._nc_tree_map[param_name]: + for subject_id in self._nc_tree_map[param_name][seq_id]: + for session_id in ( + self._nc_tree_map[param_name][seq_id][subject_id]): + if ref_seq in \ + self._nc_tree_map[param_name][seq_id][subject_id][ + session_id]: + yield from self._get_all_nc_param_values( + seq_id=seq_id, param_name=param_name, + subject_id=subject_id, session_id=session_id, + ref_seq=ref_seq) + + def _get_all_nc_param_values(self, seq_id, param_name, subject_id, + session_id, ref_seq=None): + """ + Returns a list of all non-compliant parameter values for a given + sequence id, subject_id, session_id and parameter name. + """ + for run_id in self._nc_tree_map[param_name][seq_id][ + subject_id][session_id][ref_seq]: + param_tupl = self._nc_tree_map[param_name][seq_id][ + subject_id][session_id][ref_seq][run_id] # noqa + path = self.get_path(subject_id, session_id, + seq_id, run_id) + seq = self.get(subject_id, session_id, seq_id, run_id) + yield param_tupl, subject_id, path, seq + + def get_vt_param_values(self, seq_pair, param_name): + """Wrapper around get_nc_param_values() for vertical audit""" + seq1, seq2 = seq_pair + if seq1 not in self._nc_params_map: + return + yield from self.get_nc_param_values(seq1, param_name, seq2) + + def get_nc_subject_ids(self, seq_id, param_name, ref_seq=None): + """ + Returns a list of all non-compliant subject ids for a given + sequence id and parameter name. Created for vertical audit report + as we are not listing any parameter values or paths in the report, just + the subject ids. 
+ """ + if ref_seq is None: + ref_seq = '__NOT_SPECIFIED__' + if seq_id not in self._nc_params_map: + # return empty generator + return + if param_name in self._nc_params_map[seq_id]: + if seq_id in self._nc_tree_map[param_name]: + for subject_id in self._nc_tree_map[param_name][seq_id]: + for session_id in ( + self._nc_tree_map[param_name][seq_id][subject_id]): + if ref_seq in \ + self._nc_tree_map[param_name][seq_id][subject_id][ + session_id]: + yield subject_id + + def total_nc_subjects_by_sequence(self, seq_id, ref_seq=None): + """ + Returns the total number of non-compliant subjects for a given + sequence id and parameter name. + """ + subject_ids = set() + for parameter in self.get_nc_param_ids(seq_id=seq_id): + for subject_id in self.get_nc_subject_ids( + seq_id=seq_id, + param_name=parameter, + ref_seq=ref_seq): + subject_ids.add(subject_id) + return len(subject_ids) + + def total_nc_subjects_by_parameter(self, param_name): + """ + Returns the total number of non-compliant subjects for a given + sequence id and parameter name. + """ + total_subjects = set() + for seq_id, ref_seq in self.get_vt_sequences(): + if seq_id in self._nc_params_map: + subjects = list(self.get_nc_subject_ids(seq_id=seq_id, + param_name=param_name, + ref_seq=ref_seq)) + total_subjects.update(subjects) + return len(total_subjects) + + def get_nc_params(self, subject_id, session_id, seq_id, run_id): + """ + A generator that returns all non-compliant parameters for a given + subject, session, sequence and run. + + Parameters + ---------- + subject_id: str + Subject ID e.g. sub-01 + session_id: str + Session ID e.g. ses-01 + seq_id: str + Sequence ID e.g. T1w, T2w etc. + run_id: str + Run ID e.g. 
run-01 + + Returns + ------- + Iterator + All non-compliant parameters + """ + for param_name in self._nc_tree_map: + yield self._nc_tree_map[param_name][subject_id][session_id][seq_id][ + run_id] + + def get_path(self, subject_id, session_id, seq_id, run_id): + """ + Returns the path to the folder where DICOM files for a + given subject, session, sequence and run are stored. + + Parameters + ---------- + subject_id: str + Subject ID e.g. sub-01 + session_id: str + Session ID e.g. ses-01 + seq_id: str + Sequence ID e.g. T1w, T2w etc. + run_id: str + Run ID e.g. run-01 + + Returns + ------- + str + Path to the folder where DICOM files are stored + """ + # Get image sequence for the given subject, session, sequence and run + img_sequence = self._tree_map[subject_id][session_id][seq_id][run_id] + return img_sequence.path + + def add_nc_params(self, subject_id, session_id, seq_id, run_id, + non_compliant_params, ref_seq=None): + """ + Add non-compliant parameters to the dataset. This is a helper function + that is used by the (horizontal/vertical) audit to add non-compliant + parameters to the dataset. + + Parameters + ---------- + subject_id: str + Subject ID e.g. sub-01 + session_id: str + Session ID e.g. ses-01 + seq_id: str + Sequence ID e.g. T1w, T2w etc. + run_id: str + Run ID e.g. run-01 + non_compliant_params: List[Tuple] + List of non-compliant parameters. Each tuple contains + non-compliant parameter and the reference parameter. + ref_seq: str + Name of the reference sequence, e.g. field-map + """ + if ref_seq is None: + ref_seq = '__NOT_SPECIFIED__' + + if not isinstance(non_compliant_params, list): + raise TypeError( + 'Expected list of BaseParameter, got {}'.format( + type(non_compliant_params))) + + if isinstance(seq_id, BaseSequence): + raise TypeError("Expected str, got BaseSequence. 
Use " + ".name attribute to get the name of the sequence") + + if not isinstance(seq_id, str): + raise TypeError( + 'Expected str, got {}'.format(type(seq_id))) + + if not isinstance(ref_seq, str): + raise TypeError( + 'Expected str, got {}'.format(type(ref_seq))) + + for param_tupl in non_compliant_params: + # if not isinstance(param_tupl, BaseParameter): + # raise TypeError( + # 'Expected BaseParameter, got {}'.format(type(param_tupl))) + + param_name = param_tupl[0].name + self._nc_flat_map[ + (param_name, subject_id, session_id, seq_id, ref_seq, + run_id)] = param_tupl + self._nc_tree_add_node(subject_id=subject_id, session_id=session_id, + seq_id=seq_id, run_id=run_id, + param=param_tupl, param_name=param_name, + ref_seq=ref_seq) + if seq_id not in self._nc_params_map: + self._nc_params_map[seq_id] = set() + self._nc_params_map[seq_id].add(param_name) + + def _nc_tree_add_node(self, subject_id, session_id, seq_id, run_id, + param, param_name, ref_seq=None): + """ + Add a node to the tree map. This is a private function that is used by + the (horizontal/vertical) audit to add non-compliant parameters to the + dataset. + + Parameters + ---------- + subject_id: str + Subject ID e.g. sub-01 + session_id: str + Session ID e.g. ses-01 + seq_id: str + Sequence ID e.g. T1w, T2w etc. + run_id: str + Run ID e.g. run-01 + ref_seq: Optional[str] + Name of the reference sequence, e.g. 
field-map + """ + # TODO: improve it later + if ref_seq is None: + ref_seq = '__NOT_SPECIFIED__' + + if param_name not in self._nc_tree_map: + self._nc_tree_map[param_name] = dict() + + if seq_id not in self._nc_tree_map[param_name]: + self._nc_tree_map[param_name][seq_id] = dict() + + if subject_id not in self._nc_tree_map[param_name][seq_id]: + self._nc_tree_map[param_name][seq_id][subject_id] = dict() + + if session_id not in self._nc_tree_map[param_name][seq_id][subject_id]: + self._nc_tree_map[param_name][seq_id][subject_id][ + session_id] = dict() + + if ref_seq not in ( + self._nc_tree_map[param_name][seq_id][subject_id][session_id]): + self._nc_tree_map[param_name][seq_id][subject_id][session_id][ + ref_seq] = dict() + + if run_id not in \ + self._nc_tree_map[param_name][seq_id][subject_id][session_id][ + ref_seq]: + self._nc_tree_map[param_name][seq_id][subject_id][session_id][ + ref_seq][run_id] = dict() + + self._nc_tree_map[param_name][seq_id][subject_id][session_id][ref_seq][ + run_id] = param + + def load(self): + pass + + +class BasePlot(ABC): + _name = None + + def __init__(self, name=None): + if name is not None: + self._name = name + self.div = None + self.script = None + self.plot_height = None + self.plot_width = None + self.title = None + self.x_axis_label = None + self.y_axis_label = None + self.x_range = None + self.label = None + self.legend_label = None + self.colors = None + + @abstractmethod + def plot(self, non_compliant_ds, complete_ds, parameters): + """Creates a plot for the given data""" + + @abstractmethod + def compute_counts(self, non_compliant_ds, complete_ds, parameters): + """Computes the counts for the given dataset and parameters.""" + + @abstractmethod + def get_plot_components(self, data): + """getter method for plotting components""" + + @abstractmethod + def get_counter(self, dataset, parameters): + """getter method for counter""" + + def set_cmap(self, length): + """Sets the color map for the plot""" + if length > 10: + 
colors = turbo(length) + else: + palette = d3['Category10'] + if length > 3: + colors = palette[length] + else: + colors = palette[10][:length] + self.colors = colors diff --git a/mrQA/cli.py b/mrQA/cli.py index ec2f468..84602b7 100644 --- a/mrQA/cli.py +++ b/mrQA/cli.py @@ -3,16 +3,17 @@ import sys from pathlib import Path -from MRdataset import import_dataset -from MRdataset.utils import is_writable, valid_dirs -from MRdataset.log import logger +from MRdataset import import_dataset, load_mr_dataset, valid_dirs, \ + DatasetEmptyException from mrQA import check_compliance -from mrQA.config import PATH_CONFIG +from mrQA import logger +from mrQA.config import PATH_CONFIG, THIS_DIR +from mrQA.utils import is_writable def get_parser(): - """Console script for mrQA.""" + """Parser for command line interface.""" parser = argparse.ArgumentParser( description='Protocol Compliance of MRI scans', add_help=False @@ -25,12 +26,15 @@ def get_parser(): required.add_argument('-d', '--data-source', nargs='+', required=True, help='directory containing downloaded dataset with ' 'dicom files, supports nested hierarchies') + required.add_argument('--config', type=str, + help='path to config file', + default=THIS_DIR / 'resources/mri-config.json') optional.add_argument('-o', '--output-dir', type=str, help='specify the directory where the report' ' would be saved. By default, the --data_source ' 'directory will be used to save reports') optional.add_argument('-f', '--format', type=str, default='dicom', - help='type of dataset, one of [dicom|bids|pybids]') + help='type of dataset, one of [dicom|bids]') optional.add_argument('-n', '--name', type=str, help='provide a identifier/name for the dataset') optional.add_argument('-h', '--help', action='help', @@ -43,27 +47,17 @@ def get_parser(): 'of the decimal point.') optional.add_argument('-t', '--tolerance', type=float, default=0, help='tolerance for checking against reference ' - 'protocol. Default is 0.1') + 'protocol. 
Default is 0') # TODO: use this flag to store cache optional.add_argument('-v', '--verbose', action='store_true', help='allow verbose output on console') - optional.add_argument('-ref', '--reference_path', type=str, - help='.yaml file containing protocol specification') - optional.add_argument('--strategy', type=str, default='majority', - help='how to examine parameters [majority|reference].' - '--reference_path required if using reference') - optional.add_argument('--include-phantom', action='store_true', - help='whether to include phantom, localizer, ' - 'aahead_scout') - optional.add_argument('--include-nifti-header', action='store_true', - help='whether to check nifti headers for compliance,' - 'only used when --format==bids') - # Experimental features, not implemented yet. - optional.add_argument('-l', '--logging', type=int, default=40, - help='set logging to appropriate level') - optional.add_argument('--skip', nargs='+', - help='skip these parameters') - + optional.add_argument('-ref', '--ref-protocol-path', type=str, + help='XML file containing desired protocol. If not ' + 'provided, the protocol will be inferred from ' + 'the dataset.') + optional.add_argument('-pkl', '--mrds-pkl-path', type=str, + help='.mrds.pkl file can be provided to facilitate ' + 'faster re-runs.') if len(sys.argv) < 2: logger.critical('Too few arguments!') parser.print_help() @@ -72,33 +66,47 @@ def get_parser(): return parser -def main(): +def cli(): + """ + Console script for mrQA. 
+ """ args = parse_args() - - dataset = import_dataset(data_source=args.data_source, - ds_format=args.format, - name=args.name, - verbose=args.verbose, - include_phantom=args.include_phantom, - include_nifti_header=args.include_nifti_header) - - check_compliance(dataset=dataset, - strategy=args.strategy, - output_dir=args.output_dir, - decimals=args.decimals, - verbose=args.verbose, - tolerance=args.tolerance,) + if args.mrds_pkl_path: + dataset = load_mr_dataset(args.mrds_pkl_path) + else: + dataset = import_dataset(data_source=args.data_source, + ds_format=args.format, + name=args.name, + verbose=args.verbose, + config_path=args.config, + output_dir=args.output_dir) + + try: + check_compliance(dataset=dataset, + output_dir=args.output_dir, + decimals=args.decimals, + verbose=args.verbose, + tolerance=args.tolerance, + config_path=args.config, + reference_path=args.ref_protocol_path, ) + except DatasetEmptyException: + logger.error("Cannot check compliance if the dataset doesn't have " + "any scans. Please check the dataset.") + except NotADirectoryError: + logger.error('Provided output directory for saving reports is invalid.' + 'Either it is not a directory or it does not exist. 
') return 0 def parse_args(): + """Validates command line arguments and returns parsed arguments""" parser = get_parser() args = parser.parse_args() if args.verbose: - logger.setLevel('INFO') - else: logger.setLevel('WARNING') + else: + logger.setLevel('ERROR') if not valid_dirs(args.data_source): raise OSError('Expected valid directory for --data_source argument, ' @@ -114,12 +122,26 @@ def parse_args(): try: Path(args.output_dir).mkdir(parents=True, exist_ok=True) except OSError as exc: + logger.error(f'Unable to create folder {args.output_dir} for ' + f'saving reports') raise exc if not is_writable(args.output_dir): raise OSError(f'Output Folder {args.output_dir} is not writable') + + check_path(args.config, '--config') + check_path(args.ref_protocol_path, '--ref-protocol-path') + check_path(args.mrds_pkl_path, '--mrds-pkl-path') return args +def check_path(path, arg_name): + """Validates if the path is a valid file""" + if path is not None: + if not Path(path).is_file(): + raise OSError( + f'Expected valid file for {arg_name} argument, Got {path}') + + if __name__ == "__main__": - sys.exit(main()) # pragma: no cover + cli() diff --git a/mrQA/common.py b/mrQA/common.py deleted file mode 100644 index e8054ba..0000000 --- a/mrQA/common.py +++ /dev/null @@ -1,16 +0,0 @@ -import logging - -from pathlib import Path - - -def set_logging(name): - format_string = '%(asctime)s - %(levelname)s - %(message)s' - formatter = logging.Formatter(fmt=format_string) - handler = logging.StreamHandler() - # dup_filter = DuplicateFilter() - logger = logging.getLogger(name) - logger.setLevel(logging.DEBUG) - # handler.addFilter(dup_filter) - handler.setFormatter(formatter) - logger.addHandler(handler) - return logger diff --git a/mrQA/config.py b/mrQA/config.py index 7935175..f2b3073 100644 --- a/mrQA/config.py +++ b/mrQA/config.py @@ -1,105 +1,181 @@ +import logging +import tempfile from pathlib import Path + from MRdataset import MRDS_EXT from MRdataset.config import MRException 
+from protocol import UnspecifiedType +THIS_DIR = Path(__file__).parent.resolve() -STRATEGIES_ALLOWED = ['majority', ] -PARAMETER_NAMES = [ - 'Manufacturer', - 'BodyPartExamined', - 'RepetitionTime', - 'MagneticFieldStrength', - 'FlipAngle', - 'EchoTrainLength', - 'PixelBandwidth', - 'NumberOfPhaseEncodingSteps', - ] +def configure_logger(log, output_dir, mode='w', level='WARNING'): + """ + Initiate log files. + + Parameters + ---------- + log : logging.Logger + The logger object. + mode : str, (``'w'``, ``'a'``) + The writing mode to the log files. + Defaults to ``'w'``, overwrites previous files. + output_dir : str or Path + The path to the output directory. + level : str, + The level of logging to the console. One of ['WARNING', 'ERROR'] + """ + + console_handler = logging.StreamHandler() # creates the handler + warn_formatter = ('%(filename)s:%(name)s:%(funcName)s:%(lineno)d:' + ' %(message)s') + error_formatter = '%(asctime)s - %(levelname)s - %(message)s' + if output_dir is None: + output_dir = tempfile.gettempdir() + output_dir = Path(output_dir) / '.mrdataset' + output_dir.mkdir(parents=True, exist_ok=True) + + options = { + "warn" : { + 'level' : logging.WARN, + 'file' : output_dir / 'warn.log', + 'formatter': warn_formatter + }, + "error": { + 'level' : logging.ERROR, + 'file' : output_dir / 'error.log', + 'formatter': error_formatter + } + } + + if level == 'ERROR': + config = options['error'] + else: + config = options['warn'] + + file_handler = logging.FileHandler(config['file'], mode=mode) + file_handler.setLevel(config['level']) + file_handler.setFormatter(logging.Formatter(config['formatter'])) + log.addHandler(file_handler) + + console_handler.setLevel(config['level']) # sets the handler info + console_handler.setFormatter(logging.Formatter(config['formatter'])) + log.addHandler(console_handler) + return log PATH_CONFIG = { 'data_source': Path.home() / 'scan_data', - 'output_dir': Path.home() / 'mrqa_reports', + 'output_dir' : Path.home() / 
'mrqa_reports', } DATE_SEPARATOR = '_DATE_' +ATTRIBUTE_SEPARATOR = '_ATTR_' +DATETIME_FORMAT = '%m_%d_%Y_%H_%M_%S' +DATE_FORMAT = '%m_%d_%Y' +Unspecified = UnspecifiedType() def past_records_fpath(folder): - return Path(folder/'past_record.txt') + """Constructs the path to the past record file""" + return Path(folder / 'past_record.txt') + + +def status_fpath(folder, audit): + """Constructs the path to the status file""" + return Path(folder / f'{audit}_non_compliance_log.txt') def report_fpath(folder_path, fname): + """Constructs the path to the report file""" return folder_path / f'{fname}.html' def mrds_fpath(folder_path, fname): + """Constructs the path to the MRDS file""" return folder_path / f'{fname}{MRDS_EXT}' def subject_list_dir(folder_path, fname): + """Constructs the path to the folder containing subject list files""" return folder_path / f'{fname}_files' class CannotComputeMajority(MRException): """Custom error that is raised when majority cannot be computed.""" - def __init__(self, name, te): - super().__init__( - f"Could not compute majority for {name} with echo time {te}") - - -class ReferenceNotSetForModality(MRException): - """Custom error that is raised when majority cannot be computed.""" - def __init__(self, name): super().__init__( - f"Cannot compute delta for runs in modality {name}" - f"as not reference protocol doesn't exist.") - - -class ReferenceNotSetForEchoTime(MRException): - """Custom error that is raised when majority cannot be computed.""" - - def __init__(self, name, echo_time): - super().__init__( - f"Cannot compute delta for runs in modality {name} " - f"with TE {echo_time}" - f" as not reference protocol is not set.") - - -class ComplianceException(Exception): - """ - Custom error that is raised when some critical properties are not - found in dicom file - """ - def __init__(self, message, **kwargs): - super().__init__(message) - - -class EmptySubject(ComplianceException): - """""" - pass - - -class 
NonCompliantSubject(ComplianceException): - """""" - pass - - -class ChangingParamsinSeries(ComplianceException): - """ - Custom error that is raised when parameter values are different for - different slices even though the SeriesInstanceUID is same. - """ - - - def __init__(self, filepath): - super().__init__("Expected all dicom slices to have same parameters. " - "Got changing parameters : {}".format(filepath)) - - -class ComplianceWarning(Warning): - """Library specific exception""" - - pass + f"Could not compute majority for {name}") + + +# +# class ReferenceNotSetForModality(MRException): +# """Custom error that is raised when majority cannot be computed.""" +# +# def __init__(self, name): +# super().__init__( +# f"Cannot compute delta for runs in modality {name}" +# f"as not reference protocol doesn't exist.") +# +# +# class ReferenceNotSetForEchoTime(MRException): +# """Custom error that is raised when majority cannot be computed.""" +# +# def __init__(self, name, echo_time): +# super().__init__( +# f"Cannot compute delta for runs in modality {name} " +# f"with TE {echo_time}" +# f" as not reference protocol is not set.") +# +# +# class ComplianceException(Exception): +# """ +# Custom error that is raised when some critical properties are not +# found in dicom file +# """ +# +# def __init__(self, message, **kwargs): +# super().__init__(message) +# +# +# class EmptySubject(ComplianceException): +# """""" +# pass +# +# +# class NonCompliantSubject(ComplianceException): +# """""" +# pass +# +# +# class ChangingParamsinSeries(ComplianceException): +# """ +# Custom error that is raised when parameter values are different for +# different slices even though the SeriesInstanceUID is same. +# """ +# +# def __init__(self, filepath): +# super().__init__("Expected all dicom slices to have same parameters. 
" +# "Got changing parameters : {}".format(filepath)) +# +# +# class ComplianceWarning(Warning): +# """Library specific exception""" +# +# pass + + +class EqualCountType(UnspecifiedType): + + def __init__(self): + super().__init__() + + def __str__(self): + return 'EqualCount' + + def __repr__(self): + return 'EqualCount' + + +EqualCount = EqualCountType() diff --git a/mrQA/criteria.yaml b/mrQA/criteria.yaml deleted file mode 100644 index a600913..0000000 --- a/mrQA/criteria.yaml +++ /dev/null @@ -1,60 +0,0 @@ -manufacturer: - enabled: true - mode: single - value: siemens -organ: - enabled: true - mode: single - value: brain -te: - enabled: true - mode: range - value: None -tr: - enabled: true - mode: range - value: None -b0: - enabled: true - mode: single - value: None -flip_angle: - enabled: true - mode: range - value: None -bwpx: - enabled: true - mode: range - value: None -comments: - enabled: false - mode: single - value: None -scanning_sequence: - enabled: true - mode: single - value: None -sequence_variant: - enabled: true - mode: single - value: None -mr_acquisition_type: - enabled: true - mode: single - value: None -phase_encoding_lines: - enabled: true - mode: multiple - value: ['ROW', 'COL'] -bwp_phase_encode: - enabled: true - mode: single - value: None -echo_train_length: - enabled: true - mode: single - value: None -phase_encoding_direction: - enabled: true - mode: single - value: None diff --git a/mrQA/formatter.py b/mrQA/formatter.py index ea37728..b106263 100644 --- a/mrQA/formatter.py +++ b/mrQA/formatter.py @@ -1,12 +1,15 @@ +import importlib import smtplib import ssl from abc import ABC, abstractmethod from email import encoders from email.mime import base, multipart, text from pathlib import Path -import importlib + import jinja2 +from mrQA import logger + class Formatter(ABC): def __init__(self): @@ -94,19 +97,160 @@ def render(self, *args, **kwargs): class HtmlFormatter(BaseFormatter): - def __init__(self, filepath, params, render=True): + 
""" + Class to create an HTML report for compliance evaluation. + + Parameters + ---------- + filepath : str + Path to the html file to be created + render : bool + If True, the report is rendered immediately. Otherwise, the render + method needs to be called explicitly. + """ + + def __init__(self, filepath, render=False): super(HtmlFormatter, self).__init__(filepath) self.template_folder = Path(__file__).resolve().parent - self.params = params + self.hz_audit = None + self.vt_audit = None + self.plots = {} + self.complete_ds = None + + self.skip_hz_report = False + self.skip_vt_report = False + self.skip_plots = True if render: self.render() + def collect_hz_audit_results(self, + compliant_ds, + non_compliant_ds, + undetermined_ds, + subject_lists_by_seq, + complete_ds, + ref_protocol, + **kwargs): + """ + Collects results from horizontal audit and stores them. The + dictionary is then passed to the jinja2 template for rendering. + + Parameters + ---------- + compliant_ds : BaseDataset + Dataset containing compliant sequences + non_compliant_ds : BaseDataset + Dataset containing non-compliant sequences + undetermined_ds : BaseDataset + Dataset containing sequences that could not be determined + subject_lists_by_seq : dict + Dictionary containing subject lists for each sequence + complete_ds : BaseDataset + Dataset containing all sequences + ref_protocol : dict + Reference protocol + kwargs : dict + Additional arguments to pass to the jinja2 template + """ + if not complete_ds.get_sequence_ids(): + logger.error('No sequences found in dataset. Cannot generate' + 'report') + self.skip_hz_report = True + if not ref_protocol: + logger.error('Reference protocol is empty. Cannot generate' + ' report for horizontal audit.') + self.skip_hz_report = True + if not (compliant_ds.get_sequence_ids() + or non_compliant_ds.get_sequence_ids() + or undetermined_ds.get_sequence_ids()): + logger.error('It seems the dataset has not been checked for ' + 'horizontal audit. 
Skipping horizontal audit report') + self.skip_hz_report = True + + self.hz_audit = { + 'protocol': ref_protocol, + 'compliant_ds': compliant_ds, + 'non_compliant_ds': non_compliant_ds, + 'undetermined_ds': undetermined_ds, + 'sub_lists_by_seq': subject_lists_by_seq, + } + + # add any additional kwargs to the hz_audit dict + for key, value in kwargs.items(): + self.hz_audit[key] = value + + self.complete_ds = complete_ds + + def collect_vt_audit_results(self, + compliant_ds, + non_compliant_ds, + sequence_pairs, + complete_ds, + parameters, + **kwargs): + """ + Collects results from horizontal audit and stores them. The + dictionary is then passed to the jinja2 template for rendering. + + Parameters + ---------- + compliant_ds : BaseDataset + Dataset containing compliant sequences + non_compliant_ds : BaseDataset + Dataset containing non-compliant sequences + complete_ds : BaseDataset + Dataset containing all sequences + sequence_pairs : list + Sequence pairs compared for vertical audit. For ex. + [('gre-field-mapping', 'rs-fMRI'), ('T1w', 'T2w')] + parameters : list + Parameters used for vertical audit. + For ex. ['ShimSetting, 'FlipAngle'] + kwargs : dict + Additional arguments to pass to the jinja2 template + """ + + if not complete_ds.get_sequence_ids(): + logger.error('No sequences found in dataset. Cannot generate' + 'report') + self.skip_vt_report = True + if not (compliant_ds.get_sequence_ids() + or non_compliant_ds.get_sequence_ids()): + logger.error('It seems the dataset has not been checked for ' + 'vertical audit. 
Skipping vertical audit report') + self.skip_vt_report = True + + self.vt_audit = { + 'complete_ds': complete_ds, + 'compliant_ds': compliant_ds, + 'non_compliant_ds': non_compliant_ds, + 'sequence_pairs': sequence_pairs, + 'parameters': parameters + } + + # add any additional kwargs to the vt_audit dict + for key, value in kwargs.items(): + self.vt_audit[key] = value + + self.complete_ds = complete_ds + + def collect_plots(self, **kwargs): + for key, value in kwargs.items(): + self.plots[key] = value + + if not self.plots: + logger.error('No plots found. Skipping plots section in report') + self.skip_plots = True + def render(self): """ - Render html page using jinja2 - :param - :return: + Renders the html report using jinja2 template. It will skip horizontal + or vertical audit report if the corresponding audit was not performed. """ + if self.skip_hz_report and self.skip_vt_report: + logger.error('Cannot generate report. See error log for details') + return + fs_loader = jinja2.FileSystemLoader(searchpath=self.template_folder) extn = ['jinja2.ext.loopcontrols'] template_env = jinja2.Environment(loader=fs_loader, extensions=extn) @@ -115,20 +259,14 @@ def render(self): template = template_env.get_template(template_file) output_text = template.render( - dataset=self.params['ds'], - sub_lists_by_modality=self.params['sub_lists_by_modality'], - # time=self.params['time'], + hz=self.hz_audit, + vt=self.vt_audit, + plots=self.plots, + skip_hz_report=self.skip_hz_report, + skip_vt_report=self.skip_vt_report, + skip_plots=self.skip_plots, + complete_ds=self.complete_ds, imp0rt=importlib.import_module ) - # self.output = weasyprint.HTML(string=output_text) f = open(self.filepath, 'w') f.write(output_text) - - -class PdfFormatter(HtmlFormatter): - def __init__(self, filepath, params): - super().__init__(filepath, params) - # self.output = super(PdfFormatter, self).render(params) - - def render(self): - return self.output.write_pdf(self.filepath) diff --git 
a/mrQA/layout.html b/mrQA/layout.html index 700b41a..6cd5196 100644 --- a/mrQA/layout.html +++ b/mrQA/layout.html @@ -4,7 +4,32 @@ + + + + {{ title }} @@ -68,129 +101,163 @@ {% set utils = imp0rt('mrQA.utils') %} -

Summary of non-compliance:

-

List of non-compliant modalities - - {{ dataset.non_compliant_modality_names|length }}

- - - - - - - - - - {# #} +

Summary of non-compliance: {{ hz['compliant_ds'].name }}

+{% if not skip_plots %} + {% for key in plots %} +
+ {{ plots[key].div | safe }} + {{ plots[key].script | safe }} +
+ {% endfor %} +{% endif %} - {# #} - - - - {% for modality in dataset.modalities|sort %} - {% if not modality.compliant %} - {% if modality.subjects|length > 2 %} +{% if not skip_hz_report %} +
+

Horizontal Audit

+ Reference Protocol Type : {{ hz['protocol'].type.name }} +

List of non-compliant modalities + - {{ hz['non_compliant_ds'].get_sequence_ids()|length }}

+
Modality# non-compliant (%)Non-compliant subjectsParameters# compliant (%)# subjects# Echo-Timeserrors
+ + + + + + + + + + + + {% for seq_id in hz['non_compliant_ds'].get_sequence_ids()|sort %} + {% set ncomp_sub_ids = hz['non_compliant_ds'].get_subject_ids(seq_id) %} + {% set seq_name_wo_tag = seq_id.split('_ATTR_') %} + {% set total_subjects = complete_ds.get_subject_ids(seq_name_wo_tag[0]) | + length %} + {% set comp_subject_count = total_subjects - ncomp_sub_ids|length %} + {% set non_compliant_params = + hz['non_compliant_ds'].get_nc_param_ids(seq_id) %} + {% if hz['non_compliant_ds'].get_subject_ids(seq_id) %}{# + |length > 2 #} - - {% set percent_non_compliant = 100 * modality.non_compliant_subject_names|length|float / modality.subjects|length|float %} - {% set percent_compliant = 100 * modality.compliant_subject_names|length|float / modality.subjects|length|float %} + + {% set percent_non_compliant = 100 * ncomp_sub_ids|length|float / + total_subjects %} + {% set percent_compliant = 100 * comp_subject_count|float / + total_subjects %} - {# #} - {# #} {% endif %} - {% endif %} - {% endfor %} - -
Modality# non-compliant (%)Non-compliant subjectsParameters# compliant (%)# subjects
{{ modality.name }}{{ seq_id }} - {{ modality.non_compliant_subject_names|length }} + {{ ncomp_sub_ids | length }} ({{ percent_non_compliant|round(2, 'floor') }} %) - {% if modality.non_compliant_subject_names|length < 50 %} - {% for name in modality.non_compliant_subject_names|sort %} + {% if ncomp_sub_ids|length < 50 %} + {% for name in ncomp_sub_ids|sort %} {{ name }}, {% endfor %} {% else %} Too many to fit here. Click - here + here for full list. {% endif %} - {% for parameter in modality.non_compliant_params()|sort %} + {% for parameter in + hz['non_compliant_ds'].get_nc_param_ids(seq_id)|sort %} {{ parameter }}, {% endfor %} - {{ modality.compliant_subject_names|length }} + {{ comp_subject_count }} ( {{ percent_compliant|round(2, 'floor') }} %) - {{ modality.subjects|length }} + {{ total_subjects }} #} - {# {{ modality.get_echo_times() | length }}#} - {# #} - {# {{ mode.error_children|length }}#} - {#
-

List of fully compliant modalities - : {{ dataset.compliant_modality_names | length }}

- - {% set cols = 4 %} - {% set rows = (dataset.compliant_modality_names|length // cols) + 1 %} - - {% for i in range(rows) %} - - {% for j in range(cols) %} - {% set index = i * cols + j %} - {% if index < dataset.compliant_modality_names|length %} - - {% endif %} - {% endfor %} - - {% endfor %} - -
- {{ dataset.compliant_modality_names[index] }} -
- - -{% for modality in dataset.modalities|sort %} - - {% if not modality.compliant %} - {% if modality.get_echo_times() %} -

Modality : {{ modality.name }}

- {% endif %} - {% for echo_time in modality.get_echo_times()|sort %} - {% set te = echo_time %} - {% set reference = modality.get_reference(echo_time) %} - {% set runs_by_echo = utils._get_runs_by_echo(modality, 3) %} -

Reference {{ loop.index }} | Number of Runs - : {{ runs_by_echo[te]|length }}

+ {% endfor %} + + - - + {% set comp_sequences = hz['compliant_ds'].get_sequence_ids() %} +

Fully compliant modalities + : {{ comp_sequences | length }}

+ {% if comp_sequences|length > 0 %} +
+ {% set cols = 4 %} + {% set rows = (comp_sequences |length // cols) + 1 %} + + {% for i in range(rows) %} - {% for key in reference.keys()|sort %} - + {% for j in range(cols) %} + {% set index = i * cols + j %} + {% if index < comp_sequences |length %} + + {% endif %} {% endfor %} + {% endfor %} + +
{{ key }} + {{ comp_sequences[index] }} +
+ {% endif %} - - + {% set und_sequences = hz['undetermined_ds'].get_sequence_ids() %} + {% if und_sequences|length > 0 %} +

Modalities for which compliance could not be determined + : {{ und_sequences | length }}

+ + {% set cols = 4 %} + {% set rows = (und_sequences |length // cols) + 1 %} + + {% for i in range(rows) %} - {% for key in reference.keys()|sort %} - + {% for j in range(cols) %} + {% set index = i * cols + j %} + {% if index < und_sequences |length %} + + {% endif %} {% endfor %} - -
{{ reference[key] }} + {{ und_sequences[index] }} +
- {% if modality.non_compliant_params(te).any() %} - + {% endfor %} + +
+ {% endif %} + + {% for seq_id in hz['non_compliant_ds'].get_sequence_ids()|sort %} +

Sequence : {{ seq_id }}

+ {% set ref = hz['protocol'][seq_id] %} +

Reference

+ + + + {% for param in ref|sort %} + + {% endfor %} + + + + + {% for param in ref|sort %} + + {% endfor %} + + +
{{ param }}
{{ ref[param].get_value() }}
+ {% set non_compliant_params = +hz['non_compliant_ds'].get_nc_param_ids(seq_id) %} + {% if non_compliant_params|length %} + - {% endif %} - {% for parameter in modality.non_compliant_params(te)|sort %} - {# {% set reasons = modality.query_by_param(parameter, te) %}#} - - - - - + + + + {% for nc_param, tuples in nc_dict.items() %} + + + + {% endfor %} - - - {% endfor %} - -
Reference {{ loop.index }} | Number of Runs colspan="4">Found Subject_Session + colspan="4">Subject
{{ parameter }} - {% for value in modality.query_by_param(parameter, te, 'ref_value') %} - {{ value }}, - {% endfor %} - - {% for value in modality.query_by_param(parameter, te, 'new_value') %} - {{ value }}, - {% endfor %} - - {% for subject in modality.query_by_param(parameter, te, 'subjects') %} - {{ subject }}, + {% for parameter in non_compliant_params|sort %} + {% set nc_data = hz['non_compliant_ds'].get_nc_param_values(seq_id, + parameter)|list %} + {% set nc_dict = utils.tuples2dict(nc_data) %} +
+ {{ parameter }} + + {{ ref[parameter].get_value() }} +
+ {{ nc_param.get_value() }}, + + {% for sub, path in tuples %} + {{ sub }}, + {% endfor %} +
- {% endfor %} - {% if modality.error_subject_names() %} - - - - - - - - - - - - + {% endfor %}
Error - Subject_Session -
Could not compute - non-compliance - - {% for entry in modality.error_subject_names() %} - {{ entry }}, - {% endfor %} -
{% endif %} - {% endif %} -{% endfor %} + {% endfor %} + {% for seq_id in hz['compliant_ds'].get_sequence_ids()|sort %} +

Sequence : {{ seq_id }}

+ {% set ref = hz['protocol'][seq_id] %} + -{% for modality in dataset.modalities|sort %} - {% if modality.compliant %} -

Modality : {{ modality.name }}

- {% if not modality.get_echo_times() %} -
Warning : Could not compute - reference
- {% endif %} - {% for echo_time in modality.get_echo_times()|sort %} - {% set te = echo_time %} - {% set reference = modality.get_reference(echo_time) %} - {% set runs_by_echo = utils._get_runs_by_echo(modality, 3) %} - {# {% if modality.is_multi_echo() %}#} -

Reference {{ loop.index }} | Number of Runs - : {{ runs_by_echo[te]|length }}

- {# {% else %}#} - {#

Reference

#} - {# {% endif %}#} - - - - {% for key in reference.keys()|sort %} - - {% endfor %} - - - - - - {% for key in reference.keys()|sort %} - - {% endfor %} - - -
{{ key }}
{{ reference[key] }}
- {% endfor %} - {% if modality.error_subject_names() %} - - - - - - - - - - - + +
Error - Subject_Session -
Could not compute - non-compliance - - {% for entry in modality.error_subject_names() %} - {{ entry }}, - {% endfor %} + + + + {% for param in ref|sort %} + + {% endfor %} + + + + + {% for param in ref|sort %} + - - -
{{ param }}
{{ ref[param].get_value() }}
- {% endif %} - {% endif %} -{% endfor %} + {% endfor %} +
+ {% endfor %} +{% endif %} +{% if not skip_vt_report %} +

Vertical Audit

+ + + + + + {% for pair in vt['sequence_pairs'] %} + + {% endfor %} + + + + {% for param in vt['parameters'] %} + + + + {% for pair in vt['sequence_pairs'] %} + + {% endfor %} + {% endfor %} + + + + {% for pair in vt['sequence_pairs'] %} + + {% endfor %} + + +
Parameters # non-compliant (%){{ pair[0] }},
{{ pair[1] }} +
{{ param }} + {% set total_subjects = vt['complete_ds'].subjects() | length | float %} + {% set nc_subjects = vt['non_compliant_ds'].total_nc_subjects_by_parameter(param) | float %} + {% set percent_non_comp = 100 * nc_subjects/total_subjects %} + {{ nc_subjects }}
+ ({{ percent_non_comp| round(2, 'floor') }} %) +
+ {% if param in vt['non_compliant_ds'].get_nc_param_ids(pair[0]) %} + {% set val = vt['non_compliant_ds'].get_nc_subject_ids(pair[0], param, pair[1]) %} + {% for sub in val %} + {{ sub }},
+ {% endfor %} + {% endif %}
# non-compliant (%) + {# {% if param in vt['nc_ds'].get_nc_param_ids(pair[0]) %}#} + {% set nc_subjects = vt['non_compliant_ds'].total_nc_subjects_by_sequence(pair[0], ref_seq=pair[1]) %} + {% set original_seq_name = vt['non_compliant_ds'].get_original_seq_name(pair[0]) %} + {% set total_subjects = vt['complete_ds'].get_subject_ids(original_seq_name) | length | float %} + {% set percent_non_comp = 100 * nc_subjects/total_subjects %} + {{ nc_subjects }}
({{ percent_non_comp| round(2, 'floor') }} %) + {# {% endif %}#} +
+{% endif %} diff --git a/mrQA/monitor.py b/mrQA/monitor.py index d07dcec..e5e90e0 100644 --- a/mrQA/monitor.py +++ b/mrQA/monitor.py @@ -1,16 +1,17 @@ """Console script for mrQA.""" import argparse import sys +from datetime import datetime, timedelta from pathlib import Path from typing import Union, List from MRdataset import import_dataset, load_mr_dataset -from MRdataset.log import logger -from MRdataset.utils import is_writable -from mrQA import check_compliance -from mrQA.config import PATH_CONFIG -from mrQA.utils import files_modified_since, get_last_valid_record +from mrQA import logger +from mrQA.config import PATH_CONFIG, THIS_DIR, DATETIME_FORMAT +from mrQA.project import check_compliance +from mrQA.utils import is_writable, folders_modified_since, \ + get_last_valid_record, log_latest_non_compliance def get_parser(): @@ -29,6 +30,9 @@ def get_parser(): required.add_argument('-d', '--data-source', type=str, required=True, help='directory containing downloaded dataset with ' 'dicom files, supports nested hierarchies') + required.add_argument('--config', type=str, + help='path to config file', + default=THIS_DIR / 'resources/mri-config.json') optional.add_argument('-o', '--output-dir', type=str, help='specify the directory where the report' ' would be saved. By default, the --data_source ' @@ -43,17 +47,15 @@ def get_parser(): '(default:0). If decimals are negative it ' 'specifies the number of positions to the left' 'of the decimal point.') + optional.add_argument('-t', '--tolerance', type=float, default=0, + help='tolerance for checking against reference ' + 'protocol. Default is 0') optional.add_argument('-v', '--verbose', action='store_true', help='allow verbose output on console') - optional.add_argument('-ref', '--reference-path', type=str, - help='.yaml file containing protocol specification') - optional.add_argument('--strategy', type=str, default='majority', - help='how to examine parameters [majority|reference].' 
- '--reference-path required if using reference') - optional.add_argument('--include-phantom', action='store_true', - help='whether to include phantom, localizer, ' - 'aahead_scout') - + optional.add_argument('-ref', '--ref-protocol-path', type=str, + help='XML file containing desired protocol. If not ' + 'provided, the protocol will be inferred from ' + 'the dataset.') if len(sys.argv) < 2: logger.critical('Too few arguments!') parser.print_help() @@ -111,29 +113,38 @@ def parse_args(): # TODO: Add this check to mrqa and MRdataset if not is_writable(args.output_dir): raise OSError(f'Output Folder {args.output_dir} is not writable') + + if not Path(args.config).is_file(): + raise FileNotFoundError(f'Expected valid config file, ' + f'Got {args.config}') + else: + args.config = Path(args.config).resolve() return args -def main(): +def cli(): + """Console script for mrQA monitor.""" args = parse_args() monitor(name=args.name, data_source=args.data_source, output_dir=args.output_dir, verbose=args.verbose, - include_phantom=args.include_phantom, decimals=args.decimals, ds_format=args.format, - strategy=args.strategy) + config_path=args.config, + tolerance=args.tolerance, + reference_path=args.ref_protocol_path, ) def monitor(name: str, data_source: Union[str, List, Path], output_dir: Union[str, Path], verbose: bool = False, - include_phantom: bool = False, decimals: int = 3, ds_format: str = 'dicom', - strategy: str = 'majority') -> Path: + config_path: Union[Path, str] = None, + tolerance=0, + reference_path=None): """ Monitor a dataset folder for changes. Read new files and append to existing dataset. Run compliance check on the updated dataset. @@ -149,39 +160,43 @@ def monitor(name: str, Path to the folder where the report, and dataset would be saved. verbose: bool Whether to print verbose output on console. - include_phantom: bool - Whether to include phantom, localizer, aahead_scout decimals: int Number of decimal places to round to (default:3). 
ds_format: str Type of dataset, one of [dicom] - strategy: str - How to examine parameters [majority|reference] - - Returns - ------- - report_path: Path - Posix path to the new generated report. + config_path: str + Path to the config file + tolerance: float + Tolerance for checking against reference protocol. Default is 0 + reference_path: str + Path to the reference protocol file. """ output_dir = Path(output_dir) last_record = get_last_valid_record(output_dir) + last_reported_on = None + if last_record: last_reported_on, last_report_path, last_mrds_path = last_record # TODO: delete old logs, only keep latest 3-4 reports in the folder dataset = load_mr_dataset(last_mrds_path) - modified_files = files_modified_since(input_dir=data_source, - last_reported_on=last_reported_on, - output_dir=output_dir) - if modified_files: - new_dataset = import_dataset(data_source=modified_files, + modified_folders = folders_modified_since( + input_dir=data_source, + last_reported_on=last_reported_on, + output_dir=output_dir + ) + if modified_folders: + new_dataset = import_dataset(data_source=modified_folders, ds_format='dicom', name=name, verbose=verbose, - include_phantom=include_phantom) + config_path=config_path, + output_dir=output_dir) + # prev_status = get_status(dataset) dataset.merge(new_dataset) else: logger.warning('No new files found since last report. ' - 'Regenerating report') + 'Returning last report') + return else: logger.warning('Dataset %s not found in records. 
Running ' 'compliance check on entire dataset', name) @@ -189,14 +204,36 @@ def monitor(name: str, ds_format=ds_format, name=name, verbose=verbose, - include_phantom=include_phantom) + config_path=config_path, + output_dir=output_dir) + new_dataset = None + + if last_reported_on is None: + # if this is the first time, set last_reported_on to 1 year ago + last_reported_on = datetime.now() - timedelta(days=365) + last_reported_on = last_reported_on.strftime(DATETIME_FORMAT) + + hz_audit_results, vt_audit_results = check_compliance( + dataset=dataset, + output_dir=output_dir, + decimals=decimals, + verbose=verbose, + tolerance=tolerance, + reference_path=reference_path, + config_path=config_path) + + log_latest_non_compliance(dataset=hz_audit_results['non_compliant'], + config_path=config_path, + output_dir=output_dir, audit='hz', + date=last_reported_on) + + log_latest_non_compliance(dataset=vt_audit_results['non_compliant'], + config_path=config_path, + output_dir=output_dir, audit='vt', + date=last_reported_on) - report_path = check_compliance(dataset=dataset, - strategy=strategy, - output_dir=output_dir, - decimals=decimals) - return report_path + return if __name__ == '__main__': - sys.exit(main()) # pragma: no cover + sys.exit(cli()) # pragma: no cover diff --git a/mrQA/parallel_utils.py b/mrQA/parallel_utils.py index 1a0d084..4f98e29 100644 --- a/mrQA/parallel_utils.py +++ b/mrQA/parallel_utils.py @@ -1,13 +1,14 @@ -import math import os import subprocess from pathlib import Path +from time import sleep from typing import Union, Iterable -from MRdataset.log import logger -from MRdataset.utils import valid_dirs +from MRdataset import valid_dirs -from mrQA.utils import is_integer_number, execute_local, list2txt +from mrQA import logger +from mrQA.utils import is_integer_number, execute_local, list2txt, \ + folders_with_min_files def _check_args(data_source: Union[str, Path, Iterable] = None, @@ -17,7 +18,8 @@ def _check_args(data_source: Union[str, Path, 
Iterable] = None, subjects_per_job: int = None, hpc: bool = False, conda_dist: str = None, - conda_env: str = None): + conda_env: str = None, + config_path: Union[str, Path] = None): # It is not possible to submit jobs while debugging, why would you submit # a job, if code is still being debugged if debug and hpc: @@ -33,7 +35,7 @@ def _check_args(data_source: Union[str, Path, Iterable] = None, # Check if data_source is a valid directory, or list of valid directories data_source = valid_dirs(data_source) - # RULE : If output_dir not provided, output wil be saved in 'mrqa_files' + # RULE : If output_dir not provided, output will be saved in 'mrqa_files' # created in the parent folder of data_source if not output_dir: if isinstance(data_source, Iterable): @@ -42,19 +44,20 @@ def _check_args(data_source: Union[str, Path, Iterable] = None, raise RuntimeError('Need an output directory to store files') # Didn't find a good alternative to os.access - # in pathlib, please raise a issue if you know one, happy to incorporate - output_dir = data_source.parent / ( - data_source.name + '_mrqa_files') + # in pathlib, please raise an issue if you know one, + # happy to incorporate + parent_dir = Path(data_source[0]).parent + output_dir = parent_dir / (data_source[0].name + '_mrqa_files') # Check if permission to create a folder in data_source.parent - if os.access(data_source.parent, os.W_OK): + if os.access(parent_dir, os.W_OK): logger.warning('Expected a directory to save job scripts. 
Using ' 'parent folder of --data_source instead.') else: - raise PermissionError(f'You do not have write permission to' - f'create a folder in ' - f'{data_source.parent}' - f'Please provide output_dir') + raise PermissionError('You do not have write permission to' + 'create a folder in ' + f'{parent_dir}' + 'Please provide output_dir') else: output_dir = Path(output_dir) # Information about conda env is required for creating slurm scripts @@ -64,6 +67,8 @@ def _check_args(data_source: Union[str, Path, Iterable] = None, conda_env = 'mrqa' if hpc else 'mrcheck' if not conda_dist: conda_dist = 'miniconda3' if hpc else 'anaconda3' + if not Path(config_path).exists(): + raise FileNotFoundError(f'Config file not found at {config_path}') return data_source, output_dir, conda_env, conda_dist @@ -76,12 +81,12 @@ def _make_file_folders(output_dir): # created by the corresponding bash script folder_paths = { - 'ids': output_dir / 'id_lists', + 'fnames': output_dir / 'fname_lists', 'scripts': output_dir / 'bash_scripts', 'mrds': output_dir / 'partial_mrds' } files_per_batch = { - 'ids': output_dir / 'per_batch_id_list.txt', + 'fnames': output_dir / 'per_batch_folders_list.txt', 'scripts': output_dir / 'per_batch_script_list.txt', 'mrds': output_dir / 'per_batch_partial_mrds_list.txt' } @@ -91,7 +96,7 @@ def _make_file_folders(output_dir): # And store the original complete list (contains all # subject ids) in "complete_id_list.txt" - all_ids_path = output_dir / 'complete_id_list.txt' + all_ids_path = output_dir / 'complete_fname_list.txt' return folder_paths, files_per_batch, all_ids_path @@ -119,6 +124,8 @@ def _run_single_batch(script_path: Union[str, Path], # to submit the script # TODO: Add try/except block here subprocess.run(['sbatch', script_path], check=True, shell=True) + # Without any delay, you may receive NODE_FAIL error + sleep(2) # print(out.stdout) # some way to check was submitted/accepted @@ -134,13 +141,14 @@ def _run_single_batch(script_path: Union[str, 
Path], def _create_slurm_script(output_script_path: Union[str, Path], - ids_filepath: Union[str, Path], + fnames_filepath: Union[str, Path], env: str = 'mrqa', conda_dist: str = 'anaconda3', - num_subj_per_job: int = 50, + folders_per_job: int = 50, verbose: bool = False, - include_phantom: bool = False, - output_mrds_path: bool = None) -> None: + config_path: Union[str, Path] = None, + output_mrds_path: bool = None, + email='mail.sinha.harsh@gmail.com') -> None: """ Creates a slurm script file which can be submitted to a hpc. @@ -148,18 +156,16 @@ def _create_slurm_script(output_script_path: Union[str, Path], ---------- output_script_path : str Path to slurm script file - ids_filepath : str + fnames_filepath : str Path to text file containing list of subject ids env : str Conda environment name conda_dist : str Conda distribution - num_subj_per_job : int + folders_per_job : int Number of subjects to process in each slurm job verbose : bool If True, prints the output of the script - include_phantom : bool - If True, includes phantom, localizer and calibration studies output_mrds_path : str Path to the partial mrds pickle file """ @@ -171,20 +177,18 @@ def _create_slurm_script(output_script_path: Union[str, Path], # Sys Time (CPU Time) : 10 minutes 20 minutes # Set the memory and cpu time limits - mem_reqd = 2000 # MB; - num_mins_per_subject = 1 # minutes - num_hours = int(math.ceil(num_subj_per_job * num_mins_per_subject / 60)) + mem_required = 2000 # MB; + # num_mins_per_subject = 1 # minutes # Set the number of hours to 3 if less than 3 - time_limit = 3 if num_hours < 3 else num_hours + time_limit = 24 # Setup python command to run - python_cmd = f'mrpc_subset -o {output_mrds_path} -b {ids_filepath}' + python_cmd = (f'mrqa_subset -o {output_mrds_path} -b {fnames_filepath} ' + f'--config {config_path}') # Add flags to python command if verbose: python_cmd += ' --verbose' - if include_phantom: - python_cmd += ' --include_phantom' - python_cmd += ' --is_partial' + 
python_cmd += ' --is-partial' # Create the slurm script file with open(output_script_path, 'w', encoding='utf-8') as fp: @@ -193,14 +197,14 @@ def _create_slurm_script(output_script_path: Union[str, Path], '#SBATCH -A med220005p', '#SBATCH -N 1', '#SBATCH -p RM-shared', - f'#SBATCH --mem-per-cpu={mem_reqd}M #memory per cpu-core', + f'#SBATCH --mem-per-cpu={mem_required}M #memory per cpu-core', f'#SBATCH --time={time_limit}:00:00', '#SBATCH --ntasks-per-node=1', - f'#SBATCH --error={ids_filepath.stem}.%J.err', - f'#SBATCH --output={ids_filepath.stem}.%J.out', + f'#SBATCH --error={fnames_filepath.stem}.%J.err', + f'#SBATCH --output={fnames_filepath.stem}.%J.out', '#SBATCH --mail-type=end # send email when job ends', '#SBATCH --mail-type=fail # send email if job fails', - '#SBATCH --mail-user=mail.sinha.harsh@gmail.com', + f'#SBATCH --mail-user={email}', '#Clear the environment from any previously loaded modules', 'module purge > /dev/null 2>&1', f'source ${{HOME}}/{conda_dist}/etc/profile.d/conda.sh', @@ -211,25 +215,27 @@ def _create_slurm_script(output_script_path: Union[str, Path], ) -def _get_num_workers(subjects_per_job, subject_list): - if subjects_per_job > len(subject_list): +def _get_num_workers(folders_per_job, folder_list): + if folders_per_job > len(folder_list): # If subjects_per_job is greater than the number of subjects, # process all subjects in a single job. Stop execution. raise RuntimeError('Trying to create more jobs than total number of ' - 'subjects in the directory. Why?') + 'folders in the directory. Why?') # Get the number of jobs - workers = len(subject_list) // subjects_per_job + workers = len(folder_list) // folders_per_job if workers == 1: # If there is only one job, process all subjects in a single job - raise RuntimeError('Decrease number of subjects per job. Expected' + raise RuntimeError('Decrease number of folders per job. Expected' 'workers > 1 for parallel processing. 
Got 1') return workers -def _get_subject_ids(data_source: Union[str, Path], - all_ids_path: Union[str, Path]) -> list: +def _get_terminal_folders(data_source: Union[str, Path], + all_ids_path: Union[str, Path], + pattern='*', + min_count=1) -> Iterable: """ Get the list of subject ids from the data source folder @@ -242,18 +248,16 @@ def _get_subject_ids(data_source: Union[str, Path], Returns ------- - subject_list : list + subject_list : Iterable List of subject ids """ - subject_list = [] + terminal_folder_list = [] # Get the list of subject ids - for root, _, _ in os.walk(data_source): - if 'sub-' in Path(root).name: - # Get the subject id - num_files_in_root = len(list(Path(root).rglob('*/*'))) - if num_files_in_root > 0: - subject_list.append(root) + for directory in valid_dirs(data_source): + sub_folders = folders_with_min_files(directory, pattern, + min_count) + terminal_folder_list.extend(sub_folders) # Store the list of unique subject ids to a text file given by # output_path - list2txt(all_ids_path, list(set(subject_list))) - return subject_list + list2txt(all_ids_path, list(set(terminal_folder_list))) + return terminal_folder_list diff --git a/mrQA/plotting.py b/mrQA/plotting.py new file mode 100644 index 0000000..6e95da9 --- /dev/null +++ b/mrQA/plotting.py @@ -0,0 +1,568 @@ +import csv +from ast import literal_eval +from collections import defaultdict +from pathlib import Path + +from bokeh.embed import components +from bokeh.models import FactorRange, ColumnDataSource +from bokeh.plotting import figure +from protocol import UnspecifiedType + +from mrQA import logger +from mrQA.base import BasePlot +from mrQA.utils import previous_month, next_month + + +class MultiPlot(BasePlot): + def __init__(self, name=None): + super().__init__(name=name) + self.uniq_secondary_values = None + self.parameters = None + + def get_counter(self, dataset, parameters): + """Computes the counter for the given dataset and parameters.""" + counter = {} + if len(parameters) > 
2: + raise ValueError("MultiPlot can only plot two parameters") + param_primary, param_secondary = parameters + for seq_name in dataset.get_sequence_ids(): + for subj, sess, run, seq in dataset.traverse_horizontal( + seq_name): + try: + primary_value = seq[param_primary].get_value() + if 'MEDICALSYSTEMS' in primary_value: + primary_value = primary_value.split('MEDICALSYSTEMS')[0] + secondary_value = seq[param_secondary].get_value() + if 'ORCHESTRASDK' in secondary_value: + continue + except KeyError: + continue + + if (isinstance(primary_value, UnspecifiedType) or + isinstance(secondary_value, UnspecifiedType)): + continue + if primary_value not in counter: + counter[primary_value] = {} + if secondary_value not in counter[primary_value]: + counter[primary_value][secondary_value] = 0 + counter[primary_value][secondary_value] += 1 + return counter + + def normalize_counts(self, counter, base_counter): + """Normalize the values in counter by values of base counter""" + normalized_counts = defaultdict(dict) + uniq_secondary_values = set() + + for key1 in counter: + for key2 in counter[key1]: + uniq_secondary_values.add(key2) + normalized_counts[key1][key2] = 100*( + counter[key1][key2] / base_counter[key1][key2]) + + if not normalized_counts: + raise ValueError("Primary counter is empty. 
" + "No values found for normalization") + self.uniq_secondary_values = sorted(list(uniq_secondary_values)) + return dict(sorted(normalized_counts.items())) + + def pad_with_zeroes(self, normalized_counts, primary_param): + """Pad the normalized counts with zeroes""" + data = { + primary_param: [] + } + + # initialize data + for category in self.uniq_secondary_values: + data[category] = { + 'x': [], + 'y': [] + } + + for key, values_by_category in normalized_counts.items(): + data[primary_param].append(key) + for category in values_by_category: + # if category not in values_by_category: + # continue + # # data[category].append(0) + # else: + data[category]['x'].append(key) + data[category]['y'].append(values_by_category[category]) + return data + + def compute_counts(self, non_compliant_ds, complete_ds, parameters): + """Returns the plot components for the given dataset and parameters.""" + counter = self.get_counter(non_compliant_ds, parameters) + base_counter = self.get_counter(complete_ds, parameters) + normalized_counts = self.normalize_counts(counter, base_counter) + data = self.pad_with_zeroes(normalized_counts, parameters[0]) + return data + + def plot(self, non_compliant_ds, complete_ds, parameters): + """Creates a plot for the given data""" + raise NotImplementedError + + +class BarPlot(MultiPlot): + """Plot for creating bar plots""" + _name = 'bar_plot' + + def __init__(self, legend_label=None, y_axis_label='% Deviations', + plot_height=300, + plot_width=800): + super().__init__(name=self._name) + self.legend_label = legend_label + self.y_axis_label = y_axis_label + self.plot_width = plot_width + self.plot_height = plot_height + + def compute_counts(self, non_compliant_ds, complete_ds, parameters): + """Returns the plot components for the given dataset and parameters.""" + counter = self.get_counter(non_compliant_ds, parameters) + base_counter = self.get_counter(complete_ds, parameters) + normalized_counts = self.normalize_counts(counter, base_counter) + 
data = self.pad_with_zeroes(normalized_counts, parameters[0]) + return data + + def pad_with_zeroes(self, normalized_counts, primary_param): + """Pad the normalized counts with zeroes""" + factors = [(str(i), str(j)) for i in normalized_counts + for j in normalized_counts[i]] + y = [normalized_counts[i][j] for i in normalized_counts + for j in normalized_counts[i]] + + self.x_range = FactorRange(*factors) + source = ColumnDataSource(data=dict( + factors=factors, + y=y + )) + + return source + + def normalize_counts(self, counter, base_counter): + """Normalize the values in counter by values of base counter""" + normalized_counts = defaultdict(dict) + uniq_secondary_values = set() + + for key1 in counter: + for key2 in counter[key1]: + uniq_secondary_values.add(key2) + normalized_counts[key1][key2] = 100*( + counter[key1][key2] / base_counter[key1][key2]) + + if not normalized_counts: + raise ValueError("Primary counter is empty. " + "No values found for normalization") + self.uniq_secondary_values = sorted(list(uniq_secondary_values)) + return dict(sorted(normalized_counts.items())) + + def get_plot_components(self, data): + # label = list(data.keys())[0] + self.set_cmap(3) + + p = figure(x_range=self.x_range, + y_axis_label=self.y_axis_label, + width=self.plot_width, height=self.plot_height) + # for i, k in enumerate(self.uniq_secondary_values): + try: + p.vbar(x='factors', top='y', source=data, width=self.width, + fill_color=self.colors[2], fill_alpha=0.75, + line_color=self.colors[0]) + except IndexError: + print("Unable to plot, Color index out of range") + + p.xaxis.major_label_orientation = "vertical" + p.xgrid.grid_line_color = None + p.ygrid.grid_line_alpha = 0.5 + p.x_range.range_padding = 0.1 + + # p.legend.click_policy = "hide" + # p.add_layout(p.legend[0], 'below') + return components(p) + + def plot(self, non_compliant_ds, complete_ds, parameters=None): + """Creates a plot for the given data""" + if not parameters: + parameters = self.parameters + data 
= self.compute_counts(non_compliant_ds, complete_ds, + parameters) + self.width = 0.8 + # self.x_range = data[parameters[0]] + # x = data[parameters[0]] + # for i in parameters: + # if 'date' in i.lower(): + # self.x_range = (previous_month(min(x)), next_month(max(x))) + # if 'age' in i.lower(): + # self.x_range = [min(x) - 0.1, max(x) + 0.1] + # # self.width = timedelta(days=1) + + self.div, self.script = self.get_plot_components(data) + + +class MultiLinePlot(MultiPlot): + """Plot for creating multi line plots""" + _name = 'multi_line' + + def __init__(self, legend_label=None, y_axis_label=None, line_width=2, + line_dash='solid', line_alpha=0.75, plot_height=300, + plot_width=800): + super().__init__(name=self._name) + self.legend_label = legend_label + self.line_width = line_width + self.line_dash = line_dash + self.line_alpha = line_alpha + self.y_axis_label = y_axis_label + self.plot_width = plot_width + self.plot_height = plot_height + + def get_plot_components(self, data): + self.set_cmap(len(self.uniq_secondary_values)) + + p = figure(x_range=self.x_range, + y_axis_label=self.y_axis_label, + width=self.plot_width, height=self.plot_height) + for i, k in enumerate(self.uniq_secondary_values): + try: + p.line(x=data[k]['x'], y=data[k]['y'], + line_width=self.line_width, + line_alpha=1, color=self.colors[i], legend_label=k) + except IndexError: + print(f"Unable to plot {k}, Color index {i} out of range") + + p.xaxis.major_label_orientation = "vertical" + p.xgrid.grid_line_color = None + p.ygrid.grid_line_alpha = 0.5 + p.legend.click_policy = "hide" + p.add_layout(p.legend[0], 'below') + return components(p) + + def plot(self, non_compliant_ds, complete_ds, parameters=None): + """Creates a plot for the given data""" + if not parameters: + parameters = self.parameters + data = self.compute_counts(non_compliant_ds, complete_ds, + parameters) + self.width = 0.9 + self.x_range = data[parameters[0]] + x = data[parameters[0]] + for i in parameters: + if 'date' in 
i.lower(): + self.x_range = (previous_month(min(x)), next_month(max(x))) + if 'age' in i.lower(): + self.x_range = [min(x) - 0.1, max(x) + 0.1] + # self.width = timedelta(days=1) + + self.div, self.script = self.get_plot_components(data) + + +class MultiScatterPlot(MultiPlot): + _name = 'multi_scatter' + + def __init__(self, legend_label=None, y_axis_label='% Deviations', size=5, + alpha=0.75, plot_height=300, + plot_width=800): + super().__init__(name=self._name) + self.legend_label = legend_label + self.size = size + self.alpha = alpha + self.y_axis_label = y_axis_label + self.plot_width = plot_width + self.plot_height = plot_height + + def get_plot_components(self, data): + label = list(data.keys())[0] + self.set_cmap(len(self.uniq_secondary_values)) + + p = figure(x_range=self.x_range, + y_axis_label=self.y_axis_label, x_axis_label=label, + width=self.plot_width, height=self.plot_height) + for i, k in enumerate(self.uniq_secondary_values): + try: + p.circle(x=data[k]['x'], y=data[k]['y'], size=self.size, + alpha=self.alpha, + color=self.colors[i], legend_label=k) + except IndexError: + print(f"Unable to plot {k}, Color index {i} out of range") + + p.xaxis.major_label_orientation = "vertical" + p.xgrid.grid_line_color = None + p.ygrid.grid_line_alpha = 0.5 + p.legend.click_policy = "hide" + p.add_layout(p.legend[0], 'below') + return components(p) + + def plot(self, non_compliant_ds, complete_ds, parameters=None): + """Creates a plot for the given data""" + if not parameters: + parameters = self.parameters + data = self.compute_counts(non_compliant_ds, complete_ds, + parameters) + self.width = 0.9 + self.x_range = data[parameters[0]] + x = data[parameters[0]] + for i in parameters: + if 'date' in i.lower(): + self.x_range = (previous_month(min(x)), next_month(max(x))) + if 'age' in i.lower(): + self.x_range = [min(x) - 0.1, max(x) + 0.1] + # self.width = timedelta(days=1) + + self.div, self.script = self.get_plot_components(data) + + +class 
ManufacturerAndDate(MultiScatterPlot): + """Plot for Manufacturer and Date""" + def __init__(self): + super().__init__(plot_height=600, plot_width=800) + self.parameters = ['ContentDate', 'Manufacturer'] + + +class PatientSexAndAge(BarPlot): + """Plot for PatientSex and PatientAge""" + def __init__(self): + super().__init__(plot_height=600, plot_width=800) + self.parameters = ['PatientAge', 'PatientSex'] + + +class ManufacturersModelAndDate(MultiScatterPlot): + """Plot for Manufacturer and Date""" + def __init__(self): + super().__init__(plot_height=600, plot_width=800) + self.parameters = ['ContentDate', 'ManufacturersModelName'] + + def get_counter(self, dataset, parameters): + """Computes the counter for the given dataset and parameters.""" + counter = {} + if len(parameters) > 2: + raise ValueError("MultiPlot can only plot two parameters") + param_primary, param_secondary = parameters + for seq_name in dataset.get_sequence_ids(): + for subj, sess, run, seq in dataset.traverse_horizontal( + seq_name): + try: + primary_value = seq[param_primary].get_value() + secondary_value = seq[param_secondary].get_value() + manufacturer = seq['Manufacturer'].get_value() + secondary_value = f'{manufacturer} {secondary_value}' + except KeyError: + continue + + if (isinstance(primary_value, UnspecifiedType) or + isinstance(secondary_value, UnspecifiedType)): + continue + if primary_value not in counter: + counter[primary_value] = {} + if secondary_value not in counter[primary_value]: + counter[primary_value][secondary_value] = 0 + counter[primary_value][secondary_value] += 1 + return counter + + +class SoftwareVersionsAndDate(MultiScatterPlot): + """Plot for Manufacturer and Date""" + def __init__(self): + super().__init__(plot_height=800, plot_width=800) + self.parameters = ['ContentDate', 'SoftwareVersions'] + + def get_counter(self, dataset, parameters): + """Computes the counter for the given dataset and parameters.""" + counter = {} + if len(parameters) > 2: + raise 
ValueError("MultiPlot can only plot two parameters") + param_primary, param_secondary = parameters + for seq_name in dataset.get_sequence_ids(): + for subj, sess, run, seq in dataset.traverse_horizontal( + seq_name): + try: + primary_value = seq[param_primary].get_value() + secondary_value = seq[param_secondary].get_value() + manufacturer = seq['Manufacturer'].get_value() + secondary_value = f'{manufacturer} {secondary_value}' + except KeyError: + continue + + if (isinstance(primary_value, UnspecifiedType) or + isinstance(secondary_value, UnspecifiedType)): + continue + if primary_value not in counter: + counter[primary_value] = {} + if secondary_value not in counter[primary_value]: + counter[primary_value][secondary_value] = 0 + counter[primary_value][secondary_value] += 1 + return counter + + +class Site(BasePlot): + """Plot for Manufacturer and Date""" + def __init__(self): + super().__init__() + logger.warning("This plot is only for ABCD dataset") + self.parameters = ['ContentDate', 'InstitutionName'] + self.csv_path = Path('/media/sinhah/extremessd/ABCD/1210908/original_files/abcd_lt01.txt') # noqa + self.subject_site_map = self.get_subject_site_map() + self.y_axis_label = '% Deviations' + self.plot_width = 600 + self.plot_height = 300 + + def get_subject_site_map(self): + """Returns a dictionary mapping subject to site""" + subject_site_map = {} + with open(self.csv_path, 'r') as fh: + reader = csv.DictReader(fh, delimiter='\t') + for i, row in enumerate(reader): + if i == 0: + continue + subject_id = row['subjectkey'] + follow_up = row['eventname'] + site = row['site_id_l'] + if 'baseline' in follow_up.lower(): + subject_site_map[subject_id] = site + return subject_site_map + + def get_subject_site(self, subject_id): + """Returns the site for the given subject""" + return self.subject_site_map[subject_id] + + def get_counter(self, dataset, parameters): + """Computes the counter for the given dataset and parameters.""" + counter = {} + if len(parameters) > 2: 
+ raise ValueError("MultiPlot can only plot two parameters") + # param_primary, param_secondary = parameters + for seq_name in dataset.get_sequence_ids(): + for subj, sess, run, seq in dataset.traverse_horizontal( + seq_name): + try: + subject_value = seq.subject_id + value = self.get_subject_site(subject_value) + except KeyError: + continue + + if isinstance(value, UnspecifiedType): + continue + if value not in counter: + counter[value] = 0 + counter[value] += 1 + return counter + + def compute_counts(self, non_compliant_ds, complete_ds, parameters): + """Returns the plot components for the given dataset and parameters.""" + counter = self.get_counter(non_compliant_ds, parameters) + base_counter = self.get_counter(complete_ds, parameters) + normalized_counts = self.normalize_counts(counter, base_counter) + data = self.pad_with_zeroes(normalized_counts, parameters[0]) + return data + + def pad_with_zeroes(self, normalized_counts, primary_param): + """Pad the normalized counts with zeroes""" + # factors = [(str(i), str(j)) for i in normalized_counts for j in normalized_counts[i]] # noqa + # y = [normalized_counts[i][j] for i in normalized_counts for j in normalized_counts[i]] # noqa + factors = [str(i) for i in normalized_counts] + y = [normalized_counts[i] for i in normalized_counts] + + self.x_range = FactorRange(*factors) + source = ColumnDataSource(data=dict( + factors=factors, + y=y + )) + + return source + + def normalize_counts(self, counter, base_counter): + """Normalize the values in counter by values of base counter""" + normalized_counts = defaultdict(dict) + uniq_secondary_values = set() + + for key1 in counter: + normalized_counts[key1] = 100*( + counter[key1] / base_counter[key1]) + + if not normalized_counts: + raise ValueError("Primary counter is empty. 
" + "No values found for normalization") + self.uniq_secondary_values = sorted(list(uniq_secondary_values)) + return dict(sorted(normalized_counts.items())) + + def get_plot_components(self, data): + # label = list(data.keys())[0] + self.set_cmap(3) + + p = figure(x_range=self.x_range, + y_axis_label=self.y_axis_label, + width=self.plot_width, height=self.plot_height) + # for i, k in enumerate(self.uniq_secondary_values): + try: + p.vbar(x='factors', top='y', source=data, width=self.width, + fill_color=self.colors[2], fill_alpha=0.75, + line_color=self.colors[0]) + except IndexError: + print("Unable to plot, Color index out of range") + + p.xaxis.major_label_orientation = "vertical" + p.xgrid.grid_line_color = None + p.ygrid.grid_line_alpha = 0.5 + p.x_range.range_padding = 0.1 + + # p.legend.click_policy = "hide" + # p.add_layout(p.legend[0], 'below') + return components(p) + + def plot(self, non_compliant_ds, complete_ds, parameters=None): + """Creates a plot for the given data""" + if not parameters: + parameters = self.parameters + data = self.compute_counts(non_compliant_ds, complete_ds, + parameters) + self.width = 0.8 + # self.x_range = data[parameters[0]] + # x = data[parameters[0]] + # for i in parameters: + # if 'date' in i.lower(): + # self.x_range = (previous_month(min(x)), next_month(max(x))) + # if 'age' in i.lower(): + # self.x_range = [min(x) - 0.1, max(x) + 0.1] + # # self.width = timedelta(days=1) + + self.div, self.script = self.get_plot_components(data) + + +class ManufacturerAndModel(BarPlot): + """Plot for Manufacturer and Model""" + def __init__(self): + super().__init__(plot_height=300, plot_width=400) + self.parameters = ['Manufacturer', 'ManufacturersModelName'] + + +class ManufacturerAndVersion(BarPlot): + """Plot for Manufacturer and SoftwareVersion""" + def __init__(self): + super().__init__(plot_height=300, plot_width=400) + self.parameters = ['Manufacturer', 'SoftwareVersions'] + + def get_counter(self, dataset, parameters): + 
"""Computes the counter for the given dataset and parameters.""" + counter = {} + if len(parameters) > 2: + raise ValueError("MultiPlot can only plot two parameters") + param_primary, param_secondary = parameters + for seq_name in dataset.get_sequence_ids(): + for subj, sess, run, seq in dataset.traverse_horizontal( + seq_name): + try: + primary_value = seq[param_primary].get_value() + if 'MEDICALSYSTEMS' in primary_value: + primary_value = primary_value.split('MEDICALSYSTEMS')[0] + secondary_value = seq[param_secondary].get_value() + if primary_value != 'SIEMENS': + secondary_value = literal_eval(secondary_value)[0] + except KeyError: + continue + + if (isinstance(primary_value, UnspecifiedType) or + isinstance(secondary_value, UnspecifiedType)): + continue + if primary_value not in counter: + counter[primary_value] = {} + if secondary_value not in counter[primary_value]: + counter[primary_value][secondary_value] = 0 + counter[primary_value][secondary_value] += 1 + return counter diff --git a/mrQA/project.py b/mrQA/project.py index 19d4a9b..e3e6e48 100644 --- a/mrQA/project.py +++ b/mrQA/project.py @@ -1,36 +1,35 @@ +from itertools import combinations from pathlib import Path -from typing import Union +from typing import Union, Dict, Optional -from MRdataset import save_mr_dataset -from MRdataset.base import BaseDataset -from MRdataset.log import logger -from MRdataset.config import DatasetEmptyException +from MRdataset import save_mr_dataset, BaseDataset, DatasetEmptyException +from protocol.utils import import_string -from mrQA.config import STRATEGIES_ALLOWED +from mrQA import logger +from mrQA.base import CompliantDataset from mrQA.formatter import HtmlFormatter -from mrQA.utils import majority_attribute_values, _get_runs_by_echo, \ - _check_against_reference, _cli_report, _validate_reference, \ - export_subject_lists, record_out_paths +from mrQA.utils import _cli_report, \ + export_subject_lists, make_output_paths, \ + modify_sequence_name, _init_datasets, 
get_reference_protocol, get_config, \ + save_audit_results def check_compliance(dataset: BaseDataset, - strategy: str = 'majority', decimals: int = 3, output_dir: Union[Path, str] = None, verbose: bool = False, - tolerance: float = 0.1,) -> Path: + tolerance: float = 0.1, + config_path: Union[Path, str] = None, + reference_path: Union[Path, str] = None): """ - Main function for checking compliance. Infers the reference protocol - according to the user chosen strategy, and then generates a compliance - report + Main function for checking compliance. It runs horizontal and vertical + audits on the dataset. Generates a report and saves it to the output + directory. Parameters ---------- dataset : BaseDataset - BaseDataset instance for the dataset to be checked for compliance - strategy : str - Strategy employed to specify or automatically infer the - reference protocol. Allowed options are 'majority' + Dataset to be checked for compliance output_dir: Union[Path, str] Path to save the report decimals : int @@ -39,12 +38,16 @@ def check_compliance(dataset: BaseDataset, print more if true tolerance : float Tolerance for checking against reference protocol. Default is 0.1 - + reference_path : Union[Path, str] + Path to the reference protocol file. 
Required if strategy is + 'reference' + config_path : Union[Path, str] + Path to the config file Returns ------- - report_path : Path - Path to the generated report - + compliance_dict : Dict + Dictionary containing the reference protocol, compliant and + non-compliant datasets Raises ------ ValueError @@ -57,99 +60,315 @@ def check_compliance(dataset: BaseDataset, if verbose: logger.setLevel('INFO') else: - logger.setLevel('WARNING') + logger.setLevel('ERROR') - if not dataset.modalities: + # Check if dataset is empty + if not dataset.get_sequence_ids(): raise DatasetEmptyException - if strategy == 'majority': - dataset = compare_with_majority(dataset, decimals, tolerance=tolerance) - else: - raise NotImplementedError( - f'Only the following strategies are allowed : \n\t' - f'{STRATEGIES_ALLOWED}') - + # Check if output directory exists, create if not output_dir = Path(output_dir).resolve() output_dir.mkdir(exist_ok=True, parents=True) if not output_dir.is_dir(): raise NotADirectoryError('Provide a valid output directory') - report_path, mrds_path, sub_lists_dir_path = record_out_paths(output_dir, - dataset.name) + # Create paths for report, mrds pkl file and sub_lists + report_path, mrds_path, sub_lists_dir_path = make_output_paths(output_dir, + dataset) + # Save the dataset to a pickle file save_mr_dataset(mrds_path, dataset) - generate_report(dataset, - report_path, - sub_lists_dir_path, - output_dir) + + # Get results of horizontal audit + hz_audit_results = horizontal_audit(dataset=dataset, + reference_path=reference_path, + decimals=decimals, + tolerance=tolerance, + config_path=config_path) + save_audit_results(output_dir / (dataset.name + '_hz.adt.pkl'), + hz_audit_results) + # Get results of vertical audit + vt_audit_results = vertical_audit(dataset=dataset, + decimals=decimals, + tolerance=tolerance, + config_path=config_path) + save_audit_results(output_dir / (dataset.name + '_vt.adt.pkl'), + vt_audit_results) + + # Generate plots/visualization + # 
plot_results = plot_patterns( + # non_compliant_ds=hz_audit_results['non_compliant'], + # complete_ds=hz_audit_results['complete_ds'], + # config_path=config_path) + + # Generate the report if checking compliance was successful + generate_report(hz_audit=hz_audit_results, + vt_audit=vt_audit_results, + report_path=report_path, + sub_lists_dir_path=sub_lists_dir_path, + output_dir=output_dir, ) + # plots=plot_results) # Print a small message on the console, about non-compliance of dataset - print(_cli_report(dataset, str(report_path))) - return report_path + _cli_report(hz_audit_results, str(report_path)) + # TODO : print(_cli_report(vt_audit_results, str(report_path))) + return hz_audit_results, vt_audit_results -def compare_with_majority(dataset: BaseDataset, - decimals: int = 3, - tolerance: float = 0.1) -> BaseDataset: +def plot_patterns(non_compliant_ds, complete_ds, config_path=None): + plots = {} + plots_config = get_config(config_path=config_path, report_type='plots') + if not plots_config: + return plots + + include_params = plots_config.get("include_parameters", None) + for param in include_params: + param_cls = import_string('mrQA.plotting.' + param) + print(param) + param_figure = param_cls() + param_figure.plot(non_compliant_ds, complete_ds) + plots[param] = param_figure + return plots + + +def horizontal_audit(dataset: BaseDataset, + reference_path: Union[Path, str], + decimals: int = 3, + tolerance: float = 0.1, + config_path: Union[Path, str] = None) -> Optional[Dict]: """ - Method for post-acquisition compliance. Infers the reference protocol/values - by looking for the most frequent values, and then identifying deviations + Compares the dataset with the reference protocol (either inferred or + user-defined). Returns a dictionary containing the reference protocol, + compliant and non-compliant datasets. 
Parameters ---------- - dataset : BaseDataset - BaseDataset instance for the dataset which is to be checked - for compliance - decimals : int + dataset: BaseDataset + Dataset to be checked for compliance + reference_path: Path | str + Path to the reference protocol file. + decimals: int Number of decimal places to round to (default:3). - tolerance : float + tolerance: float Tolerance for checking against reference protocol. Default is 0.1 - + config_path: Union[Path, str] + Path to the config file Returns ------- - dataset : BaseDataset - Adds the non-compliance information to the same BaseDataset instance and - returns it. + + """ + hz_audit_config = get_config(config_path=config_path, + report_type='hz') + ref_protocol = get_reference_protocol(dataset=dataset, + reference_path=reference_path, + config=hz_audit_config) + compliant_ds, non_compliant_ds, undetermined_ds = _init_datasets(dataset) + + eval_dict = { + 'complete_ds' : dataset, + 'reference' : ref_protocol, + 'compliant' : compliant_ds, + 'non_compliant': non_compliant_ds, + 'undetermined' : undetermined_ds, + } + + if not (ref_protocol and hz_audit_config): + return eval_dict + + include_params = hz_audit_config.get('include_parameters', None) + stratify_by = hz_audit_config.get('stratify_by', None) + skip_sequences = hz_audit_config.get('skip_sequences', []) + + for seq_name in dataset.get_sequence_ids(): + # a temporary placeholder for compliant sequences. 
It will be + # merged to compliant dataset if all the subjects are compliant + temp_dataset = CompliantDataset(name=dataset.name, + data_source=dataset.data_source, + ds_format=dataset.format) + compliant_flag = True + undetermined_flag = False + for subj, sess, run, seq in dataset.traverse_horizontal(seq_name): + try: + for substr in skip_sequences: + if substr in seq_name.lower(): + logger.warning( + f'Skipping {seq_name} sequence as it contains ' + f'{substr}') + raise ValueError("This sequence should be skipped.") + except ValueError: + continue + + sequence_name = modify_sequence_name( + seq, stratify_by, + datasets=[compliant_ds, non_compliant_ds, undetermined_ds]) + + try: + ref_sequence = ref_protocol[sequence_name] + except KeyError: + logger.warning(f'No reference protocol for {seq_name} ' + f'sequence.') + undetermined_ds.add(subject_id=subj, session_id=sess, + run_id=run, seq_id=sequence_name, seq=seq) + + undetermined_flag = True + continue + + is_compliant, non_compliant_tuples = ref_sequence.compliant( + seq, + rtol=tolerance, + decimals=decimals, + include_params=include_params + ) + + if is_compliant: + # a temporary placeholder for compliant sequences. It will be + # merged to compliant dataset if all the subjects are compliant + # for a given sequence + temp_dataset.add(subject_id=subj, session_id=sess, + run_id=run, seq_id=sequence_name, seq=seq) + else: + compliant_flag = False + + # reverse the order of the tuples. 
Always store in this order + # (sequence_value, reference_value) + + non_compliant_params = [(b, a) for a, b in non_compliant_tuples] + non_compliant_ds.add(subject_id=subj, session_id=sess, + run_id=run, seq_id=sequence_name, seq=seq) + non_compliant_ds.add_nc_params( + subject_id=subj, session_id=sess, run_id=run, + seq_id=sequence_name, + non_compliant_params=non_compliant_params + ) + # only add the sequence if all the subjects, sessions are compliant + if compliant_flag and not undetermined_flag: + compliant_ds.merge(temp_dataset) + + # Update the compliance evaluation dict + eval_dict['compliant'] = compliant_ds + eval_dict['non_compliant'] = non_compliant_ds + eval_dict['undetermined'] = undetermined_ds + return eval_dict + + +def vertical_audit(dataset: BaseDataset, + decimals: int = 3, + tolerance: float = 0, + config_path: Union[Path, str] = None) -> Optional[Dict]: """ - # TODO: Check for subset, if incomplete dataset throw error and stop - - for modality in dataset.modalities: - # Reset compliance calculation before re-computing it. 
- modality.reset_compliance() - - # Infer reference protocol for each echo_time - # TODO: segregation via echo_time should be deprecated as multiple TE is - # part of the same run - run_by_echo = _get_runs_by_echo(modality, decimals) - - # For each echo time, find the most common values - for echo_time, run_list in run_by_echo.items(): - reference = majority_attribute_values(run_list, echo_time) - if _validate_reference(reference): - modality.set_reference(reference, echo_time) - - modality = _check_against_reference(modality, decimals, - tolerance=tolerance) - if modality.compliant: - dataset.add_compliant_modality_name(modality.name) - else: - dataset.add_non_compliant_modality_name(modality.name) - # As we are updating the same dataset by adding non-compliant subject names, - # and non-compliant modality names, we can return the same dataset - return dataset - - -def generate_report(dataset: BaseDataset, + Compares all the sequences of a given subject. For ex, you may want to + check the field map against the rs-fMRI sequence. Returns a dictionary + containing the compliant and non-compliant sequences for each subject. + + Parameters + ---------- + dataset: BaseDataset + Dataset to be checked for compliance + decimals: int + Number of decimal places to round to (default:3). + tolerance: float + Tolerance for checking against reference protocol. 
Default is 0 + config_path: Path | str + Path to the config file + """ + vt_audit_config = get_config(config_path=config_path, + report_type='vt') + compliant_ds, non_compliant_ds, _ = _init_datasets(dataset) + eval_dict = { + 'complete_ds' : dataset, + 'compliant' : compliant_ds, + 'non_compliant' : non_compliant_ds, + 'sequence_pairs': [], + 'parameters' : [] + } + if not vt_audit_config: + return eval_dict + + # If include_parameters is not provided, then it will compare all parameters + include_params = vt_audit_config.get('include_parameters', None) + chosen_pairs = vt_audit_config.get('sequences', None) + stratify_by = vt_audit_config.get('stratify_by', None) + + # If no sequence pairs are provided, then compare all possible pairs + if chosen_pairs is None: + logger.warning('No sequence pairs provided. Comparing all possible ' + 'sequence pairs.') + chosen_pairs = list(combinations(dataset.get_sequence_ids(), 2)) + # check pair are queryable, all the pairs are not present + # throw an error if any of the pair is not present + used_pairs = set() + # assuming that sequence_ids are list of 2 + for seq1_name, seq2_name in chosen_pairs: + for items in dataset.traverse_vertical2(seq1_name, seq2_name): + subject, session, run1, run2, seq1, seq2 = items + is_compliant, non_compliant_tuples = seq1.compliant( + seq2, + rtol=tolerance, + decimals=decimals, + include_params=include_params + ) + seq1_name = modify_sequence_name( + seq1, stratify_by, + [compliant_ds, non_compliant_ds]) + seq2_name = modify_sequence_name( + seq2, stratify_by, + [compliant_ds, non_compliant_ds]) + + non_compliant_ds.add_sequence_pair_names((seq1_name, seq2_name)) + used_pairs.add((seq1_name, seq2_name)) + + if is_compliant: + compliant_ds.add(subject_id=subject, session_id=session, + run_id=run1, seq_id=seq1_name, seq=seq1) + else: + non_compliant_ds.add(subject_id=subject, session_id=session, + run_id=run1, seq_id=seq1_name, + seq=seq1) + non_compliant_ds.add(subject_id=subject, 
session_id=session, + run_id=run2, seq_id=seq2_name, + seq=seq2) + + # non_compliant_params = [x[0] for x in non_compliant_tuples] + non_compliant_ds.add_nc_params( + subject_id=subject, session_id=session, run_id=run1, + seq_id=seq1_name, ref_seq=seq2_name, + non_compliant_params=non_compliant_tuples + ) + + # reverse the order of the tuples. Always store in this order + # (sequence_value, reference_value) + nc_tuples_reverse = [(b, a) for a, b in non_compliant_tuples] + non_compliant_ds.add_nc_params( + subject_id=subject, session_id=session, run_id=run2, + seq_id=seq2_name, ref_seq=seq1_name, + non_compliant_params=nc_tuples_reverse + ) + # TODO: add option for num_sequences > 2 + eval_dict = { + 'complete_ds' : dataset, + 'compliant' : compliant_ds, + 'non_compliant' : non_compliant_ds, + 'sequence_pairs': used_pairs, + 'parameters' : include_params + } + return eval_dict + + +def generate_report(hz_audit: dict, + vt_audit: dict, report_path: str or Path, sub_lists_dir_path: str, - output_dir: Union[Path, str]) -> Path: + output_dir: Union[Path, str], + plots=None) -> Path: """ Generates an HTML report aggregating and summarizing the non-compliance discovered in the dataset. Parameters ---------- - dataset : BaseDataset - BaseDataset instance for the dataset which is to be checked + hz_audit : dict + Dictionary containing the results of the horizontal audit + vt_audit : dict + Dictionary containing the results of the vertical audit report_path : str Name of the file to be generated, without extension. 
Ensures that naming is consistent across the report, dataset and record files @@ -161,22 +380,34 @@ def generate_report(dataset: BaseDataset, Returns ------- output_path : Path - Path to the generated report + Complete path to the generated report """ output_dir = Path(output_dir).resolve() output_dir.mkdir(parents=True, exist_ok=True) + sub_lists_by_seq = export_subject_lists(output_dir, + hz_audit['non_compliant'], + sub_lists_dir_path) - # time_dict = get_timestamps() - sub_lists_by_modality = export_subject_lists(output_dir, - dataset, - sub_lists_dir_path) - # export_record(output_dir, filename, time_dict) + report_formatter = HtmlFormatter(filepath=report_path) + report_formatter.collect_hz_audit_results( + complete_ds=hz_audit['complete_ds'], + compliant_ds=hz_audit['compliant'], + non_compliant_ds=hz_audit['non_compliant'], + undetermined_ds=hz_audit['undetermined'], + subject_lists_by_seq=sub_lists_by_seq, + ref_protocol=hz_audit['reference'] + ) + + report_formatter.collect_vt_audit_results( + compliant_ds=vt_audit['compliant'], + non_compliant_ds=vt_audit['non_compliant'], + complete_ds=vt_audit['complete_ds'], + sequence_pairs=vt_audit['sequence_pairs'], + parameters=vt_audit['parameters'] + ) + + # report_formatter.collect_plots(**plots) + report_formatter.render() # Generate the HTML report and save it to the output_path - args = { - 'ds': dataset, - 'sub_lists_by_modality': sub_lists_by_modality, - # 'time': time_dict - } - HtmlFormatter(filepath=report_path, params=args) return Path(report_path) diff --git a/mrQA/resources/mri-config.json b/mrQA/resources/mri-config.json new file mode 100644 index 0000000..92f3ab1 --- /dev/null +++ b/mrQA/resources/mri-config.json @@ -0,0 +1,75 @@ +{ + "begin": "2014-03-12T13:37:27+00:00", + "end": "2017-03-12T13:37:27+00:00", + "include_sequence": { + "phantom": false, + "nifti_header": false, + "moco": false, + "sbref": false, + "derived": false + }, + "use_echonumbers": true, + "horizontal_audit": { + 
"stratify_by": "series_number", + "include_parameters": [ + "EchoTrainLength", + "ParallelAcquisitionTechnique", + "MagneticFieldStrength", + "MRAcquisitionType", + "MultiSliceMode", + "PhasePolarity", + "PhaseEncodingSteps", + "PixelBandwidth", + "ScanningSequence", + "SequenceVariant", + "RepetitionTime", + "EchoTime", + "FlipAngle", + "PhaseEncodingDirection", + "ShimMode", + "Rows", + "Columns", + "AcquisitionMatrix" + ] + }, + "vertical_audit": { + "stratify_by": "series_number", + "sequences": [ + [ + "ncanda-rsfmri-v1", + "ncanda-grefieldmap-v1" + ], + [ + "ncanda-dti30b400-v1", + "ncanda-grefieldmap-v1" + ], + [ + "ncanda-dti60b1000-v1", + "ncanda-grefieldmap-v1" + ] + ], + "include_parameters": [ + "Rows", + "Columns", + "AcquisitionMatrix", + "PhaseEncodingDirection", + "ShimMode", + "ShimSetting" + ] + }, + "plots": { + "include_parameters": [ + "ContentDate", + "PatientSex", + "PatientAge", + "PatientWeight", + "OperatorsName", + "InstitutionName", + "Manufacturer" + ] + }, + "exclude_subjects": [ + "210098", + "210078" + ] +} diff --git a/mrQA/run_merge.py b/mrQA/run_merge.py index 2c69cc9..41eaf1a 100644 --- a/mrQA/run_merge.py +++ b/mrQA/run_merge.py @@ -72,8 +72,7 @@ def _merge_and_save(mrds_path_list: List[Path], save_mr_dataset(output_path, complete_dataset) -def _merge_from_disk(mrds_path_list: Union[List[Path], List[str]]) \ - -> BaseDataset: +def _merge_from_disk(mrds_path_list: List) -> BaseDataset: """ Given a list of paths to partial mrds datasets, read and merge Keep aggregating along with the loop. 
diff --git a/mrQA/run_parallel.py b/mrQA/run_parallel.py index 9bffdb8..6cd27c9 100644 --- a/mrQA/run_parallel.py +++ b/mrQA/run_parallel.py @@ -5,21 +5,22 @@ from pathlib import Path from typing import Iterable, Union -from MRdataset import load_mr_dataset -from MRdataset.config import MRDS_EXT -from MRdataset.log import logger -from MRdataset.utils import valid_paths, is_writable +from MRdataset import load_mr_dataset, MRDS_EXT, DatasetEmptyException -from mrQA.config import PATH_CONFIG +from mrQA import check_compliance +from mrQA import logger +from mrQA.config import PATH_CONFIG, THIS_DIR from mrQA.parallel_utils import _check_args, _make_file_folders, \ - _run_single_batch, _create_slurm_script, _get_num_workers, _get_subject_ids + _run_single_batch, _create_slurm_script, _get_num_workers, \ + _get_terminal_folders from mrQA.run_merge import check_and_merge from mrQA.utils import list2txt, split_list, \ txt2list -from mrQA import check_compliance +from mrQA.utils import valid_paths, is_writable def get_parser(): + """Parser for the CLI""" parser = argparse.ArgumentParser( description='Parallelize the mrQA compliance checks', add_help=False @@ -34,6 +35,9 @@ def get_parser(): required.add_argument('-d', '--data-source', type=str, required=True, help='directory containing downloaded dataset with ' 'dicom files, supports nested hierarchies') + required.add_argument('--config', type=str, + help='path to config file', + default=THIS_DIR / 'resources/mri-config.json') optional.add_argument('-o', '--output-dir', type=str, help='specify the directory where the report' ' would be saved. By default, the --data_source ' @@ -42,8 +46,8 @@ def get_parser(): help='specify the path to the output mrds file. 
') optional.add_argument('-n', '--name', type=str, help='provide a identifier/name for the dataset') - optional.add_argument('-s', '--subjects-per-job', type=int, default=5, - help='number of subjects to process per job') + optional.add_argument('-j', '--job-size', type=int, default=5, + help='number of folders to process per job') optional.add_argument('-e', '--conda-env', type=str, default='mrcheck', help='name of conda environment to use') optional.add_argument('-c', '--conda-dist', type=str, default='anaconda3', @@ -52,6 +56,19 @@ def get_parser(): help='flag to run on HPC') optional.add_argument('-v', '--verbose', action='store_true', help='allow verbose output on console') + optional.add_argument('-ref', '--ref-protocol-path', type=str, + help='XML file containing desired protocol. If not ' + 'provided, the protocol will be inferred from ' + 'the dataset.') + optional.add_argument('--decimals', type=int, default=3, + help='number of decimal places to round to ' + '(default:0). If decimals are negative it ' + 'specifies the number of positions to the left' + 'of the decimal point.') + optional.add_argument('-t', '--tolerance', type=float, default=0, + help='tolerance for checking against reference ' + 'protocol. 
Default is 0') + if len(sys.argv) < 2: logger.critical('Too few arguments!') parser.print_help() @@ -60,22 +77,37 @@ def get_parser(): return parser -def main(): +def cli(): + """Console script for mrQA.""" args = parse_args() process_parallel(data_source=args.data_source, output_dir=args.output_dir, out_mrds_path=args.out_mrds_path, name=args.name, - subjects_per_job=args.subjects_per_job, + job_size=args.job_size, conda_env=args.conda_env, conda_dist=args.conda_dist, + config_path=args.config, hpc=args.hpc) dataset = load_mr_dataset(args.out_mrds_path) - check_compliance(dataset=dataset, - output_dir=args.output_dir) + try: + check_compliance(dataset=dataset, + output_dir=args.output_dir, + decimals=args.decimals, + verbose=args.verbose, + tolerance=args.tolerance, + config_path=args.config, + reference_path=args.ref_protocol_path, ) + except DatasetEmptyException: + logger.error("Cannot check compliance if the dataset doesn't have " + "any scans. Please check the dataset.") + except NotADirectoryError: + logger.error('Provided output directory for saving reports is invalid.' + 'Either it is not a directory or it does not exist. 
') def parse_args(): + """Argument parser for the CLI""" parser = get_parser() args = parser.parse_args() @@ -95,8 +127,18 @@ def parse_args(): except OSError as exc: raise exc + if args.ref_protocol_path is not None: + if not Path(args.ref_protocol_path).is_file(): + raise OSError( + 'Expected valid file for --ref-protocol-path argument, ' + 'Got {0}'.format(args.ref_protocol_path)) + if not is_writable(args.output_dir): raise OSError(f'Output Folder {args.output_dir} is not writable') + + if not Path(args.config).is_file(): + raise FileNotFoundError(f'Expected valid config file, ' + f'Got {args.config}') return args @@ -104,39 +146,46 @@ def process_parallel(data_source: Union[str, Path], output_dir: Union[str, Path], out_mrds_path: Union[str, Path], name: str = None, - subjects_per_job: int = 5, + job_size: int = 5, conda_env: str = 'mrcheck', conda_dist: str = 'anaconda3', + config_path: Union[str, Path] = None, hpc: bool = False): """ Given a folder(or List[folder]) it will divide the work into smaller - jobs. Each job will contain a fixed number of subjects. These jobs can be + jobs. Each job will contain a fixed number of folders. These jobs can be executed in parallel to save time. 
Parameters ---------- - data_source: str or Path - Path to the folder containing the subject folders - output_dir: str or Path - Path to the folder where the output will be saved - out_mrds_path: str or Path - Path to the final output mrds file + data_source: str | Path + Valid path to the folder containing the multiple folders + output_dir: str | Path + Valid path to the folder where the output will be saved + out_mrds_path: str | Path + Valid path to the final output .mrds.pkl file name: str Name of the final output file - subjects_per_job: int - Number of subjects to be processed in each job + job_size: int + Number of folders to be processed in each job conda_env: str Name of the conda environment to be used conda_dist: str Name of the conda distribution to be used hpc: bool Whether to use HPC or not + config_path: str + Path to the config file """ # One function to process them all! # note that it will generate scripts only script_list_filepath, mrds_list_filepath = create_script( + ds_format='dicom', + verbose=False, + debug=False, + config_path=config_path, data_source=data_source, - subjects_per_job=subjects_per_job, + folders_per_job=job_size, conda_env=conda_env, conda_dist=conda_dist, output_dir=output_dir, @@ -165,7 +214,7 @@ def submit_job(scripts_list_filepath: Union[str, Path], hpc: bool = False) -> None: """ Given a folder(or List[folder]) it will divide the work into smaller - jobs. Each job will contain a fixed number of subjects. These jobs can be + jobs. Each job will contain a fixed number of folders. These jobs can be executed in parallel to save time. 
Parameters @@ -192,17 +241,17 @@ def submit_job(scripts_list_filepath: Union[str, Path], def create_script(data_source: Union[str, Path, Iterable] = None, ds_format: str = 'dicom', - include_phantom: bool = False, verbose: bool = False, output_dir: Union[str, Path] = None, debug: bool = False, - subjects_per_job: int = None, + folders_per_job: int = None, hpc: bool = False, conda_dist: str = None, - conda_env: str = None): + conda_env: str = None, + config_path: Union[str, Path] = None): """ Given a folder(or List[folder]) it will divide the work into smaller - jobs. Each job will contain a fixed number of subjects. These jobs can be + jobs. Each job will contain a fixed number of folders. These jobs can be executed in parallel to save time. Parameters @@ -211,48 +260,50 @@ def create_script(data_source: Union[str, Path, Iterable] = None, /path/to/my/dataset containing files ds_format: str Specify dataset type. Use one of [dicom] - include_phantom: bool - Include phantom scans in the dataset verbose: bool Print progress output_dir: str Path to save the output dataset debug: bool If True, the dataset will be created locally. This is useful for testing - subjects_per_job: int - Number of subjects per job. Recommended value is 50 or 100 + folders_per_job: int + Number of folders per job. 
Recommended value is 50 or 100 hpc: bool If True, the scripts will be generated for HPC, not for local execution conda_dist: str Name of conda distribution conda_env: str Name of conda environment + config_path: str + Path to the config file """ data_src, output_dir, env, dist = _check_args(data_source, ds_format, output_dir, debug, - subjects_per_job, hpc, - conda_dist, conda_env) - folder_paths, files_per_batch, all_ids_path = _make_file_folders(output_dir) - ids_path_list = split_ids_list( + folders_per_job, hpc, + conda_dist, conda_env, + config_path) + folder_paths, files_per_batch, all_fnames_path = _make_file_folders( + output_dir) + fnames_path_list = split_folders_list( data_src, - all_ids_path=all_ids_path, - per_batch_ids=files_per_batch['ids'], - output_dir=folder_paths['ids'], - subjects_per_job=subjects_per_job + all_fnames_path=all_fnames_path, + per_batch_ids=files_per_batch['fnames'], + output_dir=folder_paths['fnames'], + folders_per_job=folders_per_job ) scripts_path_list = [] mrds_path_list = [] - # create a slurm job script for each sub_group of subject ids - for ids_filepath in ids_path_list: + # create a slurm job script for each sub_group of folders + for fnames_filepath in fnames_path_list: # Filename of the bash script should be same as text file. - # Say batch0000.txt points to set of 10 subjects. Then create a - # slurm script file batch0000.sh which will run for these 10 subjects, + # Say batch0000.txt points to set of 10 folders. 
Then create a + # slurm script file batch0000.sh which will run for these 10 folders, # and the final partial mrds pickle file will have the name # batch0000.mrds.pkl - script_filename = ids_filepath.stem + '.sh' - partial_mrds_filename = ids_filepath.stem + MRDS_EXT + script_filename = fnames_filepath.stem + '.sh' + partial_mrds_filename = fnames_filepath.stem + MRDS_EXT script_filepath = folder_paths['scripts'] / script_filename partial_mrds_filepath = folder_paths['mrds'] / partial_mrds_filename @@ -262,12 +313,12 @@ def create_script(data_source: Union[str, Path, Iterable] = None, # Finally create the slurm script and save to disk _create_slurm_script(output_script_path=script_filepath, - ids_filepath=ids_filepath, + fnames_filepath=fnames_filepath, env=conda_env, conda_dist=conda_dist, - num_subj_per_job=subjects_per_job, + folders_per_job=folders_per_job, verbose=verbose, - include_phantom=include_phantom, + config_path=config_path, output_mrds_path=partial_mrds_filepath) # Finally, save the all the paths to create mrds pickle files and all the @@ -277,55 +328,55 @@ def create_script(data_source: Union[str, Path, Iterable] = None, return files_per_batch['scripts'], files_per_batch['mrds'] -def split_ids_list(data_source: Union[str, Path], - all_ids_path: Union[str, Path], - per_batch_ids: Union[str, Path], - output_dir: Union[str, Path], - subjects_per_job: int = 50): +def split_folders_list(data_source: Union[str, Path], + all_fnames_path: Union[str, Path], + per_batch_ids: Union[str, Path], + output_dir: Union[str, Path], + folders_per_job: int = 50): """ - Splits a given set of subjects into multiple jobs and creates separate - text files containing the list of subjects. Each text file - contains the list of subjects to be processed in a single job. + Splits a given set of folders into multiple jobs and creates separate + text files containing the list of folders. Each text file + contains the list of folders to be processed in a single job. 
Parameters ---------- data_source : Union[str, Path] Path to the root directory of the data - all_ids_path : Union[str, Path] + all_fnames_path : Union[str, Path] Path to the output directory per_batch_ids : Union[str, Path] filepath to a file which has paths to all txt files for all jobs. - Each of these txt files contains a list of subject ids for + Each of these txt files contains a list of folder ids for corresponding job. output_dir : Union[str, Path] Name of the output directory - subjects_per_job : int - Number of subjects to process in each job + folders_per_job : int + Number of folders to process in each job Returns ------- - batch_ids_path_list : list - Paths to the text files, each containing a list of subjects + batch_ids_path_list : Sized + Paths to the text files, each containing a list of folders """ - all_ids_path = Path(all_ids_path) + all_fnames_path = Path(all_fnames_path) # List of paths to the txt files, - # each containing the list of subjects per job - batch_ids_path_list = [] + # each containing the list of folders per job + batch_fnames_path_list = [] - subject_list = _get_subject_ids(data_source, all_ids_path) - # Get the list of subjects for each job - workers = _get_num_workers(subjects_per_job, subject_list) - subject_subsets = split_list(subject_list, num_chunks=workers) + folder_list = _get_terminal_folders(data_source, all_fnames_path) + # Get the list of folders for each job + workers = _get_num_workers(folders_per_job, folder_list) + folder_subsets = split_list(folder_list, num_chunks=workers) # Create a text file for each job - for i, subset in enumerate(subject_subsets): - # Create a text file containing the list of subjects for each job + for i, subset in enumerate(folder_subsets): + # Create a text file containing the list of folders for each job batch_filepath = output_dir / f'batch{i:04}.txt' # Store to the path given to the text file list2txt(batch_filepath, subset) # Add the path to the text file ( containing the - # list 
of subjects for each job) to a list, return the list - batch_ids_path_list.append(batch_filepath) - list2txt(fpath=per_batch_ids, list_=batch_ids_path_list) - return batch_ids_path_list + # list of folders for each job) to a list, return the list + batch_fnames_path_list.append(batch_filepath) + list2txt(fpath=per_batch_ids, list_=batch_fnames_path_list) + return batch_fnames_path_list diff --git a/mrQA/run_subset.py b/mrQA/run_subset.py index 4a8c3d7..72cd252 100644 --- a/mrQA/run_subset.py +++ b/mrQA/run_subset.py @@ -3,15 +3,28 @@ import argparse import sys from pathlib import Path +from typing import Union -from MRdataset import import_dataset, save_mr_dataset -from MRdataset.base import BaseDataset -from MRdataset.log import logger +from MRdataset import import_dataset, save_mr_dataset, BaseDataset +from mrQA import logger +from mrQA.config import THIS_DIR from mrQA.utils import txt2list -def main(): +def parse_args(): + parser = get_parser() + args = parser.parse_args() + + if args.verbose: + logger.setLevel('WARNING') + else: + logger.setLevel('ERROR') + + return args + + +def get_parser(): """Console script for mrQA.""" parser = argparse.ArgumentParser( description='Protocol Compliance of MRI scans', @@ -21,52 +34,56 @@ def main(): required = parser.add_argument_group('required arguments') optional = parser.add_argument_group('optional arguments') - required.add_argument('-o', '--output_path', type=str, required=True, + required.add_argument('-o', '--output-path', type=str, + required=True, help='complete path to pickle file for storing ' 'partial dataset') - required.add_argument('-b', '--batch_ids_file', type=str, required=True, + required.add_argument('-b', '--batch-ids-file', type=str, + required=True, help='text file path specifying the folders to read') optional.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help='show this help message and exit') - optional.add_argument('--is_partial', action='store_true', + 
optional.add_argument('--is-partial', action='store_true', help='flag dataset as a partial dataset') # TODO: use this flag to store cache optional.add_argument('-v', '--verbose', action='store_true', help='allow verbose output on console') - optional.add_argument('--include_phantom', action='store_true', - help='whether to include phantom, localizer, ' - 'aahead_scout') + required.add_argument('--config', type=str, + help='path to config file', + default=THIS_DIR / 'resources/mri-config.json') if len(sys.argv) < 2: logger.critical('Too few arguments!') parser.print_help() parser.exit(1) - args = parser.parse_args() - output_path = Path(args.output_path).resolve() + return parser - if args.verbose: - logger.setLevel('INFO') - else: - logger.setLevel('WARNING') + +def cli(): + """Console script for mrQA subset.""" + args = parse_args() + output_path = Path(args.output_path).resolve() if not output_path.exists(): - partial_dataset = read_subset(output_path=args.output_path, + partial_dataset = read_subset(output_dir=Path(args.output_path).parent, batch_ids_file=args.batch_ids_file, ds_format='dicom', verbose=args.verbose, - include_phantom=args.include_phantom, + config_path=args.config, is_complete=not args.is_partial) partial_dataset.is_complete = False save_mr_dataset(args.output_path, partial_dataset) -def read_subset(batch_ids_file: str, +def read_subset(output_dir: Union[str, Path], + batch_ids_file: str, ds_format: str, verbose: bool, - include_phantom: bool, + config_path: str = None, + is_complete: bool = True, **kwargs) -> BaseDataset: """ Given a list of folder paths, reads all dicom files in those folders @@ -75,17 +92,23 @@ def read_subset(batch_ids_file: str, Parameters ---------- + output_dir : Path | str + path to a folder where the partial dataset will be saved batch_ids_file : str path to a text file containing a list of paths (to several folders) ds_format : str what kind of MRdataset to create, dicom, bids etc. 
verbose : bool print more while doing the job - include_phantom : bool - whether to include phantom files in processing + config_path : str + path to config file + is_complete : bool + whether the dataset is subset of a larger dataset. It is useful for + parallel processing of large datasets. **kwargs: dict additional arguments to pass to import_dataset + Returns ------- BaseDataset @@ -97,7 +120,8 @@ def read_subset(batch_ids_file: str, """ # Supports only dicom for now if ds_format != 'dicom': - raise NotImplementedError(f'Expected ds_format as dicom, Got {ds_format}') + raise NotImplementedError( + f'Expected ds_format as dicom, Got {ds_format}') subset = txt2list(batch_ids_file) identifier = Path(batch_ids_file).stem @@ -105,11 +129,13 @@ def read_subset(batch_ids_file: str, ds_format=ds_format, name=identifier, verbose=verbose, - include_phantom=include_phantom, + config_path=config_path, + output_dir=output_dir, + is_complete=is_complete, **kwargs) - # partial_dataset.walk(), import_dataset already does this + # partial_dataset.load(), import_dataset already does this return partial_dataset if __name__ == '__main__': - sys.exit(main()) # pragma: no cover + sys.exit(cli()) # pragma: no cover diff --git a/mrQA/scripts/Makefile b/mrQA/scripts/Makefile new file mode 100644 index 0000000..987b38e --- /dev/null +++ b/mrQA/scripts/Makefile @@ -0,0 +1,35 @@ +.PHONY: abcd_subset +.DEFAULT_GOAL := help + +define BROWSER_PYSCRIPT +import os, webbrowser, sys + +from urllib.request import pathname2url + +webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) +endef +export BROWSER_PYSCRIPT + +define PRINT_HELP_PYSCRIPT +import re, sys + +for line in sys.stdin: + match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) + if match: + target, help = match.groups() + print("%-20s %s" % (target, help)) +endef +export PRINT_HELP_PYSCRIPT + +BROWSER := python -c "$$BROWSER_PYSCRIPT" + +help: + @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) + +abcd_subset: + 
mrqa -d /home/sinhah/scan_data/vertical_abcd -n ABCD -o /home/sinhah/scan_data/vertical_abcd_mrqa_files/ --config /home/sinhah/github/mrQA/examples/mri-config-abcd.json --mrds-pkl-path /media/sinhah/extremessd/ABCD/active_series/non-recommended/fmap/dicom_mrqa_v2_files/reports/abcd-fmap-baseline-non-recommended_DATE_11_01_2023_13_49_13.mrds.pkl #/home/sinhah/scan_data/vertical_abcd_mrqa_files/abcd-vertical_DATE_10_26_2023_16_14_18.mrds.pkl + $(BROWSER) /home/sinhah/scan_data/vertical_abcd_mrqa_files/ + +monitor_compile: + python /home/harsh/Github/in_progress/mrQA/examples/monitor_project.py -d /home/harsh/mr_reports/MRRC-reportsv2/mrqa_reportsv2 -t compile -o /home/harsh/mr_reports/MRRC-reportsv2/debug --config /home/harsh/Github/in_progress/mrQA/examples/mri-config-project.json + diff --git a/mrQA/tests/check_status.py b/mrQA/tests/check_status.py new file mode 100644 index 0000000..45df14a --- /dev/null +++ b/mrQA/tests/check_status.py @@ -0,0 +1,43 @@ +import tempfile +from pathlib import Path + +from mrQA import monitor +from mrQA.tests.conftest import THIS_DIR +from mrQA.tests.simulate import copy2dest + + +# @settings(max_examples=10, deadline=None) +# @given(args=dcm_dataset_strategy) +def run(folder_path): # args): + # ds1, attributes = args + # assume(attributes['num_subjects'] > 4) + # folder_path = attributes['fake_ds_dir'] + folder_path = Path(folder_path).resolve() + # config_path = attributes['config_path'] + config_path = THIS_DIR / 'resources/mri-config.json' + + # make a temporary output folder using tempfile + with tempfile.TemporaryDirectory() as tmpdirname: + output_dir = Path(tmpdirname) / 'output' + input_dir = Path(tmpdirname) / 'input' + output_dir.mkdir(exist_ok=True, parents=True) + input_dir.mkdir(exist_ok=True, parents=True) + i = 0 + # copy a folder from folder_path to tmpdirname + for folder in folder_path.iterdir(): + if folder.is_dir(): + copy2dest(folder, folder_path, input_dir) + + # Run monitor on the temporary folder + 
monitor(name='dummy_dataset', + data_source=input_dir, + output_dir=output_dir, + decimals=2, + config_path=config_path, + verbose=False, + ) + # copy2dest(output_dir, tmpdirname, '/tmp') + print('simulation-over') + + +run('/home/sinhah/scan_data/WPC-6106') diff --git a/mrQA/tests/config.py b/mrQA/tests/config.py deleted file mode 100644 index eee68d7..0000000 --- a/mrQA/tests/config.py +++ /dev/null @@ -1,34 +0,0 @@ -from pathlib import Path - - -DATASET_PATHS = [ - # ['/media/sinhah/extremessd/ABCD-375/dicom-baseline-subset/', 0, 200], - # ['/media/sinhah/extremessd/ABCD-375/dicom-baseline-subset/', 1, 10, 1], - # ['/media/sinhah/extremessd/ABCD-375/dicom-baseline-subset/', 2, 100, 1], - # ['/media/sinhah/extremessd/ABCD-375/dicom-baseline-subset/', 3, 200, 1], - # ['/home/sinhah/scan_data/WPC-7807', 2, 50, 1], - # ['/home/sinhah/scan_data/WPC-7761', 2, 50, 1], - # ['/home/sinhah/scan_data/sinhah-20220514_140054', 3, 200, 1], - # ['/home/sinhah/scan_data/sinhah-20220520_153204', 3, 200, 1], - # ['/home/sinhah/scan_data/sinhah-20220520_210659', 3, 200, 1], - ['/media/sinhah/extremessd/ABCD-375/dicom-baseline/', 5, 500], -] - -# DATA_ROOT = Path('/media/sinhah/extremessd/ABCD/active_series/non-recommended/') -DATA_ROOT = Path('/ocean/projects/med220005p/sinhah/ABCD/active_series/non-recommended') # noqa -ABCD_DATASET_PATHS = [ - [DATA_ROOT/'t1w', 3, 1], -] - - -const_bids = { - 'tr': 2.0, - 'b0': 3.0, - 'fa': 80.0 -} - -const_xnat = { - 'tr': 200, - 'etl': 4000, - 'fa': 80 -} diff --git a/mrQA/tests/conftest.py b/mrQA/tests/conftest.py new file mode 100644 index 0000000..3a10141 --- /dev/null +++ b/mrQA/tests/conftest.py @@ -0,0 +1,65 @@ +import tempfile +import typing as tp +from pathlib import Path +from typing import Tuple + +from MRdataset import DicomDataset +from hypothesis import strategies as st +from hypothesis.strategies import SearchStrategy + +from mrQA.tests.simulate import make_compliant_test_dataset +from mrQA.tests.utils import download + 
+param_strategy: tp.Final[SearchStrategy[Tuple]] = st.tuples( + st.text(min_size=1, max_size=10), + st.integers(min_value=1, max_value=10), + st.floats(allow_nan=False, + allow_infinity=False), + st.integers(min_value=-10000000, max_value=10000000), + st.floats(allow_nan=False, + allow_infinity=False) +) + +THIS_DIR = Path(__file__).parent.resolve() + + +def sample_protocol(): + """Download a sample protocol from GitHub""" + # Using an example XML file from the following GitHub repository + # https://github.com/lrq3000/mri_protocol + url = 'https://raw.githubusercontent.com/lrq3000/mri_protocol/master/SiemensVidaProtocol/Coma%20Science%20Group.xml' # noqa + filename = THIS_DIR / 'coma_science.xml' + xml_file = Path(filename) + + if not xml_file.is_file(): + download(url, filename) + return filename + + +@st.composite +def create_dataset(draw_from: st.DrawFn) -> Tuple: + name, num_subjects, repetition_time, echo_train_length, flip_angle = draw_from(param_strategy) + fake_ds_dir = make_compliant_test_dataset(num_subjects, + repetition_time, + echo_train_length, + flip_angle) + temp_dir = Path(tempfile.mkdtemp()) + ds = DicomDataset(name=name, + data_source=fake_ds_dir, + config_path=THIS_DIR / 'resources/mri-config.json', + output_dir=temp_dir) + ref_protocol_path = sample_protocol() + attributes = { + 'name': name, + 'num_subjects': num_subjects, + 'repetition_time': repetition_time, + 'echo_train_length': echo_train_length, + 'flip_angle': flip_angle, + 'fake_ds_dir': fake_ds_dir, + 'config_path': THIS_DIR / 'resources/mri-config.json', + 'ref_protocol_path': ref_protocol_path, + } + return ds, attributes + + +dcm_dataset_strategy: tp.Final[SearchStrategy[Tuple]] = create_dataset() diff --git a/mrQA/tests/resources/compliant_dicom_data.zip b/mrQA/tests/resources/compliant_dicom_data.zip new file mode 100644 index 0000000..eca78e2 Binary files /dev/null and b/mrQA/tests/resources/compliant_dicom_data.zip differ diff --git 
a/mrQA/tests/resources/example_dicom_data.zip b/mrQA/tests/resources/example_dicom_data.zip new file mode 100644 index 0000000..b1e3e7f Binary files /dev/null and b/mrQA/tests/resources/example_dicom_data.zip differ diff --git a/mrQA/tests/resources/invalid-json.json b/mrQA/tests/resources/invalid-json.json new file mode 100644 index 0000000..e6fc2ad --- /dev/null +++ b/mrQA/tests/resources/invalid-json.json @@ -0,0 +1,44 @@ +{ + "begin": "03_12_2024", + "end": "03_12_2000", + "include_sequence": { + "phantom": false, + "nifti_header": false, + "moco": false, + "sbref": false, + "derived": false + }, + "use_echonumbers": true, + "vertical_audit": { + "stratify_by": null, + "include_parameters": [ + "Rows", + "Columns", + "AcquisitionMatrix", + "PixelSpacing", + "PhaseEncodingDirection", + "ShimMode", + "ShimSetting" + ] + }, + "horizontal_audit": { + "stratify_by": null, + "include_parameters": [ + "EchoTime", + "RepetitionTime", + "FlipAngle", + "EchoTrainLength" + ] + }, + "plots": { + "include_parameters": [ + "ContentDate", + "PatientSex", + "PatientAge", + "PatientWeight", + "OperatorsName", + "InstitutionName" + "Manufacturer" + ] + } +} diff --git a/mrQA/tests/resources/invalid.dcm b/mrQA/tests/resources/invalid.dcm new file mode 100644 index 0000000..66ae844 Binary files /dev/null and b/mrQA/tests/resources/invalid.dcm differ diff --git a/mrQA/tests/resources/mri-config.json b/mrQA/tests/resources/mri-config.json new file mode 100644 index 0000000..5fc4254 --- /dev/null +++ b/mrQA/tests/resources/mri-config.json @@ -0,0 +1,44 @@ +{ + "begin": "03_12_2024", + "end": "03_12_2000", + "include_sequence": { + "phantom": false, + "nifti_header": false, + "moco": false, + "sbref": false, + "derived": false + }, + "use_echonumbers": true, + "vertical_audit": { + "stratify_by": null, + "include_parameters": [ + "Rows", + "Columns", + "AcquisitionMatrix", + "PixelSpacing", + "PhaseEncodingDirection", + "ShimMode", + "ShimSetting" + ] + }, + "horizontal_audit": { + 
THIS_DIR = Path(__file__).parent.resolve()


def make_test_dataset(num_noncompliant_subjects,
                      repetition_time,
                      echo_train_length,
                      flip_angle):
    """Build a DICOM dataset in which some subjects are non-compliant.

    The bundled compliant dataset is first copied unchanged into a fresh
    temporary directory. Then, for the i-th modality folder, the files of
    the first ``num_noncompliant_subjects[i]`` subjects are re-exported
    with the given acquisition parameters, overwriting the clean copies.

    Args:
        num_noncompliant_subjects: sequence of counts, indexed by modality
            position; must have at least one entry per modality folder.
        repetition_time: value written to ``RepetitionTime``.
        echo_train_length: value written to ``EchoTrainLength``.
        flip_angle: value written to ``FlipAngle``.

    Returns:
        ``(dest_dir, dataset_info)`` where ``dataset_info`` maps each
        series description (spaces replaced by underscores) to the set of
        patient IDs that were modified.
    """
    src_dir, dest_dir = setup_directories(compliant_dicom_dataset())
    dataset_info = defaultdict(set)
    modalities = [s.name for s in src_dir.iterdir()
                  if s.is_dir() and 'mrdataset' not in s.name]

    # Pass 1: export every file untouched.
    for modality in modalities:
        for sub_path in (src_dir / modality).iterdir():
            for filepath in sub_path.glob('*.dcm'):
                export_file(dcmread(filepath), filepath, dest_dir)

    # Pass 2: overwrite the requested number of subjects per modality.
    for i, modality in enumerate(modalities):
        count = num_noncompliant_subjects[i]
        subject_paths = list((src_dir / modality).iterdir())
        # Slicing (instead of indexing) tolerates a count larger than the
        # number of subjects available for this modality.
        for sub_path in subject_paths[:max(count, 0)]:
            for filepath in sub_path.glob('**/*.dcm'):
                dicom = dcmread(filepath)
                patient_id = str(dicom.get('PatientID', None))
                dicom.RepetitionTime = repetition_time
                dicom.EchoTrainLength = echo_train_length
                dicom.FlipAngle = flip_angle
                export_file(dicom, filepath, dest_dir)
                # Separate name: do not shadow the loop variable.
                series_name = dicom.get('SeriesDescription',
                                        None).replace(' ', '_')
                dataset_info[series_name].add(patient_id)

    return dest_dir, dataset_info


def export_file(dicom, filepath, out_dir):
    """Save ``dicom`` under ``out_dir/<series description>/<patient id>/``.

    The series description has spaces replaced by underscores and is passed
    through ``convert2ascii``; the file is named
    ``<patient id>_<instance number>.dcm``. ``filepath`` is accepted for
    interface compatibility but is not used.
    """
    patient_id = dicom.get('PatientID', None)
    series_desc = dicom.get('SeriesDescription', None)
    series_desc = convert2ascii(series_desc.replace(' ', '_'))
    number = dicom.get('InstanceNumber', None)

    output_path = out_dir / series_desc / str(patient_id)
    output_path.mkdir(exist_ok=True, parents=True)
    dicom.save_as(output_path / f'{patient_id}_{number}.dcm')


def make_compliant_test_dataset(num_subjects,
                                repetition_time,
                                echo_train_length,
                                flip_angle) -> Path:
    """Build a dataset where every exported file shares the same parameters.

    Files from the bundled example dataset are rewritten with the given
    ``RepetitionTime``, ``EchoTrainLength`` and ``FlipAngle`` until
    ``num_subjects`` distinct patient IDs have been exported.

    Returns:
        Path of the temporary directory holding the exported files.

    Raises:
        IndexError: if the example dataset holds fewer than
            ``num_subjects`` distinct subjects.
    """
    src_dir, dest_dir = setup_directories(sample_dicom_dataset())

    subject_names = set()
    for filepath in src_dir.glob('**/*.dcm'):
        if len(subject_names) >= num_subjects:
            break
        dicom = dcmread(filepath)
        dicom.RepetitionTime = repetition_time
        dicom.EchoTrainLength = echo_train_length
        dicom.FlipAngle = flip_angle
        export_file(dicom, filepath, dest_dir)
        subject_names.add(dicom.get('PatientID', None))

    if len(subject_names) < num_subjects:
        # Same exception type as running off the end of the file list, but
        # with a message that names the actual problem.
        raise IndexError(
            f'Requested {num_subjects} subjects, but only '
            f'{len(subject_names)} found in {src_dir}')
    return dest_dir


def _extract_archive(archive_name, tmp_path):
    """Unzip ``resources/<archive_name>.zip`` into ``tmp_path`` once.

    Extraction is skipped when ``tmp_path/<archive_name>`` already exists.
    Returns the path ``tmp_path/<archive_name>``.
    """
    data_root = Path(tmp_path)
    output_dir = data_root / archive_name
    if not output_dir.exists():
        archive = THIS_DIR / 'resources' / f'{archive_name}.zip'
        with zipfile.ZipFile(archive, 'r') as zip_ref:
            zip_ref.extractall(data_root)
    return output_dir


def sample_dicom_dataset(tmp_path='/tmp'):
    """Return the extracted ``example_dicom_data`` folder under ``tmp_path``."""
    return _extract_archive('example_dicom_data', tmp_path)


def compliant_dicom_dataset(tmp_path='/tmp'):
    """Return the extracted ``compliant_dicom_data`` folder under ``tmp_path``."""
    return _extract_archive('compliant_dicom_data', tmp_path)


def setup_directories(src):
    """Resolve ``src`` and create a fresh temporary destination directory.

    Returns:
        ``(src_dir, dest_dir)`` as resolved ``Path`` objects.

    Raises:
        FileNotFoundError: if ``src`` does not exist.
    """
    src_dir = Path(src).resolve()
    if not src_dir.exists():
        raise FileNotFoundError(
            "Source Directory {} not found".format(src_dir))

    # mkdtemp() either creates the directory or raises, so no existence
    # check is needed afterwards.
    dest_dir = Path(tempfile.mkdtemp()).resolve()
    return src_dir, dest_dir


def copy2dest(folder, src, dest):
    """Copy every readable DICOM file under ``folder`` into ``dest``.

    Each file is re-saved at the same path relative to ``src`` with its
    ``ContentDate`` set to today's date. Files that cannot be read as DICOM
    are skipped.

    Returns:
        List of the source paths that were copied.
    """
    file_list = []
    content_date = datetime.now().strftime('%Y%m%d')
    for file in folder.rglob('*'):
        if not file.is_file():
            continue
        try:
            dicom = dcmread(file)
        except Exception:
            # Best effort: anything that is not a readable DICOM file is
            # skipped, but KeyboardInterrupt/SystemExit still propagate.
            continue
        dicom.ContentDate = content_date
        new_abs_path = dest / file.relative_to(src)
        new_abs_path.parent.mkdir(exist_ok=True, parents=True)
        dicom.save_as(new_abs_path)
        file_list.append(file)
    return file_list
def _to_argv(program, options):
    """Return ``[program, *options]`` with every element as a string.

    Building the list directly (rather than formatting one string and
    splitting it with shlex) keeps dataset names that contain whitespace
    or quote characters intact as a single argument.
    """
    return [program] + [str(option) for option in options]


def _run_binary(program, options):
    """Run the installed console script ``program`` in a subprocess.

    The exit status is deliberately not checked: the tests assert on the
    files produced in the output directory instead.
    """
    subprocess.run(_to_argv(program, options))


def _run_entry_point(entry_point, program, options):
    """Call ``entry_point()`` in-process with ``sys.argv`` set temporarily.

    ``sys.argv`` is restored afterwards so that one test cannot leak its
    arguments into the next.
    """
    saved_argv = sys.argv
    sys.argv = _to_argv(program, options)
    try:
        entry_point()
    finally:
        sys.argv = saved_argv


def _report_options(attributes, name, output_dir):
    """Options shared by the ``mrqa`` and ``mrqa_monitor`` invocations."""
    return ['--data-source', attributes['fake_ds_dir'],
            '--config', attributes['config_path'],
            '--name', name,
            '--format', 'dicom',
            '--decimals', '3',
            '--tolerance', '0.1',
            '--verbose',
            '--output-dir', output_dir]


def _parallel_options(attributes, name, output_dir):
    """Options used by the ``mrqa_parallel`` invocations."""
    return ['--data-source', attributes['fake_ds_dir'],
            '--config', attributes['config_path'],
            '--name', name,
            '--job-size', '1',
            '--decimals', '3',
            '--tolerance', '0.1',
            '--verbose',
            '--out-mrds-path', Path(output_dir) / 'test.mrds.pkl',
            '--output-dir', output_dir]


def _html_reports(output_dir):
    """List the HTML reports found directly inside ``output_dir``."""
    return list(Path(output_dir).glob('*.html'))


@settings(max_examples=5, deadline=None)
@given(args=dcm_dataset_strategy)
def test_binary_mrqa(args):
    """The ``mrqa`` binary writes a report only for more than 2 subjects."""
    ds1, attributes = args
    assume(len(ds1.name) > 0)
    ds1.load()
    with tempfile.TemporaryDirectory() as tempdir:
        _run_binary('mrqa', _report_options(attributes, ds1.name, tempdir))
        assert_paths_more_than_2_subjects(_html_reports(tempdir), tempdir,
                                          attributes, ds1)


@settings(max_examples=5, deadline=None)
@given(args=dcm_dataset_strategy)
def test_binary_mrqa_with_reference_protocol(args):
    """With a reference protocol the ``mrqa`` binary always reports."""
    ds1, attributes = args
    assume(len(ds1.name) > 0)
    ds1.load()
    with tempfile.TemporaryDirectory() as tempdir:
        options = _report_options(attributes, ds1.name, tempdir)
        options += ['--ref-protocol-path', attributes['ref_protocol_path']]
        _run_binary('mrqa', options)
        assert_report_paths(_html_reports(tempdir), tempdir, attributes,
                            ds1)


@settings(max_examples=10, deadline=None)
@given(args=dcm_dataset_strategy)
def test_cli_with_reference_protocol(args):
    """With a reference protocol the in-process ``cli`` always reports."""
    ds1, attributes = args
    assume(len(ds1.name) > 0)
    ds1.load()
    with tempfile.TemporaryDirectory() as tempdir:
        options = _report_options(attributes, ds1.name, tempdir)
        options += ['--ref-protocol-path', attributes['ref_protocol_path']]
        _run_entry_point(cli, 'mrqa', options)
        assert_report_paths(_html_reports(tempdir), tempdir, attributes,
                            ds1)


@settings(max_examples=5, deadline=None)
@given(args=dcm_dataset_strategy)
def test_binary_parallel(args):
    """``mrqa_parallel`` is only exercised for more than 2 subjects."""
    ds1, attributes = args
    assume(len(ds1.name) > 0)
    ds1.load()
    with tempfile.TemporaryDirectory() as tempdir:
        if attributes['num_subjects'] > 2:
            _run_binary('mrqa_parallel',
                        _parallel_options(attributes, ds1.name, tempdir))
        # When the binary was not run, the directory holds no reports and
        # the helper checks exactly that.
        assert_paths_more_than_2_subjects(_html_reports(tempdir), tempdir,
                                          attributes, ds1)


@settings(max_examples=5, deadline=None)
@given(args=dcm_dataset_strategy)
def test_binary_mrqa_monitor(args):
    """The ``mrqa_monitor`` binary reports only for more than 2 subjects."""
    ds1, attributes = args
    assume(len(ds1.name) > 0)
    ds1.load()
    with tempfile.TemporaryDirectory() as tempdir:
        _run_binary('mrqa_monitor',
                    _report_options(attributes, ds1.name, tempdir))
        assert_paths_more_than_2_subjects(_html_reports(tempdir), tempdir,
                                          attributes, ds1)


@settings(max_examples=5, deadline=None)
@given(args=dcm_dataset_strategy)
def test_cli_mrqa_monitor(args):
    """In-process ``mrqa_monitor`` reports only for more than 2 subjects."""
    ds1, attributes = args
    assume(len(ds1.name) > 0)
    ds1.load()
    with tempfile.TemporaryDirectory() as tempdir:
        _run_entry_point(monitor_cli, 'mrqa_monitor',
                         _report_options(attributes, ds1.name, tempdir))
        assert_paths_more_than_2_subjects(_html_reports(tempdir), tempdir,
                                          attributes, ds1)


@settings(max_examples=5, deadline=None)
@given(args=dcm_dataset_strategy)
def test_cli_run_subset(args):
    """``mrqa_subset`` over all folders reproduces the loaded dataset."""
    ds1, attributes = args
    assume(len(ds1.name) > 0)
    ds1.load()
    with tempfile.TemporaryDirectory() as tempdir:
        folders = [f for f in Path(attributes['fake_ds_dir']).iterdir()
                   if f.is_dir()]
        batch_file = Path(tempdir) / 'batch.txt'
        list2txt(batch_file, folders)

        # Joined with pathlib: tempdir is already absolute, so no leading
        # separator must be prepended when reading the result back.
        mrds_path = Path(tempdir) / 'test.mrds.pkl'
        _run_entry_point(subset_cli, 'mrqa_subset',
                         ['--config', attributes['config_path'],
                          '-b', batch_file,
                          '--verbose',
                          '--output-path', mrds_path])
        ds2 = load_mr_dataset(str(mrds_path))
        assert ds1 == ds2


@settings(max_examples=5, deadline=None)
@given(args=dcm_dataset_strategy)
def test_cli_parallel(args):
    """In-process ``mrqa_parallel`` rejects fewer than 2 subjects."""
    ds1, attributes = args
    assume(len(ds1.name) > 0)
    ds1.load()
    with tempfile.TemporaryDirectory() as tempdir:
        options = _parallel_options(attributes, ds1.name, tempdir)
        if attributes['num_subjects'] < 2:
            with pytest.raises(RuntimeError):
                _run_entry_point(parallel_cli, 'mrqa_parallel', options)
        else:
            _run_entry_point(parallel_cli, 'mrqa_parallel', options)
            assert_paths_more_than_2_subjects(_html_reports(tempdir),
                                              tempdir, attributes, ds1)


def assert_paths_more_than_2_subjects(report_paths, tempdir, attributes, ds1):
    """Expect a valid report for more than 2 subjects, otherwise none."""
    if attributes['num_subjects'] > 2:
        assert_report_paths(report_paths, tempdir, attributes, ds1)
    else:
        assert not report_paths


def assert_report_paths(report_paths, tempdir, attributes, ds1):
    """Check that a report exists in ``tempdir`` and carries the name.

    Only the first report is inspected; the dataset name must appear in the
    part of the file stem that precedes ``DATE_SEPARATOR``.
    """
    assert len(report_paths) > 0
    report_path = report_paths[0]
    assert str(report_path.parent) == str(tempdir)
    assert ds1.name in report_path.stem.split(DATE_SEPARATOR)[0]


@settings(max_examples=10, deadline=None)
@given(args=dcm_dataset_strategy)
def test_binary_monitor_with_reference_protocol(args):
    """With a reference protocol ``mrqa_monitor`` always reports."""
    ds1, attributes = args
    assume(len(ds1.name) > 0)
    ds1.load()
    with tempfile.TemporaryDirectory() as tempdir:
        options = _report_options(attributes, ds1.name, tempdir)
        options += ['--ref-protocol-path', attributes['ref_protocol_path']]
        _run_binary('mrqa_monitor', options)
        assert_report_paths(_html_reports(tempdir), tempdir, attributes,
                            ds1)


def test_binary_subset():
    """Placeholder: the ``mrqa_subset`` binary has no test yet."""
    pass


@settings(max_examples=10, deadline=None)
@given(args=dcm_dataset_strategy)
def test_report_generated(args):
    """A second run from the saved dataset pickle adds a second report."""
    ds1, attributes = args
    assume(len(ds1.name) > 0)
    ds1.load()
    with tempfile.TemporaryDirectory() as tempdir:
        options = _report_options(attributes, ds1.name, tempdir)
        _run_entry_point(cli, 'mrqa', options)
        assert_paths_more_than_2_subjects(_html_reports(tempdir), tempdir,
                                          attributes, ds1)

        # Wait 2 seconds before the second run. A report generated with the
        # same timestamp would overwrite the first one, leaving a single
        # report and failing the count check below.
        sleep(2)

        # Re-run, this time pointing at the saved dataset pickle.
        mrds_paths = list(Path(tempdir).glob('*.mrds.pkl'))
        assert len(mrds_paths) > 0
        _run_entry_point(cli, 'mrqa',
                         options + ['--mrds-pkl-path', mrds_paths[0]])

        report_paths = _html_reports(tempdir)
        if attributes['num_subjects'] > 2:
            assert len(report_paths) > 1
            report_path = report_paths[0]
            assert str(report_path.parent) == str(tempdir)
            assert ds1.name in report_path.stem.split(DATE_SEPARATOR)[0]
        else:
            assert not report_paths


if __name__ == '__main__':
    test_report_generated()
max_value=10000000), - st.floats(allow_nan=False, - allow_infinity=False)) -def test_compliance_all_clean(num_subjects, - repetition_time, - echo_train_length, - flip_angle): - """pass compliant datasets, and make sure library recognizes them as such""" - dest_dir = make_compliant_test_dataset(num_subjects, - repetition_time, - echo_train_length, - flip_angle) - fake_mrd_dataset = import_dataset(dest_dir, include_phantom=True) - checked_dataset = check_compliance(dataset=fake_mrd_dataset) - - sub_names_by_modality = defaultdict(list) - for modality_pat in Path(dest_dir).iterdir(): - if modality_pat.is_dir() and ('.mrdataset' not in str(modality_pat)): - for subject_path in modality_pat.iterdir(): - sub_names_by_modality[modality_pat.name].append( - subject_path.name) - - for modality in checked_dataset.modalities: - percent_compliant = len(modality.compliant_subject_names) / len( - modality.subjects) - assert percent_compliant == 1. - - percent_non_compliant = len(modality.non_compliant_subject_names) / len( - modality.subjects) - assert percent_non_compliant == 0 - - assert len(modality.non_compliant_subject_names) == 0 - assert len(modality.compliant_subject_names) == len( - modality._children.keys()) - - assert set(sub_names_by_modality[modality.name]) == set( - modality.compliant_subject_names) - assert len(modality.non_compliant_params) == 0 - - assert modality.compliant - assert not modality.is_multi_echo() - - for subject in modality.subjects: - for session in subject.sessions: - for run in session.runs: - assert not run.delta - assert run.params['tr'] == repetition_time - assert run.params[ - 'echo_train_length'] == echo_train_length - assert run.params['flip_angle'] == flip_angle - assert modality.reference[run.echo_time]['tr'] == \ - repetition_time - assert modality.reference[run.echo_time][ - 'echo_train_length'] == \ - echo_train_length - assert modality.reference[run.echo_time][ - 'flip_angle'] == flip_angle +from mrQA.tests.simulate import 
make_test_dataset +from mrQA.utils import get_config_from_file + +THIS_DIR = Path(__file__).parent.resolve() + + +# @settings(max_examples=50, deadline=None) +# @given(st.integers(min_value=0, max_value=10), +# st.floats(allow_nan=False, +# allow_infinity=False), +# st.integers(min_value=-10000000, max_value=10000000), +# st.floats(allow_nan=False, +# allow_infinity=False)) +# def test_compliance_all_clean(num_subjects, +# repetition_time, +# echo_train_length, +# flip_angle): +# """pass compliant datasets, and make sure library recognizes them as such""" +# dest_dir = make_compliant_test_dataset(num_subjects, +# repetition_time, +# echo_train_length, +# flip_angle) +# fake_mrd_dataset = import_dataset(dest_dir, output_dir=tempdir) +# checked_dataset = check_compliance(dataset=fake_mrd_dataset) +# +# sub_names_by_modality = defaultdict(list) +# for modality_pat in Path(dest_dir).iterdir(): +# if modality_pat.is_dir() and ('.mrdataset' not in str(modality_pat)): +# for subject_path in modality_pat.iterdir(): +# sub_names_by_modality[modality_pat.name].append( +# subject_path.name) +# +# for modality in checked_dataset.modalities: +# percent_compliant = len(modality.compliant_subject_names) / len( +# modality.subjects) +# assert percent_compliant == 1. 
+# +# percent_non_compliant = len(modality.non_compliant_subject_names) / len( +# modality.subjects) +# assert percent_non_compliant == 0 +# +# assert len(modality.non_compliant_subject_names) == 0 +# assert len(modality.compliant_subject_names) == len( +# modality._children.keys()) +# +# assert set(sub_names_by_modality[modality.name]) == set( +# modality.compliant_subject_names) +# assert len(modality.non_compliant_params) == 0 +# +# assert modality.compliant +# assert not modality.is_multi_echo() +# +# for subject in modality.subjects: +# for session in subject.sessions: +# for run in session.runs: +# assert not run.delta +# assert run.params['tr'] == repetition_time +# assert run.params[ +# 'echo_train_length'] == echo_train_length +# assert run.params['flip_angle'] == flip_angle +# assert modality.reference[run.echo_time]['tr'] == \ +# repetition_time +# assert modality.reference[run.echo_time][ +# 'echo_train_length'] == \ +# echo_train_length +# assert modality.reference[run.echo_time][ +# 'flip_angle'] == flip_angle def assert_list(list1, list2): @@ -83,7 +83,7 @@ def assert_list(list1, list2): return False -@settings(max_examples=100, deadline=None) +@settings(max_examples=5, deadline=None) @given(st.lists(st.integers(min_value=0, max_value=3), min_size=15, max_size=15), st.floats(allow_nan=False, allow_infinity=False), @@ -95,158 +95,177 @@ def test_non_compliance(num_noncompliant_subjects, echo_train_length, flip_angle): """pass non-compliant ds, and ensure library recognizes them as such""" - assume(repetition_time != 200) - assume(echo_train_length != 4000) - assume(flip_angle != 80) - - fake_ds_dir, dataset_info = \ - make_test_dataset(num_noncompliant_subjects, - repetition_time, - echo_train_length, - flip_angle) - mrd = import_dataset(fake_ds_dir, include_phantom=True) - checked_dataset = check_compliance(dataset=mrd) - - # Check on disk, basically the truth - sub_names_by_modality = defaultdict(list) - for modality_path in 
Path(fake_ds_dir).iterdir(): - if modality_path.is_dir() and ('.mrdataset' not in str(modality_path)): - for subject_path in modality_path.iterdir(): - sub_names_by_modality[modality_path.name].append( - subject_path.name) - - # Check if modalities are equal - non_compliant_modality_names = [m for m in dataset_info if dataset_info[m]] - assert assert_list(sub_names_by_modality.keys(), - checked_dataset._children.keys()) - - assert assert_list(checked_dataset.non_compliant_modality_names, - non_compliant_modality_names) - - assert assert_list(checked_dataset.compliant_modality_names, - set(checked_dataset._children.keys()) - set( - non_compliant_modality_names)) - - for modality in checked_dataset.modalities: - # GT - all_subjects = sub_names_by_modality[modality.name] - non_compliant_subjects = dataset_info[modality.name] - compliant_subjects = set(all_subjects) - set(non_compliant_subjects) - - # What did you parse - assert assert_list(all_subjects, modality._children.keys()) - assert assert_list(non_compliant_subjects, - modality.non_compliant_subject_names) - assert assert_list(compliant_subjects, modality.compliant_subject_names) - - # Check if reference has the right values - echo_time = list(modality.reference.keys())[0] - assert modality.reference[echo_time]['tr'] == 200 - assert modality.reference[echo_time]['echo_train_length'] == 4000 - assert modality.reference[echo_time]['flip_angle'] == 80 - - for subject in modality.subjects: - for session in subject.sessions: - for run in session.runs: - if run.delta: - assert run.params['tr'] == repetition_time - assert run.params[ - 'echo_train_length'] == echo_train_length - assert run.params['flip_angle'] == flip_angle - else: - assert run.params['tr'] == 200 - assert run.params[ - 'echo_train_length'] == 4000 - assert run.params['flip_angle'] == 80 - - -@settings(max_examples=30, deadline=None) -@given(st.lists(st.integers(min_value=0, max_value=2), min_size=11, - max_size=11), - st.floats(allow_nan=False, 
allow_infinity=False), - st.floats(allow_nan=False, allow_infinity=False), - st.floats(allow_nan=False, allow_infinity=False) - ) -def test_non_compliance_bids(num_noncompliant_subjects, - repetition_time, - magnetic_field_strength, - flip_angle): - """pass non-compliant ds, and ensure library recognizes them as such""" - assume(repetition_time != const_bids['tr']) - assume(magnetic_field_strength != const_bids['b0']) - assume(flip_angle != const_bids['fa']) - - fake_dir, dataset_info = make_bids_test_dataset(num_noncompliant_subjects, - repetition_time, - magnetic_field_strength, - flip_angle) - mrd = import_dataset(fake_dir, include_phantom=True, ds_format='bids') - checked_dataset = check_compliance(dataset=mrd, output_dir=mrd.data_source) - - # Check on disk, basically the truth - layout = BIDSLayout(fake_dir) - sub_names_by_modality = defaultdict(set) - subjects = layout.get_subjects() - - for modality in MRdataset.config.datatypes: - for sub in subjects: - filters = {'datatype': modality, - 'subject': sub, - 'extension': 'json'} - files = layout.get(**filters) - if files: - sub_names_by_modality[modality].add(sub) - - # Check if modalities are equal - non_compliant_modality_names = [m for m in dataset_info if dataset_info[m]] - assert assert_list(sub_names_by_modality.keys(), - checked_dataset._children.keys()) - - assert assert_list(checked_dataset.non_compliant_modality_names, - non_compliant_modality_names) - - assert assert_list(checked_dataset.compliant_modality_names, - set(checked_dataset._children.keys()) - set( - non_compliant_modality_names)) - - for modality in checked_dataset.modalities: - # GT - all_subjects = sub_names_by_modality[modality.name] - non_compliant_subjects = dataset_info[modality.name] - compliant_subjects = set(all_subjects) - set(non_compliant_subjects) - - # What did you parse - assert assert_list(all_subjects, modality._children.keys()) - assert assert_list(non_compliant_subjects, - modality.non_compliant_subject_names) - 
assert assert_list(compliant_subjects, modality.compliant_subject_names) - - # Check if reference has the right values - echo_times = modality.get_echo_times() - for te in echo_times: - reference = modality.get_reference(te) - assert reference['RepetitionTime'] == const_bids['tr'] - assert reference['MagneticFieldStrength'] == const_bids['b0'] - assert reference['FlipAngle'] == const_bids['fa'] - - for subject in modality.subjects: - for session in subject.sessions: - for run in session.runs: - if run.delta: - assert run.params['RepetitionTime'] == repetition_time - assert run.params['MagneticFieldStrength'] == \ - magnetic_field_strength - assert run.params['FlipAngle'] == flip_angle - else: - assert run.params['RepetitionTime'] == const_bids['tr'] - assert run.params['MagneticFieldStrength'] == \ - const_bids['b0'] - assert run.params['FlipAngle'] == const_bids['fa'] - - -if __name__ == '__main__': - test_non_compliance_bids(num_noncompliant_subjects=[1, 0, 0, 2, 0, 0, 1, 0, - 0, 2, 2], - repetition_time=0.0, - magnetic_field_strength=0.0, - flip_angle=0.0) + with tempfile.TemporaryDirectory() as tempdir: + assume(repetition_time != 200) + assume(echo_train_length != 4000) + assume(flip_angle != 80) + + fake_ds_dir, dataset_info = \ + make_test_dataset(num_noncompliant_subjects, + repetition_time, + echo_train_length, + flip_angle) + mrd = import_dataset(fake_ds_dir, + config_path=THIS_DIR / 'resources/mri-config.json', + output_dir=tempdir) + compliance_dict, _ = check_compliance(dataset=mrd, + output_dir=tempdir, + config_path=THIS_DIR / 'resources/mri-config.json') + config_dict = get_config_from_file( + THIS_DIR / 'resources/mri-config.json') + # include_params = config_dict['include_parameters'] + stratify_by = config_dict.get('stratify_by', None) + + if compliance_dict is not None: + # Check on disk, basically the truth + sub_names_by_modality = defaultdict(list) + for modality_path in Path(fake_ds_dir).iterdir(): + if modality_path.is_dir() and ( + 
'.mrdataset' not in str(modality_path)): + for subject_path in modality_path.iterdir(): + sub_names_by_modality[modality_path.name].append( + subject_path.name) + + fully_compliant_ds = compliance_dict['compliant'] + non_compliant_ds = compliance_dict['non_compliant'] + reference = compliance_dict['reference'] + non_compliant_sequences = non_compliant_ds.get_sequence_ids() + # non_compliant_sequences = [f.split(ATTRIBUTE_SEPARATOR)[0] for f in non_compliant_sequences] + fully_compliant_sequences = fully_compliant_ds.get_sequence_ids() + # fully_compliant_sequences = [f.split(ATTRIBUTE_SEPARATOR)[0] for f in fully_compliant_sequences] + all_sequences = mrd.get_sequence_ids() + + # Check if modalities are equal + all_modalities_on_disk = [m for m in sub_names_by_modality.keys() if + not m.startswith('local')] + non_compliant_modality_on_disk = [m for m in dataset_info if + dataset_info[m] if + not m.startswith('local')] + compliant_modalities_on_disk = set(all_modalities_on_disk) - set( + non_compliant_modality_on_disk) + + assert assert_list(all_modalities_on_disk, all_sequences) + assert assert_list(non_compliant_sequences, + non_compliant_modality_on_disk) + assert assert_list(fully_compliant_sequences, + compliant_modalities_on_disk) + + for seq_id in mrd.get_sequence_ids(): + all_subjects = mrd.get_subject_ids(seq_id) + non_compliant_subjects = non_compliant_ds.get_subject_ids( + seq_id) + compliant_subjects = set(all_subjects) - set( + non_compliant_subjects) + # On diskplu + all_subjects_on_disk = sub_names_by_modality[seq_id] + non_compliant_subjects_on_disk = dataset_info[seq_id] + compliant_subjects_on_disk = set(all_subjects_on_disk) - set( + non_compliant_subjects_on_disk) + + # What did you parse + assert assert_list(all_subjects, all_subjects_on_disk) + assert assert_list(non_compliant_subjects, + non_compliant_subjects_on_disk) + assert assert_list(compliant_subjects, + compliant_subjects_on_disk) + + # Check if reference has the right values + assert 
reference[seq_id]['RepetitionTime'].get_value() == 200 + assert reference[seq_id]['EchoTrainLength'].get_value() == 4000 + assert reference[seq_id]['FlipAngle'].get_value() == 80 + + for subject, session, run, seq in non_compliant_ds.traverse_horizontal( + seq_id): + assert seq['RepetitionTime'].get_value() == repetition_time + assert seq[ + 'EchoTrainLength'].get_value() == echo_train_length + assert seq['FlipAngle'].get_value() == flip_angle + + for subject, session, run, seq in fully_compliant_ds.traverse_horizontal( + seq_id): + assert seq['RepetitionTime'].get_value() == 200 + assert seq['EchoTrainLength'].get_value() == 4000 + assert seq['FlipAngle'].get_value() == 80 + + # @settings(max_examples=30, deadline=None) + # @given(st.lists(st.integers(min_value=0, max_value=2), min_size=11, + # max_size=11), + # st.floats(allow_nan=False, allow_infinity=False), + # st.floats(allow_nan=False, allow_infinity=False), + # st.floats(allow_nan=False, allow_infinity=False) + # ) + # def modify_test_non_compliance_bids(num_noncompliant_subjects, + # repetition_time, + # magnetic_field_strength, + # flip_angle): + # """pass non-compliant ds, and ensure library recognizes them as such""" + # assume(repetition_time != const_bids['tr']) + # assume(magnetic_field_strength != const_bids['b0']) + # assume(flip_angle != const_bids['fa']) + # + # fake_dir, dataset_info = make_bids_test_dataset(num_noncompliant_subjects, + # repetition_time, + # magnetic_field_strength, + # flip_angle) + # mrd = import_dataset(fake_dir, include_phantom=True, ds_format='bids') + # checked_dataset = check_compliance(dataset=mrd, output_dir=mrd.data_source) + # + # # Check on disk, basically the truth + # layout = BIDSLayout(fake_dir) + # sub_names_by_modality = defaultdict(set) + # subjects = layout.get_subjects() + + # for modality in MRdataset.config.datatypes: + # for sub in subjects: + # filters = {'datatype': modality, + # 'subject': sub, + # 'extension': 'json'} + # files = 
layout.get(**filters) + # if files: + # sub_names_by_modality[modality].add(sub) + # + # # Check if modalities are equal + # non_compliant_modality_names = [m for m in dataset_info if dataset_info[m]] + # assert assert_list(sub_names_by_modality.keys(), + # checked_dataset._children.keys()) + # + # assert assert_list(checked_dataset.non_compliant_modality_names, + # non_compliant_modality_names) + # + # assert assert_list(checked_dataset.compliant_modality_names, + # set(checked_dataset._children.keys()) - set( + # non_compliant_modality_names)) + # + # for modality in checked_dataset.modalities: + # # GT + # all_subjects = sub_names_by_modality[modality.name] + # non_compliant_subjects = dataset_info[modality.name] + # compliant_subjects = set(all_subjects) - set(non_compliant_subjects) + # + # # What did you parse + # assert assert_list(all_subjects, modality._children.keys()) + # assert assert_list(non_compliant_subjects, + # modality.non_compliant_subject_names) + # assert assert_list(compliant_subjects, modality.compliant_subject_names) + # + # # Check if reference has the right values + # echo_times = modality.get_echo_times() + # for te in echo_times: + # reference = modality.get_reference(te) + # assert reference['RepetitionTime'] == const_bids['tr'] + # assert reference['MagneticFieldStrength'] == const_bids['b0'] + # assert reference['FlipAngle'] == const_bids['fa'] + # + # for subject in modality.subjects: + # for session in subject.sessions: + # for run in session.runs: + # if run.delta: + # assert run.params['RepetitionTime'] == repetition_time + # assert run.params['MagneticFieldStrength'] == \ + # magnetic_field_strength + # assert run.params['FlipAngle'] == flip_angle + # else: + # assert run.params['RepetitionTime'] == const_bids['tr'] + # assert run.params['MagneticFieldStrength'] == \ + # const_bids['b0'] + # assert run.params['FlipAngle'] == const_bids['fa'] diff --git a/mrQA/tests/test_config.py b/mrQA/tests/test_config.py new file mode 100644 
index 0000000..e69de29 diff --git a/mrQA/tests/test_formatter.py b/mrQA/tests/test_formatter.py new file mode 100644 index 0000000..e69de29 diff --git a/mrQA/tests/unit_tests.py b/mrQA/tests/test_majority.py similarity index 72% rename from mrQA/tests/unit_tests.py rename to mrQA/tests/test_majority.py index bbc51e8..47b6797 100644 --- a/mrQA/tests/unit_tests.py +++ b/mrQA/tests/test_majority.py @@ -1,4 +1,11 @@ -from mrQA.utils import majority_attribute_values, files_modified_since +import shutil +from subprocess import CalledProcessError + +from protocol import UnspecifiedType + +from mrQA.config import EqualCountType, CannotComputeMajority +from mrQA.tests.conftest import THIS_DIR +from mrQA.utils import majority_values, folders_modified_since from mrQA.utils import list2txt, txt2list import unittest import tempfile @@ -21,7 +28,9 @@ def test_single_majority(self): {'species': 'panther', 'color': 'orange', 'habitat': 'jungle'}, {'species': 'lion', 'color': 'brown', 'habitat': 'savanna'}, ] - maj_attr_vals = majority_attribute_values(animals) + maj_attr_vals = majority_values( + animals, + include_params=['species', 'color', 'habitat']) self.assertEqual(maj_attr_vals['species'], 'lion') self.assertEqual(maj_attr_vals['habitat'], 'jungle') self.assertEqual(maj_attr_vals['color'], 'orange') @@ -29,12 +38,12 @@ def test_single_majority(self): def test_empty_list(self): animals = [] with self.assertRaises(ValueError): - majority_attribute_values(animals) + majority_values(animals) def test_list_empty_dicts(self): animals = [{}, {}, {}, {}] with self.assertRaises(ValueError): - majority_attribute_values(animals) + majority_values(animals) def test_equal_count(self): animals = [ @@ -49,8 +58,10 @@ def test_equal_count(self): {'species': 'tiger', 'color': 'orange', 'habitat': 'jungle'}, {'species': 'lion', 'color': 'brown', 'habitat': 'savanna'}, ] - maj_attr_vals = majority_attribute_values(animals) - self.assertIsNone(maj_attr_vals['species']) + maj_attr_vals = 
majority_values( + animals, + include_params=['species', 'color', 'habitat']) + self.assertIsInstance(maj_attr_vals['species'], EqualCountType) self.assertEqual(maj_attr_vals['habitat'], 'jungle') self.assertEqual(maj_attr_vals['color'], 'orange') @@ -67,8 +78,10 @@ def test_single_key_equal_count(self): {'species': 'tiger'}, {'species': 'lion'}, ] - maj_attr_vals = majority_attribute_values(animals) - self.assertIsNone(maj_attr_vals['species']) + maj_attr_vals = majority_values( + animals, + include_params=['species']) + self.assertIsInstance(maj_attr_vals['species'], EqualCountType) def test_single_key_majority(self): animals = [ @@ -83,7 +96,8 @@ def test_single_key_majority(self): {'species': 'tiger'}, {'species': 'lion'}, ] - maj_attr_vals = majority_attribute_values(animals) + maj_attr_vals = majority_values(animals, + include_params=['species']) self.assertEqual(maj_attr_vals['species'], 'lion') def test_none_majority(self): @@ -99,7 +113,9 @@ def test_none_majority(self): {'species': 'tiger'}, {'species': None}, ] - maj_attr_vals = majority_attribute_values(animals) + maj_attr_vals = majority_values( + animals, + include_params=['species']) self.assertEqual(maj_attr_vals['species'], 'tiger') def test_all_none(self): @@ -115,7 +131,9 @@ def test_all_none(self): {'species': None}, {'species': None}, ] - maj_attr_vals = majority_attribute_values(animals) + maj_attr_vals = majority_values( + animals, + include_params=['species']) self.assertIsNone(maj_attr_vals['species']) def test_many_values_for_majority(self): @@ -130,30 +148,29 @@ def test_many_values_for_majority(self): {'species': 'cheetah'}, {'species': 'cheetah'}, ] - maj_attr_vals = majority_attribute_values(animals) - self.assertIsNone(maj_attr_vals['species']) + maj_attr_vals = majority_values(animals, + include_params=['species']) + self.assertIsInstance(maj_attr_vals['species'], EqualCountType) def test_length_less_than_3(self): animals = [ {'species': 'lion', 'color': 'orange', 'habitat': 
'savanna'}, ] - maj_attr_vals = majority_attribute_values(animals) - self.assertIsNone(maj_attr_vals['species']) - self.assertIsNone(maj_attr_vals['habitat']) - self.assertIsNone(maj_attr_vals['color']) + with self.assertRaises(CannotComputeMajority): + maj_attr_vals = majority_values( + animals, include_params=['species', 'color', 'habitat']) animals = [ {'species': 'lion', 'color': 'orange', 'habitat': 'savanna'}, {'species': 'tiger', 'color': 'yellow', 'habitat': 'jungle'}, ] - maj_attr_vals = majority_attribute_values(animals) - self.assertIsNone(maj_attr_vals['species']) - self.assertIsNone(maj_attr_vals['habitat']) - self.assertIsNone(maj_attr_vals['color']) + with self.assertRaises(CannotComputeMajority): + maj_attr_vals = majority_values( + animals, include_params=['species', 'color', 'habitat']) def test_none(self): with self.assertRaises(ValueError): - majority_attribute_values(None) + majority_values(None) def test_different_keys(self): animals = [ @@ -168,7 +185,8 @@ def test_different_keys(self): {'species': 'tiger', 'color': 'orange', 'habitat': 'jungle'}, {'species': 'jaguar', 'color': 'brown', 'habitat': 'savanna'}, ] - maj_attr_vals = majority_attribute_values(animals) + maj_attr_vals = majority_values( + animals, include_params=['species', 'color', 'habitat']) self.assertEqual(maj_attr_vals['species'], 'tiger') self.assertEqual(maj_attr_vals['habitat'], 'savanna') self.assertEqual(maj_attr_vals['color'], 'orange') @@ -195,9 +213,14 @@ def setUpClass(cls): cls.temp_ds = cls.create_dummy_directory() def test_invalid_path(self): - with self.assertRaises(FileNotFoundError): - now = datetime.now(timezone.utc) - files_modified_since('/tmp/mrqa', now.strftime("%m/%d/%Y %H:%M:%S")) + now = datetime.now(timezone.utc) + folders = folders_modified_since( + input_dir='/tmp/mrqa', + last_reported_on=now.strftime("%m/%d/%Y %H:%M:%S"), + output_dir='/tmp/', + time_format='datetime' + ) + self.assertEqual(len(folders), 0) @staticmethod def 
create_dummy_directory(): @@ -211,22 +234,17 @@ def create_dummy_directory(): temp_subdir_path = temp_folder_name / random_name() temp_subdir_path.mkdir(parents=True, exist_ok=True) for j in range(num_files): - tmp = tempfile.NamedTemporaryFile(delete=False, - dir=temp_subdir_path) + # copy a file from somewhere + temp_file_path = temp_subdir_path / random_name() + shutil.copy(Path(THIS_DIR / 'resources/valid.dcm'), temp_file_path) return temp_folder_name def test_basic(self): now = datetime(2023, 2, 5, 18, 00) # datetime.now(timezone.utc) date_time = now.strftime("%m/%d/%Y %H:%M:%S") - valid_files = files_modified_since(self.temp_ds, date_time, '/tmp/', - time_format='datetime') - print(valid_files) - - -if __name__ == '__main__': - a = TestModifiedTimeDirectory() - # a.check_basic() - - - + valid_files = folders_modified_since(input_dir=str(self.temp_ds), + last_reported_on=date_time, + output_dir='/tmp/', + time_format='datetime') + self.assertEqual(len(valid_files), 10) diff --git a/mrQA/tests/test_monitor.py b/mrQA/tests/test_monitor.py index 1c36772..09651ca 100644 --- a/mrQA/tests/test_monitor.py +++ b/mrQA/tests/test_monitor.py @@ -1,179 +1,179 @@ -import shutil -import subprocess -import time -import unittest -import zipfile -from pathlib import Path -from random import randint - -import pytest -from MRdataset import import_dataset, load_mr_dataset, MRDS_EXT -from MRdataset.utils import is_same_dataset -from numpy.random import default_rng - -from mrQA import check_compliance -from mrQA.config import mrds_fpath -from mrQA.monitor import monitor -from mrQA.tests.config import DATASET_PATHS, ABCD_DATASET_PATHS -from mrQA.tests.utils import test_modified_files, test_output_files_created, \ - test_same_dataset, get_temp_input_folder, get_temp_output_folder, \ - create_random_file_sets, copy2dest, pick_random_sets -from mrQA.utils import get_timestamps, txt2list -import tempfile - - -@pytest.fixture -def seed(): - num = randint(1, 10000) - return num - - 
-@pytest.mark.parametrize('data_source, n, max_files, seed', DATASET_PATHS, - indirect=['seed']) -def test_monitor_local(data_source, n, max_files, seed) -> None: - rng = default_rng(seed) - print(f"\nSeed = {seed}\n") - data_source = Path(data_source) - name = data_source.stem - temp_dir = Path(tempfile.mkdtemp()) - temp_input_src = get_temp_input_folder(name, temp_dir) - temp_output_dest = get_temp_output_folder(name, temp_dir) - folder_sets = create_random_file_sets(data_source, - n, - max_files, - rng) - time_dict = None - for i in range(n): - file_set = copy2dest(folder_sets[i], data_source, temp_input_src) - time.sleep(5) - if time_dict: - # on the first iteration, time_dict is None. - # On subsequent iterations, we want to check - # that the files modified since the last report - # are the same as the files we copied. - test_modified_files(time_dict['utc'], temp_input_src, - temp_output_dest, data_source, file_set) - - time_dict = get_timestamps() - - report_filepath = monitor(name=name, - data_source=temp_input_src, - output_dir=temp_output_dest) - mrds_path = mrds_fpath(report_filepath.parent, report_filepath.stem) - - test_output_files_created(folder=report_filepath.parent, - fname=report_filepath.stem) - test_same_dataset(mrds_path, temp_input_src, temp_dir, - name) - # shutil.rmtree(temp_dir) - - -@pytest.mark.parametrize('data_source, n, seed', ABCD_DATASET_PATHS, - indirect=['seed']) -def test_monitor_abcd(data_source, n, seed) -> None: - rng = default_rng(seed) - print(f"\nSeed = {seed}\n") - name = data_source.stem - data_source_path = Path(data_source) / 'dicom' - per_batch_id_list = txt2list(Path(data_source) / 'dicom_mrqa_files/per_batch_id_list.txt') - temp_dir = Path(tempfile.mkdtemp()) - temp_input_src = get_temp_input_folder(name, temp_dir) - temp_output_dest = get_temp_output_folder(name, temp_dir) - folder_sets = pick_random_sets(per_batch_id_list, n, rng) - time_dict = None - for i in range(n): - file_set = copy2dest(folder_sets[i], 
data_source_path, temp_input_src) - time.sleep(5) - if time_dict: - # on the first iteration, time_dict is None. - # On subsequent iterations, we want to check - # that the files modified since the last report - # are the same as the files we copied. - test_modified_files(time_dict['utc'], temp_input_src, - temp_output_dest, data_source_path, file_set) - - time_dict = get_timestamps() - - report_filepath = monitor(name=name, - data_source=temp_input_src, - output_dir=temp_output_dest) - mrds_path = mrds_fpath(report_filepath.parent, report_filepath.stem) - - test_output_files_created(folder=report_filepath.parent, - fname=report_filepath.stem) - test_same_dataset(mrds_path, temp_input_src, temp_dir, - name) - shutil.rmtree(temp_dir) - - -class TestMonitorDummyDataset(unittest.TestCase): - @classmethod - def setUpClass(cls): - # Extract Zip folders - zip_path = Path( - '/home/sinhah/github/MRdataset/examples/test_merge_data.zip') - temp_dir = Path('/tmp/') - extract_folder = temp_dir / zip_path.stem - cls.data_source = temp_dir / 'test_merge_data' - if extract_folder.exists(): - shutil.rmtree(extract_folder) - with zipfile.ZipFile(zip_path, 'r') as zip_ref: - zip_ref.extractall(temp_dir) - - # Set up output directories - output_dir = temp_dir / 'output_dir' - if output_dir.is_dir(): - shutil.rmtree(output_dir) - output_dir.mkdir(exist_ok=False, parents=True) - output_folder_path = output_dir / 'dummy_ds' - cls.output_folder_path = output_folder_path - - # Read full dataset, acts as ground truth - complete_dataset = import_dataset( - data_source=temp_dir / 'test_merge_data/full_data', - name='dummy_ds') - report_path = check_compliance(complete_dataset, - output_dir=output_folder_path) - fname = output_folder_path / f'{report_path.stem}{MRDS_EXT}' - cls.complete_dataset = load_mr_dataset(fname) - - def test_modalities(self): - input_folder_path = self.data_source / 'new_modalities' - self.simulate(input_folder_path, self.output_folder_path) - - def 
test_subjects(self): - input_folder_path = self.data_source / 'new_subjects' - self.simulate(input_folder_path, self.output_folder_path) - - def test_sessions(self): - input_folder_path = self.data_source / 'new_sessions' - self.simulate(input_folder_path, self.output_folder_path) - - def test_runs(self): - input_folder_path = self.data_source / 'new_runs' - self.simulate(input_folder_path, self.output_folder_path) - - @staticmethod - def copy_new_scans(src, dest): - cmd = f"cp -r -n {src}/* {dest}" - with subprocess.Popen(cmd, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, shell=True) as proc: - o, e = proc.communicate() - if proc.returncode: - raise RuntimeError(e.decode('utf8')) - - def simulate(self, input_fpath, output_fpath): - ds1 = import_dataset(data_source=input_fpath / 'set1', - name='dummy_ds') - check_compliance(ds1, output_dir=output_fpath) - - self.copy_new_scans(src=input_fpath / 'set2', - dest=input_fpath / 'set1') - - report_path = monitor(name='dummy_ds', - data_source=input_fpath / 'set1', - output_dir=output_fpath) - - mrds_path = mrds_fpath(output_fpath, report_path.stem) - ds2 = load_mr_dataset(mrds_path) - assert is_same_dataset(ds2, self.complete_dataset) +# import shutil +# import subprocess +# import time +# import unittest +# import zipfile +# from pathlib import Path +# from random import randint +# +# import pytest +# from MRdataset import import_dataset, load_mr_dataset, MRDS_EXT +# from MRdataset.utils import is_same_dataset +# from numpy.random import default_rng +# +# from mrQA import check_compliance +# from mrQA.config import mrds_fpath +# from mrQA.monitor import monitor +# from mrQA.tests.config import DATASET_PATHS, ABCD_DATASET_PATHS +# from mrQA.tests.test_utils import test_modified_folders, test_output_files_created, \ +# test_same_dataset, get_temp_input_folder, get_temp_output_folder, \ +# create_random_file_sets, copy2dest, pick_random_sets +# from mrQA.utils import get_timestamps, txt2list +# import tempfile +# +# +# 
@pytest.fixture +# def seed(): +# num = randint(1, 10000) +# return num +# +# +# @pytest.mark.parametrize('data_source, n, max_files, seed', DATASET_PATHS, +# indirect=['seed']) +# def test_monitor_local(data_source, n, max_files, seed) -> None: +# rng = default_rng(seed) +# print(f"\nSeed = {seed}\n") +# data_source = Path(data_source) +# name = data_source.stem +# temp_dir = Path(tempfile.mkdtemp()) +# temp_input_src = get_temp_input_folder(name, temp_dir) +# temp_output_dest = get_temp_output_folder(name, temp_dir) +# folder_sets = create_random_file_sets(data_source, +# n, +# max_files, +# rng) +# time_dict = None +# for i in range(n): +# file_set = copy2dest(folder_sets[i], data_source, temp_input_src) +# time.sleep(5) +# if time_dict: +# # on the first iteration, time_dict is None. +# # On subsequent iterations, we want to check +# # that the files modified since the last report +# # are the same as the files we copied. +# test_modified_folders(time_dict['utc'], temp_input_src, +# temp_output_dest, data_source, file_set) +# +# time_dict = get_timestamps() +# +# report_filepath = monitor(name=name, +# data_source=temp_input_src, +# output_dir=temp_output_dest) +# mrds_path = mrds_fpath(report_filepath.parent, report_filepath.stem) +# +# test_output_files_created(folder=report_filepath.parent, +# fname=report_filepath.stem) +# test_same_dataset(mrds_path, temp_input_src, temp_dir, +# name) +# # shutil.rmtree(temp_dir) +# +# +# @pytest.mark.parametrize('data_source, n, seed', ABCD_DATASET_PATHS, +# indirect=['seed']) +# def test_monitor_abcd(data_source, n, seed) -> None: +# rng = default_rng(seed) +# print(f"\nSeed = {seed}\n") +# name = data_source.stem +# data_source_path = Path(data_source) / 'dicom' +# per_batch_id_list = txt2list(Path(data_source) / 'dicom_mrqa_files/per_batch_id_list.txt') +# temp_dir = Path(tempfile.mkdtemp()) +# temp_input_src = get_temp_input_folder(name, temp_dir) +# temp_output_dest = get_temp_output_folder(name, temp_dir) +# 
folder_sets = pick_random_sets(per_batch_id_list, n, rng) +# time_dict = None +# for i in range(n): +# file_set = copy2dest(folder_sets[i], data_source_path, temp_input_src) +# time.sleep(5) +# if time_dict: +# # on the first iteration, time_dict is None. +# # On subsequent iterations, we want to check +# # that the files modified since the last report +# # are the same as the files we copied. +# test_modified_folders(time_dict['utc'], temp_input_src, +# temp_output_dest, data_source_path, file_set) +# +# time_dict = get_timestamps() +# +# report_filepath = monitor(name=name, +# data_source=temp_input_src, +# output_dir=temp_output_dest) +# mrds_path = mrds_fpath(report_filepath.parent, report_filepath.stem) +# +# test_output_files_created(folder=report_filepath.parent, +# fname=report_filepath.stem) +# test_same_dataset(mrds_path, temp_input_src, temp_dir, +# name) +# shutil.rmtree(temp_dir) +# +# +# class TestMonitorDummyDataset(unittest.TestCase): +# @classmethod +# def setUpClass(cls): +# # Extract Zip folders +# zip_path = Path( +# '/home/sinhah/github/MRdataset/examples/test_merge_data.zip') +# temp_dir = Path('/tmp/') +# extract_folder = temp_dir / zip_path.stem +# cls.data_source = temp_dir / 'test_merge_data' +# if extract_folder.exists(): +# shutil.rmtree(extract_folder) +# with zipfile.ZipFile(zip_path, 'r') as zip_ref: +# zip_ref.extractall(temp_dir) +# +# # Set up output directories +# output_dir = temp_dir / 'output_dir' +# if output_dir.is_dir(): +# shutil.rmtree(output_dir) +# output_dir.mkdir(exist_ok=False, parents=True) +# output_folder_path = output_dir / 'dummy_ds' +# cls.output_folder_path = output_folder_path +# +# # Read full dataset, acts as ground truth +# complete_dataset = import_dataset( +# data_source=temp_dir / 'test_merge_data/full_data', +# name='dummy_ds') +# report_path = check_compliance(complete_dataset, +# output_dir=output_folder_path) +# fname = output_folder_path / f'{report_path.stem}{MRDS_EXT}' +# cls.complete_dataset = 
load_mr_dataset(fname) +# +# def test_modalities(self): +# input_folder_path = self.data_source / 'new_modalities' +# self.simulate(input_folder_path, self.output_folder_path) +# +# def test_subjects(self): +# input_folder_path = self.data_source / 'new_subjects' +# self.simulate(input_folder_path, self.output_folder_path) +# +# def test_sessions(self): +# input_folder_path = self.data_source / 'new_sessions' +# self.simulate(input_folder_path, self.output_folder_path) +# +# def test_runs(self): +# input_folder_path = self.data_source / 'new_runs' +# self.simulate(input_folder_path, self.output_folder_path) +# +# @staticmethod +# def copy_new_scans(src, dest): +# cmd = f"cp -r -n {src}/* {dest}" +# with subprocess.Popen(cmd, stdout=subprocess.PIPE, +# stderr=subprocess.PIPE, shell=True) as proc: +# o, e = proc.communicate() +# if proc.returncode: +# raise RuntimeError(e.decode('utf8')) +# +# def simulate(self, input_fpath, output_fpath): +# ds1 = import_dataset(data_source=input_fpath / 'set1', +# name='dummy_ds') +# check_compliance(ds1, output_dir=output_fpath) +# +# self.copy_new_scans(src=input_fpath / 'set2', +# dest=input_fpath / 'set1') +# +# report_path = monitor(name='dummy_ds', +# data_source=input_fpath / 'set1', +# output_dir=output_fpath) +# +# mrds_path = mrds_fpath(output_fpath, report_path.stem) +# ds2 = load_mr_dataset(mrds_path) +# assert is_same_dataset(ds2, self.complete_dataset) diff --git a/mrQA/tests/test_parallel.py b/mrQA/tests/test_parallel.py index d06b9dc..01de999 100644 --- a/mrQA/tests/test_parallel.py +++ b/mrQA/tests/test_parallel.py @@ -1,148 +1,144 @@ +import shutil +import tempfile from pathlib import Path from MRdataset import import_dataset, save_mr_dataset, load_mr_dataset from MRdataset.config import MRDS_EXT -from MRdataset.log import logger -from MRdataset.utils import valid_paths from mrQA import check_compliance from mrQA.parallel_utils import _make_file_folders -from mrQA.run_parallel import process_parallel, 
split_ids_list +from mrQA.run_parallel import process_parallel, split_folders_list +from mrQA.tests.conftest import THIS_DIR +from mrQA.tests.simulate import sample_dicom_dataset from mrQA.utils import txt2list dummy_DS = [] -logger.setLevel('WARNING') - - -def test_equivalence_seq_vs_parallel(data_source): - output_dir = Path(data_source).parent / 'test_mrqa_files' - output_path = { - 'sequential': output_dir / ('sequential' + MRDS_EXT), - 'parallel': output_dir / ('parallel' + MRDS_EXT) - } - if not output_path['sequential'].exists(): - sequential_ds = import_dataset(data_source=data_source, - ds_format='dicom', - name='sequential') - save_mr_dataset(output_path['sequential'], sequential_ds) - else: - sequential_ds = load_mr_dataset(output_path['sequential']) - - process_parallel(data_source, - output_dir, - output_path['parallel'], - 'parallel') - - # Generate a report for the merged dataset - parallel_ds = load_mr_dataset(output_path['parallel'], ds_format='dicom') - - report_path = { - 'sequential': check_compliance(dataset=sequential_ds, - output_dir=output_dir), - 'parallel': check_compliance(dataset=parallel_ds, - output_dir=output_dir) - } - - if is_same(report_path['sequential'], report_path['parallel']): - print("Reports are same") - else: - print('Reports are different') - - -def test_merging(data_source): - # Sequential complete processing of the dataset - output_dir = Path(data_source).parent / 'test_merge_mrqa_files' - output_path_seq = output_dir / ('sequential' + MRDS_EXT) - - if not output_path_seq.exists(): - sequential_ds = import_dataset(data_source=data_source, - ds_format='dicom', - name='sequential') - save_mr_dataset(output_path_seq, sequential_ds) - else: - sequential_ds = load_mr_dataset(output_path_seq) - - # Start processing in batches - folder_paths, files_per_batch, all_ids_path = _make_file_folders(output_dir) - - # For each batch create the list of ids to be processed - ids_path_list = split_ids_list( - data_source, - 
all_ids_path=all_ids_path, - per_batch_ids=files_per_batch['ids'], - output_dir=folder_paths['ids'], - subjects_per_job=5 - ) - - # The paths to the output files - output_path = {i: output_dir/f'seq{i}{MRDS_EXT}' - for i in range(len(ids_path_list))} - ds_list = [] - for i, filepath in enumerate(ids_path_list): - # Read the list of subject ids to be processed - subject_folders_list = txt2list(filepath) - if not output_path[i].exists(): - # Process the batch of subjects - ds = import_dataset(data_source=subject_folders_list, - ds_format='dicom', - name=f'seq{i}') - save_mr_dataset(output_path[i], ds) + + +def test_equivalence_seq_vs_parallel(): + with tempfile.TemporaryDirectory() as tempdir: + shutil.copytree(sample_dicom_dataset(), tempdir, dirs_exist_ok=True) + data_source = tempdir + config_path = THIS_DIR / 'resources/mri-config.json' + output_dir = Path(data_source) / 'test_mrqa_files' + output_path = { + 'sequential': output_dir / ('sequential' + MRDS_EXT), + 'parallel': output_dir / ('parallel' + MRDS_EXT) + } + if not output_path['sequential'].exists(): + sequential_ds = import_dataset(data_source=data_source, + ds_format='dicom', + name='sequential', + config_path=config_path, + output_dir=output_dir) + save_mr_dataset(output_path['sequential'], sequential_ds) else: - ds = load_mr_dataset(output_path[i]) - ds_list.append(ds) - - # Merge batches - combined_mrds = None - for ds in ds_list: - if combined_mrds is None: - # Add the first partial dataset - combined_mrds = ds + sequential_ds = load_mr_dataset(output_path['sequential']) + + process_parallel(data_source=data_source, + output_dir=output_dir, + out_mrds_path=output_path['parallel'], + name='parallel', + job_size=5, + config_path=config_path, + hpc=False, ) + + # Generate a report for the merged dataset + parallel_ds = load_mr_dataset(output_path['parallel']) + + report_path = { + 'sequential': check_compliance(dataset=sequential_ds, + output_dir=output_dir, + config_path=config_path), + 'parallel': 
check_compliance(dataset=parallel_ds, + output_dir=output_dir, + config_path=config_path) + } + # check hz_audit_results + sequential_hz_results, sequential_vt_results = report_path['sequential'] + parallel_hz_results, parallel_vt_results = report_path['parallel'] + + assert sequential_hz_results['complete_ds'] == parallel_hz_results[ + 'complete_ds'] + assert sequential_hz_results['reference'] == parallel_hz_results[ + 'reference'] + assert sequential_hz_results['compliant'] == parallel_hz_results[ + 'compliant'] + assert sequential_hz_results['non_compliant'] == parallel_hz_results[ + 'non_compliant'] + assert sequential_hz_results['undetermined'] == parallel_hz_results[ + 'undetermined'] + + assert sequential_vt_results['complete_ds'] == parallel_vt_results[ + 'complete_ds'] + assert sequential_vt_results['sequence_pairs'] == parallel_vt_results[ + 'sequence_pairs'] + assert sequential_vt_results['compliant'] == parallel_vt_results[ + 'compliant'] + assert sequential_vt_results['non_compliant'] == parallel_vt_results[ + 'non_compliant'] + assert sequential_vt_results['parameters'] == parallel_vt_results[ + 'parameters'] + + +def test_merging(): + # Sequential complete processing of the dataset + data_source = sample_dicom_dataset() + with tempfile.TemporaryDirectory() as tempdir: + output_dir = Path(tempdir) + output_path_seq = output_dir / ('sequential' + MRDS_EXT) + config_path = THIS_DIR / 'resources/mri-config.json' + + if not output_path_seq.exists(): + sequential_ds = import_dataset(data_source=data_source, + ds_format='dicom', + name='sequential', + config_path=config_path, + output_dir=output_dir) + save_mr_dataset(output_path_seq, sequential_ds) else: - # otherwise, keep aggregating - combined_mrds.merge(ds) - - # Check if both datasets are the same - assert is_same_dataset(combined_mrds, sequential_ds) - - -def is_same_dataset(dataset1, dataset2): - modalities_list1 = sorted(dataset1.modalities) - modalities_list2 = sorted(dataset2.modalities) - for 
modality1, modality2 in zip(modalities_list1, modalities_list2): - assert modality1.name == modality2.name - assert modality1.compliant == modality2.compliant - assert modality1._reference == modality2._reference - assert modality1.non_compliant_data.equals(modality2.non_compliant_data) - subjects_list1 = sorted(modality1.subjects) - subjects_list2 = sorted(modality2.subjects) - for subject1, subject2 in zip(subjects_list1, subjects_list2): - assert subject1.name == subject2.name - assert subject1.__dict__ == subject2.__dict__ - sessions_list1 = sorted(subject1.sessions) - sessions_list2 = sorted(subject2.sessions) - for session1, session2 in zip(sessions_list1, sessions_list2): - assert session1.name == session2.name - assert session1.__dict__ == session2.__dict__ - runs_list1 = sorted(session1.runs) - runs_list2 = sorted(session2.runs) - for run1, run2 in zip(runs_list1, runs_list2): - assert run1.__dict__ == run2.__dict__ - assert run1.name == run2.name - assert run1.params == run2.params - return True - - -def is_same(file1, file2): - file1, file2 = valid_paths([file1, file2]) - with open(file1) as f1, open(file2) as f2: - # Check after timestamps. 
Don't check 1st 3 lines - for line1, line2 in list(zip(f1, f2))[3:]: - if line1 != line2: - return False - return True - - -if __name__ == '__main__': - # test_equivalence_seq_vs_parallel( - # '/media/sinhah/extremessd/ABCD-375/dicom-baseline-subset/') - test_merging('/media/sinhah/extremessd/ABCD-375/dicom-baseline-subset/') + sequential_ds = load_mr_dataset(output_path_seq) + + # Start processing in batches + folder_paths, files_per_batch, all_ids_path = _make_file_folders(output_dir) + + # For each batch create the list of ids to be processed + ids_path_list = split_folders_list( + data_source, + all_fnames_path=all_ids_path, + per_batch_ids=files_per_batch['fnames'], + output_dir=folder_paths['fnames'], + folders_per_job=5 + ) + + # The paths to the output files + output_path = {i: folder_paths['mrds']/f'seq{i}{MRDS_EXT}' + for i in range(len(ids_path_list))} + ds_list = [] + for i, filepath in enumerate(ids_path_list): + # Read the list of subject ids to be processed + subject_folders_list = txt2list(filepath) + if not output_path[i].exists(): + # Process the batch of subjects + ds = import_dataset(data_source=subject_folders_list, + ds_format='dicom', + name=f'seq{i}', + config_path=config_path, + output_dir=folder_paths['mrds']) + save_mr_dataset(output_path[i], ds) + else: + ds = load_mr_dataset(output_path[i]) + ds_list.append(ds) + + # Merge batches + combined_mrds = None + for ds in ds_list: + if combined_mrds is None: + # Add the first partial dataset + combined_mrds = ds + else: + # otherwise, keep aggregating + combined_mrds.merge(ds) + save_mr_dataset(output_dir / ('parallel' + MRDS_EXT), combined_mrds) + # Check if both datasets are the same + assert combined_mrds == sequential_ds diff --git a/mrQA/tests/test_utils.py b/mrQA/tests/test_utils.py new file mode 100644 index 0000000..705d33d --- /dev/null +++ b/mrQA/tests/test_utils.py @@ -0,0 +1,394 @@ +import re +import tempfile +from datetime import datetime, timedelta, date +from pathlib import Path 
+ +import pytest +from hypothesis import given, settings, assume +from hypothesis.strategies import lists, integers, dates, text, composite, \ + characters, booleans, tuples +from protocol import SiemensMRImagingProtocol, MRImagingProtocol + +from mrQA.tests.conftest import sample_protocol, THIS_DIR, dcm_dataset_strategy +from mrQA.utils import split_list, convert2ascii, next_month, previous_month, \ + has_substring, filter_epi_fmap_pairs, get_protocol_from_file, \ + get_config_from_file, valid_paths, folders_with_min_files, \ + find_terminal_folders, save_audit_results, is_folder_with_no_subfolders, \ + get_reference_protocol, get_config, is_writable + + +@given( + dir_index=lists(integers(), min_size=1), + num_chunks=integers(min_value=1) +) +def test_split_list_hypothesis(dir_index, num_chunks): + if num_chunks < 0: # Ensure num_chunks is greater than 0 + with pytest.raises(ValueError): + split_list(dir_index, num_chunks) + return + + result = list(split_list(dir_index, num_chunks)) + + if len(dir_index) < num_chunks: # Ensure dir_index has enough elements + # Assertions for the result based on the expected behavior of split_list + num_chunks = len(dir_index) + + # Assertions for the result based on the expected behavior of split_list + assert len(result) == num_chunks + assert sum(map(len, result)) == len(dir_index) + + +def test_split_list_value_errors(): + with pytest.raises(ValueError): + split_list([], 1) + with pytest.raises(ValueError): + split_list([1], 0) + with pytest.raises(ValueError): + split_list([1], -1) + + +# Define a strategy for generating strings +@composite +def strings(draw): + return draw(text()) + + +# Define a strategy for generating ASCII strings +@composite +def ascii_strings(draw): + return draw(text( + alphabet=characters(whitelist_categories=('L', 'N', 'P', 'Z', 'S')))) + + +# Define a strategy for generating booleans +@composite +def booleans(draw): + return draw(booleans()) + + +# Property-based test: the output should contain 
only ASCII characters +@given(strings()) +def test_contains_only_ascii(value): + result = convert2ascii(value, allow_unicode=False) + assert all(ord(char) < 128 for char in result) + + +# Property-based test: the output should not contain spaces or +# dashes at the beginning or end +@given(strings()) +def test_no_spaces_or_dashes_at_ends(value): + result = convert2ascii(value, False) + assert not result.startswith((' ', '-')) + assert not result.endswith((' ', '-')) + + +# Property-based test: the output should not contain consecutive +# spaces or dashes +@given(ascii_strings()) +def test_no_consecutive_spaces_or_dashes(value): + result = convert2ascii(value, allow_unicode=False) + assert ' ' not in result + assert '--' not in result + + +# Property-based test: the output should not contain any special characters +@given(ascii_strings()) +def test_no_special_characters(value): + result = convert2ascii(value, allow_unicode=False) + assert re.match(r'^[a-zA-Z0-9_-]*$', result) + + +# Property-based test: converting twice should be the same as converting once +@given(ascii_strings()) +def test_double_conversion_is_same(value): + result1 = convert2ascii(value, allow_unicode=False) + result2 = convert2ascii(result1, allow_unicode=False) + assert result1 == result2 + + +def test_next_month(): + # Test cases with specific dates + assert next_month(datetime(2023, 1, 15)) == datetime(2023, 2, 1) + assert next_month(datetime(2022, 12, 5)) == datetime(2023, 1, 1) + # Add more test cases as needed + + +@given(dt=dates()) +def test_next_month_hypothesis(dt): + result = next_month(dt) + + # Ensure the result is a datetime object + assert isinstance(result, date) + + # Ensure the result is the first day of the next month + expected_result = (dt.replace(day=28) + timedelta(days=5)).replace(day=1) + assert result == expected_result + + +def test_previous_month(): + # Test cases with specific dates + assert previous_month(datetime(2023, 2, 15)) == datetime(2023, 1, 1) + assert 
previous_month(datetime(2023, 1, 1)) == datetime(2022, 12, 1) + # Add more test cases as needed + + +@given(dt=dates()) +def test_previous_month_hypothesis(dt): + result = previous_month(dt) + + # Ensure the result is a datetime object + assert isinstance(result, date) + + # Ensure the result is the first day of the previous month + expected_result = (dt.replace(day=1) - timedelta(days=1)).replace(day=1) + assert result == expected_result + + +def test_has_substring(): + # Test cases with specific inputs + assert has_substring("hello world", ["hello", "world"]) + assert has_substring("python", ["java", "python", "cpp"]) + assert not has_substring("apple", ["orange", "banana"]) + # Add more test cases as needed + + +@given( + input_string=text(), + substrings=lists(text(), min_size=1) +) +def test_has_substring_hypothesis(input_string, substrings): + result = has_substring(input_string, substrings) + + # Ensure the result is a boolean + assert isinstance(result, bool) + + # Ensure the result is True if and only if at least one substring is + # present in the input_string + expected_result = any(substring in input_string for substring in substrings) + assert result == expected_result + + +def test_filter_epi_fmap_pairs(): + # Test cases with specific inputs + assert filter_epi_fmap_pairs(("epi_bold", "fmap_fieldmap")) + assert filter_epi_fmap_pairs(("rest_fmri", "map")) + assert not filter_epi_fmap_pairs(("dti", "asl")) + # Add more test cases as needed + + +@given( + pair=tuples(text(), text()) +) +def test_filter_epi_fmap_pairs_hypothesis(pair): + result = filter_epi_fmap_pairs(pair) + assert filter_epi_fmap_pairs(('epi', 'fmap')) + assert filter_epi_fmap_pairs(('fmap', 'epi')) + # Ensure the result is a boolean + assert isinstance(result, bool) + + +def test_get_protocol_from_file(): + ref_protocol = sample_protocol() + protocol = get_protocol_from_file(str(ref_protocol)) + + assert isinstance(protocol, SiemensMRImagingProtocol) + + with 
pytest.raises(FileNotFoundError): + get_protocol_from_file("nonexistent_file.txt") + + with pytest.raises(ValueError): + get_protocol_from_file(THIS_DIR / 'resources/mri-config.json') + + +def test_get_config_from_file(): + config = get_config_from_file(THIS_DIR / 'resources/mri-config.json') + with pytest.raises(TypeError): + get_config_from_file(config) + with pytest.raises(FileNotFoundError): + get_config_from_file("nonexistent_file.txt") + with pytest.raises(ValueError): + get_config_from_file(THIS_DIR / 'resources/invalid-json.json') + + +def test_valid_paths(): + with pytest.raises(ValueError): + valid_paths(None) + with pytest.raises(FileNotFoundError): + valid_paths('nonexistent_file.txt') + with pytest.raises(FileNotFoundError): + valid_paths(['nonexistent_file.txt']) + + +# Test find_terminal_folders with terminal folders +def test_find_terminal_folders_with_terminals(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder1 = root / "folder1" + folder1.mkdir() + folder2 = folder1 / "folder2" + folder2.mkdir() + + terminal_folders = find_terminal_folders(root) + assert terminal_folders == [folder2] + + folder3 = folder2 / "folder3" + folder3.mkdir() + + terminal_folders = find_terminal_folders(root) + assert terminal_folders == [folder3] + + +# Test find_terminal_folders with single folder +def test_find_terminal_folders_single_folder(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder = root / "folder" + folder.mkdir() + + terminal_folders = find_terminal_folders(root) + assert terminal_folders == [folder] + + +# Test find_terminal_folders with non-existent folder +def test_find_terminal_folders_nonexistent_folder(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) / "nonexistent_folder" + + terminal_folders = find_terminal_folders(root) + assert terminal_folders == [] + + +def test_folder_with_min_files_nonexistent_folder(): + with 
tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) / "nonexistent_folder" + with pytest.raises(ValueError): + a = list(folders_with_min_files(root, pattern="*.dcm", min_count=1)) + with pytest.raises(ValueError): + a = list(folders_with_min_files([], pattern="*.dcm", min_count=0)) + + +# Test find_terminal_folders with files +def test_find_terminal_folders_with_files(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + file = root / "file.txt" + file.touch() + + terminal_folders = find_terminal_folders(root) + assert terminal_folders == [root] + + +# Test find_terminal_folders with nested terminal folders +def test_find_terminal_folders_nested_terminals(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder1 = root / "folder1" + folder1.mkdir() + folder2 = folder1 / "folder2" + folder2.mkdir() + folder3 = folder2 / "folder3" + folder3.mkdir() + + terminal_folders = find_terminal_folders(folder1) + assert terminal_folders == [folder3] + + +# Test find_terminal_folders with multiple terminal folders +def test_find_terminal_folders_multiple_terminals(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder1 = root / "folder1" + folder1.mkdir() + folder2 = root / "folder2" + folder2.mkdir() + folder3 = root / "folder3" + folder3.mkdir() + + terminal_folders = find_terminal_folders(root) + assert set(terminal_folders) == {folder1, folder2, folder3} + + +def test_find_folders_with_min_files(): + with tempfile.TemporaryDirectory() as tmpdirname: + root = Path(tmpdirname) + folder1 = root / "folder1" + folder1.mkdir() + file = folder1 / "file.dcm" + file.touch() + folder2 = root / "folder2" + folder2.mkdir() + file = folder2 / "file.dcm" + file.touch() + folder3 = root / "folder3" + folder3.mkdir() + file = folder3 / "file.dcm" + file.touch() + + terminal_folders = folders_with_min_files(root, + pattern="*.dcm", + min_count=1) + assert set(terminal_folders) 
== {folder1, folder2, folder3} + + +def test_save_audit_results(): + with pytest.raises(OSError): + save_audit_results('/sys/firmware/hz.adt.pkl', {}) + + +# Test when folder has subfolders +def test_has_subfolders(): + with tempfile.TemporaryDirectory() as tmpdirname: + folder_path = Path(tmpdirname) + subfolder = folder_path / "subfolder" + subfolder.mkdir(parents=True, exist_ok=True) + + has_no_subfolders, subfolders = is_folder_with_no_subfolders( + folder_path) + assert has_no_subfolders is False + assert subfolder in subfolders + + +# Test when folder has no subfolders +def test_no_subfolders(): + with tempfile.TemporaryDirectory() as tmpdirname: + folder_path = Path(tmpdirname) + + has_no_subfolders, subfolders = is_folder_with_no_subfolders( + folder_path) + assert has_no_subfolders is True + assert subfolders == [] + + +# Test when folder doesn't exist +def test_nonexistent_folder(): + folder_path = Path("nonexistent_folder") + + with pytest.raises(FileNotFoundError): + is_folder_with_no_subfolders(folder_path) + + +@settings(max_examples=1, deadline=None) +@given(args=(dcm_dataset_strategy)) +def test_get_reference_protocol(args): + ds1, attributes = args + assume(len(ds1.name) > 0) + ds1.load() + config = get_config_from_file(attributes['config_path']) + protocol = get_reference_protocol(ds1, config, 'nonexistent_file.txt') + assert isinstance(protocol, MRImagingProtocol) + + +def test_get_config(): + with pytest.raises(FileNotFoundError): + get_config("nonexistent_file.txt") + with pytest.raises(ValueError): + get_config(THIS_DIR / 'resources/mri-config.json', + report_type='horizontal') + config_path = THIS_DIR / 'resources/test-config.json' + config = get_config(config_path, report_type='hz') + config = get_config(config_path, report_type='vt') + assert isinstance(config, dict) + + +def test_is_writable(): + assert not is_writable('/sys/firmware/') diff --git a/mrQA/tests/utils.py b/mrQA/tests/utils.py index 2d7da4a..f499fc1 100644 --- 
a/mrQA/tests/utils.py +++ b/mrQA/tests/utils.py @@ -1,129 +1,139 @@ -import math -import shutil -from datetime import datetime -from pathlib import Path - -import numpy as np -import pytest - -from MRdataset import load_mr_dataset, import_dataset -from MRdataset.utils import is_same_dataset, files_in_path - -from mrQA import check_compliance -from mrQA.config import report_fpath, mrds_fpath, past_records_fpath, \ - DATE_SEPARATOR -from mrQA.utils import files_modified_since, get_last_valid_record, txt2list - - -def test_modified_files(last_reported_on, - temp_input_src, - temp_output_dest, - data_source, - file_set): - modified_files = files_modified_since( - input_dir=temp_input_src, - last_reported_on=last_reported_on, - output_dir=temp_output_dest) - expected = get_relative_paths(file_set, data_source) - got = get_relative_paths(modified_files, temp_input_src) - assert len(expected) == len(got) - assert sorted(expected) == sorted(got) - - -def test_output_files_created(fname, folder): - # add special delimiter to strip time from fname - time_fname = fname.split(DATE_SEPARATOR)[-1] - utc = datetime.strptime(time_fname, '%m_%d_%Y_%H_%M_%S').timestamp() - report_path = report_fpath(folder, fname) - mrds_path = mrds_fpath(folder, fname) - records_path = past_records_fpath(folder) - last_record = get_last_valid_record(folder) - assert report_path.is_file() - assert mrds_path.is_file() - assert records_path.is_file() - assert math.isclose(float(last_record[0]), utc) - assert last_record[1] == str(report_path) - assert last_record[2] == str(mrds_path) - - -def test_same_dataset(mrds_path, - temp_input_src, - tempdir, - name): - # Read the dataset created by monitor - monitor_dataset = load_mr_dataset(mrds_path) - - # Read full dataset, acts as ground truth - ds = import_dataset(data_source=temp_input_src, - name=name) - report_path = check_compliance(ds, output_dir=tempdir/'complete_eval') - mrds_path2 = mrds_fpath(report_path.parent, report_path.stem) - 
complete_dataset = load_mr_dataset(mrds_path2) - print() - # Both datasets should be the same - assert is_same_dataset(complete_dataset, monitor_dataset) - - -def get_temp_input_folder(name, temp_dir): - temp_folder_path = temp_dir / name - if temp_folder_path.is_dir(): - shutil.rmtree(temp_folder_path) - temp_folder_path.mkdir(exist_ok=False, parents=True) - return temp_folder_path - - -def get_temp_output_folder(name, temp_dir): - # Set up output directories - output_dir = temp_dir / 'output_dir' - if output_dir.is_dir(): - shutil.rmtree(output_dir) - output_dir.mkdir(exist_ok=False, parents=True) - output_folder_path = output_dir / name - return output_folder_path - - -def pick_random_sets(per_batch_id_list, n, rng): - rand_id_list_paths = rng.choice(per_batch_id_list, n) - folder_sets = [txt2list(f) for f in rand_id_list_paths] - return folder_sets - - -def create_random_file_sets(temp_input_src, n, max_folders, rng): - # TODO: dataset is not random - unique_folders = set() - for f in temp_input_src.rglob('*'): - if f.is_file() and f.suffix not in ['.html', '.txt']: - folder_path = f.parent - unique_folders.add(folder_path) - unique_folders = sorted(list(unique_folders)) - - rng.shuffle(unique_folders) - testing_set = unique_folders[:max_folders] - print(testing_set[:5]) - try: - folder_sets = np.array_split(testing_set, n) - except ValueError as e: - with pytest.raises(ValueError): - raise ValueError(f"Could not split list of dicom files." 
- f" Got n = {n}") from e - return None - return folder_sets - - -def get_relative_paths(file_list, data_root): - rel_paths = [] - for file in file_list: - rel_path = Path(file).relative_to(data_root) - rel_paths.append(str(rel_path)) - return rel_paths - - -def copy2dest(folder_list, src, dest): - file_list = files_in_path(folder_list) - for file in file_list: - rel_path = file.relative_to(src) - new_abs_path = dest / rel_path - parent = new_abs_path.parent - parent.mkdir(exist_ok=True, parents=True) - shutil.copy(file, parent) - return file_list +# import math +# import shutil +# from datetime import datetime +# from pathlib import Path +# +# import numpy as np +# import pytest +# from MRdataset import load_mr_dataset, import_dataset +# from mrQA import check_compliance +# from mrQA.config import report_fpath, mrds_fpath, past_records_fpath, \ +# DATE_SEPARATOR +# from mrQA.utils import files_in_path +# from mrQA.utils import folders_modified_since, get_last_valid_record, txt2list +# +# +# def test_modified_folders(last_reported_on, +# temp_input_src, +# temp_output_dest, +# data_source, +# file_set): +# modified_files = folders_modified_since( +# input_dir=temp_input_src, +# last_reported_on=last_reported_on, +# output_dir=temp_output_dest) +# expected = get_relative_paths(file_set, data_source) +# got = get_relative_paths(modified_files, temp_input_src) +# assert len(expected) == len(got) +# assert sorted(expected) == sorted(got) +# +# +# def test_output_files_created(fname, folder): +# # add special delimiter to strip time from fname +# time_fname = fname.split(DATE_SEPARATOR)[-1] +# utc = datetime.strptime(time_fname, '%m_%d_%Y_%H_%M_%S').timestamp() +# report_path = report_fpath(folder, fname) +# mrds_path = mrds_fpath(folder, fname) +# records_path = past_records_fpath(folder) +# last_record = get_last_valid_record(folder) +# assert report_path.is_file() +# assert mrds_path.is_file() +# assert records_path.is_file() +# assert 
math.isclose(float(last_record[0]), utc) +# assert last_record[1] == str(report_path) +# assert last_record[2] == str(mrds_path) +# +# +# def test_same_dataset(mrds_path, +# temp_input_src, +# tempdir, +# name): +# # Read the dataset created by monitor +# monitor_dataset = load_mr_dataset(mrds_path) +# +# # Read full dataset, acts as ground truth +# ds = import_dataset(data_source=temp_input_src, +# name=name) +# report_path = check_compliance(ds, output_dir=tempdir/'complete_eval') +# mrds_path2 = mrds_fpath(report_path.parent, report_path.stem) +# complete_dataset = load_mr_dataset(mrds_path2) +# print() +# # Both datasets should be the same +# # assert is_same_dataset(complete_dataset, monitor_dataset) +# +# +# def get_temp_input_folder(name, temp_dir): +# temp_folder_path = temp_dir / name +# if temp_folder_path.is_dir(): +# shutil.rmtree(temp_folder_path) +# temp_folder_path.mkdir(exist_ok=False, parents=True) +# return temp_folder_path +# +# +# def get_temp_output_folder(name, temp_dir): +# # Set up output directories +# output_dir = temp_dir / 'output_dir' +# if output_dir.is_dir(): +# shutil.rmtree(output_dir) +# output_dir.mkdir(exist_ok=False, parents=True) +# output_folder_path = output_dir / name +# return output_folder_path +# +# +# def pick_random_sets(per_batch_id_list, n, rng): +# rand_id_list_paths = rng.choice(per_batch_id_list, n) +# folder_sets = [txt2list(f) for f in rand_id_list_paths] +# return folder_sets +# +# +# def create_random_file_sets(temp_input_src, n, max_folders, rng): +# # TODO: dataset is not random +# unique_folders = set() +# for f in temp_input_src.rglob('*'): +# if f.is_file() and f.suffix not in ['.html', '.txt']: +# folder_path = f.parent +# unique_folders.add(folder_path) +# unique_folders = sorted(list(unique_folders)) +# +# rng.shuffle(unique_folders) +# testing_set = unique_folders[:max_folders] +# print(testing_set[:5]) +# try: +# folder_sets = np.array_split(testing_set, n) +# except ValueError as e: +# with 
pytest.raises(ValueError): +# raise ValueError(f"Could not split list of dicom files." +# f" Got n = {n}") from e +# return None +# return folder_sets +# +# +# def get_relative_paths(file_list, data_root): +# rel_paths = [] +# for file in file_list: +# rel_path = Path(file).relative_to(data_root) +# rel_paths.append(str(rel_path)) +# return rel_paths +# +# +# def copy2dest(folder_list, src, dest): +# file_list = files_in_path(folder_list) +# for file in file_list: +# rel_path = file.relative_to(src) +# new_abs_path = dest / rel_path +# parent = new_abs_path.parent +# parent.mkdir(exist_ok=True, parents=True) +# shutil.copy(file, parent) +# return file_list + +from requests import get # to make GET request + + +def download(url, file_name): + """Download file from url and save to file_name""" + # open in binary mode + with open(file_name, "wb") as file: + # get request + response = get(url) + # write to file + file.write(response.content) diff --git a/mrQA/utils.py b/mrQA/utils.py index dff22a6..bbb3a30 100644 --- a/mrQA/utils.py +++ b/mrQA/utils.py @@ -1,27 +1,189 @@ +import json +import pickle import re +import tempfile import time -import typing import unicodedata -import warnings from collections import Counter -from datetime import datetime, timezone -from itertools import groupby +from datetime import datetime, timedelta, timezone from itertools import takewhile from pathlib import Path -from typing import Union, List, Optional, Any -from subprocess import run, CalledProcessError, TimeoutExpired +from subprocess import run, CalledProcessError, TimeoutExpired, Popen +from typing import Union, List, Optional, Any, Iterable, Sized -import numpy as np -import tempfile -from MRdataset.base import Modality, BaseDataset -from MRdataset.log import logger -from MRdataset.utils import param_difference, make_hashable, slugify +from MRdataset import BaseDataset, is_dicom_file from dateutil import parser +from protocol import BaseSequence, MRImagingProtocol, 
SiemensMRImagingProtocol +from tqdm import tqdm +from mrQA import logger +from mrQA.base import CompliantDataset, NonCompliantDataset, UndeterminedDataset from mrQA.config import past_records_fpath, report_fpath, mrds_fpath, \ subject_list_dir, DATE_SEPARATOR, CannotComputeMajority, \ - ReferenceNotSetForModality, \ - ReferenceNotSetForEchoTime + Unspecified, \ + EqualCount, status_fpath, ATTRIBUTE_SEPARATOR, DATETIME_FORMAT, DATE_FORMAT + + +def get_reference_protocol(dataset: BaseDataset, + config: dict, + reference_path: Union[str, Path] = None): + """ + Given a dataset, it returns the reference protocol that contains + reference protocol for each sequence in the dataset. + """ + # Infer reference protocol if not provided + if reference_path is None: + ref_protocol = infer_protocol(dataset, config=config) + else: + try: + ref_protocol = get_protocol_from_file(reference_path) + except (TypeError, ValueError, FileNotFoundError) as e: + logger.error(f'Error while reading reference protocol ' + f'from filepath : {e}. Falling back to inferred ' + f'reference protocol') + ref_protocol = infer_protocol(dataset, config=config) + + if not ref_protocol: + if reference_path: + logger.error("Reference protocol is invalid. " + "It doesn't contain any sequences. " + "Cannot generate results for horizontal audit.") + else: + logger.error("Inferred reference protocol doesn't have any" + "sequences. It seems the dataset is very small. " + ", that is less than 3 subjects for each sequence.") + return ref_protocol + + +def get_config(config_path: Union[str, Path], report_type='hz') -> dict: + try: + config_dict = get_config_from_file(config_path) + except (ValueError, FileNotFoundError, TypeError) as e: + logger.error(f'Error while reading config file: {e}. 
Please provide' + f'a valid path to the configuration JSON file.') + raise e + + if report_type == 'hz': + key = "horizontal_audit" + elif report_type == 'vt': + key = "vertical_audit" + elif report_type == 'plots': + key = "plots" + else: + raise ValueError(f'Invalid audit type {report_type}. ' + f'Expected "hz" or "vt"') + + audit_config = config_dict.get(key, None) + if audit_config is None: + logger.error( + f'No {key} config found in config file. Note ' + f'that the config file should have a key named ' + f'"{key}".') + else: + include_params = audit_config.get('include_parameters', None) + if include_params is None: + logger.warning( + 'Parameters to be included in the compliance check are ' + 'not provided. All parameters will be included in the ' + f'{key}') + return audit_config + + +def _init_datasets(dataset: BaseDataset): + """ + Initialize the three dataset objects for compliant, non-compliant + and undetermined datasets. + """ + compliant_ds = CompliantDataset(name=dataset.name, + data_source=dataset.data_source, + ds_format=dataset.format) + non_compliant_ds = NonCompliantDataset(name=dataset.name, + data_source=dataset.data_source, + ds_format=dataset.format) + undetermined_ds = UndeterminedDataset(name=dataset.name, + data_source=dataset.data_source, + ds_format=dataset.format) + return compliant_ds, non_compliant_ds, undetermined_ds + + +def is_writable(dir_path): + """ + Check if the directory is writable. For ex. if the directory is + mounted on a read-only file system, it will return False. + """ + try: + with tempfile.TemporaryFile(dir=dir_path, mode='w') as testfile: + testfile.write("OS write to directory test.") + logger.info(f"Created temp file in {dir_path}") + except (OSError, IOError) as e: + logger.error(e) + return False + return True + + +# def files_under_folder(fpath: Union[str, Path], +# ext: str = None) -> typing.Iterable[Path]: +# """ +# Generates all the files inside the folder recursively. 
If ext is given +# returns file which have that extension. +# +# Parameters +# ---------- +# fpath: str +# filepath of the directory +# ext: str +# filter_fn files with given extension. For ex. return only .nii files +# +# Returns +# ------- +# generates filepaths +# """ +# if not Path(fpath).is_dir(): +# raise FileNotFoundError(f"Folder doesn't exist : {fpath}") +# folder_path = Path(fpath).resolve() +# if ext: +# pattern = '*' + ext +# else: +# pattern = '*' +# for file in folder_path.rglob(pattern): +# if file.is_file(): +# # If it is a regular file and not a directory, return filepath +# yield file + + +# def files_in_path(fp_list: Union[Iterable, str, Path], +# ext: Optional[str] = None): +# """ +# If given a single folder, returns the list of all files in the directory. +# If given a list of folders, returns concatenated list of all the files +# inside each directory. +# +# Parameters +# ---------- +# fp_list : List[Path] +# List of folder paths +# ext : str +# Used to filter_fn files, and select only those which have this ext +# Returns +# ------- +# List of paths +# """ +# if isinstance(fp_list, Iterable): +# files = [] +# for i in fp_list: +# if str(i) == '' or str(i) == '.' or i == Path(): +# logger.warning("Found an empty string. Skipping") +# continue +# if Path(i).is_dir(): +# files.extend(list(files_under_folder(i, ext))) +# elif Path(i).is_file(): +# files.append(i) +# return sorted(list(set(files))) +# elif isinstance(fp_list, str) or isinstance(fp_list, Path): +# return sorted(list(files_under_folder(fp_list, ext))) +# else: +# raise NotImplementedError("Expected either Iterable or str type. 
Got" +# f"{type(fp_list)}") def get_items_upto_count(dict_: Counter, rank: int = 1): @@ -50,96 +212,130 @@ def get_items_upto_count(dict_: Counter, rank: int = 1): def timestamp(): """Generate a timestamp as a string""" - time_string = time.strftime('%m_%d_%Y_%H_%M_%S') + time_string = time.strftime(DATETIME_FORMAT) return time_string -def record_out_paths(output_dir, dataset_name): +def make_output_paths(output_dir, dataset): + """ + Generate output paths for the report, mrdataset pickle + file, and subject lists + + Parameters + ---------- + output_dir : Path + output directory + dataset : BaseDataset + dataset object + + Returns + ------- + report_path : Path + Full path to the report file + mrds_path : Path + Full path to the mrdataset pickle file + sub_lists_dir_path : Path + Full path to the directory containing compliant/non-compliant + subject lists for each modality + """ ts = timestamp() - utc = datetime.strptime(ts, '%m_%d_%Y_%H_%M_%S').timestamp() - filename = f'{dataset_name}{DATE_SEPARATOR}{ts}' + # utc = datetime.strptime(ts, '%m_%d_%Y_%H_%M_%S').timestamp() + filename = f'{dataset.name}{DATE_SEPARATOR}{ts}' report_path = report_fpath(output_dir, filename) mrds_path = mrds_fpath(output_dir, filename) sub_lists_dir_path = subject_list_dir(output_dir, filename) + log_report_history(output_dir, mrds_path, report_path, ts) + return report_path, mrds_path, sub_lists_dir_path + + +def log_report_history(output_dir, mrds_path, report_path, ts): + """ + Log the report generation history to a text file + Parameters + ---------- + output_dir : Path + fullpath to the output directory + mrds_path : Path + fullpath to the mrdataset pickle file + report_path : Path + fullpath to the report file + ts : str + timestamp + utc : float + timestamp in UTC zone + """ records_filepath = past_records_fpath(output_dir) if not records_filepath.parent.is_dir(): records_filepath.parent.mkdir(parents=True) with open(records_filepath, 'a', encoding='utf-8') as fp: - 
fp.write(f'{utc},{report_path},' - f'{mrds_path},{ts}\n') - return report_path, mrds_path, sub_lists_dir_path + fp.write(f'{ts},{report_path},' + f'{mrds_path}\n') -def majority_attribute_values(list_of_dicts: list, echo_time: float, - default=None): +def majority_values(list_seqs: list, + default=None, + include_params: list = None, ): """ - Given a list of dictionaries, it generates the most common + Given a list of dictionaries, it generates the most frequent values for each key Parameters ---------- - list_of_dicts : list + list_seqs : list a list of dictionaries - echo_time : float - echo time default : Any a default value if the key is missing in any dictionary - + include_params : list + a list of parameters for which the most frequent values + is to be computed Returns ------- dict - Key-value pairs specifying the most common values for each key + Key-value pairs specifying the most frequent values for each + parameter """ args_valid = False maj_value = default - - try: - args_valid = _check_args_validity(list_of_dicts, echo_time) - except CannotComputeMajority as e: - maj_value = None # 'Cannot Compute Majority:\n Count < 3' - logger.info(f'Cannot compute majority: {e}') - except ValueError as e: - maj_value = None - logger.info(f'Cannot compute majority: {e}') + args_valid = _check_args_validity(list_seqs) if not args_valid: - maj_attr_values = {} - for key in list_of_dicts[0].keys(): - maj_attr_values[key] = maj_value - return maj_attr_values + return maj_value + counters_dict = {} categories = set() - for dict_ in list_of_dicts: - categories.update(dict_.keys()) - for key, value in dict_.items(): - counter = counters_dict.get(key, Counter({default: 0})) - value = make_hashable(value) + if not include_params: + raise ValueError('Expected a list of parameters to include. 
Got None') + for seq in list_seqs: + categories.update(include_params) + for param in include_params: + counter = counters_dict.get(param, Counter({default: 0})) + value = seq.get(param, default) counter[value] += 1 - counters_dict[key] = counter + counters_dict[param] = counter - majority_attr_dict = {} + majority_dict = {} for parameter, counter in counters_dict.items(): - majority_attr_dict[parameter] = pick_majority(counter, parameter) - return majority_attr_dict + majority_dict[parameter] = pick_majority(counter, parameter) + return majority_dict -def extract_reasons(data: list): - """ - Given a list of tuples, extract all the elements at index 1, and return - as a list - - Parameters - ---------- - data : List - A list of tuples - - Returns - ------- - list - List of values at index 1 - """ - return list(zip(*data))[1] +# def extract_reasons(data: list): +# """ +# Given a list of tuples, extract all the elements at index 1, and return +# as a list +# +# Parameters +# ---------- +# data : List +# A list of tuples +# +# Returns +# ------- +# list +# List of values at index 1 +# """ +# return list(zip(*data))[1] def pick_majority(counter_: Counter, parameter: str, default: Any = None): @@ -168,7 +364,8 @@ def pick_majority(counter_: Counter, parameter: str, default: Any = None): If the counter is empty """ if len(counter_) == 0: - raise ValueError('Expected atleast one entry in counter. Got 0') + logger.error('Expected at least one entry in counter. Got 0') + raise ValueError('Expected at least one entry in counter. Got 0') if len(counter_) == 1: return list(counter_.keys())[0] # there are more than 1 value, remove default, and computer majority @@ -181,20 +378,18 @@ def pick_majority(counter_: Counter, parameter: str, default: Any = None): logger.info( 'Could not compute reference for %s. 
Got multiple values' ' %s with same count = %s.', parameter, values, items_rank1[0][1]) - return 'Cannot Compute Majority:\nEqual Count' + return EqualCount return items_rank1[0][0] -def _check_args_validity(list_of_dicts: List[dict], echo_time) -> bool: +def _check_args_validity(list_: List) -> bool: """ Checks if the arguments are valid for computing majority attribute values Parameters ---------- - list_of_dicts : list + list_ : list a list of dictionaries - echo_time : float - Echo time of run Returns ------- @@ -206,34 +401,34 @@ def _check_args_validity(list_of_dicts: List[dict], echo_time) -> bool: ValueError If the list is empty or if any of the dictionaries is empty """ - if list_of_dicts is None: - raise ValueError('Expected a list of dicts, Got NoneType') - if len(list_of_dicts) == 0: + if list_ is None: + raise ValueError('Expected a list of sequences, Got NoneType') + if len(list_) == 0: raise ValueError('List is empty.') - for dict_ in list_of_dicts: - if len(dict_) == 0: - raise ValueError('Atleast one of dictionaries is empty.') - if len(list_of_dicts) < 3: + for seq in list_: + if len(seq) == 0: + raise ValueError('At least one of sequences is empty.') + if len(list_) < 3: logger.info('Cannot compute majority attribute values. ' 'Got less than 3 values for each ' 'parameter. Returns majority values as None.') - raise CannotComputeMajority('Count < 3', te=echo_time) + raise CannotComputeMajority('Count < 3') return True -def split_list(dir_index: list, num_chunks: int) -> typing.Iterable[List[str]]: +def split_list(dir_index: Sized, num_chunks: int) -> Iterable: """ Adapted from https://stackoverflow.com/questions/2130016/splitting-a-list-into-n-parts-of-approximately-equal-length # noqa Given a list of n elements, split it into k parts, where k = num_chunks. - Each part has atleast n/k elements. And the remaining elements + Each part has at least n/k elements. 
And the remaining elements n % k are distributed uniformly among the sub-parts such that each part has almost same number of elements. The first n % k will have floor(n/k) + 1 elements. Parameters ---------- - dir_index : list + dir_index : Sized list to split num_chunks : int number of parts @@ -250,14 +445,15 @@ def split_list(dir_index: list, num_chunks: int) -> typing.Iterable[List[str]]: if not is_integer_number(num_chunks): raise ValueError(f'Number of chunks must be an integer. ' f'Got {num_chunks}') - if num_chunks == 0: + if num_chunks < 1: raise ValueError('Cannot divide list into chunks of size 0') if len(dir_index) == 0: raise ValueError('List of directories is empty!') if len(dir_index) < num_chunks: - warnings.warn(f'Got num_chunks={num_chunks}, list_size={len(dir_index)}' - f'Expected num_chunks < list_size', - stacklevel=2) + logger.warning( + f'Got num_chunks={num_chunks}, list_size={len(dir_index)}' + f'Expected num_chunks < list_size', + stacklevel=2) num_chunks = len(dir_index) k, m = divmod(len(dir_index), num_chunks) # k, m = (len(dir_index)//num_chunks, len(dir_index)%num_chunks) @@ -335,17 +531,17 @@ def execute_local(script_path: str) -> None: if not Path(script_path).is_file(): raise FileNotFoundError(f'Could not find {script_path}') - format_params = '\n'.join(['File system outputs: %O', - 'Maximum RSS size: %M', - 'CPU percentage used: %P', - 'Real Time: %E', - 'User Time: %U', - 'Sys Time: %S']) + # format_params = '\n'.join(['File system outputs: %O', + # 'Maximum RSS size: %M', + # 'CPU percentage used: %P', + # 'Real Time: %E', + # 'User Time: %U', + # 'Sys Time: %S']) # cmd = ['/usr/bin/time', '-f', format_params, 'bash', str(script_path)] cmd = f'bash {str(script_path)}' try: - run(cmd, check=True, shell=True) + Popen(cmd, close_fds=True, shell=True) except FileNotFoundError as exc: logger.error( "Process failed because 'bash' could not be found.\n %s", exc) @@ -359,47 +555,47 @@ def execute_local(script_path: str) -> None: # TODO 
: check if file was created successfully -def get_outliers(data: list, m=25.0) -> Union[list, None]: - """ - Check for outliers. Adapted from - https://stackoverflow.com/a/16562028/3140172 - Parameters - ---------- - data : list - list of values - m : float - number of standard deviations to use as threshold - """ - d = np.abs(data - np.median(data)) - mdev = np.median(d) - s = d / mdev if mdev else 0. - if np.any(s > m): - indices = np.argwhere(s > m).flatten() - return indices - return None - - -def round_dict_values(dict_: dict, decimals: int) -> dict: - """ - Round all the values in a dictionary to a given number of decimals. +# def get_outliers(data: list, m=25.0) -> Union[list, None]: +# """ +# Check for outliers. Adapted from +# https://stackoverflow.com/a/16562028/3140172 +# Parameters +# ---------- +# data : list +# list of values +# m : float +# number of standard deviations to use as threshold +# """ +# d = np.abs(data - np.median(data)) +# mdev = np.median(d) +# s = d / mdev if mdev else 0. +# if np.any(s > m): +# indices = np.argwhere(s > m).flatten() +# return indices +# return None - Parameters - ---------- - dict_ : dict - dictionary of key, value pairs. Values can be numbers or strings. - The function will only round the values that are numbers. - decimals : int - number of decimals to round to - Returns - ------- - dict - dictionary with all the values rounded to the given number of decimals - """ - new_dict = dict_.copy() - for key, value in new_dict.items(): - new_dict[key] = round_if_numeric(value, decimals) - return new_dict +# def round_dict_values(dict_: dict, decimals: int) -> dict: +# """ +# Round all the values in a dictionary to a given number of decimals. +# +# Parameters +# ---------- +# dict_ : dict +# dictionary of key, value pairs. Values can be numbers or strings. +# The function will only round the values that are numbers. 
+# decimals : int +# number of decimals to round to +# +# Returns +# ------- +# dict +# dictionary with all the values rounded to the given number of decimals +# """ +# new_dict = dict_.copy() +# for key, value in new_dict.items(): +# new_dict[key] = round_if_numeric(value, decimals) +# return new_dict def is_integer_number(n: Union[int, float]) -> bool: @@ -446,245 +642,114 @@ def subject_list2txt(dataset: BaseDataset, """ output_dir.mkdir(exist_ok=True, parents=True) filepaths = {} - for modality in dataset.modalities: - if not modality.compliant: - filepath = output_dir / slugify(modality.name) - list2txt(filepath, modality.non_compliant_subject_names) - filepaths[modality.name] = filepath + for seq_name in dataset.get_sequence_ids(): + filepath = output_dir / f'{convert2ascii(seq_name)}.txt' + subj_with_sequence = dataset.get_subject_ids(seq_name) + list2txt(filepath, subj_with_sequence) + filepaths[seq_name] = filepath return filepaths -def _get_runs_by_echo(modality: Modality, decimals: int = 3): - """ - Given a modality, return a dictionary with the echo time as key and a list - of run parameters as value. The run parameters are rounded to the given - number of decimals. - - Parameters - ---------- - modality - decimals - - Returns - ------- - - """ - runs_in_modality = [] - for subject in modality.subjects: - for session in subject.sessions: - runs_in_modality.extend(session.runs) - - def _sort_key(run_): - return run_.echo_time - - run_params_by_te = {} - runs_in_modality = sorted(runs_in_modality, key=_sort_key) - for te, group in groupby(runs_in_modality, key=_sort_key): - te_ = round_if_numeric(te, decimals) - for i_run in list(group): - if te_ not in run_params_by_te: - run_params_by_te[te_] = [] - run_params_by_te[te_].append(round_dict_values(i_run.params, - decimals)) - return run_params_by_te - - -def _validate_reference(dict_, default=None): - """ - Check if a dictionary is valid. 
A dictionary is valid if it is not empty - and if at least one of its values is different from the default value. - - Parameters - ---------- - dict_: dict - dictionary to check - default: any - default value to compare the values of the dictionary to - - Returns - ------- - bool - True if the dictionary is valid, False otherwise +def convert2ascii(value, allow_unicode=False): """ - if not dict_: - return False - if all(value == default for value in dict_.values()): - return False - # flag = True - # for value in dict_.values(): - # if value and ('Cannot Compute Majority' in value): - # flag = False - # continue - return True - - -def round_if_numeric(value: Union[int, float], - decimals: int = 3) -> Union[int, float, np.ndarray]: + Taken from https://github.com/django/django/blob/master/django/utils/text.py + Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated + dashes to single dashes. Remove characters that aren't alphanumerics, + underscores, or hyphens. Convert to lowercase. Also strip leading and + trailing whitespace, dashes, and underscores. """ - Round a number to a given number of decimals. 
+ value = str(value) + if allow_unicode: + value = unicodedata.normalize('NFKC', value) + else: + value = unicodedata.normalize( + 'NFKD', value).encode('ascii', 'ignore').decode('ascii') + value = re.sub(r'[^\w\s-]', '', value) + return re.sub(r'[-\s]+', '-', value).strip('-_') - Parameters - ---------- - value: int or float - number to round - decimals : int - number of decimals to round to - Returns - ------- - int or float - rounded number - """ - # For historical reasons, bool is a type of int, but we cannot - # apply np.round on bool - if isinstance(value, bool): - return value - elif isinstance(value, (int, float)): - # round using numpy and then convert to native python type - return np.around(value, decimals=decimals).item() - return value +# def round_if_numeric(value: Union[int, float], +# decimals: int = 3) -> Union[int, float, np.ndarray]: +# """ +# Round a number to a given number of decimals. +# +# Parameters +# ---------- +# value: int or float +# number to round +# decimals : int +# number of decimals to round to +# +# Returns +# ------- +# int or float +# rounded number +# """ +# # For historical reasons, bool is a type of int, but we cannot +# # apply np.round on bool +# if isinstance(value, bool): +# return value +# elif isinstance(value, (int, float)): +# # round using numpy and then convert to native python type +# return np.around(value, decimals=decimals).item() +# return value -def _check_single_run(modality: Modality, - decimals: int, - run_te: float, - run_params: dict, - tolerance: float = 0.1): +def compute_majority(dataset: BaseDataset, seq_name, config_dict=None): """ - Check if a single run is compliant with the reference protocol. 
+ Compute the most frequent values for each acquisition parameter Parameters ---------- - modality : Modality - modality node from BaseDataset - decimals: int - number of decimals to round to - run_te: float - echo time of the run - run_params: dict - parameters of the run - tolerance: float - tolerance for the difference between the parameters of the run and the - reference protocol - - Returns - ------- - tuple - tuple containing the echo time of reference protocol, - and the delta between the parameters of the run and the reference - protocol - """ - te = round_if_numeric(run_te, decimals) - params = round_dict_values(run_params, decimals) - ignore_keys = ['modality', 'BodyPartExamined'] - echo_times = modality.get_echo_times() - if not echo_times: - raise ReferenceNotSetForModality(modality.name) - - if te in echo_times: - reference = modality.get_reference(te) - te_ref = te - else: - raise ReferenceNotSetForEchoTime(modality.name, run_te) - - delta = param_difference(params, reference, - ignore=ignore_keys, - tolerance=tolerance) - return delta, te_ref - + dataset : BaseDataset + dataset should contain multiple sequences. The most frequent values + will be computed for each sequence in the dataset -def _check_against_reference(modality, decimals, tolerance): """ - Given a modality, check if the parameters of each run are compliant with - the reference protocol. If all the runs of a session are non-compliant, - the session is added to the list of non-compliant sessions. If all the - sessions of a subject are non-compliant, the subject is added to the list - of non-compliant subjects. If all the subjects of a modality are - non-compliant, the function returns False. 
- - The delta between the parameters of a run and the reference protocol is - stored in modality.non_compliant_data + # if config_dict is not None: + # TODO: parse begin and end times + # TODO: add option to exclude subjects + seq_dict = {} + most_freq_vals = {} + + if config_dict is None: + logger.error('No horizontal audit config found. ' + f'Returning empty reference protocol for {seq_name}') + return most_freq_vals + + include_params = config_dict.get('include_parameters', None) + if include_params is None: + logger.error('No parameters specified for horizontal audit. ' + f'Returning empty reference protocol for {seq_name}') + return most_freq_vals + + stratify_by = config_dict.get('stratify_by', None) + + for subj, sess, runs, seq in dataset.traverse_horizontal(seq_name): + sequence_id = modify_sequence_name(seq, stratify_by, None) + if sequence_id not in seq_dict: + seq_dict[sequence_id] = [] + seq_dict[sequence_id].append(seq) + + for seq_id in seq_dict: + try: + most_freq_vals[seq_id] = majority_values( + seq_dict[seq_id], + default=Unspecified, + include_params=include_params) + except (CannotComputeMajority, ValueError) as e: + logger.warning(f"Could not compute reference " + f"protocol for {seq_name} : {e}.") + return most_freq_vals - Parameters - ---------- - modality : Modality - modality node of a dataset - decimals : int - number of decimals to round the parameters - tolerance : float - tolerance to consider a parameter compliant - Returns - ------- - Modality - True if modality is compliant, False otherwise - """ - # Set default flags as True, if there is some non-compliance - # flags will be set to false. Default value in modality class is True, - # but we cannot rely on that default value. 
- modality.compliant = True - for subject in modality.subjects: - subject.compliant = True - for session in subject.sessions: - session.compliant = True - for i_run in session.runs: - try: - i_run.delta, te_ref = _check_single_run(modality, - decimals, - i_run.echo_time, - i_run.params, - tolerance=tolerance) - if i_run.delta: - modality.add_non_compliant_subject_name(subject.name) - _store_non_compliance(modality, i_run.delta, te_ref, - subject.name, session.name) - # NC = non_compliant - # If any run is NC, then session is NC. - session.compliant = False - # If any session is NC, then subject is NC. - subject.compliant = False - # If any subject is NC, then modality is NC. - modality.compliant = False - except ReferenceNotSetForEchoTime as e: - modality.add_error_subject_names(f'{subject.name}_' - f'{session.name}') - modality.add_non_compliant_subject_name(subject.name) - # _store_non_compliance(modality, i_run.delta, 'Various', - # subject.name, session.name) - # If any run is NC, then session is NC. - session.compliant = False - # If any session is NC, then subject is NC. - subject.compliant = False - # If any subject is NC, then modality is NC. - modality.compliant = False - logger.info(e) - except ReferenceNotSetForModality as e: - modality.add_error_subject_names(f'{subject.name}_' - f'{session.name}') - logger.info(e) - - if session.compliant: - # If after all runs, session is still compliant, then the - # session is added to the list of compliant sessions. - subject.add_compliant_session_name(session.name) - if subject.compliant: - # If after all sessions, subject is still compliant, then the - # subject is added to the list of compliant subjects. - modality.add_compliant_subject_name(subject.name) - # If after all the subjects, modality is compliant, then the - # modality should be added to the list of compliant sessions. 
- return modality - - -def _cli_report(dataset: BaseDataset, report_name): +def _cli_report(hz_audit: dict, report_name): """ CLI report generator. Generate a single line report for the dataset Parameters ---------- - dataset : BaseDataset - BaseDataset instance for the dataset which is to be checked report_name : str Filename for the report @@ -692,69 +757,40 @@ def _cli_report(dataset: BaseDataset, report_name): ------- """ + result = {} # For all the modalities calculate the percent of non-compliance - for modality in dataset.modalities: - percent_non_compliant = len(modality.non_compliant_subject_names) \ - / len(modality.subjects) + non_compliant_ds = hz_audit['non_compliant'] + compliant_ds = hz_audit['compliant'] + undetermined_ds = hz_audit['undetermined'] + if not (compliant_ds.get_sequence_ids() + or non_compliant_ds.get_sequence_ids() + or undetermined_ds.get_sequence_ids()): + logger.error('No report generated for horizontal audit.') + return + + for seq_id in non_compliant_ds.get_sequence_ids(): + ncomp_sub_ids = len(non_compliant_ds.get_subject_ids(seq_id)) + comp_sub_ids = len(compliant_ds.get_subject_ids(seq_id)) + total_subjects = comp_sub_ids + ncomp_sub_ids + + percent_non_compliant = ncomp_sub_ids / total_subjects if percent_non_compliant > 0: - result[modality.name] = str(100 * percent_non_compliant) + result[seq_id] = str(100 * percent_non_compliant) # Format the result as a string if result: modalities = ', '.join(result.keys()) - ret_string = f'In {dataset.name} dataset,' \ + ret_string = f'In {compliant_ds.name} dataset,' \ f' modalities "{modalities}" are non-compliant. ' \ f'See {report_name} for report' else: - ret_string = f'In {dataset.name} dataset, all modalities ' \ + ret_string = f'In {compliant_ds.name} dataset, all modalities ' \ f'are compliant. See {report_name} for report.' 
+ print(ret_string) return ret_string -def _store_non_compliance(modality: Modality, - delta: list, - echo_time: float, - subject_name: str, - session_name: str): - """ - Store the sources of non-compliance like flip angle, ped, tr, te - - Parameters - ---------- - modality : MRdataset.base.Modality - The modality node, in which these sources of non-compliance were found - so that these values can be stored - delta : list - A list of differences between the reference and the non-compliant - echo_time: float - Echo time of run - subject_name : str - Non-compliant subject's name - session_name : str - Non-compliant session name - """ - for entry in delta: - if entry[0] == 'change': - _, parameter, [new_value, ref_value] = entry - if echo_time is None: - echo_time = 1.0 - ref_value = make_hashable(ref_value) - new_value = make_hashable(new_value) - - modality.add_non_compliant_param( - parameter, echo_time, ref_value, new_value, - f'{subject_name}_{session_name}' - ) - elif entry[0] == 'add': - for key, value in entry[2]: - if echo_time is None: - echo_time = 1.0 - modality.add_non_compliant_param( - key, echo_time, value, None, - f'{subject_name}_{session_name}') - - def _datasets_processed(dir_path, ignore_case=True): """ Add function to retrieve the names of projects that have been processed in @@ -779,10 +815,26 @@ def _datasets_processed(dir_path, ignore_case=True): return [x.name.lower() for x in dir_path.iterdir() if x.is_dir()] -def files_modified_since(last_reported_on: str, - input_dir: Union[str, Path], - output_dir: Union[str, Path], - time_format: str = 'timestamp') -> List: +def _get_time(time_format: str, last_reported_on: str): + str_format = DATETIME_FORMAT + if time_format == 'timestamp': + mod_time = datetime.fromtimestamp(float(last_reported_on)).strftime( + str_format) + elif time_format == 'datetime': + try: + mod_time = parser.parse(last_reported_on, dayfirst=False) + except ValueError as exc: + raise ValueError(f'Invalid time format. 
Use {str_format}.') from exc + else: + raise NotImplementedError("Expected one of ['timestamp', 'datetime']." + f'Got {time_format}') + return mod_time + + +def folders_modified_since(last_reported_on: str, + input_dir: Union[str, Path], + output_dir: Union[str, Path], + time_format: str = 'timestamp') -> List: """ Find files modified since a given time @@ -815,20 +867,10 @@ def files_modified_since(last_reported_on: str, TimeoutExpired If the command `find` times out. """ - str_format = '%m/%d/%Y %H:%M:%S' - if time_format == 'timestamp': - mod_time = datetime.fromtimestamp(float(last_reported_on)).strftime( - str_format) - elif time_format == 'datetime': - try: - mod_time = parser.parse(last_reported_on, dayfirst=False) - except ValueError as exc: - raise ValueError(f'Invalid time format. Use {str_format}.') from exc - else: - raise NotImplementedError("Expected one of ['timestamp', 'datetime']." - f'Got {time_format}') + modified_folders = set() - out_path = Path(output_dir) / 'modified_files_since.txt' + mod_time = get_datetime(last_reported_on) + out_path = Path(output_dir) / 'modified_folders_since.txt' if out_path.is_file(): out_path.unlink() @@ -837,8 +879,13 @@ def files_modified_since(last_reported_on: str, try: run(cmd, check=True, shell=True) modified_files = txt2list(out_path) - valid_files = [f for f in modified_files if Path(f).is_file()] - return valid_files + for f in modified_files: + if not Path(f).is_file(): + logger.warning(f'File {f} not found.') + if not is_dicom_file(f): + continue + else: + modified_folders.add(Path(f).parent) except FileNotFoundError as exc: logger.error( 'Process failed because file could not be found.\n %s', exc) @@ -850,6 +897,8 @@ def files_modified_since(last_reported_on: str, except TimeoutExpired as exc: logger.error('Process timed out.\n %s', exc) + return list(modified_folders) + def get_last_valid_record(folder_path: Path) -> Optional[tuple]: """ @@ -876,61 +925,452 @@ def get_last_valid_record(folder_path: Path) 
-> Optional[tuple]: num_records = len(lines) if i < -num_records: return None - last_line = lines[i] - last_reported_on, last_report_path, last_mrds_path, _ = \ - last_line.split(',') + last_line = lines[i].strip('\n').split(',') + last_reported_on, last_report_path, last_mrds_path = last_line if Path(last_mrds_path).is_file(): return last_reported_on, last_report_path, last_mrds_path i -= 1 -# def check_valid_files(fname: str, folder_path: Path) -> bool: -# """ -# Check if the expected files are present in the folder -# -# Parameters -# ---------- -# fname: str -# Name of the file -# folder_path : Path -# Absolute path to the folder where the files are expected to be present -# -# Returns -# ------- -# bool -# True if the files are present, False otherwise -# """ -# # report_path = report_fpath(folder_path, fname) -# mrds_path = mrds_fpath(folder_path, fname) -# # actually we don't need to check if the report is present -# # because we just need the mrds file, to update. -# return mrds_path.is_file() - - -def export_record(output_dir, filename, time_dict): - record_filepath = past_records_fpath(output_dir) - if not record_filepath.parent.is_dir(): - record_filepath.parent.mkdir(parents=True) - - with open(record_filepath, 'a', encoding='utf-8') as fp: - fp.write(f"{time_dict['utc']},{filename}," - f"{time_dict['date_time']}\n") - - def get_timestamps(): + """ + Get the current timestamp in UTC and local time + """ now = datetime.now(timezone.utc) now = now.replace(tzinfo=timezone.utc) ts = datetime.timestamp(now) date_time = now.strftime('%m/%d/%Y %H:%M:%S%z') return { - 'utc': ts, + 'utc' : ts, 'date_time': date_time } def export_subject_lists(output_dir: Union[Path, str], - dataset: BaseDataset, + non_compliant_ds: BaseDataset, folder_name: str) -> dict: - sub_lists_by_modality = subject_list2txt(dataset, output_dir/folder_name) - return sub_lists_by_modality + """ + Export subject lists for each sequence to a text file + """ + noncompliant_sub_by_seq = 
subject_list2txt(non_compliant_ds, + output_dir / folder_name) + return noncompliant_sub_by_seq + + +def folders_with_min_files(root: Union[Path, str], + pattern: Optional[str] = "*.dcm", + min_count=3) -> List[Path]: + """ + Returns all the folders with at least min_count of files + matching the pattern, one at time via generator. + + Parameters + ---------- + root : List[Path] + List of folder paths + pattern : str + pattern to filter_fn files + + min_count : int + size representing the number of files in folder + matching the input pattern + + Returns + ------- + List of folders + """ + + if not isinstance(root, (Path, str)): + raise ValueError('root must be a Path-like object (str or Path)') + + root = Path(root).resolve() + if not root.exists(): + raise ValueError('Root folder does not exist') + + terminals = find_terminal_folders(root) + + for folder in terminals: + if len([file_ for file_ in folder.rglob(pattern)]) >= min_count: + yield folder + + return + + +def is_folder_with_no_subfolders(fpath): + """ + Check if a folder has any subfolders + """ + if isinstance(fpath, str): + fpath = Path(fpath) + if not fpath.is_dir(): + raise FileNotFoundError(f'Folder not found: {fpath}') + + sub_dirs = [] + for file_ in fpath.iterdir(): + if file_.is_dir(): + sub_dirs.append(file_) + elif file_.suffix == '.dcm': + # you have reached a folder which contains '.dcm' files + break + + # sub_dirs = [file_ for file_ in fpath.iterdir() if file_.is_dir()] + return len(sub_dirs) < 1, sub_dirs + + +def save_audit_results(filepath: Union[str, Path], result_dict) -> None: + """ + Save a dataset to a file with extension .mrds.pkl + + Parameters + ---------- + filepath: Union[str, Path] + path to the dataset file + result_dict: dict + dictionary containing the compliant and non-compliant dataset object + + Returns + ------- + None + + Examples + -------- + .. 
code :: python + + from MRdataset import save_mr_dataset + my_dataset = import_dataset(data_source='/path/to/my/data/', + ds_format='dicom', name='abcd_baseline', + config_path='mri-config.json') + dataset = save_mr_dataset(filepath='/path/to/my/dataset.mrds.pkl', + mrds_obj=my_dataset) + """ + + # Extract extension from filename + EXT = '.adt.pkl' + ext = "".join(Path(filepath).suffixes) + assert ext == EXT, f"Expected extension {EXT}, Got {ext}" + parent_folder = Path(filepath).parent + try: + parent_folder.mkdir(exist_ok=True, parents=True) + except OSError as exc: + logger.error(f'Unable to create folder {parent_folder}' + ' for saving dataset') + raise exc + + with open(filepath, 'wb') as f: + # save dict of the object as pickle + pickle.dump(result_dict, f) + + +def find_terminal_folders(root, leave=True, position=0): + """ + Find all the terminal folders in a given directory + """ + try: + no_more_subdirs, sub_dirs = is_folder_with_no_subfolders(root) + except FileNotFoundError: + return [] + + if no_more_subdirs: + return [root, ] + + terminal = list() + for sd1 in tqdm(sub_dirs, leave=leave, position=position): + no_more_subdirs2, level2_subdirs = is_folder_with_no_subfolders(sd1) + if no_more_subdirs2: + terminal.append(sd1) + else: + for sd2 in level2_subdirs: + terminal.extend(find_terminal_folders(sd2, leave=False, + position=1)) + + return terminal + + +def get_datetime(date): + try: + date = datetime.strptime(date, DATETIME_FORMAT) + except ValueError as exc: + if 'unconverted data remains' in str(exc): + try: + date = datetime.strptime(date, DATE_FORMAT) + except ValueError as exc: + raise ValueError(f'Invalid date format. 
' + f'Use one of ' + f'[{DATE_FORMAT}, {DATETIME_FORMAT}]') from exc + return date + + +def log_latest_non_compliance(dataset, config_path, + filter_fn=None, + audit='hz', date=None, output_dir=None): + """ + Log the latest non-compliance data from recent sessions to a file + """ + nc_log = {} + ds_name = None + date = get_datetime(date) + + config = get_config(config_path=config_path, report_type=audit) + parameters = config.get("include_parameters", None) + + if audit == 'hz': + ds_name = dataset.name + nc_log = dataset.generate_nc_log(parameters, filter_fn, + date=date, + audit='hz', verbosity=1, + output_dir=None) + elif audit == 'vt': + ds_name = dataset.name + nc_log = dataset.generate_nc_log(parameters, filter_fn, + date=date, + audit='vt', verbosity=1, + output_dir=None) + + status_filepath = status_fpath(output_dir, audit) + if not status_filepath.parent.is_dir(): + status_filepath.parent.mkdir(parents=True) + + with open(status_filepath, 'w', encoding='utf-8') as fp: + for parameter in nc_log: + for i in nc_log[parameter]: + fp.write(f" {i['date']}, {ds_name}, {i['sequence_name']}," + f" {i['subject']}, {parameter} \n") + return None # status_filepath + + +def tuples2dict(mylist): + """ + Utility function used in jinja2 template. Not used in + the main code. Do not delete. + """ + result = {} + # each entry in mylist is a tuple of the form + # ((param_nc_sequence, param_ref_sequence), subject_id, path, seq) + for i in mylist: + param_tuple = i[0] + + # param_tuple[1] is the parameter from reference sequence + # param_tuple[0] is the parameter from the non-compliant sequence + param_sequence = param_tuple[0] + result.setdefault(param_sequence, []).append(i[1:3]) + return result + + +def valid_paths(files: Union[List, str]) -> Union[List[Path], Path]: + """ + If given a single path, the function will just check if it's valid. + If given a list of paths, the function validates if all the paths exist or + not. 
The paths can either be an instance of string or POSIX path. + + Parameters + ---------- + files : str or List[str] + The path or list of paths that must be validated + + Returns + ------- + List of POSIX Paths that exist on disk + """ + if files is None: + raise ValueError('Expected a valid path or Iterable, Got NoneType') + if isinstance(files, str) or isinstance(files, Path): + if not Path(files).is_file(): + raise FileNotFoundError('Invalid File {0}'.format(files)) + return Path(files).resolve() + elif isinstance(files, Iterable): + for file in files: + if not Path(file).is_file(): + raise FileNotFoundError('Invalid File {0}'.format(file)) + return [Path(f).resolve() for f in files] + else: + raise NotImplementedError('Expected str or Path or Iterable, ' + f'Got {type(files)}') + + +def modify_sequence_name(seq: "BaseSequence", stratify_by: str, + datasets) -> str: + """ + Modifies the sequence name to include the stratification value, if it + exists. + + Parameters + ---------- + + Returns + ------- + seq_name_with_stratify : str + Modified sequence name + """ + # TODO: change stratify_by from attributes to acquisition parameters + stratify_value = '' + if 'gre_field' in seq.name.lower(): + stratify_by = 'NonLinearGradientCorrection' + nlgc = seq[stratify_by].get_value() + if 'P' in nlgc: + stratify_value = 'P' + elif 'M' in nlgc: + stratify_value = 'M' + else: + stratify_value = '' + + seq_name_with_stratify = ATTRIBUTE_SEPARATOR.join([seq.name, + stratify_value]) + if datasets: + for ds in datasets: + ds.set_modified_seq_name(seq.name, seq_name_with_stratify) + + return seq_name_with_stratify + return seq.name + + +def get_config_from_file(config_path: Union[Path, str]) -> dict: + """ + Read the configuration file and return the contents as a dictionary + + Parameters + ---------- + config_path : Path or str + path to the configuration file + + Returns + ------- + dict + contents of the configuration file + """ + try: + config_path = Path(config_path) + 
except TypeError: + raise TypeError('Invalid path to the configuration file.' + f'Expected Path or str, got {type(config_path)}') + if not config_path.is_file(): + raise FileNotFoundError('Either provided configuration ' + 'file does not exist or it is not a ' + 'file.') + + # read json file + with open(config_path, 'r') as f: + try: + config = json.load(f) + except ValueError: + # json.decoder.JSONDecodeError is a subclass of ValueError + raise ValueError('Invalid JSON file provided in config_path ' + 'Expected a valid JSON file.') + + return config + + +def get_protocol_from_file(reference_path: Union[Path, str], + vendor: str = 'siemens') -> MRImagingProtocol: + """ + Extracts the reference protocol from the file. Supports only Siemens + protocols in xml format. Raises error otherwise. + + Parameters + ---------- + reference_path : Path | str + Path to the reference protocol file + vendor: str + Vendor of the scanner. Default is Siemens + + Returns + ------- + ref_protocol : MRImagingProtocol + Reference protocol extracted from the file + """ + # Extract reference protocol from file + ref_protocol = None + + if not isinstance(reference_path, Path): + reference_path = Path(reference_path) + + if not reference_path.is_file(): + raise FileNotFoundError(f'Unable to access {reference_path}. Maybe it' + f'does not exist or is not a file') + + # TODO: Add support for other file formats, like json and dcm + if reference_path.suffix != '.xml': + raise ValueError(f'Expected xml file, got {reference_path.suffix} file') + + # TODO: Add support for other vendors, like GE and Philips + if vendor == 'siemens': + ref_protocol = SiemensMRImagingProtocol(filepath=reference_path) + else: + raise NotImplementedError('Only Siemens protocols are supported') + + return ref_protocol + + +def infer_protocol(dataset: BaseDataset, + config: dict) -> MRImagingProtocol: + """ + Infers the reference protocol from the dataset. 
The reference protocol + is inferred by computing the majority for each of the + parameters for each sequence in the dataset. + + Parameters + ---------- + dataset: BaseDataset + Dataset to be checked for compliance + config: dict + Configuration + + Returns + ------- + ref_protocol : MRImagingProtocol + Reference protocol inferred from the dataset + """ + # TODO: Check for subset, if incomplete dataset throw error and stop + ref_protocol = MRImagingProtocol(f'reference_for_{dataset.name}') + + # create reference protocol for each sequence + for seq_name in dataset.get_sequence_ids(): + num_subjects = dataset.get_subject_ids(seq_name) + + # If subjects are less than 3, then we can't infer a reference protocol + if len(num_subjects) < 3: + logger.warning(f'Skipping {seq_name}. Not enough subjects to' + f' infer a reference protocol') + continue + + # If subjects are more than 3, then we can infer a reference protocol + ref_dict = compute_majority(dataset=dataset, + seq_name=seq_name, + config_dict=config) + if not ref_dict: + continue + # Add the inferred reference to the reference protocol + for seq_id, param_dict in ref_dict.items(): + ref_protocol.add_sequence_from_dict(seq_id, param_dict) + + return ref_protocol + + +def filter_epi_fmap_pairs(pair): + epi_substrings = ['epi', 'bold', 'rest', 'fmri', 'pasl', + 'asl', 'dsi', 'dti', 'dwi'] + fmap_substrings = ['fmap', 'fieldmap', 'map'] + if (has_substring(pair[0].lower(), epi_substrings) + and has_substring(pair[1].lower(), fmap_substrings)): + return True + if (has_substring(pair[1].lower(), epi_substrings) + and has_substring(pair[0].lower(), fmap_substrings)): + return True + return False + + +def has_substring(input_string, substrings): + """Check if a string contains any of the substrings""" + for substring in substrings: + if substring in input_string: + return True + return False + + +def previous_month(dt): + """Return the first day of the previous month.""" + return (dt.replace(day=1) - 
timedelta(days=1)).replace(day=1) + +def next_month(dt): + """Return the first day of the next month.""" + return (dt.replace(day=28) + timedelta(days=5)).replace(day=1) diff --git a/requirements_dev.txt b/requirements_dev.txt index c0cd95f..cb08f1e 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -3,5 +3,11 @@ pydicom nibabel dictdiffer jinja2 -weasyprint +protocol +hypothesis +pytest +bokeh +flake8 +requests +coverage diff --git a/setup.cfg b/setup.cfg index fe32731..7bfd1a1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -14,8 +14,24 @@ replace = __version__ = '{new_version}' [bdist_wheel] universal = 1 +# if deleting this setup.cfg, please move the flake8 config to .flake8 file [flake8] -exclude = docs +exclude = + docs, + */_version.py, + */tests/*.py, + plotting.py +filename = *.py +# E203 - whitespace before ':' +ignore = + E203, W503 +max-line-length = 80 +max-complexity = 12 +accept-encodings = utf-8 +inline-quotes = single +multiline-quotes = double +docstring-quotes = double + [tool:pytest] collect_ignore = ['setup.py'] diff --git a/setup.py b/setup.py index 2f7c7db..215b251 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,9 @@ "nibabel", "dictdiffer", "jinja2>=3.0.3", + "protocol", + "bokeh", + "tqdm" ] test_requirements = ['pytest>=3', 'hypothesis'] @@ -25,25 +28,22 @@ setup( author="Pradeep Raamana", author_email='raamana@gmail.com', - python_requires='>=3.6', + python_requires='>=3.8', classifiers=[ 'Development Status :: 2 - Pre-Alpha', 'Intended Audience :: Developers', 'License :: OSI Approved :: Apache Software License', 'Natural Language :: English', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', ], description="mrQA suite of tools offering automatic evaluation of " "protocol compliance", entry_points={ 'console_scripts': [ - 'mrqa=mrQA.cli:main', - 'mrqa_monitor=mrQA.monitor:main', - 
'mrqa_parallel=mrQA.run_parallel:main', - 'mrpc_subset=mrQA.run_subset:main' + 'mrqa=mrQA.cli:cli', + 'mrqa_monitor=mrQA.monitor:cli', + 'mrqa_parallel=mrQA.run_parallel:cli', + 'mrqa_subset=mrQA.run_subset:cli' ], }, install_requires=requirements, diff --git a/tox.ini b/tox.ini index 155280d..12d23d1 100644 --- a/tox.ini +++ b/tox.ini @@ -1,16 +1,29 @@ [tox] -envlist = py36, py37, py38, flake8 +envlist = py38, flake8 +isolated_build = True + [travis] python = 3.8: py38 - 3.7: py37 - 3.6: py36 [testenv:flake8] +# If you feel that flake8 is not reading these settings, update the +# flake8 section in setup.cfg basepython = python deps = flake8 -commands = flake8 mrQA tests +commands = flake8 mrQA +exclude = + docs, + */_version.py, + */tests/*.py, +filename = *.py +max-line-length = 80 +max-complexity = 12 +accept-encodings = utf-8 +inline-quotes = single +multiline-quotes = double +docstring-quotes = double [testenv] setenv = diff --git a/versioneer.py b/versioneer.py index a142bf5..af5a90d 100644 --- a/versioneer.py +++ b/versioneer.py @@ -623,7 +623,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and + # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = {r for r in refs if re.search(r'\d', r)} if verbose: @@ -1146,7 +1146,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags.
By ignoring refnames without digits, we - # filter out many common branch names like "release" and + # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = {r for r in refs if re.search(r'\d', r)} if verbose: