From b76627e3dcfee29ef8e306cc5e398a178cfcec73 Mon Sep 17 00:00:00 2001 From: Jennings Zhang Date: Sat, 3 Feb 2024 02:57:35 -0500 Subject: [PATCH] Tag-based overhaul --- .github/workflows/ci.yml | 8 ++ Dockerfile | 30 ++-- print_fetal_brain_atlases_options.py | 83 ----------- pubchrisvisual/one.py | 133 ------------------ requirements.txt | 3 + setup.py | 13 +- tests/__init__.py | 0 tests/examples.py | 25 ++++ tests/test_index.py | 61 ++++++++ {pubchrisvisual => visualdataset}/__init__.py | 2 +- visualdataset/__main__.py | 47 +++++++ visualdataset/args_types.py | 15 ++ visualdataset/index_nifti_dir.py | 29 ++++ visualdataset/json_arg_parser.py | 42 ++++++ visualdataset/manifest.py | 57 ++++++++ visualdataset/nifti_dataset.py | 81 +++++++++++ visualdataset/nifti_sidecar.py | 23 +++ .../types.py => visualdataset/settings.py | 6 +- 18 files changed, 419 insertions(+), 239 deletions(-) delete mode 100755 print_fetal_brain_atlases_options.py delete mode 100644 pubchrisvisual/one.py create mode 100644 tests/__init__.py create mode 100644 tests/examples.py create mode 100644 tests/test_index.py rename {pubchrisvisual => visualdataset}/__init__.py (95%) create mode 100644 visualdataset/__main__.py create mode 100644 visualdataset/args_types.py create mode 100644 visualdataset/index_nifti_dir.py create mode 100644 visualdataset/json_arg_parser.py create mode 100644 visualdataset/manifest.py create mode 100644 visualdataset/nifti_dataset.py create mode 100644 visualdataset/nifti_sidecar.py rename pubchrisvisual/types.py => visualdataset/settings.py (90%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 80c0f08..ca30d75 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -81,6 +81,14 @@ jobs: tags: ${{ steps.info.outputs.local_tag }} load: true cache-from: type=gha + build-args: extras_require=dev + + - name: Unit tests + run: | + docker run --rm \ + -v '${{ github.workspace }}:/src' -w /src \ + ${{ steps.info.outputs.local_tag }} \ + pytest -v --color=yes -o cache_dir=/tmp/pytest - name: Login to DockerHub if: (github.event_name == 'push' || github.event_name == 'release') && contains(steps.info.outputs.tags_csv, 'docker.io') diff --git a/Dockerfile b/Dockerfile index 7e481b3..7c3508f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,22 +1,26 @@ -# Python version can be changed, e.g. -# FROM python:3.8 -# FROM ghcr.io/mamba-org/micromamba:1.5.1-focal-cuda-11.3.1 -FROM docker.io/python:3.12.1-slim-bookworm +FROM docker.io/mambaorg/micromamba:1.5.5-bookworm-slim AS micromamba +FROM micromamba AS builder -LABEL org.opencontainers.image.authors="FNNDSC " \ - org.opencontainers.image.title="Publish ChRIS Public Dataset" \ - org.opencontainers.image.description="Mark the outputs of a feed as compatible with the public dataset viewer feature of ChRIS_ui." +RUN \ + --mount=type=cache,sharing=private,target=/home/mambauser/.mamba/pkgs,uid=57439,gid=57439 \ + --mount=type=cache,sharing=private,target=/opt/conda/pkgs,uid=57439,gid=57439 \ + micromamba -y -n base install -c conda-forge python=3.12.1 nibabel=5.2.0 numpy=1.26.3 tqdm=4.66.1 pydantic=2.6.0 -ARG SRCDIR=/usr/local/src/pl-visual-dataset +ARG SRCDIR=/home/mambauser/pl-visual-dataset +RUN mkdir "${SRCDIR}" WORKDIR ${SRCDIR} COPY requirements.txt . -RUN --mount=type=cache,sharing=private,target=/root/.cache/pip pip install -r requirements.txt +ARG MAMBA_DOCKERFILE_ACTIVATE=1 +RUN pip install -r requirements.txt -COPY . . +COPY --chown=mambauser:mambauser . . ARG extras_require=none -RUN pip install ".[${extras_require}]" \ - && cd / && rm -rf ${SRCDIR} +RUN pip install ".[${extras_require}]" && cd / && rm -rf ${SRCDIR} WORKDIR / -CMD ["pub"] +CMD ["visualdataset"] + +LABEL org.opencontainers.image.authors="FNNDSC " \ + org.opencontainers.image.title="Create ChRIS Visual Dataset" \ + org.opencontainers.image.description="Prepare a dataset for visualization with ChRIS_ui" diff --git a/print_fetal_brain_atlases_options.py b/print_fetal_brain_atlases_options.py deleted file mode 100755 index b743209..0000000 --- a/print_fetal_brain_atlases_options.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python -""" -Notes: - -``` -pubone --order 'kiho.nii.gz,serag.nii.gz,ali.nii.gz,aliexp.nii.gz' \ - --options "$(./print_atlas_options.py)" \ - --readme "Fetal brain T2 MRI atlas datasets curated by the Fetal-Neonatal Neuroimaging Developmental Science Center. https://www.fnndsc.org/" \ - incoming/ outgoing/ -``` -""" -import json -import sys - -from pubchrisvisual.types import ChrisViewerFileOptions, NiivueVolumeOptions - - -MRI_OPTIONS = NiivueVolumeOptions(colormap="gray", colorbarVisible=False) -LABEL_OPTIONS = NiivueVolumeOptions(colormap="roi_i256", colorbarVisible=False) - -CRL_MRI_OPTIONS = ChrisViewerFileOptions( - name="T2 MRI", - author="CRL (Ali Gholipour et al.)", - description="Fetal T2 atlas developed by the Computational Radiology Laboratory of " - "Boston Children's Hospital, Harvard Medical School.", - website="http://crl.med.harvard.edu/research/fetal_brain_atlas/", - citation=[ - "A Gholipour, CK Rollins, C Velasco-Annis, A Ouaalam, A Akhondi-Asl, O Afacan, C Ortinau, S Clancy, " - "C Limperopoulos, E Yang, JA Estroff, and SK Warfield. A normative spatiotemporal MRI atlas of the " - "fetal brain for automatic segmentation and analysis of early brain growth, Scientific Reports 7, " - "Article number: 476 (2017). http://www.nature.com/articles/s41598-017-00525-w", - "A Gholipour, C Limperopoulos, S Clancy, C Clouchoux, A Akhondi-Asl, J A Estroff, and S K Warfield. " - "Construction of a Deformable Spatiotemporal MRI Atlas of the Fetal Brain: Evaluation of Similarity " - "Metrics and Deformation Models. MICCAI 2014.", - "S Khan, L Vasung, B Marami, CK Rollins, O Afacan, C Ortinau, E Yang, SK Warfield, and A Gholipour. " - "Fetal Brain Growth Portrayed by a Spatiotemporal Diffusion Tensor MRI Atlas Computed From In Utero " - "Images. NeuroImage 2018. https://doi.org/10.1016/j.neuroimage.2018.08.030" - ], - niivue_defaults=MRI_OPTIONS -) - -CRL_REGIONAL_OPTIONS = CRL_MRI_OPTIONS | ChrisViewerFileOptions( - name="Regional cortex parcellation", - description="Regional cortex parcellation of the CRL fetal brain atlas.", - niivue_defaults=LABEL_OPTIONS -) - -CRL_TISSUE_OPTIONS = CRL_MRI_OPTIONS | ChrisViewerFileOptions( - name="Tissue segmentation (\"Olympic edition\")", - description="Tissue segmentation of the CRL fetal brain atlas.", - niivue_defaults=LABEL_OPTIONS -) - -KIHO_MRI_OPTIONS = ChrisViewerFileOptions( - name="T2 MRI", - author="FNNDSC (Kiho Im et al)", - description="Fetal T2 atlas developed by the MRI group of the Fetal-Neonatal Neuroimaging Developmental Science " - "Center at the Boston Children's Hospital", - website="https://research.childrenshospital.org/neuroim/", - niivue_defaults=MRI_OPTIONS -) - -SERAG_MRI_OPTIONS = ChrisViewerFileOptions( - name="T2 MRI", - author="Imperial College London (Serag et al.)", - description="Fetal T2 atlas developed at the Imperial College London.", - website="https://brain-development.org/brain-atlases/fetal-brain-atlases/fetal-brain-atlas-serag/", - niivue_defaults=MRI_OPTIONS -) - -FILENAME_MAPPING: dict[str, ChrisViewerFileOptions] = { - "kiho.nii.gz": KIHO_MRI_OPTIONS, - "serag.nii.gz": SERAG_MRI_OPTIONS, - "ali.nii.gz": CRL_MRI_OPTIONS, - "aliexp.nii.gz": CRL_MRI_OPTIONS, - "ali_tissue.nii.gz": CRL_TISSUE_OPTIONS, - "aliexp_tissue.nii.gz": CRL_TISSUE_OPTIONS, - "ali_regional.nii.gz": CRL_REGIONAL_OPTIONS, - "aliexp_regional.nii.gz": CRL_REGIONAL_OPTIONS -} - -if __name__ == "__main__": - json.dump(FILENAME_MAPPING, sys.stdout, indent=2) diff --git a/pubchrisvisual/one.py b/pubchrisvisual/one.py deleted file mode 100644 index a8ec81b..0000000 --- a/pubchrisvisual/one.py +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env python -import copy -import json -import shutil -import sys -from argparse import ArgumentParser, Namespace, ArgumentDefaultsHelpFormatter -from pathlib import Path -from typing import Iterable, Sequence - -from chris_plugin import chris_plugin -from pydantic import TypeAdapter, ConfigDict, ValidationError - -from pubchrisvisual import DISPLAY_TITLE -from pubchrisvisual.types import NiivueVolumeOptions, ChrisViewerFileOptions - -parser = ArgumentParser(description='Adds options for viewing one file of each subject using ChRIS_ui.', - formatter_class=ArgumentDefaultsHelpFormatter) -parser.add_argument('--order', type=str, - help='Order of preference for file names as a comma-separated list') -parser.add_argument('--options', type=str, default='{}', - help='Mapping of file names to default Niivue options. ' - 'Should either be a relative path or stringified JSON') -parser.add_argument('--readme', type=str, - help='README file content') - -VISIBLE = NiivueVolumeOptions(opacity=1.0) -INVISIBLE = NiivueVolumeOptions(opacity=0.0) - -_OPTIONS_MAPPING_ADAPTER = TypeAdapter(dict[str, ChrisViewerFileOptions]) -_OPTIONS_ADAPTER = TypeAdapter(ChrisViewerFileOptions) - - -@chris_plugin( - parser=parser, - title='Single Volume ChRIS Visual Dataset', - category='Utility', - min_memory_limit='256Mi', - min_cpu_limit='200m', -) -def main(options: Namespace, inputdir: Path, outputdir: Path): - configs = deserialize_mapping(path_or_fname(inputdir, options.options)) - order = [name.strip() for name in options.order.split(',')] if options.order else [] - print(DISPLAY_TITLE, flush=True) - shutil.copytree(inputdir, outputdir, dirs_exist_ok=True) - for folder in subject_folders(outputdir): - files = [p for p in folder.glob('*.nii.gz') if p.is_file()] - preferred = get_preferred_file(files, order) - for file in files: - base_niivue_config: NiivueVolumeOptions = VISIBLE if file is preferred else INVISIBLE - file_config: ChrisViewerFileOptions = copy.deepcopy(configs[file.name]) if file.name in configs else {} - if 'niivue_defaults' not in file_config: - file_config['niivue_defaults'] = {} - file_config['niivue_defaults'] = base_niivue_config | file_config['niivue_defaults'] - - if file.name not in configs: - print(f"warning: no file name given by --options matches {file}") - sidecar = file.with_suffix(file.suffix + '.chrisvisualdataset.volume.json') - with sidecar.open('wb') as f: - f.write(_OPTIONS_ADAPTER.dump_json(file_config)) - - if options.readme is not None: - (outputdir / 'README.txt').write_text(options.readme) - - if not options.options.startswith('{'): - delete_file_and_empty_parents(outputdir, options.options) - - (outputdir / '.chrisvisualdataset.root.json').write_text('{}') - - -def get_preferred_file(files: Sequence[Path], order: Sequence[str]) -> Path: - for preferred_name in order: - for file in files: - if file.name == preferred_name: - return file - return files[0] - - -def subject_folders(p: Path) -> Iterable[Path]: - return filter(is_dir_containing_nifti, p.glob('*')) - - -def is_dir_containing_nifti(p: Path) -> bool: - if not p.is_dir(): - return False - return next(filter(is_nifti_file, p.glob('*.nii.gz')), None) is not None - - -def is_nifti_file(p: Path): - return p.is_file() and p.name.endswith('.nii.gz') - - -def is_nifti(p: Path) -> bool: - return p.suffix == '.nii.gz' - - -def deserialize_mapping(x: str) -> dict[str, ChrisViewerFileOptions]: - try: - return _OPTIONS_MAPPING_ADAPTER.validate_json(x, strict=True) - except ValidationError as e: - print("Invalid value for --options") - for error in e.errors(): - if 'url' in error: - del error['url'] - print(json.dumps(error)) - sys.exit(1) - - -def path_or_fname(parent_dir: Path, value: str): - if value.startswith('{'): - return value - p = parent_dir / value - return p.read_text() if p.is_file() else value - - -def delete_file_and_empty_parents(root: Path, fname: str): - p = root / fname - if not p.is_file(): - return - p.unlink() - delete_empty_dirs(p.parent.resolve(), root.resolve()) - - -def delete_empty_dirs(p: Path, root: Path): - if p.resolve() == root: - return - if next(p.glob('*'), None) is not None: - return - p.rmdir() - delete_empty_dirs(p.parent, root) - - -if __name__ == '__main__': - main() diff --git a/requirements.txt b/requirements.txt index b156c18..76a6082 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,5 @@ chris_plugin==0.4.0 +nibabel~=5.2.0 +numpy~=1.26.3 +tqdm~=4.66.1 pydantic~=2.6.0 diff --git a/setup.py b/setup.py index ebeb77b..d674c90 100644 --- a/setup.py +++ b/setup.py @@ -19,18 +19,18 @@ def get_version(rel_path: str) -> str: setup( - name='publish-chris-dataset', - version=get_version('pubchrisvisual/__init__.py'), - description='Mark the outputs of a feed as compatible with the public dataset viewer feature of ChRIS_ui.', + name='chrisvisualdataset', + version=get_version('visualdataset/__init__.py'), + description='Prepare the outputs of a feed for the "visual datasets" feature of ChRIS_ui.', author='FNNDSC', author_email='dev@babymri.org', url='https://github.com/FNNDSC/pl-visual-dataset', - packages=['pubchrisvisual'], + packages=['visualdataset'], install_requires=['chris_plugin'], license='MIT', entry_points={ 'console_scripts': [ - 'pubone = pubchrisvisual.one:main' + 'visualdataset = visualdataset.__main__:main' ] }, classifiers=[ @@ -42,7 +42,8 @@ def get_version(rel_path: str) -> str: extras_require={ 'none': [], 'dev': [ - 'pytest~=7.1' + 'pytest~=8.0', + 'pytest-unordered~=0.5.2' ] } ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/examples.py b/tests/examples.py new file mode 100644 index 0000000..0706bc6 --- /dev/null +++ b/tests/examples.py @@ -0,0 +1,25 @@ +import sys +from typing import Sequence + +from pydantic import TypeAdapter + +from visualdataset.args_types import Matcher + + +FETAL_ATLAS_MATCHERS: Sequence[Matcher] = [ + *(Matcher(key='age', value=str(age), regex=f'Age {age}/') for age in range(10, 40, 1)), + + Matcher(key='author', value='Ahmed Serag et al.', regex=r'/serag\.nii\.gz$'), + Matcher(key='author', value="Ali Gholipour et al., CRL", regex=r'/ali.*\.nii\.gz$'), + Matcher(key='author', value="Kiho Im et al., FNNDSC", regex=r'/kiho\.nii\.gz$'), + + Matcher(key='institution', value="Boston Children's Hospital", regex=r'/(kiho|ali).*\.nii\.gz$'), + Matcher(key='institution', value="Imperial College London", regex=r'/serag\.nii\.gz$'), + + Matcher(key='type', value='mri', regex=r'/(ali|aliexp|kiho|serag)\.nii\.gz$'), + Matcher(key='type', value='segmentation', regex=r'/(ali|aliexp)_.+\.nii\.gz$'), +] + +if __name__ == '__main__': + adapter = TypeAdapter(Sequence[Matcher]) + print(adapter.dump_json(FETAL_ATLAS_MATCHERS).decode('utf-8')) diff --git a/tests/test_index.py b/tests/test_index.py new file mode 100644 index 0000000..5eec4a9 --- /dev/null +++ b/tests/test_index.py @@ -0,0 +1,61 @@ +from pathlib import Path +import pytest +from pytest_unordered import unordered + +from visualdataset.index_nifti_dir import index_nifti_dir +from tests.examples import FETAL_ATLAS_MATCHERS +from visualdataset.manifest import VisualDatasetFile + + +def test_index_dir(tmp_path: Path): + example_files = [ + 'Age 36/serag.nii.gz', + 'Age 37/ali.nii.gz', + 'Age 37/ali_regional.nii.gz', + 'Age 37/ali_tissue.nii.gz', + ] + for example in example_files: + p = tmp_path / example + p.parent.mkdir(parents=True, exist_ok=True) + p.touch() + + actual = list(index_nifti_dir(tmp_path, FETAL_ATLAS_MATCHERS)) + expected = [ + VisualDatasetFile( + path='Age 36/serag.nii.gz', + tags={ + 'age': '36', + 'author': 'Ahmed Serag et al.', + 'institution': 'Imperial College London', + 'type': 'mri' + }, + ), + VisualDatasetFile( + path='Age 37/ali.nii.gz', + tags={ + 'age': '37', + 'author': 'Ali Gholipour et al., CRL', + 'institution': "Boston Children's Hospital", + 'type': 'mri' + } + ), + VisualDatasetFile( + path='Age 37/ali_regional.nii.gz', + tags={ + 'age': '37', + 'author': 'Ali Gholipour et al., CRL', + 'institution': "Boston Children's Hospital", + 'type': 'segmentation' + } + ), + VisualDatasetFile( + path='Age 37/ali_tissue.nii.gz', + tags={ + 'age': '37', + 'author': 'Ali Gholipour et al., CRL', + 'institution': "Boston Children's Hospital", + 'type': 'segmentation' + } + ), + ] + assert actual == unordered(expected) diff --git a/pubchrisvisual/__init__.py b/visualdataset/__init__.py similarity index 95% rename from pubchrisvisual/__init__.py rename to visualdataset/__init__.py index 65b9b6f..a0ef5dd 100644 --- a/pubchrisvisual/__init__.py +++ b/visualdataset/__init__.py @@ -9,4 +9,4 @@ |_| """ -__version__ = '0.0.5' +__version__ = '0.1.0' diff --git a/visualdataset/__main__.py b/visualdataset/__main__.py new file mode 100644 index 0000000..5d88ea3 --- /dev/null +++ b/visualdataset/__main__.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +from argparse import ArgumentParser, Namespace, ArgumentDefaultsHelpFormatter +from pathlib import Path + +from chris_plugin import chris_plugin +from pydantic import TypeAdapter + +from visualdataset import DISPLAY_TITLE +from visualdataset.json_arg_parser import parse_args +from visualdataset.nifti_dataset import nifti_dataset +from visualdataset.settings import ChrisViewerFileOptions + +parser = ArgumentParser(description='Prepares a dataset for use with the ChRIS_ui ' + '"Visual Datasets" feature.', + formatter_class=ArgumentDefaultsHelpFormatter) +parser.add_argument('--matchers', type=str, required=True, + help='Regular expressions used to assign tags to files') +parser.add_argument('--options', type=str, + help='Metadata to go with tag sets') +parser.add_argument('-s', '--string-args', action='store_true', + help='Interpret --matchers and --options as data instead of paths') +parser.add_argument('--first-run-files', type=str, + help='List of files to show on first run, ' + 'as a stringified JSON list of paths relative to inputdir') +parser.add_argument('--readme', type=str, + help='README file content') + +_LIST_ADAPTER = TypeAdapter(list[str]) + + +@chris_plugin( + parser=parser, + title='Single Volume ChRIS Visual Dataset', + category='Utility', + min_memory_limit='1Gi', + min_cpu_limit='1000m', +) +def main(options: Namespace, inputdir: Path, outputdir: Path): + matchers, tag_options = parse_args(options.matchers, options.options, + None if options.string_args else inputdir) + first_run_files = [] if options.first_run_files is None else _LIST_ADAPTER.validate_json(options.first_run_files) + print(DISPLAY_TITLE, flush=True) + nifti_dataset(inputdir, outputdir, matchers, tag_options, first_run_files, options.readme) + + +if __name__ == '__main__': + main() diff --git a/visualdataset/args_types.py b/visualdataset/args_types.py new file mode 100644 index 0000000..b8b6703 --- /dev/null +++ b/visualdataset/args_types.py @@ -0,0 +1,15 @@ +import functools +import re + +from pydantic import BaseModel + + +class Matcher(BaseModel): + key: str + value: str + regex: str + + @functools.cached_property + def re(self): + return re.compile(self.regex) + diff --git a/visualdataset/index_nifti_dir.py b/visualdataset/index_nifti_dir.py new file mode 100644 index 0000000..22a9a4a --- /dev/null +++ b/visualdataset/index_nifti_dir.py @@ -0,0 +1,29 @@ +import os.path +from pathlib import Path, PurePath +from typing import Iterator, Sequence + +from visualdataset.args_types import Matcher +from visualdataset.manifest import VisualDatasetFile + + +def index_nifti_dir(input_dir: Path, matchers: Sequence[Matcher]) -> Iterator[VisualDatasetFile]: + """ + Scan a directory for files matching the matchers. + """ + nifti_files = filter(os.path.isfile, input_dir.rglob('*.nii.gz', case_sensitive=False)) + rel_paths = (p.relative_to(input_dir) for p in nifti_files) + matches = (match_file(p, matchers) for p in rel_paths) + return filter(_has_tags, matches) + + +def match_file(path: PurePath, matchers: Sequence[Matcher]) -> VisualDatasetFile: + tags = { + matcher.key: matcher.value + for matcher in matchers + if matcher.re.search(str(path)) is not None + } + return VisualDatasetFile(path=PurePath(path), tags=tags) + + +def _has_tags(match: VisualDatasetFile) -> bool: + return len(match.tags) > 0 diff --git a/visualdataset/json_arg_parser.py b/visualdataset/json_arg_parser.py new file mode 100644 index 0000000..ea5c8d7 --- /dev/null +++ b/visualdataset/json_arg_parser.py @@ -0,0 +1,42 @@ +import json +import sys +from pathlib import Path +from typing import Sequence, TypeVar, Type + +from pydantic import BaseModel, ValidationError + +from visualdataset.args_types import Matcher +from visualdataset.manifest import OptionsLink + + +def parse_args(matchers: str | None, options: str | None, input_dir: Path | None, + ) -> tuple[Sequence[Matcher], Sequence[OptionsLink]]: + if input_dir: + matchers_str = '[]' if matchers is None else (input_dir / matchers).read_text() + options_str = '[]' if options is None else (input_dir / options).read_text() + else: + matchers_str = '[]' if matchers is None else matchers + options_str = '[]' if options is None else options + matchers_list = deserialize_list(matchers_str, Matcher, '--matchers') + options_list = deserialize_list(options_str, OptionsLink, '--options') + return matchers_list, options_list + + +_M = TypeVar('_M', bound=BaseModel) + + +def deserialize_list(s: str, t: Type[_M], flag: str) -> Sequence[_M]: + try: + data = json.loads(s) + except json.JSONDecodeError: + print(f'Invalid value for {flag}: not JSON') + sys.exit(1) + if not isinstance(data, list): + print(f'Invalid value for {flag}: not JSON list') + sys.exit(1) + try: + return [t.model_validate(x, strict=True) for x in data] + except ValidationError as e: + print(f"Invalid value for {flag}:") + print(e) + sys.exit(1) diff --git a/visualdataset/manifest.py b/visualdataset/manifest.py new file mode 100644 index 0000000..d1c262b --- /dev/null +++ b/visualdataset/manifest.py @@ -0,0 +1,57 @@ +from pathlib import PurePath +from pydantic import BaseModel, ConfigDict +from typing import Sequence, FrozenSet, Tuple, Mapping, Optional, Set + +from visualdataset.settings import ChrisViewerFileOptions + + +class VisualDatasetFile(BaseModel): + """ + Index data about a file of a "visual dataset". + """ + path: PurePath + """ + Path of file relative to the plugin instance's output directory. + """ + tags: Mapping[str, str] + """ + Metadata as key-value pairs which identify the file. + """ + has_sidecar: bool = False + """ + Whether or not the file has a corresponding `.chrisvisualdataset.volume.json` sidecar file. + """ + + __pydantic_config__ = ConfigDict(extra='forbid') + + +class OptionsLink(BaseModel): + """ + An association between some options and a set of tags. + """ + match: FrozenSet[Tuple[str, str]] + options: ChrisViewerFileOptions + + +class VisualDatasetManifest(BaseModel): + """ + A list of all the files and metadata of a "visual dataset". + """ + tags: Mapping[str, Set[str]] + """ + All known tags and all known values for each tag. + """ + files: Sequence[VisualDatasetFile] + """ + Files in this dataset. + """ + options: Sequence[OptionsLink] + """ + Options for files. + """ + first_run_files: Sequence[int] + """ + Index numbers into ``files`` for which files to show when the viewer is first opened. + """ + + __pydantic_config__ = ConfigDict(extra='forbid') diff --git a/visualdataset/nifti_dataset.py b/visualdataset/nifti_dataset.py new file mode 100644 index 0000000..06c94e5 --- /dev/null +++ b/visualdataset/nifti_dataset.py @@ -0,0 +1,81 @@ +import sys +from pathlib import Path +from typing import Sequence, Optional, Mapping, Set + +from tqdm import tqdm + +from visualdataset.args_types import Matcher +from visualdataset.index_nifti_dir import index_nifti_dir +from visualdataset.manifest import VisualDatasetFile, OptionsLink, VisualDatasetManifest +from visualdataset.nifti_sidecar import create_sidecar + + +def nifti_dataset( + input_dir: Path, + output_dir: Path, + matchers: Sequence[Matcher], + options: Sequence[OptionsLink], + first_run_files: Sequence[str], + readme: Optional[str] +): + with tqdm(desc='Scanning input directory...'): + index = [i.model_copy(update={'has_sidecar': True}) for i in index_nifti_dir(input_dir, matchers)] + + if not index: + print(f'Error: nothing matched for: {[m.regex for m in matchers]}') + sys.exit(1) + + first_run_index_nums = find_first_run_files(input_dir, index, first_run_files) + + with tqdm(index, desc='Writing outputs') as pbar: + for file in pbar: + output_path = output_dir / file.path + output_path.parent.mkdir(parents=True, exist_ok=True) + sidecar_path = output_path.with_suffix(output_path.suffix + '.chrisvisualdataset.volume.json') + create_sidecar(input_dir / file.path, sidecar_path) + + manifest = VisualDatasetManifest( + tags=aggregate_tags(index), + files=index, + options=options, + first_run_files=first_run_index_nums + ) + + manifest_path = output_dir / '.chrisvisualdataset.tagmanifest.json' + manifest_path.write_text(manifest.model_dump_json()) + + if readme is not None: + readme_path = output_dir / 'README.txt' + readme_path.write_text(readme) + + +def aggregate_tags(index: Sequence[VisualDatasetFile]) -> Mapping[str, Set[str]]: + """ + Get all tag and all of their possible values. + """ + tags = {} + for file in index: + for key, value in file.tags.items(): + if key not in tags: + tags[key] = set() + tags[key].add(value) + return tags + + +def find_first_run_files( + input_dir: Path, + index: Sequence[VisualDatasetFile], + first_run_files: Sequence[str] +) -> Sequence[int]: + """ + Find all elements of ``first_run_files`` as paths in ``index``, then return a list of their array index numbers. + """ + first_run_index_nums = [] + indexed_paths = [str(file.path) for file in index] + for file in first_run_files: + try: + first_run_index_nums.append(indexed_paths.index(file)) + except ValueError: + print(f'File was not matched: {file}') + sys.exit(1) + return first_run_index_nums diff --git a/visualdataset/nifti_sidecar.py b/visualdataset/nifti_sidecar.py new file mode 100644 index 0000000..4ba9e48 --- /dev/null +++ b/visualdataset/nifti_sidecar.py @@ -0,0 +1,23 @@ +from pathlib import Path + +import numpy as np +import nibabel as nib +from pydantic import TypeAdapter + +from visualdataset.settings import NiivueVolumeSettings + +_SETTINGS_ADAPTER = TypeAdapter(NiivueVolumeSettings) + + +def create_sidecar(img: Path, output: Path): + cal_min, cal_max = get_range(img) + settings = NiivueVolumeSettings(cal_min=cal_min, cal_max=cal_max) + output.write_bytes(_SETTINGS_ADAPTER.dump_json(settings)) + + +def get_range(img: Path): + vol = nib.load(img) + data = vol.get_fdata() + cal_min = np.min(data) + cal_max = np.max(data) + return cal_min, cal_max diff --git a/pubchrisvisual/types.py b/visualdataset/settings.py similarity index 90% rename from pubchrisvisual/types.py rename to visualdataset/settings.py index 2edbb01..83ed72c 100644 --- a/pubchrisvisual/types.py +++ b/visualdataset/settings.py @@ -6,9 +6,9 @@ from pydantic import ConfigDict, TypeAdapter, HttpUrl -class NiivueVolumeOptions(TypedDict): +class NiivueVolumeSettings(TypedDict): """ - Options supported by Niivue for volumes. + Settings supported by Niivue for volumes. https://github.com/niivue/niivue-react/blob/d56dcd2b3f58ce854686e77963f3a7a89599765f/src/model.ts#L30-L76 """ @@ -48,7 +48,7 @@ class ChrisViewerFileOptions(TypedDict): """ Website for the dataset """ - niivue_defaults: NotRequired[NiivueVolumeOptions] + niivue_defaults: NotRequired[NiivueVolumeSettings] """ Default volume rendering options """