From 085dbeef435e22296792e188601f4a0aeecac8f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Ho=CC=88rst?= Date: Tue, 2 Apr 2024 14:26:02 +0200 Subject: [PATCH] Adding DICOM module as another WSI-Reader --- PathoPatch.ipynb | 115 ++++---- README.md | 27 +- docs/DICOM.md | 57 ++++ environment.yaml | 2 + examples/filelist.csv | 2 + examples/patch_extraction.yaml | 7 +- pathopatch/cli.py | 14 +- pathopatch/config/config.py | 2 +- pathopatch/patch_extraction/dataset.py | 8 +- .../patch_extraction/patch_extraction.py | 184 +++++++----- pathopatch/patch_extraction/process_batch.py | 15 +- pathopatch/utils/patch_util.py | 65 ++--- pathopatch/wsi_interfaces/__init__.py | 14 + .../cucim_deepzoom.py | 12 +- .../wsi_interfaces/openslide_deepzoom.py | 30 ++ .../wsi_interfaces/wsidicomizer_openslide.py | 274 ++++++++++++++++++ requirements.txt | 2 + requirements_develop.txt | 26 -- setup.py | 4 +- test_database/filelist.csv | 2 + .../annotations_filtering/config.yaml | 2 +- .../annotations_simple/config.yaml | 4 +- .../annotations_store_masks/config.yaml | 2 +- .../annotations_tissue_mask/config.yaml | 2 +- .../config.yaml | 2 +- .../preprocessing/baseline/config.yaml | 2 +- .../baseline_dataset/config.yaml | 2 +- .../static_test_files/preprocessing/cli.yaml | 2 +- .../complex_overlap_dataset/config.yaml | 2 +- .../preprocessing/complex_setup/config.yaml | 2 +- .../complex_setup_dataset/config.yaml | 2 +- .../preprocessing/dicom/dicom.yaml | 20 ++ .../preprocessing/dicom/openslide.yaml | 20 ++ .../preprocessing/downsample/config.yaml | 2 +- .../downsample_dataset_dataset/config.yaml | 2 +- .../preprocessing/filelist/config.yaml | 12 + .../preprocessing/filelist/filelist.csv | 2 + .../filelist/results/CMU-1/metadata.yaml | 15 + .../filelist/results/processed.json | 5 + .../preprocessing/macenko/load_macenko.yaml | 2 +- .../preprocessing/macenko/test_macenko.yaml | 2 +- .../preprocessing/roi/config.yaml | 2 +- .../preprocessing/roi_context/config.yaml | 2 +- 
.../target_magnification/config.yaml | 2 +- .../target_magnification_dataset/config.yaml | 2 +- .../preprocessing/target_mpp/config.yaml | 2 +- .../target_mpp_dataset/config.yaml | 2 +- .../target_mpp_macenko/config.yaml | 2 +- .../target_mpp_macenko_dataset/config.yaml | 2 +- tests/test_core_modules/test_filelist.py | 88 ++++++ tests/test_dicom_module/__init__.py | 0 tests/test_dicom_module/test_dicom_loader.py | 156 ++++++++++ 52 files changed, 986 insertions(+), 242 deletions(-) create mode 100644 docs/DICOM.md create mode 100644 examples/filelist.csv create mode 100644 pathopatch/wsi_interfaces/__init__.py rename pathopatch/{patch_extraction => wsi_interfaces}/cucim_deepzoom.py (88%) create mode 100644 pathopatch/wsi_interfaces/openslide_deepzoom.py create mode 100644 pathopatch/wsi_interfaces/wsidicomizer_openslide.py delete mode 100644 requirements_develop.txt create mode 100644 test_database/filelist.csv create mode 100644 tests/static_test_files/preprocessing/dicom/dicom.yaml create mode 100644 tests/static_test_files/preprocessing/dicom/openslide.yaml create mode 100644 tests/static_test_files/preprocessing/filelist/config.yaml create mode 100644 tests/static_test_files/preprocessing/filelist/filelist.csv create mode 100644 tests/static_test_files/preprocessing/filelist/results/CMU-1/metadata.yaml create mode 100644 tests/static_test_files/preprocessing/filelist/results/processed.json create mode 100644 tests/test_core_modules/test_filelist.py create mode 100644 tests/test_dicom_module/__init__.py create mode 100644 tests/test_dicom_module/test_dicom_loader.py diff --git a/PathoPatch.ipynb b/PathoPatch.ipynb index cd44411..ad0ad65 100644 --- a/PathoPatch.ipynb +++ b/PathoPatch.ipynb @@ -1,26 +1,10 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "authorship_tag": "ABX9TyOSg5Tomy2ythze0d941UHb", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - 
"language_info": { - "name": "python" - } - }, "cells": [ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" @@ -28,21 +12,21 @@ }, { "cell_type": "markdown", - "source": [ - "# PathoPatch Example\n" - ], "metadata": { "id": "GA0VTVQzkmEJ" - } + }, + "source": [ + "# PathoPatch Example\n" + ] }, { "cell_type": "markdown", - "source": [ - "### 1. Installation (OpenSlide, CuCIM, PathoPatch)" - ], "metadata": { "id": "chE1_Uyxk4Lt" - } + }, + "source": [ + "### 1. Installation (OpenSlide, CuCIM, PathoPatch)" + ] }, { "cell_type": "code", @@ -56,8 +40,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[33m\r0% [Working]\u001b[0m\r \rHit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease\n", "Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 InRelease\n", @@ -87,9 +71,7 @@ }, { "cell_type": "code", - "source": [ - "!pip install openslide-python pathopatch" - ], + "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -97,11 +79,10 @@ "id": "WkCsLuQAj41e", "outputId": "796bb818-c29c-4a22-df88-9eecebc897ff" }, - "execution_count": 2, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Requirement already satisfied: openslide-python in /usr/local/lib/python3.10/site-packages (1.3.1)\n", "Requirement already satisfied: pathopatch in /usr/local/lib/python3.10/site-packages (0.9.5.1b0)\n", @@ -173,13 +154,14 @@ "\u001b[0m" ] } + ], + "source": [ + "!pip install openslide-python pathopatch" ] }, { "cell_type": "code", - "source": [ - "!pip install cucim" - ], + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -187,11 +169,10 @@ "id": "xdsM1hLnkC1D", "outputId": "e9209dc9-7de8-44dc-b0a2-64fa1c355ec5" }, - "execution_count": 3, "outputs": [ { 
- "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Collecting cucim\n", " Downloading cucim-23.10.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (43 kB)\n", @@ -207,24 +188,23 @@ "\u001b[0m" ] } + ], + "source": [ + "!pip install cucim" ] }, { "cell_type": "markdown", - "source": [ - "## 2. Download files" - ], "metadata": { "id": "MNTTsbCblo-Q" - } + }, + "source": [ + "## 2. Download files" + ] }, { "cell_type": "code", - "source": [ - "!mkdir wsi_data\n", - "!wget --directory-prefix ./wsi_data https://openslide.cs.cmu.edu/download/openslide-testdata/Aperio/CMU-1-Small-Region.svs\n", - "!wget --directory-prefix ./wsi_data https://openslide.cs.cmu.edu/download/openslide-testdata/Aperio/CMU-1.svs" - ], + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -232,11 +212,10 @@ "id": "ldwjSWvYmbic", "outputId": "4f3994ce-6486-4c7b-9821-f767fcc98720" }, - "execution_count": null, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "mkdir: cannot create directory ‘wsi_data’: File exists\n", "--2024-03-26 21:12:26-- https://openslide.cs.cmu.edu/download/openslide-testdata/Aperio/CMU-1-Small-Region.svs\n", @@ -260,16 +239,46 @@ "CMU-1.svs 36%[======> ] 61.51M 457KB/s eta 2m 52s " ] } + ], + "source": [ + "!mkdir wsi_data\n", + "!wget --directory-prefix ./wsi_data https://openslide.cs.cmu.edu/download/openslide-testdata/Aperio/CMU-1-Small-Region.svs\n", + "!wget --directory-prefix ./wsi_data https://openslide.cs.cmu.edu/download/openslide-testdata/Aperio/CMU-1.svs" ] }, { "cell_type": "code", - "source": [], + "execution_count": null, "metadata": { "id": "6smBlRYcmgPQ" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "DICOM-Files:\n", + "\n", + "whole folder must be provided, please use the wsi_extension .dcm for loading wsi, or provide a .csv 
file as a filelist with path to the dicom folder" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyOSg5Tomy2ythze0d941UHb", + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/README.md b/README.md index 26523c8..19cb2b2 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,8 @@ We provide different use cases - Offline-Dataset (Store on Disk :floppy_disk:) a In our Pre-Processing pipeline, we are able to extract quadratic patches from detected tissue areas, load annotation files (`.json`) and apply color normlizations. We make use of the popular [OpenSlide](https://openslide.org/) library, but extended it with the [RAPIDS cuCIM](https://github.com/rapidsai/cucim) framework for a speedup in patch-extraction. +> We support all OpenSlide file formats + .dcm-File format (DICOM), by utilizing [`wsidicom`](https://github.com/imi-bigpicture/wsidicom) and [`wsidicomizer`](https://github.com/imi-bigpicture/wsidicomizer). + **Explanations for use cases :floppy_disk: vs :zap:**
Offline-Dataset @@ -369,10 +371,20 @@ In our Pre-Processing pipeline, we are able to extract quadratic patches from de An example notebook is given [here](PathoPatch.ipynb): Open In Colab -## Roadmap -- :construction: In-memory inference loader - This feature is currently under development. Once completed, it will allow a dataset to be loaded into memory for inference, eliminating the need to store it on disk. Useful for inference +### DICOM-conversion +To convert WSI-Files into DICOM-Format, please follow [this documentation](docs/DICOM.md) -- :soon: Dicom support - We plan to add another backend for handling DICOM files with a different structure +### Filelist with metadata +See here: [examples/filelist.csv](examples/filelist.csv) + +```csv +path,slide_mpp,magnification +./test_database/input/WSI/CMU-1.svs,0.500,20 +``` +Only the path is enforced, other two cols are optional. + +## Roadmap +- :construction: In-memory inference loader - This feature is currently under development - an unstable version is already online. Once completed, it will allow a dataset to be loaded into memory for inference, eliminating the need to store it on disk. Useful for inference - :soon: More test cases @@ -406,6 +418,13 @@ An example notebook is given [here](PathoPatch.ipynb): pages="356--361",, isbn="978-3-658-44037-4" } +``` +### Acknowledgement +For processing DICOM-files, this work relies on the IMI-Bigpicture [`wsidicom`](https://github.com/imi-bigpicture/wsidicom) and [`wsidicomizer`](https://github.com/imi-bigpicture/wsidicomizer) libraries, with the following acknowledgements: -``` +>wsidicom: Copyright 2021 Sectra AB, licensed under Apache 2.0. +This project is part of a project that has received funding from the Innovative Medicines Initiative 2 Joint Undertaking under grant agreement No 945358. This Joint Undertaking receives support from the European Union’s Horizon 2020 research and innovation programme and EFPIA. 
IMI website: + +>wsidicomizer: Copyright 2021 Sectra AB, licensed under Apache 2.0. +This project is part of a project that has received funding from the Innovative Medicines Initiative 2 Joint Undertaking under grant agreement No 945358. This Joint Undertaking receives support from the European Union’s Horizon 2020 research and innovation programme and EFPIA. IMI website: diff --git a/docs/DICOM.md b/docs/DICOM.md new file mode 100644 index 0000000..3ca3230 --- /dev/null +++ b/docs/DICOM.md @@ -0,0 +1,57 @@ +# Convert WSI-Files to DICOM + +## Basic cli-usage + +```bash +usage: wsidicomizer [-h] -i INPUT [-o OUTPUT] [-t TILE_SIZE] [-m METADATA] + [-d DEFAULT_METADATA] [-l LEVELS [LEVELS ...]] [--label LABEL] + [--no-label] [--no-overview] [--no-confidential] [-w WORKERS] + [--chunk-size CHUNK_SIZE] [--format FORMAT] [--quality QUALITY] + [--subsampling SUBSAMPLING] [--offset-table OFFSET_TABLE] + +Convert compatible wsi file to DICOM + +options: + -h, --help show this help message and exit + -i INPUT, --input INPUT + Path to input wsi file. + -o OUTPUT, --output OUTPUT + Path to output folder. Folder will be created and must not + exist. If not specified a folder named after the input file is + created in the same path. + -t TILE_SIZE, --tile-size TILE_SIZE + Tile size (same for width and height). Required for ndpi and + openslide formats E.g. 512 + -m METADATA, --metadata METADATA + Path to json metadata that will override metadata from source + image file. + -d DEFAULT_METADATA, --default-metadata DEFAULT_METADATA + Path to json metadata that will be used as default values. + -l LEVELS [LEVELS ...], --levels LEVELS [LEVELS ...] + Pyramid levels to include, if not all. E.g. 0 1 for base and + first pyramid layer. + --label LABEL Optional label image to use instead of label found in file. 
+ --no-label If not to include label + --no-overview If not to include overview + --no-confidential If not to include confidential metadata + -w WORKERS, --workers WORKERS + Number of worker threads to use + --chunk-size CHUNK_SIZE + Number of tiles to give each worker at a time + --format FORMAT Encoding format to use if re-encoding. 'jpeg' or 'jpeg2000'. + --quality QUALITY Quality to use if re-encoding. It is recommended to not use > + 95 for jpeg. Use < 1 or > 1000 for lossless jpeg2000. + --subsampling SUBSAMPLING + Subsampling option if using jpeg for re-encoding. Use '444' + for no subsampling, '422' for 2x1 subsampling, and '420' for + 2x2 subsampling. + --offset-table OFFSET_TABLE + Offset table to use, 'bot' basic offset table, 'eot' extended + offset table, 'None' - no offset table. +``` + +## Acknowledgement for using WSIDICOMIZER + +wsidicomizer: Copyright 2021 Sectra AB, licensed under Apache 2.0. + +This project is part of a project that has received funding from the Innovative Medicines Initiative 2 Joint Undertaking under grant agreement No 945358. This Joint Undertaking receives support from the European Union’s Horizon 2020 research and innovation programme and EFPIA. 
IMI website: diff --git a/environment.yaml b/environment.yaml index 00ac216..5cc910f 100644 --- a/environment.yaml +++ b/environment.yaml @@ -31,3 +31,5 @@ dependencies: - scikit-image==0.19.3 - torchvision==0.16.2 - tqdm==4.65.0 + - wsidicomizer==0.13.2 + - wsidicom==0.20.4 diff --git a/examples/filelist.csv b/examples/filelist.csv new file mode 100644 index 0000000..2933339 --- /dev/null +++ b/examples/filelist.csv @@ -0,0 +1,2 @@ +path,slide_mpp,magnification +./test_database/input/WSI/CMU-1.svs,0.500,20 diff --git a/examples/patch_extraction.yaml b/examples/patch_extraction.yaml index 8e6c348..cc08185 100644 --- a/examples/patch_extraction.yaml +++ b/examples/patch_extraction.yaml @@ -4,7 +4,7 @@ wsi_filelist: # Path to a csv-filelist with WSI files (separator # used.Must include full paths to WSIs, including suffixes.Can be used as an replacement for # the wsi_paths option.If both are provided, yields an error. [str] [Optional, defaults to None] output_path: # Path to the folder where the resulting dataset should be stored [str] -wsi_extensions: # The extension of the WSI-files [str] [Optional, defaults to "svs"] +wsi_extension: # The extension of the WSI-files [str] [Optional, defaults to "svs"] # basic setups patch_size: # The size of the patches in pixel that will be retrieved from the WSI, e.g. 256 for 256px. [][Optional, defaults to 256] @@ -76,5 +76,6 @@ filter_patches: # Post-extraction patch filtering to sort out arte log_path: # Path where log files should be stored. Otherwise, log files are stored in the output folder. [str][Optional, defaults to None] log_level: # Set the logging level. [str][Optional, defaults to info] hardware_selection: # Select hardware device (just if available, otherwise always cucim). [str] [Optional, defaults to cucim] -wsi_magnification: # Manual WSI magnification, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). 
[float][Optional, defaults to None] +wsi_properties: # If provided, the properties of the WSI are used for the extraction. [dict][Optional, defaults to None] + magnification: # Manual WSI magnification, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). [float][Optional, defaults to None] + slide_mpp: # Manual WSI MPP, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). [float][Optional, defaults to None] diff --git a/pathopatch/cli.py b/pathopatch/cli.py index d615849..79f6ee9 100644 --- a/pathopatch/cli.py +++ b/pathopatch/cli.py @@ -85,9 +85,11 @@ class PreProcessingConfig(BaseModel): Args: wsi_paths (str): Path to the folder where all WSI are stored or path to a single WSI-file. output_path (str): Path to the folder where the resulting dataset should be stored. - wsi_extension (str, optional): The extension of the WSI-files. Defaults to "svs. + wsi_extension (str, optional): The extension of the WSI-files. Defaults to "svs". wsi_filelist (str, optional): Path to a csv-filelist with WSI files (separator: `,`), if provided just these files are used. Must include full paths to WSIs, including suffixes. - Can be used as an replacement for the wsi_paths option. If both are provided, yields an error. Defaults to None. + Can be used as an replacement for the wsi_paths option. If both are provided, yields an error. + The path to the files should be written in a column named "path". Metadata for slide magnification and mpp can be provided in columns named 'slide_mpp' and 'magnification'. + Defaults to None. patch_size (int, optional): The size of the patches in pixel that will be retrieved from the WSI, e.g. 256 for 256px. Defaults to 256. patch_overlap (float, optional): The percentage amount pixels that should overlap between two different patches.
Please Provide as integer between 0 and 100, indicating overlap in percentage. @@ -339,9 +341,11 @@ def __init__(self) -> None: parser.add_argument( "--wsi_filelist", type=str, - help="Path to a csv-filelist with WSI files (separator: `,`), if provided just these files are used." - "Must include full paths to WSIs, including suffixes." - "Can be used as an replacement for the wsi_paths option." + help="Path to a csv-filelist with WSI files (separator: `,`), if provided just these files are used. " + "Must include full paths to WSIs, including suffixes. " + "Can be used as an replacement for the wsi_paths option. " + "The path to the files should be written in a column named `path`. " + "Metadata for slide magnification and mpp can be provided in columns named `slide_mpp` and `magnification`. " "If both are provided, yields an error.", ) parser.add_argument( diff --git a/pathopatch/config/config.py b/pathopatch/config/config.py index a9de02f..6f827a8 100644 --- a/pathopatch/config/config.py +++ b/pathopatch/config/config.py @@ -18,7 +18,7 @@ "vms", "vmu", "dcm", -] # mirax not tested yet +] ANNOTATION_EXT: List[str] = ["json"] LOGGING_EXT: List[str] = ["critical", "error", "warning", "info", "debug"] diff --git a/pathopatch/patch_extraction/dataset.py b/pathopatch/patch_extraction/dataset.py index 835ab90..550fff8 100644 --- a/pathopatch/patch_extraction/dataset.py +++ b/pathopatch/patch_extraction/dataset.py @@ -27,8 +27,8 @@ from torchvision.transforms.v2 import ToTensor from PIL import Image from pathopatch.utils.exceptions import WrongParameterException +from pathopatch.wsi_interfaces.openslide_deepzoom import DeepZoomGeneratorOS from pathopatch.utils.patch_util import ( - DeepZoomGeneratorOS, calculate_background_ratio, compute_interesting_patches, get_intersected_labels, @@ -284,7 +284,7 @@ def _set_hardware(self) -> None: self.logger.debug("Using CuCIM") from cucim import CuImage - from pathopatch.patch_extraction.cucim_deepzoom import ( + from 
pathopatch.wsi_interfaces.cucim_deepzoom import ( DeepZoomGeneratorCucim, ) @@ -451,8 +451,8 @@ def _prepare_slide( ) self.tile_extractor = self.deepzoomgenerator( - osr=self.slide_openslide, - cucim_slide=self.slide, + meta_loader=self.slide_openslide, + image_loader=self.slide, tile_size=self.res_tile_size, overlap=self.res_overlap, limit_bounds=True, diff --git a/pathopatch/patch_extraction/patch_extraction.py b/pathopatch/patch_extraction/patch_extraction.py index 166ba67..a489a23 100644 --- a/pathopatch/patch_extraction/patch_extraction.py +++ b/pathopatch/patch_extraction/patch_extraction.py @@ -6,7 +6,6 @@ # University Medicine Essen -import csv import json import multiprocessing import os @@ -15,6 +14,7 @@ from pathlib import Path from shutil import rmtree from typing import Any, Callable, List, Tuple, Union + import matplotlib import torch @@ -23,6 +23,7 @@ import warnings import numpy as np +import pandas as pd from natsort import natsorted from openslide import OpenSlide from PIL import Image @@ -36,7 +37,6 @@ from pathopatch.utils.exceptions import UnalignedDataException, WrongParameterException from pathopatch.utils.patch_dataset import load_tissue_detection_dl from pathopatch.utils.patch_util import ( - DeepZoomGeneratorOS, calculate_background_ratio, compute_interesting_patches, generate_thumbnails, @@ -52,6 +52,11 @@ target_mpp_to_downsample, ) from pathopatch.utils.tools import end_timer, module_exists, start_timer +from pathopatch.wsi_interfaces.openslide_deepzoom import DeepZoomGeneratorOS +from pathopatch.wsi_interfaces.wsidicomizer_openslide import ( + DicomSlide, + DeepZoomGeneratorDicom, +) warnings.filterwarnings("ignore", category=DeprecationWarning) warnings.filterwarnings("ignore", category=UserWarning) @@ -195,6 +200,7 @@ class PreProcessor(object): detector_transforms (Compose): Tissue detection transforms curr_wsi_level (int): Current WSI level save_context (bool): Save context flag + # TODO: improve and check with new dcm code and new
filelist loading Methods: setup_output_path(output_path: Union[str, Path]) -> None: @@ -270,6 +276,7 @@ def __init__(self, slide_processor_config: PreProcessingConfig) -> None: self.config = slide_processor_config self.files, self.annotation_files = [], [] + self.global_properties = {} self.num_files = 0 self.rescaling_factor = 1 @@ -342,22 +349,34 @@ def _load_wsi_filelist(self, wsi_filelist: Union[str, Path]) -> None: wsi_filelist (Union[str, Path]): Path to the CSV file containing the WSI file list. CSV File Example: - The CSV file should contain a single column with the paths to the WSI files. + The CSV file should contain the path column, with path to the WSI. slide_mpp and magnification are optional. Example content of "wsi_filelist.csv": ``` - /path/to/wsi1.svs - /path/to/wsi2.svs - /path/to/wsi3.svs + path,slide_mpp,magnification + test_database/input/WSI/CMU-1.svs,0.499,20 ``` """ self.files = [] - with open(wsi_filelist, "r") as csv_file: - csv_reader = csv.reader(csv_file) - for row in csv_reader: - self.files.append(Path(row[0])) - self.files = natsorted(self.files, key=lambda x: x.name) + csv_file = pd.read_csv(wsi_filelist, sep=",") + + self.files = natsorted(csv_file["path"].to_list(), key=lambda x: Path(x).name) + self.files = [Path(f) for f in self.files] self.num_files = len(self.files) + for row in csv_file.iterrows(): + file = row[1]["path"] + try: + slide_mpp = row[1]["slide_mpp"] + except KeyError: + slide_mpp = None + try: + magnification = row[1]["magnification"] + except KeyError: + magnification = None + prop_dict = {"slide_mpp": slide_mpp, "magnification": magnification} + prop_dict = {k: v for k, v in prop_dict.items() if v is not None} + self.global_properties[Path(file).name] = prop_dict + def _set_annotations_paths( self, annotation_paths: Union[Path, str], @@ -403,23 +422,28 @@ def _set_hardware(self, hardware_selection: str = "cucim") -> None: hardware_selection (str, optional): Specify hardware. Just for experiments. 
Must be either "openslide", or "cucim". Defaults to cucim. """ - if ( - module_exists("cucim", error="ignore") - and hardware_selection.lower() == "cucim" - ): - logger.info("Using CuCIM") - from cucim import CuImage - - from pathopatch.patch_extraction.cucim_deepzoom import ( - DeepZoomGeneratorCucim, - ) - - self.deepzoomgenerator = DeepZoomGeneratorCucim - self.image_loader = CuImage + if self.config.wsi_extension == "dcm": + logger.info("Using WsiDicom as WSIReader") + self.deepzoomgenerator = DeepZoomGeneratorDicom + self.image_loader = DicomSlide else: - logger.info("Using OpenSlide") - self.deepzoomgenerator = DeepZoomGeneratorOS - self.image_loader = OpenSlide + if ( + module_exists("cucim", error="ignore") + and hardware_selection.lower() == "cucim" + ): + logger.info("Using CuCIM as WSIReader") + from cucim import CuImage + + from pathopatch.wsi_interfaces.cucim_deepzoom import ( + DeepZoomGeneratorCucim, + ) + + self.deepzoomgenerator = DeepZoomGeneratorCucim + self.image_loader = CuImage + else: + logger.info("Using OpenSlide as WSIReader") + self.deepzoomgenerator = DeepZoomGeneratorOS + self.image_loader = OpenSlide def _set_tissue_detector(self) -> None: """Set up the tissue detection model and transformations. @@ -774,47 +798,60 @@ def _prepare_wsi( logger.info(f"Computing patches for {wsi_file.name}") # load slide (OS and CuImage/OS) - slide = OpenSlide(str(wsi_file)) + if self.config.wsi_extension == "dcm": + slide = DicomSlide(wsi_file) + else: + slide = OpenSlide(str(wsi_file)) slide_cu = self.image_loader(str(wsi_file)) - if "openslide.mpp-x" in slide.properties: - slide_mpp = float(slide.properties.get("openslide.mpp-x")) - elif ( - self.config.wsi_properties is not None - and "slide_mpp" in self.config.wsi_properties - ): - slide_mpp = self.config.wsi_properties["slide_mpp"] - else: # last option is to use regex - try: - pattern = re.compile(r"MPP(?: =)? 
(\d+\.\d+)") - # Use the pattern to find the match in the string - match = pattern.search(slide.properties["openslide.comment"]) - # Extract the float value - if match: - slide_mpp = float(match.group(1)) - logger.warning( - f"MPP {slide_mpp:.4f} was extracted from the comment of the WSI (Tiff-Metadata comment string) - Please check for correctness!" - ) - else: + + slide_mpp = None + slide_mag = None + if str(wsi_file.name) in self.global_properties: + slide_properties = self.global_properties[str(wsi_file.name)] + if "slide_mpp" in slide_properties: + slide_mpp = slide_properties["slide_mpp"] + if "magnification" in slide_properties: + slide_mag = slide_properties["magnification"] + if slide_mpp is None: + if "openslide.mpp-x" in slide.properties: + slide_mpp = float(slide.properties.get("openslide.mpp-x")) + elif ( + self.config.wsi_properties is not None + and "slide_mpp" in self.config.wsi_properties + ): + slide_mpp = self.config.wsi_properties["slide_mpp"] + else: # last option is to use regex + try: + pattern = re.compile(r"MPP(?: =)? (\d+\.\d+)") + # Use the pattern to find the match in the string + match = pattern.search(slide.properties["openslide.comment"]) + # Extract the float value + if match: + slide_mpp = float(match.group(1)) + logger.warning( + f"MPP {slide_mpp:.4f} was extracted from the comment of the WSI (Tiff-Metadata comment string) - Please check for correctness!" + ) + else: + raise NotImplementedError( + "MPP must be defined either by metadata or by config file!" + ) + except: raise NotImplementedError( "MPP must be defined either by metadata or by config file!" 
) - except: + if slide_mag is None: + if "openslide.objective-power" in slide.properties: + slide_mag = float(slide.properties.get("openslide.objective-power")) + elif ( + self.config.wsi_properties is not None + and "magnification" in self.config.wsi_properties + ): + slide_mag = self.config.wsi_properties["magnification"] + else: raise NotImplementedError( - "MPP must be defined either by metadata or by config file!" + "Magnification must be defined either by metadata or by config file!" ) - if "openslide.objective-power" in slide.properties: - slide_mag = float(slide.properties.get("openslide.objective-power")) - elif ( - self.config.wsi_properties is not None - and "magnification" in self.config.wsi_properties - ): - slide_mag = self.config.wsi_properties["magnification"] - else: - raise NotImplementedError( - "MPP must be defined either by metadata or by config file!" - ) - slide_properties = {"mpp": slide_mpp, "magnification": slide_mag} # Generate thumbnails logger.info("Generate thumbnails") @@ -860,8 +897,8 @@ def _prepare_wsi( ) tiles = self.deepzoomgenerator( - osr=slide, - cucim_slide=slide_cu, + meta_loader=slide, + image_loader=slide_cu, tile_size=tile_size, overlap=overlap, limit_bounds=True, @@ -1003,7 +1040,10 @@ def process_queue( context_tiles = {} # reload image - slide = OpenSlide(str(wsi_file)) + if self.config.wsi_extension == "dcm": + slide = DicomSlide(wsi_file) + else: + slide = OpenSlide(str(wsi_file)) slide_cu = self.image_loader(str(wsi_file)) tile_size, overlap = patch_to_tile_size( @@ -1011,8 +1051,8 @@ def process_queue( ) tiles = self.deepzoomgenerator( - osr=slide, - cucim_slide=slide_cu, + meta_loader=slide, + image_loader=slide_cu, tile_size=tile_size, overlap=overlap, limit_bounds=True, @@ -1022,8 +1062,8 @@ def process_queue( for c_scale in self.config.context_scales: overlap_context = int((c_scale - 1) * tile_size / 2) + overlap context_tiles[c_scale] = self.deepzoomgenerator( - osr=slide, - cucim_slide=slide_cu, + 
meta_loader=slide, + image_loader=slide_cu, tile_size=tile_size, # tile_size, overlap=overlap_context, # (1-c_scale) * tile_size / 2, limit_bounds=True, @@ -1224,8 +1264,8 @@ def _get_surrounding_patches( self.config.patch_size, self.config.patch_overlap, self.rescaling_factor ) tiles = self.deepzoomgenerator( - osr=slide, - cucim_slide=slide_cu, + meta_loader=slide, + image_loader=slide_cu, tile_size=tile_size, overlap=overlap, limit_bounds=True, @@ -1352,8 +1392,8 @@ def save_normalization_vector( # extract all patches patches = [] tiles = self.deepzoomgenerator( - osr=slide, - cucim_slide=slide_cu, + meta_loader=slide, + image_loader=slide_cu, tile_size=tile_size, overlap=overlap, limit_bounds=True, diff --git a/pathopatch/patch_extraction/process_batch.py b/pathopatch/patch_extraction/process_batch.py index 5a79d04..aa7eedf 100644 --- a/pathopatch/patch_extraction/process_batch.py +++ b/pathopatch/patch_extraction/process_batch.py @@ -15,8 +15,8 @@ from PIL import Image from shapely.geometry import Polygon +from pathopatch import logger from pathopatch.utils.patch_util import ( - DeepZoomGeneratorOS, calculate_background_ratio, get_intersected_labels, macenko_normalization, @@ -25,8 +25,7 @@ standardize_brightness, ) from pathopatch.utils.tools import module_exists - -from pathopatch import logger +from pathopatch.wsi_interfaces.openslide_deepzoom import DeepZoomGeneratorOS def process_batch( @@ -107,7 +106,7 @@ def process_batch( if module_exists("cucim", error="ignore"): from cucim import CuImage - from pathopatch.deepzoom.cucim_deepzoom import DeepZoomGeneratorCucim + from pathopatch.wsi_interfaces.cucim_deepzoom import DeepZoomGeneratorCucim generator_module = DeepZoomGeneratorCucim image_loader = CuImage @@ -120,8 +119,8 @@ def process_batch( tile_size = patch_to_tile_size(patch_size, patch_overlap) tiles = generator_module( - osr=slide, - cucim_slide=slide_cu, + meta_loader=slide, + image_loader=slide_cu, tile_size=tile_size, overlap=patch_overlap, 
limit_bounds=True, @@ -131,8 +130,8 @@ def process_batch( for scale in context_scales: overlap_context = int((scale - 1) * patch_size / 2) + patch_overlap context_tiles[scale] = generator_module( - osr=slide, - cucim_slide=slide_cu, + meta_loader=slide, + image_loader=slide_cu, tile_size=tile_size, # tile_size, overlap=overlap_context, # (1-scale) * tile_size / 2, limit_bounds=True, diff --git a/pathopatch/utils/patch_util.py b/pathopatch/utils/patch_util.py index 7e09227..ff64d3c 100644 --- a/pathopatch/utils/patch_util.py +++ b/pathopatch/utils/patch_util.py @@ -47,25 +47,40 @@ def get_files_from_dir( """ if not isinstance(file_path, list): file_path = [file_path] - all_files = [] - for curr_path in file_path: - # Could be that the path itself is a WSI - curr_path = Path(curr_path) - if curr_path.suffix[1:] == file_type and curr_path.is_file(): - all_files += [curr_path] - else: - all_files += [ - curr_file - for curr_file in curr_path.glob("*." + file_type) - if curr_file.is_file() - ] - # Could also be (class) folder in folder - if len(all_files) == 0: + if file_type == "dcm": + # dicom files: files of one WSI need to be stored inside a folder + all_files = [] + for curr_path in file_path: + # check if path contains dcm files + subfiles = [f for f in curr_path.glob("*.dcm") if f.is_file()] + if len(subfiles) != 0: + all_files.append(curr_path) # -> dicom folder needs to be loaded + # check if path contains subfolders with dicom files + subfolders = [f for f in curr_path.glob("*") if f.is_dir()] + for subfolder in subfolders: + subfiles = [f for f in subfolder.glob("*.dcm") if f.is_file()] + if len(subfiles) != 0: + all_files.append(subfolder) + else: + all_files = [] + for curr_path in file_path: + # Could be that the path itself is a WSI + curr_path = Path(curr_path) + if curr_path.suffix[1:] == file_type and curr_path.is_file(): + all_files += [curr_path] + else: all_files += [ curr_file - for curr_file in curr_path.glob("**/*" + file_type) + for curr_file in 
curr_path.glob("*." + file_type) if curr_file.is_file() ] + # Could also be (class) folder in folder + if len(all_files) == 0: + all_files += [ + curr_file + for curr_file in curr_path.glob("**/*" + file_type) + if curr_file.is_file() + ] return all_files @@ -1080,23 +1095,3 @@ def polygon_to_patch_mask( label_mask[:, :, label] = label_submask return label_mask - - -# ignore kwargs for OpenSlide DeepZoomGenerator -class DeepZoomGeneratorOS(DeepZoomGenerator): - def __init__(self, osr, tile_size=254, overlap=1, limit_bounds=False, **kwargs): - """Overwrite DeepZoomGenerator of OpenSlide - - DeepZoomGenerator gets overwritten to provide matching API with CuCim - No Change in functionality - - Args: - osr (OpenSlide): OpenSlide Image. Needed for OS compatibility and for retrieving metadata. - tile_size (int, optional): the width and height of a single tile. For best viewer - performance, tile_size + 2 * overlap should be a power - of two.. Defaults to 254. - overlap (int, optional): the number of extra pixels to add to each interior edge - of a tile. Defaults to 1. - limit_bounds (bool, optional): True to render only the non-empty slide region. Defaults to False. 
- """ - super().__init__(osr, tile_size, overlap, limit_bounds) diff --git a/pathopatch/wsi_interfaces/__init__.py b/pathopatch/wsi_interfaces/__init__.py new file mode 100644 index 0000000..8b02715 --- /dev/null +++ b/pathopatch/wsi_interfaces/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Different interfaces to interact with WSIs +# +# This module provides functionality for generating Deep Zoom images from +# CuImage objects +# +# @ Fabian Hörst, fabian.hoerst@uk-essen.de +# Institute for Artifical Intelligence in Medicine, +# University Medicine Essen + +import logging + +logger = logging.getLogger("__main__") +logger.addHandler(logging.NullHandler()) diff --git a/pathopatch/patch_extraction/cucim_deepzoom.py b/pathopatch/wsi_interfaces/cucim_deepzoom.py similarity index 88% rename from pathopatch/patch_extraction/cucim_deepzoom.py rename to pathopatch/wsi_interfaces/cucim_deepzoom.py index fbd90d7..da3d8c2 100644 --- a/pathopatch/patch_extraction/cucim_deepzoom.py +++ b/pathopatch/wsi_interfaces/cucim_deepzoom.py @@ -21,8 +21,8 @@ class DeepZoomGeneratorCucim(DeepZoomGenerator): use cucim to read regions. Args: - osr (OpenSlide): OpenSlide Image. Needed for OS compatibility and for retrieving metadata. - cucim_slide (CuImage): CuImage slide. Used for retrieving image data. + meta_loader (OpenSlide): OpenSlide Image. Needed for OS compatibility and for retrieving metadata. + image_loader (CuImage): CuImage slide. Used for retrieving image data. tile_size (int, optional): the width and height of a single tile. For best viewer performance, tile_size + 2 * overlap should be a power of two.. Defaults to 254. 
@@ -41,15 +41,15 @@ class DeepZoomGeneratorCucim(DeepZoomGenerator):
 
     def __init__(
         self,
-        osr: OpenSlide,
-        cucim_slide: CuImage,
+        meta_loader: OpenSlide,
+        image_loader: CuImage,
         tile_size: int = 254,
         overlap: int = 1,
         limit_bounds=False,
     ):
-        super().__init__(osr, tile_size, overlap, limit_bounds)
+        super().__init__(meta_loader, tile_size, overlap, limit_bounds)
 
-        self._cucim_slide = cucim_slide
+        self._cucim_slide = image_loader
         self.memory_capacity = preferred_memory_capacity(
             self._cucim_slide, patch_size=(tile_size, tile_size)
         )
diff --git a/pathopatch/wsi_interfaces/openslide_deepzoom.py b/pathopatch/wsi_interfaces/openslide_deepzoom.py
new file mode 100644
index 0000000..7773580
--- /dev/null
+++ b/pathopatch/wsi_interfaces/openslide_deepzoom.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+# Wrapping Openslide for a common interface
+#
+# @ Fabian Hörst, fabian.hoerst@uk-essen.de
+# Institute for Artifical Intelligence in Medicine,
+# University Medicine Essen
+
+from openslide.deepzoom import DeepZoomGenerator
+
+
+# ignore kwargs for OpenSlide DeepZoomGenerator
+class DeepZoomGeneratorOS(DeepZoomGenerator):
+    def __init__(
+        self, image_loader, tile_size=254, overlap=1, limit_bounds=False, **kwargs
+    ):
+        """Overwrite DeepZoomGenerator of OpenSlide
+
+        DeepZoomGenerator gets overwritten to provide matching API with CuCim
+        No change in functionality; additional keyword arguments are ignored
+
+        Args:
+            image_loader (OpenSlide): OpenSlide Image. Needed for OS compatibility and for retrieving metadata.
+            tile_size (int, optional): the width and height of a single tile. For best viewer
+                performance, tile_size + 2 * overlap should be a power
+                of two.. Defaults to 254.
+            overlap (int, optional): the number of extra pixels to add to each interior edge
+                of a tile. Defaults to 1.
+            limit_bounds (bool, optional): True to render only the non-empty slide region. Defaults to False.
+        """
+        super().__init__(image_loader, tile_size, overlap, limit_bounds)
diff --git a/pathopatch/wsi_interfaces/wsidicomizer_openslide.py b/pathopatch/wsi_interfaces/wsidicomizer_openslide.py
new file mode 100644
index 0000000..1bfe599
--- /dev/null
+++ b/pathopatch/wsi_interfaces/wsidicomizer_openslide.py
@@ -0,0 +1,284 @@
+import math
+from pathlib import Path
+from typing import Tuple, Union
+
+from openslide.deepzoom import DeepZoomGenerator
+from PIL import Image
+from wsidicom import WsiDicom
+from wsidicom.file import WsiDicomFileSource
+
+
+class DicomSlide(WsiDicom):
+    """WsiDicom slide that additionally exposes an OpenSlide-like interface.
+
+    Adds the attributes ``dimensions``, ``level_count``, ``level_dimensions``,
+    ``level_downsamples`` and ``properties`` as well as OpenSlide-style
+    ``read_region``, ``get_thumbnail`` and ``get_best_level_for_downsample``.
+    """
+
+    def __init__(self, dcm_folder: Union[Path, str]) -> None:
+        """Open the DICOM slide from the specified folder.
+
+        Args:
+            dcm_folder (Union[Path, str]): Folder containing the DICOM files of the slide.
+        """
+        self.dimensions: Tuple[int, int]
+        self.properties: dict
+        self.level_dimensions: Tuple[Tuple[int, int], ...]
+        self.level_count: int
+        self.level_downsamples: Tuple[float, ...]
+
+        source = WsiDicomFileSource.open(dcm_folder)
+        super().__init__(source, True)
+
+        # information and properties to make this compatible with OpenSlide
+        self.dimensions = (self.size.width, self.size.height)
+        self.level_count = len(self.levels)
+        self.level_dimensions = self._get_level_dimensions()
+        self.level_downsamples = self._get_level_downsamples(self.level_dimensions)
+
+        self.properties = {
+            "mpp": self.mpp,
+            "openslide.mpp-x": self.mpp.width,
+            "openslide.mpp-y": self.mpp.height,
+            "openslide.level-count": self.level_count,
+            "level_count": self.level_count,
+            "level_dimensions": self.level_dimensions,
+            "metadata": self.metadata,
+        }
+        for level, (downsample, dims) in enumerate(
+            zip(self.level_downsamples, self.level_dimensions)
+        ):
+            self.properties[f"openslide.level[{level}].downsample"] = downsample
+            self.properties[f"openslide.level[{level}].height"] = dims[1]
+            self.properties[
+                f"openslide.level[{level}].tile-height"
+            ] = self.tile_size.height
+            self.properties[
+                f"openslide.level[{level}].tile-width"
+            ] = self.tile_size.width
+            self.properties[f"openslide.level[{level}].width"] = dims[0]
+
+    def _get_level_dimensions(self) -> Tuple[Tuple[int, int], ...]:
+        """Get the dimensions of all levels.
+
+        Returns:
+            Tuple[Tuple[int, int], ...]: The dimensions of all levels.
+                Each tuple contains the width and height of the level.
+        """
+        return tuple((level.size.width, level.size.height) for level in self.levels)
+
+    def _get_level_downsamples(
+        self, level_dimensions: Tuple[Tuple[int, int], ...]
+    ) -> Tuple[float, ...]:
+        """Get the downsample factor for each level.
+
+        The factor is the width of the first level divided by the width of the level.
+
+        Args:
+            level_dimensions (Tuple[Tuple[int, int], ...]): The dimensions of all levels.
+                Each tuple contains the width and height of the level.
+
+        Returns:
+            Tuple[float, ...]: The downsample factor for each level.
+        """
+        highest_x = level_dimensions[0][0]
+        return tuple(highest_x / dim[0] for dim in level_dimensions)
+
+    def _convert_region_openslide(
+        self, location: Tuple[int, int], level: int
+    ) -> Tuple[Tuple[int, int], int]:
+        """Convert the location and level from OpenSlide to DICOM.
+
+        Args:
+            location (Tuple[int, int]): Location in OpenSlide format (referenced to highest level).
+            level (int): Level in OpenSlide format.
+
+        Returns:
+            Tuple[Tuple[int, int], int]:
+                The location in DICOM format and the level.
+        """
+        dicom_level = self.levels[level].level
+        # assumes a level is downsampled by 2**level w.r.t. the base level - TODO confirm
+        x = location[0] // 2**dicom_level
+        y = location[1] // 2**dicom_level
+
+        return ((x, y), dicom_level)
+
+    def get_best_level_for_downsample(self, downsample: float) -> int:
+        """Get the best level for a given downsample factor.
+
+        Args:
+            downsample (float): The downsample factor.
+
+        Returns:
+            int: Index of the level whose downsample equals the largest power of two
+                not exceeding ``downsample``, otherwise of the level with the largest
+                downsample below that power of two, otherwise 0.
+        """
+        # log2 is undefined for non-positive values -> fall back to the first level
+        if downsample <= 0:
+            return 0
+        closest_power_of_2 = 2 ** math.floor(math.log2(downsample))
+        if closest_power_of_2 in self.level_downsamples:
+            return self.level_downsamples.index(closest_power_of_2)
+        smaller_downsamples = [
+            ds for ds in self.level_downsamples if ds < closest_power_of_2
+        ]
+        if smaller_downsamples:
+            return self.level_downsamples.index(max(smaller_downsamples))
+        return 0
+
+    def read_region(
+        self, location: Tuple[int, int], level: int, size: Tuple[int, int]
+    ) -> Image.Image:
+        """Read a region from the slide. Interface equal to OpenSlide.
+
+        Args:
+            location (Tuple[int, int]): Location in OpenSlide format (referenced to highest level).
+            level (int): Level in OpenSlide format.
+            size (Tuple[int, int]): Size of the region in pixels.
+
+        Returns:
+            Image.Image: The region as an image.
+        """
+        location, level = self._convert_region_openslide(location, level)
+        return super().read_region(location, level, size)
+
+    def get_thumbnail(self, size: Tuple[int, int]) -> Image.Image:
+        """Get the thumbnail of the slide. Interface equal to OpenSlide.
+
+        Args:
+            size (Tuple[int, int]): Size of the thumbnail in pixels.
+
+        Returns:
+            Image.Image: The thumbnail as an image.
+        """
+        return super().read_thumbnail(size)
+
+
+class DeepZoomGeneratorDicom(DeepZoomGenerator):
+    """Deep Zoom tile generator that reads its tiles from a DicomSlide."""
+
+    BOUNDS_OFFSET_PROPS = (
+        "openslide.bounds-x",
+        "openslide.bounds-y",
+    )
+    BOUNDS_SIZE_PROPS = (
+        "openslide.bounds-width",
+        "openslide.bounds-height",
+    )
+
+    def __init__(
+        self,
+        image_loader: DicomSlide,
+        tile_size=256,
+        overlap=0,
+        limit_bounds=False,
+        **kwargs,
+    ) -> None:
+        """Create a DeepZoomGenerator (inherited from OpenSlide), but instead of utilizing OpenSlide, use a DicomSlide.
+
+        Args:
+            image_loader (DicomSlide): DicomSlide object.
+            tile_size (int, optional): Tile size. Defaults to 256.
+            overlap (int, optional): Overlap. Defaults to 0.
+            limit_bounds (bool, optional): Restrict the pyramid to the area described by the
+                ``openslide.bounds-*`` properties. The properties built by DicomSlide do not
+                contain these keys, so the full slide is used in that case. Defaults to False.
+            **kwargs: Ignored.
+        """
+        # NOTE: super().__init__() is not called; the private attributes are set here
+        self._osr = image_loader
+        self._z_t_downsample = tile_size
+        self._z_overlap = overlap
+        self._limit_bounds = limit_bounds
+
+        if limit_bounds:
+            # Level 0 coordinate offset
+            self._l0_offset = tuple(
+                int(image_loader.properties.get(prop, 0))
+                for prop in self.BOUNDS_OFFSET_PROPS
+            )
+            # Slide level dimensions scale factor in each axis
+            size_scale = tuple(
+                int(image_loader.properties.get(prop, l0_lim)) / l0_lim
+                for prop, l0_lim in zip(self.BOUNDS_SIZE_PROPS, image_loader.dimensions)
+            )
+            # Dimensions of active area
+            self._l_dimensions = tuple(
+                tuple(
+                    int(math.ceil(l_lim * scale))
+                    for l_lim, scale in zip(l_size, size_scale)
+                )
+                for l_size in image_loader.level_dimensions
+            )
+        else:
+            self._l_dimensions = image_loader.level_dimensions
+            self._l0_offset = (0, 0)
+        self._l0_dimensions = self._l_dimensions[0]
+        # Deep Zoom level
+        z_size = self._l0_dimensions
+        z_dimensions = [z_size]
+        while z_size[0] > 1 or z_size[1] > 1:
+            z_size = tuple(max(1, int(math.ceil(z / 2))) for z in z_size)
+            z_dimensions.append(z_size)
+        self._z_dimensions = tuple(reversed(z_dimensions))
+
+        # Tile
+        def tiles(z_lim):
+            return int(math.ceil(z_lim / self._z_t_downsample))
+
+        self._t_dimensions = tuple(
+            (tiles(z_w), tiles(z_h)) for z_w, z_h in self._z_dimensions
+        )
+
+        # Deep Zoom level count
+        self._dz_levels = len(self._z_dimensions)
+
+        # Total downsamples for each Deep Zoom level
+        l0_z_downsamples = tuple(
+            2 ** (self._dz_levels - dz_level - 1) for dz_level in range(self._dz_levels)
+        )
+
+        # Preferred slide levels for each Deep Zoom level
+        self._slide_from_dz_level = tuple(
+            self._osr.get_best_level_for_downsample(d) for d in l0_z_downsamples
+        )
+
+        # Piecewise downsamples
+        self._l0_l_downsamples = self._osr.level_downsamples
+        self._l_z_downsamples = tuple(
+            l0_z_downsamples[dz_level]
+            / self._l0_l_downsamples[self._slide_from_dz_level[dz_level]]
+            for dz_level in range(self._dz_levels)
+        )
+
+        # Slide background color
+        self._bg_color = "#ffffff"
+
+    def get_tile(self, level, address):
+        """Return an RGB PIL.Image for a tile.
+
+        level:     the Deep Zoom level.
+        address:   the address of the tile within the level as a (col, row)
+                   tuple."""
+
+        # Read tile
+        args, z_size = self._get_tile_info(level, address)
+        tile = self._osr.read_region(*args)
+        tile = tile.convert("RGBA")
+
+        # Apply on solid background
+        bg = Image.new(
+            "RGB", tile.size, self._bg_color
+        )  # -> tile -> PIL.Image.Image, image mode=RGBA
+        tile = Image.composite(tile, bg, tile)
+
+        # Scale to the correct size
+        if tile.size != z_size:
+            # Image.Resampling added in Pillow 9.1.0
+            # Image.LANCZOS removed in Pillow 10
+            tile.thumbnail(z_size, getattr(Image, "Resampling", Image).LANCZOS)
+
+        return tile
diff --git a/requirements.txt b/requirements.txt
index f00d302..1e15256 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,3 +18,5 @@ setuptools<=65.6.3
 tqdm
 torchvision
 torch
+wsidicom==0.20.4
+wsidicomizer==0.13.2
diff --git a/requirements_develop.txt b/requirements_develop.txt
deleted file mode 100644
index 1d36b1a..0000000
--- a/requirements_develop.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Pillow>=9.5.0
-PyYAML
-Shapely==1.8.5.post1
-black
-colorama
-flake8
-flake8-html
-genbadge
-geojson>=3.0.0
-matplotlib
-natsort
-numpy>1.22,<1.24
-opencv_python_headless
-openslide_python
-pandas
-pre-commit
-pydantic==1.10.4
-pytest
-pytest-sugar
-rasterio==1.3.5.post1
-requests
-scikit-image
-setuptools<=65.6.3
-tqdm
-torchvision
-torch
diff --git
a/setup.py b/setup.py
index 2815d7b..6213ee4 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,6 @@
 from setuptools import find_packages, setup
 
-VERSION = "1.0.0b"
+VERSION = "1.0.1b"
 DESCRIPTION = "PathoPatch - Accelerating Artificial Intelligence Based Whole Slide Image Analysis with an Optimized Preprocessing Pipeline"
 with open("docs/README_pypi.md", "r") as fh:
     LONG_DESCRIPTION = fh.read()
@@ -39,6 +39,8 @@
         "tqdm",
         "torchvision",
         "torch",
+        "wsidicom==0.20.4",
+        "wsidicomizer==0.13.2",
     ],
     scripts=[
         "pathopatch/wsi_extraction.py",
diff --git a/test_database/filelist.csv b/test_database/filelist.csv
new file mode 100644
index 0000000..8d4977e
--- /dev/null
+++ b/test_database/filelist.csv
@@ -0,0 +1,2 @@
+path,slide_mpp,magnification
+./test_database/input/WSI/CMU-1.svs,0.500,20.0
diff --git a/tests/static_test_files/preprocessing/annotations_filtering/config.yaml b/tests/static_test_files/preprocessing/annotations_filtering/config.yaml
index e81b791..94708f4 100644
--- a/tests/static_test_files/preprocessing/annotations_filtering/config.yaml
+++ b/tests/static_test_files/preprocessing/annotations_filtering/config.yaml
@@ -1,7 +1,7 @@
 # dataset paths
 wsi_paths: ./test_database/input/WSI
 output_path: ./tests/tmp_results_folder/annotations_filtering
-wsi_extensions: svs
+wsi_extension: svs
 
 # annotations
 annotation_paths: ./test_database/input/Annotations
diff --git a/tests/static_test_files/preprocessing/annotations_simple/config.yaml b/tests/static_test_files/preprocessing/annotations_simple/config.yaml
index e4f44b4..60e934b 100644
--- a/tests/static_test_files/preprocessing/annotations_simple/config.yaml
+++ b/tests/static_test_files/preprocessing/annotations_simple/config.yaml
@@ -1,7 +1,7 @@
 # dataset paths
-wsi_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/WSI/CMU-1.svs
+wsi_paths: ./test_database/input/WSI/CMU-1.svs
 output_path: ./tests/tmp_results_folder/annotations_simple
-wsi_extensions: svs
+wsi_extension: svs
 
 #
annotations annotation_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/Annotations diff --git a/tests/static_test_files/preprocessing/annotations_store_masks/config.yaml b/tests/static_test_files/preprocessing/annotations_store_masks/config.yaml index 3d12ac2..8f09346 100644 --- a/tests/static_test_files/preprocessing/annotations_store_masks/config.yaml +++ b/tests/static_test_files/preprocessing/annotations_store_masks/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/WSI/CMU-1.svs output_path: ./tests/tmp_results_folder/store_masks -wsi_extensions: svs +wsi_extension: svs # annotations annotation_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/Annotations diff --git a/tests/static_test_files/preprocessing/annotations_tissue_mask/config.yaml b/tests/static_test_files/preprocessing/annotations_tissue_mask/config.yaml index 522c181..7965e83 100644 --- a/tests/static_test_files/preprocessing/annotations_tissue_mask/config.yaml +++ b/tests/static_test_files/preprocessing/annotations_tissue_mask/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/WSI/CMU-1.svs output_path: ./tests/tmp_results_folder/annotations_tissue_mask -wsi_extensions: svs +wsi_extension: svs # annotations annotation_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/Annotations diff --git a/tests/static_test_files/preprocessing/annotations_tissue_mask_failing/config.yaml b/tests/static_test_files/preprocessing/annotations_tissue_mask_failing/config.yaml index 39d4298..a1d5373 100644 --- a/tests/static_test_files/preprocessing/annotations_tissue_mask_failing/config.yaml +++ b/tests/static_test_files/preprocessing/annotations_tissue_mask_failing/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: 
/Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/WSI output_path: ./tests/tmp_results_folder/annotations_tissue_mask_failing -wsi_extensions: svs +wsi_extension: svs # annotations annotation_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/Annotations diff --git a/tests/static_test_files/preprocessing/baseline/config.yaml b/tests/static_test_files/preprocessing/baseline/config.yaml index 1644b29..d730699 100644 --- a/tests/static_test_files/preprocessing/baseline/config.yaml +++ b/tests/static_test_files/preprocessing/baseline/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs output_path: ./tests/tmp_results_folder/baseline -wsi_extensions: svs +wsi_extension: svs # basic setups downsample: 1 diff --git a/tests/static_test_files/preprocessing/baseline_dataset/config.yaml b/tests/static_test_files/preprocessing/baseline_dataset/config.yaml index c0f6577..5fbe61a 100644 --- a/tests/static_test_files/preprocessing/baseline_dataset/config.yaml +++ b/tests/static_test_files/preprocessing/baseline_dataset/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs output_path: ./tests/tmp_results_folder/baseline_pytorch_dataset -wsi_extensions: svs +wsi_extension: svs # basic setups downsample: 1 diff --git a/tests/static_test_files/preprocessing/cli.yaml b/tests/static_test_files/preprocessing/cli.yaml index af42a1a..56a7672 100644 --- a/tests/static_test_files/preprocessing/cli.yaml +++ b/tests/static_test_files/preprocessing/cli.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: /homes/fhoerst/histo-projects/DigitalHistologyHub/test_database/input/WSI output_path: /homes/fhoerst/histo-projects/DigitalHistologyHub/test_database/output/debug -wsi_extensions: svs +wsi_extension: svs # basic setups downsample: 0 diff --git a/tests/static_test_files/preprocessing/complex_overlap_dataset/config.yaml 
b/tests/static_test_files/preprocessing/complex_overlap_dataset/config.yaml index f9edccd..5f8164f 100644 --- a/tests/static_test_files/preprocessing/complex_overlap_dataset/config.yaml +++ b/tests/static_test_files/preprocessing/complex_overlap_dataset/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs output_path: ./tests/tmp_results_folder/complex_setup_dataset -wsi_extensions: svs +wsi_extension: svs # basic setups target_mpp: 0.782 diff --git a/tests/static_test_files/preprocessing/complex_setup/config.yaml b/tests/static_test_files/preprocessing/complex_setup/config.yaml index 006f536..1df4c6a 100644 --- a/tests/static_test_files/preprocessing/complex_setup/config.yaml +++ b/tests/static_test_files/preprocessing/complex_setup/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs output_path: ./tests/tmp_results_folder/complex_setup -wsi_extensions: svs +wsi_extension: svs # basic setups target_mpp: 0.499 diff --git a/tests/static_test_files/preprocessing/complex_setup_dataset/config.yaml b/tests/static_test_files/preprocessing/complex_setup_dataset/config.yaml index 3973c3d..4f4fbdd 100644 --- a/tests/static_test_files/preprocessing/complex_setup_dataset/config.yaml +++ b/tests/static_test_files/preprocessing/complex_setup_dataset/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs output_path: ./tests/tmp_results_folder/complex_setup_dataset -wsi_extensions: svs +wsi_extension: svs # basic setups target_mpp: 0.499 diff --git a/tests/static_test_files/preprocessing/dicom/dicom.yaml b/tests/static_test_files/preprocessing/dicom/dicom.yaml new file mode 100644 index 0000000..8f548ca --- /dev/null +++ b/tests/static_test_files/preprocessing/dicom/dicom.yaml @@ -0,0 +1,20 @@ +# dataset paths +wsi_paths: ./test_database/dicom_files/CMU-1 +output_path: ./tests/tmp_results_folder/dicom/dicom_extraction 
+wsi_extension: dcm + +# basic setups +target_mpp: 0.499 +patch_size: 256 +patch_overlap: 0 +normalize_stains: False +min_intersection_ratio: 0.05 + +processes: 8 +overwrite: True + +# other +log_level: debug +wsi_properties: { + magnification: 20.0, +} diff --git a/tests/static_test_files/preprocessing/dicom/openslide.yaml b/tests/static_test_files/preprocessing/dicom/openslide.yaml new file mode 100644 index 0000000..cbc9dfd --- /dev/null +++ b/tests/static_test_files/preprocessing/dicom/openslide.yaml @@ -0,0 +1,20 @@ +# dataset paths +wsi_paths: ./test_database/input/WSI/CMU-1.svs +output_path: ./tests/tmp_results_folder/dicom/openslide_extraction +wsi_extension: svs + +# basic setups +target_mpp: 0.499 +patch_size: 256 +patch_overlap: 0 +normalize_stains: False +min_intersection_ratio: 0.05 + +processes: 8 +overwrite: True + +# other +log_level: debug +wsi_properties: { + magnification: 20, +} diff --git a/tests/static_test_files/preprocessing/downsample/config.yaml b/tests/static_test_files/preprocessing/downsample/config.yaml index 7814ead..e0517a3 100644 --- a/tests/static_test_files/preprocessing/downsample/config.yaml +++ b/tests/static_test_files/preprocessing/downsample/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs output_path: ./tests/tmp_results_folder/downsample -wsi_extensions: svs +wsi_extension: svs # basic setups downsample: 2 diff --git a/tests/static_test_files/preprocessing/downsample_dataset_dataset/config.yaml b/tests/static_test_files/preprocessing/downsample_dataset_dataset/config.yaml index 7814ead..e0517a3 100644 --- a/tests/static_test_files/preprocessing/downsample_dataset_dataset/config.yaml +++ b/tests/static_test_files/preprocessing/downsample_dataset_dataset/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs output_path: ./tests/tmp_results_folder/downsample -wsi_extensions: svs +wsi_extension: svs # basic setups 
downsample: 2 diff --git a/tests/static_test_files/preprocessing/filelist/config.yaml b/tests/static_test_files/preprocessing/filelist/config.yaml new file mode 100644 index 0000000..3a7ee07 --- /dev/null +++ b/tests/static_test_files/preprocessing/filelist/config.yaml @@ -0,0 +1,12 @@ +# dataset paths +wsi_filelist: ./tests/static_test_files/preprocessing/filelist/filelist.csv +output_path: ./tests/tmp_results_folder/filelist +wsi_extension: svs + +# basic setups +target_mpp: 0.998 +processes: 8 + +# other +log_level: debug +overwrite: True diff --git a/tests/static_test_files/preprocessing/filelist/filelist.csv b/tests/static_test_files/preprocessing/filelist/filelist.csv new file mode 100644 index 0000000..8d4977e --- /dev/null +++ b/tests/static_test_files/preprocessing/filelist/filelist.csv @@ -0,0 +1,2 @@ +path,slide_mpp,magnification +./test_database/input/WSI/CMU-1.svs,0.500,20.0 diff --git a/tests/static_test_files/preprocessing/filelist/results/CMU-1/metadata.yaml b/tests/static_test_files/preprocessing/filelist/results/CMU-1/metadata.yaml new file mode 100644 index 0000000..0c00ca0 --- /dev/null +++ b/tests/static_test_files/preprocessing/filelist/results/CMU-1/metadata.yaml @@ -0,0 +1,15 @@ +orig_n_tiles_cols: 90 +orig_n_tiles_rows: 65 +base_magnification: 20 +downsampling: 2 +label_map: + background: 0 +patch_overlap: 0 +patch_size: 256 +base_mpp: 0.5 +target_patch_mpp: 1.0 +stain_normalization: false +magnification: 10.0 +level: 15 +patch_distribution: + 0: 0 diff --git a/tests/static_test_files/preprocessing/filelist/results/processed.json b/tests/static_test_files/preprocessing/filelist/results/processed.json new file mode 100644 index 0000000..24a6e37 --- /dev/null +++ b/tests/static_test_files/preprocessing/filelist/results/processed.json @@ -0,0 +1,5 @@ +{ + "processed_files": [ + "CMU-1" + ] +} diff --git a/tests/static_test_files/preprocessing/macenko/load_macenko.yaml b/tests/static_test_files/preprocessing/macenko/load_macenko.yaml index 
4f24f37..115bdbe 100644 --- a/tests/static_test_files/preprocessing/macenko/load_macenko.yaml +++ b/tests/static_test_files/preprocessing/macenko/load_macenko.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI output_path: ./tests/output/patch_extraction -wsi_extensions: svs +wsi_extension: svs # basic setups downsample: 1 diff --git a/tests/static_test_files/preprocessing/macenko/test_macenko.yaml b/tests/static_test_files/preprocessing/macenko/test_macenko.yaml index f8bff60..2efc115 100644 --- a/tests/static_test_files/preprocessing/macenko/test_macenko.yaml +++ b/tests/static_test_files/preprocessing/macenko/test_macenko.yaml @@ -1,6 +1,6 @@ # dataset paths wsi_path: ./test_database/input/WSI/CMU-1-Small-Region.svs -wsi_extensions: svs +wsi_extension: svs save_json_path: ./tests/tmp_results_folder/macenko/test_macenko.json # basic setups diff --git a/tests/static_test_files/preprocessing/roi/config.yaml b/tests/static_test_files/preprocessing/roi/config.yaml index 55bba35..a8e63b9 100644 --- a/tests/static_test_files/preprocessing/roi/config.yaml +++ b/tests/static_test_files/preprocessing/roi/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/JP2K-33003-1.svs output_path: ./tests/tmp_results_folder/roi -wsi_extensions: svs +wsi_extension: svs # annotations annotation_paths: ./test_database/input/Annotations diff --git a/tests/static_test_files/preprocessing/roi_context/config.yaml b/tests/static_test_files/preprocessing/roi_context/config.yaml index aac9c72..2ac3bf3 100644 --- a/tests/static_test_files/preprocessing/roi_context/config.yaml +++ b/tests/static_test_files/preprocessing/roi_context/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/JP2K-33003-1.svs output_path: ./tests/tmp_results_folder/roi_context -wsi_extensions: svs +wsi_extension: svs # annotations annotation_paths: ./test_database/input/Annotations diff --git 
a/tests/static_test_files/preprocessing/target_magnification/config.yaml b/tests/static_test_files/preprocessing/target_magnification/config.yaml index 4f162ab..73c0e3a 100644 --- a/tests/static_test_files/preprocessing/target_magnification/config.yaml +++ b/tests/static_test_files/preprocessing/target_magnification/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs output_path: ./tests/tmp_results_folder/target_magnification -wsi_extensions: svs +wsi_extension: svs # basic setups target_mag: 10 diff --git a/tests/static_test_files/preprocessing/target_magnification_dataset/config.yaml b/tests/static_test_files/preprocessing/target_magnification_dataset/config.yaml index 02f96f1..4d0260c 100644 --- a/tests/static_test_files/preprocessing/target_magnification_dataset/config.yaml +++ b/tests/static_test_files/preprocessing/target_magnification_dataset/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs output_path: ./tests/tmp_results_folder/target_magnification_dataset -wsi_extensions: svs +wsi_extension: svs # basic setups target_mag: 10 diff --git a/tests/static_test_files/preprocessing/target_mpp/config.yaml b/tests/static_test_files/preprocessing/target_mpp/config.yaml index 926ef8a..60ee360 100644 --- a/tests/static_test_files/preprocessing/target_mpp/config.yaml +++ b/tests/static_test_files/preprocessing/target_mpp/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs output_path: ./tests/tmp_results_folder/target_mpp -wsi_extensions: svs +wsi_extension: svs # basic setups target_mpp: 1 diff --git a/tests/static_test_files/preprocessing/target_mpp_dataset/config.yaml b/tests/static_test_files/preprocessing/target_mpp_dataset/config.yaml index 59719a7..46839d4 100644 --- a/tests/static_test_files/preprocessing/target_mpp_dataset/config.yaml +++ 
b/tests/static_test_files/preprocessing/target_mpp_dataset/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs output_path: ./tests/tmp_results_folder/target_mpp_dataset -wsi_extensions: svs +wsi_extension: svs # basic setups target_mpp: 1 diff --git a/tests/static_test_files/preprocessing/target_mpp_macenko/config.yaml b/tests/static_test_files/preprocessing/target_mpp_macenko/config.yaml index 2e17dee..575c098 100644 --- a/tests/static_test_files/preprocessing/target_mpp_macenko/config.yaml +++ b/tests/static_test_files/preprocessing/target_mpp_macenko/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs output_path: ./tests/tmp_results_folder/target_mpp_macenko -wsi_extensions: svs +wsi_extension: svs # basic setups target_mpp: 1.2 diff --git a/tests/static_test_files/preprocessing/target_mpp_macenko_dataset/config.yaml b/tests/static_test_files/preprocessing/target_mpp_macenko_dataset/config.yaml index 6c8ef6e..79ce990 100644 --- a/tests/static_test_files/preprocessing/target_mpp_macenko_dataset/config.yaml +++ b/tests/static_test_files/preprocessing/target_mpp_macenko_dataset/config.yaml @@ -1,7 +1,7 @@ # dataset paths wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs output_path: ./tests/tmp_results_folder/target_mpp_macenko_dataset -wsi_extensions: svs +wsi_extension: svs # basic setups target_mpp: 1.2 diff --git a/tests/test_core_modules/test_filelist.py b/tests/test_core_modules/test_filelist.py new file mode 100644 index 0000000..87eb855 --- /dev/null +++ b/tests/test_core_modules/test_filelist.py @@ -0,0 +1,88 @@ +import os +import shutil +import unittest +from pathlib import Path + +import yaml + +from pathopatch.cli import PreProcessingConfig, PreProcessingYamlConfig +from pathopatch.patch_extraction.patch_extraction import PreProcessor +from pathopatch.utils.logger import Logger +from pathopatch.utils.tools import close_logger +from 
test_database.download import check_test_database + + +class TestPreProcessorFilelist(unittest.TestCase): + """Test the PreProcessor Module with basic (default) parameter setup, but with filelist input""" + + @classmethod + def setUpClass(cls) -> None: + """Setup configuration""" + check_test_database() + cls.config = "./tests/static_test_files/preprocessing/filelist/config.yaml" + with open(cls.config, "r") as config_file: + yaml_config = yaml.safe_load(config_file) + yaml_config = PreProcessingYamlConfig(**yaml_config) + + opt_dict = dict(yaml_config) + cls.opt_dict = {k: v for k, v in opt_dict.items() if v is not None} + cls.configuration = PreProcessingConfig(**cls.opt_dict) + + cls.gt_folder = Path( + "./tests/static_test_files/preprocessing/filelist/results/" + ).resolve() + cls.wsi_name = "CMU-1" + + preprocess_logger = Logger( + level=cls.configuration.log_level.upper(), + log_dir=cls.configuration.log_path, + comment="preprocessing", + use_timestamp=True, + ) + cls.logger = preprocess_logger.create_logger() + # do preprocessing for result checking + cls.slide_processor = PreProcessor(slide_processor_config=cls.configuration) + cls.logger.info( + "Sucessfully started the setup - Now we calculate the base dataset. May take up to 10 Minutes!" 
+ ) + cls.slide_processor.sample_patches_dataset() + + @classmethod + def tearDownClass(cls): + """Clean output directory""" + # close logger + close_logger(cls.logger) + + # clean output directory + clean_folders = [ + f for f in Path(cls.opt_dict["output_path"]).iterdir() if f.is_dir() + ] + for f in clean_folders: + shutil.rmtree(f.resolve()) + clean_files = [ + f for f in Path(cls.opt_dict["output_path"]).iterdir() if f.is_file() + ] + for f in clean_files: + os.remove(f.resolve()) + shutil.rmtree(f.parent.resolve()) + + def test_init_files(self) -> None: + """For this case 1 WSI files should have been loaded""" + self.assertEqual(self.slide_processor.num_files, 1) + + def test_init_num_annotations_loaded(self) -> None: + """For this case 0 annotation files should have been loaded""" + self.assertEqual(len(self.slide_processor.annotation_files), 0) + + def test_metadata_wsi(self) -> None: + gt_path = self.gt_folder / self.wsi_name / "metadata.yaml" + with open(gt_path, "r") as config_file: + yaml_config = yaml.safe_load(config_file) + + test_path = ( + self.slide_processor.config.output_path / self.wsi_name / "metadata.yaml" + ) + with open(test_path, "r") as config_file: + test_file = yaml.safe_load(config_file) + + self.assertEqual(yaml_config, test_file) diff --git a/tests/test_dicom_module/__init__.py b/tests/test_dicom_module/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_dicom_module/test_dicom_loader.py b/tests/test_dicom_module/test_dicom_loader.py new file mode 100644 index 0000000..e2d2c3c --- /dev/null +++ b/tests/test_dicom_module/test_dicom_loader.py @@ -0,0 +1,156 @@ +import json +import shutil +import subprocess +import unittest +from pathlib import Path + +import numpy as np +import yaml +from numpy.testing import assert_almost_equal +from PIL import Image + +from pathopatch.cli import PreProcessingConfig, PreProcessingYamlConfig +from pathopatch.patch_extraction.patch_extraction import PreProcessor +from 
pathopatch.utils.logger import Logger +from pathopatch.utils.tools import close_logger +from test_database.download import check_test_database + + +class TestPreProcessorDICOM(unittest.TestCase): + """Test the dicom image loader, must be equal to openslide loader""" + + @classmethod + def setUpClass(cls) -> None: + """Setup configuration""" + check_test_database() + conversion_command = "wsidicomizer -i ./test_database/input/WSI/CMU-1.svs -o ./test_database/dicom_files/CMU-1" + # dicom conversion + process = subprocess.Popen(conversion_command.split(), stdout=subprocess.PIPE) + output, error = process.communicate() + + # openslide + cls.wsi_name = "CMU-1" + cls.config = "./tests/static_test_files/preprocessing/dicom/openslide.yaml" + with open(cls.config, "r") as config_file: + yaml_config = yaml.safe_load(config_file) + yaml_config = PreProcessingYamlConfig(**yaml_config) + + opt_dict = dict(yaml_config) + cls.opt_dict = {k: v for k, v in opt_dict.items() if v is not None} + cls.configuration = PreProcessingConfig(**cls.opt_dict) + cls.openslide_config = cls.configuration.dict() + preprocess_logger = Logger( + level=cls.configuration.log_level.upper(), + log_dir=cls.configuration.log_path, + comment="preprocessing", + use_timestamp=True, + ) + cls.logger = preprocess_logger.create_logger() + # do preprocessing for openslide + cls.slide_processor = PreProcessor(slide_processor_config=cls.configuration) + cls.logger.info( + "Sucessfully started the setup - Now we calculate the base dataset. May take up to 10 Minutes!" 
+ ) + cls.slide_processor.sample_patches_dataset() + + # dicom + cls.config = "./tests/static_test_files/preprocessing/dicom/dicom.yaml" + with open(cls.config, "r") as config_file: + yaml_config = yaml.safe_load(config_file) + yaml_config = PreProcessingYamlConfig(**yaml_config) + + opt_dict = dict(yaml_config) + cls.opt_dict = {k: v for k, v in opt_dict.items() if v is not None} + cls.configuration = PreProcessingConfig(**cls.opt_dict) + cls.dicom_config = cls.configuration.dict() + cls.slide_processor = PreProcessor(slide_processor_config=cls.configuration) + cls.logger.info( + "Sucessfully started the setup - Now we calculate the base dataset. May take up to 10 Minutes!" + ) + cls.slide_processor.sample_patches_dataset() + + @classmethod + def tearDownClass(cls): + """Clean output directory""" + # close logger + close_logger(cls.logger) + + # clean output directory + shutil.rmtree(cls.dicom_config["output_path"].parent) + shutil.rmtree(Path("./test_database/dicom_files/CMU-1").resolve()) + + def test_metadata_wsi(self) -> None: + os_path = self.openslide_config["output_path"] / self.wsi_name / "metadata.yaml" + with open(os_path, "r") as config_file: + os_config = yaml.safe_load(config_file) + + dcm_path = self.dicom_config["output_path"] / self.wsi_name / "metadata.yaml" + with open(dcm_path, "r") as config_file: + dcm_file = yaml.safe_load(config_file) + + self.assertEqual(os_config, dcm_file) + + def test_count_patches(self) -> None: + """Test if the number of patches is correct""" + os_path = self.openslide_config["output_path"] / self.wsi_name / "metadata" + os_patches_count = len([f for f in os_path.glob("*.yaml")]) + + dcm_path = self.dicom_config["output_path"] / self.wsi_name / "metadata" + dcm_patches_count = len([f for f in dcm_path.glob("*.yaml")]) + + self.assertEqual(os_patches_count, dcm_patches_count) + + def test_patch_results_wsi(self) -> None: + """Test if patches are extracted the right way for WSI""" + os_path = ( + 
self.openslide_config["output_path"] / self.wsi_name / "patch_metadata.json" + ) + with open(os_path, "r") as config_file: + patch_os = json.load(config_file) + patch_os = sorted(patch_os, key=lambda d: list(d.keys())[0]) + patch_os = {list(elem.keys())[0]: list(elem.values())[0] for elem in patch_os} + + dcm_path = ( + self.dicom_config["output_path"] / self.wsi_name / "patch_metadata.json" + ) + with open(dcm_path, "r") as config_file: + test_dcm = json.load(config_file) + test_dcm = sorted(test_dcm, key=lambda d: list(d.keys())[0]) + + test_dcm = {list(elem.keys())[0]: list(elem.values())[0] for elem in test_dcm} + + # Extract unique patch names from both files + unique_patches = set(patch_os.keys()).union(test_dcm.keys()) + # print(unique_patches) + differing_patches = 0 + for patch_name in unique_patches: + if patch_name not in patch_os or patch_name not in test_dcm: + differing_patches += 1 + print(f"differing_patches: {patch_name}") + else: + if not patch_os[patch_name] == test_dcm[patch_name]: + differing_patches += 1 + print(f"differing_patches: {patch_name}") + + self.assertLess( + differing_patches, + 15, + "Patches are not equal and differ in more than 15 patches", + ) + + def test_example_images(self) -> None: + """ """ + patch_list = [ + "CMU-1_8_116.png", + "CMU-1_10_109.png", + "CMU-1_28_152.png", + "CMU-1_102_16.png", + ] + os_path = self.openslide_config["output_path"] / self.wsi_name / "patches" + dcm_path = self.dicom_config["output_path"] / self.wsi_name / "patches" + + for p_name in patch_list: + os_image = np.array(Image.open((os_path / p_name).resolve())) + dcm_image = np.array(Image.open((dcm_path / p_name).resolve())) + + assert_almost_equal(os_image, dcm_image)