diff --git a/PathoPatch.ipynb b/PathoPatch.ipynb
index cd44411..ad0ad65 100644
--- a/PathoPatch.ipynb
+++ b/PathoPatch.ipynb
@@ -1,26 +1,10 @@
{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "provenance": [],
- "authorship_tag": "ABX9TyOSg5Tomy2ythze0d941UHb",
- "include_colab_link": true
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "language_info": {
- "name": "python"
- }
- },
"cells": [
{
"cell_type": "markdown",
"metadata": {
- "id": "view-in-github",
- "colab_type": "text"
+ "colab_type": "text",
+ "id": "view-in-github"
},
"source": [
""
@@ -28,21 +12,21 @@
},
{
"cell_type": "markdown",
- "source": [
- "# PathoPatch Example\n"
- ],
"metadata": {
"id": "GA0VTVQzkmEJ"
- }
+ },
+ "source": [
+ "# PathoPatch Example\n"
+ ]
},
{
"cell_type": "markdown",
- "source": [
- "### 1. Installation (OpenSlide, CuCIM, PathoPatch)"
- ],
"metadata": {
"id": "chE1_Uyxk4Lt"
- }
+ },
+ "source": [
+ "### 1. Installation (OpenSlide, CuCIM, PathoPatch)"
+ ]
},
{
"cell_type": "code",
@@ -56,8 +40,8 @@
},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"\u001b[33m\r0% [Working]\u001b[0m\r \rHit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease\n",
"Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 InRelease\n",
@@ -87,9 +71,7 @@
},
{
"cell_type": "code",
- "source": [
- "!pip install openslide-python pathopatch"
- ],
+ "execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -97,11 +79,10 @@
"id": "WkCsLuQAj41e",
"outputId": "796bb818-c29c-4a22-df88-9eecebc897ff"
},
- "execution_count": 2,
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Requirement already satisfied: openslide-python in /usr/local/lib/python3.10/site-packages (1.3.1)\n",
"Requirement already satisfied: pathopatch in /usr/local/lib/python3.10/site-packages (0.9.5.1b0)\n",
@@ -173,13 +154,14 @@
"\u001b[0m"
]
}
+ ],
+ "source": [
+ "!pip install openslide-python pathopatch"
]
},
{
"cell_type": "code",
- "source": [
- "!pip install cucim"
- ],
+ "execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -187,11 +169,10 @@
"id": "xdsM1hLnkC1D",
"outputId": "e9209dc9-7de8-44dc-b0a2-64fa1c355ec5"
},
- "execution_count": 3,
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Collecting cucim\n",
" Downloading cucim-23.10.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (43 kB)\n",
@@ -207,24 +188,23 @@
"\u001b[0m"
]
}
+ ],
+ "source": [
+ "!pip install cucim"
]
},
{
"cell_type": "markdown",
- "source": [
- "## 2. Download files"
- ],
"metadata": {
"id": "MNTTsbCblo-Q"
- }
+ },
+ "source": [
+ "## 2. Download files"
+ ]
},
{
"cell_type": "code",
- "source": [
- "!mkdir wsi_data\n",
- "!wget --directory-prefix ./wsi_data https://openslide.cs.cmu.edu/download/openslide-testdata/Aperio/CMU-1-Small-Region.svs\n",
- "!wget --directory-prefix ./wsi_data https://openslide.cs.cmu.edu/download/openslide-testdata/Aperio/CMU-1.svs"
- ],
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -232,11 +212,10 @@
"id": "ldwjSWvYmbic",
"outputId": "4f3994ce-6486-4c7b-9821-f767fcc98720"
},
- "execution_count": null,
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"mkdir: cannot create directory ‘wsi_data’: File exists\n",
"--2024-03-26 21:12:26-- https://openslide.cs.cmu.edu/download/openslide-testdata/Aperio/CMU-1-Small-Region.svs\n",
@@ -260,16 +239,46 @@
"CMU-1.svs 36%[======> ] 61.51M 457KB/s eta 2m 52s "
]
}
+ ],
+ "source": [
+ "!mkdir wsi_data\n",
+ "!wget --directory-prefix ./wsi_data https://openslide.cs.cmu.edu/download/openslide-testdata/Aperio/CMU-1-Small-Region.svs\n",
+ "!wget --directory-prefix ./wsi_data https://openslide.cs.cmu.edu/download/openslide-testdata/Aperio/CMU-1.svs"
]
},
{
"cell_type": "code",
- "source": [],
+ "execution_count": null,
"metadata": {
"id": "6smBlRYcmgPQ"
},
- "execution_count": null,
- "outputs": []
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "DICOM-Files:\n",
+ "\n",
+ "For DICOM slides, the whole folder containing the .dcm files of one WSI must be provided. Either set `wsi_extension` to `dcm`, or provide a .csv filelist with the path to each DICOM folder."
+ ]
}
- ]
-}
\ No newline at end of file
+ ],
+ "metadata": {
+ "colab": {
+ "authorship_tag": "ABX9TyOSg5Tomy2ythze0d941UHb",
+ "include_colab_link": true,
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/README.md b/README.md
index 26523c8..19cb2b2 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,8 @@ We provide different use cases - Offline-Dataset (Store on Disk :floppy_disk:) a
In our Pre-Processing pipeline, we are able to extract quadratic patches from detected tissue areas, load annotation files (`.json`) and apply color normlizations. We make use of the popular [OpenSlide](https://openslide.org/) library, but extended it with the [RAPIDS cuCIM](https://github.com/rapidsai/cucim) framework for a speedup in patch-extraction.
+> We support all OpenSlide file formats as well as DICOM (`.dcm`) files, by utilizing [`wsidicom`](https://github.com/imi-bigpicture/wsidicom) and [`wsidicomizer`](https://github.com/imi-bigpicture/wsidicomizer).
+
**Explanations for use cases :floppy_disk: vs :zap:**
Offline-Dataset
@@ -369,10 +371,20 @@ In our Pre-Processing pipeline, we are able to extract quadratic patches from de
An example notebook is given [here](PathoPatch.ipynb):
-## Roadmap
-- :construction: In-memory inference loader - This feature is currently under development. Once completed, it will allow a dataset to be loaded into memory for inference, eliminating the need to store it on disk. Useful for inference
+### DICOM-conversion
+To convert WSI files into the DICOM format, please follow [this documentation](docs/DICOM.md).
-- :soon: Dicom support - We plan to add another backend for handling DICOM files with a different structure
+### Filelist with metadata
+See here: [examples/filelist.csv](examples/filelist.csv)
+
+```csv
+path,slide_mpp,magnification
+./test_database/input/WSI/CMU-1.svs,0.500,20
+```
+Only the `path` column is required; the `slide_mpp` and `magnification` columns are optional.
+
+## Roadmap
+- :construction: In-memory inference loader - This feature is currently under development; an unstable version is already available. Once completed, it will allow a dataset to be loaded into memory for inference, eliminating the need to store it on disk.
- :soon: More test cases
@@ -406,6 +418,13 @@ An example notebook is given [here](PathoPatch.ipynb):
pages="356--361",,
isbn="978-3-658-44037-4"
}
+```
+### Acknowledgement
+For processing DICOM-files, this work relies on the IMI-Bigpicture [`wsidicom`](https://github.com/imi-bigpicture/wsidicom) and [`wsidicomizer`](https://github.com/imi-bigpicture/wsidicomizer) libraries, with the following acknowledgements:
-```
+>wsidicom: Copyright 2021 Sectra AB, licensed under Apache 2.0.
+This project is part of a project that has received funding from the Innovative Medicines Initiative 2 Joint Undertaking under grant agreement No 945358. This Joint Undertaking receives support from the European Union’s Horizon 2020 research and innovation programme and EFPIA. IMI website: www.imi.europa.eu
+
+>wsidicomizer: Copyright 2021 Sectra AB, licensed under Apache 2.0.
+This project is part of a project that has received funding from the Innovative Medicines Initiative 2 Joint Undertaking under grant agreement No 945358. This Joint Undertaking receives support from the European Union’s Horizon 2020 research and innovation programme and EFPIA. IMI website: www.imi.europa.eu
diff --git a/docs/DICOM.md b/docs/DICOM.md
new file mode 100644
index 0000000..3ca3230
--- /dev/null
+++ b/docs/DICOM.md
@@ -0,0 +1,57 @@
+# Convert WSI-Files to DICOM
+
+## Basic cli-usage
+
+```bash
+usage: wsidicomizer [-h] -i INPUT [-o OUTPUT] [-t TILE_SIZE] [-m METADATA]
+ [-d DEFAULT_METADATA] [-l LEVELS [LEVELS ...]] [--label LABEL]
+ [--no-label] [--no-overview] [--no-confidential] [-w WORKERS]
+ [--chunk-size CHUNK_SIZE] [--format FORMAT] [--quality QUALITY]
+ [--subsampling SUBSAMPLING] [--offset-table OFFSET_TABLE]
+
+Convert compatible wsi file to DICOM
+
+options:
+ -h, --help show this help message and exit
+ -i INPUT, --input INPUT
+ Path to input wsi file.
+ -o OUTPUT, --output OUTPUT
+ Path to output folder. Folder will be created and must not
+ exist. If not specified a folder named after the input file is
+ created in the same path.
+ -t TILE_SIZE, --tile-size TILE_SIZE
+ Tile size (same for width and height). Required for ndpi and
+ openslide formats E.g. 512
+ -m METADATA, --metadata METADATA
+ Path to json metadata that will override metadata from source
+ image file.
+ -d DEFAULT_METADATA, --default-metadata DEFAULT_METADATA
+ Path to json metadata that will be used as default values.
+ -l LEVELS [LEVELS ...], --levels LEVELS [LEVELS ...]
+ Pyramid levels to include, if not all. E.g. 0 1 for base and
+ first pyramid layer.
+ --label LABEL Optional label image to use instead of label found in file.
+ --no-label If not to include label
+ --no-overview If not to include overview
+ --no-confidential If not to include confidential metadata
+ -w WORKERS, --workers WORKERS
+ Number of worker threads to use
+ --chunk-size CHUNK_SIZE
+ Number of tiles to give each worker at a time
+ --format FORMAT Encoding format to use if re-encoding. 'jpeg' or 'jpeg2000'.
+ --quality QUALITY Quality to use if re-encoding. It is recommended to not use >
+ 95 for jpeg. Use < 1 or > 1000 for lossless jpeg2000.
+ --subsampling SUBSAMPLING
+ Subsampling option if using jpeg for re-encoding. Use '444'
+ for no subsampling, '422' for 2x1 subsampling, and '420' for
+ 2x2 subsampling.
+ --offset-table OFFSET_TABLE
+ Offset table to use, 'bot' basic offset table, 'eot' extended
+ offset table, 'None' - no offset table.
+```
+
+## Acknowledgement for using WSIDICOMIZER
+
+wsidicomizer: Copyright 2021 Sectra AB, licensed under Apache 2.0.
+
+This project is part of a project that has received funding from the Innovative Medicines Initiative 2 Joint Undertaking under grant agreement No 945358. This Joint Undertaking receives support from the European Union’s Horizon 2020 research and innovation programme and EFPIA. IMI website: www.imi.europa.eu
diff --git a/environment.yaml b/environment.yaml
index 00ac216..5cc910f 100644
--- a/environment.yaml
+++ b/environment.yaml
@@ -31,3 +31,5 @@ dependencies:
- scikit-image==0.19.3
- torchvision==0.16.2
- tqdm==4.65.0
+ - wsidicomizer==0.13.2
+ - wsidicom==0.20.4
diff --git a/examples/filelist.csv b/examples/filelist.csv
new file mode 100644
index 0000000..2933339
--- /dev/null
+++ b/examples/filelist.csv
@@ -0,0 +1,2 @@
+path,slide_mpp,magnification
+./test_database/input/WSI/CMU-1.svs,0.500,20
diff --git a/examples/patch_extraction.yaml b/examples/patch_extraction.yaml
index 8e6c348..cc08185 100644
--- a/examples/patch_extraction.yaml
+++ b/examples/patch_extraction.yaml
@@ -4,7 +4,7 @@ wsi_filelist: # Path to a csv-filelist with WSI files (separator
# used.Must include full paths to WSIs, including suffixes.Can be used as an replacement for
# the wsi_paths option.If both are provided, yields an error. [str] [Optional, defaults to None]
output_path: # Path to the folder where the resulting dataset should be stored [str]
-wsi_extensions: # The extension of the WSI-files [str] [Optional, defaults to "svs"]
+wsi_extension: # The extension of the WSI-files [str] [Optional, defaults to "svs"]
# basic setups
patch_size: # The size of the patches in pixel that will be retrieved from the WSI, e.g. 256 for 256px. [][Optional, defaults to 256]
@@ -76,5 +76,6 @@ filter_patches: # Post-extraction patch filtering to sort out arte
log_path: # Path where log files should be stored. Otherwise, log files are stored in the output folder. [str][Optional, defaults to None]
log_level: # Set the logging level. [str][Optional, defaults to info]
hardware_selection: # Select hardware device (just if available, otherwise always cucim). [str] [Optional, defaults to cucim]
-wsi_magnification: # Manual WSI magnification, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). [float][Optional, defaults to None]
-wsi_mpp: # Manual WSI MPP, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). [float][Optional, defaults to None]
+wsi_properties:                     # If provided, these WSI properties are used for the extraction. [dict][Optional, defaults to None]
+  magnification:                    # Manual WSI magnification, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). [float][Optional, defaults to None]
+  slide_mpp:                        # Manual WSI MPP, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). [float][Optional, defaults to None]
diff --git a/pathopatch/cli.py b/pathopatch/cli.py
index d615849..79f6ee9 100644
--- a/pathopatch/cli.py
+++ b/pathopatch/cli.py
@@ -85,9 +85,11 @@ class PreProcessingConfig(BaseModel):
Args:
wsi_paths (str): Path to the folder where all WSI are stored or path to a single WSI-file.
output_path (str): Path to the folder where the resulting dataset should be stored.
- wsi_extension (str, optional): The extension of the WSI-files. Defaults to "svs.
+ wsi_extension (str, optional): The extension of the WSI-files. Defaults to "svs".
wsi_filelist (str, optional): Path to a csv-filelist with WSI files (separator: `,`), if provided just these files are used. Must include full paths to WSIs, including suffixes.
- Can be used as an replacement for the wsi_paths option. If both are provided, yields an error. Defaults to None.
+ Can be used as a replacement for the wsi_paths option. If both are provided, yields an error.
+ The path to the files should be written in a column named "path". Metadata for slide magnification and mpp can be provided in columns named 'slide_mpp' and 'magnification'.
+ Defaults to None.
patch_size (int, optional): The size of the patches in pixel that will be retrieved from the WSI, e.g. 256 for 256px. Defaults to 256.
patch_overlap (float, optional): The percentage amount pixels that should overlap between two different patches.
Please Provide as integer between 0 and 100, indicating overlap in percentage.
@@ -339,9 +341,11 @@ def __init__(self) -> None:
parser.add_argument(
"--wsi_filelist",
type=str,
- help="Path to a csv-filelist with WSI files (separator: `,`), if provided just these files are used."
- "Must include full paths to WSIs, including suffixes."
- "Can be used as an replacement for the wsi_paths option."
+            help="Path to a csv-filelist with WSI files (separator: `,`), if provided just these files are used. "
+            "Must include full paths to WSIs, including suffixes. "
+            "Can be used as a replacement for the wsi_paths option. "
+            "If both are provided, yields an error. "
+            "The path to the files should be written in a column named `path`. "
+            "Metadata for slide mpp and magnification can be provided in columns named `slide_mpp` and `magnification`.",
)
parser.add_argument(
diff --git a/pathopatch/config/config.py b/pathopatch/config/config.py
index a9de02f..6f827a8 100644
--- a/pathopatch/config/config.py
+++ b/pathopatch/config/config.py
@@ -18,7 +18,7 @@
"vms",
"vmu",
"dcm",
-] # mirax not tested yet
+]
ANNOTATION_EXT: List[str] = ["json"]
LOGGING_EXT: List[str] = ["critical", "error", "warning", "info", "debug"]
diff --git a/pathopatch/patch_extraction/dataset.py b/pathopatch/patch_extraction/dataset.py
index 835ab90..550fff8 100644
--- a/pathopatch/patch_extraction/dataset.py
+++ b/pathopatch/patch_extraction/dataset.py
@@ -27,8 +27,8 @@
from torchvision.transforms.v2 import ToTensor
from PIL import Image
from pathopatch.utils.exceptions import WrongParameterException
+from pathopatch.wsi_interfaces.openslide_deepzoom import DeepZoomGeneratorOS
from pathopatch.utils.patch_util import (
- DeepZoomGeneratorOS,
calculate_background_ratio,
compute_interesting_patches,
get_intersected_labels,
@@ -284,7 +284,7 @@ def _set_hardware(self) -> None:
self.logger.debug("Using CuCIM")
from cucim import CuImage
- from pathopatch.patch_extraction.cucim_deepzoom import (
+ from pathopatch.wsi_interfaces.cucim_deepzoom import (
DeepZoomGeneratorCucim,
)
@@ -451,8 +451,8 @@ def _prepare_slide(
)
self.tile_extractor = self.deepzoomgenerator(
- osr=self.slide_openslide,
- cucim_slide=self.slide,
+ meta_loader=self.slide_openslide,
+ image_loader=self.slide,
tile_size=self.res_tile_size,
overlap=self.res_overlap,
limit_bounds=True,
diff --git a/pathopatch/patch_extraction/patch_extraction.py b/pathopatch/patch_extraction/patch_extraction.py
index 166ba67..a489a23 100644
--- a/pathopatch/patch_extraction/patch_extraction.py
+++ b/pathopatch/patch_extraction/patch_extraction.py
@@ -6,7 +6,6 @@
# University Medicine Essen
-import csv
import json
import multiprocessing
import os
@@ -15,6 +14,7 @@
from pathlib import Path
from shutil import rmtree
from typing import Any, Callable, List, Tuple, Union
+
import matplotlib
import torch
@@ -23,6 +23,7 @@
import warnings
import numpy as np
+import pandas as pd
from natsort import natsorted
from openslide import OpenSlide
from PIL import Image
@@ -36,7 +37,6 @@
from pathopatch.utils.exceptions import UnalignedDataException, WrongParameterException
from pathopatch.utils.patch_dataset import load_tissue_detection_dl
from pathopatch.utils.patch_util import (
- DeepZoomGeneratorOS,
calculate_background_ratio,
compute_interesting_patches,
generate_thumbnails,
@@ -52,6 +52,11 @@
target_mpp_to_downsample,
)
from pathopatch.utils.tools import end_timer, module_exists, start_timer
+from pathopatch.wsi_interfaces.openslide_deepzoom import DeepZoomGeneratorOS
+from pathopatch.wsi_interfaces.wsidicomizer_openslide import (
+ DicomSlide,
+ DeepZoomGeneratorDicom,
+)
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)
@@ -195,6 +200,7 @@ class PreProcessor(object):
detector_transforms (Compose): Tissue detection transforms
curr_wsi_level (int): Current WSI level
save_context (bool): Save context flag
+ # TODO: improve and check with new dcm code and new filelist loading
Methods:
setup_output_path(output_path: Union[str, Path]) -> None:
@@ -270,6 +276,7 @@ def __init__(self, slide_processor_config: PreProcessingConfig) -> None:
self.config = slide_processor_config
self.files, self.annotation_files = [], []
+ self.global_properties = {}
self.num_files = 0
self.rescaling_factor = 1
@@ -342,22 +349,34 @@ def _load_wsi_filelist(self, wsi_filelist: Union[str, Path]) -> None:
wsi_filelist (Union[str, Path]): Path to the CSV file containing the WSI file list.
CSV File Example:
- The CSV file should contain a single column with the paths to the WSI files.
+ The CSV file must contain a `path` column with the path to each WSI; the `slide_mpp` and `magnification` columns are optional.
Example content of "wsi_filelist.csv":
```
- /path/to/wsi1.svs
- /path/to/wsi2.svs
- /path/to/wsi3.svs
+ path,slide_mpp,magnification
+ test_database/input/WSI/CMU-1.svs,0.499,20
```
"""
self.files = []
- with open(wsi_filelist, "r") as csv_file:
- csv_reader = csv.reader(csv_file)
- for row in csv_reader:
- self.files.append(Path(row[0]))
- self.files = natsorted(self.files, key=lambda x: x.name)
+ csv_file = pd.read_csv(wsi_filelist, sep=",")
+
+ self.files = natsorted(csv_file["path"].to_list(), key=lambda x: Path(x).name)
+ self.files = [Path(f) for f in self.files]
self.num_files = len(self.files)
+        for _, row in csv_file.iterrows():
+            # Collect the optional per-slide metadata from the filelist row.
+            # A column may be missing entirely, or be present with an empty cell;
+            # pandas reads an empty cell as NaN, which must be dropped so that the
+            # slide/config metadata lookup in _prepare_wsi is used instead.
+            prop_dict = {
+                key: row[key]
+                for key in ("slide_mpp", "magnification")
+                if key in row.index and pd.notna(row[key])
+            }
+            # Properties are keyed by the file name (not the full path), which is
+            # how _prepare_wsi looks them up.
+            self.global_properties[Path(row["path"]).name] = prop_dict
+
def _set_annotations_paths(
self,
annotation_paths: Union[Path, str],
@@ -403,23 +422,28 @@ def _set_hardware(self, hardware_selection: str = "cucim") -> None:
hardware_selection (str, optional): Specify hardware. Just for experiments. Must be either "openslide", or "cucim".
Defaults to cucim.
"""
- if (
- module_exists("cucim", error="ignore")
- and hardware_selection.lower() == "cucim"
- ):
- logger.info("Using CuCIM")
- from cucim import CuImage
-
- from pathopatch.patch_extraction.cucim_deepzoom import (
- DeepZoomGeneratorCucim,
- )
-
- self.deepzoomgenerator = DeepZoomGeneratorCucim
- self.image_loader = CuImage
+ if self.config.wsi_extension == "dcm":
+ logger.info("Using WsiDicom as WSIReader")
+ self.deepzoomgenerator = DeepZoomGeneratorDicom
+ self.image_loader = DicomSlide
else:
- logger.info("Using OpenSlide")
- self.deepzoomgenerator = DeepZoomGeneratorOS
- self.image_loader = OpenSlide
+ if (
+ module_exists("cucim", error="ignore")
+ and hardware_selection.lower() == "cucim"
+ ):
+ logger.info("Using CuCIM as WSIReader")
+ from cucim import CuImage
+
+ from pathopatch.wsi_interfaces.cucim_deepzoom import (
+ DeepZoomGeneratorCucim,
+ )
+
+ self.deepzoomgenerator = DeepZoomGeneratorCucim
+ self.image_loader = CuImage
+ else:
+ logger.info("Using OpenSlide as WSIReader")
+ self.deepzoomgenerator = DeepZoomGeneratorOS
+ self.image_loader = OpenSlide
def _set_tissue_detector(self) -> None:
"""Set up the tissue detection model and transformations.
@@ -774,47 +798,60 @@ def _prepare_wsi(
logger.info(f"Computing patches for {wsi_file.name}")
# load slide (OS and CuImage/OS)
- slide = OpenSlide(str(wsi_file))
+ if self.config.wsi_extension == "dcm":
+ slide = DicomSlide(wsi_file)
+ else:
+ slide = OpenSlide(str(wsi_file))
slide_cu = self.image_loader(str(wsi_file))
- if "openslide.mpp-x" in slide.properties:
- slide_mpp = float(slide.properties.get("openslide.mpp-x"))
- elif (
- self.config.wsi_properties is not None
- and "slide_mpp" in self.config.wsi_properties
- ):
- slide_mpp = self.config.wsi_properties["slide_mpp"]
- else: # last option is to use regex
- try:
- pattern = re.compile(r"MPP(?: =)? (\d+\.\d+)")
- # Use the pattern to find the match in the string
- match = pattern.search(slide.properties["openslide.comment"])
- # Extract the float value
- if match:
- slide_mpp = float(match.group(1))
- logger.warning(
- f"MPP {slide_mpp:.4f} was extracted from the comment of the WSI (Tiff-Metadata comment string) - Please check for correctness!"
- )
- else:
+
+ slide_mpp = None
+ slide_mag = None
+ if str(wsi_file.name) in self.global_properties:
+ slide_properties = self.global_properties[str(wsi_file.name)]
+ if "slide_mpp" in slide_properties:
+ slide_mpp = slide_properties["slide_mpp"]
+ if "magnification" in slide_properties:
+ slide_mag = slide_properties["magnification"]
+ if slide_mpp is None:
+ if "openslide.mpp-x" in slide.properties:
+ slide_mpp = float(slide.properties.get("openslide.mpp-x"))
+ elif (
+ self.config.wsi_properties is not None
+ and "slide_mpp" in self.config.wsi_properties
+ ):
+ slide_mpp = self.config.wsi_properties["slide_mpp"]
+ else: # last option is to use regex
+ try:
+ pattern = re.compile(r"MPP(?: =)? (\d+\.\d+)")
+ # Use the pattern to find the match in the string
+ match = pattern.search(slide.properties["openslide.comment"])
+ # Extract the float value
+ if match:
+ slide_mpp = float(match.group(1))
+ logger.warning(
+ f"MPP {slide_mpp:.4f} was extracted from the comment of the WSI (Tiff-Metadata comment string) - Please check for correctness!"
+ )
+ else:
+ raise NotImplementedError(
+ "MPP must be defined either by metadata or by config file!"
+ )
+ except:
raise NotImplementedError(
"MPP must be defined either by metadata or by config file!"
)
- except:
+ if slide_mag is None:
+ if "openslide.objective-power" in slide.properties:
+ slide_mag = float(slide.properties.get("openslide.objective-power"))
+ elif (
+ self.config.wsi_properties is not None
+ and "magnification" in self.config.wsi_properties
+ ):
+ slide_mag = self.config.wsi_properties["magnification"]
+ else:
raise NotImplementedError(
- "MPP must be defined either by metadata or by config file!"
+ "Magnification must be defined either by metadata or by config file!"
)
- if "openslide.objective-power" in slide.properties:
- slide_mag = float(slide.properties.get("openslide.objective-power"))
- elif (
- self.config.wsi_properties is not None
- and "magnification" in self.config.wsi_properties
- ):
- slide_mag = self.config.wsi_properties["magnification"]
- else:
- raise NotImplementedError(
- "MPP must be defined either by metadata or by config file!"
- )
-
slide_properties = {"mpp": slide_mpp, "magnification": slide_mag}
# Generate thumbnails
logger.info("Generate thumbnails")
@@ -860,8 +897,8 @@ def _prepare_wsi(
)
tiles = self.deepzoomgenerator(
- osr=slide,
- cucim_slide=slide_cu,
+ meta_loader=slide,
+ image_loader=slide_cu,
tile_size=tile_size,
overlap=overlap,
limit_bounds=True,
@@ -1003,7 +1040,10 @@ def process_queue(
context_tiles = {}
# reload image
- slide = OpenSlide(str(wsi_file))
+ if self.config.wsi_extension == "dcm":
+ slide = DicomSlide(wsi_file)
+ else:
+ slide = OpenSlide(str(wsi_file))
slide_cu = self.image_loader(str(wsi_file))
tile_size, overlap = patch_to_tile_size(
@@ -1011,8 +1051,8 @@ def process_queue(
)
tiles = self.deepzoomgenerator(
- osr=slide,
- cucim_slide=slide_cu,
+ meta_loader=slide,
+ image_loader=slide_cu,
tile_size=tile_size,
overlap=overlap,
limit_bounds=True,
@@ -1022,8 +1062,8 @@ def process_queue(
for c_scale in self.config.context_scales:
overlap_context = int((c_scale - 1) * tile_size / 2) + overlap
context_tiles[c_scale] = self.deepzoomgenerator(
- osr=slide,
- cucim_slide=slide_cu,
+ meta_loader=slide,
+ image_loader=slide_cu,
tile_size=tile_size, # tile_size,
overlap=overlap_context, # (1-c_scale) * tile_size / 2,
limit_bounds=True,
@@ -1224,8 +1264,8 @@ def _get_surrounding_patches(
self.config.patch_size, self.config.patch_overlap, self.rescaling_factor
)
tiles = self.deepzoomgenerator(
- osr=slide,
- cucim_slide=slide_cu,
+ meta_loader=slide,
+ image_loader=slide_cu,
tile_size=tile_size,
overlap=overlap,
limit_bounds=True,
@@ -1352,8 +1392,8 @@ def save_normalization_vector(
# extract all patches
patches = []
tiles = self.deepzoomgenerator(
- osr=slide,
- cucim_slide=slide_cu,
+ meta_loader=slide,
+ image_loader=slide_cu,
tile_size=tile_size,
overlap=overlap,
limit_bounds=True,
diff --git a/pathopatch/patch_extraction/process_batch.py b/pathopatch/patch_extraction/process_batch.py
index 5a79d04..aa7eedf 100644
--- a/pathopatch/patch_extraction/process_batch.py
+++ b/pathopatch/patch_extraction/process_batch.py
@@ -15,8 +15,8 @@
from PIL import Image
from shapely.geometry import Polygon
+from pathopatch import logger
from pathopatch.utils.patch_util import (
- DeepZoomGeneratorOS,
calculate_background_ratio,
get_intersected_labels,
macenko_normalization,
@@ -25,8 +25,7 @@
standardize_brightness,
)
from pathopatch.utils.tools import module_exists
-
-from pathopatch import logger
+from pathopatch.wsi_interfaces.openslide_deepzoom import DeepZoomGeneratorOS
def process_batch(
@@ -107,7 +106,7 @@ def process_batch(
if module_exists("cucim", error="ignore"):
from cucim import CuImage
- from pathopatch.deepzoom.cucim_deepzoom import DeepZoomGeneratorCucim
+ from pathopatch.wsi_interfaces.cucim_deepzoom import DeepZoomGeneratorCucim
generator_module = DeepZoomGeneratorCucim
image_loader = CuImage
@@ -120,8 +119,8 @@ def process_batch(
tile_size = patch_to_tile_size(patch_size, patch_overlap)
tiles = generator_module(
- osr=slide,
- cucim_slide=slide_cu,
+ meta_loader=slide,
+ image_loader=slide_cu,
tile_size=tile_size,
overlap=patch_overlap,
limit_bounds=True,
@@ -131,8 +130,8 @@ def process_batch(
for scale in context_scales:
overlap_context = int((scale - 1) * patch_size / 2) + patch_overlap
context_tiles[scale] = generator_module(
- osr=slide,
- cucim_slide=slide_cu,
+ meta_loader=slide,
+ image_loader=slide_cu,
tile_size=tile_size, # tile_size,
overlap=overlap_context, # (1-scale) * tile_size / 2,
limit_bounds=True,
diff --git a/pathopatch/utils/patch_util.py b/pathopatch/utils/patch_util.py
index 7e09227..ff64d3c 100644
--- a/pathopatch/utils/patch_util.py
+++ b/pathopatch/utils/patch_util.py
@@ -47,25 +47,40 @@ def get_files_from_dir(
"""
if not isinstance(file_path, list):
file_path = [file_path]
- all_files = []
- for curr_path in file_path:
- # Could be that the path itself is a WSI
- curr_path = Path(curr_path)
- if curr_path.suffix[1:] == file_type and curr_path.is_file():
- all_files += [curr_path]
- else:
- all_files += [
- curr_file
- for curr_file in curr_path.glob("*." + file_type)
- if curr_file.is_file()
- ]
- # Could also be (class) folder in folder
- if len(all_files) == 0:
+    if file_type == "dcm":
+        # dicom files: files of one WSI need to be stored inside a folder
+        all_files = []
+        for curr_path in map(Path, file_path):  # entries may be str, as in the non-dcm branch
+            # check if path contains dcm files
+            subfiles = [f for f in curr_path.glob("*.dcm") if f.is_file()]
+            if len(subfiles) != 0:
+                all_files.append(curr_path)  # -> dicom folder needs to be loaded
+            # check if path contains subfolders with dicom files
+            subfolders = [f for f in curr_path.glob("*") if f.is_dir()]
+            for subfolder in subfolders:
+                subfiles = [f for f in subfolder.glob("*.dcm") if f.is_file()]
+                if len(subfiles) != 0:
+                    all_files.append(subfolder)
+ else:
+ all_files = []
+ for curr_path in file_path:
+ # Could be that the path itself is a WSI
+ curr_path = Path(curr_path)
+ if curr_path.suffix[1:] == file_type and curr_path.is_file():
+ all_files += [curr_path]
+ else:
all_files += [
curr_file
- for curr_file in curr_path.glob("**/*" + file_type)
+ for curr_file in curr_path.glob("*." + file_type)
if curr_file.is_file()
]
+ # Could also be (class) folder in folder
+ if len(all_files) == 0:
+ all_files += [
+ curr_file
+ for curr_file in curr_path.glob("**/*" + file_type)
+ if curr_file.is_file()
+ ]
return all_files
@@ -1080,23 +1095,3 @@ def polygon_to_patch_mask(
label_mask[:, :, label] = label_submask
return label_mask
-
-
-# ignore kwargs for OpenSlide DeepZoomGenerator
-class DeepZoomGeneratorOS(DeepZoomGenerator):
- def __init__(self, osr, tile_size=254, overlap=1, limit_bounds=False, **kwargs):
- """Overwrite DeepZoomGenerator of OpenSlide
-
- DeepZoomGenerator gets overwritten to provide matching API with CuCim
- No Change in functionality
-
- Args:
- osr (OpenSlide): OpenSlide Image. Needed for OS compatibility and for retrieving metadata.
- tile_size (int, optional): the width and height of a single tile. For best viewer
- performance, tile_size + 2 * overlap should be a power
- of two.. Defaults to 254.
- overlap (int, optional): the number of extra pixels to add to each interior edge
- of a tile. Defaults to 1.
- limit_bounds (bool, optional): True to render only the non-empty slide region. Defaults to False.
- """
- super().__init__(osr, tile_size, overlap, limit_bounds)
diff --git a/pathopatch/wsi_interfaces/__init__.py b/pathopatch/wsi_interfaces/__init__.py
new file mode 100644
index 0000000..8b02715
--- /dev/null
+++ b/pathopatch/wsi_interfaces/__init__.py
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+# Different interfaces to interact with WSIs
+#
+# This package bundles the WSI backends (OpenSlide, CuCIM, DICOM) and
+# their Deep Zoom generators behind a common interface
+#
+# @ Fabian Hörst, fabian.hoerst@uk-essen.de
+# Institute for Artificial Intelligence in Medicine,
+# University Medicine Essen
+
+import logging
+
+logger = logging.getLogger("__main__")
+logger.addHandler(logging.NullHandler())
diff --git a/pathopatch/patch_extraction/cucim_deepzoom.py b/pathopatch/wsi_interfaces/cucim_deepzoom.py
similarity index 88%
rename from pathopatch/patch_extraction/cucim_deepzoom.py
rename to pathopatch/wsi_interfaces/cucim_deepzoom.py
index fbd90d7..da3d8c2 100644
--- a/pathopatch/patch_extraction/cucim_deepzoom.py
+++ b/pathopatch/wsi_interfaces/cucim_deepzoom.py
@@ -21,8 +21,8 @@ class DeepZoomGeneratorCucim(DeepZoomGenerator):
use cucim to read regions.
Args:
- osr (OpenSlide): OpenSlide Image. Needed for OS compatibility and for retrieving metadata.
- cucim_slide (CuImage): CuImage slide. Used for retrieving image data.
+ meta_loader (OpenSlide): OpenSlide Image. Needed for OS compatibility and for retrieving metadata.
+ image_loader (CuImage): CuImage slide. Used for retrieving image data.
tile_size (int, optional): the width and height of a single tile. For best viewer
performance, tile_size + 2 * overlap should be a power
of two.. Defaults to 254.
@@ -41,15 +41,15 @@ class DeepZoomGeneratorCucim(DeepZoomGenerator):
def __init__(
self,
- osr: OpenSlide,
- cucim_slide: CuImage,
+ meta_loader: OpenSlide,
+ image_loader: CuImage,
tile_size: int = 254,
overlap: int = 1,
limit_bounds=False,
):
- super().__init__(osr, tile_size, overlap, limit_bounds)
+ super().__init__(meta_loader, tile_size, overlap, limit_bounds)
- self._cucim_slide = cucim_slide
+ self._cucim_slide = image_loader
self.memory_capacity = preferred_memory_capacity(
self._cucim_slide, patch_size=(tile_size, tile_size)
)
diff --git a/pathopatch/wsi_interfaces/openslide_deepzoom.py b/pathopatch/wsi_interfaces/openslide_deepzoom.py
new file mode 100644
index 0000000..7773580
--- /dev/null
+++ b/pathopatch/wsi_interfaces/openslide_deepzoom.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+# Wrapping Openslide for a common interface
+#
+# @ Fabian Hörst, fabian.hoerst@uk-essen.de
+# Institute for Artificial Intelligence in Medicine,
+# University Medicine Essen
+
+from openslide.deepzoom import DeepZoomGenerator
+
+
+# ignore kwargs for OpenSlide DeepZoomGenerator
+class DeepZoomGeneratorOS(DeepZoomGenerator):
+    def __init__(
+        self, image_loader, tile_size=254, overlap=1, limit_bounds=False, **kwargs
+    ):
+        """Overwrite DeepZoomGenerator of OpenSlide
+
+        DeepZoomGenerator gets overwritten to provide a matching API with the CuCim generator.
+        No change in functionality; any extra **kwargs are accepted and ignored.
+
+        Args:
+            image_loader (OpenSlide): OpenSlide image, forwarded to the parent class as the slide to tile.
+            tile_size (int, optional): the width and height of a single tile. For best viewer
+                performance, tile_size + 2 * overlap should be a power
+                of two. Defaults to 254.
+            overlap (int, optional): the number of extra pixels to add to each interior edge
+                of a tile. Defaults to 1.
+            limit_bounds (bool, optional): True to render only the non-empty slide region. Defaults to False.
+        """
+        super().__init__(image_loader, tile_size, overlap, limit_bounds)
diff --git a/pathopatch/wsi_interfaces/wsidicomizer_openslide.py b/pathopatch/wsi_interfaces/wsidicomizer_openslide.py
new file mode 100644
index 0000000..1bfe599
--- /dev/null
+++ b/pathopatch/wsi_interfaces/wsidicomizer_openslide.py
@@ -0,0 +1,274 @@
+import math
+from pathlib import Path
+from typing import List, Tuple, Union
+
+from openslide.deepzoom import DeepZoomGenerator
+from PIL import Image
+from wsidicom import WsiDicom
+from wsidicom.file import WsiDicomFileSource
+
+
+class DicomSlide(WsiDicom):
+ def __init__(self, dcm_folder: Union[Path, str]) -> None:
+ """Open the DICOM slide from the specified folder."""
+ self.dimensions: Tuple[int, int]
+ self.properties: dict
+ self.level_dimensions: Tuple[Tuple[int, int]]
+ self.level_count: int
+ self.level_downsamples: List[float]
+
+ source = WsiDicomFileSource.open(dcm_folder)
+ super().__init__(source, True)
+
+ # information and properties to make this compatible with OpenSlide
+ self.dimensions = (self.size.width, self.size.height)
+ self.level_count = len(self.levels)
+ self.level_dimensions = self._get_level_dimensions()
+ self.level_downsamples = self._get_level_downsamples(self.level_dimensions)
+
+ self.properties = {
+ "mpp": self.mpp,
+ "openslide.mpp-x": self.mpp.width,
+ "openslide.mpp-y": self.mpp.height,
+ "openslide.level-count": self.level_count,
+ "level_count": self.level_count,
+ "level_dimensions": self.level_dimensions,
+ "metadata": self.metadata,
+ }
+ for level, (downsample, dims) in enumerate(
+ zip(self.level_downsamples, self.level_dimensions)
+ ):
+ self.properties[f"openslide.level[{level}].downsample"] = downsample
+ self.properties[f"openslide.level[{level}].height"] = dims[1]
+ self.properties[
+ f"openslide.level[{level}].tile-height"
+ ] = self.tile_size.height
+ self.properties[
+ f"openslide.level[{level}].tile-width"
+ ] = self.tile_size.width
+ self.properties[f"openslide.level[{level}].width"] = dims[0]
+
+ def _get_level_dimensions(self) -> Tuple[Tuple[int, int]]:
+ """Get the dimensions of all levels.
+
+ Returns:
+ Tuple[Tuple[int, int]]: The dimensions of all levels.
+ Each tuple contains the width and height of the level.
+ """
+ return tuple((level.size.width, level.size.height) for level in self.levels)
+
+    def _get_level_downsamples(
+        self, level_dimensions: Tuple[Tuple[int, int], ...]
+    ) -> Tuple[float, ...]:
+        """Get the downsample factor for each level.
+
+        Args:
+            level_dimensions (Tuple[Tuple[int, int], ...]): The dimensions of all levels.
+                Each tuple contains the width and height of the level.
+
+        Returns:
+            Tuple[float, ...]: Width of the first level divided by the width of each level.
+        """
+        highest_x = level_dimensions[0][0]
+        return tuple(highest_x / dim[0] for dim in level_dimensions)
+
+ def _convert_region_openslide(
+ self, location: Tuple[int, int], level: int
+ ) -> Tuple[Tuple[int, int], int]:
+ """Convert the location and level from OpenSlide to DICOM.
+
+ Args:
+ location (Tuple[int, int]): Location in OpenSlide format (referenced to highest level).
+ level (int): Level in OpenSlide format.
+
+ Returns:
+ Tuple[Tuple[int, int], int]:
+ The location in DICOM format and the level.
+ """
+ level = self.levels[level]
+ x = location[0] // 2**level.level
+ y = location[1] // 2**level.level
+
+ return ((x, y), level.level)
+
+    def get_best_level_for_downsample(self, downsample: float) -> int:
+        """Get the best level for a given downsample factor.
+
+        Args:
+            downsample (float): The downsample factor.
+
+        Returns:
+            int: Index of the level whose downsample equals 2 ** floor(log2(downsample)); otherwise the level with the largest smaller downsample, or 0 if there is none.
+        """
+        if downsample == 0:
+            return 0
+        closest_power_of_2 = 2 ** math.floor(math.log2(downsample))
+        if closest_power_of_2 in self.level_downsamples:
+            return self.level_downsamples.index(closest_power_of_2)
+        else:
+            smaller_downsamples = [
+                ds for ds in self.level_downsamples if ds < closest_power_of_2
+            ]
+            if smaller_downsamples:
+                closest_smaller_downsample = max(smaller_downsamples)
+                return self.level_downsamples.index(closest_smaller_downsample)
+            else:
+                return 0
+
+    def read_region(
+        self, location: Tuple[int, int], level: int, size: Tuple[int, int]
+    ) -> Image.Image:
+        """Read a region from the slide. Interface equal to OpenSlide.
+
+        Args:
+            location (Tuple[int, int]): Location in OpenSlide format (referenced to highest level).
+            level (int): Level in OpenSlide format.
+            size (Tuple[int, int]): Size of the region in pixels.
+
+        Returns:
+            Image.Image: The region as an image.
+        """
+        location, level = self._convert_region_openslide(location, level)
+        return super(DicomSlide, self).read_region(location, level, size)
+
+    def get_thumbnail(self, size: Tuple[int, int]) -> Image.Image:
+        """Get the thumbnail of the slide. Interface equal to OpenSlide.
+
+        Args:
+            size (Tuple[int, int]): Size of the thumbnail in pixels.
+
+        Returns:
+            Image.Image: The thumbnail as an image.
+        """
+        return super(DicomSlide, self).read_thumbnail(size)
+
+
+class DeepZoomGeneratorDicom(DeepZoomGenerator):
+    BOUNDS_OFFSET_PROPS = (  # slide properties read for the level-0 offset when limit_bounds is set
+        "openslide.bounds-x",
+        "openslide.bounds-y",
+    )
+    BOUNDS_SIZE_PROPS = (  # slide properties read for the active-area size when limit_bounds is set
+        "openslide.bounds-width",
+        "openslide.bounds-height",
+    )
+
+    def __init__(
+        self,
+        image_loader: DicomSlide,
+        tile_size=256,
+        overlap=0,
+        limit_bounds=False,
+        **kwargs,
+    ) -> None:
+        """Create a DeepZoomGenerator (inherited from OpenSlide) that reads from a DicomSlide; the parent __init__ is not called.
+
+        Args:
+            image_loader (DicomSlide): DicomSlide object. Extra **kwargs are accepted and ignored.
+            tile_size (int, optional): Tile size. Defaults to 256.
+            overlap (int, optional): Overlap. Defaults to 0.
+            limit_bounds (bool, optional): Not working now, waiting for the implementation. Defaults to False.
+        """
+        self._osr = image_loader
+        self._z_t_downsample = tile_size
+        self._z_overlap = overlap
+        self._limit_bounds = limit_bounds
+
+        if limit_bounds:
+            # Level 0 coordinate offset
+            self._l0_offset = tuple(
+                int(image_loader.properties.get(prop, 0))
+                for prop in self.BOUNDS_OFFSET_PROPS
+            )
+            # Slide level dimensions scale factor in each axis
+            size_scale = tuple(
+                int(image_loader.properties.get(prop, l0_lim)) / l0_lim
+                for prop, l0_lim in zip(self.BOUNDS_SIZE_PROPS, image_loader.dimensions)
+            )
+            # Dimensions of active area
+            self._l_dimensions = tuple(
+                tuple(
+                    int(math.ceil(l_lim * scale))
+                    for l_lim, scale in zip(l_size, size_scale)
+                )
+                for l_size in image_loader.level_dimensions
+            )
+        else:
+            self._l_dimensions = image_loader.level_dimensions
+            self._l0_offset = (0, 0)
+        self._l0_dimensions = self._l_dimensions[0]
+        # Deep Zoom level
+        z_size = self._l0_dimensions
+        z_dimensions = [z_size]
+        while z_size[0] > 1 or z_size[1] > 1:
+            z_size = tuple(max(1, int(math.ceil(z / 2))) for z in z_size)
+            z_dimensions.append(z_size)
+        self._z_dimensions = tuple(reversed(z_dimensions))
+
+        # self._l0_offset = (0, 0)
+        # self._l_dimensions = image_loader.level_dimensions
+        # self._l0_dimensions = self._l_dimensions[0]
+        # z_size = self._l0_dimensions
+        # z_dimensions = [z_size]
+        # while z_size[0] > 1 or z_size[1] > 1:
+        # z_size = tuple(max(1, int(math.ceil(z / 2))) for z in z_size)
+        # z_dimensions.append(z_size)
+        # self._z_dimensions = tuple(reversed(z_dimensions))
+
+        # Tile
+        def tiles(z_lim):
+            return int(math.ceil(z_lim / self._z_t_downsample))
+
+        self._t_dimensions = tuple(
+            (tiles(z_w), tiles(z_h)) for z_w, z_h in self._z_dimensions
+        )
+
+        # Deep Zoom level count
+        self._dz_levels = len(self._z_dimensions)
+
+        # Total downsamples for each Deep Zoom level
+        l0_z_downsamples = tuple(
+            2 ** (self._dz_levels - dz_level - 1) for dz_level in range(self._dz_levels)
+        )
+
+        # Preferred slide levels for each Deep Zoom level
+        self._slide_from_dz_level = tuple(
+            self._osr.get_best_level_for_downsample(d) for d in l0_z_downsamples
+        )
+
+        # Piecewise downsamples
+        self._l0_l_downsamples = self._osr.level_downsamples
+        self._l_z_downsamples = tuple(
+            l0_z_downsamples[dz_level]
+            / self._l0_l_downsamples[self._slide_from_dz_level[dz_level]]
+            for dz_level in range(self._dz_levels)
+        )
+
+        # Slide background color
+        self._bg_color = "#ffffff"
+
+ def get_tile(self, level, address):
+ """Return an RGB PIL.Image for a tile.
+
+ level: the Deep Zoom level.
+ address: the address of the tile within the level as a (col, row)
+ tuple."""
+
+ # Read tile
+ args, z_size = self._get_tile_info(level, address)
+ tile = self._osr.read_region(*args)
+ tile = tile.convert("RGBA")
+
+ # Apply on solid background
+ bg = Image.new(
+ "RGB", tile.size, self._bg_color
+ ) # -> tile -> PIL.Image.Image, image mode=RGBA
+ tile = Image.composite(tile, bg, tile)
+
+ # Scale to the correct size
+ if tile.size != z_size:
+ # Image.Resampling added in Pillow 9.1.0
+ # Image.LANCZOS removed in Pillow 10
+ tile.thumbnail(z_size, getattr(Image, "Resampling", Image).LANCZOS)
+
+ return tile
diff --git a/requirements.txt b/requirements.txt
index f00d302..1e15256 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,3 +18,5 @@ setuptools<=65.6.3
tqdm
torchvision
torch
+wsidicom==0.20.4
+wsidicomizer==0.13.2
diff --git a/requirements_develop.txt b/requirements_develop.txt
deleted file mode 100644
index 1d36b1a..0000000
--- a/requirements_develop.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Pillow>=9.5.0
-PyYAML
-Shapely==1.8.5.post1
-black
-colorama
-flake8
-flake8-html
-genbadge
-geojson>=3.0.0
-matplotlib
-natsort
-numpy>1.22,<1.24
-opencv_python_headless
-openslide_python
-pandas
-pre-commit
-pydantic==1.10.4
-pytest
-pytest-sugar
-rasterio==1.3.5.post1
-requests
-scikit-image
-setuptools<=65.6.3
-tqdm
-torchvision
-torch
diff --git a/setup.py b/setup.py
index 2815d7b..6213ee4 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,6 @@
from setuptools import find_packages, setup
-VERSION = "1.0.0b"
+VERSION = "1.0.1b"
DESCRIPTION = "PathoPatch - Accelerating Artificial Intelligence Based Whole Slide Image Analysis with an Optimized Preprocessing Pipeline"
with open("docs/README_pypi.md", "r") as fh:
LONG_DESCRIPTION = fh.read()
@@ -39,6 +39,8 @@
"tqdm",
"torchvision",
"torch",
+        "wsidicom==0.20.4",  # exact pin; a single "=" is not a valid version specifier
+        "wsidicomizer==0.13.2",  # exact pin, matches requirements.txt
],
scripts=[
"pathopatch/wsi_extraction.py",
diff --git a/test_database/filelist.csv b/test_database/filelist.csv
new file mode 100644
index 0000000..8d4977e
--- /dev/null
+++ b/test_database/filelist.csv
@@ -0,0 +1,2 @@
+path,slide_mpp,magnification
+./test_database/input/WSI/CMU-1.svs,0.500,20.0
diff --git a/tests/static_test_files/preprocessing/annotations_filtering/config.yaml b/tests/static_test_files/preprocessing/annotations_filtering/config.yaml
index e81b791..94708f4 100644
--- a/tests/static_test_files/preprocessing/annotations_filtering/config.yaml
+++ b/tests/static_test_files/preprocessing/annotations_filtering/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI
output_path: ./tests/tmp_results_folder/annotations_filtering
-wsi_extensions: svs
+wsi_extension: svs
# annotations
annotation_paths: ./test_database/input/Annotations
diff --git a/tests/static_test_files/preprocessing/annotations_simple/config.yaml b/tests/static_test_files/preprocessing/annotations_simple/config.yaml
index e4f44b4..60e934b 100644
--- a/tests/static_test_files/preprocessing/annotations_simple/config.yaml
+++ b/tests/static_test_files/preprocessing/annotations_simple/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
-wsi_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/WSI/CMU-1.svs
+wsi_paths: ./test_database/input/WSI/CMU-1.svs
output_path: ./tests/tmp_results_folder/annotations_simple
-wsi_extensions: svs
+wsi_extension: svs
# annotations
annotation_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/Annotations
diff --git a/tests/static_test_files/preprocessing/annotations_store_masks/config.yaml b/tests/static_test_files/preprocessing/annotations_store_masks/config.yaml
index 3d12ac2..8f09346 100644
--- a/tests/static_test_files/preprocessing/annotations_store_masks/config.yaml
+++ b/tests/static_test_files/preprocessing/annotations_store_masks/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/WSI/CMU-1.svs
output_path: ./tests/tmp_results_folder/store_masks
-wsi_extensions: svs
+wsi_extension: svs
# annotations
annotation_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/Annotations
diff --git a/tests/static_test_files/preprocessing/annotations_tissue_mask/config.yaml b/tests/static_test_files/preprocessing/annotations_tissue_mask/config.yaml
index 522c181..7965e83 100644
--- a/tests/static_test_files/preprocessing/annotations_tissue_mask/config.yaml
+++ b/tests/static_test_files/preprocessing/annotations_tissue_mask/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/WSI/CMU-1.svs
output_path: ./tests/tmp_results_folder/annotations_tissue_mask
-wsi_extensions: svs
+wsi_extension: svs
# annotations
annotation_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/Annotations
diff --git a/tests/static_test_files/preprocessing/annotations_tissue_mask_failing/config.yaml b/tests/static_test_files/preprocessing/annotations_tissue_mask_failing/config.yaml
index 39d4298..a1d5373 100644
--- a/tests/static_test_files/preprocessing/annotations_tissue_mask_failing/config.yaml
+++ b/tests/static_test_files/preprocessing/annotations_tissue_mask_failing/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/WSI
output_path: ./tests/tmp_results_folder/annotations_tissue_mask_failing
-wsi_extensions: svs
+wsi_extension: svs
# annotations
annotation_paths: /Users/fhoerst/Fabian-Projekte/Preprocessing/pathopatch/test_database/input/Annotations
diff --git a/tests/static_test_files/preprocessing/baseline/config.yaml b/tests/static_test_files/preprocessing/baseline/config.yaml
index 1644b29..d730699 100644
--- a/tests/static_test_files/preprocessing/baseline/config.yaml
+++ b/tests/static_test_files/preprocessing/baseline/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs
output_path: ./tests/tmp_results_folder/baseline
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
downsample: 1
diff --git a/tests/static_test_files/preprocessing/baseline_dataset/config.yaml b/tests/static_test_files/preprocessing/baseline_dataset/config.yaml
index c0f6577..5fbe61a 100644
--- a/tests/static_test_files/preprocessing/baseline_dataset/config.yaml
+++ b/tests/static_test_files/preprocessing/baseline_dataset/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs
output_path: ./tests/tmp_results_folder/baseline_pytorch_dataset
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
downsample: 1
diff --git a/tests/static_test_files/preprocessing/cli.yaml b/tests/static_test_files/preprocessing/cli.yaml
index af42a1a..56a7672 100644
--- a/tests/static_test_files/preprocessing/cli.yaml
+++ b/tests/static_test_files/preprocessing/cli.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: /homes/fhoerst/histo-projects/DigitalHistologyHub/test_database/input/WSI
output_path: /homes/fhoerst/histo-projects/DigitalHistologyHub/test_database/output/debug
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
downsample: 0
diff --git a/tests/static_test_files/preprocessing/complex_overlap_dataset/config.yaml b/tests/static_test_files/preprocessing/complex_overlap_dataset/config.yaml
index f9edccd..5f8164f 100644
--- a/tests/static_test_files/preprocessing/complex_overlap_dataset/config.yaml
+++ b/tests/static_test_files/preprocessing/complex_overlap_dataset/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs
output_path: ./tests/tmp_results_folder/complex_setup_dataset
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
target_mpp: 0.782
diff --git a/tests/static_test_files/preprocessing/complex_setup/config.yaml b/tests/static_test_files/preprocessing/complex_setup/config.yaml
index 006f536..1df4c6a 100644
--- a/tests/static_test_files/preprocessing/complex_setup/config.yaml
+++ b/tests/static_test_files/preprocessing/complex_setup/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs
output_path: ./tests/tmp_results_folder/complex_setup
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
target_mpp: 0.499
diff --git a/tests/static_test_files/preprocessing/complex_setup_dataset/config.yaml b/tests/static_test_files/preprocessing/complex_setup_dataset/config.yaml
index 3973c3d..4f4fbdd 100644
--- a/tests/static_test_files/preprocessing/complex_setup_dataset/config.yaml
+++ b/tests/static_test_files/preprocessing/complex_setup_dataset/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs
output_path: ./tests/tmp_results_folder/complex_setup_dataset
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
target_mpp: 0.499
diff --git a/tests/static_test_files/preprocessing/dicom/dicom.yaml b/tests/static_test_files/preprocessing/dicom/dicom.yaml
new file mode 100644
index 0000000..8f548ca
--- /dev/null
+++ b/tests/static_test_files/preprocessing/dicom/dicom.yaml
@@ -0,0 +1,20 @@
+# dataset paths
+wsi_paths: ./test_database/dicom_files/CMU-1
+output_path: ./tests/tmp_results_folder/dicom/dicom_extraction
+wsi_extension: dcm
+
+# basic setups
+target_mpp: 0.499
+patch_size: 256
+patch_overlap: 0
+normalize_stains: False
+min_intersection_ratio: 0.05
+
+processes: 8
+overwrite: True
+
+# other
+log_level: debug
+wsi_properties: {
+ magnification: 20.0,
+}
diff --git a/tests/static_test_files/preprocessing/dicom/openslide.yaml b/tests/static_test_files/preprocessing/dicom/openslide.yaml
new file mode 100644
index 0000000..cbc9dfd
--- /dev/null
+++ b/tests/static_test_files/preprocessing/dicom/openslide.yaml
@@ -0,0 +1,20 @@
+# dataset paths
+wsi_paths: ./test_database/input/WSI/CMU-1.svs
+output_path: ./tests/tmp_results_folder/dicom/openslide_extraction
+wsi_extension: svs
+
+# basic setups
+target_mpp: 0.499
+patch_size: 256
+patch_overlap: 0
+normalize_stains: False
+min_intersection_ratio: 0.05
+
+processes: 8
+overwrite: True
+
+# other
+log_level: debug
+wsi_properties: {
+ magnification: 20,
+}
diff --git a/tests/static_test_files/preprocessing/downsample/config.yaml b/tests/static_test_files/preprocessing/downsample/config.yaml
index 7814ead..e0517a3 100644
--- a/tests/static_test_files/preprocessing/downsample/config.yaml
+++ b/tests/static_test_files/preprocessing/downsample/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs
output_path: ./tests/tmp_results_folder/downsample
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
downsample: 2
diff --git a/tests/static_test_files/preprocessing/downsample_dataset_dataset/config.yaml b/tests/static_test_files/preprocessing/downsample_dataset_dataset/config.yaml
index 7814ead..e0517a3 100644
--- a/tests/static_test_files/preprocessing/downsample_dataset_dataset/config.yaml
+++ b/tests/static_test_files/preprocessing/downsample_dataset_dataset/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs
output_path: ./tests/tmp_results_folder/downsample
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
downsample: 2
diff --git a/tests/static_test_files/preprocessing/filelist/config.yaml b/tests/static_test_files/preprocessing/filelist/config.yaml
new file mode 100644
index 0000000..3a7ee07
--- /dev/null
+++ b/tests/static_test_files/preprocessing/filelist/config.yaml
@@ -0,0 +1,12 @@
+# dataset paths
+wsi_filelist: ./tests/static_test_files/preprocessing/filelist/filelist.csv
+output_path: ./tests/tmp_results_folder/filelist
+wsi_extension: svs
+
+# basic setups
+target_mpp: 0.998
+processes: 8
+
+# other
+log_level: debug
+overwrite: True
diff --git a/tests/static_test_files/preprocessing/filelist/filelist.csv b/tests/static_test_files/preprocessing/filelist/filelist.csv
new file mode 100644
index 0000000..8d4977e
--- /dev/null
+++ b/tests/static_test_files/preprocessing/filelist/filelist.csv
@@ -0,0 +1,2 @@
+path,slide_mpp,magnification
+./test_database/input/WSI/CMU-1.svs,0.500,20.0
diff --git a/tests/static_test_files/preprocessing/filelist/results/CMU-1/metadata.yaml b/tests/static_test_files/preprocessing/filelist/results/CMU-1/metadata.yaml
new file mode 100644
index 0000000..0c00ca0
--- /dev/null
+++ b/tests/static_test_files/preprocessing/filelist/results/CMU-1/metadata.yaml
@@ -0,0 +1,15 @@
+orig_n_tiles_cols: 90
+orig_n_tiles_rows: 65
+base_magnification: 20
+downsampling: 2
+label_map:
+ background: 0
+patch_overlap: 0
+patch_size: 256
+base_mpp: 0.5
+target_patch_mpp: 1.0
+stain_normalization: false
+magnification: 10.0
+level: 15
+patch_distribution:
+ 0: 0
diff --git a/tests/static_test_files/preprocessing/filelist/results/processed.json b/tests/static_test_files/preprocessing/filelist/results/processed.json
new file mode 100644
index 0000000..24a6e37
--- /dev/null
+++ b/tests/static_test_files/preprocessing/filelist/results/processed.json
@@ -0,0 +1,5 @@
+{
+ "processed_files": [
+ "CMU-1"
+ ]
+}
diff --git a/tests/static_test_files/preprocessing/macenko/load_macenko.yaml b/tests/static_test_files/preprocessing/macenko/load_macenko.yaml
index 4f24f37..115bdbe 100644
--- a/tests/static_test_files/preprocessing/macenko/load_macenko.yaml
+++ b/tests/static_test_files/preprocessing/macenko/load_macenko.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI
output_path: ./tests/output/patch_extraction
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
downsample: 1
diff --git a/tests/static_test_files/preprocessing/macenko/test_macenko.yaml b/tests/static_test_files/preprocessing/macenko/test_macenko.yaml
index f8bff60..2efc115 100644
--- a/tests/static_test_files/preprocessing/macenko/test_macenko.yaml
+++ b/tests/static_test_files/preprocessing/macenko/test_macenko.yaml
@@ -1,6 +1,6 @@
# dataset paths
wsi_path: ./test_database/input/WSI/CMU-1-Small-Region.svs
-wsi_extensions: svs
+wsi_extension: svs
save_json_path: ./tests/tmp_results_folder/macenko/test_macenko.json
# basic setups
diff --git a/tests/static_test_files/preprocessing/roi/config.yaml b/tests/static_test_files/preprocessing/roi/config.yaml
index 55bba35..a8e63b9 100644
--- a/tests/static_test_files/preprocessing/roi/config.yaml
+++ b/tests/static_test_files/preprocessing/roi/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/JP2K-33003-1.svs
output_path: ./tests/tmp_results_folder/roi
-wsi_extensions: svs
+wsi_extension: svs
# annotations
annotation_paths: ./test_database/input/Annotations
diff --git a/tests/static_test_files/preprocessing/roi_context/config.yaml b/tests/static_test_files/preprocessing/roi_context/config.yaml
index aac9c72..2ac3bf3 100644
--- a/tests/static_test_files/preprocessing/roi_context/config.yaml
+++ b/tests/static_test_files/preprocessing/roi_context/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/JP2K-33003-1.svs
output_path: ./tests/tmp_results_folder/roi_context
-wsi_extensions: svs
+wsi_extension: svs
# annotations
annotation_paths: ./test_database/input/Annotations
diff --git a/tests/static_test_files/preprocessing/target_magnification/config.yaml b/tests/static_test_files/preprocessing/target_magnification/config.yaml
index 4f162ab..73c0e3a 100644
--- a/tests/static_test_files/preprocessing/target_magnification/config.yaml
+++ b/tests/static_test_files/preprocessing/target_magnification/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs
output_path: ./tests/tmp_results_folder/target_magnification
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
target_mag: 10
diff --git a/tests/static_test_files/preprocessing/target_magnification_dataset/config.yaml b/tests/static_test_files/preprocessing/target_magnification_dataset/config.yaml
index 02f96f1..4d0260c 100644
--- a/tests/static_test_files/preprocessing/target_magnification_dataset/config.yaml
+++ b/tests/static_test_files/preprocessing/target_magnification_dataset/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs
output_path: ./tests/tmp_results_folder/target_magnification_dataset
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
target_mag: 10
diff --git a/tests/static_test_files/preprocessing/target_mpp/config.yaml b/tests/static_test_files/preprocessing/target_mpp/config.yaml
index 926ef8a..60ee360 100644
--- a/tests/static_test_files/preprocessing/target_mpp/config.yaml
+++ b/tests/static_test_files/preprocessing/target_mpp/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs
output_path: ./tests/tmp_results_folder/target_mpp
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
target_mpp: 1
diff --git a/tests/static_test_files/preprocessing/target_mpp_dataset/config.yaml b/tests/static_test_files/preprocessing/target_mpp_dataset/config.yaml
index 59719a7..46839d4 100644
--- a/tests/static_test_files/preprocessing/target_mpp_dataset/config.yaml
+++ b/tests/static_test_files/preprocessing/target_mpp_dataset/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs
output_path: ./tests/tmp_results_folder/target_mpp_dataset
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
target_mpp: 1
diff --git a/tests/static_test_files/preprocessing/target_mpp_macenko/config.yaml b/tests/static_test_files/preprocessing/target_mpp_macenko/config.yaml
index 2e17dee..575c098 100644
--- a/tests/static_test_files/preprocessing/target_mpp_macenko/config.yaml
+++ b/tests/static_test_files/preprocessing/target_mpp_macenko/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs
output_path: ./tests/tmp_results_folder/target_mpp_macenko
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
target_mpp: 1.2
diff --git a/tests/static_test_files/preprocessing/target_mpp_macenko_dataset/config.yaml b/tests/static_test_files/preprocessing/target_mpp_macenko_dataset/config.yaml
index 6c8ef6e..79ce990 100644
--- a/tests/static_test_files/preprocessing/target_mpp_macenko_dataset/config.yaml
+++ b/tests/static_test_files/preprocessing/target_mpp_macenko_dataset/config.yaml
@@ -1,7 +1,7 @@
# dataset paths
wsi_paths: ./test_database/input/WSI/CMU-1-Small-Region.svs
output_path: ./tests/tmp_results_folder/target_mpp_macenko_dataset
-wsi_extensions: svs
+wsi_extension: svs
# basic setups
target_mpp: 1.2
diff --git a/tests/test_core_modules/test_filelist.py b/tests/test_core_modules/test_filelist.py
new file mode 100644
index 0000000..87eb855
--- /dev/null
+++ b/tests/test_core_modules/test_filelist.py
@@ -0,0 +1,88 @@
+import os
+import shutil
+import unittest
+from pathlib import Path
+
+import yaml
+
+from pathopatch.cli import PreProcessingConfig, PreProcessingYamlConfig
+from pathopatch.patch_extraction.patch_extraction import PreProcessor
+from pathopatch.utils.logger import Logger
+from pathopatch.utils.tools import close_logger
+from test_database.download import check_test_database
+
+
+class TestPreProcessorFilelist(unittest.TestCase):
+ """Test the PreProcessor Module with basic (default) parameter setup, but with filelist input"""
+
+ @classmethod
+ def setUpClass(cls) -> None:
+ """Setup configuration"""
+ check_test_database()
+ cls.config = "./tests/static_test_files/preprocessing/filelist/config.yaml"
+ with open(cls.config, "r") as config_file:
+ yaml_config = yaml.safe_load(config_file)
+ yaml_config = PreProcessingYamlConfig(**yaml_config)
+
+ opt_dict = dict(yaml_config)
+ cls.opt_dict = {k: v for k, v in opt_dict.items() if v is not None}
+ cls.configuration = PreProcessingConfig(**cls.opt_dict)
+
+ cls.gt_folder = Path(
+ "./tests/static_test_files/preprocessing/filelist/results/"
+ ).resolve()
+ cls.wsi_name = "CMU-1"
+
+ preprocess_logger = Logger(
+ level=cls.configuration.log_level.upper(),
+ log_dir=cls.configuration.log_path,
+ comment="preprocessing",
+ use_timestamp=True,
+ )
+ cls.logger = preprocess_logger.create_logger()
+ # do preprocessing for result checking
+ cls.slide_processor = PreProcessor(slide_processor_config=cls.configuration)
+ cls.logger.info(
+ "Sucessfully started the setup - Now we calculate the base dataset. May take up to 10 Minutes!"
+ )
+ cls.slide_processor.sample_patches_dataset()
+
+    @classmethod
+    def tearDownClass(cls):
+        """Close the logger and remove the test output directory with all its contents."""
+        # close logger
+        close_logger(cls.logger)
+
+        # remove sub-folders, then loose files, then the output directory itself
+        clean_folders = [
+            f for f in Path(cls.opt_dict["output_path"]).iterdir() if f.is_dir()
+        ]
+        for f in clean_folders:
+            shutil.rmtree(f.resolve())
+        clean_files = [
+            f for f in Path(cls.opt_dict["output_path"]).iterdir() if f.is_file()
+        ]
+        for f in clean_files:
+            os.remove(f.resolve())
+        shutil.rmtree(Path(cls.opt_dict["output_path"]).resolve())  # no leaked loop variable needed
+
+ def test_init_files(self) -> None:
+ """For this case 1 WSI files should have been loaded"""
+ self.assertEqual(self.slide_processor.num_files, 1)
+
+ def test_init_num_annotations_loaded(self) -> None:
+ """For this case 0 annotation files should have been loaded"""
+ self.assertEqual(len(self.slide_processor.annotation_files), 0)
+
+ def test_metadata_wsi(self) -> None:
+ gt_path = self.gt_folder / self.wsi_name / "metadata.yaml"
+ with open(gt_path, "r") as config_file:
+ yaml_config = yaml.safe_load(config_file)
+
+ test_path = (
+ self.slide_processor.config.output_path / self.wsi_name / "metadata.yaml"
+ )
+ with open(test_path, "r") as config_file:
+ test_file = yaml.safe_load(config_file)
+
+ self.assertEqual(yaml_config, test_file)
diff --git a/tests/test_dicom_module/__init__.py b/tests/test_dicom_module/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_dicom_module/test_dicom_loader.py b/tests/test_dicom_module/test_dicom_loader.py
new file mode 100644
index 0000000..e2d2c3c
--- /dev/null
+++ b/tests/test_dicom_module/test_dicom_loader.py
@@ -0,0 +1,156 @@
+import json
+import shutil
+import subprocess
+import unittest
+from pathlib import Path
+
+import numpy as np
+import yaml
+from numpy.testing import assert_almost_equal
+from PIL import Image
+
+from pathopatch.cli import PreProcessingConfig, PreProcessingYamlConfig
+from pathopatch.patch_extraction.patch_extraction import PreProcessor
+from pathopatch.utils.logger import Logger
+from pathopatch.utils.tools import close_logger
+from test_database.download import check_test_database
+
+
+class TestPreProcessorDICOM(unittest.TestCase):
+    """Test the dicom image loader, must be equal to openslide loader"""
+
+    @classmethod
+    def _load_configuration(cls, config_path: str) -> dict:
+        """Read a YAML preprocessing config and store the parsed state on the class.
+
+        Sets ``cls.config``, ``cls.opt_dict`` and ``cls.configuration``.
+
+        Args:
+            config_path (str): Path to the YAML configuration file.
+
+        Returns:
+            dict: Result of ``cls.configuration.dict()`` for the loaded config.
+        """
+        cls.config = config_path
+        with open(cls.config, "r") as config_file:
+            yaml_config = yaml.safe_load(config_file)
+        yaml_config = PreProcessingYamlConfig(**yaml_config)
+
+        opt_dict = dict(yaml_config)
+        # options whose value is None are not passed on
+        cls.opt_dict = {k: v for k, v in opt_dict.items() if v is not None}
+        cls.configuration = PreProcessingConfig(**cls.opt_dict)
+        return cls.configuration.dict()
+
+    @classmethod
+    def _run_preprocessing(cls) -> None:
+        """Create a ``PreProcessor`` for ``cls.configuration`` and sample the patches."""
+        cls.slide_processor = PreProcessor(slide_processor_config=cls.configuration)
+        cls.logger.info(
+            "Sucessfully started the setup - Now we calculate the base dataset. May take up to 10 Minutes!"
+        )
+        cls.slide_processor.sample_patches_dataset()
+
+    @staticmethod
+    def _load_patch_metadata(metadata_path: Path) -> dict:
+        """Load a ``patch_metadata.json`` file as one flat dictionary.
+
+        The file is read as a list of dictionaries; for each entry the first
+        key is mapped to the first value.
+
+        Args:
+            metadata_path (Path): Path to the ``patch_metadata.json`` file.
+
+        Returns:
+            dict: Mapping from each entry's first key to its first value.
+        """
+        with open(metadata_path, "r") as metadata_file:
+            entries = json.load(metadata_file)
+        entries = sorted(entries, key=lambda d: list(d.keys())[0])
+        return {list(elem.keys())[0]: list(elem.values())[0] for elem in entries}
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Convert the test slide to DICOM and run preprocessing for both loaders.
+
+        Raises:
+            subprocess.CalledProcessError: If the ``wsidicomizer`` conversion
+                exits with a non-zero status.
+        """
+        check_test_database()
+        conversion_command = [
+            "wsidicomizer",
+            "-i",
+            "./test_database/input/WSI/CMU-1.svs",
+            "-o",
+            "./test_database/dicom_files/CMU-1",
+        ]
+        # dicom conversion; check=True aborts the setup if the conversion fails
+        # instead of continuing with missing DICOM files
+        subprocess.run(conversion_command, stdout=subprocess.PIPE, check=True)
+
+        # openslide
+        cls.wsi_name = "CMU-1"
+        cls.openslide_config = cls._load_configuration(
+            "./tests/static_test_files/preprocessing/dicom/openslide.yaml"
+        )
+        preprocess_logger = Logger(
+            level=cls.configuration.log_level.upper(),
+            log_dir=cls.configuration.log_path,
+            comment="preprocessing",
+            use_timestamp=True,
+        )
+        cls.logger = preprocess_logger.create_logger()
+        # do preprocessing for openslide
+        cls._run_preprocessing()
+
+        # dicom
+        cls.dicom_config = cls._load_configuration(
+            "./tests/static_test_files/preprocessing/dicom/dicom.yaml"
+        )
+        cls._run_preprocessing()
+
+    @classmethod
+    def tearDownClass(cls):
+        """Clean output directory"""
+        # close logger
+        close_logger(cls.logger)
+
+        # clean output directory
+        # NOTE(review): this removes the parent of the DICOM output folder;
+        # presumably the openslide output lives under the same parent - confirm
+        # against the two YAML configs.
+        shutil.rmtree(cls.dicom_config["output_path"].parent)
+        shutil.rmtree(Path("./test_database/dicom_files/CMU-1").resolve())
+
+    def test_metadata_wsi(self) -> None:
+        """Test if the WSI metadata of both loaders is equal"""
+        os_path = self.openslide_config["output_path"] / self.wsi_name / "metadata.yaml"
+        with open(os_path, "r") as config_file:
+            os_config = yaml.safe_load(config_file)
+
+        dcm_path = self.dicom_config["output_path"] / self.wsi_name / "metadata.yaml"
+        with open(dcm_path, "r") as config_file:
+            dcm_file = yaml.safe_load(config_file)
+
+        self.assertEqual(os_config, dcm_file)
+
+    def test_count_patches(self) -> None:
+        """Test if the number of patches is correct"""
+        os_path = self.openslide_config["output_path"] / self.wsi_name / "metadata"
+        os_patches_count = len(list(os_path.glob("*.yaml")))
+
+        dcm_path = self.dicom_config["output_path"] / self.wsi_name / "metadata"
+        dcm_patches_count = len(list(dcm_path.glob("*.yaml")))
+
+        self.assertEqual(os_patches_count, dcm_patches_count)
+
+    def test_patch_results_wsi(self) -> None:
+        """Test if patches are extracted the right way for WSI"""
+        patch_os = self._load_patch_metadata(
+            self.openslide_config["output_path"] / self.wsi_name / "patch_metadata.json"
+        )
+        test_dcm = self._load_patch_metadata(
+            self.dicom_config["output_path"] / self.wsi_name / "patch_metadata.json"
+        )
+
+        # Extract unique patch names from both files
+        unique_patches = set(patch_os.keys()).union(test_dcm.keys())
+        differing_patches = 0
+        # sorted so the printed list of differing patches is deterministic
+        for patch_name in sorted(unique_patches):
+            # a patch differs if one loader lacks it or the metadata is unequal
+            if (
+                patch_name not in patch_os
+                or patch_name not in test_dcm
+                or patch_os[patch_name] != test_dcm[patch_name]
+            ):
+                differing_patches += 1
+                print(f"differing_patches: {patch_name}")
+
+        self.assertLess(
+            differing_patches,
+            15,
+            "Patches are not equal and differ in more than 15 patches",
+        )
+
+    def test_example_images(self) -> None:
+        """Test if selected example patch images are equal for both loaders"""
+        patch_list = [
+            "CMU-1_8_116.png",
+            "CMU-1_10_109.png",
+            "CMU-1_28_152.png",
+            "CMU-1_102_16.png",
+        ]
+        os_path = self.openslide_config["output_path"] / self.wsi_name / "patches"
+        dcm_path = self.dicom_config["output_path"] / self.wsi_name / "patches"
+
+        for p_name in patch_list:
+            os_image = np.array(Image.open((os_path / p_name).resolve()))
+            dcm_image = np.array(Image.open((dcm_path / p_name).resolve()))
+
+            assert_almost_equal(os_image, dcm_image)