diff --git a/.gitignore b/.gitignore
index 2fd09a8..80c3cf6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,4 @@ pathopatch.egg-info/
 dist
 build
 push_build.yml
+debug
diff --git a/README.md b/README.md
index 5dd18e8..6cbf969 100644
--- a/README.md
+++ b/README.md
@@ -331,3 +331,30 @@ TBD

PathoPatcher by Fabian Hörst, University Hospital Essen, is licensed under CC BY-NC-SA 4.0

+
+## Citation
+```latex
+@InProceedings{10.1007/978-3-658-44037-4_91,
+    author="H{\"o}rst, Fabian
+    and Schaheer, Sajad H.
+    and Baldini, Giulia
+    and Bahnsen, Fin H.
+    and Egger, Jan
+    and Kleesiek, Jens",
+    editor="Maier, Andreas
+    and Deserno, Thomas M.
+    and Handels, Heinz
+    and Maier-Hein, Klaus
+    and Palm, Christoph
+    and Tolxdorff, Thomas",
+    title="Accelerating Artificial Intelligence-based Whole Slide Image Analysis with an Optimized Preprocessing Pipeline",
+    booktitle="Bildverarbeitung f{\"u}r die Medizin 2024",
+    year="2024",
+    publisher="Springer Fachmedien Wiesbaden",
+    address="Wiesbaden",
+    pages="356--361",
+    isbn="978-3-658-44037-4"
+}
+
+
+```
diff --git a/pathopatch/patch_extraction/dataset.py b/pathopatch/patch_extraction/dataset.py
index 8b922a4..d1704eb 100644
--- a/pathopatch/patch_extraction/dataset.py
+++ b/pathopatch/patch_extraction/dataset.py
@@ -4,7 +4,11 @@
 # @ Fabian Hörst, fabian.hoerst@uk-essen.de
 # Institute for Artifical Intelligence in Medicine,
 # University Medicine Essen
+import sys
+from click import Option
+
+sys.path.append("/Users/fhoerst/Fabian-Projekte/Preprocessing/PathoPatcher")
 import logging
 import os
@@ -22,8 +26,7 @@
 from shapely.geometry import Polygon
 from torch.utils.data import Dataset
 from torchvision.transforms.v2 import ToTensor
-
-from pathopatch import logger
+from PIL import Image
 from pathopatch.utils.exceptions import WrongParameterException
 from pathopatch.utils.patch_util import (
     DeepZoomGeneratorOS,
@@ -43,11 +46,11 @@
 warnings.filterwarnings("ignore", category=UserWarning)


-class PreProcessingDatasetConfig(BaseModel):
+class LivePatchWSIConfig(BaseModel):
     """Storing the configuration for the PatchWSIDataset

     Args:
-        wsipath (str): Path to the WSI
+        wsi_path (str): Path to the WSI
         wsi_properties (dict, optional): Dictionary with manual WSI metadata, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). Supported keys are slide_mpp and magnification
         patch_size (int, optional): The size of the patches in pixel that will be retrieved from the WSI, e.g. 256 for 256px. Defaults to 256.
         patch_overlap (float, optional): The percentage amount pixels that should overlap between two different patches.
@@ -63,6 +66,7 @@ class PreProcessingDatasetConfig(BaseModel):
            expresses which kind of downsampling should be used with respect
            to the highest possible resolution. Defaults to 0.
         level (int, optional): The tile level for sampling, alternative to downsample. Defaults to None.
+        target_mpp_tolerance (float, optional): Tolerance for the target_mpp. If the WSI mpp is within the range target_mpp +/- tolerance, no rescaling is performed. Defaults to 0.0.
         annotation_path (str, optional): Path to the .json file with the annotations. Defaults to None.
         label_map_file (str, optional): Path to the .json file with the label map. Defaults to None.
         label_map (dict, optional): Dictionary with the label map. Defaults to None.
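The new `target_mpp_tolerance` field documented above gates the rescaling decision in `_prepare_slide` (see the hunk further below): if the slide's native mpp already lies within `target_mpp +/- tolerance`, the slide is used as-is. A minimal sketch of that behaviour, using a hypothetical `needs_rescaling` helper that is not part of this patch:

```python
# Hypothetical helper (illustration only, not part of the patch)
def needs_rescaling(slide_mpp: float, target_mpp: float, tolerance: float = 0.0) -> bool:
    """Return True if the slide has to be rescaled to reach target_mpp."""
    return not (slide_mpp - tolerance <= target_mpp <= slide_mpp + tolerance)


print(needs_rescaling(0.25, 0.30, tolerance=0.0))  # True  -> rescaling required
print(needs_rescaling(0.25, 0.30, tolerance=0.1))  # False -> 0.30 lies within 0.25 +/- 0.1
```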
@@ -96,6 +100,7 @@ class PreProcessingDatasetConfig(BaseModel):
     target_mpp: Optional[float]
     target_mag: Optional[float]
     level: Optional[int]
+    target_mpp_tolerance: Optional[float] = 0.0

     # annotation specific settings
     annotation_path: Optional[str]
@@ -174,7 +179,7 @@ def __post_init_post_parse__(self) -> None:
 class LivePatchWSIDataset(Dataset):
     def __init__(
         self,
-        slide_processor_config: PreProcessingDatasetConfig,
+        slide_processor_config: LivePatchWSIConfig,
         logger: logging.Logger = None,
         transforms: Callable = ToTensor(),
     ) -> None:
@@ -184,7 +189,7 @@ def __init__(
         functionality for loading and processing WSIs.

         Args:
-            slide_processor_config (PreProcessingDatasetConfig): Configuration for preprocessing the dataset.
+            slide_processor_config (LivePatchWSIConfig): Configuration for preprocessing the dataset.
             logger (logging.Logger, optional): Logger for logging events. Defaults to None.
             transforms (Callable, optional): Transforms to apply to the patches. Defaults to ToTensor().

@@ -195,7 +200,7 @@ def __init__(
             wsi_metadata (dict): Metadata of the WSI
             deepzoomgenerator (Union[DeepZoomGeneratorOS, Any]): Class for tile extraction, deepzoom-interface
             tile_extractor (Union[DeepZoomGeneratorOS, Any]): Instance of self.deepzoomgenerator
-            config (PreProcessingDatasetConfig): Configuration for preprocessing the dataset
+            config (LivePatchWSIConfig): Configuration for preprocessing the dataset
             logger (logging.Logger): Logger for logging events
             rescaling_factor (int): Rescaling factor for the slide
             interesting_coords (List[Tuple[int, int, float]]): List of interesting coordinates (patches -> row, col, ratio)
@@ -296,9 +301,6 @@ def _set_tissue_detector(self) -> None:

         Raises:
             ImportError: If torch or torchvision cannot be imported.
-
-        Returns:
-            None
         """
         try:
             import torch.nn as nn
@@ -318,7 +320,7 @@ def _set_tissue_detector(self) -> None:
                "cuda:0" if torch.cuda.is_available() else "cpu"
            )
            if self.detector_device == "cpu":
-                logger.warning(
+                self.logger.warning(
                    "No CUDA device detected - Speed may be very slow. Please consider performing extraction on CUDA device or disable tissue detector!"
                )
            model = mobilenet_v3_small().to(device=self.detector_device)
@@ -383,7 +385,7 @@ def _prepare_slide(
            # Extract the float value
            if match:
                slide_mpp = float(match.group(1))
-                logger.warning(
+                self.logger.warning(
                    f"MPP {slide_mpp:.4f} was extracted from the comment of the WSI (Tiff-Metadata comment string) - Please check for correctness!"
                )
            else:
@@ -414,15 +416,25 @@ def _prepare_slide(
         resulting_mpp = None
         if self.config.target_mpp is not None:
-            self.config.downsample, self.rescaling_factor = target_mpp_to_downsample(
-                slide_properties["mpp"],
-                self.config.target_mpp,
-            )
+            if (
+                not slide_properties["mpp"] - self.config.target_mpp_tolerance
+                <= self.config.target_mpp
+                <= slide_properties["mpp"] + self.config.target_mpp_tolerance
+            ):
+                (
+                    self.config.downsample,
+                    self.rescaling_factor,
+                ) = target_mpp_to_downsample(
+                    slide_properties["mpp"],
+                    self.config.target_mpp,
+                )
+            else:
+                self.config.downsample = 1
+                self.rescaling_factor = 1.0
             if self.rescaling_factor != 1.0:
                 resulting_mpp = (
                     slide_properties["mpp"]
                     * self.rescaling_factor
-                    / 2
                     * self.config.downsample
                 )
             else:
@@ -519,7 +531,7 @@ def _prepare_slide(
         )
         self.logger.debug(f"Number of patches sampled: {len(interesting_coords)}")
         if len(interesting_coords) == 0:
-            logger.warning(f"No patches sampled from {self.config.wsi_path}")
+            self.logger.warning(f"No patches sampled from {self.config.wsi_path}")

         self.wsi_metadata = {
             "orig_n_tiles_cols": n_cols,
@@ -539,7 +551,7 @@ def _prepare_slide(

         return list(interesting_coords), level, polygons_downsampled, region_labels

-    def _get_wsi_annotations(self, downsample: int):
+    def _get_wsi_annotations(self, downsample: int):  # TODO: docstring
         region_labels: List[str] = []
         polygons: List[Polygon] = []
         polygons_downsampled: List[Polygon] = []
@@ -610,7 +622,7 @@ def __getitem__(self, index: int) -> Tuple[np.ndarray, dict, np.ndarray]:
                 ratio = {}
                 patch_mask = np.zeros(
                     (self.res_tile_size, self.res_tile_size), dtype=np.uint8
-                )  # TODO:
+                )
             else:
                 intersected_labels, ratio, patch_mask = get_intersected_labels(
                     tile_size=self.res_tile_size,
@@ -634,6 +646,19 @@ def __getitem__(self, index: int) -> Tuple[np.ndarray, dict, np.ndarray]:
                 normalization_vector_path=self.config.normalization_vector_json,
             )

+        if self.res_tile_size != self.config.patch_size:
+            image_tile = Image.fromarray(image_tile)
+            if self.res_tile_size > self.config.patch_size:
+                image_tile.thumbnail(
+                    (self.config.patch_size, self.config.patch_size),
+                    getattr(Image, "Resampling", Image).LANCZOS,
+                )
+            else:
+                image_tile = image_tile.resize(
+                    (self.config.patch_size, self.config.patch_size),
+                    getattr(Image, "Resampling", Image).LANCZOS,
+                )
+            image_tile = np.array(image_tile)
         try:
             image_tile = self.transforms(image_tile)
         except TypeError:
@@ -651,11 +676,11 @@ def __getitem__(self, index: int) -> Tuple[np.ndarray, dict, np.ndarray]:

         return image_tile, patch_metadata, patch_mask


-class PatchWSIDataloader:
-    """Dataloader for PatchWSIDataset
+class LivePatchWSIDataloader:
+    """Dataloader for LivePatchWSIDataset

     Args:
-        dataset (PatchWSIDataset): Dataset to load patches from.
+        dataset (LivePatchWSIDataset): Dataset to load patches from.
         batch_size (int): Batch size for the dataloader.
         shuffle (bool, optional): To shuffle iterations. Defaults to False.
         seed (int, optional): Seed for shuffle. Defaults to 42.
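The resize block added to `__getitem__` above downscales with `Image.thumbnail` and upscales with `Image.resize`, resolving the LANCZOS filter via `getattr(Image, "Resampling", Image)` so the code runs on Pillow >= 9.1 (where the filters moved into the `Image.Resampling` enum) as well as on older releases. A standalone sketch of the same idea; `resize_patch` is a hypothetical helper name:

```python
import numpy as np
from PIL import Image


def resize_patch(patch: np.ndarray, out_size: int) -> np.ndarray:
    """Resize a (H, W, C) uint8 patch to (out_size, out_size) with LANCZOS filtering."""
    # Resampling enum on Pillow >= 9.1, module-level constant on older Pillow
    lanczos = getattr(Image, "Resampling", Image).LANCZOS
    img = Image.fromarray(patch)
    if patch.shape[0] > out_size:
        # thumbnail() only shrinks and modifies the image in place
        img.thumbnail((out_size, out_size), lanczos)
    else:
        # resize() can also enlarge and returns a new image
        img = img.resize((out_size, out_size), lanczos)
    return np.array(img)
```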
@@ -663,12 +688,12 @@ class PatchWSIDataloader:
     def __init__(
         self,
-        dataset: PatchWSIDataset,
+        dataset: LivePatchWSIDataset,
         batch_size: int,
         shuffle: bool = False,
         seed: int = 42,
     ) -> None:
-        assert isinstance(dataset, PatchWSIDataset)
+        assert isinstance(dataset, LivePatchWSIDataset)
         assert isinstance(batch_size, int)
         assert isinstance(shuffle, bool)
         assert isinstance(seed, int)
@@ -682,6 +707,7 @@ def __init__(
         if self.shuffle:
             grtr = np.random.default_rng(seed)
             self.element_list = grtr.permutation(self.element_list)
+        self.i = 0

     def __iter__(self):
         self.i = 0
@@ -732,4 +758,21 @@ def __next__(self) -> Tuple[torch.Tensor, List[dict], List[np.ndarray]]:
         raise StopIteration

     def __len__(self):
-        return int(np.ceil(len(self.dataset) / self.batch_size))
+        return int(np.ceil((len(self.dataset) - self.discard_count) / self.batch_size))
+
+
+if __name__ == "__main__":
+    """Just for testing purposes"""
+    logger = logging.getLogger(__name__)
+    logger.setLevel(logging.DEBUG)
+    logger.info("Test")
+    config = LivePatchWSIConfig(
+        wsi_path="/Users/fhoerst/Fabian-Projekte/Selocan/RicardoScans/266819.svs",
+        patch_size=256,
+        patch_overlap=0,
+        target_mpp=0.3,
+        target_mpp_tolerance=0.1,
+    )
+    ps_dataset = LivePatchWSIDataset(config, logger)
+    ps_dataloader = LivePatchWSIDataloader(ps_dataset, batch_size=8)
+    ps_dataloader.__next__()
diff --git a/pathopatch/patch_extraction/patch_extraction.py b/pathopatch/patch_extraction/patch_extraction.py
index 1918156..166ba67 100644
--- a/pathopatch/patch_extraction/patch_extraction.py
+++ b/pathopatch/patch_extraction/patch_extraction.py
@@ -837,9 +837,8 @@ def _prepare_wsi(
                 resulting_mpp = (
                     slide_properties["mpp"]
                     * self.rescaling_factor
-                    / 2
                     * self.config.downsample
-                )
+                )  # TODO: should it be divided by 2 or not?
             else:
                 resulting_mpp = slide_properties["mpp"] * self.config.downsample
         # target mag has precedence before downsample!
diff --git a/pathopatch/utils/patch_util.py b/pathopatch/utils/patch_util.py
index ae03176..7e09227 100644
--- a/pathopatch/utils/patch_util.py
+++ b/pathopatch/utils/patch_util.py
@@ -173,7 +173,7 @@ def target_mpp_to_downsample(
             "We perform rescaling, but this may not be accurate and is very slow!"
         )
         downsample = int(np.floor(target_mpp / base_mpp))
-        rescaling_factor = target_mpp / base_mpp
+        rescaling_factor = target_mpp / (base_mpp * downsample)
     else:
         logger.warning(
             f"Requested mpp resolution ({target_mpp}) is not a power of the base resultion {base_mpp}. "
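The corrected `rescaling_factor` in `target_mpp_to_downsample` is the residual scaling left over after the integer downsample, so together with the removal of the `/ 2` term the reported mpp ends up at `base_mpp * rescaling_factor * downsample == target_mpp`. A quick check with illustrative numbers (not taken from the repository):

```python
import numpy as np

base_mpp, target_mpp = 0.25, 1.1  # illustrative values only

downsample = int(np.floor(target_mpp / base_mpp))        # 4
rescaling_factor = target_mpp / (base_mpp * downsample)  # 1.1 (was target_mpp / base_mpp = 4.4)

# resulting_mpp as computed in dataset.py after this patch:
resulting_mpp = base_mpp * rescaling_factor * downsample
print(resulting_mpp)  # 1.1 -> equals target_mpp
```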
" diff --git a/setup.py b/setup.py index 857d556..c93b5c2 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import find_packages, setup -VERSION = "0.9.3a" +VERSION = "0.5.4b" DESCRIPTION = "PathoPatch - Accelerating Artificial Intelligence Based Whole Slide Image Analysis with an Optimized Preprocessing Pipeline" with open("docs/README_pypi.md", "r") as fh: LONG_DESCRIPTION = fh.read() diff --git a/tests/static_test_files/preprocessing/target_mpp_macenko/results/CMU-1-Small-Region/metadata.yaml b/tests/static_test_files/preprocessing/target_mpp_macenko/results/CMU-1-Small-Region/metadata.yaml index 330f541..c01a751 100644 --- a/tests/static_test_files/preprocessing/target_mpp_macenko/results/CMU-1-Small-Region/metadata.yaml +++ b/tests/static_test_files/preprocessing/target_mpp_macenko/results/CMU-1-Small-Region/metadata.yaml @@ -1,5 +1,5 @@ -orig_n_tiles_cols: 2 -orig_n_tiles_rows: 3 +orig_n_tiles_cols: 4 +orig_n_tiles_rows: 5 base_magnification: 20.0 downsampling: 2 label_map: @@ -9,7 +9,7 @@ patch_size: 256 base_mpp: 0.499 target_patch_mpp: 1.2 stain_normalization: true -magnification: 4.158333333333334 +magnification: 8.316666666666668 level: 11 patch_distribution: 0: 0 diff --git a/tests/static_test_files/preprocessing/target_mpp_macenko/results/CMU-1-Small-Region/patches/CMU-1-Small-Region_1_1.png b/tests/static_test_files/preprocessing/target_mpp_macenko/results/CMU-1-Small-Region/patches/CMU-1-Small-Region_1_1.png index 86fb65e..346bb55 100644 Binary files a/tests/static_test_files/preprocessing/target_mpp_macenko/results/CMU-1-Small-Region/patches/CMU-1-Small-Region_1_1.png and b/tests/static_test_files/preprocessing/target_mpp_macenko/results/CMU-1-Small-Region/patches/CMU-1-Small-Region_1_1.png differ diff --git a/tests/test_core_modules/test_target_mpp_macenko.py b/tests/test_core_modules/test_target_mpp_macenko.py index 79d67e4..bc02598 100644 --- a/tests/test_core_modules/test_target_mpp_macenko.py +++ b/tests/test_core_modules/test_target_mpp_macenko.py @@ -85,18 +85,18 @@ def test_resulting_patches_wsi(self) -> None: self.assertEqual(yaml_config, test_file) - # def test_macenko_patch(self) -> None: - # """Test if Macenko worked correctly""" - # gt_path = ( - # self.gt_folder / self.wsi_name / "patches" / "CMU-1-Small-Region_1_1.png" - # ) - # gt_image = np.array(Image.open(gt_path.resolve())) + def test_macenko_patch(self) -> None: + """Test if Macenko worked correctly""" + gt_path = ( + self.gt_folder / self.wsi_name / "patches" / "CMU-1-Small-Region_1_1.png" + ) + gt_image = np.array(Image.open(gt_path.resolve())) - # test_path = ( - # self.slide_processor.config.output_path - # / self.wsi_name - # / "patches" - # / "CMU-1-Small-Region_1_1.png" - # ) - # test_image = np.array(Image.open(test_path.resolve())) - # assert_almost_equal(test_image, gt_image) + test_path = ( + self.slide_processor.config.output_path + / self.wsi_name + / "patches" + / "CMU-1-Small-Region_1_1.png" + ) + test_image = np.array(Image.open(test_path.resolve())) + assert_almost_equal(test_image, gt_image)