Skip to content

Commit

Permalink
Fixing Tests, Fixing MPP issue, Adding tolerance to in memory dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
FabianHoerst committed Mar 26, 2024
1 parent 115b324 commit 9326a09
Show file tree
Hide file tree
Showing 9 changed files with 117 additions and 47 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ pathopatch.egg-info/
dist
build
push_build.yml
debug
27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -331,3 +331,30 @@ TBD
<p xmlns:cc="http://creativecommons.org/ns#" xmlns:dct="http://purl.org/dc/terms/"><a property="dct:title" rel="cc:attributionURL" href="https://github.com/TIO-IKIM/PathoPatcher">PathoPatcher</a> by <a rel="cc:attributionURL dct:creator" property="cc:attributionName" href="https://github.com/FabianHoerst">Fabian Hörst, University Hospital Essen,</a> is licensed under <a href="http://creativecommons.org/licenses/by-nc-sa/4.0/?ref=chooser-v1" target="_blank" rel="license noopener noreferrer" style="display:inline-block;">CC BY-NC-SA 4.0
<img style="height:22px!important;margin-left:3px;vertical-align:text-bottom;" src="https://mirrors.creativecommons.org/presskit/icons/cc.svg?ref=chooser-v1"><img style="height:22px!important;margin-left:3px;vertical-align:text-bottom;" src="https://mirrors.creativecommons.org/presskit/icons/by.svg?ref=chooser-v1"><img style="height:22px!important;margin-left:3px;vertical-align:text-bottom;" src="https://mirrors.creativecommons.org/presskit/icons/nc.svg?ref=chooser-v1"><img style="height:22px!important;margin-left:3px;vertical-align:text-bottom;" src="https://mirrors.creativecommons.org/presskit/icons/sa.svg?ref=chooser-v1"></a></p>
## Citation
```latex
@InProceedings{10.1007/978-3-658-44037-4_91,
author="H{\"o}rst, Fabian
and Schaheer, Sajad H.
and Baldini, Giulia
and Bahnsen, Fin H.
and Egger, Jan
and Kleesiek, Jens",
editor="Maier, Andreas
and Deserno, Thomas M.
and Handels, Heinz
and Maier-Hein, Klaus
and Palm, Christoph
and Tolxdorff, Thomas",
title="Accelerating Artificial Intelligence-based Whole Slide Image Analysis with an Optimized Preprocessing Pipeline",
booktitle="Bildverarbeitung f{\"u}r die Medizin 2024",
year="2024",
publisher="Springer Fachmedien Wiesbaden",
address="Wiesbaden",
pages="356--361",,
isbn="978-3-658-44037-4"
}


```
95 changes: 69 additions & 26 deletions pathopatch/patch_extraction/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
# @ Fabian Hörst, [email protected]
# Institute for Artifical Intelligence in Medicine,
# University Medicine Essen
import sys

from click import Option

sys.path.append("/Users/fhoerst/Fabian-Projekte/Preprocessing/PathoPatcher")

import logging
import os
Expand All @@ -22,8 +26,7 @@
from shapely.geometry import Polygon
from torch.utils.data import Dataset
from torchvision.transforms.v2 import ToTensor

from pathopatch import logger
from PIL import Image
from pathopatch.utils.exceptions import WrongParameterException
from pathopatch.utils.patch_util import (
DeepZoomGeneratorOS,
Expand All @@ -43,11 +46,11 @@
warnings.filterwarnings("ignore", category=UserWarning)


class PreProcessingDatasetConfig(BaseModel):
class LivePatchWSIConfig(BaseModel):
"""Storing the configuration for the PatchWSIDataset
Args:
wsipath (str): Path to the WSI
wsi_path (str): Path to the WSI
wsi_properties (dict, optional): Dictionary with manual WSI metadata, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). Supported keys are slide_mpp and magnification
patch_size (int, optional): The size of the patches in pixel that will be retrieved from the WSI, e.g. 256 for 256px. Defaults to 256.
patch_overlap (float, optional): The percentage amount pixels that should overlap between two different patches.
Expand All @@ -63,6 +66,7 @@ class PreProcessingDatasetConfig(BaseModel):
expresses which kind of downsampling should be used with
respect to the highest possible resolution. Defaults to 0.
level (int, optional): The tile level for sampling, alternative to downsample. Defaults to None.
target_mpp_tolerance(float, optional): Tolerance for the target_mpp. If wsi mpp is within a range target_mpp +/- tolarance, no rescaling is performed. Defaults to 0.0.
annotation_path (str, optional): Path to the .json file with the annotations. Defaults to None.
label_map_file (str, optional): Path to the .json file with the label map. Defaults to None.
label_map (dict, optional): Dictionary with the label map. Defaults to None.
Expand Down Expand Up @@ -96,6 +100,7 @@ class PreProcessingDatasetConfig(BaseModel):
target_mpp: Optional[float]
target_mag: Optional[float]
level: Optional[int]
target_mpp_tolerance: Optional[float] = 0.0

# annotation specific settings
annotation_path: Optional[str]
Expand Down Expand Up @@ -174,7 +179,7 @@ def __post_init_post_parse__(self) -> None:
class LivePatchWSIDataset(Dataset):
def __init__(
self,
slide_processor_config: PreProcessingDatasetConfig,
slide_processor_config: LivePatchWSIConfig,
logger: logging.Logger = None,
transforms: Callable = ToTensor(),
) -> None:
Expand All @@ -184,7 +189,7 @@ def __init__(
functionality for loading and processing WSIs.
Args:
slide_processor_config (PreProcessingDatasetConfig): Configuration for preprocessing the dataset.
slide_processor_config (LivePatchWSIConfig): Configuration for preprocessing the dataset.
logger (logging.Logger, optional): Logger for logging events. Defaults to None.
transforms (Callable, optional): Transforms to apply to the patches. Defaults to ToTensor().
Expand All @@ -195,7 +200,7 @@ def __init__(
wsi_metadata (dict): Metadata of the WSI
deepzoomgenerator (Union[DeepZoomGeneratorOS, Any]): Class for tile extraction, deepzoom-interface
tile_extractor (Union[DeepZoomGeneratorOS, Any]): Instance of self.deepzoomgenerator
config (PreProcessingDatasetConfig): Configuration for preprocessing the dataset
config (LivePatchWSIConfig): Configuration for preprocessing the dataset
logger (logging.Logger): Logger for logging events
rescaling_factor (int): Rescaling factor for the slide
interesting_coords (List[Tuple[int, int, float]]): List of interesting coordinates (patches -> row, col, ratio)
Expand Down Expand Up @@ -296,9 +301,6 @@ def _set_tissue_detector(self) -> None:
Raises:
ImportError: If torch or torchvision cannot be imported.
Returns:
None
"""
try:
import torch.nn as nn
Expand All @@ -318,7 +320,7 @@ def _set_tissue_detector(self) -> None:
"cuda:0" if torch.cuda.is_available() else "cpu"
)
if self.detector_device == "cpu":
logger.warning(
self.logger.warning(
"No CUDA device detected - Speed may be very slow. Please consider performing extraction on CUDA device or disable tissue detector!"
)
model = mobilenet_v3_small().to(device=self.detector_device)
Expand Down Expand Up @@ -383,7 +385,7 @@ def _prepare_slide(
# Extract the float value
if match:
slide_mpp = float(match.group(1))
logger.warning(
self.logger.warning(
f"MPP {slide_mpp:.4f} was extracted from the comment of the WSI (Tiff-Metadata comment string) - Please check for correctness!"
)
else:
Expand Down Expand Up @@ -414,15 +416,25 @@ def _prepare_slide(
resulting_mpp = None

if self.config.target_mpp is not None:
self.config.downsample, self.rescaling_factor = target_mpp_to_downsample(
slide_properties["mpp"],
self.config.target_mpp,
)
if (
not slide_properties["mpp"] - self.config.target_mpp_tolerance
<= self.config.target_mpp
<= slide_properties["mpp"] + self.config.target_mpp_tolerance
):
(
self.config.downsample,
self.rescaling_factor,
) = target_mpp_to_downsample(
slide_properties["mpp"],
self.config.target_mpp,
)
else:
self.config.downsample = 1
self.rescaling_factor = 1.0
if self.rescaling_factor != 1.0:
resulting_mpp = (
slide_properties["mpp"]
* self.rescaling_factor
/ 2
* self.config.downsample
)
else:
Expand Down Expand Up @@ -519,7 +531,7 @@ def _prepare_slide(
)
self.logger.debug(f"Number of patches sampled: {len(interesting_coords)}")
if len(interesting_coords) == 0:
logger.warning(f"No patches sampled from {self.config.wsi_path}")
self.logger.warning(f"No patches sampled from {self.config.wsi_path}")

self.wsi_metadata = {
"orig_n_tiles_cols": n_cols,
Expand All @@ -539,7 +551,7 @@ def _prepare_slide(

return list(interesting_coords), level, polygons_downsampled, region_labels

def _get_wsi_annotations(self, downsample: int):
def _get_wsi_annotations(self, downsample: int): # TODO: docstring
region_labels: List[str] = []
polygons: List[Polygon] = []
polygons_downsampled: List[Polygon] = []
Expand Down Expand Up @@ -610,7 +622,7 @@ def __getitem__(self, index: int) -> Tuple[np.ndarray, dict, np.ndarray]:
ratio = {}
patch_mask = np.zeros(
(self.res_tile_size, self.res_tile_size), dtype=np.uint8
) # TODO:
)
else:
intersected_labels, ratio, patch_mask = get_intersected_labels(
tile_size=self.res_tile_size,
Expand All @@ -634,6 +646,19 @@ def __getitem__(self, index: int) -> Tuple[np.ndarray, dict, np.ndarray]:
normalization_vector_path=self.config.normalization_vector_json,
)

if self.res_tile_size != self.config.patch_size:
image_tile = Image.fromarray(image_tile)
if self.res_tile_size > self.config.patch_size:
image_tile.thumbnail(
(self.config.patch_size, self.config.patch_size),
getattr(Image, "Resampling", Image).LANCZOS,
)
else:
image_tile = image_tile.resize(
(self.config.patch_size, self.config.patch_size),
getattr(Image, "Resampling", Image).LANCZOS,
)
image_tile = np.array(image_tile)
try:
image_tile = self.transforms(image_tile)
except TypeError:
Expand All @@ -651,24 +676,24 @@ def __getitem__(self, index: int) -> Tuple[np.ndarray, dict, np.ndarray]:
return image_tile, patch_metadata, patch_mask


class PatchWSIDataloader:
"""Dataloader for PatchWSIDataset
class LivePatchWSIDataloader:
"""Dataloader for LivePatchWSIDataset
Args:
dataset (PatchWSIDataset): Dataset to load patches from.
dataset (LivePatchWSIDataset): Dataset to load patches from.
batch_size (int): Batch size for the dataloader.
shuffle (bool, optional): To shuffle iterations. Defaults to False.
seed (int, optional): Seed for shuffle. Defaults to 42.
"""

def __init__(
self,
dataset: PatchWSIDataset,
dataset: LivePatchWSIDataset,
batch_size: int,
shuffle: bool = False,
seed: int = 42,
) -> None:
assert isinstance(dataset, PatchWSIDataset)
assert isinstance(dataset, LivePatchWSIDataset)
assert isinstance(batch_size, int)
assert isinstance(shuffle, bool)
assert isinstance(seed, int)
Expand All @@ -682,6 +707,7 @@ def __init__(
if self.shuffle:
grtr = np.random.default_rng(seed)
self.element_list = grtr.permutation(self.element_list)
self.i = 0

def __iter__(self):
self.i = 0
Expand Down Expand Up @@ -732,4 +758,21 @@ def __next__(self) -> Tuple[torch.Tensor, List[dict], List[np.ndarray]]:
raise StopIteration

def __len__(self):
return int(np.ceil(len(self.dataset) / self.batch_size))
return int(np.ceil((len(self.dataset) - self.discard_count) / self.batch_size))


if __name__ == "__main__":
"""Just for testing purposes"""
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.info("Test")
config = LivePatchWSIConfig(
wsi_path="/Users/fhoerst/Fabian-Projekte/Selocan/RicardoScans/266819.svs",
patch_size=256,
patch_overlap=0,
target_mpp=0.3,
target_mpp_tolerance=0.1,
)
ps_dataset = LivePatchWSIDataset(config, logger)
ps_dataloader = LivePatchWSIDataloader(ps_dataset, batch_size=8)
ps_dataloader.__next__()
3 changes: 1 addition & 2 deletions pathopatch/patch_extraction/patch_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -837,9 +837,8 @@ def _prepare_wsi(
resulting_mpp = (
slide_properties["mpp"]
* self.rescaling_factor
/ 2
* self.config.downsample
)
) # TODO: should it be divided by 2 or not?
else:
resulting_mpp = slide_properties["mpp"] * self.config.downsample
# target mag has precedence before downsample!
Expand Down
2 changes: 1 addition & 1 deletion pathopatch/utils/patch_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def target_mpp_to_downsample(
"We perform rescaling, but this may not be accurate and is very slow!"
)
downsample = int(np.floor(target_mpp / base_mpp))
rescaling_factor = target_mpp / base_mpp
rescaling_factor = target_mpp / (base_mpp * downsample)
else:
logger.warning(
f"Requested mpp resolution ({target_mpp}) is not a power of the base resultion {base_mpp}. "
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from setuptools import find_packages, setup

VERSION = "0.9.3a"
VERSION = "0.5.4b"
DESCRIPTION = "PathoPatch - Accelerating Artificial Intelligence Based Whole Slide Image Analysis with an Optimized Preprocessing Pipeline"
with open("docs/README_pypi.md", "r") as fh:
LONG_DESCRIPTION = fh.read()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
orig_n_tiles_cols: 2
orig_n_tiles_rows: 3
orig_n_tiles_cols: 4
orig_n_tiles_rows: 5
base_magnification: 20.0
downsampling: 2
label_map:
Expand All @@ -9,7 +9,7 @@ patch_size: 256
base_mpp: 0.499
target_patch_mpp: 1.2
stain_normalization: true
magnification: 4.158333333333334
magnification: 8.316666666666668
level: 11
patch_distribution:
0: 0
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
28 changes: 14 additions & 14 deletions tests/test_core_modules/test_target_mpp_macenko.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,18 @@ def test_resulting_patches_wsi(self) -> None:

self.assertEqual(yaml_config, test_file)

# def test_macenko_patch(self) -> None:
# """Test if Macenko worked correctly"""
# gt_path = (
# self.gt_folder / self.wsi_name / "patches" / "CMU-1-Small-Region_1_1.png"
# )
# gt_image = np.array(Image.open(gt_path.resolve()))
def test_macenko_patch(self) -> None:
"""Test if Macenko worked correctly"""
gt_path = (
self.gt_folder / self.wsi_name / "patches" / "CMU-1-Small-Region_1_1.png"
)
gt_image = np.array(Image.open(gt_path.resolve()))

# test_path = (
# self.slide_processor.config.output_path
# / self.wsi_name
# / "patches"
# / "CMU-1-Small-Region_1_1.png"
# )
# test_image = np.array(Image.open(test_path.resolve()))
# assert_almost_equal(test_image, gt_image)
test_path = (
self.slide_processor.config.output_path
/ self.wsi_name
/ "patches"
/ "CMU-1-Small-Region_1_1.png"
)
test_image = np.array(Image.open(test_path.resolve()))
assert_almost_equal(test_image, gt_image)

0 comments on commit 9326a09

Please sign in to comment.