diff --git a/.gitignore b/.gitignore
index 80c3cf6..7f85a87 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,11 @@ dist
 build
 push_build.yml
 debug
+conda-recipe
+conda-build-output
+
+# database
+test_database/3DHistech
+test_database/XiaLab
+test_database/dicom_files
+test_database/dicom_leica
diff --git a/environment.yaml b/environment.yaml
index 41bb476..b6a0eaf 100644
--- a/environment.yaml
+++ b/environment.yaml
@@ -1,36 +1,35 @@
-name: pathopatch_env_2
+name: pathopatch_env_2024
 channels:
-  - conda-forge
+  - bioconda
   - defaults
+  - conda-forge
+  - pytorch
+  - nvidia
 dependencies:
-  - python=3.10.12
-  - openslide=3.4.1
-  - pip=23.0
-  - python-javabridge
-  - libjpeg-turbo
+  - python>=3.9.0,<3.12
+  - conda-forge::openslide=4.0.0=h75f8748_1
+  - pip>22.0
   - pip:
-    - Pillow>=9.5.0
-    - PyYAML
-    - Shapely==1.8.5.post1
-    - black
-    - colorama
-    - flake8-html
-    - flake8
-    - genbadge
-    - geojson>=3.0.0
-    - matplotlib
-    - natsort
-    - numpy>1.22,<1.24
-    - opencv_python_headless==4.5.4.58
-    - openslide_python>=1.3.1
-    - pandas
-    - pre-commit
-    - pydantic==1.10.4
-    - pytest==7.4.4
-    - pytest-sugar==0.9.7
-    - rasterio==1.3.5.post1
-    - scikit-image
-    - torchvision
-    - tqdm
-    - wsidicomizer==0.13.2
-    - wsidicom==0.20.4
+      - openslide-python==1.3.1
+      - wsidicomizer==0.14.1
+      - wsidicom==0.20.4
+      - Pillow>=9.5.0
+      - PyYAML
+      - Shapely==1.8.5.post1
+      - colorama
+      - future
+      - geojson>=3.0.0
+      - matplotlib
+      - natsort
+      - numpy>1.22,<1.24
+      - opencv_python_headless # maybe try with contrib
+      - pandas
+      - pydantic==1.10.4
+      - rasterio==1.3.5.post1
+      - requests
+      - scikit-image
+      - setuptools<=65.6.3
+      - tqdm
+      - torchvision
+      - torch
+      - pytest
diff --git a/examples/dicom_conversion_examples.sh b/examples/dicom_conversion_examples.sh
new file mode 100644
index 0000000..dc47dce
--- /dev/null
+++ b/examples/dicom_conversion_examples.sh
@@ -0,0 +1,6 @@
+wsidicomizer \
+    -i ./test_database/3DHistech/Orig/E6225_21-1A.11.mrxs \
+    -o ./test_database/3DHistech/Converted \
+    -w 8 \
+    --format jpeg \
+    --quality 95
diff --git a/examples/patch_extraction.yaml b/examples/patch_extraction.yaml
index cc08185..c60f3f1 100644
--- a/examples/patch_extraction.yaml
+++ b/examples/patch_extraction.yaml
@@ -4,7 +4,12 @@ wsi_filelist:       # Path to a csv-filelist with WSI files (separator
 # used.Must include full paths to WSIs, including suffixes.Can be used as an replacement for
 # the wsi_paths option.If both are provided, yields an error. [str] [Optional, defaults to None]
 output_path:        # Path to the folder where the resulting dataset should be stored [str]
-wsi_extension:      # The extension of the WSI-files [str] [Optional, defaults to "svs"]
+wsi_extension:      # The extension of the WSI-files [str] [Optional, defaults to "svs"]
+
+# wsi metadata (optional, overwrite magnification and mpp from openslide)
+# wsi metadata is necessary if the magnification and mpp are not provided in the WSI file or cannot be read by openslide.
+wsi_magnification:  # The magnification of the WSI [int][Optional, defaults to None]
+wsi_mpp:            # The microns per pixel of the WSI [float][Optional, defaults to None]
 
 # basic setups
 patch_size:         # The size of the patches in pixel that will be retrieved from the WSI, e.g. 256 for 256px. [][Optional, defaults to 256]
@@ -76,6 +81,3 @@ filter_patches:     # Post-extraction patch filtering to sort out arte
 log_path:           # Path where log files should be stored. Otherwise, log files are stored in the output folder. [str][Optional, defaults to None]
 log_level:          # Set the logging level. [str][Optional, defaults to info]
 hardware_selection: # Select hardware device (just if available, otherwise always cucim). [str] [Optional, defaults to cucim]
-wsi_properties:     # If provided, the properties of the WSI are used for the extraction. [str][Optional, defaults to None]
-  magnifcation:     # Manual WSI magnification, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). [float][Optional, defaults to None]
-  slide_mpp:        # Manual WSI MPP, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). [float][Optional, defaults to None]
diff --git a/pathopatch/annotation_conversion.py b/pathopatch/annotation_conversion.py
index 1d301bf..c9ae75a 100644
--- a/pathopatch/annotation_conversion.py
+++ b/pathopatch/annotation_conversion.py
@@ -57,6 +57,7 @@ def merge_outlines(geojson_string: str) -> str:
         return modified_geojson_str
 
     elif element["type"] == "Feature":
+        print("Detected single feature, not a FeatureCollection")
         raise NotImplementedError
 
 
diff --git a/pathopatch/patch_extraction/dataset.py b/pathopatch/patch_extraction/dataset.py
index 682f0e9..3e12921 100644
--- a/pathopatch/patch_extraction/dataset.py
+++ b/pathopatch/patch_extraction/dataset.py
@@ -367,13 +367,13 @@ def _prepare_slide(
 
         self.slide_openslide = OpenSlide(str(self.config.wsi_path))
         self.slide = self.image_loader(str(self.config.wsi_path))
-        if "openslide.mpp-x" in self.slide_openslide.properties:
-            slide_mpp = float(self.slide_openslide.properties["openslide.mpp-x"])
-        elif (
+        if (
             self.config.wsi_properties is not None
             and "slide_mpp" in self.config.wsi_properties
         ):
             slide_mpp = self.config.wsi_properties["slide_mpp"]
+        elif "openslide.mpp-x" in self.slide_openslide.properties:
+            slide_mpp = float(self.slide_openslide.properties["openslide.mpp-x"])
         else:  # last option is to use regex
             try:
                 pattern = re.compile(r"MPP(?: =)? (\d+\.\d+)")
@@ -396,15 +396,15 @@
                     "MPP must be defined either by metadata or by config file!"
                 )
 
-        if "openslide.objective-power" in self.slide_openslide.properties:
-            slide_mag = float(
-                self.slide_openslide.properties.get("openslide.objective-power")
-            )
-        elif (
+        if (
             self.config.wsi_properties is not None
             and "magnification" in self.config.wsi_properties
         ):
             slide_mag = self.config.wsi_properties["magnification"]
+        elif "openslide.objective-power" in self.slide_openslide.properties:
+            slide_mag = float(
+                self.slide_openslide.properties.get("openslide.objective-power")
+            )
         else:
             raise NotImplementedError(
                 "MPP must be defined either by metadata or by config file!"
diff --git a/pathopatch/patch_extraction/patch_extraction.py b/pathopatch/patch_extraction/patch_extraction.py
index 124ee7a..4312d4f 100644
--- a/pathopatch/patch_extraction/patch_extraction.py
+++ b/pathopatch/patch_extraction/patch_extraction.py
@@ -815,13 +815,13 @@ def _prepare_wsi(
             if "magnification" in slide_properties:
                 slide_mag = slide_properties["magnification"]
         if slide_mpp is None:
-            if "openslide.mpp-x" in slide.properties:
-                slide_mpp = float(slide.properties.get("openslide.mpp-x"))
-            elif (
+            if (
                 self.config.wsi_properties is not None
                 and "slide_mpp" in self.config.wsi_properties
             ):
                 slide_mpp = self.config.wsi_properties["slide_mpp"]
+            elif "openslide.mpp-x" in slide.properties:
+                slide_mpp = float(slide.properties.get("openslide.mpp-x"))
             else:  # last option is to use regex
                 try:
                     pattern = re.compile(r"MPP(?: =)? (\d+\.\d+)")
@@ -842,13 +842,13 @@
                         "MPP must be defined either by metadata or by config file!"
                     )
         if slide_mag is None:
-            if "openslide.objective-power" in slide.properties:
-                slide_mag = float(slide.properties.get("openslide.objective-power"))
-            elif (
+            if (
                 self.config.wsi_properties is not None
                 and "magnification" in self.config.wsi_properties
             ):
                 slide_mag = self.config.wsi_properties["magnification"]
+            elif "openslide.objective-power" in slide.properties:
+                slide_mag = float(slide.properties.get("openslide.objective-power"))
             else:
                 raise NotImplementedError(
                     "Magnification must be defined either by metadata or by config file!"
diff --git a/pathopatch/utils/masking.py b/pathopatch/utils/masking.py
index ba51797..31c0715 100644
--- a/pathopatch/utils/masking.py
+++ b/pathopatch/utils/masking.py
@@ -119,14 +119,18 @@ def convert_polygons_to_mask(
     ]
     src = 255 * np.ones(shape=reference_size, dtype=np.uint8)
     im = Image.fromarray(src)
-    im.save("tmp.tif")
-    with warnings.catch_warnings():
-        warnings.filterwarnings("ignore")
-        with rasterio.open("tmp.tif") as src:
-            out_image, _ = rasterio_mask(src, polygons_downsampled, crop=False)
-            mask = out_image.transpose(1, 2, 0)
-            mask = np.invert(mask)
-    os.remove("tmp.tif")
+    with tempfile.NamedTemporaryFile(suffix=".tif", delete=False) as tmp_file:
+        tmp_file_path = tmp_file.name
+        try:
+            im.save(tmp_file_path)
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore")
+                with rasterio.open(tmp_file_path) as src:
+                    out_image, _ = rasterio_mask(src, polygons_downsampled, crop=False)
+                    mask = out_image.transpose(1, 2, 0)
+                    mask = np.invert(mask)
+        finally:
+            os.remove(tmp_file_path)
 
     mask = (mask / 255).astype(np.uint8)
     assert len(np.unique(mask)) <= 2, "Mask is not binary"
diff --git a/pathopatch/utils/patch_util.py b/pathopatch/utils/patch_util.py
index ff64d3c..ec9a98c 100644
--- a/pathopatch/utils/patch_util.py
+++ b/pathopatch/utils/patch_util.py
@@ -383,7 +383,7 @@ def compute_interesting_patches(
             apply_prefilter=apply_prefilter,
         )
     else:
-        logger.info("Using tissue geometry for background seperation")
+        logger.info("Using tissue geometry for background separation")
         if mask_otsu is True:
             logger.warning(
                 "Mask-Otsu is set to true, but tissue annotation has precedence"
diff --git a/pathopatch/utils/plotting.py b/pathopatch/utils/plotting.py
index 9fedad3..7ee9446 100644
--- a/pathopatch/utils/plotting.py
+++ b/pathopatch/utils/plotting.py
@@ -6,6 +6,8 @@
 # University Medicine Essen
 
 import math
+import tempfile
+
 import os
 import warnings
 from typing import List, Tuple
@@ -54,47 +56,51 @@ def generate_polygon_overview(
 
     region_label_set = set(region_labels)
     # save white basic image
-    white_bg = Image.fromarray(255 * np.ones(shape=reference_size, dtype=np.uint8))
-    white_bg.save("tmp.tif")
+    with tempfile.NamedTemporaryFile(suffix=".tif", delete=False) as tmp_file:
+        tmp_file_path = tmp_file.name
+    try:
+        white_bg = Image.fromarray(255 * np.ones(shape=reference_size, dtype=np.uint8))
+        white_bg.save(tmp_file_path)
 
-    if image is None:
-        src = 255 * np.ones(shape=reference_size, dtype=np.uint8)
-        image = Image.fromarray(src)
-    # draw individual images
-    for label in region_label_set:
-        label_image = image.copy()
-        white_image = white_bg.copy()
-        if tissue_grid is not None:
-            label_tissue_grid = tissue_grid.copy()
-        else:
-            label_tissue_grid = None
-        label_polygon = get_filtered_polygons(
-            polygons, region_labels, label, downsample
-        )
-        with warnings.catch_warnings():
-            warnings.filterwarnings("ignore")
-            with rasterio.open("tmp.tif") as src:
-                out_image, out_transform = rasterio_mask(src, label_polygon, crop=False)
-                # check polygon draw
-                label_polygon_list = []
-                for poly in label_polygon:
-                    if poly.type == "MultiPolygon":
-                        labels = [x for x in poly.geoms]
-                        label_polygon_list = label_polygon_list + labels
-                    else:
-                        label_polygon_list = label_polygon_list + [poly]
-                poly_outline_image = label_image.copy()
-                poly_outline_image_draw = ImageDraw.Draw(poly_outline_image)
-                [
-                    poly_outline_image_draw.polygon(
-                        list(lp.exterior.coords),
-                        outline=COLOR_DEFINITIONS[label_map[label]],
-                        width=5,
+        if image is None:
+            src = 255 * np.ones(shape=reference_size, dtype=np.uint8)
+            image = Image.fromarray(src)
+        # draw individual images
+        for label in region_label_set:
+            label_image = image.copy()
+            white_image = white_bg.copy()
+            if tissue_grid is not None:
+                label_tissue_grid = tissue_grid.copy()
+            else:
+                label_tissue_grid = None
+            label_polygon = get_filtered_polygons(
+                polygons, region_labels, label, downsample
+            )
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore")
+                with rasterio.open(tmp_file_path) as src:
+                    out_image, out_transform = rasterio_mask(
+                        src, label_polygon, crop=False
                     )
-                    for lp in label_polygon_list
-                ]
-                # [poly_outline_image_draw.polygon(list(lp.interiors), outline=COLOR_DEFINITIONS[label_map[label]] , width=5) for lp in label_polygon_list if len(list(lp.interiors)) > 2]
-                # TODO: interiors are wrong, needs to be fixed (check file ID_1004_LOC_4_TIME_2_BUYUE2088_STATUS_0_UID_27 for an example with interiors)
+                    # check polygon draw
+                    label_polygon_list = []
+                    for poly in label_polygon:
+                        if poly.type == "MultiPolygon":
+                            labels = [x for x in poly.geoms]
+                            label_polygon_list = label_polygon_list + labels
+                        else:
+                            label_polygon_list = label_polygon_list + [poly]
+                    poly_outline_image = label_image.copy()
+                    poly_outline_image_draw = ImageDraw.Draw(poly_outline_image)
+                    [
+                        poly_outline_image_draw.polygon(
+                            list(lp.exterior.coords),
+                            outline=COLOR_DEFINITIONS[label_map[label]],
+                            width=5,
+                        )
+                        for lp in label_polygon_list
+                    ]
+                    # [poly_outline_image_draw.polygon(list(lp.interiors), outline=COLOR_DEFINITIONS[label_map[label]] , width=5) for lp in label_polygon_list if len(list(lp.interiors)) > 2]
 
                 mask = out_image.transpose(1, 2, 0)
                 mask = (mask / 255).astype(np.uint8)
@@ -119,8 +125,8 @@
             image_container[f"{label}_clean"] = white_image
             image_container[f"{label}_ouline"] = poly_outline_image
             areas[area] = label
-
-    os.remove("tmp.tif")
+    finally:
+        os.remove(tmp_file_path)
 
     # draw all masks on one image, sorted by areas
     sorted_labels = [areas[k] for k in sorted(areas, reverse=True)]
diff --git a/pathopatch/wsi_interfaces/wsidicomizer_openslide.py b/pathopatch/wsi_interfaces/wsidicomizer_openslide.py
index 9c2af22..307941b 100644
--- a/pathopatch/wsi_interfaces/wsidicomizer_openslide.py
+++ b/pathopatch/wsi_interfaces/wsidicomizer_openslide.py
@@ -19,24 +19,27 @@ def __init__(self, dcm_folder: Union[Path, str]) -> None:
 
         # iterate through the folder to check if a DICOMDIR file exists
         dcm_folder = Path(dcm_folder)
-        files = [f for f in dcm_folder.iterdir() if f.is_file()]
-        if not any(f.name == "DICOMDIR" for f in files):
-            source = WsiDicomFileSource.open(dcm_folder)
-        else:
-            source = WsiDicomFileSource.open_dicomdir(dcm_folder / "DICOMDIR")
+        files = [f for f in dcm_folder.iterdir() if f.is_file() and f.suffix == ".dcm"]
+        source = WsiDicomFileSource.open(files)
         super().__init__(source, True)
 
         # information and properties to make this compatible with OpenSlide
-        self.dimensions = (self.size.width, self.size.height)
-        self.level_count = len(self.levels)
+        x_max = 0
+        y_max = 0
+        for p in self.pyramids:
+            x_max = max(x_max, p.size.width)
+            y_max = max(y_max, p.size.height)
+        self.dimensions = (x_max, y_max)
+        self.level_count = len(self.pyramids)
 
         self.level_dimensions = self._get_level_dimensions()
         self.level_downsamples = self._get_level_downsamples(self.level_dimensions)
 
+        # TODO: get it from pyramid
         self.properties = {
-            "mpp": self.mpp,
-            "openslide.mpp-x": self.mpp.width,
-            "openslide.mpp-y": self.mpp.height,
+            "mpp": self.pyramids[-1].mpp,
+            "openslide.mpp-x": self.pyramids[-1].mpp.width,
+            "openslide.mpp-y": self.pyramids[-1].mpp.height,
             "openslide.level-count": self.level_count,
             "level_count": self.level_count,
             "level_dimensions": self.level_dimensions,
@@ -62,7 +65,7 @@ def _get_level_dimensions(self) -> Tuple[Tuple[int, int]]:
 
             Tuple[Tuple[int, int]]: The dimensions of all levels. Each tuple contains the width and height of the level.
         """
-        return tuple((level.size.width, level.size.height) for level in self.levels)
+        return tuple((level.size.width, level.size.height) for level in self.pyramids)
 
     def _get_level_downsamples(
         self, level_dimensions: Tuple[Tuple[int, int]]
@@ -76,7 +79,7 @@ def _get_level_downsamples(
         Returns:
             List[float]: The downsample factor for each level.
         """
-        highest_x = level_dimensions[0][0]
+        highest_x = level_dimensions[-1][0]
         return tuple(highest_x / dim[0] for dim in level_dimensions)
 
     def _convert_region_openslide(
diff --git a/requirements.txt b/requirements.txt
index 1e15256..910112d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,6 @@
+openslide-python==1.3.1
+wsidicomizer==0.14.1
+wsidicom==0.20.4
 Pillow>=9.5.0
 PyYAML
 Shapely==1.8.5.post1
@@ -8,7 +11,6 @@ matplotlib
 natsort
 numpy>1.22,<1.24
 opencv_python_headless
-openslide_python
 pandas
 pydantic==1.10.4
 rasterio==1.3.5.post1
@@ -18,5 +20,4 @@ setuptools<=65.6.3
 tqdm
 torchvision
 torch
-wsidicom==0.20.4
-wsidicomizer==0.13.2
+pytest
diff --git a/setup.py b/setup.py
index ab25738..3e0946a 100644
--- a/setup.py
+++ b/setup.py
@@ -40,7 +40,7 @@
         "torchvision",
         "torch",
         "wsidicom==0.20.4",
-        "wsidicomizer==0.13.2",
+        "wsidicomizer==0.14.1",
     ],
     scripts=[
         "pathopatch/wsi_extraction.py",
diff --git a/tests/static_test_files/preprocessing/baseline/config.yaml b/tests/static_test_files/preprocessing/baseline/config.yaml
index e975c3d..4cc59ac 100644
--- a/tests/static_test_files/preprocessing/baseline/config.yaml
+++ b/tests/static_test_files/preprocessing/baseline/config.yaml
@@ -12,4 +12,4 @@ log_level: debug
 
 overwrite: True
 apply_prefilter: True
-filter_patches: True
+filter_patches: False
diff --git a/tests/static_test_files/preprocessing/dicom-histech/dicom.yaml b/tests/static_test_files/preprocessing/dicom-histech/dicom.yaml
new file mode 100644
index 0000000..067ba73
--- /dev/null
+++ b/tests/static_test_files/preprocessing/dicom-histech/dicom.yaml
@@ -0,0 +1,19 @@
+# dataset paths
+wsi_paths: ./test_database/3DHistech/Converted
+output_path: ./tests/tmp_results_folder/dicom/dicom_histech
+wsi_extension: dcm
+
+# basic setups
+target_mpp: 1
+patch_size: 512
+patch_overlap: 0
+normalize_stains: False
+min_intersection_ratio: 0.05
+
+processes: 8
+overwrite: True
+
+# other
+log_level: debug
+wsi_magnification: 40
+wsi_mpp: 0.25
diff --git a/tests/test_core_modules/test_baseline.py b/tests/test_core_modules/test_baseline.py
index 274ec0d..3531ac8 100644
--- a/tests/test_core_modules/test_baseline.py
+++ b/tests/test_core_modules/test_baseline.py
@@ -1,7 +1,5 @@
 import json
-import os
-import shutil
 import unittest
 
 from pathlib import Path
 
@@ -13,7 +11,6 @@
 from pathopatch.cli import PreProcessingConfig, PreProcessingYamlConfig
 from pathopatch.patch_extraction.patch_extraction import PreProcessor
 from pathopatch.utils.logger import Logger
-from pathopatch.utils.tools import close_logger
 
 from test_database.download import check_test_database
 
@@ -52,24 +49,24 @@ def setUpClass(cls) -> None:
         )
         cls.slide_processor.sample_patches_dataset()
 
-    @classmethod
-    def tearDownClass(cls):
-        """Clean output directory"""
-        # close logger
-        close_logger(cls.logger)
-
-        # clean output directory
-        clean_folders = [
-            f for f in Path(cls.opt_dict["output_path"]).iterdir() if f.is_dir()
-        ]
-        for f in clean_folders:
-            shutil.rmtree(f.resolve())
-        clean_files = [
-            f for f in Path(cls.opt_dict["output_path"]).iterdir() if f.is_file()
-        ]
-        for f in clean_files:
-            os.remove(f.resolve())
-        shutil.rmtree(f.parent.resolve())
+    # @classmethod
+    # def tearDownClass(cls):
+    #     """Clean output directory"""
+    #     # close logger
+    #     close_logger(cls.logger)
+
+    #     # clean output directory
+    #     clean_folders = [
+    #         f for f in Path(cls.opt_dict["output_path"]).iterdir() if f.is_dir()
+    #     ]
+    #     for f in clean_folders:
+    #         shutil.rmtree(f.resolve())
+    #     clean_files = [
+    #         f for f in Path(cls.opt_dict["output_path"]).iterdir() if f.is_file()
+    #     ]
+    #     for f in clean_files:
+    #         os.remove(f.resolve())
+    #     shutil.rmtree(f.parent.resolve())
 
     def test_init_files(self) -> None:
         """For this case 1 WSI files should have been loaded"""
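
Note on the reordered metadata lookup in dataset.py and patch_extraction.py: a user-supplied wsi_properties entry (slide_mpp / magnification) now takes precedence over the OpenSlide metadata, and the regex parse stays as the last resort for the MPP. The sketch below restates that precedence as standalone code; the function name, the plain-dict signature and the use of the "openslide.comment" field for the regex fallback are illustrative assumptions, not PathoPatch's actual API.

# Simplified sketch of the new lookup order; not the library's actual helper.
import re
from typing import Optional


def resolve_slide_mpp(slide_properties: dict, wsi_properties: Optional[dict]) -> float:
    # 1) explicit override from the pipeline config wins
    if wsi_properties is not None and "slide_mpp" in wsi_properties:
        return float(wsi_properties["slide_mpp"])
    # 2) otherwise fall back to the OpenSlide metadata
    if "openslide.mpp-x" in slide_properties:
        return float(slide_properties["openslide.mpp-x"])
    # 3) last resort: parse an "MPP = 0.25" style string (property name assumed)
    match = re.compile(r"MPP(?: =)? (\d+\.\d+)").search(
        slide_properties.get("openslide.comment", "")
    )
    if match is not None:
        return float(match.group(1))
    raise NotImplementedError(
        "MPP must be defined either by metadata or by config file!"
    )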
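The masking.py and plotting.py hunks replace the hard-coded tmp.tif with a tempfile.NamedTemporaryFile whose path is handed to rasterio and removed in a finally block. Below is a self-contained sketch of that pattern, assuming rasterio, Pillow and numpy are installed; the helper name and signature are hypothetical, and the real functions additionally handle downsampling and per-label bookkeeping.

# Minimal sketch of the temporary-file rasterization pattern used above.
import os
import tempfile
import warnings

import numpy as np
import rasterio
from PIL import Image
from rasterio.mask import mask as rasterio_mask


def rasterize_polygons(polygons, reference_size):
    """Burn GeoJSON-like polygons into a 0/1 uint8 mask of reference_size."""
    canvas = Image.fromarray(255 * np.ones(shape=reference_size, dtype=np.uint8))
    with tempfile.NamedTemporaryFile(suffix=".tif", delete=False) as tmp_file:
        tmp_file_path = tmp_file.name
        try:
            canvas.save(tmp_file_path)
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore")  # plain TIFF has no CRS/transform
                with rasterio.open(tmp_file_path) as src:
                    out_image, _ = rasterio_mask(src, polygons, crop=False)
        finally:
            os.remove(tmp_file_path)  # cleaned up even if rasterio raises
    mask = np.invert(out_image.transpose(1, 2, 0))
    return (mask / 255).astype(np.uint8)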