Changing tmpfile used, changed precedence of the slide_mpp and magnification parameter
TIO-IKIM · Jul 4, 2024 · 41dd89a · 41dd89a
1 parent f35b340
commit 41dd89a
Show file tree

Hide file tree

Showing 16 changed files with 184 additions and 138 deletions.
diff --git a/.gitignore b/.gitignore
@@ -21,3 +21,11 @@ dist
 build
 push_build.yml
 debug
+conda-recipe
+conda-build-output
+
+# database
+test_database/3DHistech
+test_database/XiaLab
+test_database/dicom_files
+test_database/dicom_leica
diff --git a/environment.yaml b/environment.yaml
@@ -1,36 +1,35 @@
-name: pathopatch_env_2
+name: pathopatch_env_2024
 channels:
-  - conda-forge
+  - bioconda
   - defaults
+  - conda-forge
+  - pytorch
+  - nvidia
 dependencies:
-  - python=3.10.12
-  - openslide=3.4.1
-  - pip=23.0
-  - python-javabridge
-  - libjpeg-turbo
+  - python>=3.9.0,<3.12
+  - conda-forge::openslide=4.0.0=h75f8748_1
+  - pip>22.0
   - pip:
-    - Pillow>=9.5.0
-    - PyYAML
-    - Shapely==1.8.5.post1
-    - black
-    - colorama
-    - flake8-html
-    - flake8
-    - genbadge
-    - geojson>=3.0.0
-    - matplotlib
-    - natsort
-    - numpy>1.22,<1.24
-    - opencv_python_headless==4.5.4.58
-    - openslide_python>=1.3.1
-    - pandas
-    - pre-commit
-    - pydantic==1.10.4
-    - pytest==7.4.4
-    - pytest-sugar==0.9.7
-    - rasterio==1.3.5.post1
-    - scikit-image
-    - torchvision
-    - tqdm
-    - wsidicomizer==0.13.2
-    - wsidicom==0.20.4
+      - openslide-python==1.3.1
+      - wsidicomizer==0.14.1
+      - wsidicom==0.20.4
+      - Pillow>=9.5.0
+      - PyYAML
+      - Shapely==1.8.5.post1
+      - colorama
+      - future
+      - geojson>=3.0.0
+      - matplotlib
+      - natsort
+      - numpy>1.22,<1.24
+      - opencv_python_headless # maybe try with contrib
+      - pandas
+      - pydantic==1.10.4
+      - rasterio==1.3.5.post1
+      - requests
+      - scikit-image
+      - setuptools<=65.6.3
+      - tqdm
+      - torchvision
+      - torch
+      - pytest
diff --git a/examples/dicom_conversion_examples.sh b/examples/dicom_conversion_examples.sh
@@ -0,0 +1,6 @@
+wsidicomizer \
+    -i ./test_database/3DHistech/Orig/E6225_21-1A.11.mrxs \
+    -o ./test_database/3DHistech/Converted \
+    -w 8 \
+    --format jpeg \
+    --quality 95
diff --git a/examples/patch_extraction.yaml b/examples/patch_extraction.yaml
@@ -4,7 +4,12 @@ wsi_filelist:                 # Path to a csv-filelist with WSI files (separator
                               # used. Must include full paths to WSIs, including suffixes. Can be used as a replacement for
                               # the wsi_paths option. If both are provided, yields an error. [str] [Optional, defaults to None]
 output_path:                  # Path to the folder where the resulting dataset should be stored [str]
-wsi_extension:               # The extension of the WSI-files [str] [Optional, defaults to "svs"]
+wsi_extension:                # The extension of the WSI-files [str] [Optional, defaults to "svs"]
+
+# WSI metadata (optional; overrides the magnification and mpp read by OpenSlide)
+# WSI metadata is required if the magnification and mpp are not provided in the WSI file or cannot be read by OpenSlide.
+wsi_magnification:            # The magnification of the WSI [int][Optional, defaults to None]
+wsi_mpp:                      # The microns per pixel of the WSI [float][Optional, defaults to None]
 
 # basic setups
 patch_size:                   # The size of the patches in pixels that will be retrieved from the WSI, e.g. 256 for 256px. [int] [Optional, defaults to 256]
@@ -76,6 +81,3 @@ filter_patches:               # Post-extraction patch filtering to sort out arte
 log_path:                     # Path where log files should be stored. Otherwise, log files are stored in the output folder. [str][Optional, defaults to None]
 log_level:                    # Set the logging level. [str][Optional, defaults to info]
 hardware_selection:           # Select hardware device (just if available, otherwise always cucim). [str] [Optional, defaults to cucim]
-wsi_properties:               # If provided, the properties of the WSI are used for the extraction. [str][Optional, defaults to None]
-  magnifcation:               # Manual WSI magnification, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). [float][Optional, defaults to None]
-  slide_mpp:                  # Manual WSI MPP, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). [float][Optional, defaults to None]
diff --git a/pathopatch/annotation_conversion.py b/pathopatch/annotation_conversion.py
@@ -57,6 +57,7 @@ def merge_outlines(geojson_string: str) -> str:
             return modified_geojson_str
 
         elif element["type"] == "Feature":
+            print("Deteceted single feature, not a FeatureCollection")
             raise NotImplementedError
 
 

diff --git a/pathopatch/patch_extraction/dataset.py b/pathopatch/patch_extraction/dataset.py
@@ -367,13 +367,13 @@ def _prepare_slide(
         self.slide_openslide = OpenSlide(str(self.config.wsi_path))
         self.slide = self.image_loader(str(self.config.wsi_path))
 
-        if "openslide.mpp-x" in self.slide_openslide.properties:
-            slide_mpp = float(self.slide_openslide.properties["openslide.mpp-x"])
-        elif (
+        if (
             self.config.wsi_properties is not None
             and "slide_mpp" in self.config.wsi_properties
         ):
             slide_mpp = self.config.wsi_properties["slide_mpp"]
+        elif "openslide.mpp-x" in self.slide_openslide.properties:
+            slide_mpp = float(self.slide_openslide.properties["openslide.mpp-x"])
         else:  # last option is to use regex
             try:
                 pattern = re.compile(r"MPP(?: =)? (\d+\.\d+)")
@@ -396,15 +396,15 @@ def _prepare_slide(
                     "MPP must be defined either by metadata or by config file!"
                 )
 
-        if "openslide.objective-power" in self.slide_openslide.properties:
-            slide_mag = float(
-                self.slide_openslide.properties.get("openslide.objective-power")
-            )
-        elif (
+        if (
             self.config.wsi_properties is not None
             and "magnification" in self.config.wsi_properties
         ):
             slide_mag = self.config.wsi_properties["magnification"]
+        elif "openslide.objective-power" in self.slide_openslide.properties:
+            slide_mag = float(
+                self.slide_openslide.properties.get("openslide.objective-power")
+            )
         else:
             raise NotImplementedError(
                 "MPP must be defined either by metadata or by config file!"

diff --git a/pathopatch/patch_extraction/patch_extraction.py b/pathopatch/patch_extraction/patch_extraction.py
@@ -815,13 +815,13 @@ def _prepare_wsi(
             if "magnification" in slide_properties:
                 slide_mag = slide_properties["magnification"]
         if slide_mpp is None:
-            if "openslide.mpp-x" in slide.properties:
-                slide_mpp = float(slide.properties.get("openslide.mpp-x"))
-            elif (
+            if (
                 self.config.wsi_properties is not None
                 and "slide_mpp" in self.config.wsi_properties
             ):
                 slide_mpp = self.config.wsi_properties["slide_mpp"]
+            elif "openslide.mpp-x" in slide.properties:
+                slide_mpp = float(slide.properties.get("openslide.mpp-x"))
             else:  # last option is to use regex
                 try:
                     pattern = re.compile(r"MPP(?: =)? (\d+\.\d+)")
@@ -842,13 +842,13 @@ def _prepare_wsi(
                         "MPP must be defined either by metadata or by config file!"
                     )
         if slide_mag is None:
-            if "openslide.objective-power" in slide.properties:
-                slide_mag = float(slide.properties.get("openslide.objective-power"))
-            elif (
+            if (
                 self.config.wsi_properties is not None
                 and "magnification" in self.config.wsi_properties
             ):
                 slide_mag = self.config.wsi_properties["magnification"]
+            elif "openslide.objective-power" in slide.properties:
+                slide_mag = float(slide.properties.get("openslide.objective-power"))
             else:
                 raise NotImplementedError(
                     "Magnification must be defined either by metadata or by config file!"

diff --git a/pathopatch/utils/masking.py b/pathopatch/utils/masking.py
@@ -119,14 +119,18 @@ def convert_polygons_to_mask(
     ]
     src = 255 * np.ones(shape=reference_size, dtype=np.uint8)
     im = Image.fromarray(src)
-    im.save("tmp.tif")
-    with warnings.catch_warnings():
-        warnings.filterwarnings("ignore")
-        with rasterio.open("tmp.tif") as src:
-            out_image, _ = rasterio_mask(src, polygons_downsampled, crop=False)
-            mask = out_image.transpose(1, 2, 0)
-            mask = np.invert(mask)
-    os.remove("tmp.tif")
+    with tempfile.NamedTemporaryFile(suffix=".tif", delete=False) as tmp_file:
+        tmp_file_path = tmp_file.name
+    try:
+        im.save(tmp_file_path)
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore")
+            with rasterio.open(tmp_file_path) as src:
+                out_image, _ = rasterio_mask(src, polygons_downsampled, crop=False)
+                mask = out_image.transpose(1, 2, 0)
+                mask = np.invert(mask)
+    finally:
+        os.remove(tmp_file_path)
     mask = (mask / 255).astype(np.uint8)
 
     assert len(np.unique(mask)) <= 2, "Mask is not binary"

diff --git a/pathopatch/utils/patch_util.py b/pathopatch/utils/patch_util.py
@@ -383,7 +383,7 @@ def compute_interesting_patches(
             apply_prefilter=apply_prefilter,
         )
     else:
-        logger.info("Using tissue geometry for background seperation")
+        logger.info("Using tissue geometry for background separation")
         if mask_otsu is True:
             logger.warning(
                 "Mask-Otsu is set to true, but tissue annotation has precedence"

diff --git a/pathopatch/utils/plotting.py b/pathopatch/utils/plotting.py
@@ -6,6 +6,8 @@
 # University Medicine Essen
 
 import math
+import tempfile
+
 import os
 import warnings
 from typing import List, Tuple
@@ -54,47 +56,51 @@ def generate_polygon_overview(
     region_label_set = set(region_labels)
 
     # save white basic image
-    white_bg = Image.fromarray(255 * np.ones(shape=reference_size, dtype=np.uint8))
-    white_bg.save("tmp.tif")
+    with tempfile.NamedTemporaryFile(suffix=".tif", delete=False) as tmp_file:
+        tmp_file_path = tmp_file.name
+    try:
+        white_bg = Image.fromarray(255 * np.ones(shape=reference_size, dtype=np.uint8))
+        white_bg.save(tmp_file_path)
 
-    if image is None:
-        src = 255 * np.ones(shape=reference_size, dtype=np.uint8)
-        image = Image.fromarray(src)
-    # draw individual images
-    for label in region_label_set:
-        label_image = image.copy()
-        white_image = white_bg.copy()
-        if tissue_grid is not None:
-            label_tissue_grid = tissue_grid.copy()
-        else:
-            label_tissue_grid = None
-        label_polygon = get_filtered_polygons(
-            polygons, region_labels, label, downsample
-        )
-        with warnings.catch_warnings():
-            warnings.filterwarnings("ignore")
-            with rasterio.open("tmp.tif") as src:
-                out_image, out_transform = rasterio_mask(src, label_polygon, crop=False)
-                # check polygon draw
-                label_polygon_list = []
-                for poly in label_polygon:
-                    if poly.type == "MultiPolygon":
-                        labels = [x for x in poly.geoms]
-                        label_polygon_list = label_polygon_list + labels
-                    else:
-                        label_polygon_list = label_polygon_list + [poly]
-                poly_outline_image = label_image.copy()
-                poly_outline_image_draw = ImageDraw.Draw(poly_outline_image)
-                [
-                    poly_outline_image_draw.polygon(
-                        list(lp.exterior.coords),
-                        outline=COLOR_DEFINITIONS[label_map[label]],
-                        width=5,
+        if image is None:
+            src = 255 * np.ones(shape=reference_size, dtype=np.uint8)
+            image = Image.fromarray(src)
+        # draw individual images
+        for label in region_label_set:
+            label_image = image.copy()
+            white_image = white_bg.copy()
+            if tissue_grid is not None:
+                label_tissue_grid = tissue_grid.copy()
+            else:
+                label_tissue_grid = None
+            label_polygon = get_filtered_polygons(
+                polygons, region_labels, label, downsample
+            )
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore")
+                with rasterio.open(tmp_file_path) as src:
+                    out_image, out_transform = rasterio_mask(
+                        src, label_polygon, crop=False
                     )
-                    for lp in label_polygon_list
-                ]
-                # [poly_outline_image_draw.polygon(list(lp.interiors), outline=COLOR_DEFINITIONS[label_map[label]] , width=5) for lp in label_polygon_list if len(list(lp.interiors)) > 2]
-                # TODO: interiors are wrong, needs to be fixed (check file ID_1004_LOC_4_TIME_2_BUYUE2088_STATUS_0_UID_27 for an example with interiors)
+                    # check polygon draw
+                    label_polygon_list = []
+                    for poly in label_polygon:
+                        if poly.type == "MultiPolygon":
+                            labels = [x for x in poly.geoms]
+                            label_polygon_list = label_polygon_list + labels
+                        else:
+                            label_polygon_list = label_polygon_list + [poly]
+                    poly_outline_image = label_image.copy()
+                    poly_outline_image_draw = ImageDraw.Draw(poly_outline_image)
+                    [
+                        poly_outline_image_draw.polygon(
+                            list(lp.exterior.coords),
+                            outline=COLOR_DEFINITIONS[label_map[label]],
+                            width=5,
+                        )
+                        for lp in label_polygon_list
+                    ]
+                    # [poly_outline_image_draw.polygon(list(lp.interiors), outline=COLOR_DEFINITIONS[label_map[label]] , width=5) for lp in label_polygon_list if len(list(lp.interiors)) > 2]
 
                 mask = out_image.transpose(1, 2, 0)
                 mask = (mask / 255).astype(np.uint8)
@@ -119,8 +125,8 @@ def generate_polygon_overview(
                 image_container[f"{label}_clean"] = white_image
                 image_container[f"{label}_ouline"] = poly_outline_image
                 areas[area] = label
-
-    os.remove("tmp.tif")
+    finally:
+        os.remove(tmp_file_path)
 
     # draw all masks on one image, sorted by areas
     sorted_labels = [areas[k] for k in sorted(areas, reverse=True)]

diff --git a/pathopatch/wsi_interfaces/wsidicomizer_openslide.py b/pathopatch/wsi_interfaces/wsidicomizer_openslide.py
@@ -19,24 +19,27 @@ def __init__(self, dcm_folder: Union[Path, str]) -> None:
 
         # iterate through the folder to check if a DICOMDIR file exists
         dcm_folder = Path(dcm_folder)
-        files = [f for f in dcm_folder.iterdir() if f.is_file()]
-        if not any(f.name == "DICOMDIR" for f in files):
-            source = WsiDicomFileSource.open(dcm_folder)
-        else:
-            source = WsiDicomFileSource.open_dicomdir(dcm_folder / "DICOMDIR")
+        files = [f for f in dcm_folder.iterdir() if f.is_file() and f.suffix == ".dcm"]
+        source = WsiDicomFileSource.open(files)
 
         super().__init__(source, True)
 
         # information and properties to make this compatible with OpenSlide
-        self.dimensions = (self.size.width, self.size.height)
-        self.level_count = len(self.levels)
+        x_max = 0
+        y_max = 0
+        for p in self.pyramids:
+            x_max = max(x_max, p.size.width)
+            y_max = max(y_max, p.size.height)
+        self.dimensions = (x_max, y_max)
+        self.level_count = len(self.pyramids)
         self.level_dimensions = self._get_level_dimensions()
         self.level_downsamples = self._get_level_downsamples(self.level_dimensions)
 
+        # TODO: get it from pyramid
         self.properties = {
-            "mpp": self.mpp,
-            "openslide.mpp-x": self.mpp.width,
-            "openslide.mpp-y": self.mpp.height,
+            "mpp": self.pyramids[-1].mpp,
+            "openslide.mpp-x": self.pyramids[-1].mpp.width,
+            "openslide.mpp-y": self.pyramids[-1].mpp.height,
             "openslide.level-count": self.level_count,
             "level_count": self.level_count,
             "level_dimensions": self.level_dimensions,
@@ -62,7 +65,7 @@ def _get_level_dimensions(self) -> Tuple[Tuple[int, int]]:
             Tuple[Tuple[int, int]]: The dimensions of all levels.
                 Each tuple contains the width and height of the level.
         """
-        return tuple((level.size.width, level.size.height) for level in self.levels)
+        return tuple((level.size.width, level.size.height) for level in self.pyramids)
 
     def _get_level_downsamples(
         self, level_dimensions: Tuple[Tuple[int, int]]
@@ -76,7 +79,7 @@ def _get_level_downsamples(
         Returns:
             List[float]: The downsample factor for each level.
         """
-        highest_x = level_dimensions[0][0]
+        highest_x = level_dimensions[-1][0]
         return tuple(highest_x / dim[0] for dim in level_dimensions)
 
     def _convert_region_openslide(