Skip to content

Commit

Permalink
Changing tmpfile used, changed precedence of the slide_mpp and magnif…
Browse files Browse the repository at this point in the history
…ication parameter
  • Loading branch information
FabianHoerst committed Jul 4, 2024
1 parent f35b340 commit 41dd89a
Show file tree
Hide file tree
Showing 16 changed files with 184 additions and 138 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@ dist
build
push_build.yml
debug
conda-recipe
conda-build-output

# database
test_database/3DHistech
test_database/XiaLab
test_database/dicom_files
test_database/dicom_leica
63 changes: 31 additions & 32 deletions environment.yaml
Original file line number Diff line number Diff line change
@@ -1,36 +1,35 @@
name: pathopatch_env_2
name: pathopatch_env_2024
channels:
- conda-forge
- bioconda
- defaults
- conda-forge
- pytorch
- nvidia
dependencies:
- python=3.10.12
- openslide=3.4.1
- pip=23.0
- python-javabridge
- libjpeg-turbo
- python>=3.9.0,<3.12
- conda-forge::openslide=4.0.0=h75f8748_1
- pip>22.0
- pip:
- Pillow>=9.5.0
- PyYAML
- Shapely==1.8.5.post1
- black
- colorama
- flake8-html
- flake8
- genbadge
- geojson>=3.0.0
- matplotlib
- natsort
- numpy>1.22,<1.24
- opencv_python_headless==4.5.4.58
- openslide_python>=1.3.1
- pandas
- pre-commit
- pydantic==1.10.4
- pytest==7.4.4
- pytest-sugar==0.9.7
- rasterio==1.3.5.post1
- scikit-image
- torchvision
- tqdm
- wsidicomizer==0.13.2
- wsidicom==0.20.4
- openslide-python==1.3.1
- wsidicomizer==0.14.1
- wsidicom==0.20.4
- Pillow>=9.5.0
- PyYAML
- Shapely==1.8.5.post1
- colorama
- future
- geojson>=3.0.0
- matplotlib
- natsort
- numpy>1.22,<1.24
- opencv_python_headless # maybe try with contrib
- pandas
- pydantic==1.10.4
- rasterio==1.3.5.post1
- requests
- scikit-image
- setuptools<=65.6.3
- tqdm
- torchvision
- torch
- pytest
6 changes: 6 additions & 0 deletions examples/dicom_conversion_examples.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
wsidicomizer \
-i ./test_database/3DHistech/Orig/E6225_21-1A.11.mrxs \
-o ./test_database/3DHistech/Converted \
-w 8 \
--format jpeg \
--quality 95
10 changes: 6 additions & 4 deletions examples/patch_extraction.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@ wsi_filelist: # Path to a csv-filelist with WSI files (separator
# used. Must include full paths to WSIs, including suffixes. Can be used as a replacement for
# the wsi_paths option. If both are provided, yields an error. [str] [Optional, defaults to None]
output_path: # Path to the folder where the resulting dataset should be stored [str]
wsi_extension: # The extension of the WSI-files [str] [Optional, defaults to "svs"]
wsi_extension: # The extension of the WSI-files [str] [Optional, defaults to "svs"]

# wsi metadata (optional, overwrite magnification and mpp from openslide)
# wsi metadata is necessary if the magnification and mpp are not provided in the WSI file or cannot be read by openslide.
wsi_magnification: # The magnification of the WSI [int][Optional, defaults to None]
wsi_mpp: # The microns per pixel of the WSI [float][Optional, defaults to None]

# basic setups
patch_size: # The size of the patches in pixels that will be retrieved from the WSI, e.g. 256 for 256px. [int][Optional, defaults to 256]
Expand Down Expand Up @@ -76,6 +81,3 @@ filter_patches: # Post-extraction patch filtering to sort out arte
log_path: # Path where log files should be stored. Otherwise, log files are stored in the output folder. [str][Optional, defaults to None]
log_level: # Set the logging level. [str][Optional, defaults to info]
hardware_selection: # Select hardware device (just if available, otherwise always cucim). [str] [Optional, defaults to cucim]
wsi_properties: # If provided, the properties of the WSI are used for the extraction. [str][Optional, defaults to None]
magnifcation: # Manual WSI magnification, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). [float][Optional, defaults to None]
slide_mpp: # Manual WSI MPP, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). [float][Optional, defaults to None]
1 change: 1 addition & 0 deletions pathopatch/annotation_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def merge_outlines(geojson_string: str) -> str:
return modified_geojson_str

elif element["type"] == "Feature":
print("Deteceted single feature, not a FeatureCollection")
raise NotImplementedError


Expand Down
16 changes: 8 additions & 8 deletions pathopatch/patch_extraction/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,13 +367,13 @@ def _prepare_slide(
self.slide_openslide = OpenSlide(str(self.config.wsi_path))
self.slide = self.image_loader(str(self.config.wsi_path))

if "openslide.mpp-x" in self.slide_openslide.properties:
slide_mpp = float(self.slide_openslide.properties["openslide.mpp-x"])
elif (
if (
self.config.wsi_properties is not None
and "slide_mpp" in self.config.wsi_properties
):
slide_mpp = self.config.wsi_properties["slide_mpp"]
elif "openslide.mpp-x" in self.slide_openslide.properties:
slide_mpp = float(self.slide_openslide.properties["openslide.mpp-x"])
else: # last option is to use regex
try:
pattern = re.compile(r"MPP(?: =)? (\d+\.\d+)")
Expand All @@ -396,15 +396,15 @@ def _prepare_slide(
"MPP must be defined either by metadata or by config file!"
)

if "openslide.objective-power" in self.slide_openslide.properties:
slide_mag = float(
self.slide_openslide.properties.get("openslide.objective-power")
)
elif (
if (
self.config.wsi_properties is not None
and "magnification" in self.config.wsi_properties
):
slide_mag = self.config.wsi_properties["magnification"]
elif "openslide.objective-power" in self.slide_openslide.properties:
slide_mag = float(
self.slide_openslide.properties.get("openslide.objective-power")
)
else:
raise NotImplementedError(
"MPP must be defined either by metadata or by config file!"
Expand Down
12 changes: 6 additions & 6 deletions pathopatch/patch_extraction/patch_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,13 +815,13 @@ def _prepare_wsi(
if "magnification" in slide_properties:
slide_mag = slide_properties["magnification"]
if slide_mpp is None:
if "openslide.mpp-x" in slide.properties:
slide_mpp = float(slide.properties.get("openslide.mpp-x"))
elif (
if (
self.config.wsi_properties is not None
and "slide_mpp" in self.config.wsi_properties
):
slide_mpp = self.config.wsi_properties["slide_mpp"]
elif "openslide.mpp-x" in slide.properties:
slide_mpp = float(slide.properties.get("openslide.mpp-x"))
else: # last option is to use regex
try:
pattern = re.compile(r"MPP(?: =)? (\d+\.\d+)")
Expand All @@ -842,13 +842,13 @@ def _prepare_wsi(
"MPP must be defined either by metadata or by config file!"
)
if slide_mag is None:
if "openslide.objective-power" in slide.properties:
slide_mag = float(slide.properties.get("openslide.objective-power"))
elif (
if (
self.config.wsi_properties is not None
and "magnification" in self.config.wsi_properties
):
slide_mag = self.config.wsi_properties["magnification"]
elif "openslide.objective-power" in slide.properties:
slide_mag = float(slide.properties.get("openslide.objective-power"))
else:
raise NotImplementedError(
"Magnification must be defined either by metadata or by config file!"
Expand Down
20 changes: 12 additions & 8 deletions pathopatch/utils/masking.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,18 @@ def convert_polygons_to_mask(
]
src = 255 * np.ones(shape=reference_size, dtype=np.uint8)
im = Image.fromarray(src)
im.save("tmp.tif")
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
with rasterio.open("tmp.tif") as src:
out_image, _ = rasterio_mask(src, polygons_downsampled, crop=False)
mask = out_image.transpose(1, 2, 0)
mask = np.invert(mask)
os.remove("tmp.tif")
with tempfile.NamedTemporaryFile(suffix=".tif", delete=False) as tmp_file:
tmp_file_path = tmp_file.name
try:
im.save(tmp_file_path)
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
with rasterio.open(tmp_file_path) as src:
out_image, _ = rasterio_mask(src, polygons_downsampled, crop=False)
mask = out_image.transpose(1, 2, 0)
mask = np.invert(mask)
finally:
os.remove(tmp_file_path)
mask = (mask / 255).astype(np.uint8)

assert len(np.unique(mask)) <= 2, "Mask is not binary"
Expand Down
2 changes: 1 addition & 1 deletion pathopatch/utils/patch_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ def compute_interesting_patches(
apply_prefilter=apply_prefilter,
)
else:
logger.info("Using tissue geometry for background seperation")
logger.info("Using tissue geometry for background separation")
if mask_otsu is True:
logger.warning(
"Mask-Otsu is set to true, but tissue annotation has precedence"
Expand Down
88 changes: 47 additions & 41 deletions pathopatch/utils/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
# University Medicine Essen

import math
import tempfile

import os
import warnings
from typing import List, Tuple
Expand Down Expand Up @@ -54,47 +56,51 @@ def generate_polygon_overview(
region_label_set = set(region_labels)

# save white basic image
white_bg = Image.fromarray(255 * np.ones(shape=reference_size, dtype=np.uint8))
white_bg.save("tmp.tif")
with tempfile.NamedTemporaryFile(suffix=".tif", delete=False) as tmp_file:
tmp_file_path = tmp_file.name
try:
white_bg = Image.fromarray(255 * np.ones(shape=reference_size, dtype=np.uint8))
white_bg.save(tmp_file_path)

if image is None:
src = 255 * np.ones(shape=reference_size, dtype=np.uint8)
image = Image.fromarray(src)
# draw individual images
for label in region_label_set:
label_image = image.copy()
white_image = white_bg.copy()
if tissue_grid is not None:
label_tissue_grid = tissue_grid.copy()
else:
label_tissue_grid = None
label_polygon = get_filtered_polygons(
polygons, region_labels, label, downsample
)
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
with rasterio.open("tmp.tif") as src:
out_image, out_transform = rasterio_mask(src, label_polygon, crop=False)
# check polygon draw
label_polygon_list = []
for poly in label_polygon:
if poly.type == "MultiPolygon":
labels = [x for x in poly.geoms]
label_polygon_list = label_polygon_list + labels
else:
label_polygon_list = label_polygon_list + [poly]
poly_outline_image = label_image.copy()
poly_outline_image_draw = ImageDraw.Draw(poly_outline_image)
[
poly_outline_image_draw.polygon(
list(lp.exterior.coords),
outline=COLOR_DEFINITIONS[label_map[label]],
width=5,
if image is None:
src = 255 * np.ones(shape=reference_size, dtype=np.uint8)
image = Image.fromarray(src)
# draw individual images
for label in region_label_set:
label_image = image.copy()
white_image = white_bg.copy()
if tissue_grid is not None:
label_tissue_grid = tissue_grid.copy()
else:
label_tissue_grid = None
label_polygon = get_filtered_polygons(
polygons, region_labels, label, downsample
)
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
with rasterio.open(tmp_file_path) as src:
out_image, out_transform = rasterio_mask(
src, label_polygon, crop=False
)
for lp in label_polygon_list
]
# [poly_outline_image_draw.polygon(list(lp.interiors), outline=COLOR_DEFINITIONS[label_map[label]] , width=5) for lp in label_polygon_list if len(list(lp.interiors)) > 2]
# TODO: interiors are wrong, needs to be fixed (check file ID_1004_LOC_4_TIME_2_BUYUE2088_STATUS_0_UID_27 for an example with interiors)
# check polygon draw
label_polygon_list = []
for poly in label_polygon:
if poly.type == "MultiPolygon":
labels = [x for x in poly.geoms]
label_polygon_list = label_polygon_list + labels
else:
label_polygon_list = label_polygon_list + [poly]
poly_outline_image = label_image.copy()
poly_outline_image_draw = ImageDraw.Draw(poly_outline_image)
[
poly_outline_image_draw.polygon(
list(lp.exterior.coords),
outline=COLOR_DEFINITIONS[label_map[label]],
width=5,
)
for lp in label_polygon_list
]
# [poly_outline_image_draw.polygon(list(lp.interiors), outline=COLOR_DEFINITIONS[label_map[label]] , width=5) for lp in label_polygon_list if len(list(lp.interiors)) > 2]

mask = out_image.transpose(1, 2, 0)
mask = (mask / 255).astype(np.uint8)
Expand All @@ -119,8 +125,8 @@ def generate_polygon_overview(
image_container[f"{label}_clean"] = white_image
image_container[f"{label}_ouline"] = poly_outline_image
areas[area] = label

os.remove("tmp.tif")
finally:
os.remove(tmp_file_path)

# draw all masks on one image, sorted by areas
sorted_labels = [areas[k] for k in sorted(areas, reverse=True)]
Expand Down
27 changes: 15 additions & 12 deletions pathopatch/wsi_interfaces/wsidicomizer_openslide.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,27 @@ def __init__(self, dcm_folder: Union[Path, str]) -> None:

# iterate through the folder to check if a DICOMDIR file exists
dcm_folder = Path(dcm_folder)
files = [f for f in dcm_folder.iterdir() if f.is_file()]
if not any(f.name == "DICOMDIR" for f in files):
source = WsiDicomFileSource.open(dcm_folder)
else:
source = WsiDicomFileSource.open_dicomdir(dcm_folder / "DICOMDIR")
files = [f for f in dcm_folder.iterdir() if f.is_file() and f.suffix == ".dcm"]
source = WsiDicomFileSource.open(files)

super().__init__(source, True)

# information and properties to make this compatible with OpenSlide
self.dimensions = (self.size.width, self.size.height)
self.level_count = len(self.levels)
x_max = 0
y_max = 0
for p in self.pyramids:
x_max = max(x_max, p.size.width)
y_max = max(y_max, p.size.height)
self.dimensions = (x_max, y_max)
self.level_count = len(self.pyramids)
self.level_dimensions = self._get_level_dimensions()
self.level_downsamples = self._get_level_downsamples(self.level_dimensions)

# TODO: get it from pyramid
self.properties = {
"mpp": self.mpp,
"openslide.mpp-x": self.mpp.width,
"openslide.mpp-y": self.mpp.height,
"mpp": self.pyramids[-1].mpp,
"openslide.mpp-x": self.pyramids[-1].mpp.width,
"openslide.mpp-y": self.pyramids[-1].mpp.height,
"openslide.level-count": self.level_count,
"level_count": self.level_count,
"level_dimensions": self.level_dimensions,
Expand All @@ -62,7 +65,7 @@ def _get_level_dimensions(self) -> Tuple[Tuple[int, int]]:
Tuple[Tuple[int, int]]: The dimensions of all levels.
Each tuple contains the width and height of the level.
"""
return tuple((level.size.width, level.size.height) for level in self.levels)
return tuple((level.size.width, level.size.height) for level in self.pyramids)

def _get_level_downsamples(
self, level_dimensions: Tuple[Tuple[int, int]]
Expand All @@ -76,7 +79,7 @@ def _get_level_downsamples(
Returns:
List[float]: The downsample factor for each level.
"""
highest_x = level_dimensions[0][0]
highest_x = level_dimensions[-1][0]
return tuple(highest_x / dim[0] for dim in level_dimensions)

def _convert_region_openslide(
Expand Down
Loading

0 comments on commit 41dd89a

Please sign in to comment.