Skip to content

Commit

Permalink
Fix DICOM WSI Conversion (#104)
Browse files Browse the repository at this point in the history
Temporary fix until libvips 8.15 is released; that release will ship with
OpenSlide 4.0.0, which offers DICOM-WSI support.
  • Loading branch information
miriam-groeneveld authored Oct 23, 2023
1 parent 4a2d4fb commit 499d4c1
Show file tree
Hide file tree
Showing 7 changed files with 165 additions and 49 deletions.
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: check-docstring-first
- id: debug-statements
- id: end-of-file-fixer
- id: mixed-line-ending
- id: trailing-whitespace
- repo: https://github.com/asottile/pyupgrade
rev: v3.3.1
rev: v3.15.0
hooks:
- id: pyupgrade
language: python
Expand All @@ -18,12 +18,12 @@ repos:
hooks:
- id: isort
- repo: https://github.com/ambv/black
rev: 23.1.0
rev: 23.9.1
hooks:
- id: black
language: python
- repo: https://github.com/PyCQA/flake8
rev: 6.0.0
rev: 6.1.0
hooks:
- id: flake8
language: python
Expand All @@ -35,7 +35,7 @@ repos:
- yesqa
exclude: ^panimg/contrib/
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.0.0'
rev: 'v1.6.0'
hooks:
- id: mypy
additional_dependencies:
Expand Down
39 changes: 20 additions & 19 deletions panimg/contrib/wsi_dcm_to_tiff/dcm_to_tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,12 +211,12 @@
from wsidicom import WsiDicom


def is_dicom(dirpath):
def is_dicom(dir_path):
try:
path = Path(dirpath)
path = Path(dir_path)
files = list(path.iterdir())
image = WsiDicom.open(files)
return True
with WsiDicom.open(files):
return True
except Exception as e:
return False

Expand All @@ -228,46 +228,47 @@ def dcm_to_tiff(input_dir, output_path):

with TiffWriter(output_path, bigtiff=True) as tif:
tile_size = image.levels[0].default_instance.tile_size.width
test_tile = np.array(image.read_tile(0, (0, 0)))

def tiler(getter, level, cols, rows):
for row in range(rows):
for col in range(cols):
im = np.array(getter(level, (col, row)))
yield im

for level in range(0, len(image.levels.levels)):
cols = int(math.ceil(image.levels[level].size.width / tile_size))
rows = int(math.ceil(image.levels[level].size.height / tile_size))
for level in image.levels:
cols = int(math.ceil(level.size.width / tile_size))
rows = int(math.ceil(level.size.height / tile_size))

test_tile = np.array(image.read_tile(0, (0, 0)))
dtype = test_tile.dtype
d_type = test_tile.dtype
shape = (
image.levels[level].size.height,
image.levels[level].size.width,
level.size.height,
level.size.width,
3,
)

level_tiler = tiler(image.read_tile, level, cols, rows)
level_tiler = tiler(image.read_tile, level.level, cols, rows)

extratags = [(274, 3, 1, 1, False)] # Orientation TOPLEFT
extra_tags = [(274, 3, 1, 1, False)] # Orientation TOP LEFT

resolution = (
int(10 / image.levels[level].pixel_spacing.width),
int(10 / image.levels[level].pixel_spacing.height),
"CENTIMETER",
int(10 / level.pixel_spacing.width),
int(10 / level.pixel_spacing.height),
)

subfiletype = 1 if level != 0 else 0
sub_filetype = 1 if level.level != 0 else 0

tif.write(
level_tiler,
dtype=dtype,
dtype=d_type,
shape=shape,
tile=(tile_size, tile_size),
photometric="rgb",
compression="jpeg",
subsampling=(1, 1),
resolution=resolution,
resolutionunit="CENTIMETER",
description="Converted from DICOM",
subfiletype=subfiletype,
subfiletype=sub_filetype,
extratags=extra_tags,
)
4 changes: 2 additions & 2 deletions panimg/image_builders/dicom.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ def read(self) -> SimpleITKImage:
)


def _get_headers_by_study(
def get_dicom_headers_by_study(
files: Set[Path], file_errors: DefaultDict[Path, List[str]]
):
"""
Expand Down Expand Up @@ -462,7 +462,7 @@ def _find_valid_dicom_files(
Any study with an inconsistent amount of slices per time point is discarded.
"""
studies = _get_headers_by_study(files=files, file_errors=file_errors)
studies = get_dicom_headers_by_study(files=files, file_errors=file_errors)
result = []
for key in studies:
headers = studies[key]["headers"]
Expand Down
99 changes: 84 additions & 15 deletions panimg/image_builders/tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
from panimg.contrib.wsi_dcm_to_tiff.dcm_to_tiff import (
dcm_to_tiff as wsi_dcm_to_tiff,
)
from panimg.contrib.wsi_dcm_to_tiff.dcm_to_tiff import is_dicom as is_wsi_dicom
from panimg.exceptions import UnconsumedFilesException, ValidationError
from panimg.image_builders.dicom import get_dicom_headers_by_study
from panimg.models import MAXIMUM_SEGMENTS_LENGTH, ColorSpace, TIFFImage

try:
Expand All @@ -36,6 +36,13 @@
pyvips = False


# SOPClassUID value that every header of a study must carry for the study
# to be handled as DICOM-WSI by this builder.
DICOM_WSI_STORAGE_ID = "1.2.840.10008.5.1.4.1.1.77.1.6"


def format_error(message: str) -> str:
    """Return ``message`` prefixed with the name of the TIFF image builder."""
    builder_name = "TIFF image builder"
    return f"{builder_name}: {message}"


@dataclass
class GrandChallengeTiffFile:
path: Path
Expand Down Expand Up @@ -342,7 +349,11 @@ def _convert(
output_directory=output_directory,
)
except Exception as e:
file_errors[file].append(str(e))
file_errors[file].append(
format_error(
f"Could not convert file to TIFF: {file.name}, error:{str(e)}"
)
)
continue
else:
gc_file.path = tiff_file
Expand Down Expand Up @@ -402,7 +413,11 @@ def _convert_dicom_wsi_dir(

wsi_dcm_to_tiff(wsidicom_dir, new_file_name)
except Exception as e:
file_errors[file].append(str(e))
file_errors[file].append(
format_error(
f"Could not convert dicom-wsi to TIFF: {file.name}, error:{str(e)}"
)
)
else:
gc_file.path = new_file_name
gc_file.associated_files = [
Expand All @@ -411,12 +426,61 @@ def _convert_dicom_wsi_dir(
return gc_file


def _find_valid_dicom_wsi_files(
    files: Set[Path], file_errors: DefaultDict[Path, List[str]]
):
    """
    Group the DICOM files in ``files`` by study and keep the WSI studies.

    Parameters
    ----------
    files
        Paths of the images that were uploaded during an upload session.
    file_errors
        Dictionary in which errors are recorded per file. Only the
        "Non-WSI-DICOM not supported" error is written to it here; errors
        raised while reading the DICOM headers are deliberately discarded.

    Returns
    -------
    A tuple ``(dicom_files, associated_files)``. ``dicom_files`` is a list
    holding the first file of every study whose headers all carry the
    DICOM-WSI SOPClassUID. ``associated_files`` is a function that maps one
    of those files to the list of the other files of the same study.
    """
    # Try and get dicom files; ignore errors. ``copy()`` is shallow, so the
    # per-file lists are duplicated as well: otherwise header-reading errors
    # appended for a file that already has an entry would leak into the
    # caller's ``file_errors``.
    dicom_errors = file_errors.copy()
    for path, errors in file_errors.items():
        dicom_errors[path] = list(errors)

    studies = get_dicom_headers_by_study(files=files, file_errors=dicom_errors)
    result: Dict[Path, List[Path]] = {}

    for key in studies:
        headers = studies[key]["headers"]
        if not headers:
            continue

        # A header without SOPClassUID is treated as non-WSI instead of
        # raising AttributeError.
        is_wsi_study = all(
            getattr(header["data"], "SOPClassUID", None)
            == DICOM_WSI_STORAGE_ID
            for header in headers
        )

        if not is_wsi_study:
            for header in headers:
                file_errors[header["file"]].append(
                    format_error("Non-WSI-DICOM not supported by TIFF builder")
                )
            continue

        # The first file represents the study; the rest are its companions.
        first, *others = headers
        result[Path(first["file"])] = [Path(h["file"]) for h in others]

    def associated_files(file_path: Path):
        return result[file_path]

    return list(result.keys()), associated_files


def _load_gc_files(
*,
files: Set[Path],
converter,
output_directory: Path,
file_errors: Dict[Path, List[str]],
file_errors: DefaultDict[Path, List[str]],
) -> List[GrandChallengeTiffFile]:
loaded_files: List[GrandChallengeTiffFile] = []

Expand All @@ -430,6 +494,17 @@ def _load_gc_files(
".bif": None,
}

dicom_files, handler = _find_valid_dicom_wsi_files(files, file_errors)
for dicom_file in dicom_files:
gc_file = GrandChallengeTiffFile(dicom_file)
gc_file = _convert_dicom_wsi_dir(
gc_file=gc_file,
file=dicom_file,
output_directory=output_directory,
file_errors=file_errors,
)
loaded_files.append(gc_file)

for ext, handler in complex_file_handlers.items():
complex_files = [file for file in files if file.suffix.lower() == ext]
if len(complex_files) > 0:
Expand All @@ -450,14 +525,6 @@ def _load_gc_files(
if g.associated_files is not None
):
gc_file = GrandChallengeTiffFile(file)
if file.suffix.lower() == ".dcm" and is_wsi_dicom(file.parent):
gc_file = _convert_dicom_wsi_dir(
gc_file=gc_file,
file=file,
output_directory=output_directory,
file_errors=file_errors,
)

loaded_files.append(gc_file)

return loaded_files
Expand Down Expand Up @@ -496,15 +563,15 @@ def image_builder_tiff( # noqa: C901
gc_file = _load_with_tiff(gc_file=gc_file)
except Exception:
file_errors[gc_file.path].append(
"Could not open file with tifffile."
format_error("Could not open file with tifffile.")
)

# try and load image with open slide
try:
gc_file = _load_with_openslide(gc_file=gc_file)
except Exception:
file_errors[gc_file.path].append(
"Could not open file with OpenSlide."
format_error("Could not open file with OpenSlide.")
)

# validate
Expand All @@ -515,7 +582,9 @@ def image_builder_tiff( # noqa: C901
# GrandChallengeTiffFile
raise RuntimeError("Color space not found")
except ValidationError as e:
file_errors[gc_file.path].append(f"Validation error: {e}.")
file_errors[gc_file.path].append(
format_error(f"Validation error: {e}.")
)
continue

if gc_file.associated_files:
Expand Down
45 changes: 45 additions & 0 deletions tests/test_dcm_wsi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from pathlib import Path
from tempfile import TemporaryDirectory

import pytest
from tifffile import TiffFile
from wsidicom import WsiDicom

from panimg.contrib.wsi_dcm_to_tiff.dcm_to_tiff import dcm_to_tiff
from tests import RESOURCE_PATH


@pytest.mark.parametrize(
    "src",
    (
        RESOURCE_PATH / "dicom_wsi/sparse_with_bot",
        RESOURCE_PATH / "dicom_wsi/sparse_no_bot",
        RESOURCE_PATH / "dicom_wsi/full_with_bot",
        RESOURCE_PATH / "dicom_wsi/full_no_bot",
    ),
)
def test_dcm_to_tiff(src: Path):
    """Convert a DICOM-WSI fixture and compare its geometry with the TIFF.

    For every level index the image size and the tile size of the source
    must match the corresponding page of the converted file.
    """
    with TemporaryDirectory() as output_directory:
        converted_tiff = Path(output_directory) / "output.tiff"
        dcm_to_tiff(src, converted_tiff)

        source_files = {f for f in src.rglob("*") if f.is_file()}

        with WsiDicom.open(source_files) as original, TiffFile(
            converted_tiff
        ) as converted:
            level_count = len(original.levels)
            assert level_count == len(converted.pages)

            for index in range(level_count):
                source_level = original.collection[index]
                page = converted.pages[index]

                # Image dimensions
                assert source_level.size.height == page.shape[0]
                assert source_level.size.width == page.shape[1]

                # Tile dimensions
                assert source_level.tile_size.height == page.tile[0]
                assert source_level.tile_size.width == page.tile[1]
7 changes: 4 additions & 3 deletions tests/test_default_image_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@
"(supported formats: .fds,.fda,.e2e)"
],
"image_builder_tiff": [
"Could not open file with tifffile.",
"Could not open file with OpenSlide.",
"Validation error: Not a valid tif: Image width could not be determined.",
"TIFF image builder: Could not open file with tifffile.",
"TIFF image builder: Could not open file with OpenSlide.",
"TIFF image builder: Validation error: Not a valid tif: "
"Image width could not be determined.",
],
}

Expand Down
Loading

0 comments on commit 499d4c1

Please sign in to comment.