Fix DICOM WSI Conversion (#104)

Temporary fix until libvips 8.15 is released; that release will ship with OpenSlide 4.0.0, which offers DICOM-WSI support.
DIAGNijmegen · Oct 23, 2023 · 499d4c1 · 499d4c1
1 parent 4a2d4fb
commit 499d4c1
Show file tree

Hide file tree

Showing 7 changed files with 165 additions and 49 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,14 +1,14 @@
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
       - id: check-docstring-first
       - id: debug-statements
       - id: end-of-file-fixer
       - id: mixed-line-ending
       - id: trailing-whitespace
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.3.1
+    rev: v3.15.0
     hooks:
       - id: pyupgrade
         language: python
@@ -18,12 +18,12 @@ repos:
     hooks:
       - id: isort
   - repo: https://github.com/ambv/black
-    rev: 23.1.0
+    rev: 23.9.1
     hooks:
       - id: black
         language: python
   - repo: https://github.com/PyCQA/flake8
-    rev: 6.0.0
+    rev: 6.1.0
     hooks:
       - id: flake8
         language: python
@@ -35,7 +35,7 @@ repos:
           - yesqa
         exclude: ^panimg/contrib/
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: 'v1.0.0'
+    rev: 'v1.6.0'
     hooks:
       - id: mypy
         additional_dependencies:

diff --git a/panimg/contrib/wsi_dcm_to_tiff/dcm_to_tiff.py b/panimg/contrib/wsi_dcm_to_tiff/dcm_to_tiff.py
@@ -211,12 +211,12 @@
 from wsidicom import WsiDicom
 
 
-def is_dicom(dirpath):
+def is_dicom(dir_path):
     try:
-        path = Path(dirpath)
+        path = Path(dir_path)
         files = list(path.iterdir())
-        image = WsiDicom.open(files)
-        return True
+        with WsiDicom.open(files):
+            return True
     except Exception as e:
         return False
 
@@ -228,46 +228,47 @@ def dcm_to_tiff(input_dir, output_path):
 
     with TiffWriter(output_path, bigtiff=True) as tif:
         tile_size = image.levels[0].default_instance.tile_size.width
+        test_tile = np.array(image.read_tile(0, (0, 0)))
 
         def tiler(getter, level, cols, rows):
             for row in range(rows):
                 for col in range(cols):
                     im = np.array(getter(level, (col, row)))
                     yield im
 
-        for level in range(0, len(image.levels.levels)):
-            cols = int(math.ceil(image.levels[level].size.width / tile_size))
-            rows = int(math.ceil(image.levels[level].size.height / tile_size))
+        for level in image.levels:
+            cols = int(math.ceil(level.size.width / tile_size))
+            rows = int(math.ceil(level.size.height / tile_size))
 
-            test_tile = np.array(image.read_tile(0, (0, 0)))
-            dtype = test_tile.dtype
+            d_type = test_tile.dtype
             shape = (
-                image.levels[level].size.height,
-                image.levels[level].size.width,
+                level.size.height,
+                level.size.width,
                 3,
             )
 
-            level_tiler = tiler(image.read_tile, level, cols, rows)
+            level_tiler = tiler(image.read_tile, level.level, cols, rows)
 
-            extratags = [(274, 3, 1, 1, False)]  # Orientation TOPLEFT
+            extra_tags = [(274, 3, 1, 1, False)]  # Orientation TOP LEFT
 
             resolution = (
-                int(10 / image.levels[level].pixel_spacing.width),
-                int(10 / image.levels[level].pixel_spacing.height),
-                "CENTIMETER",
+                int(10 / level.pixel_spacing.width),
+                int(10 / level.pixel_spacing.height),
             )
 
-            subfiletype = 1 if level != 0 else 0
+            sub_filetype = 1 if level.level != 0 else 0
 
             tif.write(
                 level_tiler,
-                dtype=dtype,
+                dtype=d_type,
                 shape=shape,
                 tile=(tile_size, tile_size),
                 photometric="rgb",
                 compression="jpeg",
                 subsampling=(1, 1),
                 resolution=resolution,
+                resolutionunit="CENTIMETER",
                 description="Converted from DICOM",
-                subfiletype=subfiletype,
+                subfiletype=sub_filetype,
+                extratags=extra_tags,
             )
diff --git a/panimg/image_builders/dicom.py b/panimg/image_builders/dicom.py
@@ -374,7 +374,7 @@ def read(self) -> SimpleITKImage:
         )
 
 
-def _get_headers_by_study(
+def get_dicom_headers_by_study(
     files: Set[Path], file_errors: DefaultDict[Path, List[str]]
 ):
     """
@@ -462,7 +462,7 @@ def _find_valid_dicom_files(
 
     Any study with an inconsistent amount of slices per time point is discarded.
     """
-    studies = _get_headers_by_study(files=files, file_errors=file_errors)
+    studies = get_dicom_headers_by_study(files=files, file_errors=file_errors)
     result = []
     for key in studies:
         headers = studies[key]["headers"]

diff --git a/panimg/image_builders/tiff.py b/panimg/image_builders/tiff.py
@@ -21,8 +21,8 @@
 from panimg.contrib.wsi_dcm_to_tiff.dcm_to_tiff import (
     dcm_to_tiff as wsi_dcm_to_tiff,
 )
-from panimg.contrib.wsi_dcm_to_tiff.dcm_to_tiff import is_dicom as is_wsi_dicom
 from panimg.exceptions import UnconsumedFilesException, ValidationError
+from panimg.image_builders.dicom import get_dicom_headers_by_study
 from panimg.models import MAXIMUM_SEGMENTS_LENGTH, ColorSpace, TIFFImage
 
 try:
@@ -36,6 +36,13 @@
     pyvips = False
 
 
+DICOM_WSI_STORAGE_ID = "1.2.840.10008.5.1.4.1.1.77.1.6"
+
+
+def format_error(message: str) -> str:
+    return f"TIFF image builder: {message}"
+
+
 @dataclass
 class GrandChallengeTiffFile:
     path: Path
@@ -342,7 +349,11 @@ def _convert(
                 output_directory=output_directory,
             )
         except Exception as e:
-            file_errors[file].append(str(e))
+            file_errors[file].append(
+                format_error(
+                    f"Could not convert file to TIFF: {file.name}, error:{str(e)}"
+                )
+            )
             continue
         else:
             gc_file.path = tiff_file
@@ -402,7 +413,11 @@ def _convert_dicom_wsi_dir(
 
         wsi_dcm_to_tiff(wsidicom_dir, new_file_name)
     except Exception as e:
-        file_errors[file].append(str(e))
+        file_errors[file].append(
+            format_error(
+                f"Could not convert dicom-wsi to TIFF: {file.name}, error:{str(e)}"
+            )
+        )
     else:
         gc_file.path = new_file_name
         gc_file.associated_files = [
@@ -411,12 +426,61 @@ def _convert_dicom_wsi_dir(
     return gc_file
 
 
+def _find_valid_dicom_wsi_files(
+    files: Set[Path], file_errors: DefaultDict[Path, List[str]]
+):
+    """
+    Gets the headers for all dicom files on path and validates them.
+
+    Parameters
+    ----------
+    files
+        Paths of images that were uploaded during an upload session.
+
+    file_errors
+        Dictionary in which reading errors are recorded per file
+
+    Returns
+    -------
+    A list with the first file of each valid DICOM-WSI study, and a
+    function mapping that file to the other files of the same study
+
+    """
+    # Try and get dicom files; ignore errors
+    dicom_errors = file_errors.copy()
+    studies = get_dicom_headers_by_study(files=files, file_errors=dicom_errors)
+    result: Dict[Path, List[Path]] = {}
+
+    for key in studies:
+        headers = studies[key]["headers"]
+        if not headers:
+            continue
+
+        if not all(
+            header["data"].SOPClassUID == DICOM_WSI_STORAGE_ID
+            for header in headers
+        ):
+            for d in headers:
+                file_errors[d["file"]].append(
+                    format_error("Non-WSI-DICOM not supported by TIFF builder")
+                )
+        else:
+            result[Path(headers[0]["file"])] = [
+                Path(h["file"]) for h in headers[1:]
+            ]
+
+    def associated_files(file_path: Path):
+        return result[file_path]
+
+    return list(result.keys()), associated_files
+
+
 def _load_gc_files(
     *,
     files: Set[Path],
     converter,
     output_directory: Path,
-    file_errors: Dict[Path, List[str]],
+    file_errors: DefaultDict[Path, List[str]],
 ) -> List[GrandChallengeTiffFile]:
     loaded_files: List[GrandChallengeTiffFile] = []
 
@@ -430,6 +494,17 @@ def _load_gc_files(
         ".bif": None,
     }
 
+    dicom_files, handler = _find_valid_dicom_wsi_files(files, file_errors)
+    for dicom_file in dicom_files:
+        gc_file = GrandChallengeTiffFile(dicom_file)
+        gc_file = _convert_dicom_wsi_dir(
+            gc_file=gc_file,
+            file=dicom_file,
+            output_directory=output_directory,
+            file_errors=file_errors,
+        )
+        loaded_files.append(gc_file)
+
     for ext, handler in complex_file_handlers.items():
         complex_files = [file for file in files if file.suffix.lower() == ext]
         if len(complex_files) > 0:
@@ -450,14 +525,6 @@ def _load_gc_files(
             if g.associated_files is not None
         ):
             gc_file = GrandChallengeTiffFile(file)
-            if file.suffix.lower() == ".dcm" and is_wsi_dicom(file.parent):
-                gc_file = _convert_dicom_wsi_dir(
-                    gc_file=gc_file,
-                    file=file,
-                    output_directory=output_directory,
-                    file_errors=file_errors,
-                )
-
             loaded_files.append(gc_file)
 
     return loaded_files
@@ -496,15 +563,15 @@ def image_builder_tiff(  # noqa: C901
                 gc_file = _load_with_tiff(gc_file=gc_file)
             except Exception:
                 file_errors[gc_file.path].append(
-                    "Could not open file with tifffile."
+                    format_error("Could not open file with tifffile.")
                 )
 
             # try and load image with open slide
             try:
                 gc_file = _load_with_openslide(gc_file=gc_file)
             except Exception:
                 file_errors[gc_file.path].append(
-                    "Could not open file with OpenSlide."
+                    format_error("Could not open file with OpenSlide.")
                 )
 
             # validate
@@ -515,7 +582,9 @@ def image_builder_tiff(  # noqa: C901
                     # GrandChallengeTiffFile
                     raise RuntimeError("Color space not found")
             except ValidationError as e:
-                file_errors[gc_file.path].append(f"Validation error: {e}.")
+                file_errors[gc_file.path].append(
+                    format_error(f"Validation error: {e}.")
+                )
                 continue
 
             if gc_file.associated_files:

diff --git a/tests/test_dcm_wsi.py b/tests/test_dcm_wsi.py
@@ -0,0 +1,45 @@
+from pathlib import Path
+from tempfile import TemporaryDirectory
+
+import pytest
+from tifffile import TiffFile
+from wsidicom import WsiDicom
+
+from panimg.contrib.wsi_dcm_to_tiff.dcm_to_tiff import dcm_to_tiff
+from tests import RESOURCE_PATH
+
+
+@pytest.mark.parametrize(
+    "src",
+    (
+        RESOURCE_PATH / "dicom_wsi/sparse_with_bot",
+        RESOURCE_PATH / "dicom_wsi/sparse_no_bot",
+        RESOURCE_PATH / "dicom_wsi/full_with_bot",
+        RESOURCE_PATH / "dicom_wsi/full_no_bot",
+    ),
+)
+def test_dcm_to_tiff(src: Path):
+    with TemporaryDirectory() as output_directory:
+        converted_tiff = Path(output_directory) / "output.tiff"
+        dcm_to_tiff(src, converted_tiff)
+        files = {f for f in src.rglob("*") if f.is_file()}
+        with WsiDicom.open(files) as o_tif:
+            with TiffFile(converted_tiff) as c_tif:
+                assert len(o_tif.levels) == len(c_tif.pages)
+                for i in range(0, len(o_tif.levels)):
+                    assert (
+                        o_tif.collection[i].size.height
+                        == c_tif.pages[i].shape[0]
+                    )
+                    assert (
+                        o_tif.collection[i].size.width
+                        == c_tif.pages[i].shape[1]
+                    )
+                    assert (
+                        o_tif.collection[i].tile_size.height
+                        == c_tif.pages[i].tile[0]
+                    )
+                    assert (
+                        o_tif.collection[i].tile_size.width
+                        == c_tif.pages[i].tile[1]
+                    )
diff --git a/tests/test_default_image_builders.py b/tests/test_default_image_builders.py
@@ -24,9 +24,10 @@
         "(supported formats: .fds,.fda,.e2e)"
     ],
     "image_builder_tiff": [
-        "Could not open file with tifffile.",
-        "Could not open file with OpenSlide.",
-        "Validation error: Not a valid tif: Image width could not be determined.",
+        "TIFF image builder: Could not open file with tifffile.",
+        "TIFF image builder: Could not open file with OpenSlide.",
+        "TIFF image builder: Validation error: Not a valid tif: "
+        "Image width could not be determined.",
     ],
 }