Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds DicomImage class in medimage namespace that inherits from MedicalImage to implement deidentify extra #36

Merged
merged 5 commits into from
Sep 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions extras/fileformats/extras/medimage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
from . import diffusion
from . import nifti
from . import raw
from . import base
27 changes: 27 additions & 0 deletions extras/fileformats/extras/medimage/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import typing as ty
from pathlib import Path
import tempfile
from fileformats.core import extra_implementation, FileSet
from fileformats.medimage import MedicalImage


@extra_implementation(MedicalImage.deidentify)
def no_deidentification_necessary(
    image: MedicalImage,
    out_dir: ty.Optional[Path] = None,
    new_stem: ty.Optional[str] = None,
    copy_mode: FileSet.CopyMode = FileSet.CopyMode.copy,
) -> MedicalImage:
    """Default "deidentification" for image formats that hold no identifying data.

    Nothing is stripped; the image is simply copied into ``out_dir`` so that callers
    get back a new file-set, as they do from format-specific implementations.

    ``out_dir`` defaults to a freshly created temporary directory and is created
    (with parents) if it does not exist. ``new_stem`` and ``copy_mode`` are passed
    straight through to ``image.copy`` as ``new_stem`` and ``mode``.

    Raises ``NotImplementedError`` when ``image.contains_phi`` is truthy, because
    a plain copy would not remove anything.
    """
    if image.contains_phi:
        raise NotImplementedError(
            f"{type(image)} images contain Protected Health Information (PHI) and needs a "
            "specific deidentification method"
        )
    target_dir = Path(tempfile.mkdtemp()) if out_dir is None else out_dir
    target_dir.mkdir(exist_ok=True, parents=True)
    return image.copy(target_dir, new_stem=new_stem, mode=copy_mode)

Check warning on line 27 in extras/fileformats/extras/medimage/base.py

View check run for this annotation

Codecov / codecov/patch

extras/fileformats/extras/medimage/base.py#L23-L27

Added lines #L23 - L27 were not covered by tests
132 changes: 127 additions & 5 deletions extras/fileformats/extras/medimage/dicom.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
from pathlib import Path
import typing
import typing as ty
import tempfile
import pydicom
import numpy
import numpy.typing
from fileformats.core import FileSet, extra_implementation
from fileformats.core import SampleFileGenerator
from fileformats.medimage import (
MedicalImage,
DicomImage,
DicomCollection,
DicomDir,
DicomSeries,
)
import fileformats.extras.application.medical # noqa: F401
from fileformats.medimage.base import DataArrayType
import medimages4tests.dummy.dicom.mri.t1w.siemens.skyra.syngo_d13c

Expand All @@ -26,14 +29,14 @@


@extra_implementation(MedicalImage.vox_sizes)
def dicom_vox_sizes(collection: DicomCollection) -> typing.Tuple[float, float, float]:
def dicom_vox_sizes(collection: DicomCollection) -> ty.Tuple[float, float, float]:
return tuple(
collection.metadata["PixelSpacing"] + [collection.metadata["SliceThickness"]]
)


@extra_implementation(MedicalImage.dims)
def dicom_dims(collection: DicomCollection) -> typing.Tuple[int, int, int]:
def dicom_dims(collection: DicomCollection) -> ty.Tuple[int, int, int]:
return tuple(
(
collection.metadata["Rows"],
Expand All @@ -52,7 +55,7 @@
def dicom_dir_generate_sample_data(
dcmdir: DicomDir,
generator: SampleFileGenerator,
) -> typing.List[Path]:
) -> ty.List[Path]:
dcm_dir = medimages4tests.dummy.dicom.mri.t1w.siemens.skyra.syngo_d13c.get_image()
series_number = generator.rng.randint(1, SERIES_NUMBER_RANGE)
dest = generator.generate_fspath(DicomDir)
Expand All @@ -68,7 +71,7 @@
def dicom_series_generate_sample_data(
dcm_series: DicomSeries,
generator: SampleFileGenerator,
) -> typing.List[Path]:
) -> ty.List[Path]:
dicom_dir: Path = dicom_dir_generate_sample_data(dcm_series, generator=generator)[0] # type: ignore[arg-type]
stem = generator.generate_fspath().stem
fspaths = []
Expand All @@ -80,3 +83,122 @@

SERIES_NUMBER_TAG = ("0020", "0011")
SERIES_NUMBER_RANGE = int(1e8)


@extra_implementation(MedicalImage.deidentify)
def dicom_deidentify(
    dicom: DicomImage,
    out_dir: ty.Optional[Path] = None,
    new_stem: ty.Optional[str] = None,
    copy_mode: FileSet.CopyMode = FileSet.CopyMode.copy,
) -> DicomImage:
    """Return a deidentified copy of a single DICOM file.

    The loaded dataset is modified as follows before being handed to ``dicom.new``:

    * ``PatientBirthDate`` keeps its first four characters and has ``"0101"``
      appended (assumes the DICOM ``YYYYMMDD`` date layout). A missing or empty
      birth date is left untouched.
    * ``PatientName`` is replaced with ``"Anonymous^Anonymous"``.
    * every tag in ``FIELDS_TO_DEIDENTIFY`` that is present has its value set to
      the empty string.

    The result is created at ``out_dir / dicom.fspath.name``; ``out_dir`` defaults
    to a fresh temporary directory and is created (with parents) if needed.

    NOTE(review): ``new_stem`` and ``copy_mode`` are accepted for signature
    compatibility with the other ``deidentify`` implementations but are not read
    here, so the output always keeps the original file name. Confirm whether
    ``new_stem`` should be honoured.
    """
    if out_dir is None:
        out_dir = Path(tempfile.mkdtemp())
    out_dir.mkdir(parents=True, exist_ok=True)
    dcm = dicom.load()
    # The birth date may be absent or empty. Attribute access would raise
    # AttributeError for a missing element, and an empty value would otherwise be
    # turned into the invalid date "0101".
    birth_date = getattr(dcm, "PatientBirthDate", None)
    if birth_date:
        dcm.PatientBirthDate = birth_date[:4] + "0101"
    dcm.PatientName = "Anonymous^Anonymous"
    for field in FIELDS_TO_DEIDENTIFY:
        try:
            elem = dcm[field]
        except KeyError:
            # tag not present in this dataset, nothing to blank
            continue
        elem.value = ""
    return dicom.new(out_dir / dicom.fspath.name, dcm)

Check warning on line 108 in extras/fileformats/extras/medimage/dicom.py

View check run for this annotation

Codecov / codecov/patch

extras/fileformats/extras/medimage/dicom.py#L107-L108

Added lines #L107 - L108 were not covered by tests


@extra_implementation(MedicalImage.deidentify)
def dicom_collection_deidentify(
collection: DicomCollection,
out_dir: ty.Optional[Path] = None,
new_stem: ty.Optional[str] = None,
copy_mode: FileSet.CopyMode = FileSet.CopyMode.copy,
) -> DicomCollection:
"""Deidentify every DICOM file in a collection and return a new collection.

Each item of ``collection.contents`` is deidentified into ``out_dir`` through its
own ``deidentify`` method. A ``DicomDir`` result is constructed from the output
directory; any other collection type is constructed from the list of deidentified
file paths. ``new_stem`` and ``copy_mode`` are accepted but not read here.
"""
if out_dir is None:
# no destination given: write into a fresh temporary directory
out_dir = Path(tempfile.mkdtemp())
if isinstance(collection, DicomDir):
# NOTE(review): indentation is lost in this view, so it cannot be told whether
# this sub-directory step applies only to the temporary-directory case or to a
# caller-supplied out_dir as well - confirm against the repository
out_dir /= collection.name
out_dir.mkdir(parents=True, exist_ok=True)
deid_fspaths = []
for dicom in collection.contents:
deid_fspaths.append(dicom.deidentify(out_dir).fspath)
type_ = type(collection)  # rebuild with the same concrete class that was passed in
if isinstance(collection, DicomDir):
deidentified = type_(out_dir)

Check warning on line 128 in extras/fileformats/extras/medimage/dicom.py

View check run for this annotation

Codecov / codecov/patch

extras/fileformats/extras/medimage/dicom.py#L118-L128

Added lines #L118 - L128 were not covered by tests
else:
deidentified = type_(deid_fspaths)
return deidentified

Check warning on line 131 in extras/fileformats/extras/medimage/dicom.py

View check run for this annotation

Codecov / codecov/patch

extras/fileformats/extras/medimage/dicom.py#L130-L131

Added lines #L130 - L131 were not covered by tests


# DICOM (group, element) tags, as hex strings, whose values are blanked by
# ``dicom_deidentify`` when present in a dataset. Each tag is listed exactly once.
FIELDS_TO_DEIDENTIFY = [
    ("0008", "0014"),  # Instance Creator UID
    ("0008", "1111"),  # Referenced Performed Procedure Step SQ
    ("0008", "1120"),  # Referenced Patient SQ
    ("0008", "1140"),  # Referenced Image SQ
    ("0008", "0096"),  # Referring Physician Identification SQ
    ("0008", "1032"),  # Procedure Code SQ
    ("0008", "1048"),  # Physician(s) of Record
    ("0008", "1049"),  # Physician(s) of Record Identification SQ
    ("0008", "1050"),  # Performing Physicians' Name
    ("0008", "1052"),  # Performing Physician Identification SQ
    ("0008", "1060"),  # Name of Physician(s) Reading Study
    ("0008", "1062"),  # Physician(s) Reading Study Identification SQ
    ("0008", "1110"),  # Referenced Study SQ
    ("0008", "1250"),  # Related Series SQ
    ("0008", "9092"),  # Referenced Image Evidence SQ
    ("0008", "0080"),  # Institution Name
    ("0008", "0081"),  # Institution Address
    ("0008", "0082"),  # Institution Code Sequence
    ("0008", "0092"),  # Referring Physician's Address
    ("0008", "0094"),  # Referring Physician's Telephone Numbers
    ("0008", "009C"),  # Consulting Physician's Name
    ("0008", "1070"),  # Operators' Name
    ("0010", "4000"),  # Patient Comments
    # ("0010", "0010") Patient's Name is not blanked here: dicom_deidentify
    # overwrites it with a placeholder name instead
    ("0010", "0021"),  # Issuer of Patient ID
    ("0010", "0032"),  # Patient's Birth Time
    ("0010", "0050"),  # Patient's Insurance Plan Code SQ
    ("0010", "0101"),  # Patient's Primary Language Code SQ
    ("0010", "1000"),  # Other Patient IDs
    ("0010", "1001"),  # Other Patient Names
    ("0010", "1002"),  # Other Patient IDs SQ
    ("0010", "1005"),  # Patient's Birth Name
    ("0010", "1010"),  # Patient's Age
    ("0010", "1040"),  # Patient's Address
    ("0010", "1060"),  # Patient's Mother's Birth Name
    ("0010", "1080"),  # Military Rank
    ("0010", "1081"),  # Branch of Service
    ("0010", "1090"),  # Medical Record Locator
    ("0010", "2000"),  # Medical Alerts
    ("0010", "2110"),  # Allergies
    ("0010", "2150"),  # Country of Residence
    ("0010", "2152"),  # Region of Residence
    ("0010", "2154"),  # Patient's Telephone Numbers
    ("0010", "2160"),  # Ethnic Group
    ("0010", "2180"),  # Occupation
    ("0010", "21A0"),  # Smoking Status
    ("0010", "21B0"),  # Additional Patient History
    ("0010", "21C0"),  # Pregnancy Status
    ("0010", "21D0"),  # Last Menstrual Date
    ("0010", "21F0"),  # Patient's Religious Preference
    ("0010", "2203"),  # Patient's Sex Neutered
    ("0010", "2297"),  # Responsible Person
    ("0010", "2298"),  # Responsible Person Role
    ("0010", "2299"),  # Responsible Organization
    ("0020", "9221"),  # Dimension Organization SQ
    ("0020", "9222"),  # Dimension Index SQ
    ("0038", "0010"),  # Admission ID
    ("0038", "0011"),  # Issuer of Admission ID
    ("0038", "0060"),  # Service Episode ID
    ("0038", "0061"),  # Issuer of Service Episode ID
    ("0038", "0062"),  # Service Episode Description
    ("0038", "0500"),  # Patient State
    ("0038", "0100"),  # Pertinent Documents SQ
    ("0040", "0260"),  # Performed Protocol Code SQ
    ("0088", "0130"),  # Storage Media File-Set ID
    ("0088", "0140"),  # Storage Media File-Set UID
    ("0400", "0561"),  # Original Attributes Sequence
    ("5200", "9229"),  # Shared Functional Groups SQ
]
4 changes: 2 additions & 2 deletions extras/fileformats/extras/medimage/raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
get_data as get_pet_countrate_data,
)
from fileformats.core import extra_implementation, FileSet
from fileformats.application import Dicom
from fileformats.medimage.dicom import DicomImage
from fileformats.medimage.raw import (
Vnd_Siemens_Biograph128Vision_Vr20b_PetRawData,
Vnd_Siemens_Biograph128Vision_Vr20b_PetCountRate,
Expand Down Expand Up @@ -40,7 +40,7 @@
pet_raw_data.dcm_hdr_size_int_offset,
)
dcm = pydicom.dcmread(window, specific_tags=specific_tags)
return Dicom.pydicom_to_dict(dcm)
return DicomImage.pydicom_to_dict(dcm)

Check warning on line 43 in extras/fileformats/extras/medimage/raw.py

View check run for this annotation

Codecov / codecov/patch

extras/fileformats/extras/medimage/raw.py#L43

Added line #L43 was not covered by tests


@extra_implementation(FileSet.generate_sample_data)
Expand Down
42 changes: 42 additions & 0 deletions extras/fileformats/extras/medimage/tests/test_deidentify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import pytest
from fileformats.core.exceptions import FileFormatsExtrasError
from fileformats.medimage import DicomImage, DicomDir, DicomSeries, Nifti1
from medimages4tests.dummy.dicom.mri.t1w.siemens.skyra.syngo_d13c import (
get_image as get_dicom_image,
)
from fileformats.medimage import Vnd_Siemens_Biograph128Vision_Vr20b_PetSinogram


@pytest.fixture(params=["image", "dir", "series"])
def dicom(request):
    """Yield the same sample DICOM session wrapped three ways: as a single image,
    as a directory and as a series of files."""
    dicom_dir = get_dicom_image(first_name="John", last_name="Doe")
    # kept lazy so that the "dir" case never walks the directory here
    dicom_files = (p for p in dicom_dir.iterdir() if p.suffix == ".dcm")
    wrapping = request.param
    if wrapping == "image":
        return DicomImage(next(dicom_files))
    if wrapping == "dir":
        return DicomDir(dicom_dir)
    return DicomSeries(dicom_files)


def test_deidentify_dicom(dicom):
    """Identifying header values are present before, and replaced or blanked after,
    deidentification."""
    # sanity-check the input really carries identifying values
    assert str(dicom.metadata["PatientName"]) == "Doe^John"
    assert dicom.metadata["InstitutionAddress"]
    assert not dicom.metadata["PatientBirthDate"].endswith("0101")

    deidentified = dicom.deidentify()

    assert str(deidentified.metadata["PatientName"]) == "Anonymous^Anonymous"
    assert deidentified.metadata["InstitutionAddress"] == ""
    assert deidentified.metadata["PatientBirthDate"] == "19800101"


def test_nifti_deidentify():
    """Deidentifying a NIfTI sample gives a distinct object with identical file hashes."""
    original = Nifti1.sample()
    copied = original.deidentify()
    assert original is not copied
    assert original.hash_files() == copied.hash_files()


def test_raw_pet_data_deidentify():
    """Calling ``deidentify`` on a raw PET sinogram sample raises FileFormatsExtrasError."""
    sinogram = Vnd_Siemens_Biograph128Vision_Vr20b_PetSinogram.sample()
    with pytest.raises(FileFormatsExtrasError):
        sinogram.deidentify()
9 changes: 5 additions & 4 deletions fileformats/medimage/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from ._version import __version__
from .base import MedicalImage
from .base import MedicalImagingData, MedicalImage


# import Dicom to alias to the medimage namespace it here as well
from fileformats.application import Dicom
from .misc import (
Analyze,
Mgh,
Expand All @@ -26,6 +25,7 @@
NiftiGzXBvec,
)
from .dicom import (
DicomImage,
DicomCollection,
DicomDir,
DicomSeries,
Expand Down Expand Up @@ -128,8 +128,9 @@

__all__ = [
"__version__",
"MedicalImagingData",
"MedicalImage",
"Dicom",
"DicomImage",
"Analyze",
"Mgh",
"MghGz",
Expand Down
26 changes: 24 additions & 2 deletions fileformats/medimage/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
import typing as ty
from pathlib import Path
import logging
from fileformats.core import extra, FileSet, mtime_cached_property
from fileformats.core.mixin import WithClassifiers
Expand All @@ -14,6 +15,10 @@
from typing import TypeAlias
else:
from typing_extensions import TypeAlias
if sys.version_info >= (3, 12):
from typing import Self
else:
from typing_extensions import Self

Check warning on line 21 in fileformats/medimage/base.py

View check run for this annotation

Codecov / codecov/patch

fileformats/medimage/base.py#L21

Added line #L21 was not covered by tests

if ty.TYPE_CHECKING:
import numpy.typing # noqa: F401
Expand All @@ -28,7 +33,25 @@
) # In Py<3.9 this is problematic "numpy.typing.NDArray[typing.Union[numpy.floating[typing.Any], numpy.integer[typing.Any]]]"


class MedicalImage(WithClassifiers, FileSet):
class MedicalImagingData(FileSet):
    """Common base for all medical imaging file-sets: images as well as pre-image
    raw data and data associated with them"""

    # Flag read by deidentification code; defaults to True here.
    # NOTE(review): presumably overridden to False by formats that carry no
    # Protected Health Information - confirm in the subclasses.
    contains_phi: bool = True

    @extra
    def deidentify(
        self,
        out_dir: ty.Optional[Path] = None,
        new_stem: ty.Optional[str] = None,
        copy_mode: FileSet.CopyMode = FileSet.CopyMode.copy,
    ) -> Self:
        """Return a new copy of the data with any subject-identifying information
        stripped from the header.

        This body only raises ``NotImplementedError``; the method is declared
        with ``@extra``.
        """
        raise NotImplementedError

Check warning on line 51 in fileformats/medimage/base.py

View check run for this annotation

Codecov / codecov/patch

fileformats/medimage/base.py#L51

Added line #L51 was not covered by tests


class MedicalImage(WithClassifiers, MedicalImagingData):

INCLUDE_HDR_KEYS: ty.Optional[ty.Tuple[str, ...]] = None
IGNORE_HDR_KEYS: ty.Optional[ty.Tuple[str, ...]] = None
Expand All @@ -43,7 +66,6 @@
"""
Returns the binary data of the image in a numpy array
"""
raise NotImplementedError

@mtime_cached_property
def data_array(self) -> DataArrayType:
Expand Down
Loading
Loading