Skip to content

Commit

Permalink
changed dicoms to resources
Browse files Browse the repository at this point in the history
  • Loading branch information
tclose committed Nov 24, 2023
1 parent 42ee199 commit dc819d8
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 170 deletions.
10 changes: 4 additions & 6 deletions xnat_ingest/cli/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,7 @@
default="info",
type=str,
envvar="XNAT_INGEST_LOGLEVEL",
help=(
"The level of the logging printed to stdout"
)
help=("The level of the logging printed to stdout"),
)
@click.option(
"--log-file",
Expand Down Expand Up @@ -153,7 +151,6 @@ def stage(
mail_server: MailServer,
raise_errors: bool,
):

set_logger_handling(log_level, log_file, log_emails, mail_server)

logger.info(
Expand All @@ -165,7 +162,6 @@ def stage(
sessions = ImagingSession.construct(
dicoms_path=dicoms_path,
associated_files_pattern=associated,
assoc_files_identification=assoc_identification,
project_field=project_field,
subject_field=subject_field,
session_field=session_field,
Expand All @@ -186,7 +182,9 @@ def stage(
continue
session_staging_dir.mkdir(exist_ok=True)
# Deidentify files and save them to the staging directory
staged_session = session.deidentify(session_staging_dir)
staged_session = session.stage(
session_staging_dir, assoc_files_identification=assoc_identification
)
staged_session.save(session_staging_dir)
if delete:
session.delete()
Expand Down
115 changes: 0 additions & 115 deletions xnat_ingest/dicom.py

This file was deleted.

103 changes: 55 additions & 48 deletions xnat_ingest/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import pydicom
from fileformats.application import Dicom
from fileformats.medimage import DicomSeries
from fileformats.core import from_paths, FileSet, DataType
from fileformats.core import from_paths, FileSet, DataType, from_mime, to_mime
from fileformats.generic import File, Directory
from arcana.core.data.set import Dataset
from arcana.core.data.space import DataSpace
Expand All @@ -29,22 +29,34 @@
logger = logging.getLogger("xnat-ingest")


def dicoms_converter(
    multi_dicom_series: ty.Union[ty.List[DicomSeries], ty.Dict[str, DicomSeries]]
) -> ty.Dict[str, DicomSeries]:
    """Coerce a collection of DICOM series into a dictionary.

    Parameters
    ----------
    multi_dicom_series : list[DicomSeries] or dict[str, DicomSeries]
        either a sequence of series, or a mapping that is passed through as is

    Returns
    -------
    dict[str, DicomSeries]
        when a sequence is given, its items keyed by the string form of each
        item's "SeriesNumber" entry; otherwise the argument itself, unchanged
    """
    # Anything that is not a sequence is handed back untouched
    if not isinstance(multi_dicom_series, ty.Sequence):
        return multi_dicom_series
    series_by_number = {}
    for series in multi_dicom_series:
        # Later items overwrite earlier ones that share a series number
        series_by_number[str(series["SeriesNumber"])] = series
    return series_by_number
def resources_converter(
    resources: ty.Union[
        ty.List[DicomSeries], ty.Dict[ty.Tuple[str, str], ty.Tuple[str, FileSet]]
    ]
) -> ty.Dict[ty.Tuple[str, str], ty.Tuple[str, FileSet]]:
    """Coerce the ``resources`` argument into its dictionary form.

    Parameters
    ----------
    resources : list[DicomSeries] or dict[tuple[str, str], tuple[str, FileSet]]
        either a sequence of DICOM series, or a mapping that is passed through
        as is

    Returns
    -------
    dict[tuple[str, str], tuple[str, FileSet]]
        when a sequence is given, a new dictionary with one entry per series:
        the key is ``(str(series["SeriesNumber"]), "DICOM")`` and the value is
        ``(str(series["SeriesDescription"]), series)``; otherwise the argument
        itself, unchanged

    Raises
    ------
    TypeError
        if a sequence is given and any of its items is not a DicomSeries
    """
    # Anything that is not a sequence is handed back untouched
    if not isinstance(resources, ty.Sequence):
        return resources
    converted = {}
    for series in resources:
        if not isinstance(series, DicomSeries):
            raise TypeError(
                f"Only sequences of DicomSeries can be converted, otherwise needs "
                f"to be already in a dictionary, found {resources}"
            )
        # Build the value before the key so the lookups happen in the same
        # order as a single subscript-assignment statement would perform them
        entry = (str(series["SeriesDescription"]), series)
        converted[(str(series["SeriesNumber"]), "DICOM")] = entry
    return converted


@attrs.define(slots=False)
class ImagingSession:
project_id: str
subject_id: str
session_id: str
dicoms: ty.Dict[str, DicomSeries] = attrs.field(
factory=dict, converter=dicoms_converter
)
resources: ty.Dict[ty.Tuple[str, str], ty.Tuple[str, FileSet]] = attrs.field(
factory=dict, converter=resources_converter
) # keys -> scan-id & resource-type, values -> description, scan
associated_files_pattern: str | None = None
associated_file_fspaths: ty.List[Path] = attrs.field(factory=list)

Expand Down Expand Up @@ -72,7 +84,6 @@ def select_resources(
dataset: Dataset,
include_all_dicoms: bool = False,
include_all_assoc: bool = False,
assoc_id_pattern: str = None
) -> ty.Iterator[ty.Tuple[str, str, str, FileSet]]:
"""Returns selected resources that match the columns in the dataset definition
Expand All @@ -95,7 +106,7 @@ def select_resources(
scan : FileSet
a fileset to upload
"""
store = MockDataStore(self, assoc_id_pattern=assoc_id_pattern)
store = MockDataStore(self)

uploaded: ty.Set[FileSet] = set()

Expand Down Expand Up @@ -185,7 +196,6 @@ def construct(
cls,
dicoms_path: str | Path,
associated_files_pattern: str | None = None,
assoc_files_identification: str | None = None,
project_field: str = "StudyID",
subject_field: str = "PatientID",
session_field: str = "AccessionNumber",
Expand All @@ -205,10 +215,6 @@ def construct(
are substituted before the string is used to glob the non-DICOM files. In
order to deidentify the filenames, the pattern must explicitly reference all
identifiable fields in string template placeholders.
assoc_files_identification : str, optional
Used to extract the scan ID & type/resource from the associated filename. Should
be a regular-expression (Python syntax) with named groups called 'id' and 'type', e.g.
'[^\.]+\.[^\.]+\.(?P<id>\d+)\.(?P<type>\w+)\..*'
project_field : str
the name of the DICOM field that is to be interpreted as the corresponding
XNAT project
Expand Down Expand Up @@ -238,9 +244,6 @@ def construct(
else:
dicom_fspaths = [Path(p) for p in glob(dicoms_path)]

if assoc_files_identification:
raise NotImplementedError

# Sort loaded series by StudyInstanceUID (imaging session)
logger.info("Loading DICOM series from %s", str(dicoms_path))
dicom_sessions = defaultdict(list)
Expand Down Expand Up @@ -280,7 +283,7 @@ def get_id(field):

sessions.append(
cls(
dicoms={str(s["SeriesNumber"]): s for s in session_dicom_series},
resources=session_dicom_series,
associated_file_fspaths=associated_file_fspaths,
associated_files_pattern=associated_files_pattern,
project_id=(project_id if project_id else get_id(project_field)),
Expand Down Expand Up @@ -312,7 +315,13 @@ def load(cls, save_dir: Path):
"is a valid YAML file",
)
raise e
dct["dicoms"] = {k: DicomSeries(v) for k, v in dct["dicoms"].items()}
dct["resources"] = {
(rd["scan_id"], rd["resource"]): (
rd["description"],
from_mime(rd["datatype"])(rd["fspaths"]),
)
for rd in dct["resources"]
}
dct["associated_file_fspaths"] = [
Path(f) for f in dct["associated_file_fspaths"]
]
Expand All @@ -327,26 +336,41 @@ def save(self, save_dir: Path):
yaml_file : Path
name of the file to load the manually specified IDs from (YAML format)
"""
dct = attrs.asdict(self, recurse=True)
dct = attrs.asdict(self, recurse=False)
dct["associated_file_fspaths"] = [
str(p) for p in dct["associated_file_fspaths"]
]
dct["dicoms"] = {k: [str(p) for p in v["fspaths"]] for k, v in dct["dicoms"].items()}
dct["resources"] = [
{
"scan_id": id_,
"resource": res,
"description": desc,
"datatype": to_mime(scan, official=False),
"fspaths": [str(p) for p in scan.fspaths],
}
for (id_, res), (desc, scan) in dct["resources"].items()
]
yaml_file = save_dir / self.SAVE_FILENAME
with open(yaml_file, "w") as f:
yaml.dump(
dct,
f,
)

def deidentify(self, dest_dir: Path) -> "ImagingSession":
"""Deidentify files by removing the fields listed `FIELDS_TO_ANONYMISE` and
def stage(
self, dest_dir: Path, assoc_files_identification: str | None = None
) -> "ImagingSession":
"""Stages and deidentifies files by removing the fields listed `FIELDS_TO_ANONYMISE` and
replacing birth date with 01/01/<BIRTH-YEAR> and returning new imaging session
Parameters
----------
dest_dir : Path
destination directory to save the deidentified files
assoc_files_identification : str, optional
Used to extract the scan ID & type/resource from the associated filename. Should
be a regular-expression (Python syntax) with named groups called 'id' and 'type', e.g.
'[^\.]+\.[^\.]+\.(?P<id>\d+)\.(?P<type>\w+)\..*'
Returns
-------
Expand Down Expand Up @@ -579,23 +603,11 @@ def populate_row(self, row: DataRow):
row : DataRow
The row to populate with entries
"""
series_numbers = []
for series_number, dcm in self.session.dicoms.items():
for (scan_id, scan_type), (scan_desc, scan) in self.session.scans.items():
row.add_entry(
path=dcm["SeriesDescription"],
datatype=DicomSeries,
uri=f"dicom::{series_number}",
)
series_numbers.append(series_number)

collated = defaultdict(list)
for assoc_fspath in self.session.associated_file_fspaths:

for resource in collated:
row.add_entry(
path=assoc_fspath.name,
datatype=FileSet,
uri=f"associated_file::{assoc_fspath}",
path=scan_desc,
datatype=type(scan),
uri=(scan_id, scan_type),
)

def get(self, entry: DataEntry, datatype: type) -> DataType:
Expand All @@ -614,12 +626,7 @@ def get(self, entry: DataEntry, datatype: type) -> DataType:
item : DataType
the item stored within the specified entry
"""
file_category, path = entry.uri.split("::")
if file_category == "dicom":
fileset = datatype(self.session.dicoms[path])
else:
fileset = datatype(path)
return fileset
return datatype(self.session.scans[entry.uri])

######################################
# The following methods can be empty #
Expand Down
2 changes: 1 addition & 1 deletion xnat_ingest/tests/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def imaging_session() -> ImagingSession:
project_id="PROJECTID",
subject_id="SUBJECTID",
session_id="SESSIONID",
dicoms=[
resources=[
DicomSeries(d.iterdir())
for d in (
get_pet_image(PatientName=PatientName),
Expand Down

0 comments on commit dc819d8

Please sign in to comment.