diff --git a/xnat_ingest/cli/stage.py b/xnat_ingest/cli/stage.py
index 1c44dc9..52d467f 100644
--- a/xnat_ingest/cli/stage.py
+++ b/xnat_ingest/cli/stage.py
@@ -93,9 +93,7 @@
     default="info",
     type=str,
     envvar="XNAT_INGEST_LOGLEVEL",
-    help=(
-        "The level of the logging printed to stdout"
-    )
+    help=("The level of the logging printed to stdout"),
 )
 @click.option(
     "--log-file",
@@ -153,7 +151,6 @@ def stage(
     mail_server: MailServer,
     raise_errors: bool,
 ):
-
     set_logger_handling(log_level, log_file, log_emails, mail_server)
 
     logger.info(
@@ -165,7 +162,6 @@ def stage(
     sessions = ImagingSession.construct(
         dicoms_path=dicoms_path,
         associated_files_pattern=associated,
-        assoc_files_identification=assoc_identification,
         project_field=project_field,
         subject_field=subject_field,
         session_field=session_field,
@@ -186,7 +182,9 @@ def stage(
                 continue
             session_staging_dir.mkdir(exist_ok=True)
             # Deidentify files and save them to the staging directory
-            staged_session = session.deidentify(session_staging_dir)
+            staged_session = session.stage(
+                session_staging_dir, assoc_files_identification=assoc_identification
+            )
             staged_session.save(session_staging_dir)
             if delete:
                 session.delete()
diff --git a/xnat_ingest/dicom.py b/xnat_ingest/dicom.py
deleted file mode 100644
index 0594af7..0000000
--- a/xnat_ingest/dicom.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import typing as ty
-from collections import defaultdict
-from pathlib import Path
-from copy import copy
-import attrs
-
-
-@attrs.define
-class DicomScan:
-
-    Tag = ty.NewType("Tag", ty.Tuple[str, str])
-
-    modality: str
-    files: list[Path] = attrs.field(factory=list)
-    ids: dict[str, str] = attrs.field(factory=dict)
-
-    DEFAULT_ID_FIELDS = {
-        "project": "StudyID",
-        "subject": "PatientID",
-        "session": "AccessionNumber",
-    }
-
-    @classmethod
-    def from_files(
-        cls,
-        dicom_files: ty.Sequence[Path],
-        ids: ty.Optional[dict[str, str]] = None,
-        **id_fields: dict[str, ty.Union[str, Tag, tuple[str, ty.Callable], tuple[Tag, ty.Callable]]],
-    ) -> "ty.Sequence[DicomScan]":
-        """Loads a series of DICOM scans from a list of dicom files, grouping the files
-        by series number and pulling various session-identifying fields from the headers
-
-        Parameters
-        ----------
-        dicom_files: Sequence[Path]
-            The dicom files to sort
-        ids : dict[str, str]
-            IDs to specifiy manually, overrides those loaded from the DICOM headers
-        **id_fields : dict[str, ty.Union[str, Tag, tuple[str, ty.Callable], tuple[Tag, ty.Callable]]]
-            The DICOM fields to extractx the IDs from. Values of the dictionary
-            can either be the DICOM field name or tag as a tuple (e.g. `("0001", "0008")`)
-            or a tuple containging the str/tag and a callable used to extract the
-            ID from. For regex expressions you can use the DicomScan.id_exractor method
-        """
-        id_fields = copy(cls.DEFAULT_ID_FIELDS)
-        id_fields.update(id_fields)
-
-        scans: dict[str, DicomScan] = {}
-        ids_dct = defaultdict(list)
-        subject_id_dct = defaultdict(list)
-        project_id_dct = defaultdict(list)
-        # TESTNAME_GePhantom_20230825_155050
-        for dcm_file in dicom_files:
-            dcm = pydicom.dcmread(dcm_file)
-            scan_id = dcm.SeriesNumber
-            if "SECONDARY" in dcm.ImageType:
-                modality = "SC"
-            else:
-                modality = dcm.Modality
-            try:
-                scan = scans[scan_id]
-            except KeyError:
-                scan = scans[scan_id] = Scan(modality=modality)
-            else:
-                # Get scan modality (should be the same for all dicoms with the same series
-                # number)
-                assert modality == scan.modality
-            scan.files.append(dcm_file)
-            project_id_dct[dcm.get(project_field.keyword)].append(dcm_file)
-            subject_id_dct[dcm.get(subject_field.keyword)].append(dcm_file)
-            session_id_dct[dcm.get(session_field.keyword)].append(dcm_file)
-        errors: list[str] = []
-        project_id: str = spec.get("project_id")  # type: ignore
-        subject_id: str = spec.get("subject_id")  # type: ignore
-        session_id: str = spec.get("session_id")  # type: ignore
-        if project_id is None:
-            project_ids = list(project_id_dct)
-            if len(list(project_ids)) > 1:
-                errors.append(
-                    f"Incosistent project IDs found in {project_field}:\n"
-                    + json.dumps(project_id_dct, indent=4)
-                )
-            else:
-                project_id = project_ids[0]
-                if not project_id:
-                    logger.error(f"Project ID ({project_field}) not provided")
-        if subject_id is None:
-            subject_ids = list(subject_id_dct)
-            if len(subject_ids) > 1:
-                errors.append(
-                    f"Incosistent subject IDs found in {subject_field}:\n"
-                    + json.dumps(subject_id_dct, indent=4)
-                )
-            else:
-                # FIXME: space is present in test data, but shouldn't be in prod
-                subject_id = subject_ids[0].replace(" ", "_")
-                if not subject_id:
-                    errors.append(f"Subject ID ({subject_field}) not provided")
-        if session_id is None:
-            session_ids = list(session_id_dct)
-            if len(session_ids) > 1:
-                errors.append(
-                    f"Incosistent session IDs found in {session_field}:\n"
-                    + json.dumps(session_id_dct, indent=4)
-                )
-            else:
-                session_id = session_ids[0]
-                if not session_id:
-                    errors.append(f"Session ID ({session_field}) not provided")
-        if errors:
-            raise DicomParseError("\n".join(errors))
-        associated_file_dir_name = "_".join(dcm.PatientName.split("^")) + "_" + dcm.StudyDate
-        return scans, SessionMetadata(
-            project_id, subject_id, session_id, associated_file_dir_name
-        )
diff --git a/xnat_ingest/session.py b/xnat_ingest/session.py
index c4b0cb5..a67f9f1 100644
--- a/xnat_ingest/session.py
+++ b/xnat_ingest/session.py
@@ -14,7 +14,7 @@
 import pydicom
 from fileformats.application import Dicom
 from fileformats.medimage import DicomSeries
-from fileformats.core import from_paths, FileSet, DataType
+from fileformats.core import from_paths, FileSet, DataType, from_mime, to_mime
 from fileformats.generic import File, Directory
 from arcana.core.data.set import Dataset
 from arcana.core.data.space import DataSpace
@@ -29,12 +29,24 @@
 logger = logging.getLogger("xnat-ingest")
 
 
-def dicoms_converter(
-    multi_dicom_series: ty.Union[ty.List[DicomSeries], ty.Dict[str, DicomSeries]]
-) -> ty.Dict[str, DicomSeries]:
-    if isinstance(multi_dicom_series, ty.Sequence):
-        multi_dicom_series = {str(s["SeriesNumber"]): s for s in multi_dicom_series}
-    return multi_dicom_series
+def resources_converter(  # attrs converter for ImagingSession.resources
+    resources: ty.Union[
+        ty.List[DicomSeries], ty.Dict[ty.Tuple[str, str], ty.Tuple[str, FileSet]]
+    ]
+) -> ty.Dict[ty.Tuple[str, str], ty.Tuple[str, FileSet]]:
+    if isinstance(resources, ty.Sequence):  # sequence given -> build keyed dict
+        resources_dict = {}
+        for resource in resources:
+            if not isinstance(resource, DicomSeries):  # only DICOM series accepted
+                raise TypeError(  # NOTE(review): reports whole input, not the item
+                    f"Only sequences of DicomSeries can be converted, otherwise needs "
+                    f"to be already in a dictionary, found {resources}"
+                )
+            resources_dict[
+                (str(resource["SeriesNumber"]), "DICOM")  # key: (scan id, type)
+            ] = (str(resource["SeriesDescription"]), resource)  # (description, series)
+        resources = resources_dict
+    return resources  # non-sequence input is passed through unchanged
 
 
 @attrs.define(slots=False)
@@ -42,9 +54,9 @@ class ImagingSession:
     project_id: str
     subject_id: str
     session_id: str
-    dicoms: ty.Dict[str, DicomSeries] = attrs.field(
-        factory=dict, converter=dicoms_converter
-    )
+    resources: ty.Dict[ty.Tuple[str, str], ty.Tuple[str, FileSet]] = attrs.field(
+        factory=dict, converter=resources_converter
+    )  # keys: (scan-id, resource-type) tuples; values: (description, fileset) tuples
     associated_files_pattern: str | None = None
     associated_file_fspaths: ty.List[Path] = attrs.field(factory=list)
 
@@ -72,7 +84,6 @@ def select_resources(
         dataset: Dataset,
         include_all_dicoms: bool = False,
         include_all_assoc: bool = False,
-        assoc_id_pattern: str = None
     ) -> ty.Iterator[ty.Tuple[str, str, str, FileSet]]:
         """Returns selected resources that match the columns in the dataset definition
 
@@ -95,7 +106,7 @@ def select_resources(
         scan : FileSet
             a fileset to upload
         """
-        store = MockDataStore(self, assoc_id_pattern=assoc_id_pattern)
+        store = MockDataStore(self)
 
         uploaded: ty.Set[FileSet] = set()
 
@@ -185,7 +196,6 @@ def construct(
         cls,
         dicoms_path: str | Path,
         associated_files_pattern: str | None = None,
-        assoc_files_identification: str | None = None,
         project_field: str = "StudyID",
         subject_field: str = "PatientID",
         session_field: str = "AccessionNumber",
@@ -205,10 +215,6 @@ def construct(
             are substituted before the string is used to glob the non-DICOM files. In
             order to deidentify the filenames, the pattern must explicitly reference all
             identifiable fields in string template placeholders.
-        assoc_files_identification : str, optional
-            Used to extract the scan ID & type/resource from the associated filename. Should
-            be a regular-expression (Python syntax) with named groups called 'id' and 'type', e.g.
-            '[^\.]+\.[^\.]+\.(?P<id>\d+)\.(?P<type>\w+)\..*'
         project_field : str
             the name of the DICOM field that is to be interpreted as the corresponding
             XNAT project
@@ -238,9 +244,6 @@ def construct(
         else:
             dicom_fspaths = [Path(p) for p in glob(dicoms_path)]
 
-        if assoc_files_identification:
-            raise NotImplementedError
-
         # Sort loaded series by StudyInstanceUID (imaging session)
         logger.info("Loading DICOM series from %s", str(dicoms_path))
         dicom_sessions = defaultdict(list)
@@ -280,7 +283,7 @@ def get_id(field):
 
             sessions.append(
                 cls(
-                    dicoms={str(s["SeriesNumber"]): s for s in session_dicom_series},
+                    resources=session_dicom_series,
                     associated_file_fspaths=associated_file_fspaths,
                     associated_files_pattern=associated_files_pattern,
                     project_id=(project_id if project_id else get_id(project_field)),
@@ -312,7 +315,13 @@ def load(cls, save_dir: Path):
                 "is a valid YAML file",
             )
             raise e
-        dct["dicoms"] = {k: DicomSeries(v) for k, v in dct["dicoms"].items()}
+        dct["resources"] = {
+            (rd["scan_id"], rd["resource"]): (
+                rd["description"],
+                from_mime(rd["datatype"])(rd["fspaths"]),
+            )
+            for rd in dct["resources"]
+        }
         dct["associated_file_fspaths"] = [
             Path(f) for f in dct["associated_file_fspaths"]
         ]
@@ -327,11 +336,20 @@ def save(self, save_dir: Path):
         yaml_file : Path
             name of the file to load the manually specified IDs from (YAML format)
         """
-        dct = attrs.asdict(self, recurse=True)
+        dct = attrs.asdict(self, recurse=False)
         dct["associated_file_fspaths"] = [
             str(p) for p in dct["associated_file_fspaths"]
         ]
-        dct["dicoms"] = {k: [str(p) for p in v["fspaths"]] for k, v in dct["dicoms"].items()}
+        dct["resources"] = [
+            {
+                "scan_id": id_,
+                "resource": res,
+                "description": desc,
+                "datatype": to_mime(scan, official=False),
+                "fspaths": [str(p) for p in scan.fspaths],
+            }
+            for (id_, res), (desc, scan) in dct["resources"].items()
+        ]
         yaml_file = save_dir / self.SAVE_FILENAME
         with open(yaml_file, "w") as f:
             yaml.dump(
@@ -339,14 +357,20 @@ def save(self, save_dir: Path):
                 f,
             )
 
-    def deidentify(self, dest_dir: Path) -> "ImagingSession":
-        """Deidentify files by removing the fields listed `FIELDS_TO_ANONYMISE` and
+    def stage(
+        self, dest_dir: Path, assoc_files_identification: str | None = None
+    ) -> "ImagingSession":
+        """Stages and deidentifies files by removing the fields listed in `FIELDS_TO_ANONYMISE` and
         replacing birth date with 01/01/<BIRTH-YEAR> and returning new imaging session
 
         Parameters
         ----------
         dest_dir : Path
             destination directory to save the deidentified files
+        assoc_files_identification : str, optional
+            Used to extract the scan ID & type/resource from the associated filename. Should
+            be a regular-expression (Python syntax) with named groups called 'id' and 'type', e.g.
+            '[^\.]+\.[^\.]+\.(?P<id>\d+)\.(?P<type>\w+)\..*'
 
         Returns
         -------
@@ -579,23 +603,11 @@ def populate_row(self, row: DataRow):
         row : DataRow
             The row to populate with entries
         """
-        series_numbers = []
-        for series_number, dcm in self.session.dicoms.items():
+        for (scan_id, scan_type), (scan_desc, scan) in self.session.scans.items():
             row.add_entry(
-                path=dcm["SeriesDescription"],
-                datatype=DicomSeries,
-                uri=f"dicom::{series_number}",
-            )
-            series_numbers.append(series_number)
-        
-        collated = defaultdict(list)
-        for assoc_fspath in self.session.associated_file_fspaths:
-
-        for resource in collated:
-            row.add_entry(
-                path=assoc_fspath.name,
-                datatype=FileSet,
-                uri=f"associated_file::{assoc_fspath}",
+                path=scan_desc,
+                datatype=type(scan),
+                uri=(scan_id, scan_type),
             )
 
     def get(self, entry: DataEntry, datatype: type) -> DataType:
@@ -614,12 +626,7 @@ def get(self, entry: DataEntry, datatype: type) -> DataType:
         item : DataType
             the item stored within the specified entry
         """
-        file_category, path = entry.uri.split("::")
-        if file_category == "dicom":
-            fileset = datatype(self.session.dicoms[path])
-        else:
-            fileset = datatype(path)
-        return fileset
+        return datatype(self.session.scans[entry.uri])
 
     ######################################
     # The following methods can be empty #
diff --git a/xnat_ingest/tests/test_session.py b/xnat_ingest/tests/test_session.py
index 3f8b503..5be8c82 100644
--- a/xnat_ingest/tests/test_session.py
+++ b/xnat_ingest/tests/test_session.py
@@ -36,7 +36,7 @@ def imaging_session() -> ImagingSession:
         project_id="PROJECTID",
         subject_id="SUBJECTID",
         session_id="SESSIONID",
-        dicoms=[
+        resources=[
             DicomSeries(d.iterdir())
             for d in (
                 get_pet_image(PatientName=PatientName),