From 56673dddd246f806ec2d7ee1911d86fcd21414ae Mon Sep 17 00:00:00 2001 From: Paul Adkisson Date: Fri, 15 Nov 2024 06:52:44 +1100 Subject: [PATCH] Added CSV support to DeepLabCutInterface (#1140) --- CHANGELOG.md | 1 + .../behavior/deeplabcut.rst | 5 +- .../behavior/deeplabcut/_dlc_utils.py | 41 ++++-------- .../deeplabcut/deeplabcutdatainterface.py | 30 +++++---- .../tools/testing/data_interface_mixins.py | 24 ------- .../behavior/test_behavior_interfaces.py | 65 ++++++++++++++++--- 6 files changed, 91 insertions(+), 75 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cdc70223f..0545001d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ## Features * Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicates regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125) +* Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140) ## Improvements diff --git a/docs/conversion_examples_gallery/behavior/deeplabcut.rst b/docs/conversion_examples_gallery/behavior/deeplabcut.rst index c20dd057d..64201ea72 100644 --- a/docs/conversion_examples_gallery/behavior/deeplabcut.rst +++ b/docs/conversion_examples_gallery/behavior/deeplabcut.rst @@ -8,6 +8,7 @@ Install NeuroConv with the additional dependencies necessary for reading DeepLab pip install "neuroconv[deeplabcut]" Convert DeepLabCut pose estimation data to NWB using :py:class:`~neuroconv.datainterfaces.behavior.deeplabcut.deeplabcutdatainterface.DeepLabCutInterface`. +This interface supports both .h5 and .csv output files from DeepLabCut. .. code-block:: python @@ -16,8 +17,8 @@ Convert DeepLabCut pose estimation data to NWB using :py:class:`~neuroconv.datai >>> from pathlib import Path >>> from neuroconv.datainterfaces import DeepLabCutInterface - >>> file_path = BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" - >>> config_file_path = BEHAVIOR_DATA_PATH / "DLC" / "config.yaml" + >>> file_path = BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" + >>> config_file_path = BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml" >>> interface = DeepLabCutInterface(file_path=file_path, config_file_path=config_file_path, subject_name="ind1", verbose=False) diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py b/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py index 9e368fb39..5d1224e85 100644 --- a/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py +++ b/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py @@ -251,21 +251,6 @@ def _get_video_info_from_config_file(config_file_path: Path, vidname: str): return video_file_path, image_shape -def _get_pes_args( - *, - h5file: Path, - individual_name: str, -): - h5file = Path(h5file) - - _, scorer = h5file.stem.split("DLC") - scorer = "DLC" + scorer - - df = _ensure_individuals_in_header(pd.read_hdf(h5file), individual_name) - - return scorer, df - - def _write_pes_to_nwbfile( nwbfile, animal, @@ -339,23 +324,23 @@ def _write_pes_to_nwbfile( return nwbfile -def add_subject_to_nwbfile( +def _add_subject_to_nwbfile( nwbfile: NWBFile, - h5file: FilePath, + file_path: FilePath, individual_name: str, config_file: Optional[FilePath] = None, timestamps: Optional[Union[list, np.ndarray]] = None, pose_estimation_container_kwargs: Optional[dict] = None, ) -> NWBFile: """ - Given the subject name, add the DLC .h5 file to an in-memory NWBFile object. + Given the subject name, add the DLC output file (.h5 or .csv) to an in-memory NWBFile object. Parameters ---------- nwbfile : pynwb.NWBFile The in-memory nwbfile object to which the subject specific pose estimation series will be added. - h5file : str or path - Path to the DeepLabCut .h5 output file. + file_path : str or path + Path to the DeepLabCut .h5 or .csv output file. individual_name : str Name of the subject (whose pose is predicted) for single-animal DLC project. For multi-animal projects, the names from the DLC project will be used directly. @@ -371,18 +356,18 @@ def add_subject_to_nwbfile( nwbfile : pynwb.NWBFile nwbfile with pes written in the behavior module """ - h5file = Path(h5file) - - if "DLC" not in h5file.name or not h5file.suffix == ".h5": - raise IOError("The file passed in is not a DeepLabCut h5 data file.") + file_path = Path(file_path) - video_name, scorer = h5file.stem.split("DLC") + video_name, scorer = file_path.stem.split("DLC") scorer = "DLC" + scorer # TODO probably could be read directly with h5py # This requires pytables - data_frame_from_hdf5 = pd.read_hdf(h5file) - df = _ensure_individuals_in_header(data_frame_from_hdf5, individual_name) + if ".h5" in file_path.suffixes: + df = pd.read_hdf(file_path) + elif ".csv" in file_path.suffixes: + df = pd.read_csv(file_path, header=[0, 1, 2], index_col=0) + df = _ensure_individuals_in_header(df, individual_name) # Note the video here is a tuple of the video path and the image shape if config_file is not None: @@ -404,7 +389,7 @@ def add_subject_to_nwbfile( # Fetch the corresponding metadata pickle file, we extract the edges graph from here # TODO: This is the original implementation way to extract the file name but looks very brittle. Improve it - filename = str(h5file.parent / h5file.stem) + filename = str(file_path.parent / file_path.stem) for i, c in enumerate(filename[::-1]): if c.isnumeric(): break diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py b/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py index 21b054e85..f45913061 100644 --- a/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py +++ b/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py @@ -5,6 +5,7 @@ from pydantic import FilePath, validate_call from pynwb.file import NWBFile +# import ndx_pose from ....basetemporalalignmentinterface import BaseTemporalAlignmentInterface @@ -13,7 +14,7 @@ class DeepLabCutInterface(BaseTemporalAlignmentInterface): display_name = "DeepLabCut" keywords = ("DLC",) - associated_suffixes = (".h5",) + associated_suffixes = (".h5", ".csv") info = "Interface for handling data from DeepLabCut." _timestamps = None @@ -21,8 +22,8 @@ class DeepLabCutInterface(BaseTemporalAlignmentInterface): @classmethod def get_source_schema(cls) -> dict: source_schema = super().get_source_schema() - source_schema["properties"]["file_path"]["description"] = "Path to the .h5 file output by dlc." - source_schema["properties"]["config_file_path"]["description"] = "Path to .yml config file" + source_schema["properties"]["file_path"]["description"] = "Path to the file output by dlc (.h5 or .csv)." + source_schema["properties"]["config_file_path"]["description"] = "Path to .yml config file." return source_schema @validate_call @@ -34,24 +35,25 @@ def __init__( verbose: bool = True, ): """ - Interface for writing DLC's h5 files to nwb using dlc2nwb. + Interface for writing DLC's output files to nwb using dlc2nwb. Parameters ---------- file_path : FilePath - path to the h5 file output by dlc. + Path to the file output by dlc (.h5 or .csv). config_file_path : FilePath, optional - path to .yml config file + Path to .yml config file subject_name : str, default: "ind1" - the name of the subject for which the :py:class:`~pynwb.file.NWBFile` is to be created. + The name of the subject for which the :py:class:`~pynwb.file.NWBFile` is to be created. verbose: bool, default: True - controls verbosity. + Controls verbosity. """ from ._dlc_utils import _read_config file_path = Path(file_path) - if "DLC" not in file_path.stem or ".h5" not in file_path.suffixes: - raise IOError("The file passed in is not a DeepLabCut h5 data file.") + suffix_is_valid = ".h5" in file_path.suffixes or ".csv" in file_path.suffixes + if not "DLC" in file_path.stem or not suffix_is_valid: + raise IOError("The file passed in is not a valid DeepLabCut output data file.") self.config_dict = dict() if config_file_path is not None: @@ -108,12 +110,14 @@ def add_to_nwbfile( nwb file to which the recording information is to be added metadata: dict metadata info for constructing the nwb file (optional). + container_name: str, default: "PoseEstimation" + Name of the container to store the pose estimation. """ - from ._dlc_utils import add_subject_to_nwbfile + from ._dlc_utils import _add_subject_to_nwbfile - add_subject_to_nwbfile( + _add_subject_to_nwbfile( nwbfile=nwbfile, - h5file=str(self.source_data["file_path"]), + file_path=str(self.source_data["file_path"]), individual_name=self.subject_name, config_file=self.source_data["config_file_path"], timestamps=self._timestamps, diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py index 946b3fd6c..5187ff2e4 100644 --- a/src/neuroconv/tools/testing/data_interface_mixins.py +++ b/src/neuroconv/tools/testing/data_interface_mixins.py @@ -743,30 +743,6 @@ def test_interface_alignment(self): pass -class DeepLabCutInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin): - """ - A mixin for testing DeepLabCut interfaces. - """ - - def check_interface_get_original_timestamps(self): - pass # TODO in separate PR - - def check_interface_get_timestamps(self): - pass # TODO in separate PR - - def check_interface_set_aligned_timestamps(self): - pass # TODO in separate PR - - def check_shift_timestamps_by_start_time(self): - pass # TODO in separate PR - - def check_interface_original_timestamps_inmutability(self): - pass # TODO in separate PR - - def check_nwbfile_temporal_alignment(self): - pass # TODO in separate PR - - class VideoInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin): """ A mixin for testing Video interfaces. diff --git a/tests/test_on_data/behavior/test_behavior_interfaces.py b/tests/test_on_data/behavior/test_behavior_interfaces.py index 8e3e01d61..b43e65206 100644 --- a/tests/test_on_data/behavior/test_behavior_interfaces.py +++ b/tests/test_on_data/behavior/test_behavior_interfaces.py @@ -29,7 +29,6 @@ ) from neuroconv.tools.testing.data_interface_mixins import ( DataInterfaceTestMixin, - DeepLabCutInterfaceMixin, MedPCInterfaceMixin, TemporalAlignmentMixin, VideoInterfaceMixin, @@ -332,11 +331,16 @@ class TestFicTracDataInterfaceTiming(TemporalAlignmentMixin): platform == "darwin" and python_version < version.parse("3.10"), reason="interface not supported on macOS with Python < 3.10", ) -class TestDeepLabCutInterface(DeepLabCutInterfaceMixin): +class TestDeepLabCutInterface(DataInterfaceTestMixin): data_interface_cls = DeepLabCutInterface interface_kwargs = dict( - file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"), - config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "config.yaml"), + file_path=str( + BEHAVIOR_DATA_PATH + / "DLC" + / "open_field_without_video" + / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" + ), + config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml"), subject_name="ind1", ) save_directory = OUTPUT_PATH @@ -384,7 +388,12 @@ def check_read_nwb(self, nwbfile_path: str): class TestDeepLabCutInterfaceNoConfigFile(DataInterfaceTestMixin): data_interface_cls = DeepLabCutInterface interface_kwargs = dict( - file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"), + file_path=str( + BEHAVIOR_DATA_PATH + / "DLC" + / "open_field_without_video" + / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" + ), config_file_path=None, subject_name="ind1", ) @@ -411,11 +420,16 @@ def check_read_nwb(self, nwbfile_path: str): platform == "darwin" and python_version < version.parse("3.10"), reason="interface not supported on macOS with Python < 3.10", ) -class TestDeepLabCutInterfaceSetTimestamps(DeepLabCutInterfaceMixin): +class TestDeepLabCutInterfaceSetTimestamps(DataInterfaceTestMixin): data_interface_cls = DeepLabCutInterface interface_kwargs = dict( - file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"), - config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "config.yaml"), + file_path=str( + BEHAVIOR_DATA_PATH + / "DLC" + / "open_field_without_video" + / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" + ), + config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml"), subject_name="ind1", ) @@ -454,6 +468,41 @@ def check_read_nwb(self, nwbfile_path: str): pass +@pytest.mark.skipif( + platform == "darwin" and python_version < version.parse("3.10"), + reason="interface not supported on macOS with Python < 3.10", +) +class TestDeepLabCutInterfaceFromCSV(DataInterfaceTestMixin): + data_interface_cls = DeepLabCutInterface + interface_kwargs = dict( + file_path=str( + BEHAVIOR_DATA_PATH + / "DLC" + / "SL18_csv" + / "SL18_D19_S01_F01_BOX_SLP_20230503_112642.1DLC_resnet50_SubLearnSleepBoxRedLightJun26shuffle1_100000_stubbed.csv" + ), + config_file_path=None, + subject_name="SL18", + ) + save_directory = OUTPUT_PATH + + def check_read_nwb(self, nwbfile_path: str): + with NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: + nwbfile = io.read() + assert "behavior" in nwbfile.processing + processing_module_interfaces = nwbfile.processing["behavior"].data_interfaces + assert "PoseEstimation" in processing_module_interfaces + + pose_estimation_series_in_nwb = processing_module_interfaces["PoseEstimation"].pose_estimation_series + expected_pose_estimation_series = ["SL18_redled", "SL18_shoulder", "SL18_haunch", "SL18_baseoftail"] + + expected_pose_estimation_series_are_in_nwb_file = [ + pose_estimation in pose_estimation_series_in_nwb for pose_estimation in expected_pose_estimation_series + ] + + assert all(expected_pose_estimation_series_are_in_nwb_file) + + class TestSLEAPInterface(DataInterfaceTestMixin, TemporalAlignmentMixin): data_interface_cls = SLEAPInterface interface_kwargs = dict(