Added CSV support to DeepLabCutInterface (#1140)

catalystneuro · Nov 14, 2024 · 56673dd · 56673dd
1 parent e3cde1f
commit 56673dd
Show file tree

Hide file tree

Showing 6 changed files with 91 additions and 75 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@
 
 ## Features
 * Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicates regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125)
+* Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140)
 
 ## Improvements
 

diff --git a/docs/conversion_examples_gallery/behavior/deeplabcut.rst b/docs/conversion_examples_gallery/behavior/deeplabcut.rst
@@ -8,6 +8,7 @@ Install NeuroConv with the additional dependencies necessary for reading DeepLab
     pip install "neuroconv[deeplabcut]"
 
 Convert DeepLabCut pose estimation data to NWB using :py:class:`~neuroconv.datainterfaces.behavior.deeplabcut.deeplabcutdatainterface.DeepLabCutInterface`.
+This interface supports both .h5 and .csv output files from DeepLabCut.
 
 .. code-block:: python
 
@@ -16,8 +17,8 @@ Convert DeepLabCut pose estimation data to NWB using :py:class:`~neuroconv.datai
     >>> from pathlib import Path
     >>> from neuroconv.datainterfaces import DeepLabCutInterface
 
-    >>> file_path = BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"
-    >>> config_file_path = BEHAVIOR_DATA_PATH / "DLC" / "config.yaml"
+    >>> file_path = BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"
+    >>> config_file_path = BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml"
 
     >>> interface = DeepLabCutInterface(file_path=file_path, config_file_path=config_file_path, subject_name="ind1", verbose=False)
 

diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py b/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py
@@ -251,21 +251,6 @@ def _get_video_info_from_config_file(config_file_path: Path, vidname: str):
     return video_file_path, image_shape
 
 
-def _get_pes_args(
-    *,
-    h5file: Path,
-    individual_name: str,
-):
-    h5file = Path(h5file)
-
-    _, scorer = h5file.stem.split("DLC")
-    scorer = "DLC" + scorer
-
-    df = _ensure_individuals_in_header(pd.read_hdf(h5file), individual_name)
-
-    return scorer, df
-
-
 def _write_pes_to_nwbfile(
     nwbfile,
     animal,
@@ -339,23 +324,23 @@ def _write_pes_to_nwbfile(
     return nwbfile
 
 
-def add_subject_to_nwbfile(
+def _add_subject_to_nwbfile(
     nwbfile: NWBFile,
-    h5file: FilePath,
+    file_path: FilePath,
     individual_name: str,
     config_file: Optional[FilePath] = None,
     timestamps: Optional[Union[list, np.ndarray]] = None,
     pose_estimation_container_kwargs: Optional[dict] = None,
 ) -> NWBFile:
     """
-    Given the subject name, add the DLC .h5 file to an in-memory NWBFile object.
+    Given the subject name, add the DLC output file (.h5 or .csv) to an in-memory NWBFile object.
 
     Parameters
     ----------
     nwbfile : pynwb.NWBFile
         The in-memory nwbfile object to which the subject specific pose estimation series will be added.
-    h5file : str or path
-        Path to the DeepLabCut .h5 output file.
+    file_path : str or path
+        Path to the DeepLabCut .h5 or .csv output file.
     individual_name : str
         Name of the subject (whose pose is predicted) for single-animal DLC project.
         For multi-animal projects, the names from the DLC project will be used directly.
@@ -371,18 +356,18 @@ def add_subject_to_nwbfile(
     nwbfile : pynwb.NWBFile
         nwbfile with pes written in the behavior module
     """
-    h5file = Path(h5file)
-
-    if "DLC" not in h5file.name or not h5file.suffix == ".h5":
-        raise IOError("The file passed in is not a DeepLabCut h5 data file.")
+    file_path = Path(file_path)
 
-    video_name, scorer = h5file.stem.split("DLC")
+    video_name, scorer = file_path.stem.split("DLC")
     scorer = "DLC" + scorer
 
     # TODO probably could be read directly with h5py
     # This requires pytables
-    data_frame_from_hdf5 = pd.read_hdf(h5file)
-    df = _ensure_individuals_in_header(data_frame_from_hdf5, individual_name)
+    if ".h5" in file_path.suffixes:
+        df = pd.read_hdf(file_path)
+    elif ".csv" in file_path.suffixes:
+        df = pd.read_csv(file_path, header=[0, 1, 2], index_col=0)
+    df = _ensure_individuals_in_header(df, individual_name)
 
     # Note the video here is a tuple of the video path and the image shape
     if config_file is not None:
@@ -404,7 +389,7 @@ def add_subject_to_nwbfile(
 
     # Fetch the corresponding metadata pickle file, we extract the edges graph from here
     # TODO: This is the original implementation way to extract the file name but looks very brittle. Improve it
-    filename = str(h5file.parent / h5file.stem)
+    filename = str(file_path.parent / file_path.stem)
     for i, c in enumerate(filename[::-1]):
         if c.isnumeric():
             break

diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py b/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py
@@ -5,6 +5,7 @@
 from pydantic import FilePath, validate_call
 from pynwb.file import NWBFile
 
+# import ndx_pose
 from ....basetemporalalignmentinterface import BaseTemporalAlignmentInterface
 
 
@@ -13,16 +14,16 @@ class DeepLabCutInterface(BaseTemporalAlignmentInterface):
 
     display_name = "DeepLabCut"
     keywords = ("DLC",)
-    associated_suffixes = (".h5",)
+    associated_suffixes = (".h5", ".csv")
     info = "Interface for handling data from DeepLabCut."
 
     _timestamps = None
 
     @classmethod
     def get_source_schema(cls) -> dict:
         source_schema = super().get_source_schema()
-        source_schema["properties"]["file_path"]["description"] = "Path to the .h5 file output by dlc."
-        source_schema["properties"]["config_file_path"]["description"] = "Path to .yml config file"
+        source_schema["properties"]["file_path"]["description"] = "Path to the file output by dlc (.h5 or .csv)."
+        source_schema["properties"]["config_file_path"]["description"] = "Path to .yml config file."
         return source_schema
 
     @validate_call
@@ -34,24 +35,25 @@ def __init__(
         verbose: bool = True,
     ):
         """
-        Interface for writing DLC's h5 files to nwb using dlc2nwb.
+        Interface for writing DLC's output files to nwb using dlc2nwb.
 
         Parameters
         ----------
         file_path : FilePath
-            path to the h5 file output by dlc.
+            Path to the file output by dlc (.h5 or .csv).
         config_file_path : FilePath, optional
-            path to .yml config file
+            Path to .yml config file.
         subject_name : str, default: "ind1"
-            the name of the subject for which the :py:class:`~pynwb.file.NWBFile` is to be created.
+            The name of the subject for which the :py:class:`~pynwb.file.NWBFile` is to be created.
         verbose: bool, default: True
-            controls verbosity.
+            Controls verbosity.
         """
         from ._dlc_utils import _read_config
 
         file_path = Path(file_path)
-        if "DLC" not in file_path.stem or ".h5" not in file_path.suffixes:
-            raise IOError("The file passed in is not a DeepLabCut h5 data file.")
+        suffix_is_valid = ".h5" in file_path.suffixes or ".csv" in file_path.suffixes
+        if not "DLC" in file_path.stem or not suffix_is_valid:
+            raise IOError("The file passed in is not a valid DeepLabCut output data file.")
 
         self.config_dict = dict()
         if config_file_path is not None:
@@ -108,12 +110,14 @@ def add_to_nwbfile(
             nwb file to which the recording information is to be added
         metadata: dict
             metadata info for constructing the nwb file (optional).
+        container_name: str, default: "PoseEstimation"
+            Name of the container to store the pose estimation.
         """
-        from ._dlc_utils import add_subject_to_nwbfile
+        from ._dlc_utils import _add_subject_to_nwbfile
 
-        add_subject_to_nwbfile(
+        _add_subject_to_nwbfile(
             nwbfile=nwbfile,
-            h5file=str(self.source_data["file_path"]),
+            file_path=str(self.source_data["file_path"]),
             individual_name=self.subject_name,
             config_file=self.source_data["config_file_path"],
             timestamps=self._timestamps,

diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py
@@ -743,30 +743,6 @@ def test_interface_alignment(self):
         pass
 
 
-class DeepLabCutInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin):
-    """
-    A mixin for testing DeepLabCut interfaces.
-    """
-
-    def check_interface_get_original_timestamps(self):
-        pass  # TODO in separate PR
-
-    def check_interface_get_timestamps(self):
-        pass  # TODO in separate PR
-
-    def check_interface_set_aligned_timestamps(self):
-        pass  # TODO in separate PR
-
-    def check_shift_timestamps_by_start_time(self):
-        pass  # TODO in separate PR
-
-    def check_interface_original_timestamps_inmutability(self):
-        pass  # TODO in separate PR
-
-    def check_nwbfile_temporal_alignment(self):
-        pass  # TODO in separate PR
-
-
 class VideoInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin):
     """
     A mixin for testing Video interfaces.

diff --git a/tests/test_on_data/behavior/test_behavior_interfaces.py b/tests/test_on_data/behavior/test_behavior_interfaces.py
@@ -29,7 +29,6 @@
 )
 from neuroconv.tools.testing.data_interface_mixins import (
     DataInterfaceTestMixin,
-    DeepLabCutInterfaceMixin,
     MedPCInterfaceMixin,
     TemporalAlignmentMixin,
     VideoInterfaceMixin,
@@ -332,11 +331,16 @@ class TestFicTracDataInterfaceTiming(TemporalAlignmentMixin):
     platform == "darwin" and python_version < version.parse("3.10"),
     reason="interface not supported on macOS with Python < 3.10",
 )
-class TestDeepLabCutInterface(DeepLabCutInterfaceMixin):
+class TestDeepLabCutInterface(DataInterfaceTestMixin):
     data_interface_cls = DeepLabCutInterface
     interface_kwargs = dict(
-        file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"),
-        config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "config.yaml"),
+        file_path=str(
+            BEHAVIOR_DATA_PATH
+            / "DLC"
+            / "open_field_without_video"
+            / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"
+        ),
+        config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml"),
         subject_name="ind1",
     )
     save_directory = OUTPUT_PATH
@@ -384,7 +388,12 @@ def check_read_nwb(self, nwbfile_path: str):
 class TestDeepLabCutInterfaceNoConfigFile(DataInterfaceTestMixin):
     data_interface_cls = DeepLabCutInterface
     interface_kwargs = dict(
-        file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"),
+        file_path=str(
+            BEHAVIOR_DATA_PATH
+            / "DLC"
+            / "open_field_without_video"
+            / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"
+        ),
         config_file_path=None,
         subject_name="ind1",
     )
@@ -411,11 +420,16 @@ def check_read_nwb(self, nwbfile_path: str):
     platform == "darwin" and python_version < version.parse("3.10"),
     reason="interface not supported on macOS with Python < 3.10",
 )
-class TestDeepLabCutInterfaceSetTimestamps(DeepLabCutInterfaceMixin):
+class TestDeepLabCutInterfaceSetTimestamps(DataInterfaceTestMixin):
     data_interface_cls = DeepLabCutInterface
     interface_kwargs = dict(
-        file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"),
-        config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "config.yaml"),
+        file_path=str(
+            BEHAVIOR_DATA_PATH
+            / "DLC"
+            / "open_field_without_video"
+            / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"
+        ),
+        config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml"),
         subject_name="ind1",
     )
 
@@ -454,6 +468,41 @@ def check_read_nwb(self, nwbfile_path: str):
         pass
 
 
+@pytest.mark.skipif(
+    platform == "darwin" and python_version < version.parse("3.10"),
+    reason="interface not supported on macOS with Python < 3.10",
+)
+class TestDeepLabCutInterfaceFromCSV(DataInterfaceTestMixin):
+    data_interface_cls = DeepLabCutInterface
+    interface_kwargs = dict(
+        file_path=str(
+            BEHAVIOR_DATA_PATH
+            / "DLC"
+            / "SL18_csv"
+            / "SL18_D19_S01_F01_BOX_SLP_20230503_112642.1DLC_resnet50_SubLearnSleepBoxRedLightJun26shuffle1_100000_stubbed.csv"
+        ),
+        config_file_path=None,
+        subject_name="SL18",
+    )
+    save_directory = OUTPUT_PATH
+
+    def check_read_nwb(self, nwbfile_path: str):
+        with NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io:
+            nwbfile = io.read()
+            assert "behavior" in nwbfile.processing
+            processing_module_interfaces = nwbfile.processing["behavior"].data_interfaces
+            assert "PoseEstimation" in processing_module_interfaces
+
+            pose_estimation_series_in_nwb = processing_module_interfaces["PoseEstimation"].pose_estimation_series
+            expected_pose_estimation_series = ["SL18_redled", "SL18_shoulder", "SL18_haunch", "SL18_baseoftail"]
+
+            expected_pose_estimation_series_are_in_nwb_file = [
+                pose_estimation in pose_estimation_series_in_nwb for pose_estimation in expected_pose_estimation_series
+            ]
+
+            assert all(expected_pose_estimation_series_are_in_nwb_file)
+
+
 class TestSLEAPInterface(DataInterfaceTestMixin, TemporalAlignmentMixin):
     data_interface_cls = SLEAPInterface
     interface_kwargs = dict(