Merge branch 'main' into make_metadata_encoder_public_again

catalystneuro · Nov 15, 2024 · aecf92a · aecf92a
2 parents 8e8a1a3 + 64fb9e0
commit aecf92a
Show file tree

Hide file tree

Showing 8 changed files with 134 additions and 157 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,8 +9,10 @@
 
 ## Features
 * Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicate a regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125)
+* Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140)
 
 ## Improvements
+* Use mixin tests for ecephys mocks [PR #1136](https://github.com/catalystneuro/neuroconv/pull/1136)
 
 # v0.6.5 (November 1, 2024)
 

diff --git a/docs/conversion_examples_gallery/behavior/deeplabcut.rst b/docs/conversion_examples_gallery/behavior/deeplabcut.rst
@@ -8,6 +8,7 @@ Install NeuroConv with the additional dependencies necessary for reading DeepLab
     pip install "neuroconv[deeplabcut]"
 
 Convert DeepLabCut pose estimation data to NWB using :py:class:`~neuroconv.datainterfaces.behavior.deeplabcut.deeplabcutdatainterface.DeepLabCutInterface`.
+This interface supports both .h5 and .csv output files from DeepLabCut.
 
 .. code-block:: python
 
@@ -16,8 +17,8 @@ Convert DeepLabCut pose estimation data to NWB using :py:class:`~neuroconv.datai
     >>> from pathlib import Path
     >>> from neuroconv.datainterfaces import DeepLabCutInterface
 
-    >>> file_path = BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"
-    >>> config_file_path = BEHAVIOR_DATA_PATH / "DLC" / "config.yaml"
+    >>> file_path = BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"
+    >>> config_file_path = BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml"
 
     >>> interface = DeepLabCutInterface(file_path=file_path, config_file_path=config_file_path, subject_name="ind1", verbose=False)
 

diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py b/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py
@@ -251,21 +251,6 @@ def _get_video_info_from_config_file(config_file_path: Path, vidname: str):
     return video_file_path, image_shape
 
 
-def _get_pes_args(
-    *,
-    h5file: Path,
-    individual_name: str,
-):
-    h5file = Path(h5file)
-
-    _, scorer = h5file.stem.split("DLC")
-    scorer = "DLC" + scorer
-
-    df = _ensure_individuals_in_header(pd.read_hdf(h5file), individual_name)
-
-    return scorer, df
-
-
 def _write_pes_to_nwbfile(
     nwbfile,
     animal,
@@ -339,23 +324,23 @@ def _write_pes_to_nwbfile(
     return nwbfile
 
 
-def add_subject_to_nwbfile(
+def _add_subject_to_nwbfile(
     nwbfile: NWBFile,
-    h5file: FilePath,
+    file_path: FilePath,
     individual_name: str,
     config_file: Optional[FilePath] = None,
     timestamps: Optional[Union[list, np.ndarray]] = None,
     pose_estimation_container_kwargs: Optional[dict] = None,
 ) -> NWBFile:
     """
-    Given the subject name, add the DLC .h5 file to an in-memory NWBFile object.
+    Given the subject name, add the DLC output file (.h5 or .csv) to an in-memory NWBFile object.
 
     Parameters
     ----------
     nwbfile : pynwb.NWBFile
         The in-memory nwbfile object to which the subject specific pose estimation series will be added.
-    h5file : str or path
-        Path to the DeepLabCut .h5 output file.
+    file_path : str or path
+        Path to the DeepLabCut .h5 or .csv output file.
     individual_name : str
         Name of the subject (whose pose is predicted) for single-animal DLC project.
         For multi-animal projects, the names from the DLC project will be used directly.
@@ -371,18 +356,18 @@ def add_subject_to_nwbfile(
     nwbfile : pynwb.NWBFile
         nwbfile with pes written in the behavior module
     """
-    h5file = Path(h5file)
-
-    if "DLC" not in h5file.name or not h5file.suffix == ".h5":
-        raise IOError("The file passed in is not a DeepLabCut h5 data file.")
+    file_path = Path(file_path)
 
-    video_name, scorer = h5file.stem.split("DLC")
+    video_name, scorer = file_path.stem.split("DLC")
     scorer = "DLC" + scorer
 
     # TODO probably could be read directly with h5py
     # This requires pytables
-    data_frame_from_hdf5 = pd.read_hdf(h5file)
-    df = _ensure_individuals_in_header(data_frame_from_hdf5, individual_name)
+    if ".h5" in file_path.suffixes:
+        df = pd.read_hdf(file_path)
+    elif ".csv" in file_path.suffixes:
+        df = pd.read_csv(file_path, header=[0, 1, 2], index_col=0)
+    df = _ensure_individuals_in_header(df, individual_name)
 
     # Note the video here is a tuple of the video path and the image shape
     if config_file is not None:
@@ -404,7 +389,7 @@ def add_subject_to_nwbfile(
 
     # Fetch the corresponding metadata pickle file, we extract the edges graph from here
     # TODO: This is the original implementation way to extract the file name but looks very brittle. Improve it
-    filename = str(h5file.parent / h5file.stem)
+    filename = str(file_path.parent / file_path.stem)
     for i, c in enumerate(filename[::-1]):
         if c.isnumeric():
             break

diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py b/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py
@@ -5,6 +5,7 @@
 from pydantic import FilePath, validate_call
 from pynwb.file import NWBFile
 
+# import ndx_pose
 from ....basetemporalalignmentinterface import BaseTemporalAlignmentInterface
 
 
@@ -13,16 +14,16 @@ class DeepLabCutInterface(BaseTemporalAlignmentInterface):
 
     display_name = "DeepLabCut"
     keywords = ("DLC",)
-    associated_suffixes = (".h5",)
+    associated_suffixes = (".h5", ".csv")
     info = "Interface for handling data from DeepLabCut."
 
     _timestamps = None
 
     @classmethod
     def get_source_schema(cls) -> dict:
         source_schema = super().get_source_schema()
-        source_schema["properties"]["file_path"]["description"] = "Path to the .h5 file output by dlc."
-        source_schema["properties"]["config_file_path"]["description"] = "Path to .yml config file"
+        source_schema["properties"]["file_path"]["description"] = "Path to the file output by dlc (.h5 or .csv)."
+        source_schema["properties"]["config_file_path"]["description"] = "Path to .yml config file."
         return source_schema
 
     @validate_call
@@ -34,24 +35,25 @@ def __init__(
         verbose: bool = True,
     ):
         """
-        Interface for writing DLC's h5 files to nwb using dlc2nwb.
+        Interface for writing DLC's output files to nwb using dlc2nwb.
 
         Parameters
         ----------
         file_path : FilePath
-            path to the h5 file output by dlc.
+            Path to the file output by dlc (.h5 or .csv).
         config_file_path : FilePath, optional
-            path to .yml config file
+            Path to .yml config file
         subject_name : str, default: "ind1"
-            the name of the subject for which the :py:class:`~pynwb.file.NWBFile` is to be created.
+            The name of the subject for which the :py:class:`~pynwb.file.NWBFile` is to be created.
         verbose: bool, default: True
-            controls verbosity.
+            Controls verbosity.
         """
         from ._dlc_utils import _read_config
 
         file_path = Path(file_path)
-        if "DLC" not in file_path.stem or ".h5" not in file_path.suffixes:
-            raise IOError("The file passed in is not a DeepLabCut h5 data file.")
+        suffix_is_valid = ".h5" in file_path.suffixes or ".csv" in file_path.suffixes
+        if not "DLC" in file_path.stem or not suffix_is_valid:
+            raise IOError("The file passed in is not a valid DeepLabCut output data file.")
 
         self.config_dict = dict()
         if config_file_path is not None:
@@ -108,12 +110,14 @@ def add_to_nwbfile(
             nwb file to which the recording information is to be added
         metadata: dict
             metadata info for constructing the nwb file (optional).
+        container_name: str, default: "PoseEstimation"
+            Name of the container to store the pose estimation.
         """
-        from ._dlc_utils import add_subject_to_nwbfile
+        from ._dlc_utils import _add_subject_to_nwbfile
 
-        add_subject_to_nwbfile(
+        _add_subject_to_nwbfile(
             nwbfile=nwbfile,
-            h5file=str(self.source_data["file_path"]),
+            file_path=str(self.source_data["file_path"]),
             individual_name=self.subject_name,
             config_file=self.source_data["config_file_path"],
             timestamps=self._timestamps,

diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py
@@ -92,7 +92,6 @@ def test_metadata_schema_valid(self, setup_interface):
         Draft7Validator.check_schema(schema=schema)
 
     def test_metadata(self, setup_interface):
-        # Validate metadata now happens on the class itself
         metadata = self.interface.get_metadata()
         self.check_extracted_metadata(metadata)
 
@@ -743,30 +742,6 @@ def test_interface_alignment(self):
         pass
 
 
-class DeepLabCutInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin):
-    """
-    A mixin for testing DeepLabCut interfaces.
-    """
-
-    def check_interface_get_original_timestamps(self):
-        pass  # TODO in separate PR
-
-    def check_interface_get_timestamps(self):
-        pass  # TODO in separate PR
-
-    def check_interface_set_aligned_timestamps(self):
-        pass  # TODO in separate PR
-
-    def check_shift_timestamps_by_start_time(self):
-        pass  # TODO in separate PR
-
-    def check_interface_original_timestamps_inmutability(self):
-        pass  # TODO in separate PR
-
-    def check_nwbfile_temporal_alignment(self):
-        pass  # TODO in separate PR
-
-
 class VideoInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin):
     """
     A mixin for testing Video interfaces.

diff --git a/tests/test_ecephys/test_ecephys_interfaces.py b/tests/test_ecephys/test_ecephys_interfaces.py
@@ -27,42 +27,61 @@
 
 python_version = Version(get_python_version())
 
+from neuroconv.tools.testing.data_interface_mixins import (
+    RecordingExtractorInterfaceTestMixin,
+    SortingExtractorInterfaceTestMixin,
+)
 
-class TestRecordingInterface(TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.single_segment_recording_interface = MockRecordingInterface(durations=[0.100])
-        cls.multi_segment_recording_interface = MockRecordingInterface(durations=[0.100, 0.100])
 
-    def test_stub_single_segment(self):
-        interface = self.single_segment_recording_interface
+class TestSortingInterface(SortingExtractorInterfaceTestMixin):
+
+    data_interface_cls = MockSortingInterface
+    interface_kwargs = dict(num_units=4, durations=[0.100])
+
+    def test_propagate_conversion_options(self, setup_interface):
+        interface = self.interface
         metadata = interface.get_metadata()
-        interface.create_nwbfile(stub_test=True, metadata=metadata)
+        nwbfile = interface.create_nwbfile(
+            stub_test=True,
+            metadata=metadata,
+            write_as="processing",
+            units_name="processed_units",
+            units_description="The processed units.",
+        )
 
-    def test_stub_multi_segment(self):
-        interface = self.multi_segment_recording_interface
+        ecephys = get_module(nwbfile, "ecephys")
+
+        assert nwbfile.units is None
+        assert "processed_units" in ecephys.data_interfaces
+
+
+class TestRecordingInterface(RecordingExtractorInterfaceTestMixin):
+    data_interface_cls = MockRecordingInterface
+    interface_kwargs = dict(durations=[0.100])
+
+    def test_stub(self, setup_interface):
+        interface = self.interface
         metadata = interface.get_metadata()
         interface.create_nwbfile(stub_test=True, metadata=metadata)
 
-    def test_no_slash_in_name(self):
-        interface = self.single_segment_recording_interface
+    def test_no_slash_in_name(self, setup_interface):
+        interface = self.interface
         metadata = interface.get_metadata()
         metadata["Ecephys"]["ElectricalSeries"]["name"] = "test/slash"
-        with self.assertRaises(jsonschema.exceptions.ValidationError):
+        with pytest.raises(jsonschema.exceptions.ValidationError):
             interface.validate_metadata(metadata)
 
+    def test_stub_multi_segment(self):
 
-class TestAlwaysWriteTimestamps:
+        interface = MockRecordingInterface(durations=[0.100, 0.100])
+        metadata = interface.get_metadata()
+        interface.create_nwbfile(stub_test=True, metadata=metadata)
 
-    def test_always_write_timestamps(self):
-        # By default the MockRecordingInterface has a uniform sampling rate
-        interface = MockRecordingInterface(durations=[1.0], sampling_frequency=30_000.0)
+    def test_always_write_timestamps(self, setup_interface):
 
-        nwbfile = interface.create_nwbfile(always_write_timestamps=True)
+        nwbfile = self.interface.create_nwbfile(always_write_timestamps=True)
         electrical_series = nwbfile.acquisition["ElectricalSeries"]
-
-        expected_timestamps = interface.recording_extractor.get_times()
-
+        expected_timestamps = self.interface.recording_extractor.get_times()
         np.testing.assert_array_equal(electrical_series.timestamps[:], expected_timestamps)
 
 
@@ -84,33 +103,9 @@ def test_spike2_import_assertions_3_11(self):
             Spike2RecordingInterface.get_all_channels_info(file_path="does_not_matter.smrx")
 
 
-class TestSortingInterface:
-
-    def test_run_conversion(self, tmp_path):
-
-        nwbfile_path = Path(tmp_path) / "test_sorting.nwb"
-        num_units = 4
-        interface = MockSortingInterface(num_units=num_units, durations=(1.0,))
-        interface.sorting_extractor = interface.sorting_extractor.rename_units(new_unit_ids=["a", "b", "c", "d"])
-
-        interface.run_conversion(nwbfile_path=nwbfile_path)
-        with NWBHDF5IO(nwbfile_path, "r") as io:
-            nwbfile = io.read()
-
-            units = nwbfile.units
-            assert len(units) == num_units
-            units_df = units.to_dataframe()
-            # Get index in units table
-            for unit_id in interface.sorting_extractor.unit_ids:
-                # In pynwb we write unit name as unit_id
-                row = units_df.query(f"unit_name == '{unit_id}'")
-                spike_times = interface.sorting_extractor.get_unit_spike_train(unit_id=unit_id, return_times=True)
-                written_spike_times = row["spike_times"].iloc[0]
-
-                np.testing.assert_array_equal(spike_times, written_spike_times)
-
-
 class TestSortingInterfaceOld(unittest.TestCase):
+    """Old-style tests for the SortingInterface. Remove once we are sure all the behaviors are covered by the mock."""
+
     @classmethod
     def setUpClass(cls) -> None:
         cls.test_dir = Path(mkdtemp())
@@ -194,28 +189,3 @@ def test_sorting_full(self):
             nwbfile = io.read()
             for i, start_times in enumerate(self.sorting_start_frames):
                 assert len(nwbfile.units["spike_times"][i]) == self.num_frames - start_times
-
-    def test_sorting_propagate_conversion_options(self):
-        minimal_nwbfile = self.test_dir / "temp2.nwb"
-        metadata = self.test_sorting_interface.get_metadata()
-        metadata["NWBFile"]["session_start_time"] = datetime.now().astimezone()
-        units_description = "The processed units."
-        conversion_options = dict(
-            TestSortingInterface=dict(
-                write_as="processing",
-                units_name="processed_units",
-                units_description=units_description,
-            )
-        )
-        self.test_sorting_interface.run_conversion(
-            nwbfile_path=minimal_nwbfile,
-            metadata=metadata,
-            conversion_options=conversion_options,
-        )
-
-        with NWBHDF5IO(minimal_nwbfile, "r") as io:
-            nwbfile = io.read()
-            ecephys = get_module(nwbfile, "ecephys")
-            self.assertIsNone(nwbfile.units)
-            self.assertIn("processed_units", ecephys.data_interfaces)
-            self.assertEqual(ecephys["processed_units"].description, units_description)
diff --git a/tests/test_ecephys/test_mock_recording_interface.py b/tests/test_ecephys/test_mock_recording_interface.py