From 56673dddd246f806ec2d7ee1911d86fcd21414ae Mon Sep 17 00:00:00 2001 From: Paul Adkisson Date: Fri, 15 Nov 2024 06:52:44 +1100 Subject: [PATCH 1/2] Added CSV support to DeepLabCutInterface (#1140) --- CHANGELOG.md | 1 + .../behavior/deeplabcut.rst | 5 +- .../behavior/deeplabcut/_dlc_utils.py | 41 ++++-------- .../deeplabcut/deeplabcutdatainterface.py | 30 +++++---- .../tools/testing/data_interface_mixins.py | 24 ------- .../behavior/test_behavior_interfaces.py | 65 ++++++++++++++++--- 6 files changed, 91 insertions(+), 75 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cdc70223f..0545001d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ## Features * Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicates regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125) +* Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140) ## Improvements diff --git a/docs/conversion_examples_gallery/behavior/deeplabcut.rst b/docs/conversion_examples_gallery/behavior/deeplabcut.rst index c20dd057d..64201ea72 100644 --- a/docs/conversion_examples_gallery/behavior/deeplabcut.rst +++ b/docs/conversion_examples_gallery/behavior/deeplabcut.rst @@ -8,6 +8,7 @@ Install NeuroConv with the additional dependencies necessary for reading DeepLab pip install "neuroconv[deeplabcut]" Convert DeepLabCut pose estimation data to NWB using :py:class:`~neuroconv.datainterfaces.behavior.deeplabcut.deeplabcutdatainterface.DeepLabCutInterface`. +This interface supports both .h5 and .csv output files from DeepLabCut. .. code-block:: python @@ -16,8 +17,8 @@ Convert DeepLabCut pose estimation data to NWB using :py:class:`~neuroconv.datai >>> from pathlib import Path >>> from neuroconv.datainterfaces import DeepLabCutInterface - >>> file_path = BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" - >>> config_file_path = BEHAVIOR_DATA_PATH / "DLC" / "config.yaml" + >>> file_path = BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" + >>> config_file_path = BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml" >>> interface = DeepLabCutInterface(file_path=file_path, config_file_path=config_file_path, subject_name="ind1", verbose=False) diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py b/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py index 9e368fb39..5d1224e85 100644 --- a/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py +++ b/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py @@ -251,21 +251,6 @@ def _get_video_info_from_config_file(config_file_path: Path, vidname: str): return video_file_path, image_shape -def _get_pes_args( - *, - h5file: Path, - individual_name: str, -): - h5file = Path(h5file) - - _, scorer = h5file.stem.split("DLC") - scorer = "DLC" + scorer - - df = _ensure_individuals_in_header(pd.read_hdf(h5file), individual_name) - - return scorer, df - - def _write_pes_to_nwbfile( nwbfile, animal, @@ -339,23 +324,23 @@ def _write_pes_to_nwbfile( return nwbfile -def add_subject_to_nwbfile( +def _add_subject_to_nwbfile( nwbfile: NWBFile, - h5file: FilePath, + file_path: FilePath, individual_name: str, config_file: Optional[FilePath] = None, timestamps: Optional[Union[list, np.ndarray]] = None, pose_estimation_container_kwargs: Optional[dict] = 
None, ) -> NWBFile: """ - Given the subject name, add the DLC .h5 file to an in-memory NWBFile object. + Given the subject name, add the DLC output file (.h5 or .csv) to an in-memory NWBFile object. Parameters ---------- nwbfile : pynwb.NWBFile The in-memory nwbfile object to which the subject specific pose estimation series will be added. - h5file : str or path - Path to the DeepLabCut .h5 output file. + file_path : str or path + Path to the DeepLabCut .h5 or .csv output file. individual_name : str Name of the subject (whose pose is predicted) for single-animal DLC project. For multi-animal projects, the names from the DLC project will be used directly. @@ -371,18 +356,18 @@ def add_subject_to_nwbfile( nwbfile : pynwb.NWBFile nwbfile with pes written in the behavior module """ - h5file = Path(h5file) - - if "DLC" not in h5file.name or not h5file.suffix == ".h5": - raise IOError("The file passed in is not a DeepLabCut h5 data file.") + file_path = Path(file_path) - video_name, scorer = h5file.stem.split("DLC") + video_name, scorer = file_path.stem.split("DLC") scorer = "DLC" + scorer # TODO probably could be read directly with h5py # This requires pytables - data_frame_from_hdf5 = pd.read_hdf(h5file) - df = _ensure_individuals_in_header(data_frame_from_hdf5, individual_name) + if ".h5" in file_path.suffixes: + df = pd.read_hdf(file_path) + elif ".csv" in file_path.suffixes: + df = pd.read_csv(file_path, header=[0, 1, 2], index_col=0) + df = _ensure_individuals_in_header(df, individual_name) # Note the video here is a tuple of the video path and the image shape if config_file is not None: @@ -404,7 +389,7 @@ def add_subject_to_nwbfile( # Fetch the corresponding metadata pickle file, we extract the edges graph from here # TODO: This is the original implementation way to extract the file name but looks very brittle. Improve it - filename = str(h5file.parent / h5file.stem) + filename = str(file_path.parent / file_path.stem) for i, c in enumerate(filename[::-1]): if c.isnumeric(): break diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py b/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py index 21b054e85..f45913061 100644 --- a/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py +++ b/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py @@ -5,6 +5,7 @@ from pydantic import FilePath, validate_call from pynwb.file import NWBFile +# import ndx_pose from ....basetemporalalignmentinterface import BaseTemporalAlignmentInterface @@ -13,7 +14,7 @@ class DeepLabCutInterface(BaseTemporalAlignmentInterface): display_name = "DeepLabCut" keywords = ("DLC",) - associated_suffixes = (".h5",) + associated_suffixes = (".h5", ".csv") info = "Interface for handling data from DeepLabCut." _timestamps = None @@ -21,8 +22,8 @@ class DeepLabCutInterface(BaseTemporalAlignmentInterface): @classmethod def get_source_schema(cls) -> dict: source_schema = super().get_source_schema() - source_schema["properties"]["file_path"]["description"] = "Path to the .h5 file output by dlc." - source_schema["properties"]["config_file_path"]["description"] = "Path to .yml config file" + source_schema["properties"]["file_path"]["description"] = "Path to the file output by dlc (.h5 or .csv)." + source_schema["properties"]["config_file_path"]["description"] = "Path to .yml config file." 
return source_schema @validate_call @@ -34,24 +35,25 @@ def __init__( verbose: bool = True, ): """ - Interface for writing DLC's h5 files to nwb using dlc2nwb. + Interface for writing DLC's output files to nwb using dlc2nwb. Parameters ---------- file_path : FilePath - path to the h5 file output by dlc. + Path to the file output by dlc (.h5 or .csv). config_file_path : FilePath, optional - path to .yml config file + Path to .yml config file subject_name : str, default: "ind1" - the name of the subject for which the :py:class:`~pynwb.file.NWBFile` is to be created. + The name of the subject for which the :py:class:`~pynwb.file.NWBFile` is to be created. verbose: bool, default: True - controls verbosity. + Controls verbosity. """ from ._dlc_utils import _read_config file_path = Path(file_path) - if "DLC" not in file_path.stem or ".h5" not in file_path.suffixes: - raise IOError("The file passed in is not a DeepLabCut h5 data file.") + suffix_is_valid = ".h5" in file_path.suffixes or ".csv" in file_path.suffixes + if not "DLC" in file_path.stem or not suffix_is_valid: + raise IOError("The file passed in is not a valid DeepLabCut output data file.") self.config_dict = dict() if config_file_path is not None: @@ -108,12 +110,14 @@ def add_to_nwbfile( nwb file to which the recording information is to be added metadata: dict metadata info for constructing the nwb file (optional). + container_name: str, default: "PoseEstimation" + Name of the container to store the pose estimation. """ - from ._dlc_utils import add_subject_to_nwbfile + from ._dlc_utils import _add_subject_to_nwbfile - add_subject_to_nwbfile( + _add_subject_to_nwbfile( nwbfile=nwbfile, - h5file=str(self.source_data["file_path"]), + file_path=str(self.source_data["file_path"]), individual_name=self.subject_name, config_file=self.source_data["config_file_path"], timestamps=self._timestamps, diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py index 946b3fd6c..5187ff2e4 100644 --- a/src/neuroconv/tools/testing/data_interface_mixins.py +++ b/src/neuroconv/tools/testing/data_interface_mixins.py @@ -743,30 +743,6 @@ def test_interface_alignment(self): pass -class DeepLabCutInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin): - """ - A mixin for testing DeepLabCut interfaces. - """ - - def check_interface_get_original_timestamps(self): - pass # TODO in separate PR - - def check_interface_get_timestamps(self): - pass # TODO in separate PR - - def check_interface_set_aligned_timestamps(self): - pass # TODO in separate PR - - def check_shift_timestamps_by_start_time(self): - pass # TODO in separate PR - - def check_interface_original_timestamps_inmutability(self): - pass # TODO in separate PR - - def check_nwbfile_temporal_alignment(self): - pass # TODO in separate PR - - class VideoInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin): """ A mixin for testing Video interfaces. 
diff --git a/tests/test_on_data/behavior/test_behavior_interfaces.py b/tests/test_on_data/behavior/test_behavior_interfaces.py index 8e3e01d61..b43e65206 100644 --- a/tests/test_on_data/behavior/test_behavior_interfaces.py +++ b/tests/test_on_data/behavior/test_behavior_interfaces.py @@ -29,7 +29,6 @@ ) from neuroconv.tools.testing.data_interface_mixins import ( DataInterfaceTestMixin, - DeepLabCutInterfaceMixin, MedPCInterfaceMixin, TemporalAlignmentMixin, VideoInterfaceMixin, @@ -332,11 +331,16 @@ class TestFicTracDataInterfaceTiming(TemporalAlignmentMixin): platform == "darwin" and python_version < version.parse("3.10"), reason="interface not supported on macOS with Python < 3.10", ) -class TestDeepLabCutInterface(DeepLabCutInterfaceMixin): +class TestDeepLabCutInterface(DataInterfaceTestMixin): data_interface_cls = DeepLabCutInterface interface_kwargs = dict( - file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"), - config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "config.yaml"), + file_path=str( + BEHAVIOR_DATA_PATH + / "DLC" + / "open_field_without_video" + / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" + ), + config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml"), subject_name="ind1", ) save_directory = OUTPUT_PATH @@ -384,7 +388,12 @@ def check_read_nwb(self, nwbfile_path: str): class TestDeepLabCutInterfaceNoConfigFile(DataInterfaceTestMixin): data_interface_cls = DeepLabCutInterface interface_kwargs = dict( - file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"), + file_path=str( + BEHAVIOR_DATA_PATH + / "DLC" + / "open_field_without_video" + / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" + ), config_file_path=None, subject_name="ind1", ) @@ -411,11 +420,16 @@ def check_read_nwb(self, nwbfile_path: str): platform == "darwin" and python_version < version.parse("3.10"), reason="interface not supported on macOS with Python < 3.10", ) -class TestDeepLabCutInterfaceSetTimestamps(DeepLabCutInterfaceMixin): +class TestDeepLabCutInterfaceSetTimestamps(DataInterfaceTestMixin): data_interface_cls = DeepLabCutInterface interface_kwargs = dict( - file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"), - config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "config.yaml"), + file_path=str( + BEHAVIOR_DATA_PATH + / "DLC" + / "open_field_without_video" + / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5" + ), + config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml"), subject_name="ind1", ) @@ -454,6 +468,41 @@ def check_read_nwb(self, nwbfile_path: str): pass +@pytest.mark.skipif( + platform == "darwin" and python_version < version.parse("3.10"), + reason="interface not supported on macOS with Python < 3.10", +) +class TestDeepLabCutInterfaceFromCSV(DataInterfaceTestMixin): + data_interface_cls = DeepLabCutInterface + interface_kwargs = dict( + file_path=str( + BEHAVIOR_DATA_PATH + / "DLC" + / "SL18_csv" + / "SL18_D19_S01_F01_BOX_SLP_20230503_112642.1DLC_resnet50_SubLearnSleepBoxRedLightJun26shuffle1_100000_stubbed.csv" + ), + config_file_path=None, + subject_name="SL18", + ) + save_directory = OUTPUT_PATH + + def check_read_nwb(self, nwbfile_path: str): + with NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: + nwbfile = io.read() + assert "behavior" in nwbfile.processing + processing_module_interfaces = nwbfile.processing["behavior"].data_interfaces 
+ assert "PoseEstimation" in processing_module_interfaces + + pose_estimation_series_in_nwb = processing_module_interfaces["PoseEstimation"].pose_estimation_series + expected_pose_estimation_series = ["SL18_redled", "SL18_shoulder", "SL18_haunch", "SL18_baseoftail"] + + expected_pose_estimation_series_are_in_nwb_file = [ + pose_estimation in pose_estimation_series_in_nwb for pose_estimation in expected_pose_estimation_series + ] + + assert all(expected_pose_estimation_series_are_in_nwb_file) + + class TestSLEAPInterface(DataInterfaceTestMixin, TemporalAlignmentMixin): data_interface_cls = SLEAPInterface interface_kwargs = dict( From 64fb9e01a5f4070fd3b01ebab50d8fc19a2fe953 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 14 Nov 2024 21:33:52 -0600 Subject: [PATCH 2/2] Use mixing tests for mocks (#1136) --- CHANGELOG.md | 1 + .../tools/testing/data_interface_mixins.py | 1 - tests/test_ecephys/test_ecephys_interfaces.py | 114 +++++++----------- .../test_mock_recording_interface.py | 9 -- 4 files changed, 43 insertions(+), 82 deletions(-) delete mode 100644 tests/test_ecephys/test_mock_recording_interface.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 0545001d1..92f4e6b5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ * Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140) ## Improvements +* Use mixing tests for ecephy's mocks [PR #1136](https://github.com/catalystneuro/neuroconv/pull/1136) # v0.6.5 (November 1, 2024) diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py index 5187ff2e4..fab049165 100644 --- a/src/neuroconv/tools/testing/data_interface_mixins.py +++ b/src/neuroconv/tools/testing/data_interface_mixins.py @@ -92,7 +92,6 @@ def test_metadata_schema_valid(self, setup_interface): Draft7Validator.check_schema(schema=schema) def test_metadata(self, setup_interface): - # Validate metadata now happens on the class itself metadata = self.interface.get_metadata() self.check_extracted_metadata(metadata) diff --git a/tests/test_ecephys/test_ecephys_interfaces.py b/tests/test_ecephys/test_ecephys_interfaces.py index 4d4232bf2..24393923f 100644 --- a/tests/test_ecephys/test_ecephys_interfaces.py +++ b/tests/test_ecephys/test_ecephys_interfaces.py @@ -27,42 +27,61 @@ python_version = Version(get_python_version()) +from neuroconv.tools.testing.data_interface_mixins import ( + RecordingExtractorInterfaceTestMixin, + SortingExtractorInterfaceTestMixin, +) -class TestRecordingInterface(TestCase): - @classmethod - def setUpClass(cls): - cls.single_segment_recording_interface = MockRecordingInterface(durations=[0.100]) - cls.multi_segment_recording_interface = MockRecordingInterface(durations=[0.100, 0.100]) - def test_stub_single_segment(self): - interface = self.single_segment_recording_interface +class TestSortingInterface(SortingExtractorInterfaceTestMixin): + + data_interface_cls = MockSortingInterface + interface_kwargs = dict(num_units=4, durations=[0.100]) + + def test_propagate_conversion_options(self, setup_interface): + interface = self.interface metadata = interface.get_metadata() - interface.create_nwbfile(stub_test=True, metadata=metadata) + nwbfile = interface.create_nwbfile( + stub_test=True, + metadata=metadata, + write_as="processing", + units_name="processed_units", + units_description="The processed units.", + ) - def test_stub_multi_segment(self): - interface = self.multi_segment_recording_interface + ecephys = 
get_module(nwbfile, "ecephys") + + assert nwbfile.units is None + assert "processed_units" in ecephys.data_interfaces + + +class TestRecordingInterface(RecordingExtractorInterfaceTestMixin): + data_interface_cls = MockRecordingInterface + interface_kwargs = dict(durations=[0.100]) + + def test_stub(self, setup_interface): + interface = self.interface metadata = interface.get_metadata() interface.create_nwbfile(stub_test=True, metadata=metadata) - def test_no_slash_in_name(self): - interface = self.single_segment_recording_interface + def test_no_slash_in_name(self, setup_interface): + interface = self.interface metadata = interface.get_metadata() metadata["Ecephys"]["ElectricalSeries"]["name"] = "test/slash" - with self.assertRaises(jsonschema.exceptions.ValidationError): + with pytest.raises(jsonschema.exceptions.ValidationError): interface.validate_metadata(metadata) + def test_stub_multi_segment(self): -class TestAlwaysWriteTimestamps: + interface = MockRecordingInterface(durations=[0.100, 0.100]) + metadata = interface.get_metadata() + interface.create_nwbfile(stub_test=True, metadata=metadata) - def test_always_write_timestamps(self): - # By default the MockRecordingInterface has a uniform sampling rate - interface = MockRecordingInterface(durations=[1.0], sampling_frequency=30_000.0) + def test_always_write_timestamps(self, setup_interface): - nwbfile = interface.create_nwbfile(always_write_timestamps=True) + nwbfile = self.interface.create_nwbfile(always_write_timestamps=True) electrical_series = nwbfile.acquisition["ElectricalSeries"] - - expected_timestamps = interface.recording_extractor.get_times() - + expected_timestamps = self.interface.recording_extractor.get_times() np.testing.assert_array_equal(electrical_series.timestamps[:], expected_timestamps) @@ -84,33 +103,9 @@ def test_spike2_import_assertions_3_11(self): Spike2RecordingInterface.get_all_channels_info(file_path="does_not_matter.smrx") -class TestSortingInterface: - - def test_run_conversion(self, tmp_path): - - nwbfile_path = Path(tmp_path) / "test_sorting.nwb" - num_units = 4 - interface = MockSortingInterface(num_units=num_units, durations=(1.0,)) - interface.sorting_extractor = interface.sorting_extractor.rename_units(new_unit_ids=["a", "b", "c", "d"]) - - interface.run_conversion(nwbfile_path=nwbfile_path) - with NWBHDF5IO(nwbfile_path, "r") as io: - nwbfile = io.read() - - units = nwbfile.units - assert len(units) == num_units - units_df = units.to_dataframe() - # Get index in units table - for unit_id in interface.sorting_extractor.unit_ids: - # In pynwb we write unit name as unit_id - row = units_df.query(f"unit_name == '{unit_id}'") - spike_times = interface.sorting_extractor.get_unit_spike_train(unit_id=unit_id, return_times=True) - written_spike_times = row["spike_times"].iloc[0] - - np.testing.assert_array_equal(spike_times, written_spike_times) - - class TestSortingInterfaceOld(unittest.TestCase): + """Old-style tests for the SortingInterface. 
Remove once we we are sure all the behaviors are covered by the mock.""" + @classmethod def setUpClass(cls) -> None: cls.test_dir = Path(mkdtemp()) @@ -194,28 +189,3 @@ def test_sorting_full(self): nwbfile = io.read() for i, start_times in enumerate(self.sorting_start_frames): assert len(nwbfile.units["spike_times"][i]) == self.num_frames - start_times - - def test_sorting_propagate_conversion_options(self): - minimal_nwbfile = self.test_dir / "temp2.nwb" - metadata = self.test_sorting_interface.get_metadata() - metadata["NWBFile"]["session_start_time"] = datetime.now().astimezone() - units_description = "The processed units." - conversion_options = dict( - TestSortingInterface=dict( - write_as="processing", - units_name="processed_units", - units_description=units_description, - ) - ) - self.test_sorting_interface.run_conversion( - nwbfile_path=minimal_nwbfile, - metadata=metadata, - conversion_options=conversion_options, - ) - - with NWBHDF5IO(minimal_nwbfile, "r") as io: - nwbfile = io.read() - ecephys = get_module(nwbfile, "ecephys") - self.assertIsNone(nwbfile.units) - self.assertIn("processed_units", ecephys.data_interfaces) - self.assertEqual(ecephys["processed_units"].description, units_description) diff --git a/tests/test_ecephys/test_mock_recording_interface.py b/tests/test_ecephys/test_mock_recording_interface.py deleted file mode 100644 index a33f3acd1..000000000 --- a/tests/test_ecephys/test_mock_recording_interface.py +++ /dev/null @@ -1,9 +0,0 @@ -from neuroconv.tools.testing.data_interface_mixins import ( - RecordingExtractorInterfaceTestMixin, -) -from neuroconv.tools.testing.mock_interfaces import MockRecordingInterface - - -class TestMockRecordingInterface(RecordingExtractorInterfaceTestMixin): - data_interface_cls = MockRecordingInterface - interface_kwargs = dict(durations=[0.100])