Skip to content

Commit

Permalink
Added CSV support to DeepLabCutInterface (#1140)
Browse files Browse the repository at this point in the history
  • Loading branch information
pauladkisson authored Nov 14, 2024
1 parent e3cde1f commit 56673dd
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 75 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

## Features
* Imaging interfaces have a new conversion option `always_write_timestamps` that can be used to force writing timestamps even if neuroconv's heuristics indicate a regular sampling rate [PR #1125](https://github.com/catalystneuro/neuroconv/pull/1125)
* Added .csv support to DeepLabCutInterface [PR #1140](https://github.com/catalystneuro/neuroconv/pull/1140)

## Improvements

Expand Down
5 changes: 3 additions & 2 deletions docs/conversion_examples_gallery/behavior/deeplabcut.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Install NeuroConv with the additional dependencies necessary for reading DeepLab
pip install "neuroconv[deeplabcut]"
Convert DeepLabCut pose estimation data to NWB using :py:class:`~neuroconv.datainterfaces.behavior.deeplabcut.deeplabcutdatainterface.DeepLabCutInterface`.
This interface supports both .h5 and .csv output files from DeepLabCut.

.. code-block:: python
Expand All @@ -16,8 +17,8 @@ Convert DeepLabCut pose estimation data to NWB using :py:class:`~neuroconv.datai
>>> from pathlib import Path
>>> from neuroconv.datainterfaces import DeepLabCutInterface
>>> file_path = BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"
>>> config_file_path = BEHAVIOR_DATA_PATH / "DLC" / "config.yaml"
>>> file_path = BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"
>>> config_file_path = BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml"
>>> interface = DeepLabCutInterface(file_path=file_path, config_file_path=config_file_path, subject_name="ind1", verbose=False)
Expand Down
41 changes: 13 additions & 28 deletions src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,21 +251,6 @@ def _get_video_info_from_config_file(config_file_path: Path, vidname: str):
return video_file_path, image_shape


def _get_pes_args(
    *,
    h5file: Path,
    individual_name: str,
):
    """
    Derive the scorer name from a DLC .h5 file name and load its data frame.

    Parameters
    ----------
    h5file : Path
        Path to the .h5 file; its stem must contain "DLC" exactly once so it
        can be split into two parts.
    individual_name : str
        Forwarded to ``_ensure_individuals_in_header`` together with the
        loaded data frame.

    Returns
    -------
    tuple
        ``(scorer, df)`` where ``scorer`` is "DLC" followed by the part of the
        file stem after "DLC", and ``df`` is the value returned by
        ``_ensure_individuals_in_header``.
    """
    h5_path = Path(h5file)

    # Only the text after the "DLC" marker is kept; the two-value unpacking
    # raises if the marker is missing or appears more than once.
    _, scorer_suffix = h5_path.stem.split("DLC")
    scorer = f"DLC{scorer_suffix}"

    raw_frame = pd.read_hdf(h5_path)
    df = _ensure_individuals_in_header(raw_frame, individual_name)

    return scorer, df


def _write_pes_to_nwbfile(
nwbfile,
animal,
Expand Down Expand Up @@ -339,23 +324,23 @@ def _write_pes_to_nwbfile(
return nwbfile


def add_subject_to_nwbfile(
def _add_subject_to_nwbfile(
nwbfile: NWBFile,
h5file: FilePath,
file_path: FilePath,
individual_name: str,
config_file: Optional[FilePath] = None,
timestamps: Optional[Union[list, np.ndarray]] = None,
pose_estimation_container_kwargs: Optional[dict] = None,
) -> NWBFile:
"""
Given the subject name, add the DLC .h5 file to an in-memory NWBFile object.
Given the subject name, add the DLC output file (.h5 or .csv) to an in-memory NWBFile object.
Parameters
----------
nwbfile : pynwb.NWBFile
The in-memory nwbfile object to which the subject specific pose estimation series will be added.
h5file : str or path
Path to the DeepLabCut .h5 output file.
file_path : str or path
Path to the DeepLabCut .h5 or .csv output file.
individual_name : str
Name of the subject (whose pose is predicted) for single-animal DLC project.
For multi-animal projects, the names from the DLC project will be used directly.
Expand All @@ -371,18 +356,18 @@ def add_subject_to_nwbfile(
nwbfile : pynwb.NWBFile
nwbfile with pes written in the behavior module
"""
h5file = Path(h5file)

if "DLC" not in h5file.name or not h5file.suffix == ".h5":
raise IOError("The file passed in is not a DeepLabCut h5 data file.")
file_path = Path(file_path)

video_name, scorer = h5file.stem.split("DLC")
video_name, scorer = file_path.stem.split("DLC")
scorer = "DLC" + scorer

# TODO probably could be read directly with h5py
# This requires pytables
data_frame_from_hdf5 = pd.read_hdf(h5file)
df = _ensure_individuals_in_header(data_frame_from_hdf5, individual_name)
if ".h5" in file_path.suffixes:
df = pd.read_hdf(file_path)
elif ".csv" in file_path.suffixes:
df = pd.read_csv(file_path, header=[0, 1, 2], index_col=0)
df = _ensure_individuals_in_header(df, individual_name)

# Note the video here is a tuple of the video path and the image shape
if config_file is not None:
Expand All @@ -404,7 +389,7 @@ def add_subject_to_nwbfile(

# Fetch the corresponding metadata pickle file, we extract the edges graph from here
# TODO: This is how the original implementation extracts the file name, but it looks very brittle. Improve it.
filename = str(h5file.parent / h5file.stem)
filename = str(file_path.parent / file_path.stem)
for i, c in enumerate(filename[::-1]):
if c.isnumeric():
break
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pydantic import FilePath, validate_call
from pynwb.file import NWBFile

# import ndx_pose
from ....basetemporalalignmentinterface import BaseTemporalAlignmentInterface


Expand All @@ -13,16 +14,16 @@ class DeepLabCutInterface(BaseTemporalAlignmentInterface):

display_name = "DeepLabCut"
keywords = ("DLC",)
associated_suffixes = (".h5",)
associated_suffixes = (".h5", ".csv")
info = "Interface for handling data from DeepLabCut."

_timestamps = None

@classmethod
def get_source_schema(cls) -> dict:
source_schema = super().get_source_schema()
source_schema["properties"]["file_path"]["description"] = "Path to the .h5 file output by dlc."
source_schema["properties"]["config_file_path"]["description"] = "Path to .yml config file"
source_schema["properties"]["file_path"]["description"] = "Path to the file output by dlc (.h5 or .csv)."
source_schema["properties"]["config_file_path"]["description"] = "Path to .yml config file."
return source_schema

@validate_call
Expand All @@ -34,24 +35,25 @@ def __init__(
verbose: bool = True,
):
"""
Interface for writing DLC's h5 files to nwb using dlc2nwb.
Interface for writing DLC's output files to nwb using dlc2nwb.
Parameters
----------
file_path : FilePath
path to the h5 file output by dlc.
Path to the file output by dlc (.h5 or .csv).
config_file_path : FilePath, optional
path to .yml config file
Path to .yml config file
subject_name : str, default: "ind1"
the name of the subject for which the :py:class:`~pynwb.file.NWBFile` is to be created.
The name of the subject for which the :py:class:`~pynwb.file.NWBFile` is to be created.
verbose: bool, default: True
controls verbosity.
Controls verbosity.
"""
from ._dlc_utils import _read_config

file_path = Path(file_path)
if "DLC" not in file_path.stem or ".h5" not in file_path.suffixes:
raise IOError("The file passed in is not a DeepLabCut h5 data file.")
suffix_is_valid = ".h5" in file_path.suffixes or ".csv" in file_path.suffixes
if not "DLC" in file_path.stem or not suffix_is_valid:
raise IOError("The file passed in is not a valid DeepLabCut output data file.")

self.config_dict = dict()
if config_file_path is not None:
Expand Down Expand Up @@ -108,12 +110,14 @@ def add_to_nwbfile(
nwb file to which the recording information is to be added
metadata: dict
metadata info for constructing the nwb file (optional).
container_name: str, default: "PoseEstimation"
Name of the container to store the pose estimation.
"""
from ._dlc_utils import add_subject_to_nwbfile
from ._dlc_utils import _add_subject_to_nwbfile

add_subject_to_nwbfile(
_add_subject_to_nwbfile(
nwbfile=nwbfile,
h5file=str(self.source_data["file_path"]),
file_path=str(self.source_data["file_path"]),
individual_name=self.subject_name,
config_file=self.source_data["config_file_path"],
timestamps=self._timestamps,
Expand Down
24 changes: 0 additions & 24 deletions src/neuroconv/tools/testing/data_interface_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,30 +743,6 @@ def test_interface_alignment(self):
pass


class DeepLabCutInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin):
    """
    A mixin for testing DeepLabCut interfaces.

    Every check defined here is a no-op placeholder that returns None.
    NOTE(review): these presumably shadow same-named checks inherited from the
    base mixins until real implementations land -- confirm against the bases.
    """

    def check_interface_get_original_timestamps(self):
        """No-op placeholder. TODO in separate PR."""

    def check_interface_get_timestamps(self):
        """No-op placeholder. TODO in separate PR."""

    def check_interface_set_aligned_timestamps(self):
        """No-op placeholder. TODO in separate PR."""

    def check_shift_timestamps_by_start_time(self):
        """No-op placeholder. TODO in separate PR."""

    def check_interface_original_timestamps_inmutability(self):
        """No-op placeholder. TODO in separate PR."""

    def check_nwbfile_temporal_alignment(self):
        """No-op placeholder. TODO in separate PR."""


class VideoInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin):
"""
A mixin for testing Video interfaces.
Expand Down
65 changes: 57 additions & 8 deletions tests/test_on_data/behavior/test_behavior_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
)
from neuroconv.tools.testing.data_interface_mixins import (
DataInterfaceTestMixin,
DeepLabCutInterfaceMixin,
MedPCInterfaceMixin,
TemporalAlignmentMixin,
VideoInterfaceMixin,
Expand Down Expand Up @@ -332,11 +331,16 @@ class TestFicTracDataInterfaceTiming(TemporalAlignmentMixin):
platform == "darwin" and python_version < version.parse("3.10"),
reason="interface not supported on macOS with Python < 3.10",
)
class TestDeepLabCutInterface(DeepLabCutInterfaceMixin):
class TestDeepLabCutInterface(DataInterfaceTestMixin):
data_interface_cls = DeepLabCutInterface
interface_kwargs = dict(
file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"),
config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "config.yaml"),
file_path=str(
BEHAVIOR_DATA_PATH
/ "DLC"
/ "open_field_without_video"
/ "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"
),
config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml"),
subject_name="ind1",
)
save_directory = OUTPUT_PATH
Expand Down Expand Up @@ -384,7 +388,12 @@ def check_read_nwb(self, nwbfile_path: str):
class TestDeepLabCutInterfaceNoConfigFile(DataInterfaceTestMixin):
data_interface_cls = DeepLabCutInterface
interface_kwargs = dict(
file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"),
file_path=str(
BEHAVIOR_DATA_PATH
/ "DLC"
/ "open_field_without_video"
/ "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"
),
config_file_path=None,
subject_name="ind1",
)
Expand All @@ -411,11 +420,16 @@ def check_read_nwb(self, nwbfile_path: str):
platform == "darwin" and python_version < version.parse("3.10"),
reason="interface not supported on macOS with Python < 3.10",
)
class TestDeepLabCutInterfaceSetTimestamps(DeepLabCutInterfaceMixin):
class TestDeepLabCutInterfaceSetTimestamps(DataInterfaceTestMixin):
data_interface_cls = DeepLabCutInterface
interface_kwargs = dict(
file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"),
config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "config.yaml"),
file_path=str(
BEHAVIOR_DATA_PATH
/ "DLC"
/ "open_field_without_video"
/ "m3v1mp4DLC_resnet50_openfieldAug20shuffle1_30000.h5"
),
config_file_path=str(BEHAVIOR_DATA_PATH / "DLC" / "open_field_without_video" / "config.yaml"),
subject_name="ind1",
)

Expand Down Expand Up @@ -454,6 +468,41 @@ def check_read_nwb(self, nwbfile_path: str):
pass


@pytest.mark.skipif(
    platform == "darwin" and python_version < version.parse("3.10"),
    reason="interface not supported on macOS with Python < 3.10",
)
class TestDeepLabCutInterfaceFromCSV(DataInterfaceTestMixin):
    """
    Exercise DeepLabCutInterface with a .csv source file and no config file.

    The written NWB file is expected to hold a "PoseEstimation" container in
    the "behavior" processing module with one series per expected name.
    """

    data_interface_cls = DeepLabCutInterface
    interface_kwargs = {
        "file_path": str(
            BEHAVIOR_DATA_PATH
            / "DLC"
            / "SL18_csv"
            / "SL18_D19_S01_F01_BOX_SLP_20230503_112642.1DLC_resnet50_SubLearnSleepBoxRedLightJun26shuffle1_100000_stubbed.csv"
        ),
        "config_file_path": None,
        "subject_name": "SL18",
    }
    save_directory = OUTPUT_PATH

    def check_read_nwb(self, nwbfile_path: str):
        """Assert that the file at ``nwbfile_path`` contains the expected pose estimation series."""
        expected_names = ["SL18_redled", "SL18_shoulder", "SL18_haunch", "SL18_baseoftail"]

        # All checks run while the file is still open.
        with NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io:
            nwbfile = io.read()
            assert "behavior" in nwbfile.processing

            behavior_interfaces = nwbfile.processing["behavior"].data_interfaces
            assert "PoseEstimation" in behavior_interfaces

            stored_series = behavior_interfaces["PoseEstimation"].pose_estimation_series
            presence_flags = [series_name in stored_series for series_name in expected_names]

            assert all(presence_flags)


class TestSLEAPInterface(DataInterfaceTestMixin, TemporalAlignmentMixin):
data_interface_cls = SLEAPInterface
interface_kwargs = dict(
Expand Down

0 comments on commit 56673dd

Please sign in to comment.