From ddb8a86e72a1ce470227c0fa0dcbaa00e07c6fce Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Thu, 11 Jul 2024 12:43:23 -0400 Subject: [PATCH] Port over DLC conversion utils (#946) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 3 +- setup.py | 1 - .../behavior/deeplabcut/_dlc_utils.py | 288 ++++++++++++++++++ .../deeplabcut/deeplabcutdatainterface.py | 51 +--- .../behavior/deeplabcut/requirements.txt | 4 +- .../test_on_data/test_behavior_interfaces.py | 17 ++ 6 files changed, 314 insertions(+), 50 deletions(-) create mode 100644 src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 69d038f49..8b837f700 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,8 +20,7 @@ * Make annotations from the raw format available on `IntanRecordingInterface`. [PR #934](https://github.com/catalystneuro/neuroconv/pull/943) * Add an option to suppress display the progress bar (tqdm) in `VideoContext` [PR #937](https://github.com/catalystneuro/neuroconv/pull/937) * Automatic compression of data in the `LightnignPoseDataInterface` has been disabled - users should refer to the new `configure_backend` method for a general approach for setting compression. [PR #942](https://github.com/catalystneuro/neuroconv/pull/942) - - +* Port over `dlc2nwb` utility functions for ease of maintenance. [PR #946](https://github.com/catalystneuro/neuroconv/pull/946) diff --git a/setup.py b/setup.py index 1af10b6c3..e06c90077 100644 --- a/setup.py +++ b/setup.py @@ -88,7 +88,6 @@ license="BSD-3-Clause", classifiers=[ "Intended Audience :: Science/Research", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py b/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py new file mode 100644 index 000000000..72ddafe28 --- /dev/null +++ b/src/neuroconv/datainterfaces/behavior/deeplabcut/_dlc_utils.py @@ -0,0 +1,288 @@ +import importlib +import os +import pickle +import warnings +from pathlib import Path +from typing import List, Optional, Union + +import numpy as np +import pandas as pd +import yaml +from pynwb import NWBFile +from ruamel.yaml import YAML + +from ....utils import FilePathType + + +def _read_config(config_file_path): + """ + Reads structured config file defining a project. + """ + ruamelFile = YAML() + path = Path(config_file_path) + if os.path.exists(path): + try: + with open(path, "r") as f: + cfg = ruamelFile.load(f) + curr_dir = os.path.dirname(config_file_path) + if cfg["project_path"] != curr_dir: + cfg["project_path"] = curr_dir + except Exception as err: + if len(err.args) > 2: + if err.args[2] == "could not determine a constructor for the tag '!!python/tuple'": + with open(path, "r") as ymlfile: + cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader) + else: + raise + + else: + raise FileNotFoundError( + "Config file is not found. Please make sure that the file exists and/or that you passed the path of the config file correctly!" + ) + return cfg + + +def _get_movie_timestamps(movie_file, VARIABILITYBOUND=1000, infer_timestamps=True): + """ + Return numpy array of the timestamps for a video. + + Parameters + ---------- + movie_file : str + Path to movie_file + """ + import cv2 + + reader = cv2.VideoCapture(movie_file) + timestamps = [] + n_frames = int(reader.get(cv2.CAP_PROP_FRAME_COUNT)) + fps = reader.get(cv2.CAP_PROP_FPS) + + for _ in range(n_frames): + _ = reader.read() + timestamps.append(reader.get(cv2.CAP_PROP_POS_MSEC)) + + for _ in range(len(reader)): + _ = reader.read() + timestamps.append(reader.get(cv2.CAP_PROP_POS_MSEC)) + + timestamps = np.array(timestamps) / 1000 # Convert to seconds + + if np.nanvar(np.diff(timestamps)) < 1.0 / fps * 1.0 / VARIABILITYBOUND: + warnings.warn( + "Variability of timestamps suspiciously small. See: https://github.com/DeepLabCut/DLC2NWB/issues/1" + ) + + if any(timestamps[1:] == 0): + # Infers times when OpenCV provides 0s + warning_msg = "Removing" + timestamp_zero_count = np.count_nonzero(timestamps == 0) + timestamps[1:][timestamps[1:] == 0] = np.nan # replace 0s with nan + + if infer_timestamps: + warning_msg = "Replacing" + timestamps = _infer_nan_timestamps(timestamps) + + warnings.warn( # warns user of percent of 0 frames + "%s cv2 timestamps returned as 0: %f%%" % (warning_msg, (timestamp_zero_count / len(timestamps) * 100)) + ) + + return timestamps + + +def _infer_nan_timestamps(timestamps): + """Given np.array, interpolate nan values using index * sampling rate""" + bad_timestamps_mask = np.isnan(timestamps) + # Runs of good timestamps + good_run_indices = np.where(np.diff(np.hstack(([False], bad_timestamps_mask == False, [False]))))[0].reshape(-1, 2) + + # For each good run, get the diff and append to cumulative array + sampling_diffs = np.array([]) + for idx in good_run_indices: + sampling_diffs = np.append(sampling_diffs, np.diff(timestamps[idx[0] : idx[1]])) + estimated_sampling_rate = np.mean(sampling_diffs) # Average over diffs + + # Infer timestamps with avg sampling rate + bad_timestamps_indexes = np.argwhere(bad_timestamps_mask)[:, 0] + inferred_timestamps = bad_timestamps_indexes * estimated_sampling_rate + timestamps[bad_timestamps_mask] = inferred_timestamps + + return timestamps + + +def _ensure_individuals_in_header(df, dummy_name): + if "individuals" not in df.columns.names: + # Single animal project -> add individual row to + # the header of single animal projects. + temp = pd.concat({dummy_name: df}, names=["individuals"], axis=1) + df = temp.reorder_levels(["scorer", "individuals", "bodyparts", "coords"], axis=1) + return df + + +def _get_pes_args(config_file, h5file, individual_name, infer_timestamps=True): + if "DLC" not in h5file or not h5file.endswith(".h5"): + raise IOError("The file passed in is not a DeepLabCut h5 data file.") + + cfg = _read_config(config_file) + + vidname, scorer = os.path.split(h5file)[-1].split("DLC") + scorer = "DLC" + os.path.splitext(scorer)[0] + video = None + + df = _ensure_individuals_in_header(pd.read_hdf(h5file), individual_name) + + # Fetch the corresponding metadata pickle file + paf_graph = [] + filename, _ = os.path.splitext(h5file) + for i, c in enumerate(filename[::-1]): + if c.isnumeric(): + break + if i > 0: + filename = filename[:-i] + metadata_file = filename + "_meta.pickle" + if os.path.isfile(metadata_file): + with open(metadata_file, "rb") as file: + metadata = pickle.load(file) + test_cfg = metadata["data"]["DLC-model-config file"] + paf_graph = test_cfg.get("partaffinityfield_graph", []) + if paf_graph: + paf_inds = test_cfg.get("paf_best") + if paf_inds is not None: + paf_graph = [paf_graph[i] for i in paf_inds] + else: + warnings.warn("Metadata not found...") + + for video_path, params in cfg["video_sets"].items(): + if vidname in video_path: + video = video_path, params["crop"] + break + + if video is None: + warnings.warn(f"The video file corresponding to {h5file} could not be found...") + video = "fake_path", "0, 0, 0, 0" + + timestamps = df.index.tolist() # setting timestamps to dummy TODO: extract timestamps in DLC? + else: + timestamps = _get_movie_timestamps(video[0], infer_timestamps=infer_timestamps) + return scorer, df, video, paf_graph, timestamps, cfg + + +def _write_pes_to_nwbfile( + nwbfile, + animal, + df_animal, + scorer, + video, # Expects this to be a tuple; first index is string path, second is the image shape as "0, width, 0, height" + paf_graph, + timestamps, + exclude_nans, + pose_estimation_container_kwargs: Optional[dict] = None, +): + from ndx_pose import PoseEstimation, PoseEstimationSeries + + pose_estimation_container_kwargs = pose_estimation_container_kwargs or dict() + + pose_estimation_series = [] + for kpt, xyp in df_animal.groupby(level="bodyparts", axis=1, sort=False): + data = xyp.to_numpy() + + if exclude_nans: + # exclude_nans is inverse infer_timestamps. if not infer, there may be nans + data = data[~np.isnan(timestamps)] + timestamps_cleaned = timestamps[~np.isnan(timestamps)] + else: + timestamps_cleaned = timestamps + + pes = PoseEstimationSeries( + name=f"{animal}_{kpt}", + description=f"Keypoint {kpt} from individual {animal}.", + data=data[:, :2], + unit="pixels", + reference_frame="(0,0) corresponds to the bottom left corner of the video.", + timestamps=timestamps_cleaned, + confidence=data[:, 2], + confidence_definition="Softmax output of the deep neural network.", + ) + pose_estimation_series.append(pes) + + deeplabcut_version = None + is_deeplabcut_installed = importlib.util.find_spec(name="deeplabcut") is not None + if is_deeplabcut_installed: + deeplabcut_version = importlib.metadata.version(distribution_name="deeplabcut") + + pose_estimation_default_kwargs = dict( + pose_estimation_series=pose_estimation_series, + description="2D keypoint coordinates estimated using DeepLabCut.", + original_videos=[video[0]], + # TODO check if this is a mandatory arg in ndx-pose (can skip if video is not found_ + dimensions=[list(map(int, video[1].split(",")))[1::2]], + scorer=scorer, + source_software="DeepLabCut", + source_software_version=deeplabcut_version, + nodes=[pes.name for pes in pose_estimation_series], + edges=paf_graph if paf_graph else None, + **pose_estimation_container_kwargs, + ) + pose_estimation_default_kwargs.update(pose_estimation_container_kwargs) + pose_estimation_container = PoseEstimation(**pose_estimation_default_kwargs) + + if "behavior" in nwbfile.processing: # TODO: replace with get_module + behavior_processing_module = nwbfile.processing["behavior"] + else: + behavior_processing_module = nwbfile.create_processing_module( + name="behavior", description="processed behavioral data" + ) + behavior_processing_module.add(pose_estimation_container) + + return nwbfile + + +def add_subject_to_nwbfile( + nwbfile: NWBFile, + h5file: FilePathType, + individual_name: str, + config_file: FilePathType, + timestamps: Optional[Union[List, np.ndarray]] = None, + pose_estimation_container_kwargs: Optional[dict] = None, +) -> NWBFile: + """ + Given the subject name, add the DLC .h5 file to an in-memory NWBFile object. + + Parameters + ---------- + nwbfile : pynwb.NWBFile + The in-memory nwbfile object to which the subject specific pose estimation series will be added. + h5file : str or path + Path to the DeepLabCut .h5 output file. + individual_name : str + Name of the subject (whose pose is predicted) for single-animal DLC project. + For multi-animal projects, the names from the DLC project will be used directly. + config_file : str or path + Path to a project config.yaml file + timestamps : list, np.ndarray or None, default: None + Alternative timestamps vector. If None, then use the inferred timestamps from DLC2NWB + pose_estimation_container_kwargs : dict, optional + Dictionary of keyword argument pairs to pass to the PoseEstimation container. + + Returns + ------- + nwbfile : pynwb.NWBFile + nwbfile with pes written in the behavior module + """ + scorer, df, video, paf_graph, dlc_timestamps, _ = _get_pes_args(config_file, h5file, individual_name) + if timestamps is None: + timestamps = dlc_timestamps + + df_animal = df.groupby(level="individuals", axis=1).get_group(individual_name) + + return _write_pes_to_nwbfile( + nwbfile, + individual_name, + df_animal, + scorer, + video, + paf_graph, + timestamps, + exclude_nans=False, + pose_estimation_container_kwargs=pose_estimation_container_kwargs, + ) diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py b/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py index 8fa1d3cf1..e053e88fa 100644 --- a/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py +++ b/src/neuroconv/datainterfaces/behavior/deeplabcut/deeplabcutdatainterface.py @@ -5,50 +5,9 @@ from pynwb.file import NWBFile from ....basetemporalalignmentinterface import BaseTemporalAlignmentInterface -from ....tools import get_package from ....utils import FilePathType -def write_subject_to_nwb( - nwbfile: NWBFile, - h5file: FilePathType, - individual_name: str, - config_file: FilePathType, - timestamps: Optional[Union[List, np.ndarray]] = None, -): - """ - Given, subject name, write h5file to an existing nwbfile. - - Parameters - ---------- - nwbfile : pynwb.NWBFile - The in-memory nwbfile object to which the subject specific pose estimation series will be added. - h5file : str or path - Path to the DeepLabCut .h5 output file. - individual_name : str - Name of the subject (whose pose is predicted) for single-animal DLC project. - For multi-animal projects, the names from the DLC project will be used directly. - config_file : str or path - Path to a project config.yaml file - timestamps : list, np.ndarray or None, default: None - Alternative timestamps vector. If None, then use the inferred timestamps from DLC2NWB - Returns - ------- - nwbfile : pynwb.NWBFile - nwbfile with pes written in the behavior module - """ - dlc2nwb = get_package(package_name="dlc2nwb") - - scorer, df, video, paf_graph, dlc_timestamps, _ = dlc2nwb.utils._get_pes_args(config_file, h5file, individual_name) - if timestamps is None: - timestamps = dlc_timestamps - - df_animal = df.groupby(level="individuals", axis=1).get_group(individual_name) - return dlc2nwb.utils._write_pes_to_nwbfile( - nwbfile, individual_name, df_animal, scorer, video, paf_graph, timestamps, exclude_nans=False - ) - - class DeepLabCutInterface(BaseTemporalAlignmentInterface): """Data interface for DeepLabCut datasets.""" @@ -87,13 +46,13 @@ def __init__( verbose: bool, default: True controls verbosity. """ - dlc2nwb = get_package(package_name="dlc2nwb") + from ._dlc_utils import _read_config file_path = Path(file_path) if "DLC" not in file_path.stem or ".h5" not in file_path.suffixes: raise IOError("The file passed in is not a DeepLabCut h5 data file.") - self._config_file = dlc2nwb.utils.read_config(config_file_path) + self._config_file = _read_config(config_file_path=config_file_path) self.subject_name = subject_name self.verbose = verbose super().__init__(file_path=file_path, config_file_path=config_file_path) @@ -126,13 +85,13 @@ def set_aligned_timestamps(self, aligned_timestamps: Union[List, np.ndarray]): aligned_timestamps : list, np.ndarray alternative timestamps vector. """ - self._timestamps = np.array(aligned_timestamps) def add_to_nwbfile( self, nwbfile: NWBFile, metadata: Optional[dict] = None, + container_name: str = "PoseEstimation", ): """ Conversion from DLC output files to nwb. Derived from dlc2nwb library. @@ -144,11 +103,13 @@ def add_to_nwbfile( metadata: dict metadata info for constructing the nwb file (optional). """ + from ._dlc_utils import add_subject_to_nwbfile - write_subject_to_nwb( + add_subject_to_nwbfile( nwbfile=nwbfile, h5file=str(self.source_data["file_path"]), individual_name=self.subject_name, config_file=str(self.source_data["config_file_path"]), timestamps=self._timestamps, + pose_estimation_container_kwargs=dict(name=container_name), ) diff --git a/src/neuroconv/datainterfaces/behavior/deeplabcut/requirements.txt b/src/neuroconv/datainterfaces/behavior/deeplabcut/requirements.txt index 17d8300fb..03e0ee0b0 100644 --- a/src/neuroconv/datainterfaces/behavior/deeplabcut/requirements.txt +++ b/src/neuroconv/datainterfaces/behavior/deeplabcut/requirements.txt @@ -1,4 +1,4 @@ -dlc2nwb>=0.3 -tables<3.9.0;python_version<'3.9' # imported by package but not included in pip setup (is included in setup.cfg) tables<3.9.2;sys_platform=="darwin" tables;sys_platform=="linux" or sys_platform=="win32" +ndx-pose==0.1.1 +neuroconv[video] diff --git a/tests/test_on_data/test_behavior_interfaces.py b/tests/test_on_data/test_behavior_interfaces.py index 155b32d72..36011da92 100644 --- a/tests/test_on_data/test_behavior_interfaces.py +++ b/tests/test_on_data/test_behavior_interfaces.py @@ -335,6 +335,7 @@ class TestDeepLabCutInterface(DeepLabCutInterfaceMixin, unittest.TestCase): def run_custom_checks(self): self.check_custom_timestamps(nwbfile_path=self.nwbfile_path) + self.check_renaming_instance(nwbfile_path=self.nwbfile_path) def check_custom_timestamps(self, nwbfile_path: str): # TODO: Peel out into separate test class and replace this part with check_read_nwb @@ -361,6 +362,22 @@ def check_custom_timestamps(self, nwbfile_path: str): pose_timestamps = pose_estimation.timestamps np.testing.assert_array_equal(pose_timestamps, self._custom_timestamps_case_1) + def check_renaming_instance(self, nwbfile_path: str): + custom_container_name = "TestPoseEstimation" + + metadata = self.interface.get_metadata() + metadata["NWBFile"].update(session_start_time=datetime.now().astimezone()) + + self.interface.run_conversion( + nwbfile_path=nwbfile_path, overwrite=True, metadata=metadata, container_name=custom_container_name + ) + + with NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: + nwbfile = io.read() + assert "behavior" in nwbfile.processing + assert "PoseEstimation" not in nwbfile.processing["behavior"].data_interfaces + assert custom_container_name in nwbfile.processing["behavior"].data_interfaces + def check_read_nwb(self, nwbfile_path: str): # TODO: move this to the upstream mixin with NWBHDF5IO(path=nwbfile_path, mode="r", load_namespaces=True) as io: