From 019dfe4ff6918f91116456eb4ac1db772493ca5b Mon Sep 17 00:00:00 2001 From: Paul Adkisson Date: Thu, 18 Jul 2024 10:30:26 +1000 Subject: [PATCH] MedpcInterface (#883) Co-authored-by: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> --- CHANGELOG.md | 2 +- docs/api/interfaces.behavior.rst | 4 + .../behavior/medpc.rst | 86 ++++++ docs/conversion_examples_gallery/index.rst | 1 + src/neuroconv/datainterfaces/__init__.py | 3 + .../datainterfaces/behavior/medpc/__init__.py | 0 .../behavior/medpc/medpc_helpers.py | 174 ++++++++++++ .../behavior/medpc/medpcdatainterface.py | 250 +++++++++++++++++ .../behavior/medpc/requirements.txt | 1 + .../tools/testing/data_interface_mixins.py | 233 ++++++++++++++++ tests/test_behavior/test_medpc_helpers.py | 255 ++++++++++++++++++ .../test_on_data/test_behavior_interfaces.py | 128 +++++++++ 12 files changed, 1136 insertions(+), 1 deletion(-) create mode 100644 docs/conversion_examples_gallery/behavior/medpc.rst create mode 100644 src/neuroconv/datainterfaces/behavior/medpc/__init__.py create mode 100644 src/neuroconv/datainterfaces/behavior/medpc/medpc_helpers.py create mode 100644 src/neuroconv/datainterfaces/behavior/medpc/medpcdatainterface.py create mode 100644 src/neuroconv/datainterfaces/behavior/medpc/requirements.txt create mode 100644 tests/test_behavior/test_medpc_helpers.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a790317e6..dfc6e93c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ ### Features * Added docker image and tests for an automated Rclone configuration (with file stream passed via an environment variable). [PR #902](https://github.com/catalystneuro/neuroconv/pull/902) +* Added MedPCInterface for operant behavioral output files. [PR #883](https://github.com/catalystneuro/neuroconv/pull/883) ### Bug fixes * Fixed the conversion option schema of a `SpikeGLXConverter` when used inside another `NWBConverter`. 
[PR #922](https://github.com/catalystneuro/neuroconv/pull/922) @@ -48,7 +49,6 @@ * Fixed bug causing overwrite of NWB GUIDE watermark. [PR #890](https://github.com/catalystneuro/neuroconv/pull/890) - ## v0.4.9 (June 5, 2024) ### Deprecations diff --git a/docs/api/interfaces.behavior.rst b/docs/api/interfaces.behavior.rst index f5b09a806..b79d67181 100644 --- a/docs/api/interfaces.behavior.rst +++ b/docs/api/interfaces.behavior.rst @@ -28,3 +28,7 @@ SLEAP Video ----- .. automodule:: neuroconv.datainterfaces.behavior.video.videodatainterface + +MedPC +----- +.. automodule:: neuroconv.datainterfaces.behavior.medpc.medpcdatainterface diff --git a/docs/conversion_examples_gallery/behavior/medpc.rst b/docs/conversion_examples_gallery/behavior/medpc.rst new file mode 100644 index 000000000..ffe82bed2 --- /dev/null +++ b/docs/conversion_examples_gallery/behavior/medpc.rst @@ -0,0 +1,86 @@ +MedPC data conversion +--------------------- + +MedPC output files contain information about operant behavior such as nose pokes and rewards. +Install NeuroConv with the additional dependencies necessary for writing medpc behavioral data. + +.. code-block:: bash + + pip install neuroconv[medpc] + +Convert MedPC output data to NWB using +:py:class:`~.neuroconv.datainterfaces.behavior.medpc.medpcdatainterface.MedPCInterface`. + +.. 
code-block:: python + + from datetime import datetime + from zoneinfo import ZoneInfo + from neuroconv.datainterfaces import MedPCInterface + + # For this data interface we need to pass the output file from MedPC + file_path = f"{BEHAVIOR_DATA_PATH}/medpc/example_medpc_file_06_06_2024.txt" + # Change the file_path to the appropriate location in your system + session_conditions = {"Start Date": "04/18/19", "Start Time": "10:41:42"} + start_variable = "Start Date" + metadata_medpc_name_to_info_dict = { + "Start Date": {"name": "start_date", "is_array": False}, + "Start Time": {"name": "start_time", "is_array": False}, + "Subject": {"name": "subject", "is_array": False}, + "Box": {"name": "box", "is_array": False}, + "MSN": {"name": "MSN", "is_array": False}, + } + interface = MedPCInterface( + file_path=file_path, + session_conditions=session_conditions, + start_variable=start_variable, + metadata_medpc_name_to_info_dict=metadata_medpc_name_to_info_dict + ) + + # Extract what metadata we can from the source file + metadata = interface.get_metadata() + # We add the time zone information, which is required by NWB + session_start_time = metadata["NWBFile"]["session_start_time"].replace(tzinfo=ZoneInfo("US/Pacific")) + metadata["NWBFile"].update(session_start_time=session_start_time) + metadata["MedPC"]["medpc_name_to_info_dict"] = { + "A": {"name": "left_nose_poke_times", "is_array": True}, + "B": {"name": "left_reward_times", "is_array": True}, + "C": {"name": "right_nose_poke_times", "is_array": True}, + "D": {"name": "right_reward_times", "is_array": True}, + "E": {"name": "duration_of_port_entry", "is_array": True}, + "G": {"name": "port_entry_times", "is_array": True}, + "H": {"name": "footshock_times", "is_array": True}, + } + metadata["MedPC"]["Events"] = [ + { + "name": "left_nose_poke_times", + "description": "Left nose poke times.", + }, + { + "name": "left_reward_times", + "description": "Left reward times.", + }, + { + "name": "right_nose_poke_times", 
+ "description": "Right nose poke times.", + }, + { + "name": "right_reward_times", + "description": "Right reward times.", + }, + { + "name": "footshock_times", + "description": "Footshock times.", + }, + ] + metadata["MedPC"]["IntervalSeries"] = [ + { + "name": "reward_port_intervals", + "description": "Interval of time spent in reward port (1 is entry, -1 is exit).", + "onset_name": "port_entry_times", + "duration_name": "duration_of_port_entry", + }, + ] + + # Choose a path for saving the nwb file and run the conversion + nwbfile_path = f"{path_to_save_nwbfile}" # This should be something like: "./saved_file.nwb" + interface.run_conversion(nwbfile_path=nwbfile_path, metadata=metadata) diff --git a/docs/conversion_examples_gallery/index.rst b/docs/conversion_examples_gallery/index.rst index 877a07ac2..298dd28a3 100644 --- a/docs/conversion_examples_gallery/index.rst +++ b/docs/conversion_examples_gallery/index.rst @@ -96,6 +96,7 @@ Behavior Neuralynx NVT SLEAP Videos + MedPC Text diff --git a/src/neuroconv/datainterfaces/__init__.py b/src/neuroconv/datainterfaces/__init__.py index fa27e7763..ac603a4ac 100644 --- a/src/neuroconv/datainterfaces/__init__.py +++ b/src/neuroconv/datainterfaces/__init__.py @@ -5,6 +5,7 @@ from .behavior.lightningpose.lightningposedatainterface import ( LightningPoseDataInterface, ) +from .behavior.medpc.medpcdatainterface import MedPCInterface from .behavior.miniscope.miniscopedatainterface import MiniscopeBehaviorInterface from .behavior.neuralynx.neuralynx_nvt_interface import NeuralynxNvtInterface from .behavior.sleap.sleapdatainterface import SLEAPInterface @@ -155,6 +156,7 @@ FicTracDataInterface, NeuralynxNvtInterface, LightningPoseDataInterface, + MedPCInterface, # Text CsvTimeIntervalsInterface, ExcelTimeIntervalsInterface, @@ -191,5 +193,6 @@ # Text CsvTimeIntervals=CsvTimeIntervalsInterface, ExcelTimeIntervals=ExcelTimeIntervalsInterface, + MedPC=MedPCInterface, ), ) diff --git 
a/src/neuroconv/datainterfaces/behavior/medpc/__init__.py b/src/neuroconv/datainterfaces/behavior/medpc/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/neuroconv/datainterfaces/behavior/medpc/medpc_helpers.py b/src/neuroconv/datainterfaces/behavior/medpc/medpc_helpers.py new file mode 100644 index 000000000..3610c4a49 --- /dev/null +++ b/src/neuroconv/datainterfaces/behavior/medpc/medpc_helpers.py @@ -0,0 +1,174 @@ +import numpy as np + +from neuroconv.utils import FilePathType + + +def get_medpc_variables(file_path: FilePathType, variable_names: list) -> dict: + """ + Get the values of the given single-line variables from a MedPC file for all sessions in that file. + + Parameters + ---------- + file_path : FilePathType + The path to the MedPC file. + variable_names : list + The names of the variables to get the values of. + + Returns + ------- + dict + A dictionary with the variable names as keys and a list of variable values as values. + """ + with open(file_path, "r") as f: + lines = f.readlines() + medpc_variables = {name: [] for name in variable_names} + for line in lines: + for variable_name in variable_names: + if line.startswith(variable_name): + medpc_variables[variable_name].append(line.split(":", maxsplit=1)[1].strip()) + return medpc_variables + + +def _get_session_lines(lines: list, session_conditions: dict, start_variable: str) -> list: + """ + Get the lines for a session from a MedPC file. + + Parameters + ---------- + lines : list + The lines of the MedPC file. + session_conditions : dict + The conditions that define the session. The keys are the names of the single-line variables (ex. 'Start Date') + and the values are the values of those variables for the desired session (ex. '11/09/18'). + start_variable : str + The name of the variable that starts the session (ex. 'Start Date'). + + Returns + ------- + list + The lines for the session. 
+ + Raises + ------ + ValueError + If the session with the given conditions could not be found. + ValueError + If the start variable of the session with the given conditions could not be found. + + Notes + ----- + If multiple sessions satisfy the session_conditions, the first session that meets the conditions will be returned. + """ + session_condition_has_been_met = {name: False for name in session_conditions} + start_line, end_line = None, len(lines) + for i, line in enumerate(lines): + line = line.strip() + if line.startswith(f"{start_variable}:"): + start_line = i + for condition_name, condition_value in session_conditions.items(): + if line == f"{condition_name}: {condition_value}": + session_condition_has_been_met[condition_name] = True + if line == "" and all(session_condition_has_been_met.values()): + end_line = i + break + elif line == "": + session_condition_has_been_met = {name: False for name in session_conditions} + start_line = None + if not all(session_condition_has_been_met.values()): + raise ValueError(f"Could not find the session with conditions {session_conditions}") + if start_line is None: + raise ValueError( + f"Could not find the start variable ({start_variable}) of the session with conditions {session_conditions}" + ) + session_lines = lines[start_line:end_line] + return session_lines + + +def read_medpc_file( + file_path: FilePathType, + medpc_name_to_info_dict: dict, + session_conditions: dict, + start_variable: str, +) -> dict: + """ + Read a raw MedPC text file into a dictionary. + + Parameters + ---------- + file_path : FilePathType + The path to the MedPC file. + medpc_name_to_info_dict : dict + A dictionary where the keys are the MedPC variable names and the values are dictionaries with the keys 'name' and + 'is_array'. 'name' is the name of the variable in the output dictionary and 'is_array' is a boolean indicating + whether the variable is an array. Ex. 
{'Start Date': {'name': 'start_date', 'is_array': False}} + session_conditions : dict + The conditions that define the session. The keys are the names of the single-line variables (ex. 'Start Date') + and the values are the values of those variables for the desired session (ex. '11/09/18'). + start_variable : str + The name of the variable that starts the session (ex. 'Start Date'). + + Returns + ------- + dict + A dictionary with the variable names as keys and the data extracted from medpc output are the values. + + Raises + ------ + ValueError + If the session with the given conditions could not be found. + """ + with open(file_path, "r") as f: + lines = f.readlines() + session_lines = _get_session_lines(lines, session_conditions=session_conditions, start_variable=start_variable) + + # Parse the session lines into a dictionary + session_dict = {} + for i, line in enumerate(session_lines): + line = line.rstrip() + if line.startswith("\\"): # \\ indicates a commented line in the MedPC file + continue + assert ":" in line, f"Could not find ':' in line {repr(line)}" + split_line = line.split(":", maxsplit=1) + medpc_name, data = split_line + data = data.strip() + if "\t" in data: # some sessions have a bunch of garbage after the last datum in the line separated by tabs + data = data.split("\t")[0] + if line.find(":") == 6: # multiline variable + if medpc_name == " 0": # first line of multiline variable + multiline_variable_name = session_lines[i - 1].split(":")[0] + if multiline_variable_name in medpc_name_to_info_dict: + output_name = medpc_name_to_info_dict[multiline_variable_name]["name"] + session_dict[output_name] = [] + if multiline_variable_name not in medpc_name_to_info_dict: + continue + data = data.split(" ") + for datum in data: + datum = datum.strip() + if datum == "": + continue + output_name = medpc_name_to_info_dict[multiline_variable_name]["name"] + session_dict[output_name].append(datum) + + # single line variable + elif medpc_name in 
medpc_name_to_info_dict: + output_name = medpc_name_to_info_dict[medpc_name]["name"] + session_dict[output_name] = data + + # Convert the data types + for info in medpc_name_to_info_dict.values(): + output_name = info["name"] + is_array = info["is_array"] + if output_name in session_dict: + if is_array: + if session_dict[output_name] == "": + session_dict[output_name] = np.array([], dtype=float) + elif type(session_dict[output_name]) == str: # not a multiline variable + raise ValueError( + f"Expected {output_name} to be a multiline variable, but found a single line variable." + ) + else: + session_dict[output_name] = np.array(session_dict[output_name], dtype=float) + session_dict[output_name] = np.trim_zeros( + session_dict[output_name], trim="b" + ) # MEDPC adds extra zeros to the end of the array + return session_dict diff --git a/src/neuroconv/datainterfaces/behavior/medpc/medpcdatainterface.py b/src/neuroconv/datainterfaces/behavior/medpc/medpcdatainterface.py new file mode 100644 index 000000000..1ed84cbb1 --- /dev/null +++ b/src/neuroconv/datainterfaces/behavior/medpc/medpcdatainterface.py @@ -0,0 +1,250 @@ +from typing import Optional + +import numpy as np +from pynwb.behavior import BehavioralEpochs, IntervalSeries +from pynwb.file import NWBFile + +from neuroconv.basetemporalalignmentinterface import BaseTemporalAlignmentInterface +from neuroconv.tools import get_package, nwb_helpers +from neuroconv.utils import DeepDict, FilePathType + +from .medpc_helpers import read_medpc_file + + +class MedPCInterface(BaseTemporalAlignmentInterface): + """ + Data Interface for MedPC output files. + + The output files from MedPC are raw text files that contain behavioral data from the operant box sessions such as + lever presses, reward port entries, nose pokes, etc. The output text files format this data into a series of + colon-separated variables that are either single-line (for metadata) or multi-line (for arrays). 
The multi-line + variables keep a colon-separated index of the array every 5 elements. For example, a single variable might look like:: + + Start Date: 11/09/18 + + while a multi-line variable might look like:: + + A: + 0: 175.150 270.750 762.050 762.900 1042.600 + 5: 1567.800 1774.950 2448.450 2454.050 2552.800 + 10: 2620.550 2726.250 + + Different sessions are usually separated by a blank line or two. + + This data is parsed by the MedPCInterface and added to the NWBFile as Events and IntervalSeries objects in the + behavior module. + """ + + keywords = ["behavior"] + display_name = "MedPC" + info = "Interface for handling MedPC output files." + associated_suffixes = ".txt" + + def __init__( + self, + file_path: FilePathType, + session_conditions: dict, + start_variable: str, + metadata_medpc_name_to_info_dict: dict, + aligned_timestamp_names: Optional[list[str]] = None, + verbose: bool = True, + ): + """ + Initialize MedpcInterface. + + Parameters + ---------- + file_path : FilePathType + Path to the MedPC file. + session_conditions : dict + The conditions that define the session. The keys are the names of the single-line variables (ex. 'Start Date') + and the values are the values of those variables for the desired session (ex. '11/09/18'). + start_variable : str + The name of the variable that starts the session (ex. 'Start Date'). + metadata_medpc_name_to_info_dict : dict + A dictionary mapping the names of the desired variables in the MedPC file + to an info dictionary with the names of the variables in the metadata and whether or not they are arrays. + ex. {"Start Date": {"name": "start_date", "is_array": False}} + aligned_timestamp_names : list, optional + The names of the variables that are externally aligned timestamps, + which should be retrieved from self.timestamps_dict instead of the MedPC output file. 
+ verbose : bool, optional + Whether to print verbose output, by default True + """ + if aligned_timestamp_names is None: + aligned_timestamp_names = [] + super().__init__( + file_path=file_path, + session_conditions=session_conditions, + start_variable=start_variable, + metadata_medpc_name_to_info_dict=metadata_medpc_name_to_info_dict, + aligned_timestamp_names=aligned_timestamp_names, + verbose=verbose, + ) + self.timestamps_dict = {} + + def get_metadata(self) -> DeepDict: + metadata = super().get_metadata() + session_dict = read_medpc_file( + file_path=self.source_data["file_path"], + medpc_name_to_info_dict=self.source_data["metadata_medpc_name_to_info_dict"], + session_conditions=self.source_data["session_conditions"], + start_variable=self.source_data["start_variable"], + ) + for k, v in session_dict.items(): + metadata["MedPC"][k] = v + + return metadata + + def get_metadata_schema(self) -> dict: + metadata_schema = super().get_metadata_schema() + medpc_name_to_info_dict = self.source_data["metadata_medpc_name_to_info_dict"] + metadata_schema["properties"]["MedPC"] = { + "type": "object", + "properties": {info_dict["name"]: {"type": "string"} for info_dict in medpc_name_to_info_dict.values()}, + } + return metadata_schema + + def get_original_timestamps(self, medpc_name_to_info_dict: dict) -> dict[str, np.ndarray]: + """ + Retrieve the original unaltered timestamps dictionary for the data in this interface. + + This function retrieves the data on-demand by re-reading the medpc file. + + Parameters + ---------- + medpc_name_to_info_dict : dict + A dictionary mapping the names of the desired variables in the MedPC file + to an info dictionary with the names of the variables in the metadata and whether or not they are arrays. + ex. {"A": {"name": "left_nose_poke_times", "is_array": True}} + + Returns + ------- + timestamps_dict: dict + A dictionary mapping the names of the variables to the original medpc timestamps. 
+ """ + timestamps_dict = read_medpc_file( + file_path=self.source_data["file_path"], + medpc_name_to_info_dict=medpc_name_to_info_dict, + session_conditions=self.source_data["session_conditions"], + start_variable=self.source_data["start_variable"], + ) + return timestamps_dict + + def get_timestamps(self) -> dict[str, np.ndarray]: + """ + Retrieve the timestamps dictionary for the data in this interface. + + Returns + ------- + timestamps_dict: dict + A dictionary mapping the names of the variables to the timestamps. + """ + return self.timestamps_dict + + def set_aligned_timestamps(self, aligned_timestamps_dict: dict[str, np.ndarray]) -> None: + """ + Replace all timestamps for this interface with those aligned to the common session start time. + + Must be in units seconds relative to the common 'session_start_time'. + + Parameters + ---------- + aligned_timestamps_dict : dict + A dictionary mapping the names of the variables to the synchronized timestamps for data in this interface. + """ + self.timestamps_dict = aligned_timestamps_dict + + def set_aligned_starting_time(self, aligned_starting_time: float, medpc_name_to_info_dict: dict) -> None: + """ + Align the starting time for this interface relative to the common session start time. + + Must be in units seconds relative to the common 'session_start_time'. + + Parameters + ---------- + aligned_starting_time : float + The starting time for all temporal data in this interface. + medpc_name_to_info_dict : dict + A dictionary mapping the names of the desired variables in the MedPC file + to an info dictionary with the names of the variables in the metadata and whether or not they are arrays. + ex. 
{"A": {"name": "left_nose_poke_times", "is_array": True}} + """ + original_timestamps_dict = self.get_original_timestamps(medpc_name_to_info_dict=medpc_name_to_info_dict) + aligned_timestamps_dict = {} + for name, original_timestamps in original_timestamps_dict.items(): + aligned_timestamps_dict[name] = original_timestamps + aligned_starting_time + self.set_aligned_timestamps(aligned_timestamps_dict=aligned_timestamps_dict) + + def add_to_nwbfile( + self, + nwbfile: NWBFile, + metadata: dict, + ) -> None: + ndx_events = get_package(package_name="ndx_events", installation_instructions="pip install ndx-events") + medpc_name_to_info_dict = metadata["MedPC"].get("medpc_name_to_info_dict", None) + assert medpc_name_to_info_dict is not None, "medpc_name_to_info_dict must be provided in metadata" + info_name_to_medpc_name = { + info_dict["name"]: medpc_name for medpc_name, info_dict in medpc_name_to_info_dict.items() + } + for name in self.source_data["aligned_timestamp_names"]: + medpc_name = info_name_to_medpc_name[name] + medpc_name_to_info_dict.pop(medpc_name) + session_dict = read_medpc_file( + file_path=self.source_data["file_path"], + medpc_name_to_info_dict=medpc_name_to_info_dict, + session_conditions=self.source_data["session_conditions"], + start_variable=self.source_data["start_variable"], + ) + aligned_timestamps_dict = self.get_timestamps() + for name, aligned_timestamps in aligned_timestamps_dict.items(): + session_dict[name] = aligned_timestamps + + # Add behavior data to nwbfile + module_name = metadata["MedPC"].get("module_name", "behavior") + module_description = metadata["MedPC"].get("module_description", "Behavioral data from MedPC output files.") + behavior_module = nwb_helpers.get_module( + nwbfile=nwbfile, + name=module_name, + description=module_description, + ) + + event_dicts = metadata["MedPC"].get("Events", []) + for event_dict in event_dicts: + name = event_dict["name"] + description = event_dict["description"] + event_data = 
session_dict[name] + if len(event_data) > 0: + event = ndx_events.Events( + name=name, + description=description, + timestamps=event_data, + ) + behavior_module.add(event) + interval_dicts = metadata["MedPC"].get("IntervalSeries", []) + for interval_dict in interval_dicts: + name = interval_dict["name"] + description = interval_dict["description"] + onset_name = interval_dict["onset_name"] + duration_name = interval_dict["duration_name"] + onset_data = session_dict[onset_name] + duration_data = session_dict[duration_name] + if len(onset_data) == 0: + continue + assert not len(duration_data) == 0, f"Duration data for {name} is empty!" + + interval_times, data = [], [] + for onset_time, duration in zip(onset_data, duration_data): + interval_times.append(onset_time) + data.append(1) + interval_times.append(onset_time + duration) + data.append(-1) + interval = IntervalSeries( + name=name, + description=description, + timestamps=interval_times, + data=data, + ) + behavioral_epochs = BehavioralEpochs(name="behavioral_epochs") + behavioral_epochs.add_interval_series(interval) + behavior_module.add(behavioral_epochs) diff --git a/src/neuroconv/datainterfaces/behavior/medpc/requirements.txt b/src/neuroconv/datainterfaces/behavior/medpc/requirements.txt new file mode 100644 index 000000000..f0f292cab --- /dev/null +++ b/src/neuroconv/datainterfaces/behavior/medpc/requirements.txt @@ -0,0 +1 @@ +ndx-events==0.2.0 diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py index dabd855f5..77d42b55f 100644 --- a/src/neuroconv/tools/testing/data_interface_mixins.py +++ b/src/neuroconv/tools/testing/data_interface_mixins.py @@ -907,6 +907,239 @@ def test_interface_alignment(self): self.check_nwbfile_temporal_alignment() +class MedPCInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin): + def check_no_metadata_mutation(self, metadata: dict): + """Ensure the metadata object was not altered by `add_to_nwbfile` 
method.""" + + metadata_in = deepcopy(metadata) + + nwbfile = mock_NWBFile() + self.interface.add_to_nwbfile(nwbfile=nwbfile, metadata=metadata, **self.conversion_options) + + assert metadata == metadata_in + + def check_run_conversion_with_backend( + self, nwbfile_path: str, metadata: dict, backend: Literal["hdf5", "zarr"] = "hdf5" + ): + self.interface.run_conversion( + nwbfile_path=nwbfile_path, + overwrite=True, + metadata=metadata, + backend=backend, + **self.conversion_options, + ) + + def check_configure_backend_for_equivalent_nwbfiles( + self, metadata: dict, backend: Literal["hdf5", "zarr"] = "hdf5" + ): + nwbfile_1 = self.interface.create_nwbfile(metadata=metadata, **self.conversion_options) + nwbfile_2 = self.interface.create_nwbfile(metadata=metadata, **self.conversion_options) + + backend_configuration = get_default_backend_configuration(nwbfile=nwbfile_1, backend=backend) + configure_backend(nwbfile=nwbfile_2, backend_configuration=backend_configuration) + + def check_run_conversion_with_backend_configuration( + self, nwbfile_path: str, metadata: dict, backend: Literal["hdf5", "zarr"] = "hdf5" + ): + nwbfile = self.interface.create_nwbfile(metadata=metadata, **self.conversion_options) + backend_configuration = self.interface.get_default_backend_configuration(nwbfile=nwbfile, backend=backend) + self.interface.run_conversion( + nwbfile_path=nwbfile_path, + nwbfile=nwbfile, + overwrite=True, + backend_configuration=backend_configuration, + **self.conversion_options, + ) + + def check_run_conversion_in_nwbconverter_with_backend( + self, nwbfile_path: str, metadata: dict, backend: Literal["hdf5", "zarr"] = "hdf5" + ): + class TestNWBConverter(NWBConverter): + data_interface_classes = dict(Test=type(self.interface)) + + test_kwargs = self.test_kwargs[0] if isinstance(self.test_kwargs, list) else self.test_kwargs + source_data = dict(Test=test_kwargs) + converter = TestNWBConverter(source_data=source_data) + + conversion_options = 
dict(Test=self.conversion_options) + converter.run_conversion( + nwbfile_path=nwbfile_path, + overwrite=True, + metadata=metadata, + backend=backend, + conversion_options=conversion_options, + ) + + def check_run_conversion_in_nwbconverter_with_backend_configuration( + self, nwbfile_path: str, metadata: dict, backend: Union["hdf5", "zarr"] = "hdf5" + ): + class TestNWBConverter(NWBConverter): + data_interface_classes = dict(Test=type(self.interface)) + + test_kwargs = self.test_kwargs[0] if isinstance(self.test_kwargs, list) else self.test_kwargs + source_data = dict(Test=test_kwargs) + converter = TestNWBConverter(source_data=source_data) + + conversion_options = dict(Test=self.conversion_options) + + nwbfile = converter.create_nwbfile(metadata=metadata, conversion_options=conversion_options) + backend_configuration = converter.get_default_backend_configuration(nwbfile=nwbfile, backend=backend) + converter.run_conversion( + nwbfile_path=nwbfile_path, + nwbfile=nwbfile, + overwrite=True, + metadata=metadata, + backend_configuration=backend_configuration, + conversion_options=conversion_options, + ) + + def test_all_conversion_checks(self, metadata: dict): + interface_kwargs = self.interface_kwargs + if isinstance(interface_kwargs, dict): + interface_kwargs = [interface_kwargs] + for num, kwargs in enumerate(interface_kwargs): + with self.subTest(str(num)): + self.case = num + self.test_kwargs = kwargs + self.interface = self.data_interface_cls(**self.test_kwargs) + + self.check_metadata_schema_valid() + self.check_conversion_options_schema_valid() + self.check_metadata() + self.nwbfile_path = str(self.save_directory / f"{self.__class__.__name__}_{num}.nwb") + + self.check_no_metadata_mutation(metadata=metadata) + + self.check_configure_backend_for_equivalent_nwbfiles(metadata=metadata) + + self.check_run_conversion_in_nwbconverter_with_backend( + nwbfile_path=self.nwbfile_path, metadata=metadata, backend="hdf5" + ) + 
self.check_run_conversion_in_nwbconverter_with_backend_configuration( + nwbfile_path=self.nwbfile_path, metadata=metadata, backend="hdf5" + ) + + self.check_run_conversion_with_backend( + nwbfile_path=self.nwbfile_path, metadata=metadata, backend="hdf5" + ) + self.check_run_conversion_with_backend_configuration( + nwbfile_path=self.nwbfile_path, metadata=metadata, backend="hdf5" + ) + + self.check_read_nwb(nwbfile_path=self.nwbfile_path) + + # TODO: enable when all H5DataIO prewraps are gone + # self.nwbfile_path = str(self.save_directory / f"{self.__class__.__name__}_{num}.nwb.zarr") + # self.check_run_conversion(nwbfile_path=self.nwbfile_path, backend="zarr") + # self.check_run_conversion_custom_backend(nwbfile_path=self.nwbfile_path, backend="zarr") + # self.check_basic_zarr_read(nwbfile_path=self.nwbfile_path) + + # Any extra custom checks to run + self.run_custom_checks() + + def check_interface_get_original_timestamps(self, medpc_name_to_info_dict: dict): + """ + Just to ensure each interface can call .get_original_timestamps() without an error raising. + + Also, that it always returns non-empty. + """ + self.setUpFreshInterface() + original_timestamps_dict = self.interface.get_original_timestamps( + medpc_name_to_info_dict=medpc_name_to_info_dict + ) + for name in self.interface.source_data["aligned_timestamp_names"]: + original_timestamps = original_timestamps_dict[name] + assert len(original_timestamps) != 0, f"Timestamps for {name} are empty." + + def check_interface_get_timestamps(self): + """ + Just to ensure each interface can call .get_timestamps() without an error raising. + + Also, that it always returns non-empty. 
+ """ + self.setUpFreshInterface() + timestamps_dict = self.interface.get_timestamps() + for timestamps in timestamps_dict.values(): + assert len(timestamps) != 0 + + def check_interface_set_aligned_timestamps(self, medpc_name_to_info_dict: dict): + """Ensure that internal mechanisms for the timestamps getter/setter work as expected.""" + self.setUpFreshInterface() + unaligned_timestamps_dict = self.interface.get_original_timestamps( + medpc_name_to_info_dict=medpc_name_to_info_dict + ) + + random_number_generator = np.random.default_rng(seed=0) + aligned_timestamps_dict = {} + for name, unaligned_timestamps in unaligned_timestamps_dict.items(): + aligned_timestamps = ( + unaligned_timestamps + 1.23 + random_number_generator.random(size=unaligned_timestamps.shape) + ) + aligned_timestamps_dict[name] = aligned_timestamps + self.interface.set_aligned_timestamps(aligned_timestamps_dict=aligned_timestamps_dict) + + retrieved_aligned_timestamps = self.interface.get_timestamps() + for name, aligned_timestamps in aligned_timestamps_dict.items(): + assert_array_equal(retrieved_aligned_timestamps[name], aligned_timestamps) + + def check_shift_timestamps_by_start_time(self, medpc_name_to_info_dict: dict): + """Ensure that internal mechanisms for shifting timestamps by a starting time work as expected.""" + self.setUpFreshInterface() + unaligned_timestamps_dict = self.interface.get_original_timestamps( + medpc_name_to_info_dict=medpc_name_to_info_dict + ) + + aligned_starting_time = 1.23 + self.interface.set_aligned_starting_time( + aligned_starting_time=aligned_starting_time, + medpc_name_to_info_dict=medpc_name_to_info_dict, + ) + + aligned_timestamps = self.interface.get_timestamps() + expected_timestamps_dict = { + name: unaligned_timestamps + aligned_starting_time + for name, unaligned_timestamps in unaligned_timestamps_dict.items() + } + for name, expected_timestamps in expected_timestamps_dict.items(): + assert_array_equal(aligned_timestamps[name], expected_timestamps) 
+ + def check_interface_original_timestamps_inmutability(self, medpc_name_to_info_dict: dict): + """Check aligning the timestamps for the interface does not change the value of .get_original_timestamps().""" + self.setUpFreshInterface() + pre_alignment_original_timestamps_dict = self.interface.get_original_timestamps( + medpc_name_to_info_dict=medpc_name_to_info_dict + ) + + aligned_timestamps_dict = { + name: pre_alignment_og_timestamps + 1.23 + for name, pre_alignment_og_timestamps in pre_alignment_original_timestamps_dict.items() + } + self.interface.set_aligned_timestamps(aligned_timestamps_dict=aligned_timestamps_dict) + + post_alignment_original_timestamps_dict = self.interface.get_original_timestamps( + medpc_name_to_info_dict=medpc_name_to_info_dict + ) + for name, post_alignment_original_timestamps_dict in post_alignment_original_timestamps_dict.items(): + assert_array_equal(post_alignment_original_timestamps_dict, pre_alignment_original_timestamps_dict[name]) + + def test_interface_alignment(self, medpc_name_to_info_dict: dict): + interface_kwargs = self.interface_kwargs + if isinstance(interface_kwargs, dict): + interface_kwargs = [interface_kwargs] + for num, kwargs in enumerate(interface_kwargs): + with self.subTest(str(num)): + self.case = num + self.test_kwargs = kwargs + + self.check_interface_get_original_timestamps(medpc_name_to_info_dict=medpc_name_to_info_dict) + self.check_interface_get_timestamps() + self.check_interface_set_aligned_timestamps(medpc_name_to_info_dict=medpc_name_to_info_dict) + self.check_shift_timestamps_by_start_time(medpc_name_to_info_dict=medpc_name_to_info_dict) + self.check_interface_original_timestamps_inmutability(medpc_name_to_info_dict=medpc_name_to_info_dict) + + self.check_nwbfile_temporal_alignment() + + class MiniscopeImagingInterfaceMixin(DataInterfaceTestMixin, TemporalAlignmentMixin): def check_read_nwb(self, nwbfile_path: str): from ndx_miniscope import Miniscope diff --git 
a/tests/test_behavior/test_medpc_helpers.py b/tests/test_behavior/test_medpc_helpers.py new file mode 100644 index 000000000..caaa38d60 --- /dev/null +++ b/tests/test_behavior/test_medpc_helpers.py @@ -0,0 +1,255 @@ +import numpy as np +import pytest + +from neuroconv.datainterfaces.behavior.medpc.medpc_helpers import ( + _get_session_lines, + get_medpc_variables, + read_medpc_file, +) + + +@pytest.fixture(scope="function") +def medpc_file_path(tmp_path): + content = """ +Start Date: 04/09/19 +End Date: 04/09/19 +Subject: 95.259 +Experiment: +\\ This line is a comment +Group: +Box: 1 +Start Time: 10:34:30 +End Time: 11:35:53 +MSN: FOOD_FR1 TTL Left +A: + 0: 175.150 270.750 762.050 762.900 1042.600 + 5: 1567.800 1774.950 2448.450 2454.050 2552.800 + 10: 2620.550 2726.250 0.000000 0.000000 0.000000 +B: + 0: 175.150 270.750 762.050 1042.600 1567.800 + 5: 1774.950 2448.450 2552.800 2620.550 2726.250 + 10: 0.000 0.000 0.000 0.000 0.000 +C: + 0: 330.050 362.500 947.200 1232.100 1233.400 + 5: 1255.200 1309.200 1430.300 1460.500 1466.850 + 10: 1468.800 1967.450 2537.950 2542.250 2614.850 + 15: 2707.350 2717.700 2801.050 2818.450 3324.450 + 20: 3384.750 3538.250 someGarbage-9172937 more garbage + + +Start Date: 04/11/19 +End Date: 04/11/19 +Subject: 95.259 +Experiment: +Group: +Box: 1 +Start Time: 09:41:34 +End Time: 10:41:38 +MSN: FOOD_FR1 TTL Left +A: + 0: 37.000 155.800 246.600 286.350 301.150 + 5: 378.650 455.600 480.250 501.500 634.550 + 10: 639.050 656.050 656.400 658.400 660.900 + 15: 663.300 664.200 666.600 668.150 677.650 +B: + 0: 37.000 155.800 246.600 286.350 301.150 + 5: 378.650 455.600 480.250 501.500 639.050 + 10: 677.650 695.650 747.250 820.550 973.000 + 15: 992.050 1031.700 1110.050 1446.750 1480.950 +C: + 0: 626.250 626.500 691.250 1098.900 1202.150 + 5: 1813.750 2718.550 3264.450 3413.600 3473.300 + + +Start Date: 04/12/19 +End Date: 04/12/19 +Subject: 95.259 +Experiment: +Group: +Box: 1 +Start Time: 12:40:18 +End Time: 13:18:18 +MSN: RR10_Left_AHJS +A: + 
0: 52.300 72.050 101.300 106.200 106.600 + 5: 133.200 135.350 152.550 153.000 155.300 + 10: 166.100 166.700 177.550 184.300 184.950 + 15: 188.300 188.800 191.150 191.550 218.450 +B: +C: + 0: 99.200 215.500 278.600 283.850 311.450 + 5: 314.500 438.950 480.650 503.500 521.300 + 10: 573.150 579.100 616.350 649.100 665.150 + 15: 666.150 702.550 703.350 703.850 706.300 +""" + medpc_file_path = tmp_path / "medpc_file.txt" + medpc_file_path.write_text(content) + return medpc_file_path + + +def test_get_medpc_variables(medpc_file_path): + variables = get_medpc_variables(medpc_file_path, ["Start Date", "End Date", "Subject"]) + assert variables == { + "Start Date": ["04/09/19", "04/11/19", "04/12/19"], + "End Date": ["04/09/19", "04/11/19", "04/12/19"], + "Subject": ["95.259", "95.259", "95.259"], + } + + +@pytest.mark.parametrize( + "session_conditions, start_variable, expected_slice", + [ + ({"Start Date": "04/09/19", "Start Time": "10:34:30"}, "Start Date", slice(1, 25)), + ({"Start Date": "04/11/19", "Start Time": "09:41:34"}, "Start Date", slice(27, 49)), + ({"Start Date": "04/12/19", "Start Time": "12:40:18"}, "Start Date", slice(51, 73)), + ], +) +def test_get_session_lines(medpc_file_path, session_conditions, start_variable, expected_slice): + with open(medpc_file_path, "r") as f: + lines = f.readlines() + session_lines = _get_session_lines(lines, session_conditions, start_variable) + expected_session_lines = lines[expected_slice] + assert session_lines == expected_session_lines + + +def test_get_session_lines_invalid_session_conditions(medpc_file_path): + with open(medpc_file_path, "r") as f: + lines = f.readlines() + session_conditions = {"Invalid": "session condition"} + start_variable = "Start Date" + with pytest.raises(ValueError) as exc_info: + _get_session_lines(lines, session_conditions, start_variable) + assert str(exc_info.value) == f"Could not find the session with conditions {session_conditions}" + + +def 
test_get_session_lines_invalid_start_variable(medpc_file_path): + with open(medpc_file_path, "r") as f: + lines = f.readlines() + session_conditions = {"Start Date": "04/09/19", "Start Time": "10:34:30"} + start_variable = "Invalid Start Variable" + with pytest.raises(ValueError) as exc_info: + _get_session_lines(lines, session_conditions, start_variable) + assert ( + str(exc_info.value) + == f"Could not find the start variable ({start_variable}) of the session with conditions {session_conditions}" + ) + + +def test_get_session_lines_ambiguous_session_conditions(medpc_file_path): + with open(medpc_file_path, "r") as f: + lines = f.readlines() + session_conditions = {"Subject": "95.259"} + start_variable = "Start Date" + session_lines = _get_session_lines(lines, session_conditions, start_variable) + expected_session_lines = lines[1:25] + assert session_lines == expected_session_lines + + +def test_read_medpc_file(medpc_file_path): + medpc_name_to_info_dict = { + "Start Date": {"name": "start_date", "is_array": False}, + "End Date": {"name": "end_date", "is_array": False}, + "Subject": {"name": "subject", "is_array": False}, + "Experiment": {"name": "experiment", "is_array": False}, + "Group": {"name": "group", "is_array": False}, + "Box": {"name": "box", "is_array": False}, + "Start Time": {"name": "start_time", "is_array": False}, + "End Time": {"name": "end_time", "is_array": False}, + "MSN": {"name": "msn", "is_array": False}, + "A": {"name": "a", "is_array": True}, + "B": {"name": "b", "is_array": True}, + "C": {"name": "c", "is_array": True}, + } + session_conditions = {"Start Date": "04/09/19", "Start Time": "10:34:30"} + start_variable = "Start Date" + session_dict = read_medpc_file(medpc_file_path, medpc_name_to_info_dict, session_conditions, start_variable) + expected_session_dict = { + "start_date": "04/09/19", + "end_date": "04/09/19", + "subject": "95.259", + "experiment": "", + "group": "", + "box": "1", + "start_time": "10:34:30", + "end_time": 
"11:35:53", + "msn": "FOOD_FR1 TTL Left", + "a": np.array( + [ + 175.150, + 270.750, + 762.050, + 762.900, + 1042.600, + 1567.800, + 1774.950, + 2448.450, + 2454.050, + 2552.800, + 2620.550, + 2726.250, + ] + ), + "b": np.array( + [ + 175.150, + 270.750, + 762.050, + 1042.600, + 1567.800, + 1774.950, + 2448.450, + 2552.800, + 2620.550, + 2726.250, + ] + ), + "c": np.array( + [ + 330.050, + 362.500, + 947.200, + 1232.100, + 1233.400, + 1255.200, + 1309.200, + 1430.300, + 1460.500, + 1466.850, + 1468.800, + 1967.450, + 2537.950, + 2542.250, + 2614.850, + 2707.350, + 2717.700, + 2801.050, + 2818.450, + 3324.450, + 3384.750, + 3538.250, + ] + ), + } + assert session_dict["start_date"] == expected_session_dict["start_date"] + assert session_dict["end_date"] == expected_session_dict["end_date"] + assert session_dict["subject"] == expected_session_dict["subject"] + assert session_dict["experiment"] == expected_session_dict["experiment"] + assert session_dict["group"] == expected_session_dict["group"] + assert session_dict["box"] == expected_session_dict["box"] + assert session_dict["start_time"] == expected_session_dict["start_time"] + assert session_dict["end_time"] == expected_session_dict["end_time"] + assert session_dict["msn"] == expected_session_dict["msn"] + assert np.array_equal(session_dict["a"], expected_session_dict["a"]) + assert np.array_equal(session_dict["b"], expected_session_dict["b"]) + assert np.array_equal(session_dict["c"], expected_session_dict["c"]) + + +def test_read_medpc_file_invalid_multiline_variable(medpc_file_path): + medpc_name_to_info_dict = { + "Start Date": {"name": "start_date", "is_array": True}, + } + session_conditions = {"Start Date": "04/09/19", "Start Time": "10:34:30"} + start_variable = "Start Date" + with pytest.raises(ValueError) as exc_info: + session_dict = read_medpc_file(medpc_file_path, medpc_name_to_info_dict, session_conditions, start_variable) + assert str(exc_info.value) == "Expected start_date to be a multiline 
variable, but found a single line variable." diff --git a/tests/test_on_data/test_behavior_interfaces.py b/tests/test_on_data/test_behavior_interfaces.py index 36011da92..1d25aaf36 100644 --- a/tests/test_on_data/test_behavior_interfaces.py +++ b/tests/test_on_data/test_behavior_interfaces.py @@ -20,6 +20,7 @@ DeepLabCutInterface, FicTracDataInterface, LightningPoseDataInterface, + MedPCInterface, MiniscopeBehaviorInterface, NeuralynxNvtInterface, SLEAPInterface, @@ -28,6 +29,7 @@ from neuroconv.tools.testing.data_interface_mixins import ( DataInterfaceTestMixin, DeepLabCutInterfaceMixin, + MedPCInterfaceMixin, TemporalAlignmentMixin, VideoInterfaceMixin, ) @@ -754,5 +756,131 @@ def check_video_stub(self): assert nwbfile.acquisition[self.image_series_name].data.shape[0] == 10 +class TestMedPCInterface(TestCase, MedPCInterfaceMixin): + data_interface_cls = MedPCInterface + interface_kwargs = dict( + file_path=str(BEHAVIOR_DATA_PATH / "medpc" / "example_medpc_file_06_06_2024.txt"), + session_conditions={ + "Start Date": "04/10/19", + "Start Time": "12:36:13", + }, + start_variable="Start Date", + metadata_medpc_name_to_info_dict={ + "Start Date": {"name": "start_date", "is_array": False}, + "Start Time": {"name": "start_time", "is_array": False}, + "Subject": {"name": "subject", "is_array": False}, + "Box": {"name": "box", "is_array": False}, + "MSN": {"name": "MSN", "is_array": False}, + }, + aligned_timestamp_names=[], + ) + save_directory = OUTPUT_PATH + expected_metadata = { + "start_date": "04/10/19", + "start_time": "12:36:13", + "subject": "95.259", + "box": "1", + "MSN": "FOOD_FR1 TTL Left", + } + expected_events = [ + { + "name": "left_nose_poke_times", + "description": "Left nose poke times", + }, + { + "name": "right_nose_poke_times", + "description": "Right nose poke times", + }, + { + "name": "left_reward_times", + "description": "Left reward times", + }, + ] + expected_interval_series = [ + { + "name": "reward_port_intervals", + "description": "Interval 
of time spent in reward port (1 is entry, -1 is exit)", + "onset_name": "port_entry_times", + "duration_name": "duration_of_port_entry", + }, + ] + + def check_extracted_metadata(self, metadata: dict): + assert metadata["MedPC"] == self.expected_metadata + + def check_read_nwb(self, nwbfile_path: str): + with NWBHDF5IO(nwbfile_path, "r") as io: + nwbfile = io.read() + for event_dict in self.expected_events: + expected_name = event_dict["name"] + expected_description = event_dict["description"] + assert expected_name in nwbfile.processing["behavior"].data_interfaces + event = nwbfile.processing["behavior"].data_interfaces[expected_name] + assert event.description == expected_description + + for interval_dict in self.expected_interval_series: + expected_name = interval_dict["name"] + expected_description = interval_dict["description"] + assert expected_name in nwbfile.processing["behavior"]["behavioral_epochs"].interval_series + interval_series = nwbfile.processing["behavior"]["behavioral_epochs"].interval_series[expected_name] + assert interval_series.description == expected_description + + def test_all_conversion_checks(self): + metadata = { + "NWBFile": {"session_start_time": datetime(2019, 4, 10, 12, 36, 13).astimezone()}, + "MedPC": { + "start_date": "04/10/19", + "start_time": "12:36:13", + "subject": "95.259", + "box": "1", + "MSN": "FOOD_FR1 TTL Left", + "module_name": "behavior", + "module_description": "Behavioral data from MedPC output files.", + "medpc_name_to_info_dict": { + "A": {"name": "left_nose_poke_times", "is_array": True}, + "B": {"name": "left_reward_times", "is_array": True}, + "C": {"name": "right_nose_poke_times", "is_array": True}, + "D": {"name": "right_reward_times", "is_array": True}, + "E": {"name": "duration_of_port_entry", "is_array": True}, + "G": {"name": "port_entry_times", "is_array": True}, + }, + "Events": [ + { + "name": "left_nose_poke_times", + "description": "Left nose poke times", + }, + { + "name": "right_nose_poke_times", 
+ "description": "Right nose poke times", + }, + { + "name": "left_reward_times", + "description": "Left reward times", + }, + ], + "IntervalSeries": [ + { + "name": "reward_port_intervals", + "description": "Interval of time spent in reward port (1 is entry, -1 is exit)", + "onset_name": "port_entry_times", + "duration_name": "duration_of_port_entry", + }, + ], + }, + } + super().test_all_conversion_checks(metadata=metadata) + + def test_interface_alignment(self): + medpc_name_to_info_dict = { + "A": {"name": "left_nose_poke_times", "is_array": True}, + "B": {"name": "left_reward_times", "is_array": True}, + "C": {"name": "right_nose_poke_times", "is_array": True}, + "D": {"name": "right_reward_times", "is_array": True}, + "E": {"name": "duration_of_port_entry", "is_array": True}, + "G": {"name": "port_entry_times", "is_array": True}, + } + super().test_interface_alignment(medpc_name_to_info_dict=medpc_name_to_info_dict) + + if __name__ == "__main__": unittest.main()