diff --git a/src/constantinople_lab_to_nwb/general_interfaces/__init__.py b/src/constantinople_lab_to_nwb/general_interfaces/__init__.py new file mode 100644 index 0000000..ee95b94 --- /dev/null +++ b/src/constantinople_lab_to_nwb/general_interfaces/__init__.py @@ -0,0 +1 @@ +from .bpodbehaviorinterface import BpodBehaviorInterface diff --git a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py b/src/constantinople_lab_to_nwb/general_interfaces/bpodbehaviorinterface.py similarity index 88% rename from src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py rename to src/constantinople_lab_to_nwb/general_interfaces/bpodbehaviorinterface.py index 322db38..21bd738 100644 --- a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py +++ b/src/constantinople_lab_to_nwb/general_interfaces/bpodbehaviorinterface.py @@ -23,14 +23,14 @@ from pynwb import NWBFile -class Mah2024BpodInterface(BaseDataInterface): - """Behavior interface for mah_2024 conversion""" +class BpodBehaviorInterface(BaseDataInterface): + """Behavior interface for converting behavior data from Bpod system.""" def __init__( - self, - file_path: Union[str, Path], - default_struct_name: str = "SessionData", - verbose: bool = True, + self, + file_path: Union[str, Path], + default_struct_name: str = "SessionData", + verbose: bool = True, ): """ Interface for converting raw Bpod data to NWB. 
@@ -47,6 +47,7 @@ def __init__( self.default_struct_name = default_struct_name self.file_path = file_path self._bpod_struct = self._read_file() + self._block_name_mapping = {1: "Mixed", 2: "High", 3: "Low"} super().__init__(file_path=file_path, verbose=verbose) def get_metadata(self) -> DeepDict: @@ -60,7 +61,7 @@ def get_metadata(self) -> DeepDict: if "Info" in self._bpod_struct: info_dict = self._bpod_struct["Info"] date_string = info_dict["SessionDate"] + info_dict["SessionStartTime_UTC"] - session_start_time = datetime.strptime(date_string, '%d-%b-%Y%H:%M:%S') + session_start_time = datetime.strptime(date_string, "%d-%b-%Y%H:%M:%S") metadata["NWBFile"].update(session_start_time=session_start_time) # Device info @@ -139,13 +140,13 @@ def get_metadata(self) -> DeepDict: ), Block=dict( name="block_type", - description="The block type (High, Low or Test).", + description="The block type (High, Low or Mixed).", expression_type="string", output_type="string", ), BlockLengthTest=dict( - name="num_trials_in_test_blocks", - description="The number of trials in test blocks.", + name="num_trials_in_mixed_blocks", + description="The number of trials in mixed blocks.", expression_type="integer", output_type="numeric", ), @@ -302,7 +303,9 @@ def _read_file(self) -> dict: def get_trial_times(self) -> (List[float], List[float]): return self._bpod_struct["TrialStartTimestamp"], self._bpod_struct["TrialEndTimestamp"] - def create_states_table(self, metadata: dict, trial_start_times: List[float]) -> tuple[StateTypesTable, StatesTable]: + def create_states_table( + self, metadata: dict, trial_start_times: List[float] + ) -> tuple[StateTypesTable, StatesTable]: state_types_metadata = metadata["Behavior"]["StateTypesTable"] states_table_metadata = metadata["Behavior"]["StatesTable"] @@ -311,9 +314,24 @@ def create_states_table(self, metadata: dict, trial_start_times: List[float]) -> states_table = StatesTable(description=states_table_metadata["description"], 
state_types_table=state_types) trials_data = self._bpod_struct["RawEvents"]["Trial"] - for state_name in trials_data[0]["States"]: + num_trials = self._bpod_struct["nTrials"] + + # make it iterable if only one trial + if num_trials == 1: + trials_data = [trials_data] + trial_start_times = [trial_start_times] + + unique_state_names = set() + for trial_index in range(num_trials): + unique_state_names.update(trials_data[trial_index]["States"]) + for state_name in unique_state_names: + if state_name not in state_types_metadata: + raise ValueError( + f"State '{state_name}' not in metadata. State type should be defined in metadata['Behavior']['StateTypesTable']." + ) + state_type = state_types_metadata[state_name]["name"] state_types.add_row( - state_name=state_types_metadata[state_name]["name"], + state_name=state_type, check_ragged=False, ) @@ -333,7 +351,9 @@ def create_states_table(self, metadata: dict, trial_start_times: List[float]) -> return state_types, states_table - def create_actions_table(self, metadata: dict, trial_start_times: List[float]) -> tuple[ActionTypesTable, ActionsTable]: + def create_actions_table( + self, metadata: dict, trial_start_times: List[float] + ) -> tuple[ActionTypesTable, ActionsTable]: action_types_metadata = metadata["Behavior"]["ActionTypesTable"] actions_table_metadata = metadata["Behavior"]["ActionsTable"] @@ -350,13 +370,15 @@ def create_actions_table(self, metadata: dict, trial_start_times: List[float]) - for trial_states_and_events, trial_start_time in zip(trials_data, trial_start_times): events = trial_states_and_events["Events"] - sound_events = [event_name for event_name in events if "AudioPlayer" in event_name or "WavePlayer" in event_name] + sound_events = [ + event_name for event_name in events if "AudioPlayer" in event_name or "WavePlayer" in event_name + ] if not len(sound_events): continue for sound_event in sound_events: timestamps = events[sound_event] - if isinstance(timestamps, float): + if not 
isinstance(timestamps, list): timestamps = [timestamps] for timestamp in timestamps: actions_table.add_row( @@ -368,7 +390,9 @@ def create_actions_table(self, metadata: dict, trial_start_times: List[float]) - return action_types, actions_table - def create_events_table(self, metadata: dict, trial_start_times: List[float]) -> tuple[EventTypesTable, EventsTable]: + def create_events_table( + self, metadata: dict, trial_start_times: List[float] + ) -> tuple[EventTypesTable, EventsTable]: event_types_metadata = metadata["Behavior"]["EventTypesTable"] events_table_metadata = metadata["Behavior"]["EventsTable"] @@ -405,7 +429,7 @@ def create_events_table(self, metadata: dict, trial_start_times: List[float]) -> if event_name not in event_value_mapping: continue relative_timestamps = events[event_name] - if isinstance(relative_timestamps, float): + if not isinstance(relative_timestamps, list): relative_timestamps = [relative_timestamps] event_type = event_types.event_name[:].index(event_types_metadata[event_name]["name"]) for timestamp in relative_timestamps: @@ -434,8 +458,7 @@ def create_task_arguments_table(self, metadata: dict) -> TaskArgumentsTable: if expression_type == "boolean": task_argument_value = bool(task_argument_value) if task_argument_name == "Block": - block_name_mapping = {1: "Test", 2: "High", 3: "Low"} - task_argument_value = block_name_mapping[task_argument_value] + task_argument_value = self._block_name_mapping[task_argument_value] task_arguments.add_row( argument_name=task_arguments_metadata[task_argument_name]["name"], @@ -520,10 +543,10 @@ def add_trials(self, nwbfile: NWBFile, metadata: dict) -> None: nwbfile.trials = trials def add_task_arguments_to_trials( - self, - nwbfile: NWBFile, - metadata: dict, - arguments_to_exclude: List[str] = None, + self, + nwbfile: NWBFile, + metadata: dict, + arguments_to_exclude: List[str] = None, ) -> None: if arguments_to_exclude is None: arguments_to_exclude = [] @@ -539,17 +562,26 @@ def 
add_task_arguments_to_trials( for task_argument_name in task_arguments_for_this_session: if task_argument_name in arguments_to_exclude: continue - task_argument_values = np.array([trial_settings["GUI"][task_argument_name] for trial_settings in trials_settings]) + if task_argument_name not in task_arguments_metadata: + warn(f"Task argument '{task_argument_name}' not in metadata.") + task_argument_column_name = task_argument_name + description = "no description" + else: + task_argument_column_name = task_arguments_metadata[task_argument_name]["name"] + description = task_arguments_metadata[task_argument_name]["description"] + + task_argument_values = np.array( + [trial_settings["GUI"][task_argument_name] for trial_settings in trials_settings] + ) - task_argument_type = task_arguments_metadata[task_argument_name]["expression_type"] + task_argument_type = task_arguments_metadata.get(task_argument_name, {}).get("expression_type") if task_argument_type == "boolean": task_argument_values = task_argument_values.astype(bool) elif task_argument_name == "Block": - block_name_mapping = {1: "Test", 2: "High", 3: "Low"} - task_argument_values = np.array([block_name_mapping[block] for block in task_argument_values]) + task_argument_values = np.array([self._block_name_mapping[block] for block in task_argument_values]) trials.add_column( - name=task_arguments_metadata[task_argument_name]["name"], - description=task_arguments_metadata[task_argument_name]["description"], + name=task_argument_column_name, + description=description, data=task_argument_values, ) diff --git a/src/constantinople_lab_to_nwb/mah_2024/interfaces/__init__.py b/src/constantinople_lab_to_nwb/mah_2024/interfaces/__init__.py index 16bba25..a1a3b75 100644 --- a/src/constantinople_lab_to_nwb/mah_2024/interfaces/__init__.py +++ b/src/constantinople_lab_to_nwb/mah_2024/interfaces/__init__.py @@ -1,2 +1 @@ -from .mah_2024_bpodinterface import Mah2024BpodInterface from .mah_2024_processedbehaviorinterface import Mah2024ProcessedBehaviorInterface diff --git 
a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_processedbehaviorinterface.py b/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_processedbehaviorinterface.py index 0e99c88..2c94b8e 100644 --- a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_processedbehaviorinterface.py +++ b/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_processedbehaviorinterface.py @@ -1,4 +1,5 @@ """Primary class for converting experiment-specific behavior.""" + from pathlib import Path from typing import Optional, Union @@ -9,48 +10,15 @@ from pynwb.file import NWBFile -def _transform_data(data: dict, session_index: int) -> pd.DataFrame: - """ - Transform the data from the .mat file into a DataFrame. - """ - if "ntrials" not in data: - raise ValueError("The 'ntrials' key is missing from the data.") - num_trials = data["ntrials"] - # Calculate start and stop indices - start_indices = np.concatenate(([0], np.cumsum(num_trials)[:-1])).astype(int) - stop_indices = np.cumsum(num_trials).astype(int) - - start = start_indices[session_index] - stop = stop_indices[session_index] - - num_all_trials = int(np.sum(num_trials)) - column_names = list(data.keys()) - - columns_with_arrays = [column for column in column_names if isinstance(data[column], list) and len(data[column]) == num_all_trials] - # Create DataFrame with relevant columns - dataframe = pd.DataFrame({column_name: data[column_name][start:stop] for column_name in columns_with_arrays}) - - # Add side - if "side" in data: - side = np.array([side_char for side_char in data["side"]]) - side_to_add = side[start:stop] - dataframe["side"] = side_to_add - - if "wait_thresh" in data: - dataframe["wait_thresh"] = data["wait_thresh"] * len(dataframe) - - return dataframe - - class Mah2024ProcessedBehaviorInterface(BaseDataInterface): """Behavior interface for mah_2024 conversion""" def __init__( - self, - file_path: Union[str, Path], - date: str, - default_struct_name: str = "A", - verbose: bool = True, + 
self, + file_path: Union[str, Path], + date_index: int, + default_struct_name: str = "A", + verbose: bool = True, ): """ Interface for adding data from the processed behavior file to an existing NWB file. @@ -59,14 +27,14 @@ def __init__( ---------- file_path: Union[str, Path] Path to the .mat file containing the processed behavior data. - date: str - Date of the session to convert. + date_index: int + The row index of the date in the .mat file. default_struct_name: str, optional The struct name to load from the .mat file, default is "A". """ self.default_struct_name = default_struct_name - self.date = date + self.date_index = date_index super().__init__(file_path=file_path, verbose=verbose) def _read_file(self, file_path: Union[str, Path]) -> pd.DataFrame: @@ -77,15 +45,52 @@ def _read_file(self, file_path: Union[str, Path]) -> pd.DataFrame: behavior_data = behavior_data[self.default_struct_name] if "date" not in behavior_data: raise ValueError(f"Date not found in {file_path}.") - if self.date not in behavior_data["date"]: - raise ValueError(f"Date '{self.date}' not found in {file_path}.") - session_index = behavior_data["date"].index(self.date) - dataframe = _transform_data(data=behavior_data, session_index=session_index) + dataframe = self._transform_data(data=behavior_data) return dataframe - def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, column_name_mapping: Optional[dict] = None, column_descriptions: Optional[dict] = None) -> None: + def _transform_data(self, data: dict) -> pd.DataFrame: + """ + Transform the data from the .mat file into a DataFrame. 
+ """ + if "ntrials" not in data: + raise ValueError("The 'ntrials' key is missing from the data.") + num_trials = data["ntrials"] + # Calculate start and stop indices + start_indices = np.concatenate(([0], np.cumsum(num_trials)[:-1])).astype(int) + stop_indices = np.cumsum(num_trials).astype(int) + + start = start_indices[self.date_index] + stop = stop_indices[self.date_index] + + num_all_trials = int(np.sum(num_trials)) + column_names = list(data.keys()) + + columns_with_arrays = [ + column for column in column_names if isinstance(data[column], list) and len(data[column]) == num_all_trials + ] + # Create DataFrame with relevant columns + dataframe = pd.DataFrame({column_name: data[column_name][start:stop] for column_name in columns_with_arrays}) + + # Add side + if "side" in data: + side = np.array([side_char for side_char in data["side"]]) + side_to_add = side[start:stop] + dataframe["side"] = side_to_add + + if "wait_thresh" in data: + dataframe["wait_thresh"] = data["wait_thresh"] * len(dataframe) + + return dataframe + + def add_to_nwbfile( + self, + nwbfile: NWBFile, + metadata: dict, + column_name_mapping: Optional[dict] = None, + column_descriptions: Optional[dict] = None, + ) -> None: dataframe = self._read_file(file_path=self.source_data["file_path"]) if "side" in dataframe.columns: @@ -97,7 +102,9 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, column_name_mapping: if column in dataframe.columns: dataframe[column] = dataframe[column].astype(bool) - columns_to_add = column_name_mapping.keys() if column_name_mapping is not None else dataframe.columns + columns_to_add = dataframe.columns + if column_name_mapping is not None: + columns_to_add = [column for column in column_name_mapping.keys() if column in dataframe.columns] trials = nwbfile.trials if trials is None: @@ -105,7 +112,11 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, column_name_mapping: for column_name in columns_to_add: name = column_name_mapping.get(column_name, 
column_name) if column_name_mapping is not None else column_name - description = column_descriptions.get(column_name, "no description") if column_descriptions is not None else "no description" + description = ( + column_descriptions.get(column_name, "no description") + if column_descriptions is not None + else "no description" + ) trials.add_column( name=name, description=description, diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py new file mode 100644 index 0000000..2e88d00 --- /dev/null +++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py @@ -0,0 +1,277 @@ +import os +from datetime import datetime +from pathlib import Path +from typing import Union, List +from warnings import warn + +import pandas as pd +from pymatreader import read_mat +from tqdm import tqdm + +from constantinople_lab_to_nwb.mah_2024.mah_2024_convert_session import ( + session_to_nwb, + get_subject_metadata_from_rat_info_folder, +) + +import warnings + +# Suppress specific UserWarning messages +warnings.filterwarnings( + "ignore", + message="The linked table for DynamicTableRegion 'action_type' does not share an ancestor with the DynamicTableRegion.", +) +warnings.filterwarnings( + "ignore", + message="The linked table for DynamicTableRegion 'event_type' does not share an ancestor with the DynamicTableRegion.", +) +warnings.filterwarnings( + "ignore", + message="The linked table for DynamicTableRegion 'state_type' does not share an ancestor with the DynamicTableRegion.", +) + + +def _get_sessions_to_convert_from_mat( + file_path: Union[str, Path], + bpod_folder_path: Union[str, Path], + default_struct_name: str = "A", +) -> List[str]: + """ + Get the list of sessions to convert from a .mat file. + + Parameters + ---------- + file_path : str or Path + The path to the .mat file. 
+ """ + file_path = Path(file_path) + if not file_path.exists(): + raise FileNotFoundError(f"The file {file_path} does not exist.") + if ".mat" not in file_path.suffixes: + raise ValueError(f"The file {file_path} is not a .mat file.") + behavior_data = read_mat(str(file_path)) + if default_struct_name not in behavior_data: + raise ValueError(f"The default struct name '{default_struct_name}' is missing from {file_path}.") + + behavior_data = behavior_data[default_struct_name] + if "date" not in behavior_data: + raise ValueError(f"The 'date' key is missing from {file_path}.") + + dates = behavior_data["date"] + + subject_id = file_path.stem.split("_")[-1] + bpod_files_to_convert = [] + for date in dates: + date_obj = datetime.strptime(date, "%d-%b-%Y") + formatted_date_str = date_obj.strftime("%Y%m%d") + + raw_behavior_file_paths = list( + (bpod_folder_path / subject_id / "DataFiles").glob(f"*{formatted_date_str}*.mat") + ) + bpod_files_to_convert.extend(raw_behavior_file_paths) + + return bpod_files_to_convert + + +def _get_date_index(bpod_file_path: Union[str, Path], a_struct_file_path: Union[str, Path]) -> Union[int, None]: + """ + Figure out the date index for the processed behavior file. + + Parameters + ---------- + bpod_file_path: Union[str, Path] + Path to the raw Bpod output (.mat file). + a_struct_file_path: Union[str, Path] + Path to the processed behavior data (.mat file). + + Returns + ------- + int + The date index for the processed behavior file. + """ + bpod_data = read_mat(str(bpod_file_path)) + try: + bpod_session_data = bpod_data["SessionData"] + except KeyError: + warn( + f"'SessionData' key not found in '{bpod_file_path}'. The date index could not be determined from the file." 
+ ) + return None + + num_trials = bpod_session_data["nTrials"] + date = bpod_session_data["Info"]["SessionDate"] + + a_struct_data = read_mat(str(a_struct_file_path)) + dates = a_struct_data["A"]["date"] + num_trials_per_day = a_struct_data["A"]["ntrials"] + + dates_and_trials = pd.DataFrame(dict(date=dates, num_trials=num_trials_per_day)) + filtered_dates_and_trials = dates_and_trials[ + (dates_and_trials["date"] == date) & (dates_and_trials["num_trials"] == num_trials) + ] + + if filtered_dates_and_trials.empty: + warn(f"Date index for '{date}' not found in '{a_struct_file_path}'.") + return None + + return filtered_dates_and_trials.index[0] + + +def sessions_to_nwb( + raw_behavior_folder_path: Union[str, Path], + processed_behavior_folder_path: Union[str, Path], + rat_info_folder_path: Union[str, Path], + nwbfile_folder_path: Union[str, Path], + column_name_mapping: dict = None, + column_descriptions: dict = None, + overwrite: bool = False, +): + """ + Convert all sessions to NWB format. + The number of sessions to convert is determined by the processed behavior files. + Each processed behavior file contains data for multiple days, the 'date' key is used to identify the sessions in the raw Bpod output. + + Parameters + ---------- + raw_behavior_folder_path: str or Path + The path to the folder containing the raw Bpod output files. + processed_behavior_folder_path: str or Path + The path to the folder containing the processed behavior files. + rat_info_folder_path: str or Path + The path to the folder containing the rat info files. + nwbfile_folder_path: str or Path + The path to the folder where the NWB files will be saved. + column_name_mapping: dict, optional + Dictionary to map the column names in the processed behavior data to more descriptive column names. + column_descriptions: dict, optional + Dictionary to add descriptions to the columns in the processed behavior data. + overwrite + Whether to overwrite existing NWB files. 
+ """ + nwbfile_folder_path = Path(nwbfile_folder_path) + nwbfile_folder_path.mkdir(parents=True, exist_ok=True) + + processed_mat_files = list(Path(processed_behavior_folder_path).glob("ratTrial*.mat")) + subject_ids = [ + processed_behavior_file_path.stem.split("_")[-1] for processed_behavior_file_path in processed_mat_files + ] + sessions_to_convert_per_subject = { + subject_id: _get_sessions_to_convert_from_mat( + file_path=processed_behavior_file_path, bpod_folder_path=Path(raw_behavior_folder_path) + ) + for subject_id, processed_behavior_file_path in zip(subject_ids, processed_mat_files) + } + + for subject_id, processed_behavior_file_path in zip(subject_ids, processed_mat_files): + raw_bpod_file_paths = sessions_to_convert_per_subject[subject_id] + num_sessions_per_subject = len(raw_bpod_file_paths) + progress_bar = tqdm( + raw_bpod_file_paths, + desc=f"Converting subject '{subject_id}' with {num_sessions_per_subject} sessions to NWB ...", + position=0, + total=num_sessions_per_subject, + dynamic_ncols=True, + ) + + for raw_behavior_file_path in progress_bar: + session_id = Path(raw_behavior_file_path).stem.split("_", maxsplit=1)[1].replace("_", "-") + subject_nwb_folder_path = nwbfile_folder_path / f"sub-{subject_id}" + if not subject_nwb_folder_path.exists(): + os.makedirs(subject_nwb_folder_path, exist_ok=True) + nwbfile_path = subject_nwb_folder_path / f"sub-{subject_id}_ses-{session_id}.nwb" + + if nwbfile_path.exists() and not overwrite: + continue + + date_index = _get_date_index( + bpod_file_path=raw_behavior_file_path, a_struct_file_path=processed_behavior_file_path + ) + if date_index is None: + print( + f"Skipping '{subject_id}' session '{session_id}', session not found in the processed behavior file." 
+ ) + continue + + date_from_mat = session_id.split("-")[1] + date_obj = datetime.strptime(date_from_mat, "%Y%m%d") # session id embeds the date as YYYYMMDD + subject_metadata = get_subject_metadata_from_rat_info_folder( + folder_path=rat_info_folder_path, + subject_id=subject_id, + date=date_obj.strftime("%Y-%m-%d"), + ) + + session_to_nwb( + raw_behavior_file_path=raw_behavior_file_path, + processed_behavior_file_path=processed_behavior_file_path, + date_index=date_index, + nwbfile_path=nwbfile_path, + column_name_mapping=column_name_mapping, + column_descriptions=column_descriptions, + subject_metadata=subject_metadata, + overwrite=overwrite, + ) + + +if __name__ == "__main__": + + # Parameters for conversion + processed_behavior_folder_path = Path(r"/Volumes/T9/Constantinople/published/A_Structs_Final") + raw_behavior_folder_path = Path(r"/Volumes/T9/Constantinople/raw_Bpod") + rat_info_folder_path = Path(r"/Volumes/T9/Constantinople/Rat_info") + + column_name_mapping = dict( + hits="is_rewarded", + vios="is_violation", + optout="is_opt_out", + wait_time="wait_time", + wait_time_unthresholded="wait_time_unthresholded", + wait_thresh="wait_time_threshold", + wait_for_cpoke="wait_for_center_poke", + zwait_for_cpoke="z_scored_wait_for_center_poke", + side="rewarded_port", + lpoke="num_left_pokes", + rpoke="num_right_pokes", + cpoke="num_center_pokes", + lpokedur="duration_of_left_pokes", + rpokedur="duration_of_right_pokes", + cpokedur="duration_of_center_pokes", + rt="reaction_time", + slrt="side_poke_reaction_time", # side led on = side poke + ITI="inter_trial_interval", + ) + # The column descriptions are used to add descriptions to the columns in the processed data. 
(optional) + column_descriptions = dict( + hits="Whether the subject received reward for each trial.", + vios="Whether the subject violated the trial by not maintaining center poke for the time required by 'nose_in_center'.", + optout="Whether the subject opted out for each trial.", + wait_time="The wait time for the subject for for each trial in seconds, after removing outliers." + " For hit trials (when reward was delivered) the wait time is equal to the reward delay." + " For opt-out trials, the wait time is equal to the time waited from trial start to opting out.", + wait_for_cpoke="The time between side port poke and center poke in seconds, includes the time when the subject is consuming the reward.", + zwait_for_cpoke="The z-scored wait_for_cpoke using all trials.", + side="The rewarded port (Left or Right) for each trial.", + lpoke="The number of left pokes for each trial.", + rpoke="The number of right pokes for each trial.", + cpoke="The number of center pokes for each trial.", + lpokedur="The duration of left pokes for each trial in seconds.", + rpokedur="The duration of right pokes for each trial in seconds.", + cpokedur="The duration of center pokes for each trial in seconds.", + rt="The reaction time in seconds.", + slrt="The side poke reaction time in seconds.", + ITI="The time to initiate trial in seconds (the time between the end of the consummatory period and the time to initiate the next trial).", + wait_time_unthresholded="The wait time for the subject for each trial in seconds without removing outliers.", + wait_thresh="The threshold in seconds to remove wait-times (mean + 1*std of all cumulative wait-times).", + ) + + nwbfile_folder_path = Path("/Users/weian/data/001169") + + overwrite = False + + sessions_to_nwb( + raw_behavior_folder_path=raw_behavior_folder_path, + processed_behavior_folder_path=processed_behavior_folder_path, + rat_info_folder_path=rat_info_folder_path, + nwbfile_folder_path=nwbfile_folder_path, + 
column_name_mapping=column_name_mapping, + column_descriptions=column_descriptions, + overwrite=overwrite, + ) diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py index ab59ea9..5cd72ee 100644 --- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py +++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py @@ -1,5 +1,6 @@ from pathlib import Path from typing import Union, Optional +from warnings import warn import pandas as pd from dateutil import tz @@ -40,13 +41,16 @@ def get_subject_metadata_from_rat_info_folder( filtered_rat_registry = rat_registry[rat_registry["RatName"] == subject_id] if not filtered_rat_registry.empty: date_of_birth = filtered_rat_registry["DOB"].values[0] - # convert date of birth to datetime with format "yyyy-mm-dd" - date_of_birth = pd.to_datetime(date_of_birth, format="%Y-%m-%d") - sex = filtered_rat_registry["sex"].values[0] - subject_metadata.update( - date_of_birth=date_of_birth, - sex=sex, - ) + if date_of_birth: + # convert date of birth to datetime with format "yyyy-mm-dd" + date_of_birth = pd.to_datetime(date_of_birth, format="%Y-%m-%d") + subject_metadata.update(date_of_birth=date_of_birth) + else: + # TODO: what to do if date of birth is missing? + warn("Date of birth is missing. 
We recommend adding this information to the rat info files.") + # Using age range specified in the manuscript + subject_metadata.update(age="P6M/P24M") + subject_metadata.update(sex=filtered_rat_registry["sex"].values[0]) vendor = filtered_rat_registry["vendor"].values[0] if vendor: subject_metadata.update(description=f"Vendor: {vendor}") @@ -69,7 +73,7 @@ def get_subject_metadata_from_rat_info_folder( def session_to_nwb( raw_behavior_file_path: Union[str, Path], processed_behavior_file_path: Union[str, Path], - date: str, + date_index: int, nwbfile_path: Union[str, Path], column_name_mapping: Optional[dict] = None, column_descriptions: Optional[dict] = None, @@ -122,7 +126,7 @@ def session_to_nwb( conversion_options.update(dict(RawBehavior=dict(task_arguments_to_exclude=task_arguments_to_exclude))) # Add Processed Behavior - source_data.update(dict(ProcessedBehavior=dict(file_path=processed_behavior_file_path, date=date))) + source_data.update(dict(ProcessedBehavior=dict(file_path=processed_behavior_file_path, date_index=date_index))) conversion_options.update( dict(ProcessedBehavior=dict(column_name_mapping=column_name_mapping, column_descriptions=column_descriptions)) ) @@ -130,6 +134,7 @@ def session_to_nwb( converter = Mah2024NWBConverter(source_data=source_data, verbose=verbose) subject_id, session_id = Path(raw_behavior_file_path).stem.split("_", maxsplit=1) + protocol = session_id.split("_")[0] session_id = session_id.replace("_", "-") # Add datetime to conversion @@ -139,6 +144,7 @@ def session_to_nwb( metadata["NWBFile"].update( session_start_time=session_start_time.replace(tzinfo=tzinfo), session_id=session_id, + protocol=protocol, ) # Update default metadata with the editable in the corresponding yaml file @@ -171,8 +177,8 @@ def session_to_nwb( bpod_file_path = Path("/Volumes/T9/Constantinople/raw_Bpod/C005/DataFiles/C005_RWTautowait_20190909_145629.mat") # The processed behavior data is stored in a .mat file (contains data for multiple days) 
processed_behavior_file_path = Path("/Volumes/T9/Constantinople/A_Structs/ratTrial_C005.mat") - # The date is used to identify the session to convert from the processed behavior file - date = "09-Sep-2019" + # The row index of the date in the processed behavior file + date_index = 0 # The column name mapping is used to rename the columns in the processed data to more descriptive column names. (optional) column_name_mapping = dict( hits="is_rewarded", @@ -191,7 +197,7 @@ def session_to_nwb( rpokedur="duration_of_right_pokes", cpokedur="duration_of_center_pokes", rt="reaction_time", - slrt="short_latency_reaction_time", + slrt="side_poke_reaction_time", ITI="inter_trial_interval", ) # The column descriptions are used to add descriptions to the columns in the processed data. (optional) @@ -212,7 +218,7 @@ def session_to_nwb( rpokedur="The duration of right pokes for each trial in seconds.", cpokedur="The duration of center pokes for each trial in seconds.", rt="The reaction time in seconds.", - slrt="The short-latency reaction time in seconds.", + slrt="The side poke reaction time in seconds.", ITI="The time to initiate trial in seconds (the time between the end of the consummatory period and the time to initiate the next trial).", wait_time_unthresholded="The wait time for the subject for each trial in seconds without removing outliers.", wait_thresh="The threshold in seconds to remove wait-times (mean + 1*std of all cumulative wait-times).", @@ -236,7 +242,7 @@ def session_to_nwb( session_to_nwb( raw_behavior_file_path=bpod_file_path, processed_behavior_file_path=processed_behavior_file_path, - date=date, + date_index=date_index, column_name_mapping=column_name_mapping, column_descriptions=column_descriptions, nwbfile_path=nwbfile_path, diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_nwbconverter.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_nwbconverter.py index f809baf..80c4be9 100644 --- 
a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_nwbconverter.py +++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_nwbconverter.py @@ -1,12 +1,13 @@ from neuroconv import NWBConverter -from constantinople_lab_to_nwb.mah_2024.interfaces import Mah2024BpodInterface, Mah2024ProcessedBehaviorInterface +from constantinople_lab_to_nwb.general_interfaces import BpodBehaviorInterface +from constantinople_lab_to_nwb.mah_2024.interfaces import Mah2024ProcessedBehaviorInterface class Mah2024NWBConverter(NWBConverter): """Primary conversion class for converting the Behavior dataset from the Constantinople Lab.""" data_interface_classes = dict( - RawBehavior=Mah2024BpodInterface, + RawBehavior=BpodBehaviorInterface, ProcessedBehavior=Mah2024ProcessedBehaviorInterface, ) diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt index e9a3dc1..7b3cb0e 100644 --- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt +++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt @@ -1 +1 @@ -ndx-structured-behavior @ git+https://github.com/catalystneuro/ndx-structured-behavior.git@make_task_program_and_schema_optional +git+https://github.com/rly/ndx-structured-behavior.git@main diff --git a/src/constantinople_lab_to_nwb/mah_2024/mat_utils/convertMatDateToString.m b/src/constantinople_lab_to_nwb/mah_2024/mat_utils/convertMatDateToString.m new file mode 100644 index 0000000..76824a6 --- /dev/null +++ b/src/constantinople_lab_to_nwb/mah_2024/mat_utils/convertMatDateToString.m @@ -0,0 +1,32 @@ +% Utility script to load all .mat files from a specified folder path, convert +% the 'date' field from datetime format to string format for compatibility +% with Python, and overwrite the original files in place with the modified data. 
+
+% Define the folder path
+folderPath = '/Volumes/T9/Constantinople/A_Structs'; % Adjust this path as needed
+
+% Get a list of all .mat files in the specified folder
+matFiles = dir(fullfile(folderPath, '*.mat'));
+
+% Iterate over each .mat file
+for i = 1:length(matFiles)
+    % Construct the full file path
+    filePath = fullfile(matFiles(i).folder, matFiles(i).name);
+
+    % Load only 'A', into a struct, so file contents cannot overwrite the script's own variables
+    contents = load(filePath, 'A');
+    if ~isfield(contents, 'A') || ~isfield(contents.A, 'date')
+        error('No "A.date" field found in %s.', filePath);
+    end
+    A = contents.A;
+
+    if isa(A.date, 'datetime')
+        % Convert the datetime column to a string format (e.g., 'dd-mmm-yyyy') and save it back
+        A.date = datestr(A.date, 'dd-mmm-yyyy');
+        save(filePath, 'A');
+    elseif ~(ischar(A.date) || isstring(A.date) || iscellstr(A.date))
+        % Text dates are left as-is so the script can be re-run after a partial pass
+        error('The "date" field in %s is not of type datetime.', filePath);
+    end
+end
+
diff --git a/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_behavior_metadata.yaml b/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_behavior_metadata.yaml index 465475b..ca39691 100644 --- a/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_behavior_metadata.yaml +++ b/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_behavior_metadata.yaml @@ -45,6 +45,8 @@ Behavior: name: reward OptOut: name: opt_out + StopSound: + name: stop_sound StatesTable: description: Contains the start and end times of each state in the task. EventTypesTable: @@ -123,11 +125,11 @@ Behavior: output_type: numeric Block: name: block_type - description: The block type (High, Low or Test). High and Low blocks are high reward (20, 40, or 80μL) or low reward (5, 10, or 20μL) blocks. Test blocks are mixed blocks. + description: The block type (High, Low or Mixed). High and Low blocks are high reward (20, 40, or 80μL) or low reward (5, 10, or 20μL) blocks. The mixed blocks offered all volumes.
expression_type: string output_type: string BlockLengthTest: - name: num_trials_in_test_blocks + name: num_trials_in_mixed_blocks description: The number of trials in each mixed blocks. expression_type: integer output_type: numeric @@ -246,3 +248,8 @@ Behavior: description: Determines how many trials occur in stage 8 before transition. expression_type: integer output_type: numeric + HiITI: + name: high_ITI + description: Task parameter. # no description provided + expression_type: double + output_type: numeric diff --git a/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_general_metadata.yaml b/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_general_metadata.yaml index fa30d0e..54d0e21 100644 --- a/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_general_metadata.yaml +++ b/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_general_metadata.yaml @@ -30,6 +30,10 @@ NWBFile: blocks which offered all rewards. 20μL was present in all blocks, so comparing behavior on trials offering this reward revealed contextual effects (i.e., effects of hidden states). The hidden states differed in their average reward and therefore in their opportunity costs, or what the rat might miss out on by continuing to wait. 
+ keywords: + - decision making + - reinforcement learning + - hidden state inference institution: NYU Center for Neural Science lab: Constantinople experimenter: diff --git a/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_nwbconverter.py b/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_nwbconverter.py index cf1b8b9..db703fe 100644 --- a/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_nwbconverter.py +++ b/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_nwbconverter.py @@ -12,8 +12,7 @@ from neuroconv.utils import FilePathType from probeinterface import read_probeinterface, Probe -# TODO: move to general_interfaces -from constantinople_lab_to_nwb.mah_2024.interfaces import Mah2024BpodInterface +from constantinople_lab_to_nwb.general_interfaces import BpodBehaviorInterface from constantinople_lab_to_nwb.schierek_embargo_2024.interfaces import ( SchierekEmbargo2024SortingInterface, @@ -29,7 +28,7 @@ class SchierekEmbargo2024NWBConverter(NWBConverter): RecordingLFP=OpenEphysRecordingInterface, PhySorting=PhySortingInterface, ProcessedSorting=SchierekEmbargo2024SortingInterface, - RawBehavior=Mah2024BpodInterface, + RawBehavior=BpodBehaviorInterface, ProcessedBehavior=SchierekEmbargo2024ProcessedBehaviorInterface, )