From 8a8ebbd143fd9ae2b6d2e44e41cb5ed8c118bd79 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Wed, 28 Aug 2024 10:14:21 +0200
Subject: [PATCH 01/26] add matlab script

---
 .../mat_utils/convertMatDateToString.m        | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 src/constantinople_lab_to_nwb/mah_2024/mat_utils/convertMatDateToString.m

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mat_utils/convertMatDateToString.m b/src/constantinople_lab_to_nwb/mah_2024/mat_utils/convertMatDateToString.m
new file mode 100644
index 0000000..76824a6
--- /dev/null
+++ b/src/constantinople_lab_to_nwb/mah_2024/mat_utils/convertMatDateToString.m
@@ -0,0 +1,32 @@
+% Utility script: for every .mat file in folderPath, load the struct A, convert
+% A.date from datetime to a 'dd-mmm-yyyy' character array for compatibility
+% with Python, and overwrite the original file in place with the modified A.
+
+% Folder holding the .mat files to rewrite (the files are modified in place)
+folderPath = '/Volumes/T9/Constantinople/A_Structs';  % Adjust this path as needed
+
+% List the .mat files located directly inside folderPath
+matFiles = dir(fullfile(folderPath, '*.mat'));
+
+% Process the files one at a time
+for i = 1:length(matFiles)
+    % Build the full file path from the folder and name reported by dir
+    filePath = fullfile(matFiles(i).folder, matFiles(i).name);
+
+    % Load the file's variables into the workspace; the code below expects a struct A
+    load(filePath);
+
+    % Only convert when A.date exists and is still of type datetime
+    if isfield(A, 'date') && isa(A.date, 'datetime')
+        % Convert the datetime values to 'dd-mmm-yyyy' text (e.g., '28-Aug-2024')
+        A.date = datestr(A.date, 'dd-mmm-yyyy');
+
+        % Overwrite the file; only the variable A is written back
+        save(filePath, 'A');
+    else  % also reached by files already converted in an earlier run, which stops the script
+        error('The "date" field does not exist or is not of type datetime.');
+    end
+
+    clear A;  % remove A before the next load so a previous file's struct is never reused
+end
+

From 5b904da7c7fe653af8efb6e581f1ad17928cc025 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Wed, 28 Aug 2024 10:14:48 +0200
Subject: [PATCH 02/26] add convert sessions script draft

---
 .../mah_2024/mah_2024_convert_all_sessions.py | 146 ++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100644 src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
new file mode 100644
index 0000000..c26db16
--- /dev/null
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
@@ -0,0 +1,146 @@
+import os
+from datetime import datetime
+from pathlib import Path
+from typing import Union, List
+
+from pymatreader import read_mat
+
+from constantinople_lab_to_nwb.mah_2024.mah_2024_convert_session import session_to_nwb
+
+
+def _get_sessions_to_convert_from_mat(
+        file_path: Union[str, Path],
+        default_struct_name: str = "A",
+) -> List[str]:
+    """
+    Get the list of sessions to convert from a .mat file.
+
+    Parameters
+    ----------
+    file_path : str or Path
+        The path to an existing .mat file whose `default_struct_name` struct holds a 'date' key.
+    """
+    file_path = Path(file_path)
+    if not file_path.exists():
+        raise FileNotFoundError(f"The file {file_path} does not exist.")
+    if ".mat" not in file_path.suffixes:  # checks every suffix, so e.g. "x.mat.bak" also passes
+        raise ValueError(f"The file {file_path} is not a .mat file.")
+    behavior_data = read_mat(str(file_path))
+    if default_struct_name not in behavior_data:
+        raise ValueError(f"The default struct name '{default_struct_name}' is missing from {file_path}.")
+
+    behavior_data = behavior_data[default_struct_name]
+    if "date" not in behavior_data:
+        raise ValueError(f"The 'date' key is missing from {file_path}.")
+
+    return behavior_data["date"]  # NOTE(review): confirm read_mat yields a list (not a bare str) for one-session files
+
+
+def sessions_to_nwb(
+        raw_behavior_folder_path: Union[str, Path],
+        processed_behavior_folder_path: Union[str, Path],
+        nwbfile_folder_path: Union[str, Path],
+        column_name_mapping: dict = None,
+        column_descriptions: dict = None,
+        overwrite: bool = False,
+):
+    if not nwbfile_folder_path.exists():
+        os.makedirs(nwbfile_folder_path, exist_ok=True)
+
+    processed_mat_files = list(processed_behavior_folder_path.glob("*.mat"))
+    subject_ids = [processed_behavior_file_path.stem.split("_")[-1] for processed_behavior_file_path in processed_mat_files]
+
+    for subject_id, processed_behavior_file_path in zip(subject_ids, processed_mat_files):
+        if subject_id != "C005":
+            continue
+        dates_from_mat = _get_sessions_to_convert_from_mat(file_path=processed_behavior_file_path)
+
+        for date_from_mat in dates_from_mat:
+            date_obj = datetime.strptime(date_from_mat, '%d-%b-%Y')
+            formatted_date_str = date_obj.strftime('%Y%m%d')
+
+            raw_behavior_file_paths = list((raw_behavior_folder_path / subject_id / "DataFiles").glob(f"*{formatted_date_str}*.mat"))
+            if len(raw_behavior_file_paths) != 1:
+                raise ValueError(f"Expected to find 1 raw behavior file for date {formatted_date_str}, found {len(raw_behavior_file_paths)}.")
+            raw_behavior_file_path = raw_behavior_file_paths[0]
+
+            session_id = raw_behavior_file_path.stem.split("_", maxsplit=1)[1].replace("_", "-")
+            nwbfile_path = nwbfile_folder_path / f"sub-{subject_id}_ses-{session_id}.nwb"
+
+            if nwbfile_path.exists() and not overwrite:
+                print(f"Skipping existing NWB file: {nwbfile_path}")
+                continue
+
+            session_to_nwb(
+                raw_behavior_file_path=raw_behavior_file_path,
+                processed_behavior_file_path=processed_behavior_file_path,
+                date=date_from_mat,
+                nwbfile_path=nwbfile_path,
+                column_name_mapping=column_name_mapping,
+                column_descriptions=column_descriptions,
+                overwrite=overwrite,
+            )
+
+
+if __name__ == "__main__":
+
+    # Parameters for conversion
+    processed_behavior_folder_path = Path(r"/Volumes/T9/Constantinople/A_Structs")
+    raw_behavior_folder_path = Path(r"/Volumes/T9/Constantinople/raw_Bpod")
+
+    column_name_mapping = dict(
+        hits="is_rewarded",
+        vios="is_violation",
+        optout="is_opt_out",
+        wait_time="wait_time",
+        wait_time_unthresholded="wait_time_unthresholded",
+        wait_thresh="wait_time_threshold",
+        wait_for_cpoke="wait_for_center_poke",
+        zwait_for_cpoke="z_scored_wait_for_center_poke",
+        side="rewarded_port",
+        lpoke="num_left_pokes",
+        rpoke="num_right_pokes",
+        cpoke="num_center_pokes",
+        lpokedur="duration_of_left_pokes",
+        rpokedur="duration_of_right_pokes",
+        cpokedur="duration_of_center_pokes",
+        rt="reaction_time",
+        slrt="short_latency_reaction_time",
+        ITI="inter_trial_interval",
+    )
+    # The column descriptions are used to add descriptions to the columns in the processed data. (optional)
+    column_descriptions = dict(
+        hits="Whether the subject received reward for each trial.",
+        vios="Whether the subject violated the trial by not maintaining center poke for the time required by 'nose_in_center'.",
+        optout="Whether the subject opted out for each trial.",
+        wait_time="The wait time for the subject for for each trial in seconds, after removing outliers."
+                  " For hit trials (when reward was delivered) the wait time is equal to the reward delay."
+                  " For opt-out trials, the wait time is equal to the time waited from trial start to opting out.",
+        wait_for_cpoke="The time between side port poke and center poke in seconds, includes the time when the subject is consuming the reward.",
+        zwait_for_cpoke="The z-scored wait_for_cpoke using all trials.",
+        side="The rewarded port (Left or Right) for each trial.",
+        lpoke="The number of left pokes for each trial.",
+        rpoke="The number of right pokes for each trial.",
+        cpoke="The number of center pokes for each trial.",
+        lpokedur="The duration of left pokes for each trial in seconds.",
+        rpokedur="The duration of right pokes for each trial in seconds.",
+        cpokedur="The duration of center pokes for each trial in seconds.",
+        rt="The reaction time in seconds.",
+        slrt="The short-latency reaction time in seconds.",
+        ITI="The time to initiate trial in seconds (the time between the end of the consummatory period and the time to initiate the next trial).",
+        wait_time_unthresholded="The wait time for the subject for each trial in seconds without removing outliers.",
+        wait_thresh="The threshold in seconds to remove wait-times (mean + 1*std of all cumulative wait-times).",
+    )
+
+    nwbfile_folder_path = Path("/Volumes/T9/Constantinople/nwbfiles")
+
+    overwrite = False
+
+    sessions_to_nwb(
+        raw_behavior_folder_path=raw_behavior_folder_path,
+        processed_behavior_folder_path=processed_behavior_folder_path,
+        nwbfile_folder_path=nwbfile_folder_path,
+        column_name_mapping=column_name_mapping,
+        column_descriptions=column_descriptions,
+        overwrite=overwrite,
+    )
\ No newline at end of file

From 1e9ed4f6edd3c671ba4d96fd62768be34201270d Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Wed, 28 Aug 2024 10:48:45 +0200
Subject: [PATCH 03/26] switch back to main

---
 .../mah_2024/mah_2024_requirements.txt                          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt
index e9a3dc1..07a761b 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt
@@ -1 +1 @@
-ndx-structured-behavior @ git+https://github.com/catalystneuro/ndx-structured-behavior.git@make_task_program_and_schema_optional
+ndx-structured-behavior @ git+https://github.com/catalystneuro/ndx-structured-behavior.git@main

From 31bd924965567e08f2b88b4bca7699774f84b0dc Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Wed, 28 Aug 2024 11:16:57 +0200
Subject: [PATCH 04/26] fixes

---
 .../mah_2024/interfaces/mah_2024_bpodinterface.py          | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py b/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py
index 322db38..c8927cd 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py
@@ -356,7 +356,7 @@ def create_actions_table(self, metadata: dict, trial_start_times: List[float]) -
 
             for sound_event in sound_events:
                 timestamps = events[sound_event]
-                if isinstance(timestamps, float):
+                if not isinstance(timestamps, list):
                     timestamps = [timestamps]
                 for timestamp in timestamps:
                     actions_table.add_row(
@@ -405,7 +405,7 @@ def create_events_table(self, metadata: dict, trial_start_times: List[float]) ->
                 if event_name not in event_value_mapping:
                     continue
                 relative_timestamps = events[event_name]
-                if isinstance(relative_timestamps, float):
+                if not isinstance(relative_timestamps, list):
                     relative_timestamps = [relative_timestamps]
                 event_type = event_types.event_name[:].index(event_types_metadata[event_name]["name"])
                 for timestamp in relative_timestamps:
@@ -539,6 +539,9 @@ def add_task_arguments_to_trials(
         for task_argument_name in task_arguments_for_this_session:
             if task_argument_name in arguments_to_exclude:
                 continue
+            if task_argument_name not in task_arguments_metadata:
+                warn(f"Task argument '{task_argument_name}' not in metadata. Skipping.")
+                continue
             task_argument_values = np.array([trial_settings["GUI"][task_argument_name] for trial_settings in trials_settings])
             task_argument_type = task_arguments_metadata[task_argument_name]["expression_type"]
             if task_argument_type == "boolean":

From 006e6bd78be59b4e9a06b572c72ade9003b1ee07 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Wed, 28 Aug 2024 11:17:16 +0200
Subject: [PATCH 05/26] add new state

---
 .../mah_2024/metadata/mah_2024_behavior_metadata.yaml      | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_behavior_metadata.yaml b/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_behavior_metadata.yaml
index 465475b..a316e94 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_behavior_metadata.yaml
+++ b/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_behavior_metadata.yaml
@@ -45,6 +45,8 @@ Behavior:
       name: reward
     OptOut:
       name: opt_out
+    StopSound:
+      name: stop_sound
   StatesTable:
     description: Contains the start and end times of each state in the task.
   EventTypesTable:
@@ -246,3 +248,8 @@ Behavior:
       description: Determines how many trials occur in stage 8 before transition.
       expression_type: integer
       output_type: numeric
+    HiITI:
+      name: high_ITI
+      description: Task parameter. # no description provided
+      expression_type: double
+      output_type: numeric

From 1121b7f844f858e79c84b67fa05cece6749443e7 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Thu, 29 Aug 2024 13:43:07 +0200
Subject: [PATCH 06/26] fix slrt meaning

---
 .../mah_2024/mah_2024_convert_all_sessions.py                 | 4 ++--
 .../mah_2024/mah_2024_convert_session.py                      | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
index c26db16..021ae7a 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
@@ -105,7 +105,7 @@ def sessions_to_nwb(
         rpokedur="duration_of_right_pokes",
         cpokedur="duration_of_center_pokes",
         rt="reaction_time",
-        slrt="short_latency_reaction_time",
+        slrt="side_poke_reaction_time", # side led on = side poke
         ITI="inter_trial_interval",
     )
     # The column descriptions are used to add descriptions to the columns in the processed data. (optional)
@@ -126,7 +126,7 @@ def sessions_to_nwb(
         rpokedur="The duration of right pokes for each trial in seconds.",
         cpokedur="The duration of center pokes for each trial in seconds.",
         rt="The reaction time in seconds.",
-        slrt="The short-latency reaction time in seconds.",
+        slrt="The side poke reaction time in seconds.",
         ITI="The time to initiate trial in seconds (the time between the end of the consummatory period and the time to initiate the next trial).",
         wait_time_unthresholded="The wait time for the subject for each trial in seconds without removing outliers.",
         wait_thresh="The threshold in seconds to remove wait-times (mean + 1*std of all cumulative wait-times).",
diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
index f22ca0b..842e6a8 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
@@ -125,7 +125,7 @@ def session_to_nwb(
         rpokedur="duration_of_right_pokes",
         cpokedur="duration_of_center_pokes",
         rt="reaction_time",
-        slrt="short_latency_reaction_time",
+        slrt="side_poke_reaction_time",
         ITI="inter_trial_interval",
     )
     # The column descriptions are used to add descriptions to the columns in the processed data. (optional)
@@ -146,7 +146,7 @@ def session_to_nwb(
         rpokedur="The duration of right pokes for each trial in seconds.",
         cpokedur="The duration of center pokes for each trial in seconds.",
         rt="The reaction time in seconds.",
-        slrt="The short-latency reaction time in seconds.",
+        slrt="The side poke reaction time in seconds.",
         ITI="The time to initiate trial in seconds (the time between the end of the consummatory period and the time to initiate the next trial).",
         wait_time_unthresholded="The wait time for the subject for each trial in seconds without removing outliers.",
         wait_thresh="The threshold in seconds to remove wait-times (mean + 1*std of all cumulative wait-times).",

From 12bf2c0d8bbb1c039569a8a52c43cd086af08e8d Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Thu, 29 Aug 2024 14:37:16 +0200
Subject: [PATCH 07/26] rename test block to mixed block

---
 .../mah_2024/interfaces/mah_2024_bpodinterface.py   | 13 ++++++-------
 .../metadata/mah_2024_behavior_metadata.yaml        |  4 ++--
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py b/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py
index c8927cd..6b24943 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py
@@ -47,6 +47,7 @@ def __init__(
         self.default_struct_name = default_struct_name
         self.file_path = file_path
         self._bpod_struct = self._read_file()
+        self._block_name_mapping = {1: "Mixed", 2: "High", 3: "Low"}
         super().__init__(file_path=file_path, verbose=verbose)
 
     def get_metadata(self) -> DeepDict:
@@ -139,13 +140,13 @@ def get_metadata(self) -> DeepDict:
             ),
             Block=dict(
                 name="block_type",
-                description="The block type (High, Low or Test).",
+                description="The block type (High, Low or Mixed).",
                 expression_type="string",
                 output_type="string",
             ),
             BlockLengthTest=dict(
-                name="num_trials_in_test_blocks",
-                description="The number of trials in test blocks.",
+                name="num_trials_in_mixed_blocks",
+                description="The number of trials in mixed blocks.",
                 expression_type="integer",
                 output_type="numeric",
             ),
@@ -434,8 +435,7 @@ def create_task_arguments_table(self, metadata: dict) -> TaskArgumentsTable:
             if expression_type == "boolean":
                 task_argument_value = bool(task_argument_value)
             if task_argument_name == "Block":
-                block_name_mapping = {1: "Test", 2: "High", 3: "Low"}
-                task_argument_value = block_name_mapping[task_argument_value]
+                task_argument_value = self._block_name_mapping[task_argument_value]
 
             task_arguments.add_row(
                 argument_name=task_arguments_metadata[task_argument_name]["name"],
@@ -547,8 +547,7 @@ def add_task_arguments_to_trials(
             if task_argument_type == "boolean":
                 task_argument_values = task_argument_values.astype(bool)
             elif task_argument_name == "Block":
-                block_name_mapping = {1: "Test", 2: "High", 3: "Low"}
-                task_argument_values = np.array([block_name_mapping[block] for block in task_argument_values])
+                task_argument_values = np.array([self._block_name_mapping[block] for block in task_argument_values])
 
             trials.add_column(
                 name=task_arguments_metadata[task_argument_name]["name"],
diff --git a/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_behavior_metadata.yaml b/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_behavior_metadata.yaml
index a316e94..ca39691 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_behavior_metadata.yaml
+++ b/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_behavior_metadata.yaml
@@ -125,11 +125,11 @@ Behavior:
       output_type: numeric
     Block:
       name: block_type
-      description: The block type (High, Low or Test). High and Low blocks are high reward (20, 40, or 80μL) or low reward (5, 10, or 20μL) blocks. Test blocks are mixed blocks.
+      description: The block type (High, Low or Mixed). High and Low blocks are high reward (20, 40, or 80μL) or low reward (5, 10, or 20μL) blocks. The mixed blocks offered all volumes.
       expression_type: string
       output_type: string
     BlockLengthTest:
-      name: num_trials_in_test_blocks
+      name: num_trials_in_mixed_blocks
       description: The number of trials in each mixed blocks.
       expression_type: integer
       output_type: numeric

From 80f104927d159685a0b82b60a6d28c2857eb8852 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Thu, 29 Aug 2024 14:37:53 +0200
Subject: [PATCH 08/26] filter columns

---
 .../interfaces/mah_2024_processedbehaviorinterface.py         | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_processedbehaviorinterface.py b/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_processedbehaviorinterface.py
index 0e99c88..22e77fa 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_processedbehaviorinterface.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_processedbehaviorinterface.py
@@ -97,7 +97,9 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, column_name_mapping:
             if column in dataframe.columns:
                 dataframe[column] = dataframe[column].astype(bool)
 
-        columns_to_add = column_name_mapping.keys() if column_name_mapping is not None else dataframe.columns
+        columns_to_add = dataframe.columns
+        if column_name_mapping is not None:
+            columns_to_add = [column for column in column_name_mapping.keys() if column in dataframe.columns]
 
         trials = nwbfile.trials
         if trials is None:

From 7e99b64c1823e04a7d3aacdb723023b5736e0e84 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Thu, 29 Aug 2024 14:40:56 +0200
Subject: [PATCH 09/26] add docstring

---
 .../mah_2024/mah_2024_convert_all_sessions.py | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
index 021ae7a..7b5e382 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
@@ -44,6 +44,26 @@ def sessions_to_nwb(
         column_descriptions: dict = None,
         overwrite: bool = False,
 ):
+    """
+    Convert all sessions to NWB format.
+    The number of sessions to convert is determined by the processed behavior files.
+    Each processed behavior file contains data for multiple days, the 'date' key is used to identify the sessions in the raw Bpod output.
+
+    Parameters
+    ----------
+    raw_behavior_folder_path: str or Path
+        The path to the folder containing the raw Bpod output files.
+    processed_behavior_folder_path: str or Path
+        The path to the folder containing the processed behavior files.
+    nwbfile_folder_path: str or Path
+        The path to the folder where the NWB files will be saved.
+    column_name_mapping: dict, optional
+        Dictionary to map the column names in the processed behavior data to more descriptive column names.
+    column_descriptions: dict, optional
+        Dictionary to add descriptions to the columns in the processed behavior data.
+    overwrite: bool, optional
+        Whether to overwrite existing NWB files. Default is False.
+    """
     if not nwbfile_folder_path.exists():
         os.makedirs(nwbfile_folder_path, exist_ok=True)
 

From 970922d836f52e186dd56f3347fe628371f14171 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Fri, 30 Aug 2024 14:14:54 +0200
Subject: [PATCH 10/26] propagate subject metadata

---
 .../mah_2024/mah_2024_convert_all_sessions.py | 63 ++++++++++++-------
 1 file changed, 42 insertions(+), 21 deletions(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
index 7b5e382..c14d82f 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
@@ -5,12 +5,15 @@
 
 from pymatreader import read_mat
 
-from constantinople_lab_to_nwb.mah_2024.mah_2024_convert_session import session_to_nwb
+from constantinople_lab_to_nwb.mah_2024.mah_2024_convert_session import (
+    session_to_nwb,
+    get_subject_metadata_from_rat_info_folder,
+)
 
 
 def _get_sessions_to_convert_from_mat(
-        file_path: Union[str, Path],
-        default_struct_name: str = "A",
+    file_path: Union[str, Path],
+    default_struct_name: str = "A",
 ) -> List[str]:
     """
     Get the list of sessions to convert from a .mat file.
@@ -37,12 +40,13 @@ def _get_sessions_to_convert_from_mat(
 
 
 def sessions_to_nwb(
-        raw_behavior_folder_path: Union[str, Path],
-        processed_behavior_folder_path: Union[str, Path],
-        nwbfile_folder_path: Union[str, Path],
-        column_name_mapping: dict = None,
-        column_descriptions: dict = None,
-        overwrite: bool = False,
+    raw_behavior_folder_path: Union[str, Path],
+    processed_behavior_folder_path: Union[str, Path],
+    rat_info_folder_path: Union[str, Path],
+    nwbfile_folder_path: Union[str, Path],
+    column_name_mapping: dict = None,
+    column_descriptions: dict = None,
+    overwrite: bool = False,
 ):
     """
     Convert all sessions to NWB format.
@@ -55,6 +59,8 @@ def sessions_to_nwb(
         The path to the folder containing the raw Bpod output files.
     processed_behavior_folder_path: str or Path
         The path to the folder containing the processed behavior files.
+    rat_info_folder_path: str or Path
+        The path to the folder containing the rat info files.
     nwbfile_folder_path: str or Path
         The path to the folder where the NWB files will be saved.
     column_name_mapping: dict, optional
@@ -68,20 +74,26 @@ def sessions_to_nwb(
         os.makedirs(nwbfile_folder_path, exist_ok=True)
 
     processed_mat_files = list(processed_behavior_folder_path.glob("*.mat"))
-    subject_ids = [processed_behavior_file_path.stem.split("_")[-1] for processed_behavior_file_path in processed_mat_files]
+    subject_ids = [
+        processed_behavior_file_path.stem.split("_")[-1] for processed_behavior_file_path in processed_mat_files
+    ]
 
     for subject_id, processed_behavior_file_path in zip(subject_ids, processed_mat_files):
-        if subject_id != "C005":
+        if subject_id != "J076":
             continue
         dates_from_mat = _get_sessions_to_convert_from_mat(file_path=processed_behavior_file_path)
 
         for date_from_mat in dates_from_mat:
-            date_obj = datetime.strptime(date_from_mat, '%d-%b-%Y')
-            formatted_date_str = date_obj.strftime('%Y%m%d')
+            date_obj = datetime.strptime(date_from_mat, "%d-%b-%Y")
+            formatted_date_str = date_obj.strftime("%Y%m%d")
 
-            raw_behavior_file_paths = list((raw_behavior_folder_path / subject_id / "DataFiles").glob(f"*{formatted_date_str}*.mat"))
+            raw_behavior_file_paths = list(
+                (raw_behavior_folder_path / subject_id / "DataFiles").glob(f"*{formatted_date_str}*.mat")
+            )
             if len(raw_behavior_file_paths) != 1:
-                raise ValueError(f"Expected to find 1 raw behavior file for date {formatted_date_str}, found {len(raw_behavior_file_paths)}.")
+                raise ValueError(
+                    f"Expected to find 1 raw behavior file for date {formatted_date_str}, found {len(raw_behavior_file_paths)}."
+                )
             raw_behavior_file_path = raw_behavior_file_paths[0]
 
             session_id = raw_behavior_file_path.stem.split("_", maxsplit=1)[1].replace("_", "-")
@@ -91,6 +103,12 @@ def sessions_to_nwb(
                 print(f"Skipping existing NWB file: {nwbfile_path}")
                 continue
 
+            subject_metadata = get_subject_metadata_from_rat_info_folder(
+                folder_path=rat_info_folder_path,
+                subject_id=subject_id,
+                date=date_obj.strftime("%Y-%m-%d"),
+            )
+
             session_to_nwb(
                 raw_behavior_file_path=raw_behavior_file_path,
                 processed_behavior_file_path=processed_behavior_file_path,
@@ -98,6 +116,7 @@ def sessions_to_nwb(
                 nwbfile_path=nwbfile_path,
                 column_name_mapping=column_name_mapping,
                 column_descriptions=column_descriptions,
+                subject_metadata=subject_metadata,
                 overwrite=overwrite,
             )
 
@@ -105,8 +124,9 @@ def sessions_to_nwb(
 if __name__ == "__main__":
 
     # Parameters for conversion
-    processed_behavior_folder_path = Path(r"/Volumes/T9/Constantinople/A_Structs")
+    processed_behavior_folder_path = Path(r"/Volumes/T9/Constantinople/published/A_Structs_Final")
     raw_behavior_folder_path = Path(r"/Volumes/T9/Constantinople/raw_Bpod")
+    rat_info_folder_path = Path(r"/Volumes/T9/Constantinople/Rat_info")
 
     column_name_mapping = dict(
         hits="is_rewarded",
@@ -125,7 +145,7 @@ def sessions_to_nwb(
         rpokedur="duration_of_right_pokes",
         cpokedur="duration_of_center_pokes",
         rt="reaction_time",
-        slrt="side_poke_reaction_time", # side led on = side poke
+        slrt="side_poke_reaction_time",  # side led on = side poke
         ITI="inter_trial_interval",
     )
     # The column descriptions are used to add descriptions to the columns in the processed data. (optional)
@@ -134,8 +154,8 @@ def sessions_to_nwb(
         vios="Whether the subject violated the trial by not maintaining center poke for the time required by 'nose_in_center'.",
         optout="Whether the subject opted out for each trial.",
         wait_time="The wait time for the subject for for each trial in seconds, after removing outliers."
-                  " For hit trials (when reward was delivered) the wait time is equal to the reward delay."
-                  " For opt-out trials, the wait time is equal to the time waited from trial start to opting out.",
+        " For hit trials (when reward was delivered) the wait time is equal to the reward delay."
+        " For opt-out trials, the wait time is equal to the time waited from trial start to opting out.",
         wait_for_cpoke="The time between side port poke and center poke in seconds, includes the time when the subject is consuming the reward.",
         zwait_for_cpoke="The z-scored wait_for_cpoke using all trials.",
         side="The rewarded port (Left or Right) for each trial.",
@@ -154,13 +174,14 @@ def sessions_to_nwb(
 
     nwbfile_folder_path = Path("/Volumes/T9/Constantinople/nwbfiles")
 
-    overwrite = False
+    overwrite = True
 
     sessions_to_nwb(
         raw_behavior_folder_path=raw_behavior_folder_path,
         processed_behavior_folder_path=processed_behavior_folder_path,
+        rat_info_folder_path=rat_info_folder_path,
         nwbfile_folder_path=nwbfile_folder_path,
         column_name_mapping=column_name_mapping,
         column_descriptions=column_descriptions,
         overwrite=overwrite,
-    )
\ No newline at end of file
+    )

From e6ba1d97009937ef0976fd7616a3bc0f514011c2 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Wed, 11 Sep 2024 14:08:36 +0200
Subject: [PATCH 11/26] update nwb folder

---
 .../mah_2024/mah_2024_convert_all_sessions.py              | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
index c14d82f..8544dd0 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
@@ -97,7 +97,10 @@ def sessions_to_nwb(
             raw_behavior_file_path = raw_behavior_file_paths[0]
 
             session_id = raw_behavior_file_path.stem.split("_", maxsplit=1)[1].replace("_", "-")
-            nwbfile_path = nwbfile_folder_path / f"sub-{subject_id}_ses-{session_id}.nwb"
+            subject_nwb_folder_path = nwbfile_folder_path / f"sub-{subject_id}"
+            if not subject_nwb_folder_path.exists():
+                os.makedirs(subject_nwb_folder_path, exist_ok=True)
+            nwbfile_path = subject_nwb_folder_path / f"sub-{subject_id}_ses-{session_id}.nwb"
 
             if nwbfile_path.exists() and not overwrite:
                 print(f"Skipping existing NWB file: {nwbfile_path}")
@@ -172,7 +175,7 @@ def sessions_to_nwb(
         wait_thresh="The threshold in seconds to remove wait-times (mean + 1*std of all cumulative wait-times).",
     )
 
-    nwbfile_folder_path = Path("/Volumes/T9/Constantinople/nwbfiles")
+    nwbfile_folder_path = Path("/Users/weian/data/001169")
 
     overwrite = True
 

From c00d35feb1bfae1787ff2485c8104dfb48638dae Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Wed, 11 Sep 2024 14:09:12 +0200
Subject: [PATCH 12/26] disable overwrite

---
 .../mah_2024/mah_2024_convert_all_sessions.py                 | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
index 8544dd0..46e8540 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
@@ -79,8 +79,6 @@ def sessions_to_nwb(
     ]
 
     for subject_id, processed_behavior_file_path in zip(subject_ids, processed_mat_files):
-        if subject_id != "J076":
-            continue
         dates_from_mat = _get_sessions_to_convert_from_mat(file_path=processed_behavior_file_path)
 
         for date_from_mat in dates_from_mat:
@@ -177,7 +175,7 @@ def sessions_to_nwb(
 
     nwbfile_folder_path = Path("/Users/weian/data/001169")
 
-    overwrite = True
+    overwrite = False
 
     sessions_to_nwb(
         raw_behavior_folder_path=raw_behavior_folder_path,

From 4e5dc328bec9489a59f21df30cc50dc0ba1ba923 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Fri, 13 Sep 2024 13:39:39 +0200
Subject: [PATCH 13/26] add keywords

---
 .../mah_2024/metadata/mah_2024_general_metadata.yaml          | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_general_metadata.yaml b/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_general_metadata.yaml
index fa30d0e..54d0e21 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_general_metadata.yaml
+++ b/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_general_metadata.yaml
@@ -30,6 +30,10 @@ NWBFile:
     blocks which offered all rewards. 20μL was present in all blocks, so comparing behavior on trials
     offering this reward revealed contextual effects (i.e., effects of hidden states). The hidden states differed in
     their average reward and therefore in their opportunity costs, or what the rat might miss out on by continuing to wait.
+  keywords:
+    - decision making
+    - reinforcement learning
+    - hidden state inference
   institution: NYU Center for Neural Science
   lab: Constantinople
   experimenter:

From 0921681f28de5d9d054249a437ee43d57764d25b Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Fri, 13 Sep 2024 13:40:04 +0200
Subject: [PATCH 14/26] add task protocol name to NWBFile metadata

---
 .../mah_2024/mah_2024_convert_session.py                        | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
index bee8038..368e60a 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
@@ -130,6 +130,7 @@ def session_to_nwb(
     converter = Mah2024NWBConverter(source_data=source_data, verbose=verbose)
 
     subject_id, session_id = Path(raw_behavior_file_path).stem.split("_", maxsplit=1)
+    protocol = session_id.split("_")[0]
     session_id = session_id.replace("_", "-")
 
     # Add datetime to conversion
@@ -139,6 +140,7 @@ def session_to_nwb(
     metadata["NWBFile"].update(
         session_start_time=session_start_time.replace(tzinfo=tzinfo),
         session_id=session_id,
+        protocol=protocol,
     )
 
     # Update default metadata with the editable in the corresponding yaml file

From ca6aa37aea640a75d7b115ede0bae98f3032d6df Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Fri, 13 Sep 2024 13:40:25 +0200
Subject: [PATCH 15/26] pin to ndx-structured-behavior@main

---
 .../mah_2024/mah_2024_requirements.txt                          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt
index 07a761b..5f8ac4e 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt
@@ -1 +1 @@
-ndx-structured-behavior @ git+https://github.com/catalystneuro/ndx-structured-behavior.git@main
+git+https://github.com/catalystneuro/ndx-structured-behavior.git@main

From 7cc2b7cb7b0f109f785d5184da0801542c905fa6 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Fri, 13 Sep 2024 14:39:53 +0200
Subject: [PATCH 16/26] switch ndx-structured-behavior requirement to rly fork@main

---
 .../mah_2024/mah_2024_requirements.txt                          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt
index 5f8ac4e..7b3cb0e 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_requirements.txt
@@ -1 +1 @@
-git+https://github.com/catalystneuro/ndx-structured-behavior.git@main
+git+https://github.com/rly/ndx-structured-behavior.git@main

From bce97e40c35d2a47e637b71084c5a694d4f6479e Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Mon, 23 Sep 2024 14:04:54 +0200
Subject: [PATCH 17/26] add progress bar, match duplicate dates by trial count, handle missing DOB

---
 .../mah_2024/mah_2024_convert_all_sessions.py | 56 +++++++++++++++++--
 .../mah_2024/mah_2024_convert_session.py      | 18 +++---
 2 files changed, 63 insertions(+), 11 deletions(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
index 46e8540..f5ce32a 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
@@ -3,13 +3,31 @@
 from pathlib import Path
 from typing import Union, List
 
+from nwbinspector import inspect_all, format_messages, save_report
 from pymatreader import read_mat
+from tqdm import tqdm
 
 from constantinople_lab_to_nwb.mah_2024.mah_2024_convert_session import (
     session_to_nwb,
     get_subject_metadata_from_rat_info_folder,
 )
 
+import warnings
+
+# Suppress specific UserWarning messages
+warnings.filterwarnings(
+    "ignore",
+    message="The linked table for DynamicTableRegion 'action_type' does not share an ancestor with the DynamicTableRegion.",
+)
+warnings.filterwarnings(
+    "ignore",
+    message="The linked table for DynamicTableRegion 'event_type' does not share an ancestor with the DynamicTableRegion.",
+)
+warnings.filterwarnings(
+    "ignore",
+    message="The linked table for DynamicTableRegion 'state_type' does not share an ancestor with the DynamicTableRegion.",
+)
+
 
 def _get_sessions_to_convert_from_mat(
     file_path: Union[str, Path],
@@ -76,18 +94,48 @@ def sessions_to_nwb(
     processed_mat_files = list(processed_behavior_folder_path.glob("*.mat"))
     subject_ids = [
         processed_behavior_file_path.stem.split("_")[-1] for processed_behavior_file_path in processed_mat_files
-    ]
+    ][:10]
+    sessions_to_convert_per_subject = {
+        subject_id: _get_sessions_to_convert_from_mat(file_path=processed_behavior_file_path)
+        for subject_id, processed_behavior_file_path in zip(subject_ids, processed_mat_files)
+    }
 
     for subject_id, processed_behavior_file_path in zip(subject_ids, processed_mat_files):
-        dates_from_mat = _get_sessions_to_convert_from_mat(file_path=processed_behavior_file_path)
-
-        for date_from_mat in dates_from_mat:
+        dates_from_mat = sessions_to_convert_per_subject[subject_id]
+        num_sessions_per_subject = len(dates_from_mat)
+        progress_bar = tqdm(
+            dates_from_mat,
+            desc=f"Converting subject '{subject_id}' with {num_sessions_per_subject} sessions to NWB ...",
+            position=0,
+            total=num_sessions_per_subject,
+            dynamic_ncols=True,
+        )
+
+        for date_from_mat in progress_bar:
             date_obj = datetime.strptime(date_from_mat, "%d-%b-%Y")
             formatted_date_str = date_obj.strftime("%Y%m%d")
 
             raw_behavior_file_paths = list(
                 (raw_behavior_folder_path / subject_id / "DataFiles").glob(f"*{formatted_date_str}*.mat")
             )
+            if len(raw_behavior_file_paths) != 1:
+                # TODO: figure out how to match duplicate dates
+                # ntrials from processed then read the raw file and check if the number of trials match
+                processed_behavior_data = read_mat(str(processed_behavior_file_path))
+
+                date_index = list(dates_from_mat).index(date_from_mat)
+                num_trials = processed_behavior_data["A"]["ntrials"][date_index]
+                for behavior_file_path in raw_behavior_file_paths:
+                    try:
+                        raw_behavior_data = read_mat(str(behavior_file_path))
+                    except ValueError as e:
+                        print(f"Error reading file: {behavior_file_path} , {e}")
+                        continue
+                    num_trials_here = raw_behavior_data["SessionData"]["nTrials"]
+                    if num_trials_here == num_trials:
+                        raw_behavior_file_paths = [behavior_file_path]
+                        break
+
             if len(raw_behavior_file_paths) != 1:
                 raise ValueError(
                     f"Expected to find 1 raw behavior file for date {formatted_date_str}, found {len(raw_behavior_file_paths)}."
diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
index 368e60a..48e83c0 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
@@ -1,5 +1,6 @@
 from pathlib import Path
 from typing import Union, Optional
+from warnings import warn
 
 import pandas as pd
 from dateutil import tz
@@ -40,13 +41,16 @@ def get_subject_metadata_from_rat_info_folder(
         filtered_rat_registry = rat_registry[rat_registry["RatName"] == subject_id]
         if not filtered_rat_registry.empty:
             date_of_birth = filtered_rat_registry["DOB"].values[0]
-            # convert date of birth to datetime with format "yyyy-mm-dd"
-            date_of_birth = pd.to_datetime(date_of_birth, format="%Y-%m-%d")
-            sex = filtered_rat_registry["sex"].values[0]
-            subject_metadata.update(
-                date_of_birth=date_of_birth,
-                sex=sex,
-            )
+            if date_of_birth:
+                # convert date of birth to datetime with format "yyyy-mm-dd"
+                date_of_birth = pd.to_datetime(date_of_birth, format="%Y-%m-%d")
+                subject_metadata.update(date_of_birth=date_of_birth)
+            else:
+                # TODO: what to do if date of birth is missing?
+                warn("Date of birth is missing. We recommend adding this information to the rat info files.")
+                # Using age range specified in the manuscript
+                subject_metadata.update(age="P6M/P24M")
+            subject_metadata.update(sex=filtered_rat_registry["sex"].values[0])
             vendor = filtered_rat_registry["vendor"].values[0]
             if vendor:
                 subject_metadata.update(description=f"Vendor: {vendor}")

From 93a9fe1e71ab993f0843d078c0ef48c28ff2a8fe Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Mon, 30 Sep 2024 16:42:28 +0200
Subject: [PATCH 18/26] switch to date_index to fix issue when duplicated dates
 are in the file

---
 .../mah_2024_processedbehaviorinterface.py    | 103 ++++++++++--------
 1 file changed, 56 insertions(+), 47 deletions(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_processedbehaviorinterface.py b/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_processedbehaviorinterface.py
index 22e77fa..2c94b8e 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_processedbehaviorinterface.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_processedbehaviorinterface.py
@@ -1,4 +1,5 @@
 """Primary class for converting experiment-specific behavior."""
+
 from pathlib import Path
 from typing import Optional, Union
 
@@ -9,48 +10,15 @@
 from pynwb.file import NWBFile
 
 
-def _transform_data(data: dict, session_index: int) -> pd.DataFrame:
-    """
-    Transform the data from the .mat file into a DataFrame.
-    """
-    if "ntrials" not in data:
-        raise ValueError("The 'ntrials' key is missing from the data.")
-    num_trials = data["ntrials"]
-    # Calculate start and stop indices
-    start_indices = np.concatenate(([0], np.cumsum(num_trials)[:-1])).astype(int)
-    stop_indices = np.cumsum(num_trials).astype(int)
-
-    start = start_indices[session_index]
-    stop = stop_indices[session_index]
-
-    num_all_trials = int(np.sum(num_trials))
-    column_names = list(data.keys())
-
-    columns_with_arrays = [column for column in column_names if isinstance(data[column], list) and len(data[column]) == num_all_trials]
-    # Create DataFrame with relevant columns
-    dataframe = pd.DataFrame({column_name: data[column_name][start:stop] for column_name in columns_with_arrays})
-
-    # Add side
-    if "side" in data:
-        side = np.array([side_char for side_char in data["side"]])
-        side_to_add = side[start:stop]
-        dataframe["side"] = side_to_add
-
-    if "wait_thresh" in data:
-        dataframe["wait_thresh"] = data["wait_thresh"] * len(dataframe)
-
-    return dataframe
-
-
 class Mah2024ProcessedBehaviorInterface(BaseDataInterface):
     """Behavior interface for mah_2024 conversion"""
 
     def __init__(
-            self,
-            file_path: Union[str, Path],
-            date: str,
-            default_struct_name: str = "A",
-            verbose: bool = True,
+        self,
+        file_path: Union[str, Path],
+        date_index: int,
+        default_struct_name: str = "A",
+        verbose: bool = True,
     ):
         """
         Interface for adding data from the processed behavior file to an existing NWB file.
@@ -59,14 +27,14 @@ def __init__(
         ----------
         file_path: Union[str, Path]
             Path to the .mat file containing the processed behavior data.
-        date: str
-            Date of the session to convert.
+        date_index: int
+            The row index of the date in the .mat file.
         default_struct_name: str, optional
             The struct name to load from the .mat file, default is "A".
         """
 
         self.default_struct_name = default_struct_name
-        self.date = date
+        self.date_index = date_index
         super().__init__(file_path=file_path, verbose=verbose)
 
     def _read_file(self, file_path: Union[str, Path]) -> pd.DataFrame:
@@ -77,15 +45,52 @@ def _read_file(self, file_path: Union[str, Path]) -> pd.DataFrame:
         behavior_data = behavior_data[self.default_struct_name]
         if "date" not in behavior_data:
             raise ValueError(f"Date not found in {file_path}.")
-        if self.date not in behavior_data["date"]:
-            raise ValueError(f"Date '{self.date}' not found in {file_path}.")
 
-        session_index = behavior_data["date"].index(self.date)
-        dataframe = _transform_data(data=behavior_data, session_index=session_index)
+        dataframe = self._transform_data(data=behavior_data)
 
         return dataframe
 
-    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, column_name_mapping: Optional[dict] = None, column_descriptions: Optional[dict] = None) -> None:
+    def _transform_data(self, data: dict) -> pd.DataFrame:
+        """
+        Transform the data from the .mat file into a DataFrame.
+        """
+        if "ntrials" not in data:
+            raise ValueError("The 'ntrials' key is missing from the data.")
+        num_trials = data["ntrials"]
+        # Calculate start and stop indices
+        start_indices = np.concatenate(([0], np.cumsum(num_trials)[:-1])).astype(int)
+        stop_indices = np.cumsum(num_trials).astype(int)
+
+        start = start_indices[self.date_index]
+        stop = stop_indices[self.date_index]
+
+        num_all_trials = int(np.sum(num_trials))
+        column_names = list(data.keys())
+
+        columns_with_arrays = [
+            column for column in column_names if isinstance(data[column], list) and len(data[column]) == num_all_trials
+        ]
+        # Create DataFrame with relevant columns
+        dataframe = pd.DataFrame({column_name: data[column_name][start:stop] for column_name in columns_with_arrays})
+
+        # Add side
+        if "side" in data:
+            side = np.array([side_char for side_char in data["side"]])
+            side_to_add = side[start:stop]
+            dataframe["side"] = side_to_add
+
+        if "wait_thresh" in data:
+            dataframe["wait_thresh"] = data["wait_thresh"] * len(dataframe)
+
+        return dataframe
+
+    def add_to_nwbfile(
+        self,
+        nwbfile: NWBFile,
+        metadata: dict,
+        column_name_mapping: Optional[dict] = None,
+        column_descriptions: Optional[dict] = None,
+    ) -> None:
         dataframe = self._read_file(file_path=self.source_data["file_path"])
 
         if "side" in dataframe.columns:
@@ -107,7 +112,11 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, column_name_mapping:
 
         for column_name in columns_to_add:
             name = column_name_mapping.get(column_name, column_name) if column_name_mapping is not None else column_name
-            description = column_descriptions.get(column_name, "no description") if column_descriptions is not None else "no description"
+            description = (
+                column_descriptions.get(column_name, "no description")
+                if column_descriptions is not None
+                else "no description"
+            )
             trials.add_column(
                 name=name,
                 description=description,

From b63cb45b1fc8915dd8e9109d690676d095635eff Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Mon, 30 Sep 2024 16:43:18 +0200
Subject: [PATCH 19/26] add utility method to figure out which date to look for
 in the processed file

---
 .../mah_2024/mah_2024_convert_session.py      | 42 ++++++++++++++++---
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
index 48e83c0..90db503 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
@@ -10,6 +10,41 @@
 from constantinople_lab_to_nwb.mah_2024 import Mah2024NWBConverter
 
 
+def get_date_index(bpod_file_path: Union[str, Path], a_struct_file_path: Union[str, Path]) -> int:
+    """
+    Figure out the date index for the processed behavior file.
+
+    Parameters
+    ----------
+    bpod_file_path: Union[str, Path]
+        Path to the raw Bpod output (.mat file).
+    a_struct_file_path: Union[str, Path]
+        Path to the processed behavior data (.mat file).
+
+    Returns
+    -------
+    int
+        The date index for the processed behavior file.
+    """
+    bpod_data = read_mat(str(bpod_file_path))
+    num_trials = bpod_data["SessionData"]["nTrials"]
+    date = bpod_data["SessionData"]["Info"]["SessionDate"]
+
+    a_struct_data = read_mat(str(a_struct_file_path))
+    dates = a_struct_data["A"]["date"]
+    num_trials_per_day = a_struct_data["A"]["ntrials"]
+
+    dates_and_trials = pd.DataFrame(dict(date=dates, num_trials=num_trials_per_day))
+    filtered_dates_and_trials = dates_and_trials[
+        (dates_and_trials["date"] == date) & (dates_and_trials["num_trials"] == num_trials)
+    ]
+
+    if filtered_dates_and_trials.empty:
+        raise ValueError("Could not find the date index in the processed behavior file. This should not happen.")
+
+    return filtered_dates_and_trials.index[0]
+
+
 def get_subject_metadata_from_rat_info_folder(
     folder_path: Union[str, Path],
     subject_id: str,
@@ -73,7 +108,6 @@ def get_subject_metadata_from_rat_info_folder(
 def session_to_nwb(
     raw_behavior_file_path: Union[str, Path],
     processed_behavior_file_path: Union[str, Path],
-    date: str,
     nwbfile_path: Union[str, Path],
     column_name_mapping: Optional[dict] = None,
     column_descriptions: Optional[dict] = None,
@@ -126,7 +160,8 @@ def session_to_nwb(
     conversion_options.update(dict(RawBehavior=dict(task_arguments_to_exclude=task_arguments_to_exclude)))
 
     # Add Processed Behavior
-    source_data.update(dict(ProcessedBehavior=dict(file_path=processed_behavior_file_path, date=date)))
+    date_index = get_date_index(bpod_file_path=raw_behavior_file_path, a_struct_file_path=processed_behavior_file_path)
+    source_data.update(dict(ProcessedBehavior=dict(file_path=processed_behavior_file_path, date_index=date_index)))
     conversion_options.update(
         dict(ProcessedBehavior=dict(column_name_mapping=column_name_mapping, column_descriptions=column_descriptions))
     )
@@ -177,8 +212,6 @@ def session_to_nwb(
     bpod_file_path = Path("/Volumes/T9/Constantinople/raw_Bpod/C005/DataFiles/C005_RWTautowait_20190909_145629.mat")
     # The processed behavior data is stored in a .mat file (contains data for multiple days)
     processed_behavior_file_path = Path("/Volumes/T9/Constantinople/A_Structs/ratTrial_C005.mat")
-    # The date is used to identify the session to convert from the processed behavior file
-    date = "09-Sep-2019"
     # The column name mapping is used to rename the columns in the processed data to more descriptive column names. (optional)
     column_name_mapping = dict(
         hits="is_rewarded",
@@ -242,7 +275,6 @@ def session_to_nwb(
     session_to_nwb(
         raw_behavior_file_path=bpod_file_path,
         processed_behavior_file_path=processed_behavior_file_path,
-        date=date,
         column_name_mapping=column_name_mapping,
         column_descriptions=column_descriptions,
         nwbfile_path=nwbfile_path,

From f41aa975d9b22855731a97e92ce721da68fcb021 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Mon, 30 Sep 2024 16:44:20 +0200
Subject: [PATCH 20/26] modify convert all sessions

---
 .../mah_2024/mah_2024_convert_all_sessions.py | 63 ++++++++-----------
 1 file changed, 25 insertions(+), 38 deletions(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
index f5ce32a..e05aae6 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
@@ -31,6 +31,7 @@
 
 def _get_sessions_to_convert_from_mat(
     file_path: Union[str, Path],
+    bpod_folder_path: Union[str, Path],
     default_struct_name: str = "A",
 ) -> List[str]:
     """
@@ -54,7 +55,20 @@ def _get_sessions_to_convert_from_mat(
     if "date" not in behavior_data:
         raise ValueError(f"The 'date' key is missing from {file_path}.")
 
-    return behavior_data["date"]
+    dates = behavior_data["date"]
+
+    subject_id = file_path.stem.split("_")[-1]
+    bpod_files_to_convert = []
+    for date in dates:
+        date_obj = datetime.strptime(date, "%d-%b-%Y")
+        formatted_date_str = date_obj.strftime("%Y%m%d")
+
+        raw_behavior_file_paths = list(
+            (bpod_folder_path / subject_id / "DataFiles").glob(f"*{formatted_date_str}*.mat")
+        )
+        bpod_files_to_convert.extend(raw_behavior_file_paths)
+
+    return bpod_files_to_convert
 
 
 def sessions_to_nwb(
@@ -91,57 +105,29 @@ def sessions_to_nwb(
     if not nwbfile_folder_path.exists():
         os.makedirs(nwbfile_folder_path, exist_ok=True)
 
-    processed_mat_files = list(processed_behavior_folder_path.glob("*.mat"))
+    processed_mat_files = list(processed_behavior_folder_path.glob("ratTrial*.mat"))
     subject_ids = [
         processed_behavior_file_path.stem.split("_")[-1] for processed_behavior_file_path in processed_mat_files
     ][:10]
     sessions_to_convert_per_subject = {
-        subject_id: _get_sessions_to_convert_from_mat(file_path=processed_behavior_file_path)
+        subject_id: _get_sessions_to_convert_from_mat(
+            file_path=processed_behavior_file_path, bpod_folder_path=raw_behavior_folder_path
+        )
         for subject_id, processed_behavior_file_path in zip(subject_ids, processed_mat_files)
     }
 
     for subject_id, processed_behavior_file_path in zip(subject_ids, processed_mat_files):
-        dates_from_mat = sessions_to_convert_per_subject[subject_id]
-        num_sessions_per_subject = len(dates_from_mat)
+        raw_bpod_file_paths = sessions_to_convert_per_subject[subject_id]
+        num_sessions_per_subject = len(raw_bpod_file_paths)
         progress_bar = tqdm(
-            dates_from_mat,
+            raw_bpod_file_paths,
             desc=f"Converting subject '{subject_id}' with {num_sessions_per_subject} sessions to NWB ...",
             position=0,
             total=num_sessions_per_subject,
             dynamic_ncols=True,
         )
 
-        for date_from_mat in progress_bar:
-            date_obj = datetime.strptime(date_from_mat, "%d-%b-%Y")
-            formatted_date_str = date_obj.strftime("%Y%m%d")
-
-            raw_behavior_file_paths = list(
-                (raw_behavior_folder_path / subject_id / "DataFiles").glob(f"*{formatted_date_str}*.mat")
-            )
-            if len(raw_behavior_file_paths) != 1:
-                # TODO: figure out how to match duplicate dates
-                # ntrials from processed then read the raw file and check if the number of trials match
-                processed_behavior_data = read_mat(str(processed_behavior_file_path))
-
-                date_index = list(dates_from_mat).index(date_from_mat)
-                num_trials = processed_behavior_data["A"]["ntrials"][date_index]
-                for behavior_file_path in raw_behavior_file_paths:
-                    try:
-                        raw_behavior_data = read_mat(str(behavior_file_path))
-                    except ValueError as e:
-                        print(f"Error reading file: {behavior_file_path} , {e}")
-                        continue
-                    num_trials_here = raw_behavior_data["SessionData"]["nTrials"]
-                    if num_trials_here == num_trials:
-                        raw_behavior_file_paths = [behavior_file_path]
-                        break
-
-            if len(raw_behavior_file_paths) != 1:
-                raise ValueError(
-                    f"Expected to find 1 raw behavior file for date {formatted_date_str}, found {len(raw_behavior_file_paths)}."
-                )
-            raw_behavior_file_path = raw_behavior_file_paths[0]
-
+        for raw_behavior_file_path in progress_bar:
             session_id = raw_behavior_file_path.stem.split("_", maxsplit=1)[1].replace("_", "-")
             subject_nwb_folder_path = nwbfile_folder_path / f"sub-{subject_id}"
             if not subject_nwb_folder_path.exists():
@@ -152,6 +138,8 @@ def sessions_to_nwb(
                 print(f"Skipping existing NWB file: {nwbfile_path}")
                 continue
 
+            date_from_mat = session_id.split("-")[1]
+            date_obj = datetime.strptime(date_from_mat, "%Y%d%M")
             subject_metadata = get_subject_metadata_from_rat_info_folder(
                 folder_path=rat_info_folder_path,
                 subject_id=subject_id,
@@ -161,7 +149,6 @@ def sessions_to_nwb(
             session_to_nwb(
                 raw_behavior_file_path=raw_behavior_file_path,
                 processed_behavior_file_path=processed_behavior_file_path,
-                date=date_from_mat,
                 nwbfile_path=nwbfile_path,
                 column_name_mapping=column_name_mapping,
                 column_descriptions=column_descriptions,

From b01f226fa21a9def355db1778c090f72a664ff48 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Thu, 3 Oct 2024 16:19:08 +0200
Subject: [PATCH 21/26] update to use date_index

---
 .../mah_2024/mah_2024_convert_session.py      | 40 ++-----------------
 1 file changed, 4 insertions(+), 36 deletions(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
index 90db503..5cd72ee 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
@@ -10,41 +10,6 @@
 from constantinople_lab_to_nwb.mah_2024 import Mah2024NWBConverter
 
 
-def get_date_index(bpod_file_path: Union[str, Path], a_struct_file_path: Union[str, Path]) -> int:
-    """
-    Figure out the date index for the processed behavior file.
-
-    Parameters
-    ----------
-    bpod_file_path: Union[str, Path]
-        Path to the raw Bpod output (.mat file).
-    a_struct_file_path: Union[str, Path]
-        Path to the processed behavior data (.mat file).
-
-    Returns
-    -------
-    int
-        The date index for the processed behavior file.
-    """
-    bpod_data = read_mat(str(bpod_file_path))
-    num_trials = bpod_data["SessionData"]["nTrials"]
-    date = bpod_data["SessionData"]["Info"]["SessionDate"]
-
-    a_struct_data = read_mat(str(a_struct_file_path))
-    dates = a_struct_data["A"]["date"]
-    num_trials_per_day = a_struct_data["A"]["ntrials"]
-
-    dates_and_trials = pd.DataFrame(dict(date=dates, num_trials=num_trials_per_day))
-    filtered_dates_and_trials = dates_and_trials[
-        (dates_and_trials["date"] == date) & (dates_and_trials["num_trials"] == num_trials)
-    ]
-
-    if filtered_dates_and_trials.empty:
-        raise ValueError("Could not find the date index in the processed behavior file. This should not happen.")
-
-    return filtered_dates_and_trials.index[0]
-
-
 def get_subject_metadata_from_rat_info_folder(
     folder_path: Union[str, Path],
     subject_id: str,
@@ -108,6 +73,7 @@ def get_subject_metadata_from_rat_info_folder(
 def session_to_nwb(
     raw_behavior_file_path: Union[str, Path],
     processed_behavior_file_path: Union[str, Path],
+    date_index: int,
     nwbfile_path: Union[str, Path],
     column_name_mapping: Optional[dict] = None,
     column_descriptions: Optional[dict] = None,
@@ -160,7 +126,6 @@ def session_to_nwb(
     conversion_options.update(dict(RawBehavior=dict(task_arguments_to_exclude=task_arguments_to_exclude)))
 
     # Add Processed Behavior
-    date_index = get_date_index(bpod_file_path=raw_behavior_file_path, a_struct_file_path=processed_behavior_file_path)
     source_data.update(dict(ProcessedBehavior=dict(file_path=processed_behavior_file_path, date_index=date_index)))
     conversion_options.update(
         dict(ProcessedBehavior=dict(column_name_mapping=column_name_mapping, column_descriptions=column_descriptions))
@@ -212,6 +177,8 @@ def session_to_nwb(
     bpod_file_path = Path("/Volumes/T9/Constantinople/raw_Bpod/C005/DataFiles/C005_RWTautowait_20190909_145629.mat")
     # The processed behavior data is stored in a .mat file (contains data for multiple days)
     processed_behavior_file_path = Path("/Volumes/T9/Constantinople/A_Structs/ratTrial_C005.mat")
+    # The row index of the date in the processed behavior file
+    date_index = 0
     # The column name mapping is used to rename the columns in the processed data to more descriptive column names. (optional)
     column_name_mapping = dict(
         hits="is_rewarded",
@@ -275,6 +242,7 @@ def session_to_nwb(
     session_to_nwb(
         raw_behavior_file_path=bpod_file_path,
         processed_behavior_file_path=processed_behavior_file_path,
+        date_index=date_index,
         column_name_mapping=column_name_mapping,
         column_descriptions=column_descriptions,
         nwbfile_path=nwbfile_path,

From 1186fbc4093a78648c97729e62c83ec7ebdd2a5d Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Thu, 3 Oct 2024 16:21:45 +0200
Subject: [PATCH 22/26] update to use date_index

---
 .../mah_2024/mah_2024_convert_all_sessions.py | 62 +++++++++++++++++--
 1 file changed, 58 insertions(+), 4 deletions(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
index e05aae6..2e88d00 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_all_sessions.py
@@ -2,8 +2,9 @@
 from datetime import datetime
 from pathlib import Path
 from typing import Union, List
+from warnings import warn
 
-from nwbinspector import inspect_all, format_messages, save_report
+import pandas as pd
 from pymatreader import read_mat
 from tqdm import tqdm
 
@@ -71,6 +72,50 @@ def _get_sessions_to_convert_from_mat(
     return bpod_files_to_convert
 
 
+def _get_date_index(bpod_file_path: Union[str, Path], a_struct_file_path: Union[str, Path]) -> Union[int, None]:
+    """
+    Find the row of the processed behavior file that matches a raw Bpod session (same date and trial count).
+
+    Parameters
+    ----------
+    bpod_file_path: Union[str, Path]
+        Path to the raw Bpod output (.mat file).
+    a_struct_file_path: Union[str, Path]
+        Path to the processed behavior data (.mat file).
+
+    Returns
+    -------
+    Union[int, None]
+        Index of the first matching row; None (with a warning) if 'SessionData' is missing or no row matches.
+    """
+    bpod_data = read_mat(str(bpod_file_path))
+    try:
+        bpod_session_data = bpod_data["SessionData"]
+    except KeyError:
+        warn(
+            f"'SessionData' key not found in '{bpod_file_path}'. The date index could not be determined from the file."
+        )
+        return None
+
+    num_trials = bpod_session_data["nTrials"]
+    date = bpod_session_data["Info"]["SessionDate"]
+
+    a_struct_data = read_mat(str(a_struct_file_path))
+    dates = a_struct_data["A"]["date"]
+    num_trials_per_day = a_struct_data["A"]["ntrials"]
+
+    dates_and_trials = pd.DataFrame(dict(date=dates, num_trials=num_trials_per_day))
+    filtered_dates_and_trials = dates_and_trials[
+        (dates_and_trials["date"] == date) & (dates_and_trials["num_trials"] == num_trials)
+    ]
+
+    if filtered_dates_and_trials.empty:
+        warn(f"Date index for '{date}' not found in '{a_struct_file_path}'.")
+        return None
+
+    return filtered_dates_and_trials.index[0]  # first match wins if several rows qualify
+
+
 def sessions_to_nwb(
     raw_behavior_folder_path: Union[str, Path],
     processed_behavior_folder_path: Union[str, Path],
@@ -108,7 +153,7 @@ def sessions_to_nwb(
     processed_mat_files = list(processed_behavior_folder_path.glob("ratTrial*.mat"))
     subject_ids = [
         processed_behavior_file_path.stem.split("_")[-1] for processed_behavior_file_path in processed_mat_files
-    ][:10]
+    ]
     sessions_to_convert_per_subject = {
         subject_id: _get_sessions_to_convert_from_mat(
             file_path=processed_behavior_file_path, bpod_folder_path=raw_behavior_folder_path
@@ -128,14 +173,22 @@ def sessions_to_nwb(
         )
 
         for raw_behavior_file_path in progress_bar:
-            session_id = raw_behavior_file_path.stem.split("_", maxsplit=1)[1].replace("_", "-")
+            session_id = Path(raw_behavior_file_path).stem.split("_", maxsplit=1)[1].replace("_", "-")
             subject_nwb_folder_path = nwbfile_folder_path / f"sub-{subject_id}"
             if not subject_nwb_folder_path.exists():
                 os.makedirs(subject_nwb_folder_path, exist_ok=True)
             nwbfile_path = subject_nwb_folder_path / f"sub-{subject_id}_ses-{session_id}.nwb"
 
             if nwbfile_path.exists() and not overwrite:
-                print(f"Skipping existing NWB file: {nwbfile_path}")
+                continue
+
+            date_index = _get_date_index(
+                bpod_file_path=raw_behavior_file_path, a_struct_file_path=processed_behavior_file_path
+            )
+            if date_index is None:
+                print(
+                    f"Skipping '{subject_id}' session '{session_id}', session not found in the processed behavior file."
+                )
                 continue
 
             date_from_mat = session_id.split("-")[1]
@@ -149,6 +202,7 @@ def sessions_to_nwb(
             session_to_nwb(
                 raw_behavior_file_path=raw_behavior_file_path,
                 processed_behavior_file_path=processed_behavior_file_path,
+                date_index=date_index,
                 nwbfile_path=nwbfile_path,
                 column_name_mapping=column_name_mapping,
                 column_descriptions=column_descriptions,

From b7820b7fe67d1c513785c3a389f71db97f141687 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Thu, 3 Oct 2024 16:35:34 +0200
Subject: [PATCH 23/26] raise error when state type is missing in metadata

---
 .../interfaces/mah_2024_bpodinterface.py      | 56 +++++++++++++------
 1 file changed, 38 insertions(+), 18 deletions(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py b/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py
index 6b24943..fd6dbca 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py
@@ -27,10 +27,10 @@ class Mah2024BpodInterface(BaseDataInterface):
     """Behavior interface for mah_2024 conversion"""
 
     def __init__(
-            self,
-            file_path: Union[str, Path],
-            default_struct_name: str = "SessionData",
-            verbose: bool = True,
+        self,
+        file_path: Union[str, Path],
+        default_struct_name: str = "SessionData",
+        verbose: bool = True,
     ):
         """
         Interface for converting raw Bpod data to NWB.
@@ -61,7 +61,7 @@ def get_metadata(self) -> DeepDict:
         if "Info" in self._bpod_struct:
             info_dict = self._bpod_struct["Info"]
             date_string = info_dict["SessionDate"] + info_dict["SessionStartTime_UTC"]
-            session_start_time = datetime.strptime(date_string, '%d-%b-%Y%H:%M:%S')
+            session_start_time = datetime.strptime(date_string, "%d-%b-%Y%H:%M:%S")
             metadata["NWBFile"].update(session_start_time=session_start_time)
 
             # Device info
@@ -303,7 +303,9 @@ def _read_file(self) -> dict:
     def get_trial_times(self) -> (List[float], List[float]):
         return self._bpod_struct["TrialStartTimestamp"], self._bpod_struct["TrialEndTimestamp"]
 
-    def create_states_table(self, metadata: dict, trial_start_times: List[float]) -> tuple[StateTypesTable, StatesTable]:
+    def create_states_table(
+        self, metadata: dict, trial_start_times: List[float]
+    ) -> tuple[StateTypesTable, StatesTable]:
         state_types_metadata = metadata["Behavior"]["StateTypesTable"]
         states_table_metadata = metadata["Behavior"]["StatesTable"]
 
@@ -313,6 +315,11 @@ def create_states_table(self, metadata: dict, trial_start_times: List[float]) ->
 
         trials_data = self._bpod_struct["RawEvents"]["Trial"]
         for state_name in trials_data[0]["States"]:
+            if state_name not in state_types_metadata:
+                raise ValueError(
+                    f"State '{state_name}' not found in metadata. Please provide in metadata['Behavior']['StateTypesTable']."
+                )
+
             state_types.add_row(
                 state_name=state_types_metadata[state_name]["name"],
                 check_ragged=False,
@@ -334,7 +341,9 @@ def create_states_table(self, metadata: dict, trial_start_times: List[float]) ->
 
         return state_types, states_table
 
-    def create_actions_table(self, metadata: dict, trial_start_times: List[float]) -> tuple[ActionTypesTable, ActionsTable]:
+    def create_actions_table(
+        self, metadata: dict, trial_start_times: List[float]
+    ) -> tuple[ActionTypesTable, ActionsTable]:
         action_types_metadata = metadata["Behavior"]["ActionTypesTable"]
         actions_table_metadata = metadata["Behavior"]["ActionsTable"]
 
@@ -351,7 +360,9 @@ def create_actions_table(self, metadata: dict, trial_start_times: List[float]) -
         for trial_states_and_events, trial_start_time in zip(trials_data, trial_start_times):
             events = trial_states_and_events["Events"]
 
-            sound_events = [event_name for event_name in events if "AudioPlayer" in event_name or "WavePlayer" in event_name]
+            sound_events = [
+                event_name for event_name in events if "AudioPlayer" in event_name or "WavePlayer" in event_name
+            ]
             if not len(sound_events):
                 continue
 
@@ -369,7 +380,9 @@ def create_actions_table(self, metadata: dict, trial_start_times: List[float]) -
 
         return action_types, actions_table
 
-    def create_events_table(self, metadata: dict, trial_start_times: List[float]) -> tuple[EventTypesTable, EventsTable]:
+    def create_events_table(
+        self, metadata: dict, trial_start_times: List[float]
+    ) -> tuple[EventTypesTable, EventsTable]:
         event_types_metadata = metadata["Behavior"]["EventTypesTable"]
         events_table_metadata = metadata["Behavior"]["EventsTable"]
 
@@ -520,10 +533,10 @@ def add_trials(self, nwbfile: NWBFile, metadata: dict) -> None:
         nwbfile.trials = trials
 
     def add_task_arguments_to_trials(
-            self,
-            nwbfile: NWBFile,
-            metadata: dict,
-            arguments_to_exclude: List[str] = None,
+        self,
+        nwbfile: NWBFile,
+        metadata: dict,
+        arguments_to_exclude: List[str] = None,
     ) -> None:
         if arguments_to_exclude is None:
             arguments_to_exclude = []
@@ -540,9 +553,16 @@ def add_task_arguments_to_trials(
             if task_argument_name in arguments_to_exclude:
                 continue
             if task_argument_name not in task_arguments_metadata:
-                warn(f"Task argument '{task_argument_name}' not in metadata. Skipping.")
-                continue
-            task_argument_values = np.array([trial_settings["GUI"][task_argument_name] for trial_settings in trials_settings])
+                warn(f"Task argument '{task_argument_name}' not in metadata.")
+                task_argument_column_name = task_argument_name
+                description = "no description"
+            else:
+                task_argument_column_name = task_arguments_metadata[task_argument_name]["name"]
+                description = task_arguments_metadata[task_argument_name]["description"]
+
+            task_argument_values = np.array(
+                [trial_settings["GUI"][task_argument_name] for trial_settings in trials_settings]
+            )
             task_argument_type = task_arguments_metadata[task_argument_name]["expression_type"]
             if task_argument_type == "boolean":
                 task_argument_values = task_argument_values.astype(bool)
@@ -550,8 +570,8 @@ def add_task_arguments_to_trials(
                 task_argument_values = np.array([self._block_name_mapping[block] for block in task_argument_values])
 
             trials.add_column(
-                name=task_arguments_metadata[task_argument_name]["name"],
-                description=task_arguments_metadata[task_argument_name]["description"],
+                name=task_argument_column_name,
+                description=description,
                 data=task_argument_values,
             )
 

From 4b029472b514e76b1547d8a67a84d6916605d5c5 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Sun, 6 Oct 2024 17:04:03 +0200
Subject: [PATCH 24/26] move bpod interface to general_interfaces/

---
 src/constantinople_lab_to_nwb/general_interfaces/__init__.py | 1 +
 .../mah_2024/interfaces/__init__.py                          | 1 -
 .../mah_2024/mah_2024_nwbconverter.py                        | 5 +++--
 .../schierek_embargo_2024_nwbconverter.py                    | 5 ++---
 4 files changed, 6 insertions(+), 6 deletions(-)
 create mode 100644 src/constantinople_lab_to_nwb/general_interfaces/__init__.py

diff --git a/src/constantinople_lab_to_nwb/general_interfaces/__init__.py b/src/constantinople_lab_to_nwb/general_interfaces/__init__.py
new file mode 100644
index 0000000..ee95b94
--- /dev/null
+++ b/src/constantinople_lab_to_nwb/general_interfaces/__init__.py
@@ -0,0 +1 @@
+from .bpodbehaviorinterface import BpodBehaviorInterface
diff --git a/src/constantinople_lab_to_nwb/mah_2024/interfaces/__init__.py b/src/constantinople_lab_to_nwb/mah_2024/interfaces/__init__.py
index 16bba25..a1a3b75 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/interfaces/__init__.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/interfaces/__init__.py
@@ -1,2 +1 @@
-from .mah_2024_bpodinterface import Mah2024BpodInterface
 from .mah_2024_processedbehaviorinterface import Mah2024ProcessedBehaviorInterface
diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_nwbconverter.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_nwbconverter.py
index f809baf..80c4be9 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_nwbconverter.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_nwbconverter.py
@@ -1,12 +1,13 @@
 from neuroconv import NWBConverter
 
-from constantinople_lab_to_nwb.mah_2024.interfaces import Mah2024BpodInterface, Mah2024ProcessedBehaviorInterface
+from constantinople_lab_to_nwb.general_interfaces import BpodBehaviorInterface
+from constantinople_lab_to_nwb.mah_2024.interfaces import Mah2024ProcessedBehaviorInterface
 
 
 class Mah2024NWBConverter(NWBConverter):
     """Primary conversion class for converting the Behavior dataset from the Constantinople Lab."""
 
     data_interface_classes = dict(
-        RawBehavior=Mah2024BpodInterface,
+        RawBehavior=BpodBehaviorInterface,
         ProcessedBehavior=Mah2024ProcessedBehaviorInterface,
     )
diff --git a/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_nwbconverter.py b/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_nwbconverter.py
index cf1b8b9..db703fe 100644
--- a/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_nwbconverter.py
+++ b/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_nwbconverter.py
@@ -12,8 +12,7 @@
 from neuroconv.utils import FilePathType
 from probeinterface import read_probeinterface, Probe
 
-# TODO: move to general_interfaces
-from constantinople_lab_to_nwb.mah_2024.interfaces import Mah2024BpodInterface
+from constantinople_lab_to_nwb.general_interfaces import BpodBehaviorInterface
 
 from constantinople_lab_to_nwb.schierek_embargo_2024.interfaces import (
     SchierekEmbargo2024SortingInterface,
@@ -29,7 +28,7 @@ class SchierekEmbargo2024NWBConverter(NWBConverter):
         RecordingLFP=OpenEphysRecordingInterface,
         PhySorting=PhySortingInterface,
         ProcessedSorting=SchierekEmbargo2024SortingInterface,
-        RawBehavior=Mah2024BpodInterface,
+        RawBehavior=BpodBehaviorInterface,
         ProcessedBehavior=SchierekEmbargo2024ProcessedBehaviorInterface,
     )
 

From 4221a4752b77787612af4e003582ca30e3b165a5 Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Sun, 6 Oct 2024 17:04:03 +0200
Subject: [PATCH 25/26] move bpod interface to general_interfaces/

---
 src/constantinople_lab_to_nwb/general_interfaces/__init__.py | 1 +
 .../bpodbehaviorinterface.py}                                | 4 ++--
 .../mah_2024/interfaces/__init__.py                          | 1 -
 .../mah_2024/mah_2024_nwbconverter.py                        | 5 +++--
 .../schierek_embargo_2024_nwbconverter.py                    | 5 ++---
 5 files changed, 8 insertions(+), 8 deletions(-)
 create mode 100644 src/constantinople_lab_to_nwb/general_interfaces/__init__.py
 rename src/constantinople_lab_to_nwb/{mah_2024/interfaces/mah_2024_bpodinterface.py => general_interfaces/bpodbehaviorinterface.py} (99%)

diff --git a/src/constantinople_lab_to_nwb/general_interfaces/__init__.py b/src/constantinople_lab_to_nwb/general_interfaces/__init__.py
new file mode 100644
index 0000000..ee95b94
--- /dev/null
+++ b/src/constantinople_lab_to_nwb/general_interfaces/__init__.py
@@ -0,0 +1 @@
+from .bpodbehaviorinterface import BpodBehaviorInterface
diff --git a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py b/src/constantinople_lab_to_nwb/general_interfaces/bpodbehaviorinterface.py
similarity index 99%
rename from src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py
rename to src/constantinople_lab_to_nwb/general_interfaces/bpodbehaviorinterface.py
index fd6dbca..871160d 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py
+++ b/src/constantinople_lab_to_nwb/general_interfaces/bpodbehaviorinterface.py
@@ -23,8 +23,8 @@
 from pynwb import NWBFile
 
 
-class Mah2024BpodInterface(BaseDataInterface):
-    """Behavior interface for mah_2024 conversion"""
+class BpodBehaviorInterface(BaseDataInterface):
+    """Behavior interface for converting behavior data from Bpod system."""
 
     def __init__(
         self,
diff --git a/src/constantinople_lab_to_nwb/mah_2024/interfaces/__init__.py b/src/constantinople_lab_to_nwb/mah_2024/interfaces/__init__.py
index 16bba25..a1a3b75 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/interfaces/__init__.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/interfaces/__init__.py
@@ -1,2 +1 @@
-from .mah_2024_bpodinterface import Mah2024BpodInterface
 from .mah_2024_processedbehaviorinterface import Mah2024ProcessedBehaviorInterface
diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_nwbconverter.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_nwbconverter.py
index f809baf..80c4be9 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_nwbconverter.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_nwbconverter.py
@@ -1,12 +1,13 @@
 from neuroconv import NWBConverter
 
-from constantinople_lab_to_nwb.mah_2024.interfaces import Mah2024BpodInterface, Mah2024ProcessedBehaviorInterface
+from constantinople_lab_to_nwb.general_interfaces import BpodBehaviorInterface
+from constantinople_lab_to_nwb.mah_2024.interfaces import Mah2024ProcessedBehaviorInterface
 
 
 class Mah2024NWBConverter(NWBConverter):
     """Primary conversion class for converting the Behavior dataset from the Constantinople Lab."""
 
     data_interface_classes = dict(
-        RawBehavior=Mah2024BpodInterface,
+        RawBehavior=BpodBehaviorInterface,
         ProcessedBehavior=Mah2024ProcessedBehaviorInterface,
     )
diff --git a/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_nwbconverter.py b/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_nwbconverter.py
index cf1b8b9..db703fe 100644
--- a/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_nwbconverter.py
+++ b/src/constantinople_lab_to_nwb/schierek_embargo_2024/schierek_embargo_2024_nwbconverter.py
@@ -12,8 +12,7 @@
 from neuroconv.utils import FilePathType
 from probeinterface import read_probeinterface, Probe
 
-# TODO: move to general_interfaces
-from constantinople_lab_to_nwb.mah_2024.interfaces import Mah2024BpodInterface
+from constantinople_lab_to_nwb.general_interfaces import BpodBehaviorInterface
 
 from constantinople_lab_to_nwb.schierek_embargo_2024.interfaces import (
     SchierekEmbargo2024SortingInterface,
@@ -29,7 +28,7 @@ class SchierekEmbargo2024NWBConverter(NWBConverter):
         RecordingLFP=OpenEphysRecordingInterface,
         PhySorting=PhySortingInterface,
         ProcessedSorting=SchierekEmbargo2024SortingInterface,
-        RawBehavior=Mah2024BpodInterface,
+        RawBehavior=BpodBehaviorInterface,
         ProcessedBehavior=SchierekEmbargo2024ProcessedBehaviorInterface,
     )
 

From c628adee02b57ed560207b0ffddc2c39ff61642f Mon Sep 17 00:00:00 2001
From: weiglszonja <wszonja@gmail.com>
Date: Wed, 9 Oct 2024 14:00:10 +0200
Subject: [PATCH 26/26] fix state types: gather state names from all trials, handle single-trial sessions

---
 .../interfaces/mah_2024_bpodinterface.py       | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py b/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py
index fd6dbca..4d90bd4 100644
--- a/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py
+++ b/src/constantinople_lab_to_nwb/mah_2024/interfaces/mah_2024_bpodinterface.py
@@ -314,14 +314,24 @@ def create_states_table(
         states_table = StatesTable(description=states_table_metadata["description"], state_types_table=state_types)
 
         trials_data = self._bpod_struct["RawEvents"]["Trial"]
-        for state_name in trials_data[0]["States"]:
+        num_trials = self._bpod_struct["nTrials"]
+
+        # make it iterable if only one trial
+        if num_trials == 1:
+            trials_data = [trials_data]
+            trial_start_times = [trial_start_times]
+
+        unique_state_names = set()
+        for trial_index in range(num_trials):
+            unique_state_names.update(trials_data[trial_index]["States"])
+        for state_name in unique_state_names:
             if state_name not in state_types_metadata:
                 raise ValueError(
-                    f"State '{state_name}' not found in metadata. Please provide in metadata['Behavior']['StateTypesTable']."
+                    f"State '{state_name}' not in metadata. State type should be defined in metadata['Behavior']['StateTypesTable']."
                 )
-
+            state_type = state_types_metadata[state_name]["name"]
             state_types.add_row(
-                state_name=state_types_metadata[state_name]["name"],
+                state_name=state_type,
                 check_ragged=False,
             )