From 675d3a18af5fe6f8e804db1891e51a06a77f4765 Mon Sep 17 00:00:00 2001 From: ZhixiaoSu <37242111+ZhixiaoSu@users.noreply.github.com> Date: Thu, 31 Oct 2024 04:13:18 -0700 Subject: [PATCH 01/17] Change absolute time reference to first go cue --- src/aind_dynamic_foraging_data_utils/nwb_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index fec54c6..b23b6e3 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -329,7 +329,7 @@ def create_df_trials(nwb_filename): df_ses_trials["ses_idx"] = ses_idx # Adjust all times relative to start of the first trial - t0 = df_ses_trials.start_time[0] + t0 = df_ses_trials.goCue_start_time[0] skip_cols = ["right_valve_open_time", "left_valve_open_time"] for col in df_ses_trials.columns: if ("time" in col) and (col not in skip_cols): From 2bfdd4ba3cc13914be028b5c318f965598c03194 Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Thu, 31 Oct 2024 10:06:39 -0700 Subject: [PATCH 02/17] changing two other instances --- src/aind_dynamic_foraging_data_utils/nwb_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index b23b6e3..6ecdb37 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -328,7 +328,7 @@ def create_df_trials(nwb_filename): df_ses_trials = df_ses_trials.rename(columns={"id": "trial"}) df_ses_trials["ses_idx"] = ses_idx - # Adjust all times relative to start of the first trial + # Adjust all times relative to start of the first go cue t0 = df_ses_trials.goCue_start_time[0] skip_cols = ["right_valve_open_time", "left_valve_open_time"] for col in df_ses_trials.columns: @@ -461,8 +461,8 @@ def create_events_df(nwb_filename, adjust_time=True): ) event_types -= ignore_types - # Determine time 0 - t0 = nwb.trials.start_time[0] + # Determine time 0 as first go Cue + t0 = nwb.trials.goCue_start_time[0] # Iterate over event types and build a dataframe of each events = [] @@ -560,7 +560,7 @@ def create_fib_df(nwb_filename, tidy=True, adjust_time=True): return None # Determine time 0 - t0 = nwb.trials.start_time[0] + t0 = nwb.trials.goCue_start_time[0] # Iterate over event types and build a dataframe of each events = [] From f742506591a928f97ed7682390de017260a7327c Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Thu, 31 Oct 2024 10:55:04 -0700 Subject: [PATCH 03/17] checking more adjustments --- .../nwb_utils.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index 6ecdb37..154027b 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -341,7 +341,7 @@ def create_df_trials(nwb_filename): -1, fill_value=last_stop ) - # Adjust times relative to go cue + # Adjust times relative to go cue on each trial for col in df_ses_trials.columns: if ( ("time" in col) @@ -354,6 +354,12 @@ def create_df_trials(nwb_filename): ) df_ses_trials["goCue_start_time"] = 0.0 + # TODO, CHECK FROM HERE + # TODO, a unit test that checks that the goCue_start_time in df_events matches goCue_start_time absolute in df_trials + # TODO, a unit test that matches that right/left licks in df_events.trial==i matches df_trials.loc[i] + # TODO, same with reward times + # TODO Trial 11, we seem to have a mismatch in reward_time + # TODO Trial 15, choice time mismatch # Adjust event times relative to trial events_ses = {key: nwb.acquisition[key].timestamps[:] - t0 for key in key_from_acq} for event in [ @@ -373,7 +379,7 @@ def create_df_trials(nwb_filename): 4, ), axis=1, - ) + ) #TODO, something feels wrong here. event_times should be relative to t0 # Compute time of reward for each trial df_ses_trials["reward_time"] = df_ses_trials.apply( @@ -415,6 +421,7 @@ def create_df_trials(nwb_filename): "right_reward_delivery_time", ] ) + # TODO CHECK TO HERE return df_ses_trials @@ -493,9 +500,9 @@ def create_events_df(nwb_filename, adjust_time=True): df = df.sort_values(by="timestamps") df = df.dropna(subset="timestamps").reset_index(drop=True) - # Add trial index for each event - trial_starts = nwb.trials.start_time[:] - nwb.trials.start_time[0] - last_stop = nwb.trials.stop_time[-1] - nwb.trials.start_time[0] + # Add trial index for each event + trial_starts = nwb.trials.start_time[:] - nwb.trials.goCue_start_time[0] + last_stop = nwb.trials.stop_time[-1] - nwb.trials.goCue_start_time[0] trial_index = [] for index, e in df.iterrows(): starts = np.where(e.timestamps > trial_starts)[0] @@ -597,3 +604,6 @@ def create_fib_df(nwb_filename, tidy=True, adjust_time=True): return df_pivoted else: return df + + + From 5b447aef865d1c8634ecfb245deef196c6bdfb18 Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Thu, 31 Oct 2024 10:56:44 -0700 Subject: [PATCH 04/17] linting --- src/aind_dynamic_foraging_data_utils/nwb_utils.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index 154027b..44217d7 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -379,7 +379,7 @@ def create_df_trials(nwb_filename): 4, ), axis=1, - ) #TODO, something feels wrong here. event_times should be relative to t0 + ) # TODO, something feels wrong here. event_times should be relative to t0 # Compute time of reward for each trial df_ses_trials["reward_time"] = df_ses_trials.apply( @@ -500,7 +500,7 @@ def create_events_df(nwb_filename, adjust_time=True): df = df.sort_values(by="timestamps") df = df.dropna(subset="timestamps").reset_index(drop=True) - # Add trial index for each event + # Add trial index for each event trial_starts = nwb.trials.start_time[:] - nwb.trials.goCue_start_time[0] last_stop = nwb.trials.stop_time[-1] - nwb.trials.goCue_start_time[0] trial_index = [] @@ -604,6 +604,3 @@ def create_fib_df(nwb_filename, tidy=True, adjust_time=True): return df_pivoted else: return df - - - From ac84076732717da5e777057223a159b303d85749 Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Fri, 1 Nov 2024 11:26:47 -0700 Subject: [PATCH 05/17] sanity check for df_events and fip_df test --- src/aind_dynamic_foraging_data_utils/nwb_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index 44217d7..e2500c4 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -514,6 +514,10 @@ def create_events_df(nwb_filename, adjust_time=True): trial_index.append(starts[-1]) df["trial"] = trial_index + # Sanity check that the first go cue is time 0 + gocues = df.query('event == "goCue_start_time"') + if (len(gocues) > 0) and (adjust_time): + assert np.isclose(gocues.iloc[0]['timestamps'], 0, rtol=0.01) return df From 80488180a74df0dc5f0c93bb09ac7d43ae681597 Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Fri, 1 Nov 2024 12:51:32 -0700 Subject: [PATCH 06/17] major refactor --- .../nwb_utils.py | 196 ++++++++++-------- 1 file changed, 107 insertions(+), 89 deletions(-) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index e2500c4..8041286 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -12,6 +12,7 @@ import os import re +import warnings import numpy as np import pandas as pd @@ -46,6 +47,7 @@ def unpack_metadata(nwb): Unpacks metadata as a dictionary attribute, instead of a Dynamic table nested inside a dictionary """ + # TODO, this should be outdated once we fix the NWB files themselves nwb.metadata = nwb.scratch["metadata"].to_dataframe().iloc[0].to_dict() @@ -297,20 +299,15 @@ def create_single_df_session(nwb_filename): return df_session -def create_df_trials(nwb_filename): +def create_df_trials(nwb_filename, adjust_time=True): """ Process nwb and create df_trials for every single session + + adjust_time (bool) if true, adjust t0 to be the first gocue """ - nwb = load_nwb_from_filename(nwb_filename) - key_from_acq = [ - "left_lick_time", - "right_lick_time", - "left_reward_delivery_time", - "right_reward_delivery_time", - "FIP_falling_time", - "FIP_rising_time", - ] + # If we are given a filename, load the NWB object itself + nwb = load_nwb_from_filename(nwb_filename) # Parse subject and session_date if nwb.session_id.startswith("behavior") or nwb.session_id.startswith("FIP"): @@ -321,108 +318,123 @@ def create_df_trials(nwb_filename): splits = nwb.session_id.split("_") subject_id = splits[0] session_date = splits[1] - ses_idx = subject_id + "_" + session_date - df_ses_trials = nwb.trials.to_dataframe().reset_index() - df_ses_trials = df_ses_trials.rename(columns={"id": "trial"}) - df_ses_trials["ses_idx"] = ses_idx + # Build dataframe + df = nwb.trials.to_dataframe().reset_index() + df = df.rename(columns={"id": "trial"}) + df["ses_idx"] = ses_idx - # Adjust all times relative to start of the first go cue - t0 = df_ses_trials.goCue_start_time[0] + # Adjust for gaps in trial start/stop, and use the last stop time + last_stop = df.iloc[-1]["stop_time"] + df["stop_time"] = df["start_time"].shift(-1, fill_value=last_stop) + + # We skip these columns because they are how long the valve is open + # not the times at which the valves were opened skip_cols = ["right_valve_open_time", "left_valve_open_time"] - for col in df_ses_trials.columns: + + # compute times relative to start of trial and start of session + t0 = nwb.trials.goCue_start_time[0] + drop_cols = [] + for col in df.columns: if ("time" in col) and (col not in skip_cols): - df_ses_trials[col + "_absolute"] = df_ses_trials[col] - t0 + # Adjust all times relative to start of the first go cue + if adjust_time: + df[col + "_in_session"] = df[col] - t0 + else: + df[col + "_in_session"] = df[col] - # Adjust for gaps in trial start/stop, and use the last stop time - last_stop = df_ses_trials.iloc[-1]["stop_time_absolute"] - df_ses_trials["stop_time_absolute"] = df_ses_trials["start_time_absolute"].shift( - -1, fill_value=last_stop - ) + # Adjust times relative to go cue on each trial + if ("time" in col) and (col not in skip_cols): + df[col + "_in_trial"] = df[col].values - df["goCue_start_time"].values + + # Clean up these column names that are not clear + drop_cols.append(col) - # Adjust times relative to go cue on each trial - for col in df_ses_trials.columns: - if ( - ("time" in col) - and ("time_absolute" not in col) - and (col != "goCue_start_time") - and (col not in skip_cols) - ): - df_ses_trials.loc[:, col] = ( - df_ses_trials[col].values - df_ses_trials["goCue_start_time"].values - ) - df_ses_trials["goCue_start_time"] = 0.0 - - # TODO, CHECK FROM HERE - # TODO, a unit test that checks that the goCue_start_time in df_events matches goCue_start_time absolute in df_trials - # TODO, a unit test that matches that right/left licks in df_events.trial==i matches df_trials.loc[i] - # TODO, same with reward times - # TODO Trial 11, we seem to have a mismatch in reward_time - # TODO Trial 15, choice time mismatch - # Adjust event times relative to trial - events_ses = {key: nwb.acquisition[key].timestamps[:] - t0 for key in key_from_acq} - for event in [ + # Get lick and reward times + key_from_acq = [ "left_lick_time", "right_lick_time", "left_reward_delivery_time", "right_reward_delivery_time", - ]: - event_times = events_ses[event] - df_ses_trials[event] = df_ses_trials.apply( + ] + if adjust_time: + events = {key: nwb.acquisition[key].timestamps[:] - t0 for key in key_from_acq} + else: + events = {key: nwb.acquisition[key].timestamps[:] for key in key_from_acq} + + # Map events to trials + # Here we map an event to the most recent goCue + df["next_goCue_start_time_in_session"] = df["goCue_start_time_in_session"].shift( + -1, fill_value=np.inf + ) + drop_cols.append("next_goCue_start_time_in_session") + for event in key_from_acq: + event_times = events[event] + df[event] = df.apply( lambda x: np.round( event_times[ - (event_times > (x["goCue_start_time"] + x["goCue_start_time_absolute"])) - & (event_times < (x["stop_time"] + x["goCue_start_time_absolute"])) + (event_times > x["goCue_start_time_in_session"]) + & (event_times < x["next_goCue_start_time_in_session"]) ] - - x["goCue_start_time_absolute"], - 4, ), axis=1, - ) # TODO, something feels wrong here. event_times should be relative to t0 + ) # Compute time of reward for each trial - df_ses_trials["reward_time"] = df_ses_trials.apply( - lambda x: np.nanmin( - np.concatenate( - [ - [np.nan], - x["right_reward_delivery_time"], - x["left_reward_delivery_time"], - ] - ) - ), - axis=1, - ) - df_ses_trials["reward_time_absolute"] = ( - df_ses_trials["reward_time"] + df_ses_trials["goCue_start_time_absolute"] - ) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="All-NaN slice encountered") + df["reward_time_in_session"] = df.apply( + lambda x: np.nanmin( + np.concatenate( + [ + [np.nan], + x["right_reward_delivery_time"], + x["left_reward_delivery_time"], + ] + ) + ), + axis=1, + ) + df["reward_time_in_trial"] = df["reward_time_in_session"] - df["goCue_start_time_in_session"] # Compute time of choice for each trials - df_ses_trials["choice_time"] = df_ses_trials.apply( - lambda x: np.nanmin(np.concatenate([[np.nan], x["right_lick_time"], x["left_lick_time"]])), - axis=1, - ) - df_ses_trials["choice_time_absolute"] = ( - df_ses_trials["choice_time"] + df_ses_trials["goCue_start_time_absolute"] - ) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="All-NaN slice encountered") + df["choice_time_in_session"] = df.apply( + lambda x: np.nanmin( + np.concatenate([[np.nan], x["right_lick_time"], x["left_lick_time"]]) + ), + axis=1, + ) + df["choice_time_in_trial"] = df["choice_time_in_session"] - df["goCue_start_time_in_session"] # Compute boolean of whether animal was rewarded - df_ses_trials["reward"] = df_ses_trials.rewarded_historyR.astype( - int - ) | df_ses_trials.rewarded_historyL.astype(int) + df["reward"] = df.rewarded_historyR.astype(int) | df.rewarded_historyL.astype(int) + + # Sanity checks + rewarded_df = df.query("reward == 1") + assert ( + np.isnan(rewarded_df["reward_time_in_session"]).sum() == 0 + ), "Rewarded trials without reward time" + assert ( + np.isnan(rewarded_df["choice_time_in_session"]).sum() == 0 + ), "Rewarded trials without choice time" + assert np.all( + rewarded_df["choice_time_in_session"] <= rewarded_df["reward_time_in_session"] + ), "Reward before choice time" + + # TODO, fails because of manual rewards and auto rewards + # assert np.all(np.isnan(df.query('reward == 0')['reward_time_in_session'])), "Unrewarded trials with reward time" + # TODO, filter for earned rewards # Drop columns - df_ses_trials = df_ses_trials.drop( - columns=[ - "left_lick_time", - "right_lick_time", - "left_reward_delivery_time", - "right_reward_delivery_time", - ] - ) - # TODO CHECK TO HERE - return df_ses_trials + drop_cols += key_from_acq + df = df.drop(columns=drop_cols) + + if adjust_time: + print("Timestamps are adjusted so t(0) = first go cue") + return df def create_events_df(nwb_filename, adjust_time=True): @@ -517,7 +529,10 @@ def create_events_df(nwb_filename, adjust_time=True): # Sanity check that the first go cue is time 0 gocues = df.query('event == "goCue_start_time"') if (len(gocues) > 0) and (adjust_time): - assert np.isclose(gocues.iloc[0]['timestamps'], 0, rtol=0.01) + assert np.isclose(gocues.iloc[0]["timestamps"], 0, rtol=0.01) + + if adjust_time: + print("Timestamps are adjusted so t(0) = first go cue") return df @@ -602,6 +617,9 @@ def create_fib_df(nwb_filename, tidy=True, adjust_time=True): ses_idx = subject_id + "_" + session_date df["ses_idx"] = ses_idx + if adjust_time: + print("Timestamps are adjusted so t(0) = first go cue") + # pivot table based on timestamps if not tidy: df_pivoted = pd.pivot(df, index="timestamps", columns=["event"], values="data") From 5210400784a614bd0da63f37d80bad0f28463670 Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Fri, 1 Nov 2024 12:57:55 -0700 Subject: [PATCH 07/17] linting --- src/aind_dynamic_foraging_data_utils/nwb_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index 8041286..51397e5 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -425,7 +425,9 @@ def create_df_trials(nwb_filename, adjust_time=True): ), "Reward before choice time" # TODO, fails because of manual rewards and auto rewards - # assert np.all(np.isnan(df.query('reward == 0')['reward_time_in_session'])), "Unrewarded trials with reward time" + # assert ( + # np.all(np.isnan(df.query('reward == 0')['reward_time_in_session']) + # ), "Unrewarded trials with reward time" # TODO, filter for earned rewards # Drop columns From 75422018c00c59fda06be391cde06bd29abe7667 Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Fri, 1 Nov 2024 13:36:24 -0700 Subject: [PATCH 08/17] bug fix and linting --- .../nwb_utils.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index 51397e5..ba699ff 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -372,12 +372,10 @@ def create_df_trials(nwb_filename, adjust_time=True): for event in key_from_acq: event_times = events[event] df[event] = df.apply( - lambda x: np.round( - event_times[ - (event_times > x["goCue_start_time_in_session"]) - & (event_times < x["next_goCue_start_time_in_session"]) - ] - ), + lambda x: event_times[ + (event_times >= x["goCue_start_time_in_session"]) + & (event_times < x["next_goCue_start_time_in_session"]) + ], axis=1, ) @@ -409,6 +407,11 @@ def create_df_trials(nwb_filename, adjust_time=True): ) df["choice_time_in_trial"] = df["choice_time_in_session"] - df["goCue_start_time_in_session"] + # Filtering out choices greater than response window + slow_choice = df["choice_time_in_trial"] > df["response_duration"] + df.loc[slow_choice, "choice_time_in_session"] = np.nan + df.loc[slow_choice, "choice_time_in_trial"] = np.nan + # Compute boolean of whether animal was rewarded df["reward"] = df.rewarded_historyR.astype(int) | df.rewarded_historyL.astype(int) @@ -423,12 +426,16 @@ def create_df_trials(nwb_filename, adjust_time=True): assert np.all( rewarded_df["choice_time_in_session"] <= rewarded_df["reward_time_in_session"] ), "Reward before choice time" + assert np.all( + rewarded_df["choice_time_in_trial"] >= 0 + ), "Rewarded trial with negative choice_time_in_trial" # TODO, fails because of manual rewards and auto rewards # assert ( # np.all(np.isnan(df.query('reward == 0')['reward_time_in_session']) # ), "Unrewarded trials with reward time" - # TODO, filter for earned rewards + # TODO, figure out how to deal with earned, manual, water, rewards + # TODO, documentation of added columns # Drop columns drop_cols += key_from_acq From 0e5579ae013e2dd4b74d221e870edad5d8d338c9 Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Fri, 1 Nov 2024 13:52:29 -0700 Subject: [PATCH 09/17] adding earned_reward and extra_reward --- .../nwb_utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index ba699ff..3f2325a 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -413,10 +413,11 @@ def create_df_trials(nwb_filename, adjust_time=True): df.loc[slow_choice, "choice_time_in_trial"] = np.nan # Compute boolean of whether animal was rewarded - df["reward"] = df.rewarded_historyR.astype(int) | df.rewarded_historyL.astype(int) + df["earned_reward"] = df.rewarded_historyR.astype(int) | df.rewarded_historyL.astype(int) + df["extra_reward"] = (df["earned_reward"] == 0) & df["reward_time_in_session"].notnull() # Sanity checks - rewarded_df = df.query("reward == 1") + rewarded_df = df.query("earned_reward == 1") assert ( np.isnan(rewarded_df["reward_time_in_session"]).sum() == 0 ), "Rewarded trials without reward time" @@ -429,12 +430,11 @@ def create_df_trials(nwb_filename, adjust_time=True): assert np.all( rewarded_df["choice_time_in_trial"] >= 0 ), "Rewarded trial with negative choice_time_in_trial" - - # TODO, fails because of manual rewards and auto rewards - # assert ( - # np.all(np.isnan(df.query('reward == 0')['reward_time_in_session']) - # ), "Unrewarded trials with reward time" - # TODO, figure out how to deal with earned, manual, water, rewards + assert np.all( + np.isnan( + df.query("earned_reward == 0").query("extra_reward == 0")["reward_time_in_session"] + ) + ), "Unrewarded trials with reward time" # TODO, documentation of added columns # Drop columns From 9a94314d1f21e52724a35a9de388860ed655eb23 Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Mon, 4 Nov 2024 08:51:43 -0800 Subject: [PATCH 10/17] fixing bug --- src/aind_dynamic_foraging_data_utils/nwb_utils.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index 3f2325a..2bbef94 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -505,8 +505,6 @@ def create_events_df(nwb_filename, adjust_time=True): events.append(df) # Add keys from trials table - # I don't like hardcoding dynamic foraging specific things here. - # I think these keys should be added to the stimulus field of the nwb trial_events = ["goCue_start_time"] for e in trial_events: stamps = nwb.trials[:][e].values @@ -522,8 +520,12 @@ def create_events_df(nwb_filename, adjust_time=True): df = df.dropna(subset="timestamps").reset_index(drop=True) # Add trial index for each event - trial_starts = nwb.trials.start_time[:] - nwb.trials.goCue_start_time[0] - last_stop = nwb.trials.stop_time[-1] - nwb.trials.goCue_start_time[0] + if adjust_time: + trial_starts = nwb.trials.start_time[:] - t0 + last_stop = nwb.trials.stop_time[-1] - t0 + else: + trial_starts = nwb.trials.start_time[:] + last_stop = nwb.trials.stop_time[-1] trial_index = [] for index, e in df.iterrows(): starts = np.where(e.timestamps > trial_starts)[0] @@ -539,6 +541,7 @@ def create_events_df(nwb_filename, adjust_time=True): gocues = df.query('event == "goCue_start_time"') if (len(gocues) > 0) and (adjust_time): assert np.isclose(gocues.iloc[0]["timestamps"], 0, rtol=0.01) + # TODO, need more checks here for time alignment on trial index. if adjust_time: print("Timestamps are adjusted so t(0) = first go cue") From d840c9a609d9cedd949f91c786806368d2c7c3e1 Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Mon, 4 Nov 2024 08:54:41 -0800 Subject: [PATCH 11/17] linting --- .../nwb_utils.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index 2bbef94..7e00471 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -490,7 +490,10 @@ def create_events_df(nwb_filename, adjust_time=True): event_types -= ignore_types # Determine time 0 as first go Cue - t0 = nwb.trials.goCue_start_time[0] + if adjust_time: + t0 = nwb.trials.goCue_start_time[0] + else: + t0 = 0 # Iterate over event types and build a dataframe of each events = [] @@ -499,8 +502,7 @@ def create_events_df(nwb_filename, adjust_time=True): stamps = nwb.acquisition[e].timestamps[:] data = nwb.acquisition[e].data[:] labels = [e] * len(data) - if adjust_time: - stamps = stamps - t0 + stamps = stamps - t0 df = pd.DataFrame({"timestamps": stamps, "data": data, "event": labels}) events.append(df) @@ -509,8 +511,7 @@ def create_events_df(nwb_filename, adjust_time=True): for e in trial_events: stamps = nwb.trials[:][e].values labels = [e] * len(stamps) - if adjust_time: - stamps = stamps - t0 + stamps = stamps - t0 df = pd.DataFrame({"timestamps": stamps, "event": labels}) events.append(df) @@ -520,12 +521,8 @@ def create_events_df(nwb_filename, adjust_time=True): df = df.dropna(subset="timestamps").reset_index(drop=True) # Add trial index for each event - if adjust_time: - trial_starts = nwb.trials.start_time[:] - t0 - last_stop = nwb.trials.stop_time[-1] - t0 - else: - trial_starts = nwb.trials.start_time[:] - last_stop = nwb.trials.stop_time[-1] + trial_starts = nwb.trials.start_time[:] - t0 + last_stop = nwb.trials.stop_time[-1] - t0 trial_index = [] for index, e in df.iterrows(): starts = np.where(e.timestamps > trial_starts)[0] From 8164d19cddee7abc1f152af96b09eda542b14482 Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Mon, 4 Nov 2024 15:00:04 -0800 Subject: [PATCH 12/17] adding documentation for trials table --- src/aind_dynamic_foraging_data_utils/nwb_utils.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index 7e00471..07cf468 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -303,7 +303,17 @@ def create_df_trials(nwb_filename, adjust_time=True): """ Process nwb and create df_trials for every single session + ARGS: + nwb_filename (str or NWB object), the session to extract the trials from adjust_time (bool) if true, adjust t0 to be the first gocue + + RETURNS: + A pandas dataframe containing the columns of nwb.trials plus: + "_in_trial" time alignments where time is relative to the go cue on that trial + "_in_session" time alignments where time is relative to the first go cue + of the session. + earned_reward, (0 or 1) whether a reward was earned in that trial + extra_reward (bool) whether a manual reward was given in that trial """ # If we are given a filename, load the NWB object itself @@ -435,7 +445,6 @@ def create_df_trials(nwb_filename, adjust_time=True): df.query("earned_reward == 0").query("extra_reward == 0")["reward_time_in_session"] ) ), "Unrewarded trials with reward time" - # TODO, documentation of added columns # Drop columns drop_cols += key_from_acq From 82415c18b814708da740d827189ac26812e30250 Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Mon, 4 Nov 2024 16:12:41 -0800 Subject: [PATCH 13/17] adding raw_timstamps --- .../nwb_utils.py | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index 07cf468..18349ab 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -361,6 +361,9 @@ def create_df_trials(nwb_filename, adjust_time=True): # Clean up these column names that are not clear drop_cols.append(col) + # Add a column of raw time so users can map if they want + df["goCue_start_time_raw_time"] = df["goCue_start_time"] + # Get lick and reward times key_from_acq = [ "left_lick_time", @@ -508,20 +511,22 @@ def create_events_df(nwb_filename, adjust_time=True): events = [] for e in event_types: # For each event, get timestamps, data, and label - stamps = nwb.acquisition[e].timestamps[:] + raw_stamps = nwb.acquisition[e].timestamps[:] data = nwb.acquisition[e].data[:] labels = [e] * len(data) - stamps = stamps - t0 - df = pd.DataFrame({"timestamps": stamps, "data": data, "event": labels}) + stamps = raw_stamps - t0 + df = pd.DataFrame( + {"timestamps": stamps, "data": data, "event": labels, "raw_timestamps": raw_stamps} + ) events.append(df) # Add keys from trials table trial_events = ["goCue_start_time"] for e in trial_events: - stamps = nwb.trials[:][e].values - labels = [e] * len(stamps) - stamps = stamps - t0 - df = pd.DataFrame({"timestamps": stamps, "event": labels}) + raw_stamps = nwb.trials[:][e].values + labels = [e] * len(raw_stamps) + stamps = raw_stamps - t0 + df = pd.DataFrame({"timestamps": stamps, "event": labels, "raw_timestamps": raw_stamps}) events.append(df) # Build dataframe by concatenating each event @@ -603,19 +608,23 @@ def create_fib_df(nwb_filename, tidy=True, adjust_time=True): if len(event_types) == 0: return None - # Determine time 0 - t0 = nwb.trials.goCue_start_time[0] + # Determine time 0 as first go Cue + if adjust_time: + t0 = nwb.trials.goCue_start_time[0] + else: + t0 = 0 # Iterate over event types and build a dataframe of each events = [] for e in event_types: # For each event, get timestamps, data, and label - stamps = nwb.acquisition[e].timestamps[:] + raw_stamps = nwb.acquisition[e].timestamps[:] data = nwb.acquisition[e].data[:] labels = [e] * len(data) - if adjust_time: - stamps = stamps - t0 - df = pd.DataFrame({"timestamps": stamps, "data": data, "event": labels}) + stamps = raw_stamps - t0 + df = pd.DataFrame( + {"timestamps": stamps, "data": data, "event": labels, "raw_timestamps": raw_stamps} + ) events.append(df) # Build dataframe by concatenating each event From 2a9e5d1c915b28ad2a44bf19a41e554aa2d7ca3e Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Mon, 4 Nov 2024 16:19:12 -0800 Subject: [PATCH 14/17] documenting --- src/aind_dynamic_foraging_data_utils/nwb_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index 18349ab..38cc380 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -448,6 +448,7 @@ def create_df_trials(nwb_filename, adjust_time=True): df.query("earned_reward == 0").query("extra_reward == 0")["reward_time_in_session"] ) ), "Unrewarded trials with reward time" + # TODO, auto water can be delievered before choice time # Drop columns drop_cols += key_from_acq From 3c47346c2a8244a22b0ea34222db78cb9803772f Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Mon, 4 Nov 2024 20:08:20 -0800 Subject: [PATCH 15/17] comments --- src/aind_dynamic_foraging_data_utils/nwb_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index 38cc380..4ec3cd1 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -426,6 +426,7 @@ def create_df_trials(nwb_filename, adjust_time=True): df.loc[slow_choice, "choice_time_in_trial"] = np.nan # Compute boolean of whether animal was rewarded + # AutoWater and manual water is not included in earned_reward df["earned_reward"] = df.rewarded_historyR.astype(int) | df.rewarded_historyL.astype(int) df["extra_reward"] = (df["earned_reward"] == 0) & df["reward_time_in_session"].notnull() @@ -449,6 +450,8 @@ def create_df_trials(nwb_filename, adjust_time=True): ) ), "Unrewarded trials with reward time" # TODO, auto water can be delievered before choice time + # TODO, assigning choice/reward should check for left/right + # Drop columns drop_cols += key_from_acq From 96b328bb1d0208ead3a4cd5572e67cb4f1fa4901 Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Tue, 5 Nov 2024 10:50:08 -0800 Subject: [PATCH 16/17] making a global variable --- .../nwb_utils.py | 45 ++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index 4ec3cd1..9574880 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -19,6 +19,9 @@ from pynwb import NWBHDF5IO from hdmf_zarr import NWBZarrIO +# If we adjust time_in_session, adjust it to this +SESSION_ALIGNMENT = "goCue_start_time" + def load_nwb_from_filename(filename): """ @@ -312,7 +315,7 @@ def create_df_trials(nwb_filename, adjust_time=True): "_in_trial" time alignments where time is relative to the go cue on that trial "_in_session" time alignments where time is relative to the first go cue of the session. - earned_reward, (0 or 1) whether a reward was earned in that trial + earned_reward, (bool) whether a reward was earned in that trial extra_reward (bool) whether a manual reward was given in that trial """ @@ -344,7 +347,7 @@ def create_df_trials(nwb_filename, adjust_time=True): skip_cols = ["right_valve_open_time", "left_valve_open_time"] # compute times relative to start of trial and start of session - t0 = nwb.trials.goCue_start_time[0] + t0 = nwb.trials[SESSION_ALIGNMENT][0] drop_cols = [] for col in df.columns: if ("time" in col) and (col not in skip_cols): @@ -362,7 +365,7 @@ def create_df_trials(nwb_filename, adjust_time=True): drop_cols.append(col) # Add a column of raw time so users can map if they want - df["goCue_start_time_raw_time"] = df["goCue_start_time"] + df[SESSION_ALIGNMENT + "_raw"] = df[SESSION_ALIGNMENT] # Get lick and reward times key_from_acq = [ @@ -407,7 +410,9 @@ def create_df_trials(nwb_filename, adjust_time=True): ), axis=1, ) - df["reward_time_in_trial"] = df["reward_time_in_session"] - df["goCue_start_time_in_session"] + df["reward_time_in_trial"] = ( + df["reward_time_in_session"] - df[SESSION_ALIGNMENT + "_in_session"] + ) # Compute time of choice for each trials with warnings.catch_warnings(): @@ -418,7 +423,9 @@ def create_df_trials(nwb_filename, adjust_time=True): ), axis=1, ) - df["choice_time_in_trial"] = df["choice_time_in_session"] - df["goCue_start_time_in_session"] + df["choice_time_in_trial"] = ( + df["choice_time_in_session"] - df[SESSION_ALIGNMENT + "_in_session"] + ) # Filtering out choices greater than response window slow_choice = df["choice_time_in_trial"] > df["response_duration"] @@ -427,31 +434,29 @@ def create_df_trials(nwb_filename, adjust_time=True): # Compute boolean of whether animal was rewarded # AutoWater and manual water is not included in earned_reward - df["earned_reward"] = df.rewarded_historyR.astype(int) | df.rewarded_historyL.astype(int) - df["extra_reward"] = (df["earned_reward"] == 0) & df["reward_time_in_session"].notnull() + df["earned_reward"] = df.rewarded_historyR | df.rewarded_historyL + df["extra_reward"] = (~df["earned_reward"]) & df["reward_time_in_session"].notnull() # Sanity checks - rewarded_df = df.query("earned_reward == 1") + rewarded_df = df.query("earned_reward") assert ( np.isnan(rewarded_df["reward_time_in_session"]).sum() == 0 ), "Rewarded trials without reward time" assert ( np.isnan(rewarded_df["choice_time_in_session"]).sum() == 0 ), "Rewarded trials without choice time" - assert np.all( - rewarded_df["choice_time_in_session"] <= rewarded_df["reward_time_in_session"] - ), "Reward before choice time" + # assert np.all( + # rewarded_df["choice_time_in_session"] <= rewarded_df["reward_time_in_session"] + # ), "Reward before choice time" + if not np.all(rewarded_df["choice_time_in_session"] <= rewarded_df["reward_time_in_session"]): + warnings.warn("Reward before choice time. This is likely due to manual rewards") + # TODO, auto water can be delievered before choice time assert np.all( rewarded_df["choice_time_in_trial"] >= 0 ), "Rewarded trial with negative choice_time_in_trial" assert np.all( - np.isnan( - df.query("earned_reward == 0").query("extra_reward == 0")["reward_time_in_session"] - ) + np.isnan(df.query("not earned_reward").query("not extra_reward")["reward_time_in_session"]) ), "Unrewarded trials with reward time" - # TODO, auto water can be delievered before choice time - # TODO, assigning choice/reward should check for left/right - # Drop columns drop_cols += key_from_acq @@ -507,7 +512,7 @@ def create_events_df(nwb_filename, adjust_time=True): # Determine time 0 as first go Cue if adjust_time: - t0 = nwb.trials.goCue_start_time[0] + t0 = nwb.trials[SESSION_ALIGNMENT][0] else: t0 = 0 @@ -553,7 +558,7 @@ def create_events_df(nwb_filename, adjust_time=True): df["trial"] = trial_index # Sanity check that the first go cue is time 0 - gocues = df.query('event == "goCue_start_time"') + gocues = df.query("event == @SESSION_ALIGNMENT") if (len(gocues) > 0) and (adjust_time): assert np.isclose(gocues.iloc[0]["timestamps"], 0, rtol=0.01) # TODO, need more checks here for time alignment on trial index. @@ -614,7 +619,7 @@ def create_fib_df(nwb_filename, tidy=True, adjust_time=True): # Determine time 0 as first go Cue if adjust_time: - t0 = nwb.trials.goCue_start_time[0] + t0 = nwb.trials[SESSION_ALIGNMENT][0] else: t0 = 0 From c37bf992680d8451dc03a4d300d383a1351e5165 Mon Sep 17 00:00:00 2001 From: Alex Piet Date: Fri, 8 Nov 2024 15:14:05 -0800 Subject: [PATCH 17/17] updates based on rachels comments --- .../nwb_utils.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/aind_dynamic_foraging_data_utils/nwb_utils.py b/src/aind_dynamic_foraging_data_utils/nwb_utils.py index 9574880..d1f9c6a 100644 --- a/src/aind_dynamic_foraging_data_utils/nwb_utils.py +++ b/src/aind_dynamic_foraging_data_utils/nwb_utils.py @@ -359,6 +359,9 @@ def create_df_trials(nwb_filename, adjust_time=True): # Adjust times relative to go cue on each trial if ("time" in col) and (col not in skip_cols): + # Here we always align to goCue_start_time, not SESSION_ALIGNMENT + # since this aligns events relative to the trial go cue, not the start + # of the session df[col + "_in_trial"] = df[col].values - df["goCue_start_time"].values # Clean up these column names that are not clear @@ -435,6 +438,8 @@ def create_df_trials(nwb_filename, adjust_time=True): # Compute boolean of whether animal was rewarded # AutoWater and manual water is not included in earned_reward df["earned_reward"] = df.rewarded_historyR | df.rewarded_historyL + # TODO update this section once we have reliable labels for manual rewards + # See issue #54 df["extra_reward"] = (~df["earned_reward"]) & df["reward_time_in_session"].notnull() # Sanity checks @@ -463,7 +468,9 @@ def create_df_trials(nwb_filename, adjust_time=True): df = df.drop(columns=drop_cols) if adjust_time: - print("Timestamps are adjusted so t(0) = first go cue") + print( + "Timestamps are adjusted such that `_in_session` timestamps start at the first go cue" + ) return df @@ -534,8 +541,11 @@ def create_events_df(nwb_filename, adjust_time=True): for e in trial_events: raw_stamps = nwb.trials[:][e].values labels = [e] * len(raw_stamps) + data = [1] * len(raw_stamps) stamps = raw_stamps - t0 - df = pd.DataFrame({"timestamps": stamps, "event": labels, "raw_timestamps": raw_stamps}) + df = pd.DataFrame( + {"timestamps": stamps, "data": data, "event": labels, "raw_timestamps": raw_stamps} + ) events.append(df) # Build dataframe by concatenating each event @@ -564,7 +574,9 @@ def create_events_df(nwb_filename, adjust_time=True): # TODO, need more checks here for time alignment on trial index. if adjust_time: - print("Timestamps are adjusted so t(0) = first go cue") + print( + "Timestamps are adjusted such that `_in_session` timestamps start at the first go cue" + ) return df @@ -654,7 +666,9 @@ def create_fib_df(nwb_filename, tidy=True, adjust_time=True): df["ses_idx"] = ses_idx if adjust_time: - print("Timestamps are adjusted so t(0) = first go cue") + print( + "Timestamps are adjusted such that `_in_session` timestamps start at the first go cue" + ) # pivot table based on timestamps if not tidy: