diff --git a/src/aind_dynamic_foraging_basic_analysis/metrics/session_metrics.py b/src/aind_dynamic_foraging_basic_analysis/metrics/session_metrics.py index 5c4b73d..fdb5ff8 100644 --- a/src/aind_dynamic_foraging_basic_analysis/metrics/session_metrics.py +++ b/src/aind_dynamic_foraging_basic_analysis/metrics/session_metrics.py @@ -19,104 +19,179 @@ def session_metrics(nwb): """ - Compute all session level metrics - - Includes session level metadata as a temporary organizer - Includes majority of metrics from process_nwbs.py + Compute all session metadata and performance metrics + + -- Metadata -- + block structure metrics, block, contrast, and effective + probability metrics + duration metrics, gocue, delay period, and iti + metrics + reward size metrics, left and right reward volumes + lick spout movement, movement of lickspout during session: + range, initial position, median position + autotrain categories, curriculum version, name, schema, + current_stage_actual, and if overriden + + -- Performance -- + basic performance metrics, both autowater and non-autowater specific + rates (total, finished, ignored, finished rate, + ignored rate, reward rate) + calculated metrics, foraging efficiency, foraging performance + (both normal and random seed), bias naive, + chosen probability + lick metrics, reaction mean and median, early lick rate, invalid + lick ratio, double dipping finished rates (reward and total), + lick consistency means (total, reward, and non-rewarded) New addition: chosen_probability - average difference between the chosen probability and non-chosen probability / the difference between the largest and smallest probability in the session """ - if not hasattr(nwb, "df_trials"): - print("You need to compute dfs: nwb_utils.create_trials_df(nwb)") + if not hasattr(nwb, 'df_trials'): + print('You need to compute df_trials: nwb_utils.create_trials_df(nwb)') return - + df = nwb.df_trials.copy() - # METADATA PLACEHOLDER + # Block information + def _get_block_starts(p_L, p_R): + """Find the indices of block starts""" + block_start_ind_left = np.where(np.hstack([True, np.diff(p_L) != 0]))[0] + block_start_ind_right = np.where(np.hstack([True, np.diff(p_R) != 0]))[0] + block_start_ind_effective = np.sort(np.unique(np.hstack([block_start_ind_left, block_start_ind_right]))) + return block_start_ind_left, block_start_ind_right, block_start_ind_effective + + # -- Key meta data -- session_start_time = nwb.session_start_time session_date = session_start_time.strftime("%Y-%m-%d") - subject_id_from_meta = nwb.subject.subject_id - - # Parse the file name for suffix - old_re = re.match( - r"(?P\d+)_(?P\d{4}-\d{2}-\d{2})(?:_(?P\d+))?\.json", nwb.session_id - ) - - if old_re is not None: - subject_id, session_date, nwb_suffix = old_re.groups() - nwb_suffix = int(nwb_suffix) if nwb_suffix is not None else 0 + subject_id = nwb.subject.subject_id + + # -- Block and probability analysis -- + p_L = df.reward_probabilityL.values + p_R = df.reward_probabilityR.values + p_contrast = np.max([p_L, p_R], axis=0) / (np.min([p_L, p_R], axis=0) + 1e-6) + p_contrast[p_contrast > 100] = 100 # Cap the contrast at 100 + + # Parse effective block + block_start_left, block_start_right, block_start_effective = _get_block_starts(p_L, p_R) + if 'uncoupled' not in nwb.protocol.lower(): + if not (len(block_start_left) == len(block_start_right) + and all(block_start_left == block_start_right)): + logger.warning("Blocks are not fully aligned in a Coupled task!") + + # -- Metadata dictionary -- + dict_meta = { + 'subject_id': subject_id, + 'session_date': session_date, + 'user_name': nwb.experimenter[0], + 'task': nwb.protocol, + 'session_start_time': session_start_time, + + + # Block structure metrics + 'p_reward_sum_mean': np.mean(p_L + p_R), + 'p_reward_sum_std': np.std(p_L + p_R), + 'p_reward_sum_median': np.median(p_L + p_R), + + 'p_reward_contrast_mean': np.mean(p_contrast), + 'p_reware_contrast_median': np.median(p_contrast), + + 'effective_block_length_mean': np.mean(np.diff(block_start_effective)), + 'effective_block_length_std': np.std(np.diff(block_start_effective)), + 'effective_block_length_median': np.median(np.diff(block_start_effective)), + 'effective_block_length_min': np.min(np.diff(block_start_effective)), + 'effective_block_length_max': np.max(np.diff(block_start_effective)), + + # Duration metrics + 'duration_gocue_stop_mean': df.loc[:, 'duration_gocue_stop'].mean(), + 'duration_gocue_stop_std': df.loc[:, 'duration_gocue_stop'].std(), + 'duration_gocue_stop_median': df.loc[:, 'duration_gocue_stop'].median(), + 'duration_gocue_stop_min': df.loc[:, 'duration_gocue_stop'].min(), + 'duration_gocue_stop_max': df.loc[:, 'duration_gocue_stop'].max(), + + 'duration_delay_period_mean': df.loc[:, 'duration_delay_period'].mean(), + 'duration_delay_period_std': df.loc[:, 'duration_delay_period'].std(), + 'duration_delay_period_median': df.loc[:, 'duration_delay_period'].median(), + 'duration_delay_period_min': df.loc[:, 'duration_delay_period'].min(), + 'duration_delay_period_max': df.loc[:, 'duration_delay_period'].max(), + + 'duration_iti_mean': df.loc[:, 'duration_iti'].mean(), + 'duration_iti_std': df.loc[:, 'duration_iti'].std(), + 'duration_iti_median': df.loc[:, 'duration_iti'].median(), + 'duration_iti_min': df.loc[:, 'duration_iti'].min(), + 'duration_iti_max': df.loc[:, 'duration_iti'].max(), + + # Reward size metrics + 'reward_volume_left_mean': df.loc[df.reward, 'reward_size_left'].mean(), + 'reward_volume_right_mean': df.loc[df.reward, 'reward_size_right'].mean(), + + # Lickspouts movement range (in um) + **{f'lickspout_movement_range_{axis}': + np.ptp(df[f'lickspout_position_{axis}']) for axis in 'xyz'}, + **{f'lickspout_initial_pos_{axis}': + df[f'lickspout_position_{axis}'][0] for axis in 'xyz'}, + **{f'lickspout_median_pos_{axis}': + np.median(df[f'lickspout_position_{axis}']) for axis in 'xyz'}, + } + + # Add flag for old bpod session + if 'bpod' in nwb.session_description: + dict_meta['old_bpod_session'] = True + + # Create metadata DataFrame + df_meta = pd.DataFrame(dict_meta, index=[0]) + + # Add automatic training info + if 'auto_train_engaged' in df.columns: + df_meta['auto_train', 'curriculum_name'] = np.nan if df.auto_train_curriculum_name.mode()[0].lower() == 'none' else df.auto_train_curriculum_name.mode()[0] + df_meta['auto_train', 'curriculum_version'] = np.nan if df.auto_train_curriculum_version.mode()[0].lower() == 'none' else df.auto_train_curriculum_version.mode()[0] + df_meta['auto_train', 'curriculum_schema_version'] = np.nan if df.auto_train_curriculum_schema_version.mode()[0].lower() == 'none' else df.auto_train_curriculum_schema_version.mode()[0] + df_meta['auto_train', 'current_stage_actual'] = np.nan if df.auto_train_stage.mode()[0].lower() == 'none' else df.auto_train_stage.mode()[0] + df_meta['auto_train', 'if_overriden_by_trainer'] = np.nan if all(df.auto_train_stage_overridden.isna()) else df.auto_train_stage_overridden.mode()[0] + + # Check consistency of auto train settings + df_meta['auto_train', 'if_consistent_within_session'] = len(df.groupby( + [col for col in df.columns if 'auto_train' in col] + )) == 1 else: - subject_id, session_date, session_json_time = re.match( - r"(?P\d+)_(?P\d{4}-\d{2}-\d{2})(?:_(?P