adding lick stats to trial_metrics, adding rest of process_nwb stats to session_metrics
nkeesey committed Dec 2, 2024
1 parent 54b40a8 commit 78476a4
Showing 2 changed files with 300 additions and 139 deletions.
354 changes: 215 additions & 139 deletions src/aind_dynamic_foraging_basic_analysis/metrics/session_metrics.py
@@ -19,104 +19,179 @@

def session_metrics(nwb):
"""
Compute all session level metrics
Includes session level metadata as a temporary organizer
Includes majority of metrics from process_nwbs.py
Compute all session metadata and performance metrics
-- Metadata --
block structure metrics, block, contrast, and effective
probability metrics
duration metrics, gocue, delay period, and iti
metrics
reward size metrics, left and right reward volumes
lick spout movement, movement of lickspout during session:
range, initial position, median position
autotrain categories, curriculum version, name, schema,
current_stage_actual, and if overriden
-- Performance --
basic performance metrics, both autowater and non-autowater specific
rates (total, finished, ignored, finished rate,
ignored rate, reward rate)
calculated metrics, foraging efficiency, foraging performance
(both normal and random seed), bias naive,
chosen probability
lick metrics, reaction mean and median, early lick rate, invalid
lick ratio, double dipping finished rates (reward and total),
lick consistency means (total, reward, and non-rewarded)
New addition: chosen_probability - average difference between the chosen probability
and non-chosen probability / the difference between the largest and smallest probability
in the session
"""

if not hasattr(nwb, "df_trials"):
print("You need to compute dfs: nwb_utils.create_trials_df(nwb)")
if not hasattr(nwb, 'df_trials'):
print('You need to compute df_trials: nwb_utils.create_trials_df(nwb)')
return

df = nwb.df_trials.copy()

# METADATA PLACEHOLDER
# Block information
def _get_block_starts(p_L, p_R):
"""Find the indices of block starts"""
block_start_ind_left = np.where(np.hstack([True, np.diff(p_L) != 0]))[0]
block_start_ind_right = np.where(np.hstack([True, np.diff(p_R) != 0]))[0]
block_start_ind_effective = np.sort(np.unique(np.hstack([block_start_ind_left, block_start_ind_right])))
return block_start_ind_left, block_start_ind_right, block_start_ind_effective
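    # Editor's illustrative sketch (toy schedule, not part of the commit):
    # np.diff flags trials where a reward probability changes, and the
    # leading True marks trial 0 as the first block start.
    _example_p_L = np.array([0.8, 0.8, 0.8, 0.2, 0.2, 0.2, 0.2])
    _example_p_R = np.array([0.2, 0.2, 0.2, 0.2, 0.2, 0.8, 0.8])
    _example_blocks = _get_block_starts(_example_p_L, _example_p_R)
    # -> (array([0, 3]), array([0, 5]), array([0, 3, 5]))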

    # -- Key metadata --
    session_start_time = nwb.session_start_time
    session_date = session_start_time.strftime("%Y-%m-%d")
    subject_id = nwb.subject.subject_id

    # -- Block and probability analysis --
    p_L = df.reward_probabilityL.values
    p_R = df.reward_probabilityR.values
    p_contrast = np.max([p_L, p_R], axis=0) / (np.min([p_L, p_R], axis=0) + 1e-6)
    p_contrast[p_contrast > 100] = 100  # Cap the contrast at 100

    # Parse effective blocks
    block_start_left, block_start_right, block_start_effective = _get_block_starts(p_L, p_R)
    if 'uncoupled' not in nwb.protocol.lower():
        if not (len(block_start_left) == len(block_start_right)
                and all(block_start_left == block_start_right)):
            logger.warning("Blocks are not fully aligned in a Coupled task!")

    # -- Metadata dictionary --
    dict_meta = {
        'subject_id': subject_id,
        'session_date': session_date,
        'user_name': nwb.experimenter[0],
        'task': nwb.protocol,
        'session_start_time': session_start_time,

        # Block structure metrics
        'p_reward_sum_mean': np.mean(p_L + p_R),
        'p_reward_sum_std': np.std(p_L + p_R),
        'p_reward_sum_median': np.median(p_L + p_R),

        'p_reward_contrast_mean': np.mean(p_contrast),
        'p_reward_contrast_median': np.median(p_contrast),

        'effective_block_length_mean': np.mean(np.diff(block_start_effective)),
        'effective_block_length_std': np.std(np.diff(block_start_effective)),
        'effective_block_length_median': np.median(np.diff(block_start_effective)),
        'effective_block_length_min': np.min(np.diff(block_start_effective)),
        'effective_block_length_max': np.max(np.diff(block_start_effective)),

        # Duration metrics
        'duration_gocue_stop_mean': df.loc[:, 'duration_gocue_stop'].mean(),
        'duration_gocue_stop_std': df.loc[:, 'duration_gocue_stop'].std(),
        'duration_gocue_stop_median': df.loc[:, 'duration_gocue_stop'].median(),
        'duration_gocue_stop_min': df.loc[:, 'duration_gocue_stop'].min(),
        'duration_gocue_stop_max': df.loc[:, 'duration_gocue_stop'].max(),

        'duration_delay_period_mean': df.loc[:, 'duration_delay_period'].mean(),
        'duration_delay_period_std': df.loc[:, 'duration_delay_period'].std(),
        'duration_delay_period_median': df.loc[:, 'duration_delay_period'].median(),
        'duration_delay_period_min': df.loc[:, 'duration_delay_period'].min(),
        'duration_delay_period_max': df.loc[:, 'duration_delay_period'].max(),

        'duration_iti_mean': df.loc[:, 'duration_iti'].mean(),
        'duration_iti_std': df.loc[:, 'duration_iti'].std(),
        'duration_iti_median': df.loc[:, 'duration_iti'].median(),
        'duration_iti_min': df.loc[:, 'duration_iti'].min(),
        'duration_iti_max': df.loc[:, 'duration_iti'].max(),

        # Reward size metrics
        'reward_volume_left_mean': df.loc[df.reward, 'reward_size_left'].mean(),
        'reward_volume_right_mean': df.loc[df.reward, 'reward_size_right'].mean(),

        # Lickspout movement (in um)
        **{f'lickspout_movement_range_{axis}':
           np.ptp(df[f'lickspout_position_{axis}']) for axis in 'xyz'},
        **{f'lickspout_initial_pos_{axis}':
           df[f'lickspout_position_{axis}'][0] for axis in 'xyz'},
        **{f'lickspout_median_pos_{axis}':
           np.median(df[f'lickspout_position_{axis}']) for axis in 'xyz'},
    }
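    # Editor's sketch (toy values, not part of the commit): each **{...}
    # comprehension above unpacks one key per axis into dict_meta, e.g.
    _example_axes = {f'lickspout_movement_range_{axis}': np.ptp(np.array([0.0, 2.0, 1.0]))
                     for axis in 'xyz'}
    # -> {'lickspout_movement_range_x': 2.0, 'lickspout_movement_range_y': 2.0,
    #     'lickspout_movement_range_z': 2.0}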

    # Add flag for old bpod session
    if 'bpod' in nwb.session_description:
        dict_meta['old_bpod_session'] = True

    # Create metadata DataFrame
    df_meta = pd.DataFrame(dict_meta, index=[0])

    # Add automatic training info
    if 'auto_train_engaged' in df.columns:
        df_meta['auto_train', 'curriculum_name'] = (
            np.nan if df.auto_train_curriculum_name.mode()[0].lower() == 'none'
            else df.auto_train_curriculum_name.mode()[0]
        )
        df_meta['auto_train', 'curriculum_version'] = (
            np.nan if df.auto_train_curriculum_version.mode()[0].lower() == 'none'
            else df.auto_train_curriculum_version.mode()[0]
        )
        df_meta['auto_train', 'curriculum_schema_version'] = (
            np.nan if df.auto_train_curriculum_schema_version.mode()[0].lower() == 'none'
            else df.auto_train_curriculum_schema_version.mode()[0]
        )
        df_meta['auto_train', 'current_stage_actual'] = (
            np.nan if df.auto_train_stage.mode()[0].lower() == 'none'
            else df.auto_train_stage.mode()[0]
        )
        df_meta['auto_train', 'if_overriden_by_trainer'] = (
            np.nan if all(df.auto_train_stage_overridden.isna())
            else df.auto_train_stage_overridden.mode()[0]
        )

        # Check consistency of auto train settings within the session
        df_meta['auto_train', 'if_consistent_within_session'] = len(df.groupby(
            [col for col in df.columns if 'auto_train' in col]
        )) == 1
    else:
        for field in ['curriculum_name', 'curriculum_version', 'curriculum_schema_version',
                      'current_stage_actual', 'if_overriden_by_trainer']:
            df_meta['auto_train', field] = None
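    # Editor's sketch (toy column, not part of the commit): .mode()[0] above
    # picks the most frequent value in each auto_train column, and a literal
    # 'none' is mapped to np.nan.
    _example_stage = pd.Series(['STAGE_3', 'STAGE_3', 'STAGE_4']).mode()[0]  # -> 'STAGE_3'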

    # -- Performance Metrics --
    n_total_trials = len(df)
    n_finished_trials = (df.animal_response != IGNORE).sum()

    # Actual foraging trials (autowater excluded)
    n_total_trials_non_autowater = df.non_autowater_trial.sum()
    n_finished_trials_non_autowater = df.non_autowater_finished_trial.sum()

    n_reward_trials_non_autowater = df.reward_non_autowater.sum()
    reward_rate_non_autowater_finished = (
        n_reward_trials_non_autowater / n_finished_trials_non_autowater
        if n_finished_trials_non_autowater > 0
        else np.nan
    )

    # Foraging efficiency
    foraging_eff, foraging_eff_random_seed = compute_foraging_efficiency(
        baited='without bait' not in nwb.protocol.lower(),
        choice_history=df.animal_response.map({0: 0, 1: 1, 2: np.nan}).values,
        reward_history=df.rewarded_historyL | df.rewarded_historyR,
        p_reward=[
            df.reward_probabilityL.values,
            df.reward_probabilityR.values,
        ],
        random_number=[
            df.reward_random_number_left.values,
            df.reward_random_number_right.values,
        ],
        autowater_offered=(df.auto_waterL == 1) | (df.auto_waterR == 1),
    )

    # Override foraging_eff_random_seed for old bpod sessions
    if "bpod" in nwb.session_description:
        foraging_eff_random_seed = nwb.get_scratch("metadata")[
            "foraging_efficiency_with_actual_random_seed"
        ].values[0]

    finished_rate = (
        n_finished_trials_non_autowater / n_total_trials_non_autowater
        if n_total_trials_non_autowater > 0
        else np.nan
    )

    # New Metrics
    all_lick_number = (
        len(nwb.acquisition['left_lick_time'].timestamps)
        + len(nwb.acquisition['right_lick_time'].timestamps)
    )

    # Naive bias calculation
    n_left = ((df.animal_response == LEFT) & (df.non_autowater_trial)).sum()
    n_right = ((df.animal_response == RIGHT) & (df.non_autowater_trial)).sum()
    bias_naive = 2 * (n_right / (n_left + n_right) - 0.5) if n_left + n_right > 0 else np.nan
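    # Editor's worked example (toy counts, not part of the commit): bias_naive
    # rescales the right-choice fraction onto [-1, 1], where -1 means all-left
    # and +1 all-right; e.g. 70 right vs 30 left choices:
    _example_bias_naive = 2 * (70 / (30 + 70) - 0.5)  # -> 0.4 (rightward bias)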

    # Probability chosen calculation
    probability_chosen = []

@@ -138,7 +138,7 @@ def session_metrics(nwb):

    df["probability_chosen"] = probability_chosen
    df["probability_not_chosen"] = probability_not_chosen

    # Calculate chosen probability
    average = df["probability_chosen"] - df["probability_not_chosen"]

    p_larger_global = max(
@@ -152,66 +227,67 @@ def session_metrics(nwb):
    mean_difference = average.mean()
    chosen_probability = mean_difference / (p_larger_global - p_smaller_global)
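    # Editor's worked example (toy numbers, not part of the commit): with
    # probabilities drawn from {0.8, 0.2} and the richer side chosen on 3 of
    # 4 trials, the chosen-minus-not-chosen differences are [0.6, 0.6, 0.6, -0.6],
    # and normalizing their mean by the global spread (0.8 - 0.2) gives 0.5:
    _example_diffs = np.array([0.6, 0.6, 0.6, -0.6])
    _example_chosen_probability = _example_diffs.mean() / (0.8 - 0.2)  # -> 0.5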

    # Performance dictionary
    dict_performance = {
        # Basic performance metrics
        'total_trials_with_autowater': n_total_trials,
        'finished_trials_with_autowater': n_finished_trials,
        'finished_rate_with_autowater': n_finished_trials / n_total_trials,
        'ignore_rate_with_autowater': 1 - n_finished_trials / n_total_trials,
        'autowater_collected': (~df.non_autowater_trial & (df.animal_response != IGNORE)).sum(),
        'autowater_ignored': (~df.non_autowater_trial & (df.animal_response == IGNORE)).sum(),

        'total_trials': n_total_trials_non_autowater,
        'finished_trials': n_finished_trials_non_autowater,
        'ignored_trials': n_total_trials_non_autowater - n_finished_trials_non_autowater,
        'finished_rate': finished_rate,
        'ignore_rate': 1 - finished_rate,

        'reward_trials': n_reward_trials_non_autowater,
        'reward_rate': reward_rate_non_autowater_finished,
        'foraging_eff': foraging_eff,
        'foraging_eff_random_seed': foraging_eff_random_seed,

        'foraging_performance': foraging_eff * finished_rate,
        'foraging_performance_random_seed': foraging_eff_random_seed * finished_rate,

        'bias_naive': bias_naive,

        # New Metrics
        'chosen_probability': chosen_probability,

        # Lick timing metrics
        'reaction_time_median': df.loc[:, 'reaction_time'].median(),
        'reaction_time_mean': df.loc[:, 'reaction_time'].mean(),

        'early_lick_rate':
            (df.loc[:, 'n_lick_all_delay_period'] > 0).sum() / n_total_trials,

        'invalid_lick_ratio':
            (all_lick_number - df.loc[:, 'n_lick_all_gocue_stop'].sum()) / all_lick_number,

        # Double dipping metrics
        'double_dipping_rate_finished_trials':
            (df.loc[(df.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
            / (df.animal_response != IGNORE).sum(),
        'double_dipping_rate_finished_reward_trials':
            (df.loc[df.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
            / df.reward.sum(),
        'double_dipping_rate_finished_noreward_trials':
            (df.loc[(df.animal_response != IGNORE) & (~df.reward),
                    'n_lick_switches_gocue_stop'] > 0).sum()
            / ((df.animal_response != IGNORE) & (~df.reward)).sum(),

        # Lick consistency metrics
        'lick_consistency_mean_finished_trials':
            df.loc[(df.animal_response != IGNORE), 'n_lick_consistency_gocue_stop'].mean(),
        'lick_consistency_mean_finished_reward_trials':
            df.loc[df.reward, 'n_lick_consistency_gocue_stop'].mean(),
        'lick_consistency_mean_finished_noreward_trials':
            df.loc[(df.animal_response != IGNORE) & (~df.reward),
                   'n_lick_consistency_gocue_stop'].mean(),
    }

    # Generate performance DataFrame
    df_performance = pd.DataFrame(dict_performance, index=[0])

    # Create session DataFrame combining metadata and performance
    session_df = pd.DataFrame({**dict_meta, **dict_performance}, index=[0])

    return session_df
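# Usage sketch (editor's note, not part of this commit; the import path is
# inferred from this repository's src/ layout, and the df_trials step follows
# the function's own guard message above):
#
#   from aind_dynamic_foraging_basic_analysis.metrics.session_metrics import session_metrics
#
#   nwb = ...                          # load a dynamic-foraging NWB session
#   nwb_utils.create_trials_df(nwb)    # compute df_trials, as the guard requires
#   session_df = session_metrics(nwb)  # one-row DataFrame of metadata + performance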