From 9396815545602fd6fd015c21e69083a073d285da Mon Sep 17 00:00:00 2001 From: Berk Gercek Date: Mon, 1 Jul 2024 17:47:18 +0200 Subject: [PATCH 1/8] changes to load scripts --- brainwidemap/encoding/params.py | 4 ++-- brainwidemap/encoding/pipelines/01_cache_regressors.py | 2 +- brainwidemap/encoding/pipelines/02_fit_sessions.py | 2 +- brainwidemap/encoding/pipelines/03_gather_results.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/brainwidemap/encoding/params.py b/brainwidemap/encoding/params.py index 8454f987..1d8705ee 100644 --- a/brainwidemap/encoding/params.py +++ b/brainwidemap/encoding/params.py @@ -4,5 +4,5 @@ work. """ -GLM_CACHE = "/home/gercek/scratch/glm_cache/" -GLM_FIT_PATH = "/home/gercek/scratch/results/glms/" +GLM_CACHE = "/home/gercek/Projects/glm_cache/" +GLM_FIT_PATH = "/home/gercek/Projects/results/glms/" diff --git a/brainwidemap/encoding/pipelines/01_cache_regressors.py b/brainwidemap/encoding/pipelines/01_cache_regressors.py index c71c8043..cba57518 100644 --- a/brainwidemap/encoding/pipelines/01_cache_regressors.py +++ b/brainwidemap/encoding/pipelines/01_cache_regressors.py @@ -65,7 +65,7 @@ def delayed_loadsave(subject, session_id, pid, params): T_BEF = 0.6 # Time before stimulus onset to include in the definition of the trial T_AFT = 0.6 # Time after feedback to include in the definition of a trial BINWIDTH = 0.02 # Size of binwidth for wheel velocity traces, in seconds -ABSWHEEL = True # Whether to return wheel velocity (False) or speed (True) +ABSWHEEL = False # Whether to return wheel velocity (False) or speed (True) CLU_CRITERIA = "bwm" # Criteria on cluster inclusion in cache # End parameters diff --git a/brainwidemap/encoding/pipelines/02_fit_sessions.py b/brainwidemap/encoding/pipelines/02_fit_sessions.py index da7cf1be..9f0435bb 100644 --- a/brainwidemap/encoding/pipelines/02_fit_sessions.py +++ b/brainwidemap/encoding/pipelines/02_fit_sessions.py @@ -108,7 +108,7 @@ def tmp_binf(t): "wheel_offset": -0.3, "contnorm": 5.0, "reduce_wheel_dim": False, - "dataset_fn": "2024-01-06_dataset_metadata.pkl", + "dataset_fn": "2024-03-18_dataset_metadata.pkl", "model": lm.LinearGLM, "alpha_grid": {"alpha": np.logspace(-3, 2, 50)}, "contiguous": False, diff --git a/brainwidemap/encoding/pipelines/03_gather_results.py b/brainwidemap/encoding/pipelines/03_gather_results.py index e14408a5..59491c3f 100644 --- a/brainwidemap/encoding/pipelines/03_gather_results.py +++ b/brainwidemap/encoding/pipelines/03_gather_results.py @@ -53,7 +53,7 @@ def label_cerebellum(acronym, brainregions=BrainRegions()): # Brainwide repo imports from brainwidemap.encoding.params import GLM_CACHE, GLM_FIT_PATH - currdate = "2024-01-22" # Date on which fit was run + currdate = "2024-03-18" # Date on which fit was run n_cov = 9 # Modify if you change the model! parpath = Path(GLM_FIT_PATH).joinpath(f"{currdate}_glm_fit_pars.pkl") with open(parpath, "rb") as fo: From 7680718f3dc65d7c6dc3bda04af08df0fb1af4ce Mon Sep 17 00:00:00 2001 From: Berk Gercek Date: Mon, 9 Sep 2024 15:06:28 +0200 Subject: [PATCH 2/8] Add option for early RT splits --- brainwidemap/encoding/cluster_worker.py | 29 ++- brainwidemap/encoding/design.py | 216 +++++++++++++++++- .../encoding/pipelines/01_cache_regressors.py | 6 +- .../encoding/pipelines/02_fit_sessions.py | 21 +- .../encoding/pipelines/03_gather_results.py | 42 ++-- .../encoding/pipelines/04_plot_figures.py | 68 +++--- .../encoding/scripts/twocond_plots.py | 4 +- brainwidemap/encoding/utils.py | 34 +-- 8 files changed, 314 insertions(+), 106 deletions(-) diff --git a/brainwidemap/encoding/cluster_worker.py b/brainwidemap/encoding/cluster_worker.py index cc82ced8..e1a5a3df 100644 --- a/brainwidemap/encoding/cluster_worker.py +++ b/brainwidemap/encoding/cluster_worker.py @@ -17,7 +17,7 @@ from pandas import read_pickle # Brainwidemap repo imports -from brainwidemap.encoding.design import generate_design +from brainwidemap.encoding.design import generate_design, generate_design_earlyrts from brainwidemap.encoding.fit import fit_stepwise, fit_stepwise_with_pseudoblocks from brainwidemap.encoding.params import GLM_FIT_PATH @@ -82,10 +82,16 @@ def fit_save_inputs( t_before, fitdate, null=None, + earlyrts=False, ): stdf, sspkt, sspkclu, sclureg, scluqc = get_cached_regressors(eidfn) sessprior = stdf["probabilityLeft"] - sessdesign = generate_design(stdf, sessprior, t_before, **params) + if not earlyrts: + sessdesign = generate_design(stdf, sessprior, t_before, **params) + else: + if "rt_thresh" not in params: + print('WARNING: No threshold to define an "early" RT passed. Defaulting to 50ms.') + sessdesign = generate_design_earlyrts(stdf, sessprior, t_before, **params) if null is None: sessfit = fit_stepwise(sessdesign, sspkt, sspkclu, **params) outputfn = save_stepwise( @@ -115,11 +121,13 @@ def fit_save_inputs( if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Cluster GLM fitter. This script is called by" - "the batch script generated in " - "pipelines/02_fit_sessions.py and should in most " - "cases beyond debugging not be used in a " - "standalone fashion.") + parser = argparse.ArgumentParser( + description="Cluster GLM fitter. This script is called by" + "the batch script generated in " + "pipelines/02_fit_sessions.py and should in most " + "cases beyond debugging not be used in a " + "standalone fashion." + ) parser.add_argument( "datafile", type=Path, @@ -132,6 +140,12 @@ def fit_save_inputs( ) parser.add_argument("fitdate", help="Date of fit for output file") parser.add_argument("--impostor_path", type=Path, help="Path to main impostor df file") + parser.add_argument( + "--earlyrt", + type=bool, + default=False, + help="Whether to fit separate movement kernels to early trials", + ) args = parser.parse_args() with open(args.datafile, "rb") as fo: @@ -155,6 +169,7 @@ def fit_save_inputs( t_before, args.fitdate, null=params["null"], + earlyrts=args.earlyrt, ) print("Fitting completed successfully!") print(outputfn) diff --git a/brainwidemap/encoding/design.py b/brainwidemap/encoding/design.py index 37ecd0b5..c6f818d8 100644 --- a/brainwidemap/encoding/design.py +++ b/brainwidemap/encoding/design.py @@ -6,13 +6,12 @@ # Standard library import logging +# IBL libraries +import neurencoding.design_matrix as dm + # Third party libraries import numpy as np import pandas as pd -from scipy.stats import norm - -# IBL libraries -import neurencoding.design_matrix as dm _logger = logging.getLogger("brainwide") @@ -207,3 +206,212 @@ def stepfunc_poststim(row): _logger.info(f"Condition of design matrix: {np.linalg.cond(design.dm)}") return design + + +def generate_design_earlyrts( + trialsdf, + prior, + t_before, + bases, + iti_prior_win=[-0.4, -0.1], + iti_prior_val=None, + fmove_offset=-0.4, + wheel_offset=-0.4, + rt_thresh=0.05, + contnorm=5.0, + binwidth=0.02, + reduce_wheel_dim=True, + addtl_vars=None, + **kwargs, +): + """ + Generate GLM design matrix object + + Parameters + ---------- + trialsdf : pd.DataFrame + Trials dataframe with trial timings in absolute (since session start) time + prior : array-like + Vector containing the prior estimate or true prior for each trial. Must be same length as + trialsdf. If iti_prior_val is not None, this is used as the prior for the ITI period. + t_before : float + Time, in seconds, before stimulus onset that was used to define trial_start in trialsdf. + This defines the beginning of the window, relative to each trial, where we examine the + spiking data. + bases : dict + Dictionary of basis functions for each regressor. Needs keys 'stim', 'feedback', 'fmove', + (first movement) and 'wheel'. + iti_prior_win : list, optional + Two element list defining bounds relative to stimulus on which step function for ITI prior + is applied, by default [-0.4, -0.1] + iti_prior_val : float, optional + Value to use for ITI prior, by default None. If None, uses prior. + fmove_offset : float, optional + Offset, in seconds, to apply to first movement regressor, by default -0.4. This is relative + to the movement onset time, and if you want a purely anti-causal kernel should be + equivalent to the basis function length. + wheel_offset : float, optional + Offset, in seconds, to apply to wheel regressor, by default -0.4. See above. + contnorm : float, optional + Normalization factor for contrast, by default 5. + Applied as tanh(contrast * contnorm) / tanh(contnorm) + binwidth : float, optional + Size of bins to use for design matrix, in seconds, by default 0.02. This must match + the binwidth of the neural glm object later used to fit the design matrix. + reduce_wheel_dim : bool, optional + Whether to reduce the dimensionality of the wheel regressor, by default True. If True, + will use enough principal components to capture 99.999% of the variance. Smooths out + very high frequency fluctuations in the wheel velocity. + addtl_vars : dict, optional + Dictionary of additional variables in the trialsdf along with their variable types. These + are columns that are not normally part of the output of load_trials_df, and will therefore + raise an error if the design matrix building doesn't find a type specification for them. + See neurencoding.design.DesignMatrix for more details on variable types, by default None. + """ + if len(kwargs) > 0: + _logger.info( + f"keys {kwargs.keys()} were not used in generate_design," " despite being passed." + ) + trialsdf["adj_contrastL"] = np.tanh(contnorm * trialsdf["contrastLeft"]) / np.tanh(contnorm) + trialsdf["adj_contrastR"] = np.tanh(contnorm * trialsdf["contrastRight"]) / np.tanh(contnorm) + trialsdf["prior"] = prior + if iti_prior_val is not None: + trialsdf["iti_prior"] = iti_prior_val + else: + trialsdf["iti_prior"] = prior + trialsdf["prior_last"] = pd.Series(np.roll(trialsdf["prior"], 1), index=trialsdf.index) + trialsdf["iti_prior_last"] = pd.Series(np.roll(trialsdf["iti_prior"], 1), index=trialsdf.index) + trialsdf["pLeft_last"] = pd.Series( + np.roll(trialsdf["probabilityLeft"], 1), index=trialsdf.index + ) + + vartypes = { + "choice": "value", + "response_times": "timing", + "probabilityLeft": "value", + "pLeft_last": "value", + "feedbackType": "value", + "feedback_times": "timing", + "contrastLeft": "value", + "adj_contrastL": "value", + "contrastRight": "value", + "adj_contrastR": "value", + "goCue_times": "timing", + "stimOn_times": "timing", + "trial_start": "timing", + "trial_end": "timing", + "prior": "value", + "prior_last": "value", + "iti_prior": "value", + "iti_prior_last": "value", + "wheel_velocity": "continuous", + "firstMovement_times": "timing", + } + if addtl_vars is not None and isinstance(addtl_vars, dict): + vartypes.update(addtl_vars) + + def stepfunc_prestim(row): + stepvec = np.zeros(design.binf(row.duration)) + stepvec[stepbounds[0] : stepbounds[1]] = row.iti_prior_last + return stepvec + + def stepfunc_poststim(row): + zerovec = np.zeros(design.binf(row.duration)) + currtr_start = design.binf(row.stimOn_times) + currtr_end = design.binf(row.feedback_times) + zerovec[currtr_start:currtr_end] = row.prior_last + zerovec[currtr_end:] = row.prior + return zerovec + + design = dm.DesignMatrix(trialsdf, vartypes, binwidth=binwidth) + stepbounds = [ + design.binf(t_before + iti_prior_win[0]), + design.binf(t_before + iti_prior_win[1]), + ] + + design.add_covariate_timing( + "stimonL", + "stimOn_times", + bases["stim"], + cond=lambda tr: np.isfinite(tr.contrastLeft), + deltaval="adj_contrastL", + desc="Kernel conditioned on L stimulus onset", + ) + design.add_covariate_timing( + "stimonR", + "stimOn_times", + bases["stim"], + cond=lambda tr: np.isfinite(tr.contrastRight), + deltaval="adj_contrastR", + desc="Kernel conditioned on R stimulus onset", + ) + design.add_covariate_timing( + "correct", + "feedback_times", + bases["feedback"], + cond=lambda tr: tr.feedbackType == 1, + desc="Kernel conditioned on correct feedback", + ) + design.add_covariate_timing( + "incorrect", + "feedback_times", + bases["feedback"], + cond=lambda tr: tr.feedbackType == -1, + desc="Kernel conditioned on incorrect feedback", + ) + design.add_covariate_timing( + "fmoveL", + "firstMovement_times", + bases["fmove"], + offset=fmove_offset, + cond=lambda tr: (tr.choice == 1) & (tr.firstMovement_times >= rt_thresh), + desc="Lead up to first movement leading to left choice", + ) + design.add_covariate_timing( + "fmoveR", + "firstMovement_times", + bases["fmove"], + offset=fmove_offset, + cond=lambda tr: (tr.choice == -1) & (tr.firstMovement_times >= rt_thresh), + desc="Lead up to first movement leading to right choice", + ) + design.add_covariate_timing( + "fmoveL_early", + "firstMovement_times", + bases["fmove"], + offset=fmove_offset, + cond=lambda tr: (tr.choice == 1) & (tr.firstMovement_times < rt_thresh), + desc="Lead up to first movement leading to left choice", + ) + design.add_covariate_timing( + "fmoveR_early", + "firstMovement_times", + bases["fmove"], + offset=fmove_offset, + cond=lambda tr: (tr.choice == -1) & (tr.firstMovement_times < rt_thresh), + desc="Lead up to first movement leading to right choice", + ) + + design.add_covariate_raw("pLeft", stepfunc_prestim, desc="Step function on prior estimate") + design.add_covariate_raw( + "pLeft_tr", stepfunc_poststim, desc="Step function on post-stimulus prior estimate" + ) + + design.add_covariate("wheel", trialsdf["wheel_velocity"], bases["wheel"], wheel_offset) + design.compile_design_matrix() + + if reduce_wheel_dim: + _, s, v = np.linalg.svd(design[:, design.covar["wheel"]["dmcol_idx"]], full_matrices=False) + variances = s**2 / (s**2).sum() + n_keep = np.argwhere(np.cumsum(variances) >= 0.9999)[0, 0] + wheelcols = design[:, design.covar["wheel"]["dmcol_idx"]] + reduced = wheelcols @ v[:n_keep].T + bases_reduced = bases["wheel"] @ v[:n_keep].T + keepcols = ~np.isin(np.arange(design.dm.shape[1]), design.covar["wheel"]["dmcol_idx"]) + basedm = design[:, keepcols] + design.dm = np.hstack([basedm, reduced]) + design.covar["wheel"]["dmcol_idx"] = design.covar["wheel"]["dmcol_idx"][:n_keep] + design.covar["wheel"]["bases"] = bases_reduced + + _logger.info(f"Condition of design matrix: {np.linalg.cond(design.dm)}") + return design diff --git a/brainwidemap/encoding/pipelines/01_cache_regressors.py b/brainwidemap/encoding/pipelines/01_cache_regressors.py index cba57518..96f45522 100644 --- a/brainwidemap/encoding/pipelines/01_cache_regressors.py +++ b/brainwidemap/encoding/pipelines/01_cache_regressors.py @@ -65,7 +65,7 @@ def delayed_loadsave(subject, session_id, pid, params): T_BEF = 0.6 # Time before stimulus onset to include in the definition of the trial T_AFT = 0.6 # Time after feedback to include in the definition of a trial BINWIDTH = 0.02 # Size of binwidth for wheel velocity traces, in seconds -ABSWHEEL = False # Whether to return wheel velocity (False) or speed (True) +ABSWHEEL = True # Whether to return wheel velocity (False) or speed (True) CLU_CRITERIA = "bwm" # Criteria on cluster inclusion in cache # End parameters @@ -76,11 +76,11 @@ def delayed_loadsave(subject, session_id, pid, params): "binwidth": BINWIDTH, "abswheel": ABSWHEEL, "clu_criteria": CLU_CRITERIA, - "one_url": "https://openalyx.internationalbrainlab.org", + "one_url": "https://alyx.internationalbrainlab.org", "one_pw": "international", } -one = ONE(base_url=params["one_url"], password=params["one_pw"], silent=True) +one = ONE(base_url=params["one_url"], silent=True) dataset_futures = [] freeze = "2023_12_bwm_release" if CLU_CRITERIA == "bwm" else None diff --git a/brainwidemap/encoding/pipelines/02_fit_sessions.py b/brainwidemap/encoding/pipelines/02_fit_sessions.py index 9f0435bb..6f203440 100644 --- a/brainwidemap/encoding/pipelines/02_fit_sessions.py +++ b/brainwidemap/encoding/pipelines/02_fit_sessions.py @@ -1,19 +1,19 @@ # Standard library +import argparse import os import pickle -import argparse from datetime import date from pathlib import Path +# IBL libraries +import neurencoding.linear as lm +import neurencoding.utils as mut + # Third party libraries import numpy as np import sklearn.linear_model as skl from sklearn.model_selection import GridSearchCV -# IBL libraries -import neurencoding.linear as lm -import neurencoding.utils as mut - # Brainwide repo imports from brainwidemap.encoding.params import GLM_CACHE, GLM_FIT_PATH from brainwidemap.encoding.utils import make_batch_slurm_singularity @@ -27,7 +27,7 @@ " parameters for the actual GLM fitting are defined within the script itself." " The arguments passed to the script via this parser are only for cluster control." " If you would like to change parameters of the actual fit please adjust the contents" - " of the \"parameters\" section in the file." + ' of the "parameters" section in the file.' ) parser.add_argument( "--basefilepath", @@ -94,12 +94,14 @@ args = parser.parse_args() + # Model parameters # The GLM constructor class requires a function that converts time to bin index, here we define it -# using the binwidth parameter created shortly. -def tmp_binf(t): +# using the binwidth parameter created shortly. +def tmp_binf(t): return np.ceil(t / params["binwidth"]).astype(int) + ######### PARAMETERS ######### params = { "binwidth": 0.02, @@ -108,7 +110,7 @@ def tmp_binf(t): "wheel_offset": -0.3, "contnorm": 5.0, "reduce_wheel_dim": False, - "dataset_fn": "2024-03-18_dataset_metadata.pkl", + "dataset_fn": "2024-08-12_dataset_metadata.pkl", "model": lm.LinearGLM, "alpha_grid": {"alpha": np.logspace(-3, 2, 50)}, "contiguous": False, @@ -128,6 +130,7 @@ def tmp_binf(t): } # Estimator relies on alpha grid in case of GridSearchCV, needs to be defined after main params params["estimator"] = GridSearchCV(skl.Ridge(), params["alpha_grid"]) +earlyrt_flag = "--earlyrt" if "rt_thresh" in params else "" # Output parameters file for workers currdate = str(date.today()) diff --git a/brainwidemap/encoding/pipelines/03_gather_results.py b/brainwidemap/encoding/pipelines/03_gather_results.py index 59491c3f..e779dd92 100644 --- a/brainwidemap/encoding/pipelines/03_gather_results.py +++ b/brainwidemap/encoding/pipelines/03_gather_results.py @@ -53,11 +53,15 @@ def label_cerebellum(acronym, brainregions=BrainRegions()): # Brainwide repo imports from brainwidemap.encoding.params import GLM_CACHE, GLM_FIT_PATH - currdate = "2024-03-18" # Date on which fit was run + currdate = "2024-08-12" # Date on which fit was run n_cov = 9 # Modify if you change the model! parpath = Path(GLM_FIT_PATH).joinpath(f"{currdate}_glm_fit_pars.pkl") + early_split = False with open(parpath, "rb") as fo: params = pickle.load(fo) + if "rt_thresh" in params: + early_split = True + n_cov += 2 datapath = Path(GLM_CACHE).joinpath(params["dataset_fn"]) with open(datapath, "rb") as fo: dataset = pickle.load(fo) @@ -85,6 +89,8 @@ def label_cerebellum(acronym, brainregions=BrainRegions()): folds = [] for i in range(len(tmpfile["scores"])): tmpdf = tmpfile["deltas"][i]["test"] + tmpdf.index = tmpfile["clu_df"].index[tmpdf.index] + tmpdf.index.name = "clu_id" tmpdf["full_model"] = tmpfile["scores"][i]["basescores"]["test"] tmpdf["eid"] = fitname.parts[-2] tmpdf["pid"] = fitname.parts[-1].split("_")[1] @@ -96,26 +102,26 @@ def label_cerebellum(acronym, brainregions=BrainRegions()): sess_master = pd.concat(folds) sessdfs.append(sess_master) masterscores = pd.concat(sessdfs) + kernels = [ + "stimonR", + "stimonL", + "correct", + "incorrect", + "fmoveR", + "fmoveL", + "pLeft", + "pLeft_tr", + "wheel", + "full_model", + ] + if early_split: + kernels.insert(6, "fmoveL_early") + kernels.insert(6, "fmoveR_early") + meanmaster = ( masterscores.set_index(["eid", "pid", "clu_id", "acronym", "qc_label", "fold"]) .groupby(["eid", "pid", "clu_id", "acronym", "qc_label"]) - .agg( - { - k: "mean" - for k in [ - "stimonR", - "stimonL", - "correct", - "incorrect", - "fmoveR", - "fmoveL", - "pLeft", - "pLeft_tr", - "wheel", - "full_model", - ] - } - ) + .agg({k: "mean" for k in kernels}) ) @cache diff --git a/brainwidemap/encoding/pipelines/04_plot_figures.py b/brainwidemap/encoding/pipelines/04_plot_figures.py index adec0555..c7f71cc2 100644 --- a/brainwidemap/encoding/pipelines/04_plot_figures.py +++ b/brainwidemap/encoding/pipelines/04_plot_figures.py @@ -3,7 +3,6 @@ # Third party libraries import matplotlib.pyplot as plt -from matplotlib import colors import numpy as np import pandas as pd import seaborn as sns @@ -11,11 +10,12 @@ # IBL libraries from iblatlas.atlas import BrainRegions from iblatlas.plots import plot_swanson +from matplotlib import colors # Brainwidemap repo imports from brainwidemap.encoding.params import GLM_FIT_PATH -FITDATE = "2023-03-02" +FITDATE = "2024-07-16" VARIABLES = [ "stimonR", "stimonL", @@ -23,6 +23,8 @@ "incorrect", "fmoveR", "fmoveL", + # "fmoveR_early", # Comment/uncomment if early RT split is used. + # "fmoveL_early", "pLeft", "pLeft_tr", "wheel", @@ -40,7 +42,7 @@ ABSDIFF = True # Whether to plot absolute value of difference or signed difference ANNOTATE = False # Whether to annotate brain regions IMGFMT = "png" # Format of output image -SAVEPATH = Path("/home/berk/Documents/Projects/results/plots/swanson_maps/") # Path to save plots +SAVEPATH = Path("/home/gercek/Projects/results/plots/swanson_maps/") # Path to save plots if not SAVEPATH.exists(): SAVEPATH.mkdir() @@ -83,31 +85,27 @@ br = BrainRegions() -def flatmap_variable(df, - cmap, - cmin=COLOR_RANGE[0], - cmax=COLOR_RANGE[1], - norm=None, - plswan_kwargs={}): +def flatmap_variable( + df, cmap, cmin=COLOR_RANGE[0], cmax=COLOR_RANGE[1], norm=None, plswan_kwargs={} +): fig = plt.figure(figsize=(8, 4) if not ANNOTATE else (16, 8)) ax = fig.add_subplot(111) if norm is not None: cmap_kwargs = {"norm": norm, **plswan_kwargs} elif GLOBAL_CMAP: - cmap_kwargs = { - "norm": colors.LogNorm(vmin=cmin, vmax=cmax, clip=True), - **plswan_kwargs - } + cmap_kwargs = {"norm": colors.LogNorm(vmin=cmin, vmax=cmax, clip=True), **plswan_kwargs} else: cmap_kwargs = {"vmin": cmin, "vmax": cmax, **plswan_kwargs} - ax = plot_swanson(df.index, - df.values, - hemisphere="left", - cmap=cmap, - br=br, - ax=ax, - annotate=ANNOTATE, - **cmap_kwargs) + ax = plot_swanson( + df.index, + df.values, + hemisphere="left", + cmap=cmap, + br=br, + ax=ax, + annotate=ANNOTATE, + **cmap_kwargs, + ) plt.colorbar(mappable=ax.images[0]) ax.set_xticks([]) ax.set_yticks([]) @@ -116,9 +114,9 @@ def flatmap_variable(df, def get_cmap(split): - ''' + """ for each split, get a colormap defined by Yanliang - ''' + """ varmaps = { "stimonR": "stim", "stimonL": "stim", @@ -128,14 +126,14 @@ def get_cmap(split): "incorrect": "fback", "pLeft": "block", "pLeft_tr": "block", - "wheel": "wheel" + "wheel": "wheel", } dc = { - 'stim': ["#ffffff", "#D5E1A0", "#A3C968", "#86AF40", "#517146"], - 'choice': ["#ffffff", "#F8E4AA", "#F9D766", "#E8AC22", "#DA4727"], - 'fback': ["#ffffff", "#F1D3D0", "#F5968A", "#E34335", "#A23535"], - 'block': ["#ffffff", "#D0CDE4", "#998DC3", "#6159A6", "#42328E"], - 'wheel': ["#ffffff", "#C2E1EA", "#95CBEE", "#5373B8", "#324BA0"] + "stim": ["#ffffff", "#D5E1A0", "#A3C968", "#86AF40", "#517146"], + "choice": ["#ffffff", "#F8E4AA", "#F9D766", "#E8AC22", "#DA4727"], + "fback": ["#ffffff", "#F1D3D0", "#F5968A", "#E34335", "#A23535"], + "block": ["#ffffff", "#D0CDE4", "#998DC3", "#6159A6", "#42328E"], + "wheel": ["#ffffff", "#C2E1EA", "#95CBEE", "#5373B8", "#324BA0"], } return colors.LinearSegmentedColormap.from_list("mycmap", dc[varmaps[split]]) @@ -144,9 +142,9 @@ def get_cmap(split): # Distribution of full model R^2 values, and std. dev between folds meanscores = fitdata["mean_fit_results"] full_model = meanscores["full_model"].copy() -full_model_std = (fitdata["fit_results"].groupby(["eid", "pid", "clu_id" - ]).agg({"full_model": - "std"})) +full_model_std = ( + fitdata["fit_results"].groupby(["eid", "pid", "clu_id"]).agg({"full_model": "std"}) +) joindf = full_model_std.join(full_model, how="inner", lsuffix="_std") if DISTPLOTS: @@ -168,8 +166,7 @@ def get_cmap(split): unitcounts = meanscores.groupby("region").size().astype(int) keepreg = unitcounts[unitcounts >= MIN_UNITS].index if GLOBAL_CMAP: - allmeans = meanscores.set_index( - "region", append=True)[VARIABLES].groupby("region").mean() + allmeans = meanscores.set_index("region", append=True)[VARIABLES].groupby("region").mean() cmin = np.percentile(allmeans.values.flatten(), COLOR_RANGE[0]) if cmin < 0: cmin = 1e-5 @@ -222,7 +219,8 @@ def get_cmap(split): fig.suptitle(f"{var1} $\Delta R^2$ - {var2} $\Delta R^2$") fig.savefig( DIFFPATH.joinpath( - f"{var1}_{var2}_{'abs' * ABSDIFF}diff{'_annotated' * ANNOTATE}.{IMGFMT}"), + f"{var1}_{var2}_{'abs' * ABSDIFF}diff{'_annotated' * ANNOTATE}.{IMGFMT}" + ), format=IMGFMT, dpi=450, ) diff --git a/brainwidemap/encoding/scripts/twocond_plots.py b/brainwidemap/encoding/scripts/twocond_plots.py index 61615020..c1ff8eec 100644 --- a/brainwidemap/encoding/scripts/twocond_plots.py +++ b/brainwidemap/encoding/scripts/twocond_plots.py @@ -142,8 +142,8 @@ def load_unit_fit_model(eid, pid, clu_id): "block": (lambda df: df["pLeft"], ["PL", "MOp"], "stimOn_times"), } -glm_params = pd.read_pickle(GLM_FIT_PATH + "/2023-03-07_glm_fit_pars.pkl") -meanscores = pd.read_pickle(GLM_FIT_PATH + "/2023-03-02_glm_fit.pkl")[ +glm_params = pd.read_pickle(GLM_FIT_PATH + "/2024-07-16_glm_fit_pars.pkl") +meanscores = pd.read_pickle(GLM_FIT_PATH + "/2024-07-16_glm_fit.pkl")[ "mean_fit_results" ].set_index("region", append=True) diff --git a/brainwidemap/encoding/utils.py b/brainwidemap/encoding/utils.py index 093b9d5a..6fa96dfa 100644 --- a/brainwidemap/encoding/utils.py +++ b/brainwidemap/encoding/utils.py @@ -17,7 +17,7 @@ from brainbox.io.one import SessionLoader # Brainwidemap repo imports -from brainwidemap.bwm_loading import load_trials_and_mask, bwm_units +from brainwidemap.bwm_loading import load_good_units, load_trials_and_mask, bwm_units from brainwidemap.encoding.timeseries import TimeSeries, sync @@ -81,34 +81,12 @@ def load_regressors( trials_mask=mask, ) - clusters = {} - ssl = bbone.SpikeSortingLoader(one=one, pid=pid, eid=session_id) - origspikes, tmpclu, channels = ssl.load_spike_sorting() - if "metrics" not in tmpclu: - tmpclu["metrics"] = np.ones(tmpclu["channels"].size) - clusters[pid] = ssl.merge_clusters(origspikes, tmpclu, channels) - clu_df = pd.DataFrame(clusters[pid]).set_index(["cluster_id"]) - clu_df["pid"] = pid - if clu_criteria == "bwm": - allunits = ( - bwm_units(one=one) - .rename(columns={"cluster_id": "clu_id"}) - .set_index(["eid", "pid", "clu_id"]) - ) - keepclu = clu_df.index.intersection(allunits.loc[session_id, pid, :].index) - elif clu_criteria == "all": - keepclu = clu_df.index - else: - raise ValueError("clu_criteria must be 'bwm' or 'all'") - - clu_df = clu_df.loc[keepclu] - keepmask = np.isin(origspikes.clusters, keepclu) - spikes = Bunch({k: v[keepmask] for k, v in origspikes.items()}) - sortinds = np.argsort(spikes.times) - spk_times = spikes.times[sortinds] - spk_clu = spikes.clusters[sortinds] - clu_regions = clusters[pid].acronym + spikes, clu_df = load_good_units(one=one, pid=pid) + spk_times = spikes["times"] + spk_clu = spikes["clusters"] + clu_df["pid"] = pid + clu_regions = clu_df.acronym return trialsdf, spk_times, spk_clu, clu_regions, clu_df From 1283ba51962601551534db3156e4fe4de07e99ae Mon Sep 17 00:00:00 2001 From: Berk Gercek Date: Mon, 9 Sep 2024 17:13:07 +0200 Subject: [PATCH 3/8] fixed bug in saving unit cluster IDs --- brainwidemap/encoding/pipelines/02_fit_sessions.py | 2 +- brainwidemap/encoding/pipelines/03_gather_results.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/brainwidemap/encoding/pipelines/02_fit_sessions.py b/brainwidemap/encoding/pipelines/02_fit_sessions.py index 6f203440..8ef3cd28 100644 --- a/brainwidemap/encoding/pipelines/02_fit_sessions.py +++ b/brainwidemap/encoding/pipelines/02_fit_sessions.py @@ -110,7 +110,7 @@ def tmp_binf(t): "wheel_offset": -0.3, "contnorm": 5.0, "reduce_wheel_dim": False, - "dataset_fn": "2024-08-12_dataset_metadata.pkl", + "dataset_fn": "2024-09-09_dataset_metadata.pkl", "model": lm.LinearGLM, "alpha_grid": {"alpha": np.logspace(-3, 2, 50)}, "contiguous": False, diff --git a/brainwidemap/encoding/pipelines/03_gather_results.py b/brainwidemap/encoding/pipelines/03_gather_results.py index e779dd92..8f13f970 100644 --- a/brainwidemap/encoding/pipelines/03_gather_results.py +++ b/brainwidemap/encoding/pipelines/03_gather_results.py @@ -89,14 +89,14 @@ def label_cerebellum(acronym, brainregions=BrainRegions()): folds = [] for i in range(len(tmpfile["scores"])): tmpdf = tmpfile["deltas"][i]["test"] - tmpdf.index = tmpfile["clu_df"].index[tmpdf.index] tmpdf.index.name = "clu_id" tmpdf["full_model"] = tmpfile["scores"][i]["basescores"]["test"] tmpdf["eid"] = fitname.parts[-2] tmpdf["pid"] = fitname.parts[-1].split("_")[1] - tmpdf["acronym"] = tmpfile["clu_regions"][tmpdf.index] - tmpdf["qc_label"] = tmpfile["clu_df"]["label"][tmpdf.index] + tmpdf["acronym"] = tmpfile["clu_regions"] + tmpdf["qc_label"] = tmpfile["clu_df"]["label"] tmpdf["fold"] = i + tmpdf.index = tmpfile["clu_df"].iloc[tmpdf.index].cluster_id tmpdf.index.set_names(["clu_id"], inplace=True) folds.append(tmpdf.reset_index()) sess_master = pd.concat(folds) From f4b220cacfb604d844df3cff041d434642dea19c Mon Sep 17 00:00:00 2001 From: Berk Gercek Date: Fri, 13 Sep 2024 16:45:43 +0200 Subject: [PATCH 4/8] allow for early RT fitting --- brainwidemap/encoding/cluster_worker.py | 31 ++- brainwidemap/encoding/design.py | 209 ------------------ brainwidemap/encoding/environment.yaml | 2 +- .../encoding/pipelines/02_fit_sessions.py | 56 +++-- .../encoding/pipelines/03_gather_results.py | 2 +- 5 files changed, 63 insertions(+), 237 deletions(-) diff --git a/brainwidemap/encoding/cluster_worker.py b/brainwidemap/encoding/cluster_worker.py index e1a5a3df..411079ca 100644 --- a/brainwidemap/encoding/cluster_worker.py +++ b/brainwidemap/encoding/cluster_worker.py @@ -17,7 +17,7 @@ from pandas import read_pickle # Brainwidemap repo imports -from brainwidemap.encoding.design import generate_design, generate_design_earlyrts +from brainwidemap.encoding.design import generate_design from brainwidemap.encoding.fit import fit_stepwise, fit_stepwise_with_pseudoblocks from brainwidemap.encoding.params import GLM_FIT_PATH @@ -83,15 +83,29 @@ def fit_save_inputs( fitdate, null=None, earlyrts=False, + laterts=False, ): stdf, sspkt, sspkclu, sclureg, scluqc = get_cached_regressors(eidfn) sessprior = stdf["probabilityLeft"] - if not earlyrts: + if not earlyrts and not laterts: sessdesign = generate_design(stdf, sessprior, t_before, **params) else: + # Handle early and late RT flags, compute median for session if necessary if "rt_thresh" not in params: - print('WARNING: No threshold to define an "early" RT passed. Defaulting to 50ms.') - sessdesign = generate_design_earlyrts(stdf, sessprior, t_before, **params) + raise ValueError("Must specify rt_thresh if fitting early or late RTs") + if laterts and earlyrts: + raise ValueError( + "Cannot fit both early and late RTs. Disable both flags to fit all trials." + ) + if params["rt_thresh"] == "session_median": + params["rt_thresh"] = np.median(stdf["firstMovement_times"] - stdf["trial_start"]) + + if earlyrts: + mask = (stdf["firstMovement_times"] - stdf["trial_start"]) < params["rt_thresh"] + elif laterts: + mask = (stdf["firstMovement_times"] - stdf["trial_start"]) >= params["rt_thresh"] + stdf = stdf[mask] + sessdesign = generate_design(stdf, sessprior, t_before, **params) if null is None: sessfit = fit_stepwise(sessdesign, sspkt, sspkclu, **params) outputfn = save_stepwise( @@ -142,10 +156,14 @@ def fit_save_inputs( parser.add_argument("--impostor_path", type=Path, help="Path to main impostor df file") parser.add_argument( "--earlyrt", - type=bool, - default=False, + action="store_true", help="Whether to fit separate movement kernels to early trials", ) + parser.add_argument( + "--latert", + action="store_true", + help="Whether to fit separate movement kernels to late trials", + ) args = parser.parse_args() with open(args.datafile, "rb") as fo: @@ -170,6 +188,7 @@ def fit_save_inputs( args.fitdate, null=params["null"], earlyrts=args.earlyrt, + laterts=args.latert, ) print("Fitting completed successfully!") print(outputfn) diff --git a/brainwidemap/encoding/design.py b/brainwidemap/encoding/design.py index c6f818d8..989517eb 100644 --- a/brainwidemap/encoding/design.py +++ b/brainwidemap/encoding/design.py @@ -206,212 +206,3 @@ def stepfunc_poststim(row): _logger.info(f"Condition of design matrix: {np.linalg.cond(design.dm)}") return design - - -def generate_design_earlyrts( - trialsdf, - prior, - t_before, - bases, - iti_prior_win=[-0.4, -0.1], - iti_prior_val=None, - fmove_offset=-0.4, - wheel_offset=-0.4, - rt_thresh=0.05, - contnorm=5.0, - binwidth=0.02, - reduce_wheel_dim=True, - addtl_vars=None, - **kwargs, -): - """ - Generate GLM design matrix object - - Parameters - ---------- - trialsdf : pd.DataFrame - Trials dataframe with trial timings in absolute (since session start) time - prior : array-like - Vector containing the prior estimate or true prior for each trial. Must be same length as - trialsdf. If iti_prior_val is not None, this is used as the prior for the ITI period. - t_before : float - Time, in seconds, before stimulus onset that was used to define trial_start in trialsdf. - This defines the beginning of the window, relative to each trial, where we examine the - spiking data. - bases : dict - Dictionary of basis functions for each regressor. Needs keys 'stim', 'feedback', 'fmove', - (first movement) and 'wheel'. - iti_prior_win : list, optional - Two element list defining bounds relative to stimulus on which step function for ITI prior - is applied, by default [-0.4, -0.1] - iti_prior_val : float, optional - Value to use for ITI prior, by default None. If None, uses prior. - fmove_offset : float, optional - Offset, in seconds, to apply to first movement regressor, by default -0.4. This is relative - to the movement onset time, and if you want a purely anti-causal kernel should be - equivalent to the basis function length. - wheel_offset : float, optional - Offset, in seconds, to apply to wheel regressor, by default -0.4. See above. - contnorm : float, optional - Normalization factor for contrast, by default 5. - Applied as tanh(contrast * contnorm) / tanh(contnorm) - binwidth : float, optional - Size of bins to use for design matrix, in seconds, by default 0.02. This must match - the binwidth of the neural glm object later used to fit the design matrix. - reduce_wheel_dim : bool, optional - Whether to reduce the dimensionality of the wheel regressor, by default True. If True, - will use enough principal components to capture 99.999% of the variance. Smooths out - very high frequency fluctuations in the wheel velocity. - addtl_vars : dict, optional - Dictionary of additional variables in the trialsdf along with their variable types. These - are columns that are not normally part of the output of load_trials_df, and will therefore - raise an error if the design matrix building doesn't find a type specification for them. - See neurencoding.design.DesignMatrix for more details on variable types, by default None. - """ - if len(kwargs) > 0: - _logger.info( - f"keys {kwargs.keys()} were not used in generate_design," " despite being passed." - ) - trialsdf["adj_contrastL"] = np.tanh(contnorm * trialsdf["contrastLeft"]) / np.tanh(contnorm) - trialsdf["adj_contrastR"] = np.tanh(contnorm * trialsdf["contrastRight"]) / np.tanh(contnorm) - trialsdf["prior"] = prior - if iti_prior_val is not None: - trialsdf["iti_prior"] = iti_prior_val - else: - trialsdf["iti_prior"] = prior - trialsdf["prior_last"] = pd.Series(np.roll(trialsdf["prior"], 1), index=trialsdf.index) - trialsdf["iti_prior_last"] = pd.Series(np.roll(trialsdf["iti_prior"], 1), index=trialsdf.index) - trialsdf["pLeft_last"] = pd.Series( - np.roll(trialsdf["probabilityLeft"], 1), index=trialsdf.index - ) - - vartypes = { - "choice": "value", - "response_times": "timing", - "probabilityLeft": "value", - "pLeft_last": "value", - "feedbackType": "value", - "feedback_times": "timing", - "contrastLeft": "value", - "adj_contrastL": "value", - "contrastRight": "value", - "adj_contrastR": "value", - "goCue_times": "timing", - "stimOn_times": "timing", - "trial_start": "timing", - "trial_end": "timing", - "prior": "value", - "prior_last": "value", - "iti_prior": "value", - "iti_prior_last": "value", - "wheel_velocity": "continuous", - "firstMovement_times": "timing", - } - if addtl_vars is not None and isinstance(addtl_vars, dict): - vartypes.update(addtl_vars) - - def stepfunc_prestim(row): - stepvec = np.zeros(design.binf(row.duration)) - stepvec[stepbounds[0] : stepbounds[1]] = row.iti_prior_last - return stepvec - - def stepfunc_poststim(row): - zerovec = np.zeros(design.binf(row.duration)) - currtr_start = design.binf(row.stimOn_times) - currtr_end = design.binf(row.feedback_times) - zerovec[currtr_start:currtr_end] = row.prior_last - zerovec[currtr_end:] = row.prior - return zerovec - - design = dm.DesignMatrix(trialsdf, vartypes, binwidth=binwidth) - stepbounds = [ - design.binf(t_before + iti_prior_win[0]), - design.binf(t_before + iti_prior_win[1]), - ] - - design.add_covariate_timing( - "stimonL", - "stimOn_times", - bases["stim"], - cond=lambda tr: np.isfinite(tr.contrastLeft), - deltaval="adj_contrastL", - desc="Kernel conditioned on L stimulus onset", - ) - design.add_covariate_timing( - "stimonR", - "stimOn_times", - bases["stim"], - cond=lambda tr: np.isfinite(tr.contrastRight), - deltaval="adj_contrastR", - desc="Kernel conditioned on R stimulus onset", - ) - design.add_covariate_timing( - "correct", - "feedback_times", - bases["feedback"], - cond=lambda tr: tr.feedbackType == 1, - desc="Kernel conditioned on correct feedback", - ) - design.add_covariate_timing( - "incorrect", - "feedback_times", - bases["feedback"], - cond=lambda tr: tr.feedbackType == -1, - desc="Kernel conditioned on incorrect feedback", - ) - design.add_covariate_timing( - "fmoveL", - "firstMovement_times", - bases["fmove"], - offset=fmove_offset, - cond=lambda tr: (tr.choice == 1) & (tr.firstMovement_times >= rt_thresh), - desc="Lead up to first movement leading to left choice", - ) - design.add_covariate_timing( - "fmoveR", - "firstMovement_times", - bases["fmove"], - offset=fmove_offset, - cond=lambda tr: (tr.choice == -1) & (tr.firstMovement_times >= rt_thresh), - desc="Lead up to first movement leading to right choice", - ) - design.add_covariate_timing( - "fmoveL_early", - "firstMovement_times", - bases["fmove"], - offset=fmove_offset, - cond=lambda tr: (tr.choice == 1) & (tr.firstMovement_times < rt_thresh), - desc="Lead up to first movement leading to left choice", - ) - design.add_covariate_timing( - "fmoveR_early", - "firstMovement_times", - bases["fmove"], - offset=fmove_offset, - cond=lambda tr: (tr.choice == -1) & (tr.firstMovement_times < rt_thresh), - desc="Lead up to first movement leading to right choice", - ) - - design.add_covariate_raw("pLeft", stepfunc_prestim, desc="Step function on prior estimate") - design.add_covariate_raw( - "pLeft_tr", stepfunc_poststim, desc="Step function on post-stimulus prior estimate" - ) - - design.add_covariate("wheel", trialsdf["wheel_velocity"], bases["wheel"], wheel_offset) - design.compile_design_matrix() - - if reduce_wheel_dim: - _, s, v = np.linalg.svd(design[:, design.covar["wheel"]["dmcol_idx"]], full_matrices=False) - variances = s**2 / (s**2).sum() - n_keep = np.argwhere(np.cumsum(variances) >= 0.9999)[0, 0] - wheelcols = design[:, design.covar["wheel"]["dmcol_idx"]] - reduced = wheelcols @ v[:n_keep].T - bases_reduced = bases["wheel"] @ v[:n_keep].T - keepcols = ~np.isin(np.arange(design.dm.shape[1]), design.covar["wheel"]["dmcol_idx"]) - basedm = design[:, keepcols] - design.dm = np.hstack([basedm, reduced]) - design.covar["wheel"]["dmcol_idx"] = design.covar["wheel"]["dmcol_idx"][:n_keep] - design.covar["wheel"]["bases"] = bases_reduced - - _logger.info(f"Condition of design matrix: {np.linalg.cond(design.dm)}") - return design diff --git a/brainwidemap/encoding/environment.yaml b/brainwidemap/encoding/environment.yaml index 22c49eef..f50a46d4 100644 --- a/brainwidemap/encoding/environment.yaml +++ b/brainwidemap/encoding/environment.yaml @@ -1,6 +1,6 @@ name: iblenv dependencies: - - python=3.9 + - python=3.10 - apptools >= 4.5.0 - boto3 - click diff --git a/brainwidemap/encoding/pipelines/02_fit_sessions.py b/brainwidemap/encoding/pipelines/02_fit_sessions.py index 8ef3cd28..fccde714 100644 --- a/brainwidemap/encoding/pipelines/02_fit_sessions.py +++ b/brainwidemap/encoding/pipelines/02_fit_sessions.py @@ -1,6 +1,5 @@ # Standard library import argparse -import os import pickle from datetime import date from pathlib import Path @@ -32,7 +31,7 @@ parser.add_argument( "--basefilepath", type=Path, - default=Path("~/").expanduser().joinpath("bwm_stepwise_glm_leaveoneout"), + default=Path("~/").expanduser().joinpath("jobscripts/bwm_stepwise_glm_leaveoneout"), help="Base filename for batch scripts", ) parser.add_argument( @@ -85,12 +84,6 @@ "--job_cores", type=int, default=32, help="Number of cores to request per job." ) parser.add_argument("--mem", type=str, default="12GB", help="Memory to request per job.") -parser.add_argument( - "--submit_batch", - action="store_true", - default=False, - help="Submit batch jobs to SLURM cluster using the script.", -) args = parser.parse_args() @@ -110,7 +103,7 @@ def tmp_binf(t): "wheel_offset": -0.3, "contnorm": 5.0, "reduce_wheel_dim": False, - "dataset_fn": "2024-09-09_dataset_metadata.pkl", + "dataset_fn": "2024-08-12_dataset_metadata.pkl", "model": lm.LinearGLM, "alpha_grid": {"alpha": np.logspace(-3, 2, 50)}, "contiguous": False, @@ -120,6 +113,7 @@ def tmp_binf(t): "seqsel_kwargs": {"direction": "backward", "n_features_to_select": 8}, "seqselfit_kwargs": {"full_scores": True}, "seed": 0, + "rt_thresh": "session_median", } params["bases"] = { @@ -130,7 +124,14 @@ def tmp_binf(t): } # Estimator relies on alpha grid in case of GridSearchCV, needs to be defined after main params params["estimator"] = GridSearchCV(skl.Ridge(), params["alpha_grid"]) -earlyrt_flag = "--earlyrt" if "rt_thresh" in params else "" +if "rt_thresh" in params: + earlyrt_flag = "--earlyrt" + latert_flag = "--latert" + earlyrt_fn = "_early_rt" +else: + earlyrt_flag = "" + latert_flag = "" + earlyrt_fn = "" # Output parameters file for workers currdate = str(date.today()) @@ -145,7 +146,7 @@ def tmp_binf(t): # Generate batch script make_batch_slurm_singularity( - str(args.basefilepath), + str(args.basefilepath) + earlyrt_fn, str(Path(__file__).parents[1].joinpath("cluster_worker.py")), job_name=args.jobname, partition=args.partition, @@ -159,14 +160,29 @@ def tmp_binf(t): cores_per_job=args.job_cores, memory=args.mem, array_size=f"1-{njobs}", - f_args=[str(datapath), str(parpath), r"${SLURM_ARRAY_TASK_ID}", currdate], + f_args=[earlyrt_flag, str(datapath), str(parpath), r"${SLURM_ARRAY_TASK_ID}", currdate], ) - -# If SUBMIT_BATCH, then actually execute the batch job -if args.submit_batch: - os.system(f"sbatch {str(args.basefilepath) + '_batch.sh'}") -else: - print( - f"Batch file generated at {str(args.basefilepath) + '_batch.sh'};" - " user must submit it themselves. Good luck!" +if len(earlyrt_fn) > 0: + make_batch_slurm_singularity( + str(args.basefilepath) + "_late_rt", + str(Path(__file__).parents[1].joinpath("cluster_worker.py")), + job_name=args.jobname, + partition=args.partition, + time=args.timelimit, + singularity_modules=args.singularity_modules, + container_image=args.singularity_image, + img_condapath=args.singularity_conda, + img_envname=args.singularity_env, + local_pathadd=Path(__file__).parents[3], + logpath=args.logpath, + cores_per_job=args.job_cores, + memory=args.mem, + array_size=f"1-{njobs}", + f_args=[latert_flag, str(datapath), str(parpath), r"${SLURM_ARRAY_TASK_ID}", currdate], ) + +# If SUBMIT_BATCH, then actually execute the batch jo +print( + f"Batch file generated at {str(args.basefilepath) + '_batch.sh'};" + " user must submit it themselves. Good luck!" +) diff --git a/brainwidemap/encoding/pipelines/03_gather_results.py b/brainwidemap/encoding/pipelines/03_gather_results.py index 8f13f970..2686db1a 100644 --- a/brainwidemap/encoding/pipelines/03_gather_results.py +++ b/brainwidemap/encoding/pipelines/03_gather_results.py @@ -53,7 +53,7 @@ def label_cerebellum(acronym, brainregions=BrainRegions()): # Brainwide repo imports from brainwidemap.encoding.params import GLM_CACHE, GLM_FIT_PATH - currdate = "2024-08-12" # Date on which fit was run + currdate = "2024-09-09" # Date on which fit was run n_cov = 9 # Modify if you change the model! parpath = Path(GLM_FIT_PATH).joinpath(f"{currdate}_glm_fit_pars.pkl") early_split = False From 64661229886f85a41ffc0af0bda3d6838bb5c5ce Mon Sep 17 00:00:00 2001 From: Berk Gercek Date: Mon, 16 Sep 2024 11:58:05 +0200 Subject: [PATCH 5/8] Docker updates and early RT fitting --- brainwidemap/encoding/Dockerfile | 15 +++-------- brainwidemap/encoding/cluster_worker.py | 12 +++++++-- brainwidemap/encoding/environment.yaml | 26 +++++++------------ .../encoding/pipelines/02_fit_sessions.py | 1 + .../encoding/pipelines/03_gather_results.py | 8 +----- 5 files changed, 25 insertions(+), 37 deletions(-) diff --git a/brainwidemap/encoding/Dockerfile b/brainwidemap/encoding/Dockerfile index 87213ec8..64b63664 100644 --- a/brainwidemap/encoding/Dockerfile +++ b/brainwidemap/encoding/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 +FROM ubuntu:latest # This can optionally be built with just ubuntu, rather than the nvidia cuda container. # If saving space is a concern, this is the way to go. LABEL description="Core container which has the basic necessities to run analyses in the\ @@ -15,20 +15,11 @@ COPY ./environment.yaml /data/environment.yaml SHELL ["/bin/bash", "-c"] # For some reason ibllib.io.video needs opencv which requires libgl1-mesa-dev ¯\_(ツ)_/¯ RUN apt update && apt install -y wget git libgl1-mesa-dev -RUN wget -O Mambaforge.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" -RUN bash Mambaforge.sh -b -p /opt/conda && rm Mambaforge.sh +RUN wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" +RUN bash Miniforge3-$(uname)-$(uname -m).sh -b -p /opt/conda && rm Mambaforge.sh RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \ mamba install --yes conda-build &&\ mamba env create -n iblenv --file=environment.yaml" -RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh &&\ - conda activate iblenv &&\ - mamba install --yes pytorch pytorch-cuda=11.7 -c pytorch -c nvidia &&\ - conda clean --all -f -y" -RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh &&\ - conda activate iblenv &&\ - pip install globus-sdk iblutil ibllib iblapps ibl-neuropixel ONE-api phylib pynrrd slidingRP &&\ - git clone https://github.com/berkgercek/neurencoding &&\ - conda develop ./neurencoding" # The below allows interactively running the container with the correct environment, but be warned # that this will not work with commands passed to the container in a non-interactive shell. # In the case of e.g. `docker run thiscontainer python myscript.py`, the environment will not diff --git a/brainwidemap/encoding/cluster_worker.py b/brainwidemap/encoding/cluster_worker.py index 411079ca..6201b6be 100644 --- a/brainwidemap/encoding/cluster_worker.py +++ b/brainwidemap/encoding/cluster_worker.py @@ -38,9 +38,9 @@ def _create_sub_sess_path(parent, subject, session): return sesspath -def save_stepwise(subject, session_id, fitout, params, probes, input_fn, clu_reg, clu_df, fitdate): +def save_stepwise(subject, session_id, fitout, params, probes, input_fn, clu_reg, clu_df, fitdate, splitstr=""): sesspath = _create_sub_sess_path(GLM_FIT_PATH, subject, session_id) - fn = sesspath.joinpath(f"{fitdate}_{probes}_stepwise_regression.pkl") + fn = sesspath.joinpath(f"{fitdate}_{probes}{splitstr}_stepwise_regression.pkl") outdict = { "params": params, "probes": probes, @@ -87,6 +87,14 @@ def fit_save_inputs( ): stdf, sspkt, sspkclu, sclureg, scluqc = get_cached_regressors(eidfn) sessprior = stdf["probabilityLeft"] + match (earlyrts, laterts): + case (False, False): + splitstr = "" + case (True, False): + splitstr = "_earlyrt" + case (False, True): + splitstr = "_latert" + paramsp["splitstr"] = splitstr if not earlyrts and not laterts: sessdesign = generate_design(stdf, sessprior, t_before, **params) else: diff --git a/brainwidemap/encoding/environment.yaml b/brainwidemap/encoding/environment.yaml index f50a46d4..b8c2af47 100644 --- a/brainwidemap/encoding/environment.yaml +++ b/brainwidemap/encoding/environment.yaml @@ -1,26 +1,11 @@ name: iblenv dependencies: - python=3.10 - - apptools >= 4.5.0 - - boto3 - - click - - colorcet - - colorlog - - cython - - dataclasses - - flake8 - - graphviz - - h5py - ipython - matplotlib - numba - numpy - pandas - - plotly - - pyarrow - - pyflakes >= 2.4.0 - - pytest - - requests - scikit-learn - scipy >=1.4.1 - seaborn @@ -28,4 +13,13 @@ dependencies: - tqdm - pip - pip: - - opencv-python + - iblutil + - ibllib + - iblapps + - ibl-neuropixel + - ONE-api + - phylib + - phy + - pynrrd + - slidingRP + - neurencoding diff --git a/brainwidemap/encoding/pipelines/02_fit_sessions.py b/brainwidemap/encoding/pipelines/02_fit_sessions.py index fccde714..826ce4bf 100644 --- a/brainwidemap/encoding/pipelines/02_fit_sessions.py +++ b/brainwidemap/encoding/pipelines/02_fit_sessions.py @@ -114,6 +114,7 @@ def tmp_binf(t): "seqselfit_kwargs": {"full_scores": True}, "seed": 0, "rt_thresh": "session_median", + "mintrials": 50, } params["bases"] = { diff --git a/brainwidemap/encoding/pipelines/03_gather_results.py b/brainwidemap/encoding/pipelines/03_gather_results.py index 2686db1a..1dd32c77 100644 --- a/brainwidemap/encoding/pipelines/03_gather_results.py +++ b/brainwidemap/encoding/pipelines/03_gather_results.py @@ -53,15 +53,12 @@ def label_cerebellum(acronym, brainregions=BrainRegions()): # Brainwide repo imports from brainwidemap.encoding.params import GLM_CACHE, GLM_FIT_PATH - currdate = "2024-09-09" # Date on which fit was run + currdate = "2024-09-15" # Date on which fit was run n_cov = 9 # Modify if you change the model! parpath = Path(GLM_FIT_PATH).joinpath(f"{currdate}_glm_fit_pars.pkl") early_split = False with open(parpath, "rb") as fo: params = pickle.load(fo) - if "rt_thresh" in params: - early_split = True - n_cov += 2 datapath = Path(GLM_CACHE).joinpath(params["dataset_fn"]) with open(datapath, "rb") as fo: dataset = pickle.load(fo) @@ -114,9 +111,6 @@ def label_cerebellum(acronym, brainregions=BrainRegions()): "wheel", "full_model", ] - if early_split: - kernels.insert(6, "fmoveL_early") - kernels.insert(6, "fmoveR_early") meanmaster = ( masterscores.set_index(["eid", "pid", "clu_id", "acronym", "qc_label", "fold"]) From 7033d5c15ccc842218a8c3e81739bf553078d904 Mon Sep 17 00:00:00 2001 From: Berk Gercek Date: Mon, 16 Sep 2024 11:59:30 +0200 Subject: [PATCH 6/8] bugfix --- brainwidemap/encoding/cluster_worker.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/brainwidemap/encoding/cluster_worker.py b/brainwidemap/encoding/cluster_worker.py index 6201b6be..5a01b4ba 100644 --- a/brainwidemap/encoding/cluster_worker.py +++ b/brainwidemap/encoding/cluster_worker.py @@ -94,7 +94,6 @@ def fit_save_inputs( splitstr = "_earlyrt" case (False, True): splitstr = "_latert" - paramsp["splitstr"] = splitstr if not earlyrts and not laterts: sessdesign = generate_design(stdf, sessprior, t_before, **params) else: @@ -117,7 +116,7 @@ def fit_save_inputs( if null is None: sessfit = fit_stepwise(sessdesign, sspkt, sspkclu, **params) outputfn = save_stepwise( - subject, eid, sessfit, params, probes, eidfn, sclureg, scluqc, fitdate + subject, eid, sessfit, params, probes, eidfn, sclureg, scluqc, fitdate, splitstr ) elif null == "pseudosession_pleft_iti": sessfit, nullfits = fit_stepwise_with_pseudoblocks( From fa92142159e95b3e4f953d31d326824b9660adae Mon Sep 17 00:00:00 2001 From: Berk Gercek Date: Mon, 16 Sep 2024 12:00:20 +0200 Subject: [PATCH 7/8] dockerfile fix --- brainwidemap/encoding/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/brainwidemap/encoding/Dockerfile b/brainwidemap/encoding/Dockerfile index 64b63664..61fc42d4 100644 --- a/brainwidemap/encoding/Dockerfile +++ b/brainwidemap/encoding/Dockerfile @@ -16,7 +16,7 @@ SHELL ["/bin/bash", "-c"] # For some reason ibllib.io.video needs opencv which requires libgl1-mesa-dev ¯\_(ツ)_/¯ RUN apt update && apt install -y wget git libgl1-mesa-dev RUN wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" -RUN bash Miniforge3-$(uname)-$(uname -m).sh -b -p /opt/conda && rm Mambaforge.sh +RUN /bin/bash -c "bash Miniforge3-$(uname)-$(uname -m).sh -b -p /opt/conda && rm Mambaforge.sh" RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \ mamba install --yes conda-build &&\ mamba env create -n iblenv --file=environment.yaml" From 9651085731456e339144ba5b0fddacf91c8906e2 Mon Sep 17 00:00:00 2001 From: Berk Gercek Date: Mon, 16 Sep 2024 13:32:57 +0200 Subject: [PATCH 8/8] Update docker spec and remove pytorch deps --- brainwidemap/encoding/Dockerfile | 10 ++++++++-- brainwidemap/encoding/environment.yaml | 13 +------------ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/brainwidemap/encoding/Dockerfile b/brainwidemap/encoding/Dockerfile index 61fc42d4..88305d6f 100644 --- a/brainwidemap/encoding/Dockerfile +++ b/brainwidemap/encoding/Dockerfile @@ -15,11 +15,17 @@ COPY ./environment.yaml /data/environment.yaml SHELL ["/bin/bash", "-c"] # For some reason ibllib.io.video needs opencv which requires libgl1-mesa-dev ¯\_(ツ)_/¯ RUN apt update && apt install -y wget git libgl1-mesa-dev -RUN wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" -RUN /bin/bash -c "bash Miniforge3-$(uname)-$(uname -m).sh -b -p /opt/conda && rm Mambaforge.sh" +RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" +RUN bash Miniforge3.sh -b -p /opt/conda && rm Miniforge3.sh +RUN wget -O iblreq.txt "https://raw.githubusercontent.com/int-brain-lab/ibllib/master/requirements.txt" +RUN head -n -1 iblreq.txt > requirements.txt +RUN rm iblreq.txt RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \ mamba install --yes conda-build &&\ mamba env create -n iblenv --file=environment.yaml" +RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \ + conda activate iblenv && pip install -r requirements.txt && pip install ibllib --no-deps" +RUN rm requirements.txt # The below allows interactively running the container with the correct environment, but be warned # that this will not work with commands passed to the container in a non-interactive shell. # In the case of e.g. `docker run thiscontainer python myscript.py`, the environment will not diff --git a/brainwidemap/encoding/environment.yaml b/brainwidemap/encoding/environment.yaml index b8c2af47..490877f2 100644 --- a/brainwidemap/encoding/environment.yaml +++ b/brainwidemap/encoding/environment.yaml @@ -4,7 +4,6 @@ dependencies: - ipython - matplotlib - numba - - numpy - pandas - scikit-learn - scipy >=1.4.1 @@ -12,14 +11,4 @@ dependencies: - statsmodels - tqdm - pip - - pip: - - iblutil - - ibllib - - iblapps - - ibl-neuropixel - - ONE-api - - phylib - - phy - - pynrrd - - slidingRP - - neurencoding + - pyqt<6