From 9396815545602fd6fd015c21e69083a073d285da Mon Sep 17 00:00:00 2001
From: Berk Gercek <gercek.berk@gmail.com>
Date: Mon, 1 Jul 2024 17:47:18 +0200
Subject: [PATCH 1/8] changes to load scripts

---
 brainwidemap/encoding/params.py                        | 4 ++--
 brainwidemap/encoding/pipelines/01_cache_regressors.py | 2 +-
 brainwidemap/encoding/pipelines/02_fit_sessions.py     | 2 +-
 brainwidemap/encoding/pipelines/03_gather_results.py   | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/brainwidemap/encoding/params.py b/brainwidemap/encoding/params.py
index 8454f987..1d8705ee 100644
--- a/brainwidemap/encoding/params.py
+++ b/brainwidemap/encoding/params.py
@@ -4,5 +4,5 @@
 work.
 """
 
-GLM_CACHE = "/home/gercek/scratch/glm_cache/"
-GLM_FIT_PATH = "/home/gercek/scratch/results/glms/"
+GLM_CACHE = "/home/gercek/Projects/glm_cache/"
+GLM_FIT_PATH = "/home/gercek/Projects/results/glms/"
diff --git a/brainwidemap/encoding/pipelines/01_cache_regressors.py b/brainwidemap/encoding/pipelines/01_cache_regressors.py
index c71c8043..cba57518 100644
--- a/brainwidemap/encoding/pipelines/01_cache_regressors.py
+++ b/brainwidemap/encoding/pipelines/01_cache_regressors.py
@@ -65,7 +65,7 @@ def delayed_loadsave(subject, session_id, pid, params):
 T_BEF = 0.6  # Time before stimulus onset to include in the definition of the trial
 T_AFT = 0.6  # Time after feedback to include in the definition of a trial
 BINWIDTH = 0.02  # Size of binwidth for wheel velocity traces, in seconds
-ABSWHEEL = True  # Whether to return wheel velocity (False) or speed (True)
+ABSWHEEL = False  # Whether to return wheel velocity (False) or speed (True)
 CLU_CRITERIA = "bwm"  # Criteria on cluster inclusion in cache
 # End parameters
 
diff --git a/brainwidemap/encoding/pipelines/02_fit_sessions.py b/brainwidemap/encoding/pipelines/02_fit_sessions.py
index da7cf1be..9f0435bb 100644
--- a/brainwidemap/encoding/pipelines/02_fit_sessions.py
+++ b/brainwidemap/encoding/pipelines/02_fit_sessions.py
@@ -108,7 +108,7 @@ def tmp_binf(t):
     "wheel_offset": -0.3,
     "contnorm": 5.0,
     "reduce_wheel_dim": False,
-    "dataset_fn": "2024-01-06_dataset_metadata.pkl",
+    "dataset_fn": "2024-03-18_dataset_metadata.pkl",
     "model": lm.LinearGLM,
     "alpha_grid": {"alpha": np.logspace(-3, 2, 50)},
     "contiguous": False,
diff --git a/brainwidemap/encoding/pipelines/03_gather_results.py b/brainwidemap/encoding/pipelines/03_gather_results.py
index e14408a5..59491c3f 100644
--- a/brainwidemap/encoding/pipelines/03_gather_results.py
+++ b/brainwidemap/encoding/pipelines/03_gather_results.py
@@ -53,7 +53,7 @@ def label_cerebellum(acronym, brainregions=BrainRegions()):
     # Brainwide repo imports
     from brainwidemap.encoding.params import GLM_CACHE, GLM_FIT_PATH
 
-    currdate = "2024-01-22"  # Date on which fit was run
+    currdate = "2024-03-18"  # Date on which fit was run
     n_cov = 9  # Modify if you change the model!
     parpath = Path(GLM_FIT_PATH).joinpath(f"{currdate}_glm_fit_pars.pkl")
     with open(parpath, "rb") as fo:

From 7680718f3dc65d7c6dc3bda04af08df0fb1af4ce Mon Sep 17 00:00:00 2001
From: Berk Gercek <gercek.berk@gmail.com>
Date: Mon, 9 Sep 2024 15:06:28 +0200
Subject: [PATCH 2/8] Add option for early RT splits

---
 brainwidemap/encoding/cluster_worker.py       |  29 ++-
 brainwidemap/encoding/design.py               | 216 +++++++++++++++++-
 .../encoding/pipelines/01_cache_regressors.py |   6 +-
 .../encoding/pipelines/02_fit_sessions.py     |  21 +-
 .../encoding/pipelines/03_gather_results.py   |  42 ++--
 .../encoding/pipelines/04_plot_figures.py     |  68 +++---
 .../encoding/scripts/twocond_plots.py         |   4 +-
 brainwidemap/encoding/utils.py                |  34 +--
 8 files changed, 314 insertions(+), 106 deletions(-)

diff --git a/brainwidemap/encoding/cluster_worker.py b/brainwidemap/encoding/cluster_worker.py
index cc82ced8..e1a5a3df 100644
--- a/brainwidemap/encoding/cluster_worker.py
+++ b/brainwidemap/encoding/cluster_worker.py
@@ -17,7 +17,7 @@
 from pandas import read_pickle
 
 # Brainwidemap repo imports
-from brainwidemap.encoding.design import generate_design
+from brainwidemap.encoding.design import generate_design, generate_design_earlyrts
 from brainwidemap.encoding.fit import fit_stepwise, fit_stepwise_with_pseudoblocks
 from brainwidemap.encoding.params import GLM_FIT_PATH
 
@@ -82,10 +82,16 @@ def fit_save_inputs(
     t_before,
     fitdate,
     null=None,
+    earlyrts=False,
 ):
     stdf, sspkt, sspkclu, sclureg, scluqc = get_cached_regressors(eidfn)
     sessprior = stdf["probabilityLeft"]
-    sessdesign = generate_design(stdf, sessprior, t_before, **params)
+    if not earlyrts:
+        sessdesign = generate_design(stdf, sessprior, t_before, **params)
+    else:
+        if "rt_thresh" not in params:
+            print('WARNING: No threshold to define an "early" RT passed. Defaulting to 50ms.')
+        sessdesign = generate_design_earlyrts(stdf, sessprior, t_before, **params)
     if null is None:
         sessfit = fit_stepwise(sessdesign, sspkt, sspkclu, **params)
         outputfn = save_stepwise(
@@ -115,11 +121,13 @@ def fit_save_inputs(
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Cluster GLM fitter. This script is called by"
-                                                 "the batch script generated in "
-                                                 "pipelines/02_fit_sessions.py and should in most "
-                                                 "cases beyond debugging not be used in a "
-                                                 "standalone fashion.")
+    parser = argparse.ArgumentParser(
+        description="Cluster GLM fitter. This script is called by"
+        "the batch script generated in "
+        "pipelines/02_fit_sessions.py and should in most "
+        "cases beyond debugging not be used in a "
+        "standalone fashion."
+    )
     parser.add_argument(
         "datafile",
         type=Path,
@@ -132,6 +140,12 @@ def fit_save_inputs(
     )
     parser.add_argument("fitdate", help="Date of fit for output file")
     parser.add_argument("--impostor_path", type=Path, help="Path to main impostor df file")
+    parser.add_argument(
+        "--earlyrt",
+        type=bool,
+        default=False,
+        help="Whether to fit separate movement kernels to early trials",
+    )
     args = parser.parse_args()
 
     with open(args.datafile, "rb") as fo:
@@ -155,6 +169,7 @@ def fit_save_inputs(
         t_before,
         args.fitdate,
         null=params["null"],
+        earlyrts=args.earlyrt,
     )
     print("Fitting completed successfully!")
     print(outputfn)
diff --git a/brainwidemap/encoding/design.py b/brainwidemap/encoding/design.py
index 37ecd0b5..c6f818d8 100644
--- a/brainwidemap/encoding/design.py
+++ b/brainwidemap/encoding/design.py
@@ -6,13 +6,12 @@
 # Standard library
 import logging
 
+# IBL libraries
+import neurencoding.design_matrix as dm
+
 # Third party libraries
 import numpy as np
 import pandas as pd
-from scipy.stats import norm
-
-# IBL libraries
-import neurencoding.design_matrix as dm
 
 _logger = logging.getLogger("brainwide")
 
@@ -207,3 +206,212 @@ def stepfunc_poststim(row):
 
     _logger.info(f"Condition of design matrix: {np.linalg.cond(design.dm)}")
     return design
+
+
+def generate_design_earlyrts(
+    trialsdf,
+    prior,
+    t_before,
+    bases,
+    iti_prior_win=[-0.4, -0.1],
+    iti_prior_val=None,
+    fmove_offset=-0.4,
+    wheel_offset=-0.4,
+    rt_thresh=0.05,
+    contnorm=5.0,
+    binwidth=0.02,
+    reduce_wheel_dim=True,
+    addtl_vars=None,
+    **kwargs,
+):
+    """
+    Generate GLM design matrix object
+
+    Parameters
+    ----------
+    trialsdf : pd.DataFrame
+        Trials dataframe with trial timings in absolute (since session start) time
+    prior : array-like
+        Vector containing the prior estimate or true prior for each trial. Must be same length as
+        trialsdf. If iti_prior_val is not None, this is used as the prior for the ITI period.
+    t_before : float
+        Time, in seconds, before stimulus onset that was used to define trial_start in trialsdf.
+        This defines the beginning of the window, relative to each trial, where we examine the
+        spiking data.
+    bases : dict
+        Dictionary of basis functions for each regressor. Needs keys 'stim', 'feedback', 'fmove',
+        (first movement) and 'wheel'.
+    iti_prior_win : list, optional
+        Two element list defining bounds relative to stimulus on which step function for ITI prior
+        is applied, by default [-0.4, -0.1]
+    iti_prior_val : float, optional
+        Value to use for ITI prior, by default None. If None, uses prior.
+    fmove_offset : float, optional
+        Offset, in seconds, to apply to first movement regressor, by default -0.4. This is relative
+        to the movement onset time, and if you want a purely anti-causal kernel should be
+        equivalent to the basis function length.
+    wheel_offset : float, optional
+        Offset, in seconds, to apply to wheel regressor, by default -0.4. See above.
+    contnorm : float, optional
+        Normalization factor for contrast, by default 5.
+        Applied as tanh(contrast * contnorm) / tanh(contnorm)
+    binwidth : float, optional
+        Size of bins to use for design matrix, in seconds, by default 0.02. This must match
+        the binwidth of the neural glm object later used to fit the design matrix.
+    reduce_wheel_dim : bool, optional
+        Whether to reduce the dimensionality of the wheel regressor, by default True. If True,
+        will use enough principal components to capture 99.999% of the variance. Smooths out
+        very high frequency fluctuations in the wheel velocity.
+    addtl_vars : dict, optional
+        Dictionary of additional variables in the trialsdf along with their variable types. These
+        are columns that are not normally part of the output of load_trials_df, and will therefore
+        raise an error if the design matrix building doesn't find a type specification for them.
+        See neurencoding.design.DesignMatrix for more details on variable types, by default None.
+    """
+    if len(kwargs) > 0:
+        _logger.info(
+            f"keys {kwargs.keys()} were not used in generate_design," " despite being passed."
+        )
+    trialsdf["adj_contrastL"] = np.tanh(contnorm * trialsdf["contrastLeft"]) / np.tanh(contnorm)
+    trialsdf["adj_contrastR"] = np.tanh(contnorm * trialsdf["contrastRight"]) / np.tanh(contnorm)
+    trialsdf["prior"] = prior
+    if iti_prior_val is not None:
+        trialsdf["iti_prior"] = iti_prior_val
+    else:
+        trialsdf["iti_prior"] = prior
+    trialsdf["prior_last"] = pd.Series(np.roll(trialsdf["prior"], 1), index=trialsdf.index)
+    trialsdf["iti_prior_last"] = pd.Series(np.roll(trialsdf["iti_prior"], 1), index=trialsdf.index)
+    trialsdf["pLeft_last"] = pd.Series(
+        np.roll(trialsdf["probabilityLeft"], 1), index=trialsdf.index
+    )
+
+    vartypes = {
+        "choice": "value",
+        "response_times": "timing",
+        "probabilityLeft": "value",
+        "pLeft_last": "value",
+        "feedbackType": "value",
+        "feedback_times": "timing",
+        "contrastLeft": "value",
+        "adj_contrastL": "value",
+        "contrastRight": "value",
+        "adj_contrastR": "value",
+        "goCue_times": "timing",
+        "stimOn_times": "timing",
+        "trial_start": "timing",
+        "trial_end": "timing",
+        "prior": "value",
+        "prior_last": "value",
+        "iti_prior": "value",
+        "iti_prior_last": "value",
+        "wheel_velocity": "continuous",
+        "firstMovement_times": "timing",
+    }
+    if addtl_vars is not None and isinstance(addtl_vars, dict):
+        vartypes.update(addtl_vars)
+
+    def stepfunc_prestim(row):
+        stepvec = np.zeros(design.binf(row.duration))
+        stepvec[stepbounds[0] : stepbounds[1]] = row.iti_prior_last
+        return stepvec
+
+    def stepfunc_poststim(row):
+        zerovec = np.zeros(design.binf(row.duration))
+        currtr_start = design.binf(row.stimOn_times)
+        currtr_end = design.binf(row.feedback_times)
+        zerovec[currtr_start:currtr_end] = row.prior_last
+        zerovec[currtr_end:] = row.prior
+        return zerovec
+
+    design = dm.DesignMatrix(trialsdf, vartypes, binwidth=binwidth)
+    stepbounds = [
+        design.binf(t_before + iti_prior_win[0]),
+        design.binf(t_before + iti_prior_win[1]),
+    ]
+
+    design.add_covariate_timing(
+        "stimonL",
+        "stimOn_times",
+        bases["stim"],
+        cond=lambda tr: np.isfinite(tr.contrastLeft),
+        deltaval="adj_contrastL",
+        desc="Kernel conditioned on L stimulus onset",
+    )
+    design.add_covariate_timing(
+        "stimonR",
+        "stimOn_times",
+        bases["stim"],
+        cond=lambda tr: np.isfinite(tr.contrastRight),
+        deltaval="adj_contrastR",
+        desc="Kernel conditioned on R stimulus onset",
+    )
+    design.add_covariate_timing(
+        "correct",
+        "feedback_times",
+        bases["feedback"],
+        cond=lambda tr: tr.feedbackType == 1,
+        desc="Kernel conditioned on correct feedback",
+    )
+    design.add_covariate_timing(
+        "incorrect",
+        "feedback_times",
+        bases["feedback"],
+        cond=lambda tr: tr.feedbackType == -1,
+        desc="Kernel conditioned on incorrect feedback",
+    )
+    design.add_covariate_timing(
+        "fmoveL",
+        "firstMovement_times",
+        bases["fmove"],
+        offset=fmove_offset,
+        cond=lambda tr: (tr.choice == 1) & (tr.firstMovement_times >= rt_thresh),
+        desc="Lead up to first movement leading to left choice",
+    )
+    design.add_covariate_timing(
+        "fmoveR",
+        "firstMovement_times",
+        bases["fmove"],
+        offset=fmove_offset,
+        cond=lambda tr: (tr.choice == -1) & (tr.firstMovement_times >= rt_thresh),
+        desc="Lead up to first movement leading to right choice",
+    )
+    design.add_covariate_timing(
+        "fmoveL_early",
+        "firstMovement_times",
+        bases["fmove"],
+        offset=fmove_offset,
+        cond=lambda tr: (tr.choice == 1) & (tr.firstMovement_times < rt_thresh),
+        desc="Lead up to first movement leading to left choice",
+    )
+    design.add_covariate_timing(
+        "fmoveR_early",
+        "firstMovement_times",
+        bases["fmove"],
+        offset=fmove_offset,
+        cond=lambda tr: (tr.choice == -1) & (tr.firstMovement_times < rt_thresh),
+        desc="Lead up to first movement leading to right choice",
+    )
+
+    design.add_covariate_raw("pLeft", stepfunc_prestim, desc="Step function on prior estimate")
+    design.add_covariate_raw(
+        "pLeft_tr", stepfunc_poststim, desc="Step function on post-stimulus prior estimate"
+    )
+
+    design.add_covariate("wheel", trialsdf["wheel_velocity"], bases["wheel"], wheel_offset)
+    design.compile_design_matrix()
+
+    if reduce_wheel_dim:
+        _, s, v = np.linalg.svd(design[:, design.covar["wheel"]["dmcol_idx"]], full_matrices=False)
+        variances = s**2 / (s**2).sum()
+        n_keep = np.argwhere(np.cumsum(variances) >= 0.9999)[0, 0]
+        wheelcols = design[:, design.covar["wheel"]["dmcol_idx"]]
+        reduced = wheelcols @ v[:n_keep].T
+        bases_reduced = bases["wheel"] @ v[:n_keep].T
+        keepcols = ~np.isin(np.arange(design.dm.shape[1]), design.covar["wheel"]["dmcol_idx"])
+        basedm = design[:, keepcols]
+        design.dm = np.hstack([basedm, reduced])
+        design.covar["wheel"]["dmcol_idx"] = design.covar["wheel"]["dmcol_idx"][:n_keep]
+        design.covar["wheel"]["bases"] = bases_reduced
+
+    _logger.info(f"Condition of design matrix: {np.linalg.cond(design.dm)}")
+    return design
diff --git a/brainwidemap/encoding/pipelines/01_cache_regressors.py b/brainwidemap/encoding/pipelines/01_cache_regressors.py
index cba57518..96f45522 100644
--- a/brainwidemap/encoding/pipelines/01_cache_regressors.py
+++ b/brainwidemap/encoding/pipelines/01_cache_regressors.py
@@ -65,7 +65,7 @@ def delayed_loadsave(subject, session_id, pid, params):
 T_BEF = 0.6  # Time before stimulus onset to include in the definition of the trial
 T_AFT = 0.6  # Time after feedback to include in the definition of a trial
 BINWIDTH = 0.02  # Size of binwidth for wheel velocity traces, in seconds
-ABSWHEEL = False  # Whether to return wheel velocity (False) or speed (True)
+ABSWHEEL = True  # Whether to return wheel velocity (False) or speed (True)
 CLU_CRITERIA = "bwm"  # Criteria on cluster inclusion in cache
 # End parameters
 
@@ -76,11 +76,11 @@ def delayed_loadsave(subject, session_id, pid, params):
     "binwidth": BINWIDTH,
     "abswheel": ABSWHEEL,
     "clu_criteria": CLU_CRITERIA,
-    "one_url": "https://openalyx.internationalbrainlab.org",
+    "one_url": "https://alyx.internationalbrainlab.org",
     "one_pw": "international",
 }
 
-one = ONE(base_url=params["one_url"], password=params["one_pw"], silent=True)
+one = ONE(base_url=params["one_url"], silent=True)
 dataset_futures = []
 
 freeze = "2023_12_bwm_release" if CLU_CRITERIA == "bwm" else None
diff --git a/brainwidemap/encoding/pipelines/02_fit_sessions.py b/brainwidemap/encoding/pipelines/02_fit_sessions.py
index 9f0435bb..6f203440 100644
--- a/brainwidemap/encoding/pipelines/02_fit_sessions.py
+++ b/brainwidemap/encoding/pipelines/02_fit_sessions.py
@@ -1,19 +1,19 @@
 # Standard library
+import argparse
 import os
 import pickle
-import argparse
 from datetime import date
 from pathlib import Path
 
+# IBL libraries
+import neurencoding.linear as lm
+import neurencoding.utils as mut
+
 # Third party libraries
 import numpy as np
 import sklearn.linear_model as skl
 from sklearn.model_selection import GridSearchCV
 
-# IBL libraries
-import neurencoding.linear as lm
-import neurencoding.utils as mut
-
 # Brainwide repo imports
 from brainwidemap.encoding.params import GLM_CACHE, GLM_FIT_PATH
 from brainwidemap.encoding.utils import make_batch_slurm_singularity
@@ -27,7 +27,7 @@
     " parameters for the actual GLM fitting are defined within the script itself."
     " The arguments passed to the script via this parser are only for cluster control."
     " If you would like to change parameters of the actual fit please adjust the contents"
-    " of the \"parameters\" section in the file."
+    ' of the "parameters" section in the file.'
 )
 parser.add_argument(
     "--basefilepath",
@@ -94,12 +94,14 @@
 
 args = parser.parse_args()
 
+
 # Model parameters
 # The GLM constructor class requires a function that converts time to bin index, here we define it
-# using the binwidth parameter created shortly. 
-def tmp_binf(t): 
+# using the binwidth parameter created shortly.
+def tmp_binf(t):
     return np.ceil(t / params["binwidth"]).astype(int)
 
+
 ######### PARAMETERS #########
 params = {
     "binwidth": 0.02,
@@ -108,7 +110,7 @@ def tmp_binf(t):
     "wheel_offset": -0.3,
     "contnorm": 5.0,
     "reduce_wheel_dim": False,
-    "dataset_fn": "2024-03-18_dataset_metadata.pkl",
+    "dataset_fn": "2024-08-12_dataset_metadata.pkl",
     "model": lm.LinearGLM,
     "alpha_grid": {"alpha": np.logspace(-3, 2, 50)},
     "contiguous": False,
@@ -128,6 +130,7 @@ def tmp_binf(t):
 }
 # Estimator relies on alpha grid in case of GridSearchCV, needs to be defined after main params
 params["estimator"] = GridSearchCV(skl.Ridge(), params["alpha_grid"])
+earlyrt_flag = "--earlyrt" if "rt_thresh" in params else ""
 
 # Output parameters file for workers
 currdate = str(date.today())
diff --git a/brainwidemap/encoding/pipelines/03_gather_results.py b/brainwidemap/encoding/pipelines/03_gather_results.py
index 59491c3f..e779dd92 100644
--- a/brainwidemap/encoding/pipelines/03_gather_results.py
+++ b/brainwidemap/encoding/pipelines/03_gather_results.py
@@ -53,11 +53,15 @@ def label_cerebellum(acronym, brainregions=BrainRegions()):
     # Brainwide repo imports
     from brainwidemap.encoding.params import GLM_CACHE, GLM_FIT_PATH
 
-    currdate = "2024-03-18"  # Date on which fit was run
+    currdate = "2024-08-12"  # Date on which fit was run
     n_cov = 9  # Modify if you change the model!
     parpath = Path(GLM_FIT_PATH).joinpath(f"{currdate}_glm_fit_pars.pkl")
+    early_split = False
     with open(parpath, "rb") as fo:
         params = pickle.load(fo)
+    if "rt_thresh" in params:
+        early_split = True
+        n_cov += 2
     datapath = Path(GLM_CACHE).joinpath(params["dataset_fn"])
     with open(datapath, "rb") as fo:
         dataset = pickle.load(fo)
@@ -85,6 +89,8 @@ def label_cerebellum(acronym, brainregions=BrainRegions()):
         folds = []
         for i in range(len(tmpfile["scores"])):
             tmpdf = tmpfile["deltas"][i]["test"]
+            tmpdf.index = tmpfile["clu_df"].index[tmpdf.index]
+            tmpdf.index.name = "clu_id"
             tmpdf["full_model"] = tmpfile["scores"][i]["basescores"]["test"]
             tmpdf["eid"] = fitname.parts[-2]
             tmpdf["pid"] = fitname.parts[-1].split("_")[1]
@@ -96,26 +102,26 @@ def label_cerebellum(acronym, brainregions=BrainRegions()):
         sess_master = pd.concat(folds)
         sessdfs.append(sess_master)
     masterscores = pd.concat(sessdfs)
+    kernels = [
+        "stimonR",
+        "stimonL",
+        "correct",
+        "incorrect",
+        "fmoveR",
+        "fmoveL",
+        "pLeft",
+        "pLeft_tr",
+        "wheel",
+        "full_model",
+    ]
+    if early_split:
+        kernels.insert(6, "fmoveL_early")
+        kernels.insert(6, "fmoveR_early")
+
     meanmaster = (
         masterscores.set_index(["eid", "pid", "clu_id", "acronym", "qc_label", "fold"])
         .groupby(["eid", "pid", "clu_id", "acronym", "qc_label"])
-        .agg(
-            {
-                k: "mean"
-                for k in [
-                    "stimonR",
-                    "stimonL",
-                    "correct",
-                    "incorrect",
-                    "fmoveR",
-                    "fmoveL",
-                    "pLeft",
-                    "pLeft_tr",
-                    "wheel",
-                    "full_model",
-                ]
-            }
-        )
+        .agg({k: "mean" for k in kernels})
     )
 
     @cache
diff --git a/brainwidemap/encoding/pipelines/04_plot_figures.py b/brainwidemap/encoding/pipelines/04_plot_figures.py
index adec0555..c7f71cc2 100644
--- a/brainwidemap/encoding/pipelines/04_plot_figures.py
+++ b/brainwidemap/encoding/pipelines/04_plot_figures.py
@@ -3,7 +3,6 @@
 
 # Third party libraries
 import matplotlib.pyplot as plt
-from matplotlib import colors
 import numpy as np
 import pandas as pd
 import seaborn as sns
@@ -11,11 +10,12 @@
 # IBL libraries
 from iblatlas.atlas import BrainRegions
 from iblatlas.plots import plot_swanson
+from matplotlib import colors
 
 # Brainwidemap repo imports
 from brainwidemap.encoding.params import GLM_FIT_PATH
 
-FITDATE = "2023-03-02"
+FITDATE = "2024-07-16"
 VARIABLES = [
     "stimonR",
     "stimonL",
@@ -23,6 +23,8 @@
     "incorrect",
     "fmoveR",
     "fmoveL",
+    # "fmoveR_early",  # Comment/uncomment if early RT split is used.
+    # "fmoveL_early",
     "pLeft",
     "pLeft_tr",
     "wheel",
@@ -40,7 +42,7 @@
 ABSDIFF = True  # Whether to plot absolute value of difference or signed difference
 ANNOTATE = False  # Whether to annotate brain regions
 IMGFMT = "png"  # Format of output image
-SAVEPATH = Path("/home/berk/Documents/Projects/results/plots/swanson_maps/")  # Path to save plots
+SAVEPATH = Path("/home/gercek/Projects/results/plots/swanson_maps/")  # Path to save plots
 
 if not SAVEPATH.exists():
     SAVEPATH.mkdir()
@@ -83,31 +85,27 @@
 br = BrainRegions()
 
 
-def flatmap_variable(df,
-                     cmap,
-                     cmin=COLOR_RANGE[0],
-                     cmax=COLOR_RANGE[1],
-                     norm=None,
-                     plswan_kwargs={}):
+def flatmap_variable(
+    df, cmap, cmin=COLOR_RANGE[0], cmax=COLOR_RANGE[1], norm=None, plswan_kwargs={}
+):
     fig = plt.figure(figsize=(8, 4) if not ANNOTATE else (16, 8))
     ax = fig.add_subplot(111)
     if norm is not None:
         cmap_kwargs = {"norm": norm, **plswan_kwargs}
     elif GLOBAL_CMAP:
-        cmap_kwargs = {
-            "norm": colors.LogNorm(vmin=cmin, vmax=cmax, clip=True),
-            **plswan_kwargs
-        }
+        cmap_kwargs = {"norm": colors.LogNorm(vmin=cmin, vmax=cmax, clip=True), **plswan_kwargs}
     else:
         cmap_kwargs = {"vmin": cmin, "vmax": cmax, **plswan_kwargs}
-    ax = plot_swanson(df.index,
-                      df.values,
-                      hemisphere="left",
-                      cmap=cmap,
-                      br=br,
-                      ax=ax,
-                      annotate=ANNOTATE,
-                      **cmap_kwargs)
+    ax = plot_swanson(
+        df.index,
+        df.values,
+        hemisphere="left",
+        cmap=cmap,
+        br=br,
+        ax=ax,
+        annotate=ANNOTATE,
+        **cmap_kwargs,
+    )
     plt.colorbar(mappable=ax.images[0])
     ax.set_xticks([])
     ax.set_yticks([])
@@ -116,9 +114,9 @@ def flatmap_variable(df,
 
 
 def get_cmap(split):
-    '''
+    """
     for each split, get a colormap defined by Yanliang
-    '''
+    """
     varmaps = {
         "stimonR": "stim",
         "stimonL": "stim",
@@ -128,14 +126,14 @@ def get_cmap(split):
         "incorrect": "fback",
         "pLeft": "block",
         "pLeft_tr": "block",
-        "wheel": "wheel"
+        "wheel": "wheel",
     }
     dc = {
-        'stim': ["#ffffff", "#D5E1A0", "#A3C968", "#86AF40", "#517146"],
-        'choice': ["#ffffff", "#F8E4AA", "#F9D766", "#E8AC22", "#DA4727"],
-        'fback': ["#ffffff", "#F1D3D0", "#F5968A", "#E34335", "#A23535"],
-        'block': ["#ffffff", "#D0CDE4", "#998DC3", "#6159A6", "#42328E"],
-        'wheel': ["#ffffff", "#C2E1EA", "#95CBEE", "#5373B8", "#324BA0"]
+        "stim": ["#ffffff", "#D5E1A0", "#A3C968", "#86AF40", "#517146"],
+        "choice": ["#ffffff", "#F8E4AA", "#F9D766", "#E8AC22", "#DA4727"],
+        "fback": ["#ffffff", "#F1D3D0", "#F5968A", "#E34335", "#A23535"],
+        "block": ["#ffffff", "#D0CDE4", "#998DC3", "#6159A6", "#42328E"],
+        "wheel": ["#ffffff", "#C2E1EA", "#95CBEE", "#5373B8", "#324BA0"],
     }
     return colors.LinearSegmentedColormap.from_list("mycmap", dc[varmaps[split]])
 
@@ -144,9 +142,9 @@ def get_cmap(split):
 # Distribution of full model R^2 values, and std. dev between folds
 meanscores = fitdata["mean_fit_results"]
 full_model = meanscores["full_model"].copy()
-full_model_std = (fitdata["fit_results"].groupby(["eid", "pid", "clu_id"
-                                                  ]).agg({"full_model":
-                                                          "std"}))
+full_model_std = (
+    fitdata["fit_results"].groupby(["eid", "pid", "clu_id"]).agg({"full_model": "std"})
+)
 joindf = full_model_std.join(full_model, how="inner", lsuffix="_std")
 
 if DISTPLOTS:
@@ -168,8 +166,7 @@ def get_cmap(split):
 unitcounts = meanscores.groupby("region").size().astype(int)
 keepreg = unitcounts[unitcounts >= MIN_UNITS].index
 if GLOBAL_CMAP:
-    allmeans = meanscores.set_index(
-        "region", append=True)[VARIABLES].groupby("region").mean()
+    allmeans = meanscores.set_index("region", append=True)[VARIABLES].groupby("region").mean()
     cmin = np.percentile(allmeans.values.flatten(), COLOR_RANGE[0])
     if cmin < 0:
         cmin = 1e-5
@@ -222,7 +219,8 @@ def get_cmap(split):
         fig.suptitle(f"{var1} $\Delta R^2$ - {var2} $\Delta R^2$")
         fig.savefig(
             DIFFPATH.joinpath(
-                f"{var1}_{var2}_{'abs' * ABSDIFF}diff{'_annotated' * ANNOTATE}.{IMGFMT}"),
+                f"{var1}_{var2}_{'abs' * ABSDIFF}diff{'_annotated' * ANNOTATE}.{IMGFMT}"
+            ),
             format=IMGFMT,
             dpi=450,
         )
diff --git a/brainwidemap/encoding/scripts/twocond_plots.py b/brainwidemap/encoding/scripts/twocond_plots.py
index 61615020..c1ff8eec 100644
--- a/brainwidemap/encoding/scripts/twocond_plots.py
+++ b/brainwidemap/encoding/scripts/twocond_plots.py
@@ -142,8 +142,8 @@ def load_unit_fit_model(eid, pid, clu_id):
     "block": (lambda df: df["pLeft"], ["PL", "MOp"], "stimOn_times"),
 }
 
-glm_params = pd.read_pickle(GLM_FIT_PATH + "/2023-03-07_glm_fit_pars.pkl")
-meanscores = pd.read_pickle(GLM_FIT_PATH + "/2023-03-02_glm_fit.pkl")[
+glm_params = pd.read_pickle(GLM_FIT_PATH + "/2024-07-16_glm_fit_pars.pkl")
+meanscores = pd.read_pickle(GLM_FIT_PATH + "/2024-07-16_glm_fit.pkl")[
     "mean_fit_results"
 ].set_index("region", append=True)
 
diff --git a/brainwidemap/encoding/utils.py b/brainwidemap/encoding/utils.py
index 093b9d5a..6fa96dfa 100644
--- a/brainwidemap/encoding/utils.py
+++ b/brainwidemap/encoding/utils.py
@@ -17,7 +17,7 @@
 from brainbox.io.one import SessionLoader
 
 # Brainwidemap repo imports
-from brainwidemap.bwm_loading import load_trials_and_mask, bwm_units
+from brainwidemap.bwm_loading import load_good_units, load_trials_and_mask, bwm_units
 from brainwidemap.encoding.timeseries import TimeSeries, sync
 
 
@@ -81,34 +81,12 @@ def load_regressors(
         trials_mask=mask,
     )
 
-    clusters = {}
-    ssl = bbone.SpikeSortingLoader(one=one, pid=pid, eid=session_id)
-    origspikes, tmpclu, channels = ssl.load_spike_sorting()
-    if "metrics" not in tmpclu:
-        tmpclu["metrics"] = np.ones(tmpclu["channels"].size)
-    clusters[pid] = ssl.merge_clusters(origspikes, tmpclu, channels)
-    clu_df = pd.DataFrame(clusters[pid]).set_index(["cluster_id"])
-    clu_df["pid"] = pid
-
     if clu_criteria == "bwm":
-        allunits = (
-            bwm_units(one=one)
-            .rename(columns={"cluster_id": "clu_id"})
-            .set_index(["eid", "pid", "clu_id"])
-        )
-        keepclu = clu_df.index.intersection(allunits.loc[session_id, pid, :].index)
-    elif clu_criteria == "all":
-        keepclu = clu_df.index
-    else:
-        raise ValueError("clu_criteria must be 'bwm' or 'all'")
-
-    clu_df = clu_df.loc[keepclu]
-    keepmask = np.isin(origspikes.clusters, keepclu)
-    spikes = Bunch({k: v[keepmask] for k, v in origspikes.items()})
-    sortinds = np.argsort(spikes.times)
-    spk_times = spikes.times[sortinds]
-    spk_clu = spikes.clusters[sortinds]
-    clu_regions = clusters[pid].acronym
+        spikes, clu_df = load_good_units(one=one, pid=pid)
+    spk_times = spikes["times"]
+    spk_clu = spikes["clusters"]
+    clu_df["pid"] = pid
+    clu_regions = clu_df.acronym
     return trialsdf, spk_times, spk_clu, clu_regions, clu_df
 
 

From 1283ba51962601551534db3156e4fe4de07e99ae Mon Sep 17 00:00:00 2001
From: Berk Gercek <gercek.berk@gmail.com>
Date: Mon, 9 Sep 2024 17:13:07 +0200
Subject: [PATCH 3/8] fixed bug in saving unit cluster IDs

---
 brainwidemap/encoding/pipelines/02_fit_sessions.py   | 2 +-
 brainwidemap/encoding/pipelines/03_gather_results.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/brainwidemap/encoding/pipelines/02_fit_sessions.py b/brainwidemap/encoding/pipelines/02_fit_sessions.py
index 6f203440..8ef3cd28 100644
--- a/brainwidemap/encoding/pipelines/02_fit_sessions.py
+++ b/brainwidemap/encoding/pipelines/02_fit_sessions.py
@@ -110,7 +110,7 @@ def tmp_binf(t):
     "wheel_offset": -0.3,
     "contnorm": 5.0,
     "reduce_wheel_dim": False,
-    "dataset_fn": "2024-08-12_dataset_metadata.pkl",
+    "dataset_fn": "2024-09-09_dataset_metadata.pkl",
     "model": lm.LinearGLM,
     "alpha_grid": {"alpha": np.logspace(-3, 2, 50)},
     "contiguous": False,
diff --git a/brainwidemap/encoding/pipelines/03_gather_results.py b/brainwidemap/encoding/pipelines/03_gather_results.py
index e779dd92..8f13f970 100644
--- a/brainwidemap/encoding/pipelines/03_gather_results.py
+++ b/brainwidemap/encoding/pipelines/03_gather_results.py
@@ -89,14 +89,14 @@ def label_cerebellum(acronym, brainregions=BrainRegions()):
         folds = []
         for i in range(len(tmpfile["scores"])):
             tmpdf = tmpfile["deltas"][i]["test"]
-            tmpdf.index = tmpfile["clu_df"].index[tmpdf.index]
             tmpdf.index.name = "clu_id"
             tmpdf["full_model"] = tmpfile["scores"][i]["basescores"]["test"]
             tmpdf["eid"] = fitname.parts[-2]
             tmpdf["pid"] = fitname.parts[-1].split("_")[1]
-            tmpdf["acronym"] = tmpfile["clu_regions"][tmpdf.index]
-            tmpdf["qc_label"] = tmpfile["clu_df"]["label"][tmpdf.index]
+            tmpdf["acronym"] = tmpfile["clu_regions"]
+            tmpdf["qc_label"] = tmpfile["clu_df"]["label"]
             tmpdf["fold"] = i
+            tmpdf.index = tmpfile["clu_df"].iloc[tmpdf.index].cluster_id
             tmpdf.index.set_names(["clu_id"], inplace=True)
             folds.append(tmpdf.reset_index())
         sess_master = pd.concat(folds)

From f4b220cacfb604d844df3cff041d434642dea19c Mon Sep 17 00:00:00 2001
From: Berk Gercek <gercek.berk@gmail.com>
Date: Fri, 13 Sep 2024 16:45:43 +0200
Subject: [PATCH 4/8] allow for early RT fitting

---
 brainwidemap/encoding/cluster_worker.py       |  31 ++-
 brainwidemap/encoding/design.py               | 209 ------------------
 brainwidemap/encoding/environment.yaml        |   2 +-
 .../encoding/pipelines/02_fit_sessions.py     |  56 +++--
 .../encoding/pipelines/03_gather_results.py   |   2 +-
 5 files changed, 63 insertions(+), 237 deletions(-)

diff --git a/brainwidemap/encoding/cluster_worker.py b/brainwidemap/encoding/cluster_worker.py
index e1a5a3df..411079ca 100644
--- a/brainwidemap/encoding/cluster_worker.py
+++ b/brainwidemap/encoding/cluster_worker.py
@@ -17,7 +17,7 @@
 from pandas import read_pickle
 
 # Brainwidemap repo imports
-from brainwidemap.encoding.design import generate_design, generate_design_earlyrts
+from brainwidemap.encoding.design import generate_design
 from brainwidemap.encoding.fit import fit_stepwise, fit_stepwise_with_pseudoblocks
 from brainwidemap.encoding.params import GLM_FIT_PATH
 
@@ -83,15 +83,29 @@ def fit_save_inputs(
     fitdate,
     null=None,
     earlyrts=False,
+    laterts=False,
 ):
     stdf, sspkt, sspkclu, sclureg, scluqc = get_cached_regressors(eidfn)
     sessprior = stdf["probabilityLeft"]
-    if not earlyrts:
+    if not earlyrts and not laterts:
         sessdesign = generate_design(stdf, sessprior, t_before, **params)
     else:
+        # Handle early and late RT flags, compute median for session if necessary
         if "rt_thresh" not in params:
-            print('WARNING: No threshold to define an "early" RT passed. Defaulting to 50ms.')
-        sessdesign = generate_design_earlyrts(stdf, sessprior, t_before, **params)
+            raise ValueError("Must specify rt_thresh if fitting early or late RTs")
+        if laterts and earlyrts:
+            raise ValueError(
+                "Cannot fit both early and late RTs. Disable both flags to fit all trials."
+            )
+        if params["rt_thresh"] == "session_median":
+            params["rt_thresh"] = np.median(stdf["firstMovement_times"] - stdf["trial_start"])
+
+        if earlyrts:
+            mask = (stdf["firstMovement_times"] - stdf["trial_start"]) < params["rt_thresh"]
+        elif laterts:
+            mask = (stdf["firstMovement_times"] - stdf["trial_start"]) >= params["rt_thresh"]
+        stdf = stdf[mask]
+        sessdesign = generate_design(stdf, sessprior, t_before, **params)
     if null is None:
         sessfit = fit_stepwise(sessdesign, sspkt, sspkclu, **params)
         outputfn = save_stepwise(
@@ -142,10 +156,14 @@ def fit_save_inputs(
     parser.add_argument("--impostor_path", type=Path, help="Path to main impostor df file")
     parser.add_argument(
         "--earlyrt",
-        type=bool,
-        default=False,
+        action="store_true",
         help="Whether to fit separate movement kernels to early trials",
     )
+    parser.add_argument(
+        "--latert",
+        action="store_true",
+        help="Whether to fit separate movement kernels to late trials",
+    )
     args = parser.parse_args()
 
     with open(args.datafile, "rb") as fo:
@@ -170,6 +188,7 @@ def fit_save_inputs(
         args.fitdate,
         null=params["null"],
         earlyrts=args.earlyrt,
+        laterts=args.latert,
     )
     print("Fitting completed successfully!")
     print(outputfn)
diff --git a/brainwidemap/encoding/design.py b/brainwidemap/encoding/design.py
index c6f818d8..989517eb 100644
--- a/brainwidemap/encoding/design.py
+++ b/brainwidemap/encoding/design.py
@@ -206,212 +206,3 @@ def stepfunc_poststim(row):
 
     _logger.info(f"Condition of design matrix: {np.linalg.cond(design.dm)}")
     return design
-
-
-def generate_design_earlyrts(
-    trialsdf,
-    prior,
-    t_before,
-    bases,
-    iti_prior_win=[-0.4, -0.1],
-    iti_prior_val=None,
-    fmove_offset=-0.4,
-    wheel_offset=-0.4,
-    rt_thresh=0.05,
-    contnorm=5.0,
-    binwidth=0.02,
-    reduce_wheel_dim=True,
-    addtl_vars=None,
-    **kwargs,
-):
-    """
-    Generate GLM design matrix object
-
-    Parameters
-    ----------
-    trialsdf : pd.DataFrame
-        Trials dataframe with trial timings in absolute (since session start) time
-    prior : array-like
-        Vector containing the prior estimate or true prior for each trial. Must be same length as
-        trialsdf. If iti_prior_val is not None, this is used as the prior for the ITI period.
-    t_before : float
-        Time, in seconds, before stimulus onset that was used to define trial_start in trialsdf.
-        This defines the beginning of the window, relative to each trial, where we examine the
-        spiking data.
-    bases : dict
-        Dictionary of basis functions for each regressor. Needs keys 'stim', 'feedback', 'fmove',
-        (first movement) and 'wheel'.
-    iti_prior_win : list, optional
-        Two element list defining bounds relative to stimulus on which step function for ITI prior
-        is applied, by default [-0.4, -0.1]
-    iti_prior_val : float, optional
-        Value to use for ITI prior, by default None. If None, uses prior.
-    fmove_offset : float, optional
-        Offset, in seconds, to apply to first movement regressor, by default -0.4. This is relative
-        to the movement onset time, and if you want a purely anti-causal kernel should be
-        equivalent to the basis function length.
-    wheel_offset : float, optional
-        Offset, in seconds, to apply to wheel regressor, by default -0.4. See above.
-    contnorm : float, optional
-        Normalization factor for contrast, by default 5.
-        Applied as tanh(contrast * contnorm) / tanh(contnorm)
-    binwidth : float, optional
-        Size of bins to use for design matrix, in seconds, by default 0.02. This must match
-        the binwidth of the neural glm object later used to fit the design matrix.
-    reduce_wheel_dim : bool, optional
-        Whether to reduce the dimensionality of the wheel regressor, by default True. If True,
-        will use enough principal components to capture 99.999% of the variance. Smooths out
-        very high frequency fluctuations in the wheel velocity.
-    addtl_vars : dict, optional
-        Dictionary of additional variables in the trialsdf along with their variable types. These
-        are columns that are not normally part of the output of load_trials_df, and will therefore
-        raise an error if the design matrix building doesn't find a type specification for them.
-        See neurencoding.design.DesignMatrix for more details on variable types, by default None.
-    """
-    if len(kwargs) > 0:
-        _logger.info(
-            f"keys {kwargs.keys()} were not used in generate_design," " despite being passed."
-        )
-    trialsdf["adj_contrastL"] = np.tanh(contnorm * trialsdf["contrastLeft"]) / np.tanh(contnorm)
-    trialsdf["adj_contrastR"] = np.tanh(contnorm * trialsdf["contrastRight"]) / np.tanh(contnorm)
-    trialsdf["prior"] = prior
-    if iti_prior_val is not None:
-        trialsdf["iti_prior"] = iti_prior_val
-    else:
-        trialsdf["iti_prior"] = prior
-    trialsdf["prior_last"] = pd.Series(np.roll(trialsdf["prior"], 1), index=trialsdf.index)
-    trialsdf["iti_prior_last"] = pd.Series(np.roll(trialsdf["iti_prior"], 1), index=trialsdf.index)
-    trialsdf["pLeft_last"] = pd.Series(
-        np.roll(trialsdf["probabilityLeft"], 1), index=trialsdf.index
-    )
-
-    vartypes = {
-        "choice": "value",
-        "response_times": "timing",
-        "probabilityLeft": "value",
-        "pLeft_last": "value",
-        "feedbackType": "value",
-        "feedback_times": "timing",
-        "contrastLeft": "value",
-        "adj_contrastL": "value",
-        "contrastRight": "value",
-        "adj_contrastR": "value",
-        "goCue_times": "timing",
-        "stimOn_times": "timing",
-        "trial_start": "timing",
-        "trial_end": "timing",
-        "prior": "value",
-        "prior_last": "value",
-        "iti_prior": "value",
-        "iti_prior_last": "value",
-        "wheel_velocity": "continuous",
-        "firstMovement_times": "timing",
-    }
-    if addtl_vars is not None and isinstance(addtl_vars, dict):
-        vartypes.update(addtl_vars)
-
-    def stepfunc_prestim(row):
-        stepvec = np.zeros(design.binf(row.duration))
-        stepvec[stepbounds[0] : stepbounds[1]] = row.iti_prior_last
-        return stepvec
-
-    def stepfunc_poststim(row):
-        zerovec = np.zeros(design.binf(row.duration))
-        currtr_start = design.binf(row.stimOn_times)
-        currtr_end = design.binf(row.feedback_times)
-        zerovec[currtr_start:currtr_end] = row.prior_last
-        zerovec[currtr_end:] = row.prior
-        return zerovec
-
-    design = dm.DesignMatrix(trialsdf, vartypes, binwidth=binwidth)
-    stepbounds = [
-        design.binf(t_before + iti_prior_win[0]),
-        design.binf(t_before + iti_prior_win[1]),
-    ]
-
-    design.add_covariate_timing(
-        "stimonL",
-        "stimOn_times",
-        bases["stim"],
-        cond=lambda tr: np.isfinite(tr.contrastLeft),
-        deltaval="adj_contrastL",
-        desc="Kernel conditioned on L stimulus onset",
-    )
-    design.add_covariate_timing(
-        "stimonR",
-        "stimOn_times",
-        bases["stim"],
-        cond=lambda tr: np.isfinite(tr.contrastRight),
-        deltaval="adj_contrastR",
-        desc="Kernel conditioned on R stimulus onset",
-    )
-    design.add_covariate_timing(
-        "correct",
-        "feedback_times",
-        bases["feedback"],
-        cond=lambda tr: tr.feedbackType == 1,
-        desc="Kernel conditioned on correct feedback",
-    )
-    design.add_covariate_timing(
-        "incorrect",
-        "feedback_times",
-        bases["feedback"],
-        cond=lambda tr: tr.feedbackType == -1,
-        desc="Kernel conditioned on incorrect feedback",
-    )
-    design.add_covariate_timing(
-        "fmoveL",
-        "firstMovement_times",
-        bases["fmove"],
-        offset=fmove_offset,
-        cond=lambda tr: (tr.choice == 1) & (tr.firstMovement_times >= rt_thresh),
-        desc="Lead up to first movement leading to left choice",
-    )
-    design.add_covariate_timing(
-        "fmoveR",
-        "firstMovement_times",
-        bases["fmove"],
-        offset=fmove_offset,
-        cond=lambda tr: (tr.choice == -1) & (tr.firstMovement_times >= rt_thresh),
-        desc="Lead up to first movement leading to right choice",
-    )
-    design.add_covariate_timing(
-        "fmoveL_early",
-        "firstMovement_times",
-        bases["fmove"],
-        offset=fmove_offset,
-        cond=lambda tr: (tr.choice == 1) & (tr.firstMovement_times < rt_thresh),
-        desc="Lead up to first movement leading to left choice",
-    )
-    design.add_covariate_timing(
-        "fmoveR_early",
-        "firstMovement_times",
-        bases["fmove"],
-        offset=fmove_offset,
-        cond=lambda tr: (tr.choice == -1) & (tr.firstMovement_times < rt_thresh),
-        desc="Lead up to first movement leading to right choice",
-    )
-
-    design.add_covariate_raw("pLeft", stepfunc_prestim, desc="Step function on prior estimate")
-    design.add_covariate_raw(
-        "pLeft_tr", stepfunc_poststim, desc="Step function on post-stimulus prior estimate"
-    )
-
-    design.add_covariate("wheel", trialsdf["wheel_velocity"], bases["wheel"], wheel_offset)
-    design.compile_design_matrix()
-
-    if reduce_wheel_dim:
-        _, s, v = np.linalg.svd(design[:, design.covar["wheel"]["dmcol_idx"]], full_matrices=False)
-        variances = s**2 / (s**2).sum()
-        n_keep = np.argwhere(np.cumsum(variances) >= 0.9999)[0, 0]
-        wheelcols = design[:, design.covar["wheel"]["dmcol_idx"]]
-        reduced = wheelcols @ v[:n_keep].T
-        bases_reduced = bases["wheel"] @ v[:n_keep].T
-        keepcols = ~np.isin(np.arange(design.dm.shape[1]), design.covar["wheel"]["dmcol_idx"])
-        basedm = design[:, keepcols]
-        design.dm = np.hstack([basedm, reduced])
-        design.covar["wheel"]["dmcol_idx"] = design.covar["wheel"]["dmcol_idx"][:n_keep]
-        design.covar["wheel"]["bases"] = bases_reduced
-
-    _logger.info(f"Condition of design matrix: {np.linalg.cond(design.dm)}")
-    return design
diff --git a/brainwidemap/encoding/environment.yaml b/brainwidemap/encoding/environment.yaml
index 22c49eef..f50a46d4 100644
--- a/brainwidemap/encoding/environment.yaml
+++ b/brainwidemap/encoding/environment.yaml
@@ -1,6 +1,6 @@
 name: iblenv
 dependencies:
-  - python=3.9
+  - python=3.10
   - apptools >= 4.5.0
   - boto3
   - click
diff --git a/brainwidemap/encoding/pipelines/02_fit_sessions.py b/brainwidemap/encoding/pipelines/02_fit_sessions.py
index 8ef3cd28..fccde714 100644
--- a/brainwidemap/encoding/pipelines/02_fit_sessions.py
+++ b/brainwidemap/encoding/pipelines/02_fit_sessions.py
@@ -1,6 +1,5 @@
 # Standard library
 import argparse
-import os
 import pickle
 from datetime import date
 from pathlib import Path
@@ -32,7 +31,7 @@
 parser.add_argument(
     "--basefilepath",
     type=Path,
-    default=Path("~/").expanduser().joinpath("bwm_stepwise_glm_leaveoneout"),
+    default=Path("~/").expanduser().joinpath("jobscripts/bwm_stepwise_glm_leaveoneout"),
     help="Base filename for batch scripts",
 )
 parser.add_argument(
@@ -85,12 +84,6 @@
     "--job_cores", type=int, default=32, help="Number of cores to request per job."
 )
 parser.add_argument("--mem", type=str, default="12GB", help="Memory to request per job.")
-parser.add_argument(
-    "--submit_batch",
-    action="store_true",
-    default=False,
-    help="Submit batch jobs to SLURM cluster using the script.",
-)
 
 args = parser.parse_args()
 
@@ -110,7 +103,7 @@ def tmp_binf(t):
     "wheel_offset": -0.3,
     "contnorm": 5.0,
     "reduce_wheel_dim": False,
-    "dataset_fn": "2024-09-09_dataset_metadata.pkl",
+    "dataset_fn": "2024-08-12_dataset_metadata.pkl",
     "model": lm.LinearGLM,
     "alpha_grid": {"alpha": np.logspace(-3, 2, 50)},
     "contiguous": False,
@@ -120,6 +113,7 @@ def tmp_binf(t):
     "seqsel_kwargs": {"direction": "backward", "n_features_to_select": 8},
     "seqselfit_kwargs": {"full_scores": True},
     "seed": 0,
+    "rt_thresh": "session_median",
 }
 
 params["bases"] = {
@@ -130,7 +124,14 @@ def tmp_binf(t):
 }
 # Estimator relies on alpha grid in case of GridSearchCV, needs to be defined after main params
 params["estimator"] = GridSearchCV(skl.Ridge(), params["alpha_grid"])
-earlyrt_flag = "--earlyrt" if "rt_thresh" in params else ""
+if "rt_thresh" in params:
+    earlyrt_flag = "--earlyrt"
+    latert_flag = "--latert"
+    earlyrt_fn = "_early_rt"
+else:
+    earlyrt_flag = ""
+    latert_flag = ""
+    earlyrt_fn = ""
 
 # Output parameters file for workers
 currdate = str(date.today())
@@ -145,7 +146,7 @@ def tmp_binf(t):
 
 # Generate batch script
 make_batch_slurm_singularity(
-    str(args.basefilepath),
+    str(args.basefilepath) + earlyrt_fn,
     str(Path(__file__).parents[1].joinpath("cluster_worker.py")),
     job_name=args.jobname,
     partition=args.partition,
@@ -159,14 +160,29 @@ def tmp_binf(t):
     cores_per_job=args.job_cores,
     memory=args.mem,
     array_size=f"1-{njobs}",
-    f_args=[str(datapath), str(parpath), r"${SLURM_ARRAY_TASK_ID}", currdate],
+    f_args=[earlyrt_flag, str(datapath), str(parpath), r"${SLURM_ARRAY_TASK_ID}", currdate],
 )
-
-# If SUBMIT_BATCH, then actually execute the batch job
-if args.submit_batch:
-    os.system(f"sbatch {str(args.basefilepath) + '_batch.sh'}")
-else:
-    print(
-        f"Batch file generated at {str(args.basefilepath) + '_batch.sh'};"
-        " user must submit it themselves. Good luck!"
+if len(earlyrt_fn) > 0:
+    make_batch_slurm_singularity(
+        str(args.basefilepath) + "_late_rt",
+        str(Path(__file__).parents[1].joinpath("cluster_worker.py")),
+        job_name=args.jobname,
+        partition=args.partition,
+        time=args.timelimit,
+        singularity_modules=args.singularity_modules,
+        container_image=args.singularity_image,
+        img_condapath=args.singularity_conda,
+        img_envname=args.singularity_env,
+        local_pathadd=Path(__file__).parents[3],
+        logpath=args.logpath,
+        cores_per_job=args.job_cores,
+        memory=args.mem,
+        array_size=f"1-{njobs}",
+        f_args=[latert_flag, str(datapath), str(parpath), r"${SLURM_ARRAY_TASK_ID}", currdate],
     )
+
+# If SUBMIT_BATCH, then actually execute the batch jo
+print(
+    f"Batch file generated at {str(args.basefilepath) + '_batch.sh'};"
+    " user must submit it themselves. Good luck!"
+)
diff --git a/brainwidemap/encoding/pipelines/03_gather_results.py b/brainwidemap/encoding/pipelines/03_gather_results.py
index 8f13f970..2686db1a 100644
--- a/brainwidemap/encoding/pipelines/03_gather_results.py
+++ b/brainwidemap/encoding/pipelines/03_gather_results.py
@@ -53,7 +53,7 @@ def label_cerebellum(acronym, brainregions=BrainRegions()):
     # Brainwide repo imports
     from brainwidemap.encoding.params import GLM_CACHE, GLM_FIT_PATH
 
-    currdate = "2024-08-12"  # Date on which fit was run
+    currdate = "2024-09-09"  # Date on which fit was run
     n_cov = 9  # Modify if you change the model!
     parpath = Path(GLM_FIT_PATH).joinpath(f"{currdate}_glm_fit_pars.pkl")
     early_split = False

From 64661229886f85a41ffc0af0bda3d6838bb5c5ce Mon Sep 17 00:00:00 2001
From: Berk Gercek <gercek.berk@gmail.com>
Date: Mon, 16 Sep 2024 11:58:05 +0200
Subject: [PATCH 5/8] Docker updates and early RT fitting

---
 brainwidemap/encoding/Dockerfile              | 15 +++--------
 brainwidemap/encoding/cluster_worker.py       | 12 +++++++--
 brainwidemap/encoding/environment.yaml        | 26 +++++++------------
 .../encoding/pipelines/02_fit_sessions.py     |  1 +
 .../encoding/pipelines/03_gather_results.py   |  8 +-----
 5 files changed, 25 insertions(+), 37 deletions(-)

diff --git a/brainwidemap/encoding/Dockerfile b/brainwidemap/encoding/Dockerfile
index 87213ec8..64b63664 100644
--- a/brainwidemap/encoding/Dockerfile
+++ b/brainwidemap/encoding/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:11.7.1-devel-ubuntu22.04
+FROM ubuntu:latest
 # This can optionally be built with just ubuntu, rather than the nvidia cuda container.
 # If saving space is a concern, this is the way to go.
 LABEL description="Core container which has the basic necessities to run analyses in the\
@@ -15,20 +15,11 @@ COPY ./environment.yaml /data/environment.yaml
 SHELL ["/bin/bash", "-c"]
 # For some reason ibllib.io.video needs opencv which requires libgl1-mesa-dev ¯\_(ツ)_/¯
 RUN apt update && apt install -y wget git libgl1-mesa-dev
-RUN wget -O Mambaforge.sh  "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh"
-RUN bash Mambaforge.sh -b -p /opt/conda && rm Mambaforge.sh
+RUN wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
+RUN bash Miniforge3-$(uname)-$(uname -m).sh -b -p /opt/conda && rm Mambaforge.sh
 RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \
  mamba install --yes conda-build &&\
  mamba env create -n iblenv --file=environment.yaml"
-RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh &&\
- conda activate iblenv &&\
- mamba install --yes pytorch pytorch-cuda=11.7 -c pytorch -c nvidia &&\
- conda clean --all -f -y"
-RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh &&\
- conda activate iblenv &&\
- pip install globus-sdk iblutil ibllib iblapps ibl-neuropixel ONE-api phylib pynrrd slidingRP &&\
- git clone https://github.com/berkgercek/neurencoding &&\
- conda develop ./neurencoding"
 # The below allows interactively running the container with the correct environment, but be warned
 # that this will not work with commands passed to the container in a non-interactive shell.
 # In the case of e.g. `docker run thiscontainer python myscript.py`, the environment will not
diff --git a/brainwidemap/encoding/cluster_worker.py b/brainwidemap/encoding/cluster_worker.py
index 411079ca..6201b6be 100644
--- a/brainwidemap/encoding/cluster_worker.py
+++ b/brainwidemap/encoding/cluster_worker.py
@@ -38,9 +38,9 @@ def _create_sub_sess_path(parent, subject, session):
     return sesspath
 
 
-def save_stepwise(subject, session_id, fitout, params, probes, input_fn, clu_reg, clu_df, fitdate):
+def save_stepwise(subject, session_id, fitout, params, probes, input_fn, clu_reg, clu_df, fitdate, splitstr=""):
     sesspath = _create_sub_sess_path(GLM_FIT_PATH, subject, session_id)
-    fn = sesspath.joinpath(f"{fitdate}_{probes}_stepwise_regression.pkl")
+    fn = sesspath.joinpath(f"{fitdate}_{probes}{splitstr}_stepwise_regression.pkl")
     outdict = {
         "params": params,
         "probes": probes,
@@ -87,6 +87,14 @@ def fit_save_inputs(
 ):
     stdf, sspkt, sspkclu, sclureg, scluqc = get_cached_regressors(eidfn)
     sessprior = stdf["probabilityLeft"]
+    match (earlyrts, laterts):
+        case (False, False):
+            splitstr = ""
+        case (True, False):
+            splitstr = "_earlyrt"
+        case (False, True):
+            splitstr = "_latert"
+    paramsp["splitstr"] = splitstr
     if not earlyrts and not laterts:
         sessdesign = generate_design(stdf, sessprior, t_before, **params)
     else:
diff --git a/brainwidemap/encoding/environment.yaml b/brainwidemap/encoding/environment.yaml
index f50a46d4..b8c2af47 100644
--- a/brainwidemap/encoding/environment.yaml
+++ b/brainwidemap/encoding/environment.yaml
@@ -1,26 +1,11 @@
 name: iblenv
 dependencies:
   - python=3.10
-  - apptools >= 4.5.0
-  - boto3
-  - click
-  - colorcet
-  - colorlog
-  - cython
-  - dataclasses
-  - flake8
-  - graphviz
-  - h5py
   - ipython
   - matplotlib
   - numba
   - numpy
   - pandas
-  - plotly
-  - pyarrow
-  - pyflakes >= 2.4.0
-  - pytest
-  - requests
   - scikit-learn
   - scipy >=1.4.1
   - seaborn
@@ -28,4 +13,13 @@ dependencies:
   - tqdm
   - pip
   - pip:
-    - opencv-python
+    - iblutil
+    - ibllib
+    - iblapps
+    - ibl-neuropixel
+    - ONE-api
+    - phylib
+    - phy
+    - pynrrd
+    - slidingRP
+    - neurencoding
diff --git a/brainwidemap/encoding/pipelines/02_fit_sessions.py b/brainwidemap/encoding/pipelines/02_fit_sessions.py
index fccde714..826ce4bf 100644
--- a/brainwidemap/encoding/pipelines/02_fit_sessions.py
+++ b/brainwidemap/encoding/pipelines/02_fit_sessions.py
@@ -114,6 +114,7 @@ def tmp_binf(t):
     "seqselfit_kwargs": {"full_scores": True},
     "seed": 0,
     "rt_thresh": "session_median",
+    "mintrials": 50,
 }
 
 params["bases"] = {
diff --git a/brainwidemap/encoding/pipelines/03_gather_results.py b/brainwidemap/encoding/pipelines/03_gather_results.py
index 2686db1a..1dd32c77 100644
--- a/brainwidemap/encoding/pipelines/03_gather_results.py
+++ b/brainwidemap/encoding/pipelines/03_gather_results.py
@@ -53,15 +53,12 @@ def label_cerebellum(acronym, brainregions=BrainRegions()):
     # Brainwide repo imports
     from brainwidemap.encoding.params import GLM_CACHE, GLM_FIT_PATH
 
-    currdate = "2024-09-09"  # Date on which fit was run
+    currdate = "2024-09-15"  # Date on which fit was run
     n_cov = 9  # Modify if you change the model!
     parpath = Path(GLM_FIT_PATH).joinpath(f"{currdate}_glm_fit_pars.pkl")
     early_split = False
     with open(parpath, "rb") as fo:
         params = pickle.load(fo)
-    if "rt_thresh" in params:
-        early_split = True
-        n_cov += 2
     datapath = Path(GLM_CACHE).joinpath(params["dataset_fn"])
     with open(datapath, "rb") as fo:
         dataset = pickle.load(fo)
@@ -114,9 +111,6 @@ def label_cerebellum(acronym, brainregions=BrainRegions()):
         "wheel",
         "full_model",
     ]
-    if early_split:
-        kernels.insert(6, "fmoveL_early")
-        kernels.insert(6, "fmoveR_early")
 
     meanmaster = (
         masterscores.set_index(["eid", "pid", "clu_id", "acronym", "qc_label", "fold"])

From 7033d5c15ccc842218a8c3e81739bf553078d904 Mon Sep 17 00:00:00 2001
From: Berk Gercek <gercek.berk@gmail.com>
Date: Mon, 16 Sep 2024 11:59:30 +0200
Subject: [PATCH 6/8] bugfix

---
 brainwidemap/encoding/cluster_worker.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/brainwidemap/encoding/cluster_worker.py b/brainwidemap/encoding/cluster_worker.py
index 6201b6be..5a01b4ba 100644
--- a/brainwidemap/encoding/cluster_worker.py
+++ b/brainwidemap/encoding/cluster_worker.py
@@ -94,7 +94,6 @@ def fit_save_inputs(
             splitstr = "_earlyrt"
         case (False, True):
             splitstr = "_latert"
-    paramsp["splitstr"] = splitstr
     if not earlyrts and not laterts:
         sessdesign = generate_design(stdf, sessprior, t_before, **params)
     else:
@@ -117,7 +116,7 @@ def fit_save_inputs(
     if null is None:
         sessfit = fit_stepwise(sessdesign, sspkt, sspkclu, **params)
         outputfn = save_stepwise(
-            subject, eid, sessfit, params, probes, eidfn, sclureg, scluqc, fitdate
+            subject, eid, sessfit, params, probes, eidfn, sclureg, scluqc, fitdate, splitstr
         )
     elif null == "pseudosession_pleft_iti":
         sessfit, nullfits = fit_stepwise_with_pseudoblocks(

From fa92142159e95b3e4f953d31d326824b9660adae Mon Sep 17 00:00:00 2001
From: Berk Gercek <gercek.berk@gmail.com>
Date: Mon, 16 Sep 2024 12:00:20 +0200
Subject: [PATCH 7/8] dockerfile fix

---
 brainwidemap/encoding/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/brainwidemap/encoding/Dockerfile b/brainwidemap/encoding/Dockerfile
index 64b63664..61fc42d4 100644
--- a/brainwidemap/encoding/Dockerfile
+++ b/brainwidemap/encoding/Dockerfile
@@ -16,7 +16,7 @@ SHELL ["/bin/bash", "-c"]
 # For some reason ibllib.io.video needs opencv which requires libgl1-mesa-dev ¯\_(ツ)_/¯
 RUN apt update && apt install -y wget git libgl1-mesa-dev
 RUN wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
-RUN bash Miniforge3-$(uname)-$(uname -m).sh -b -p /opt/conda && rm Mambaforge.sh
+RUN /bin/bash -c "bash Miniforge3-$(uname)-$(uname -m).sh -b -p /opt/conda && rm Mambaforge.sh"
 RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \
  mamba install --yes conda-build &&\
  mamba env create -n iblenv --file=environment.yaml"

From 9651085731456e339144ba5b0fddacf91c8906e2 Mon Sep 17 00:00:00 2001
From: Berk Gercek <gercek.berk@gmail.com>
Date: Mon, 16 Sep 2024 13:32:57 +0200
Subject: [PATCH 8/8] Update docker spec and remove pytorch deps

---
 brainwidemap/encoding/Dockerfile       | 10 ++++++++--
 brainwidemap/encoding/environment.yaml | 13 +------------
 2 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/brainwidemap/encoding/Dockerfile b/brainwidemap/encoding/Dockerfile
index 61fc42d4..88305d6f 100644
--- a/brainwidemap/encoding/Dockerfile
+++ b/brainwidemap/encoding/Dockerfile
@@ -15,11 +15,17 @@ COPY ./environment.yaml /data/environment.yaml
 SHELL ["/bin/bash", "-c"]
 # For some reason ibllib.io.video needs opencv which requires libgl1-mesa-dev ¯\_(ツ)_/¯
 RUN apt update && apt install -y wget git libgl1-mesa-dev
-RUN wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
-RUN /bin/bash -c "bash Miniforge3-$(uname)-$(uname -m).sh -b -p /opt/conda && rm Mambaforge.sh"
+RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
+RUN bash Miniforge3.sh -b -p /opt/conda && rm Miniforge3.sh
+RUN wget -O iblreq.txt "https://raw.githubusercontent.com/int-brain-lab/ibllib/master/requirements.txt"
+RUN head -n -1 iblreq.txt > requirements.txt
+RUN rm iblreq.txt
 RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \
  mamba install --yes conda-build &&\
  mamba env create -n iblenv --file=environment.yaml"
+RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \
+ conda activate iblenv && pip install -r requirements.txt && pip install ibllib --no-deps"
+RUN rm requirements.txt
 # The below allows interactively running the container with the correct environment, but be warned
 # that this will not work with commands passed to the container in a non-interactive shell.
 # In the case of e.g. `docker run thiscontainer python myscript.py`, the environment will not
diff --git a/brainwidemap/encoding/environment.yaml b/brainwidemap/encoding/environment.yaml
index b8c2af47..490877f2 100644
--- a/brainwidemap/encoding/environment.yaml
+++ b/brainwidemap/encoding/environment.yaml
@@ -4,7 +4,6 @@ dependencies:
   - ipython
   - matplotlib
   - numba
-  - numpy
   - pandas
   - scikit-learn
   - scipy >=1.4.1
@@ -12,14 +11,4 @@ dependencies:
   - statsmodels
   - tqdm
   - pip
-  - pip:
-    - iblutil
-    - ibllib
-    - iblapps
-    - ibl-neuropixel
-    - ONE-api
-    - phylib
-    - phy
-    - pynrrd
-    - slidingRP
-    - neurencoding
+  - pyqt<6