Merge remote-tracking branch 'origin/encoding-revisions' into plotting
berk's updates
mschart committed Oct 7, 2024
2 parents 3827cc4 + 9651085 commit 23d49d9
Showing 11 changed files with 176 additions and 160 deletions.
21 changes: 9 additions & 12 deletions brainwidemap/encoding/Dockerfile
@@ -1,4 +1,4 @@
FROM nvidia/cuda:11.7.1-devel-ubuntu22.04
FROM ubuntu:latest
# This can optionally be built with just ubuntu, rather than the nvidia cuda container.
# If saving space is a concern, this is the way to go.
LABEL description="Core container which has the basic necessities to run analyses in the\
@@ -15,20 +15,17 @@ COPY ./environment.yaml /data/environment.yaml
SHELL ["/bin/bash", "-c"]
# For some reason ibllib.io.video needs opencv which requires libgl1-mesa-dev ¯\_(ツ)_/¯
RUN apt update && apt install -y wget git libgl1-mesa-dev
RUN wget -O Mambaforge.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh"
RUN bash Mambaforge.sh -b -p /opt/conda && rm Mambaforge.sh
RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
RUN bash Miniforge3.sh -b -p /opt/conda && rm Miniforge3.sh
RUN wget -O iblreq.txt "https://raw.githubusercontent.com/int-brain-lab/ibllib/master/requirements.txt"
RUN head -n -1 iblreq.txt > requirements.txt
RUN rm iblreq.txt
RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \
mamba install --yes conda-build &&\
mamba env create -n iblenv --file=environment.yaml"
RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh &&\
conda activate iblenv &&\
mamba install --yes pytorch pytorch-cuda=11.7 -c pytorch -c nvidia &&\
conda clean --all -f -y"
RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh &&\
conda activate iblenv &&\
pip install globus-sdk iblutil ibllib iblapps ibl-neuropixel ONE-api phylib pynrrd slidingRP &&\
git clone https://github.com/berkgercek/neurencoding &&\
conda develop ./neurencoding"
RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \
conda activate iblenv && pip install -r requirements.txt && pip install ibllib --no-deps"
RUN rm requirements.txt
# The below allows interactively running the container with the correct environment, but be warned
# that this will not work with commands passed to the container in a non-interactive shell.
# In the case of e.g. `docker run thiscontainer python myscript.py`, the environment will not be active.
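A possible workaround for the caveat in the closing comment, sketched under the assumption that the image keeps conda at /opt/conda with an environment named iblenv (both true of the RUN steps above); the `conda run` entrypoint is illustrative and not part of this diff:

    # Route every container command through iblenv so that non-interactive
    # invocations (e.g. `docker run thiscontainer python myscript.py`) also
    # see the activated environment.
    ENTRYPOINT ["/opt/conda/bin/conda", "run", "--no-capture-output", "-n", "iblenv"]
    CMD ["/bin/bash"]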
59 changes: 50 additions & 9 deletions brainwidemap/encoding/cluster_worker.py
@@ -38,9 +38,9 @@ def _create_sub_sess_path(parent, subject, session):
return sesspath


def save_stepwise(subject, session_id, fitout, params, probes, input_fn, clu_reg, clu_df, fitdate):
def save_stepwise(subject, session_id, fitout, params, probes, input_fn, clu_reg, clu_df, fitdate, splitstr=""):
sesspath = _create_sub_sess_path(GLM_FIT_PATH, subject, session_id)
fn = sesspath.joinpath(f"{fitdate}_{probes}_stepwise_regression.pkl")
fn = sesspath.joinpath(f"{fitdate}_{probes}{splitstr}_stepwise_regression.pkl")
outdict = {
"params": params,
"probes": probes,
@@ -82,14 +82,41 @@ def fit_save_inputs(
t_before,
fitdate,
null=None,
earlyrts=False,
laterts=False,
):
stdf, sspkt, sspkclu, sclureg, scluqc = get_cached_regressors(eidfn)
sessprior = stdf["probabilityLeft"]
sessdesign = generate_design(stdf, sessprior, t_before, **params)
match (earlyrts, laterts):
case (False, False):
splitstr = ""
case (True, False):
splitstr = "_earlyrt"
case (False, True):
splitstr = "_latert"
if not earlyrts and not laterts:
sessdesign = generate_design(stdf, sessprior, t_before, **params)
else:
# Handle early and late RT flags, compute median for session if necessary
if "rt_thresh" not in params:
raise ValueError("Must specify rt_thresh if fitting early or late RTs")
if laterts and earlyrts:
raise ValueError(
"Cannot fit both early and late RTs. Disable both flags to fit all trials."
)
if params["rt_thresh"] == "session_median":
params["rt_thresh"] = np.median(stdf["firstMovement_times"] - stdf["trial_start"])

if earlyrts:
mask = (stdf["firstMovement_times"] - stdf["trial_start"]) < params["rt_thresh"]
elif laterts:
mask = (stdf["firstMovement_times"] - stdf["trial_start"]) >= params["rt_thresh"]
stdf = stdf[mask]
sessdesign = generate_design(stdf, sessprior, t_before, **params)
if null is None:
sessfit = fit_stepwise(sessdesign, sspkt, sspkclu, **params)
outputfn = save_stepwise(
subject, eid, sessfit, params, probes, eidfn, sclureg, scluqc, fitdate
subject, eid, sessfit, params, probes, eidfn, sclureg, scluqc, fitdate, splitstr
)
elif null == "pseudosession_pleft_iti":
sessfit, nullfits = fit_stepwise_with_pseudoblocks(
Expand All @@ -115,11 +142,13 @@ def fit_save_inputs(


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Cluster GLM fitter. This script is called by"
"the batch script generated in "
"pipelines/02_fit_sessions.py and should in most "
"cases beyond debugging not be used in a "
"standalone fashion.")
parser = argparse.ArgumentParser(
description="Cluster GLM fitter. This script is called by"
"the batch script generated in "
"pipelines/02_fit_sessions.py and should in most "
"cases beyond debugging not be used in a "
"standalone fashion."
)
parser.add_argument(
"datafile",
type=Path,
@@ -132,6 +161,16 @@ def fit_save_inputs(
)
parser.add_argument("fitdate", help="Date of fit for output file")
parser.add_argument("--impostor_path", type=Path, help="Path to main impostor df file")
parser.add_argument(
"--earlyrt",
action="store_true",
help="Whether to fit separate movement kernels to early trials",
)
parser.add_argument(
"--latert",
action="store_true",
help="Whether to fit separate movement kernels to late trials",
)
args = parser.parse_args()

with open(args.datafile, "rb") as fo:
@@ -155,6 +194,8 @@ def fit_save_inputs(
t_before,
args.fitdate,
null=params["null"],
earlyrts=args.earlyrt,
laterts=args.latert,
)
print("Fitting completed successfully!")
print(outputfn)
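For reference, a hypothetical standalone invocation of the worker with one of the new flags; the file names are placeholders, since in normal use the batch script generated by pipelines/02_fit_sessions.py supplies these arguments:

    # Fit only the early reaction-time trials for array index 1:
    python brainwidemap/encoding/cluster_worker.py \
        dataset_metadata.pkl glm_params.pkl 1 2024-10-07 --earlyrt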
7 changes: 3 additions & 4 deletions brainwidemap/encoding/design.py
@@ -6,13 +6,12 @@
# Standard library
import logging

# IBL libraries
import neurencoding.design_matrix as dm

# Third party libraries
import numpy as np
import pandas as pd
from scipy.stats import norm

# IBL libraries
import neurencoding.design_matrix as dm

_logger = logging.getLogger("brainwide")

21 changes: 2 additions & 19 deletions brainwidemap/encoding/environment.yaml
@@ -1,31 +1,14 @@
name: iblenv
dependencies:
- python=3.9
- apptools >= 4.5.0
- boto3
- click
- colorcet
- colorlog
- cython
- dataclasses
- flake8
- graphviz
- h5py
- python=3.10
- ipython
- matplotlib
- numba
- numpy
- pandas
- plotly
- pyarrow
- pyflakes >= 2.4.0
- pytest
- requests
- scikit-learn
- scipy >=1.4.1
- seaborn
- statsmodels
- tqdm
- pip
- pip:
- opencv-python
- pyqt<6
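The slimmed-down dependency list is consumed by the same creation commands used in the Dockerfile above; a minimal sketch of rebuilding the environment outside the container, assuming a Miniforge/mamba installation:

    # Mirrors the container build steps above.
    mamba env create -n iblenv --file=environment.yaml
    conda activate iblenv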
4 changes: 2 additions & 2 deletions brainwidemap/encoding/params.py
@@ -4,5 +4,5 @@
work.
"""

GLM_CACHE = "/home/gercek/scratch/glm_cache/"
GLM_FIT_PATH = "/home/gercek/scratch/results/glms/"
GLM_CACHE = "/home/gercek/Projects/glm_cache/"
GLM_FIT_PATH = "/home/gercek/Projects/results/glms/"
4 changes: 2 additions & 2 deletions brainwidemap/encoding/pipelines/01_cache_regressors.py
@@ -76,11 +76,11 @@ def delayed_loadsave(subject, session_id, pid, params):
"binwidth": BINWIDTH,
"abswheel": ABSWHEEL,
"clu_criteria": CLU_CRITERIA,
"one_url": "https://openalyx.internationalbrainlab.org",
"one_url": "https://alyx.internationalbrainlab.org",
"one_pw": "international",
}

one = ONE(base_url=params["one_url"], password=params["one_pw"], silent=True)
one = ONE(base_url=params["one_url"], silent=True)
dataset_futures = []

freeze = "2023_12_bwm_release" if CLU_CRITERIA == "bwm" else None
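The server change also drops the explicit password, so ONE falls back to credentials cached from a previous login. A minimal sketch of the two connection styles, following the ONE-api usage in this file (variable names are illustrative):

    from one.api import ONE

    # Public open-access server: the documented password is part of the setup.
    one_public = ONE(base_url="https://openalyx.internationalbrainlab.org",
                     password="international", silent=True)

    # Internal server: no password argument; cached credentials are required.
    one_internal = ONE(base_url="https://alyx.internationalbrainlab.org", silent=True)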
74 changes: 47 additions & 27 deletions brainwidemap/encoding/pipelines/02_fit_sessions.py
@@ -1,19 +1,18 @@
# Standard library
import os
import pickle
import argparse
import pickle
from datetime import date
from pathlib import Path

# IBL libraries
import neurencoding.linear as lm
import neurencoding.utils as mut

# Third party libraries
import numpy as np
import sklearn.linear_model as skl
from sklearn.model_selection import GridSearchCV

# IBL libraries
import neurencoding.linear as lm
import neurencoding.utils as mut

# Brainwide repo imports
from brainwidemap.encoding.params import GLM_CACHE, GLM_FIT_PATH
from brainwidemap.encoding.utils import make_batch_slurm_singularity
@@ -27,12 +26,12 @@
" parameters for the actual GLM fitting are defined within the script itself."
" The arguments passed to the script via this parser are only for cluster control."
" If you would like to change parameters of the actual fit please adjust the contents"
" of the \"parameters\" section in the file."
' of the "parameters" section in the file.'
)
parser.add_argument(
"--basefilepath",
type=Path,
default=Path("~/").expanduser().joinpath("bwm_stepwise_glm_leaveoneout"),
default=Path("~/").expanduser().joinpath("jobscripts/bwm_stepwise_glm_leaveoneout"),
help="Base filename for batch scripts",
)
parser.add_argument(
@@ -85,21 +84,17 @@
"--job_cores", type=int, default=32, help="Number of cores to request per job."
)
parser.add_argument("--mem", type=str, default="12GB", help="Memory to request per job.")
parser.add_argument(
"--submit_batch",
action="store_true",
default=False,
help="Submit batch jobs to SLURM cluster using the script.",
)

args = parser.parse_args()


# Model parameters
# The GLM constructor class requires a function that converts time to bin index, here we define it
# using the binwidth parameter created shortly.
def tmp_binf(t):
# using the binwidth parameter created shortly.
def tmp_binf(t):
return np.ceil(t / params["binwidth"]).astype(int)
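A self-contained restatement of the conversion with a worked value; the binwidth of 0.02 s is assumed from the params block below:

    import numpy as np

    binwidth = 0.02  # seconds, matching params["binwidth"]

    def binf(t):
        # Map a time in seconds to a bin index by ceiling division.
        return np.ceil(t / binwidth).astype(int)

    # An event at t = 0.05 s lands in bin ceil(0.05 / 0.02) = ceil(2.5) = 3.
    assert binf(np.array(0.05)) == 3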


######### PARAMETERS #########
params = {
"binwidth": 0.02,
@@ -108,7 +103,7 @@ def tmp_binf(t):
"wheel_offset": -0.3,
"contnorm": 5.0,
"reduce_wheel_dim": False,
"dataset_fn": "2024-01-06_dataset_metadata.pkl",
"dataset_fn": "2024-08-12_dataset_metadata.pkl",
"model": lm.LinearGLM,
"alpha_grid": {"alpha": np.logspace(-3, 2, 50)},
"contiguous": False,
@@ -118,6 +113,8 @@ def tmp_binf(t):
"seqsel_kwargs": {"direction": "backward", "n_features_to_select": 8},
"seqselfit_kwargs": {"full_scores": True},
"seed": 0,
"rt_thresh": "session_median",
"mintrials": 50,
}

params["bases"] = {
@@ -128,6 +125,14 @@ def tmp_binf(t):
}
# Estimator relies on alpha grid in case of GridSearchCV, needs to be defined after main params
params["estimator"] = GridSearchCV(skl.Ridge(), params["alpha_grid"])
if "rt_thresh" in params:
earlyrt_flag = "--earlyrt"
latert_flag = "--latert"
earlyrt_fn = "_early_rt"
else:
earlyrt_flag = ""
latert_flag = ""
earlyrt_fn = ""

# Output parameters file for workers
currdate = str(date.today())
@@ -142,7 +147,7 @@ def tmp_binf(t):

# Generate batch script
make_batch_slurm_singularity(
str(args.basefilepath),
str(args.basefilepath) + earlyrt_fn,
str(Path(__file__).parents[1].joinpath("cluster_worker.py")),
job_name=args.jobname,
partition=args.partition,
@@ -156,14 +161,29 @@
cores_per_job=args.job_cores,
memory=args.mem,
array_size=f"1-{njobs}",
f_args=[str(datapath), str(parpath), r"${SLURM_ARRAY_TASK_ID}", currdate],
f_args=[earlyrt_flag, str(datapath), str(parpath), r"${SLURM_ARRAY_TASK_ID}", currdate],
)

# If SUBMIT_BATCH, then actually execute the batch job
if args.submit_batch:
os.system(f"sbatch {str(args.basefilepath) + '_batch.sh'}")
else:
print(
f"Batch file generated at {str(args.basefilepath) + '_batch.sh'};"
" user must submit it themselves. Good luck!"
if len(earlyrt_fn) > 0:
make_batch_slurm_singularity(
str(args.basefilepath) + "_late_rt",
str(Path(__file__).parents[1].joinpath("cluster_worker.py")),
job_name=args.jobname,
partition=args.partition,
time=args.timelimit,
singularity_modules=args.singularity_modules,
container_image=args.singularity_image,
img_condapath=args.singularity_conda,
img_envname=args.singularity_env,
local_pathadd=Path(__file__).parents[3],
logpath=args.logpath,
cores_per_job=args.job_cores,
memory=args.mem,
array_size=f"1-{njobs}",
f_args=[latert_flag, str(datapath), str(parpath), r"${SLURM_ARRAY_TASK_ID}", currdate],
)

# The generated batch scripts are no longer submitted automatically; the user must run sbatch manually.
print(
f"Batch file generated at {str(args.basefilepath) + '_batch.sh'};"
" user must submit it themselves. Good luck!"
)
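With --submit_batch removed, the generated scripts are submitted by hand. A hypothetical session, assuming the default basefilepath and the "_early_rt"/"_late_rt" suffixes added above (exact filenames follow the basefilepath + "_batch.sh" pattern used in the print statement, so they are assumptions here):

    sbatch ~/jobscripts/bwm_stepwise_glm_leaveoneout_early_rt_batch.sh
    sbatch ~/jobscripts/bwm_stepwise_glm_leaveoneout_late_rt_batch.sh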
