Merge remote-tracking branch 'origin/encoding-revisions' into plotting
berk's updates
mschart committed Oct 7, 2024
2 parents 3827cc4 + 9651085 commit 23d49d9
Showing 11 changed files with 176 additions and 160 deletions.
21 changes: 9 additions & 12 deletions brainwidemap/encoding/Dockerfile
@@ -1,4 +1,4 @@
FROM nvidia/cuda:11.7.1-devel-ubuntu22.04
FROM ubuntu:latest
# This can optionally be built with just ubuntu, rather than the nvidia cuda container.
# If saving space is a concern, this is the way to go.
LABEL description="Core container which has the basic necessities to run analyses in the\
@@ -15,20 +15,17 @@ COPY ./environment.yaml /data/environment.yaml
SHELL ["/bin/bash", "-c"]
# For some reason ibllib.io.video needs opencv which requires libgl1-mesa-dev ¯\_(ツ)_/¯
RUN apt update && apt install -y wget git libgl1-mesa-dev
RUN wget -O Mambaforge.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh"
RUN bash Mambaforge.sh -b -p /opt/conda && rm Mambaforge.sh
RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
RUN bash Miniforge3.sh -b -p /opt/conda && rm Miniforge3.sh
RUN wget -O iblreq.txt "https://raw.githubusercontent.com/int-brain-lab/ibllib/master/requirements.txt"
RUN head -n -1 iblreq.txt > requirements.txt
RUN rm iblreq.txt
RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \
mamba install --yes conda-build &&\
mamba env create -n iblenv --file=environment.yaml"
RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh &&\
conda activate iblenv &&\
mamba install --yes pytorch pytorch-cuda=11.7 -c pytorch -c nvidia &&\
conda clean --all -f -y"
RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh &&\
conda activate iblenv &&\
pip install globus-sdk iblutil ibllib iblapps ibl-neuropixel ONE-api phylib pynrrd slidingRP &&\
git clone https://github.com/berkgercek/neurencoding &&\
conda develop ./neurencoding"
RUN /bin/bash -c "source /opt/conda/etc/profile.d/conda.sh && \
conda activate iblenv && pip install -r requirements.txt && pip install ibllib --no-deps"
RUN rm requirements.txt
# The below allows interactively running the container with the correct environment, but be warned
# that this will not work with commands passed to the container in a non-interactive shell.
# In the case of e.g. `docker run thiscontainer python myscript.py`, the environment will not be active.
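A possible workaround for the caveat in the closing comment, sketched under the assumption that the image keeps conda at /opt/conda with an environment named iblenv (both true of the RUN steps above); the `conda run` entrypoint is illustrative and not part of this diff:

    # Route every container command through iblenv so that non-interactive
    # invocations (e.g. `docker run thiscontainer python myscript.py`) also
    # see the activated environment.
    ENTRYPOINT ["/opt/conda/bin/conda", "run", "--no-capture-output", "-n", "iblenv"]
    CMD ["/bin/bash"]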
59 changes: 50 additions & 9 deletions brainwidemap/encoding/cluster_worker.py
@@ -38,9 +38,9 @@ def _create_sub_sess_path(parent, subject, session):
return sesspath


def save_stepwise(subject, session_id, fitout, params, probes, input_fn, clu_reg, clu_df, fitdate):
def save_stepwise(subject, session_id, fitout, params, probes, input_fn, clu_reg, clu_df, fitdate, splitstr=""):
sesspath = _create_sub_sess_path(GLM_FIT_PATH, subject, session_id)
fn = sesspath.joinpath(f"{fitdate}_{probes}_stepwise_regression.pkl")
fn = sesspath.joinpath(f"{fitdate}_{probes}{splitstr}_stepwise_regression.pkl")
outdict = {
"params": params,
"probes": probes,
@@ -82,14 +82,41 @@ def fit_save_inputs(
t_before,
fitdate,
null=None,
earlyrts=False,
laterts=False,
):
stdf, sspkt, sspkclu, sclureg, scluqc = get_cached_regressors(eidfn)
sessprior = stdf["probabilityLeft"]
sessdesign = generate_design(stdf, sessprior, t_before, **params)
match (earlyrts, laterts):
case (False, False):
splitstr = ""
case (True, False):
splitstr = "_earlyrt"
case (False, True):
splitstr = "_latert"
if not earlyrts and not laterts:
sessdesign = generate_design(stdf, sessprior, t_before, **params)
else:
# Handle early and late RT flags, compute median for session if necessary
if "rt_thresh" not in params:
raise ValueError("Must specify rt_thresh if fitting early or late RTs")
if laterts and earlyrts:
raise ValueError(
"Cannot fit both early and late RTs. Disable both flags to fit all trials."
)
if params["rt_thresh"] == "session_median":
params["rt_thresh"] = np.median(stdf["firstMovement_times"] - stdf["trial_start"])

if earlyrts:
mask = (stdf["firstMovement_times"] - stdf["trial_start"]) < params["rt_thresh"]
elif laterts:
mask = (stdf["firstMovement_times"] - stdf["trial_start"]) >= params["rt_thresh"]
stdf = stdf[mask]
sessdesign = generate_design(stdf, sessprior, t_before, **params)
if null is None:
sessfit = fit_stepwise(sessdesign, sspkt, sspkclu, **params)
outputfn = save_stepwise(
subject, eid, sessfit, params, probes, eidfn, sclureg, scluqc, fitdate
subject, eid, sessfit, params, probes, eidfn, sclureg, scluqc, fitdate, splitstr
)
elif null == "pseudosession_pleft_iti":
sessfit, nullfits = fit_stepwise_with_pseudoblocks(
Expand All @@ -115,11 +142,13 @@ def fit_save_inputs(


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Cluster GLM fitter. This script is called by"
"the batch script generated in "
"pipelines/02_fit_sessions.py and should in most "
"cases beyond debugging not be used in a "
"standalone fashion.")
parser = argparse.ArgumentParser(
description="Cluster GLM fitter. This script is called by"
"the batch script generated in "
"pipelines/02_fit_sessions.py and should in most "
"cases beyond debugging not be used in a "
"standalone fashion."
)
parser.add_argument(
"datafile",
type=Path,
@@ -132,6 +161,16 @@ def fit_save_inputs(
)
parser.add_argument("fitdate", help="Date of fit for output file")
parser.add_argument("--impostor_path", type=Path, help="Path to main impostor df file")
parser.add_argument(
"--earlyrt",
action="store_true",
help="Whether to fit separate movement kernels to early trials",
)
parser.add_argument(
"--latert",
action="store_true",
help="Whether to fit separate movement kernels to late trials",
)
args = parser.parse_args()

with open(args.datafile, "rb") as fo:
@@ -155,6 +194,8 @@ def fit_save_inputs(
t_before,
args.fitdate,
null=params["null"],
earlyrts=args.earlyrt,
laterts=args.latert,
)
print("Fitting completed successfully!")
print(outputfn)
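For reference, a hypothetical standalone invocation of the worker with one of the new flags; the file names are placeholders, since in normal use the batch script generated by pipelines/02_fit_sessions.py supplies these arguments:

    # Fit only the early reaction-time trials for array index 1:
    python brainwidemap/encoding/cluster_worker.py \
        dataset_metadata.pkl glm_params.pkl 1 2024-10-07 --earlyrt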
7 changes: 3 additions & 4 deletions brainwidemap/encoding/design.py
@@ -6,13 +6,12 @@
# Standard library
import logging

# IBL libraries
import neurencoding.design_matrix as dm

# Third party libraries
import numpy as np
import pandas as pd
from scipy.stats import norm

# IBL libraries
import neurencoding.design_matrix as dm

_logger = logging.getLogger("brainwide")

21 changes: 2 additions & 19 deletions brainwidemap/encoding/environment.yaml
@@ -1,31 +1,14 @@
name: iblenv
dependencies:
- python=3.9
- apptools >= 4.5.0
- boto3
- click
- colorcet
- colorlog
- cython
- dataclasses
- flake8
- graphviz
- h5py
- python=3.10
- ipython
- matplotlib
- numba
- numpy
- pandas
- plotly
- pyarrow
- pyflakes >= 2.4.0
- pytest
- requests
- scikit-learn
- scipy >=1.4.1
- seaborn
- statsmodels
- tqdm
- pip
- pip:
- opencv-python
- pyqt<6
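The slimmed-down dependency list is consumed by the same creation commands used in the Dockerfile above; a minimal sketch of rebuilding the environment outside the container, assuming a Miniforge/mamba installation:

    # Mirrors the container build steps above.
    mamba env create -n iblenv --file=environment.yaml
    conda activate iblenv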
4 changes: 2 additions & 2 deletions brainwidemap/encoding/params.py
@@ -4,5 +4,5 @@
work.
"""

GLM_CACHE = "/home/gercek/scratch/glm_cache/"
GLM_FIT_PATH = "/home/gercek/scratch/results/glms/"
GLM_CACHE = "/home/gercek/Projects/glm_cache/"
GLM_FIT_PATH = "/home/gercek/Projects/results/glms/"
4 changes: 2 additions & 2 deletions brainwidemap/encoding/pipelines/01_cache_regressors.py
@@ -76,11 +76,11 @@ def delayed_loadsave(subject, session_id, pid, params):
"binwidth": BINWIDTH,
"abswheel": ABSWHEEL,
"clu_criteria": CLU_CRITERIA,
"one_url": "https://openalyx.internationalbrainlab.org",
"one_url": "https://alyx.internationalbrainlab.org",
"one_pw": "international",
}

one = ONE(base_url=params["one_url"], password=params["one_pw"], silent=True)
one = ONE(base_url=params["one_url"], silent=True)
dataset_futures = []

freeze = "2023_12_bwm_release" if CLU_CRITERIA == "bwm" else None
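The server change also drops the explicit password, so ONE falls back to credentials cached from a previous login. A minimal sketch of the two connection styles, following the ONE-api usage in this file (variable names are illustrative):

    from one.api import ONE

    # Public open-access server: the documented password is part of the setup.
    one_public = ONE(base_url="https://openalyx.internationalbrainlab.org",
                     password="international", silent=True)

    # Internal server: no password argument; cached credentials are required.
    one_internal = ONE(base_url="https://alyx.internationalbrainlab.org", silent=True)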
74 changes: 47 additions & 27 deletions brainwidemap/encoding/pipelines/02_fit_sessions.py
@@ -1,19 +1,18 @@
# Standard library
import os
import pickle
import argparse
import pickle
from datetime import date
from pathlib import Path

# IBL libraries
import neurencoding.linear as lm
import neurencoding.utils as mut

# Third party libraries
import numpy as np
import sklearn.linear_model as skl
from sklearn.model_selection import GridSearchCV

# IBL libraries
import neurencoding.linear as lm
import neurencoding.utils as mut

# Brainwide repo imports
from brainwidemap.encoding.params import GLM_CACHE, GLM_FIT_PATH
from brainwidemap.encoding.utils import make_batch_slurm_singularity
@@ -27,12 +26,12 @@
" parameters for the actual GLM fitting are defined within the script itself."
" The arguments passed to the script via this parser are only for cluster control."
" If you would like to change parameters of the actual fit please adjust the contents"
" of the \"parameters\" section in the file."
' of the "parameters" section in the file.'
)
parser.add_argument(
"--basefilepath",
type=Path,
default=Path("~/").expanduser().joinpath("bwm_stepwise_glm_leaveoneout"),
default=Path("~/").expanduser().joinpath("jobscripts/bwm_stepwise_glm_leaveoneout"),
help="Base filename for batch scripts",
)
parser.add_argument(
@@ -85,21 +84,17 @@
"--job_cores", type=int, default=32, help="Number of cores to request per job."
)
parser.add_argument("--mem", type=str, default="12GB", help="Memory to request per job.")
parser.add_argument(
"--submit_batch",
action="store_true",
default=False,
help="Submit batch jobs to SLURM cluster using the script.",
)

args = parser.parse_args()


# Model parameters
# The GLM constructor class requires a function that converts time to bin index, here we define it
# using the binwidth parameter created shortly.
def tmp_binf(t):
# using the binwidth parameter created shortly.
def tmp_binf(t):
return np.ceil(t / params["binwidth"]).astype(int)
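A self-contained restatement of the conversion with a worked value; the binwidth of 0.02 s is assumed from the params block below:

    import numpy as np

    binwidth = 0.02  # seconds, matching params["binwidth"]

    def binf(t):
        # Map a time in seconds to a bin index by ceiling division.
        return np.ceil(t / binwidth).astype(int)

    # An event at t = 0.05 s lands in bin ceil(0.05 / 0.02) = ceil(2.5) = 3.
    assert binf(np.array(0.05)) == 3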


######### PARAMETERS #########
params = {
"binwidth": 0.02,
@@ -108,7 +103,7 @@ def tmp_binf(t):
"wheel_offset": -0.3,
"contnorm": 5.0,
"reduce_wheel_dim": False,
"dataset_fn": "2024-01-06_dataset_metadata.pkl",
"dataset_fn": "2024-08-12_dataset_metadata.pkl",
"model": lm.LinearGLM,
"alpha_grid": {"alpha": np.logspace(-3, 2, 50)},
"contiguous": False,
@@ -118,6 +113,8 @@ def tmp_binf(t):
"seqsel_kwargs": {"direction": "backward", "n_features_to_select": 8},
"seqselfit_kwargs": {"full_scores": True},
"seed": 0,
"rt_thresh": "session_median",
"mintrials": 50,
}

params["bases"] = {
@@ -128,6 +125,14 @@ def tmp_binf(t):
}
# Estimator relies on alpha grid in case of GridSearchCV, needs to be defined after main params
params["estimator"] = GridSearchCV(skl.Ridge(), params["alpha_grid"])
if "rt_thresh" in params:
earlyrt_flag = "--earlyrt"
latert_flag = "--latert"
earlyrt_fn = "_early_rt"
else:
earlyrt_flag = ""
latert_flag = ""
earlyrt_fn = ""

# Output parameters file for workers
currdate = str(date.today())
@@ -142,7 +147,7 @@ def tmp_binf(t):

# Generate batch script
make_batch_slurm_singularity(
str(args.basefilepath),
str(args.basefilepath) + earlyrt_fn,
str(Path(__file__).parents[1].joinpath("cluster_worker.py")),
job_name=args.jobname,
partition=args.partition,
@@ -156,14 +161,29 @@
cores_per_job=args.job_cores,
memory=args.mem,
array_size=f"1-{njobs}",
f_args=[str(datapath), str(parpath), r"${SLURM_ARRAY_TASK_ID}", currdate],
f_args=[earlyrt_flag, str(datapath), str(parpath), r"${SLURM_ARRAY_TASK_ID}", currdate],
)

# If SUBMIT_BATCH, then actually execute the batch job
if args.submit_batch:
os.system(f"sbatch {str(args.basefilepath) + '_batch.sh'}")
else:
print(
f"Batch file generated at {str(args.basefilepath) + '_batch.sh'};"
" user must submit it themselves. Good luck!"
if len(earlyrt_fn) > 0:
make_batch_slurm_singularity(
str(args.basefilepath) + "_late_rt",
str(Path(__file__).parents[1].joinpath("cluster_worker.py")),
job_name=args.jobname,
partition=args.partition,
time=args.timelimit,
singularity_modules=args.singularity_modules,
container_image=args.singularity_image,
img_condapath=args.singularity_conda,
img_envname=args.singularity_env,
local_pathadd=Path(__file__).parents[3],
logpath=args.logpath,
cores_per_job=args.job_cores,
memory=args.mem,
array_size=f"1-{njobs}",
f_args=[latert_flag, str(datapath), str(parpath), r"${SLURM_ARRAY_TASK_ID}", currdate],
)

# The generated batch scripts are no longer submitted automatically; the user must run sbatch manually.
print(
f"Batch file generated at {str(args.basefilepath) + '_batch.sh'};"
" user must submit it themselves. Good luck!"
)
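With --submit_batch removed, the generated scripts are submitted by hand. A hypothetical session, assuming the default basefilepath and the "_early_rt"/"_late_rt" suffixes added above (exact filenames follow the basefilepath + "_batch.sh" pattern used in the print statement, so they are assumptions here):

    sbatch ~/jobscripts/bwm_stepwise_glm_leaveoneout_early_rt_batch.sh
    sbatch ~/jobscripts/bwm_stepwise_glm_leaveoneout_late_rt_batch.sh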
