diff --git a/articles/slurmjobs.html b/articles/slurmjobs.html index d047576..af839fc 100644 --- a/articles/slurmjobs.html +++ b/articles/slurmjobs.html @@ -76,6 +76,13 @@

Nicholas J.
Lieber Institute for Brain Development
+
+

Leonardo +Collado-Torres

+
+ Lieber Institute for Brain Development; Center for Computational +Biology, Johns Hopkins +University
lcolladotor@gmail.com

3 October 2023

@@ -180,6 +187,187 @@

Quick start to using slurmjobs
+

Creating Shell Scripts to sbatch +

+

When processing data on a SLURM-managed system, primarily +running R code, you’ll likely find yourself writing many “wrapper” shell +scripts that can be submitted via sbatch to the job +scheduler. This process requires precise SLURM-specific syntax and a +large amount of repetition. job_single aims to reduce +required configuration from the user to just a handful of options that +tend to vary most often between shell scripts (e.g. memory, number of +CPUs), and automate the rest of the shell-script-creation process.

+

Shell scripts created by job_single log key +reproducibility information, such as the user, job ID, job name, node +name, and when the job starts and ends.

+
+#   With 'create_shell = FALSE', the contents of the potential shell script are
+#   only printed to the screen
+job_single(
+    name = "my_shell_script", memory = "10G", cores = 2, create_shell = FALSE
+)
+#> 2023-10-03 19:38:13.572525 creating the logs directory at:  logs
+#> #!/bin/bash
+#> #SBATCH -p shared
+#> #SBATCH --mem-per-cpu=10G
+#> #SBATCH --job-name=my_shell_script
+#> #SBATCH -c 2
+#> #SBATCH -o logs/my_shell_script.txt
+#> #SBATCH -e logs/my_shell_script.txt
+#> #SBATCH --mail-type=ALL
+#> 
+#> set -e
+#> 
+#> echo "**** Job starts ****"
+#> date
+#> 
+#> echo "**** JHPCE info ****"
+#> echo "User: ${USER}"
+#> echo "Job id: ${SLURM_JOB_ID}"
+#> echo "Job name: ${SLURM_JOB_NAME}"
+#> echo "Node name: ${SLURMD_NODENAME}"
+#> echo "Task id: ${SLURM_ARRAY_TASK_ID}"
+#> 
+#> ## Load the R module
+#> module load conda_R
+#> 
+#> ## List current modules for reproducibility
+#> module list
+#> 
+#> ## Edit with your job command
+#> Rscript -e "options(width = 120); sessioninfo::session_info()"
+#> 
+#> echo "**** Job ends ****"
+#> date
+#> 
+#> ## This script was made using slurmjobs version 0.99.0
+#> ## available from http://research.libd.org/slurmjobs/
+

Similarly, we can specify task_num to create an array +job — in this case, one with 10 tasks.

+
+job_single(
+    name = "my_array_job", memory = "5G", cores = 1, create_shell = FALSE,
+    task_num = 10
+)
+#> 2023-10-03 19:38:13.667186 creating the logs directory at:  logs
+#> #!/bin/bash
+#> #SBATCH -p shared
+#> #SBATCH --mem-per-cpu=5G
+#> #SBATCH --job-name=my_array_job
+#> #SBATCH -c 1
+#> #SBATCH -o logs/my_array_job.%a.txt
+#> #SBATCH -e logs/my_array_job.%a.txt
+#> #SBATCH --mail-type=ALL
+#> #SBATCH --array=1-10%20
+#> 
+#> set -e
+#> 
+#> echo "**** Job starts ****"
+#> date
+#> 
+#> echo "**** JHPCE info ****"
+#> echo "User: ${USER}"
+#> echo "Job id: ${SLURM_JOB_ID}"
+#> echo "Job name: ${SLURM_JOB_NAME}"
+#> echo "Node name: ${SLURMD_NODENAME}"
+#> echo "Task id: ${SLURM_ARRAY_TASK_ID}"
+#> 
+#> ## Load the R module
+#> module load conda_R
+#> 
+#> ## List current modules for reproducibility
+#> module list
+#> 
+#> ## Edit with your job command
+#> Rscript -e "options(width = 120); sessioninfo::session_info()"
+#> 
+#> echo "**** Job ends ****"
+#> date
+#> 
+#> ## This script was made using slurmjobs version 0.99.0
+#> ## available from http://research.libd.org/slurmjobs/
+

job_loop() is a little bit more complicated since you +have to specify the loops named list argument. The +loops argument specifies the bash variable +names and values to loop through for creating a series of +bash scripts that will get submitted to SLURM. This type of +bash script is something we use frequently, for example in +the compute_weights.sh +script (Collado-Torres, Burke, Peterson, Shin, Straub, Rajpurohit, +Semick, Ulrich, Price, Valencia, Tao, Deep-Soboslay, Hyde, Kleinman, +Weinberger, and Jaffe, 2019). I believe this type of script generator is +something Alyssa Frazee taught me +back in the day, and you can see examples of it in some old repositories such as leekgroup/derSoftware. +Besides the loops argument, job_loop() shares +most of the options with job_single().

+
+job_loop(
+    loops = list(region = c("DLPFC", "HIPPO"), feature = c("gene", "exon", "tx", "jxn")),
+    name = "bsp2_test"
+)
+#> #!/bin/bash
+#> 
+#> ## Usage:
+#> # sh bsp2_test.sh
+#> 
+#> ## Create the logs directory
+#> mkdir -p logs
+#> 
+#> for region in DLPFC HIPPO; do
+#>     for feature in gene exon tx jxn; do
+#> 
+#>     ## Internal script name
+#>     SHORT="bsp2_test_${region}_${feature}"
+#> 
+#>     # Construct shell file
+#>     echo "Creating script bsp2_test_${region}_${feature}"
+#>     cat > .${SHORT}.sh <<EOF
+#> #!/bin/bash
+#> #SBATCH -p shared
+#> #SBATCH --mem-per-cpu=10G
+#> #SBATCH --job-name=${SHORT}
+#> #SBATCH -c 1
+#> #SBATCH -o logs/${SHORT}.txt
+#> #SBATCH -e logs/${SHORT}.txt
+#> #SBATCH --mail-type=ALL
+#> 
+#> set -e
+#> 
+#> echo "**** Job starts ****"
+#> date
+#> 
+#> echo "**** JHPCE info ****"
+#> echo "User: \${USER}"
+#> echo "Job id: \${SLURM_JOB_ID}"
+#> echo "Job name: \${SLURM_JOB_NAME}"
+#> echo "Node name: \${SLURMD_NODENAME}"
+#> echo "Task id: \${SLURM_ARRAY_TASK_ID}"
+#> 
+#> ## Load the R module
+#> module load conda_R
+#> 
+#> ## List current modules for reproducibility
+#> module list
+#> 
+#> ## Edit with your job command
+#> Rscript -e "options(width = 120); print('${region}'); print('${feature}'); sessioninfo::session_info()"
+#> 
+#> echo "**** Job ends ****"
+#> date
+#> 
+#> ## This script was made using slurmjobs version 0.99.0
+#> ## available from http://research.libd.org/slurmjobs/
+#> 
+#> 
+#> EOF
+#> 
+#>     call="sbatch .${SHORT}.sh"
+#>     echo $call
+#>     $call
+#>     done
+#> done
+
+

Monitoring Running Jobs

The job_info() function provides wrappers around the @@ -193,9 +381,9 @@

Monitoring Running Jobsjob_df = job_info(user = NULL, partition = "shared") here, to get every user’s jobs running on the “shared” partition. We’ll load an example output directly here.

-
+
 #   On a real SLURM system
-job_df = readRDS(
+job_df <- readRDS(
     system.file("extdata", "job_info_df.rds", package = "slurmjobs")
 )
 print(job_df)
@@ -219,7 +407,7 @@ 

Monitoring Running Jobs -
+
 job_df |>
     #   Or your username here
     filter(user == "user17") |>
@@ -255,8 +443,8 @@ 

Analyzing Finished Jobsjob_report as available in the slurmjobs package.

-
-job_df = readRDS(
+
-

Now let’s choose a better memory request,

-
-stat_df = job_df |>
+

Now let’s choose a better memory request:

+
+stat_df <- job_df |>
     #   This example includes tasks that fail. We're only interested in memory
     #   for successfully completed tasks
-    filter(status != 'FAILED') |>
+    filter(status != "FAILED") |>
     summarize(
         mean_mem = mean(max_vmem_gb),
         std_mem = sd(max_vmem_gb),
@@ -288,7 +476,7 @@ 

Analyzing Finished Jobs # We could choose a new memory request as 3 standard deviations above the mean # of actual memory usage -new_limit = stat_df$mean_mem + 3 * stat_df$std_mem +new_limit <- stat_df$mean_mem + 3 * stat_df$std_mem print( sprintf( @@ -331,7 +519,7 @@

Reproducibility

This package was developed using biocthis.

Code for creating the vignette

-
+
 ## Create the vignette
 library("rmarkdown")
 system.time(render("slurmjobs.Rmd", "BiocStyle::html_document"))
@@ -340,9 +528,9 @@ 

Reproducibilitylibrary("knitr") knit("slurmjobs.Rmd", tangle = TRUE)

Date the vignette was generated.

-
#> [1] "2023-10-03 18:04:05 UTC"
+
#> [1] "2023-10-03 19:38:15 UTC"

Wallclock time spent generating the vignette.

-
#> Time difference of 1.944 secs
+
#> Time difference of 3.242 secs

R session information.

#> ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
 #>  setting  value
@@ -358,60 +546,63 @@ 

Reproducibility#> pandoc 3.1.1 @ /usr/local/bin/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> backports 1.4.1 2021-12-13 [1] RSPM (R 4.3.0) -#> bibtex 0.5.1 2023-01-26 [1] RSPM (R 4.3.0) -#> BiocManager 1.30.22 2023-08-08 [2] CRAN (R 4.3.1) -#> BiocStyle * 2.28.1 2023-09-14 [1] Bioconductor -#> bookdown 0.35 2023-08-09 [1] RSPM (R 4.3.0) -#> bslib 0.5.1 2023-08-11 [2] RSPM (R 4.3.0) -#> cachem 1.0.8 2023-05-01 [2] RSPM (R 4.3.0) -#> cli 3.6.1 2023-03-23 [2] RSPM (R 4.3.0) -#> desc 1.4.2 2022-09-08 [2] RSPM (R 4.3.0) -#> digest 0.6.33 2023-07-07 [2] RSPM (R 4.3.0) -#> dplyr * 1.1.3 2023-09-03 [1] RSPM (R 4.3.0) -#> evaluate 0.21 2023-05-05 [2] RSPM (R 4.3.0) -#> fansi 1.0.4 2023-01-22 [2] RSPM (R 4.3.0) -#> fastmap 1.1.1 2023-02-24 [2] RSPM (R 4.3.0) -#> fs 1.6.3 2023-07-20 [2] RSPM (R 4.3.0) -#> generics 0.1.3 2022-07-05 [1] RSPM (R 4.3.0) -#> glue 1.6.2 2022-02-24 [2] RSPM (R 4.3.0) -#> htmltools 0.5.6 2023-08-10 [2] RSPM (R 4.3.0) -#> httr 1.4.7 2023-08-15 [2] RSPM (R 4.3.0) -#> jquerylib 0.1.4 2021-04-26 [2] RSPM (R 4.3.0) -#> jsonlite 1.8.7 2023-06-29 [2] RSPM (R 4.3.0) -#> knitr 1.44 2023-09-11 [2] RSPM (R 4.3.0) -#> lifecycle 1.0.3 2022-10-07 [2] RSPM (R 4.3.0) -#> lubridate 1.9.3 2023-09-27 [1] RSPM (R 4.3.0) -#> magrittr 2.0.3 2022-03-30 [2] RSPM (R 4.3.0) -#> memoise 2.0.1 2021-11-26 [2] RSPM (R 4.3.0) -#> pillar 1.9.0 2023-03-22 [2] RSPM (R 4.3.0) -#> pkgconfig 2.0.3 2019-09-22 [2] RSPM (R 4.3.0) -#> pkgdown 2.0.7 2022-12-14 [2] RSPM (R 4.3.0) -#> plyr 1.8.8 2022-11-11 [1] RSPM (R 4.3.0) -#> purrr 1.0.2 2023-08-10 [2] RSPM (R 4.3.0) -#> R6 2.5.1 2021-08-19 [2] RSPM (R 4.3.0) -#> ragg 1.2.5 2023-01-12 [2] RSPM (R 4.3.0) -#> Rcpp 1.0.11 2023-07-06 [2] RSPM (R 4.3.0) -#> RefManageR * 1.4.0 2022-09-30 [1] RSPM (R 4.3.0) -#> rlang 1.1.1 2023-04-28 [2] RSPM (R 4.3.0) -#> rmarkdown 2.25 2023-09-18 [2] RSPM (R 4.3.0) -#> 
rprojroot 2.0.3 2022-04-02 [2] RSPM (R 4.3.0) -#> sass 0.4.7 2023-07-15 [2] RSPM (R 4.3.0) -#> sessioninfo * 1.2.2 2021-12-06 [2] RSPM (R 4.3.0) -#> slurmjobs * 0.99.0 2023-10-03 [1] local -#> stringi 1.7.12 2023-01-11 [2] RSPM (R 4.3.0) -#> stringr 1.5.0 2022-12-02 [2] RSPM (R 4.3.0) -#> systemfonts 1.0.4 2022-02-11 [2] RSPM (R 4.3.0) -#> textshaping 0.3.6 2021-10-13 [2] RSPM (R 4.3.0) -#> tibble 3.2.1 2023-03-20 [2] RSPM (R 4.3.0) -#> tidyselect 1.2.0 2022-10-10 [1] RSPM (R 4.3.0) -#> timechange 0.2.0 2023-01-11 [1] RSPM (R 4.3.0) -#> utf8 1.2.3 2023-01-31 [2] RSPM (R 4.3.0) -#> vctrs 0.6.3 2023-06-14 [2] RSPM (R 4.3.0) -#> xfun 0.40 2023-08-09 [2] RSPM (R 4.3.0) -#> xml2 1.3.5 2023-07-06 [2] RSPM (R 4.3.0) -#> yaml 2.3.7 2023-01-23 [2] RSPM (R 4.3.0) +#> package * version date (UTC) lib source +#> backports 1.4.1 2021-12-13 [1] RSPM (R 4.3.0) +#> bibtex 0.5.1 2023-01-26 [1] RSPM (R 4.3.0) +#> BiocManager 1.30.22 2023-08-08 [2] CRAN (R 4.3.1) +#> BiocStyle * 2.28.1 2023-09-14 [1] Bioconductor +#> bookdown 0.35 2023-08-09 [1] RSPM (R 4.3.0) +#> bslib 0.5.1 2023-08-11 [2] RSPM (R 4.3.0) +#> cachem 1.0.8 2023-05-01 [2] RSPM (R 4.3.0) +#> cli 3.6.1 2023-03-23 [2] RSPM (R 4.3.0) +#> crayon 1.5.2 2022-09-29 [2] RSPM (R 4.3.0) +#> curl 5.0.2 2023-08-14 [2] RSPM (R 4.3.0) +#> desc 1.4.2 2022-09-08 [2] RSPM (R 4.3.0) +#> digest 0.6.33 2023-07-07 [2] RSPM (R 4.3.0) +#> dplyr * 1.1.3 2023-09-03 [1] RSPM (R 4.3.0) +#> evaluate 0.21 2023-05-05 [2] RSPM (R 4.3.0) +#> fansi 1.0.4 2023-01-22 [2] RSPM (R 4.3.0) +#> fastmap 1.1.1 2023-02-24 [2] RSPM (R 4.3.0) +#> fs 1.6.3 2023-07-20 [2] RSPM (R 4.3.0) +#> generics 0.1.3 2022-07-05 [1] RSPM (R 4.3.0) +#> glue 1.6.2 2022-02-24 [2] RSPM (R 4.3.0) +#> htmltools 0.5.6 2023-08-10 [2] RSPM (R 4.3.0) +#> httr 1.4.7 2023-08-15 [2] RSPM (R 4.3.0) +#> jquerylib 0.1.4 2021-04-26 [2] RSPM (R 4.3.0) +#> jsonlite 1.8.7 2023-06-29 [2] RSPM (R 4.3.0) +#> knitcitations * 1.0.12 2021-01-10 [1] RSPM (R 4.3.0) +#> knitr 1.44 2023-09-11 [2] RSPM (R 
4.3.0) +#> lifecycle 1.0.3 2022-10-07 [2] RSPM (R 4.3.0) +#> lubridate 1.9.3 2023-09-27 [1] RSPM (R 4.3.0) +#> magrittr 2.0.3 2022-03-30 [2] RSPM (R 4.3.0) +#> memoise 2.0.1 2021-11-26 [2] RSPM (R 4.3.0) +#> pillar 1.9.0 2023-03-22 [2] RSPM (R 4.3.0) +#> pkgconfig 2.0.3 2019-09-22 [2] RSPM (R 4.3.0) +#> pkgdown 2.0.7 2022-12-14 [2] RSPM (R 4.3.0) +#> plyr 1.8.8 2022-11-11 [1] RSPM (R 4.3.0) +#> purrr 1.0.2 2023-08-10 [2] RSPM (R 4.3.0) +#> R6 2.5.1 2021-08-19 [2] RSPM (R 4.3.0) +#> ragg 1.2.5 2023-01-12 [2] RSPM (R 4.3.0) +#> Rcpp 1.0.11 2023-07-06 [2] RSPM (R 4.3.0) +#> RefManageR * 1.4.0 2022-09-30 [1] RSPM (R 4.3.0) +#> rlang 1.1.1 2023-04-28 [2] RSPM (R 4.3.0) +#> rmarkdown 2.25 2023-09-18 [2] RSPM (R 4.3.0) +#> rprojroot 2.0.3 2022-04-02 [2] RSPM (R 4.3.0) +#> sass 0.4.7 2023-07-15 [2] RSPM (R 4.3.0) +#> sessioninfo * 1.2.2 2021-12-06 [2] RSPM (R 4.3.0) +#> slurmjobs * 0.99.0 2023-10-03 [1] local +#> stringi 1.7.12 2023-01-11 [2] RSPM (R 4.3.0) +#> stringr 1.5.0 2022-12-02 [2] RSPM (R 4.3.0) +#> systemfonts 1.0.4 2022-02-11 [2] RSPM (R 4.3.0) +#> textshaping 0.3.6 2021-10-13 [2] RSPM (R 4.3.0) +#> tibble 3.2.1 2023-03-20 [2] RSPM (R 4.3.0) +#> tidyselect 1.2.0 2022-10-10 [1] RSPM (R 4.3.0) +#> timechange 0.2.0 2023-01-11 [1] RSPM (R 4.3.0) +#> utf8 1.2.3 2023-01-31 [2] RSPM (R 4.3.0) +#> vctrs 0.6.3 2023-06-14 [2] RSPM (R 4.3.0) +#> xfun 0.40 2023-08-09 [2] RSPM (R 4.3.0) +#> xml2 1.3.5 2023-07-06 [2] RSPM (R 4.3.0) +#> yaml 2.3.7 2023-01-23 [2] RSPM (R 4.3.0) #> #> [1] /__w/_temp/Library #> [2] /usr/local/lib/R/site-library diff --git a/index.html b/index.html index 5f37f93..92b3150 100644 --- a/index.html +++ b/index.html @@ -77,7 +77,7 @@

-

The goal of slurmjobs is to …

+

slurmjobs provides helper functions for interacting with SLURM-managed high-performance-computing environments from R. It includes functions for creating submittable jobs (including array jobs), monitoring partitions, and extracting info about running or complete jobs. For details, check out the documentation site.

Installation instructions

@@ -93,32 +93,10 @@

Installation instructionsBiocManager::install("LieberInstitute/slurmjobs")

-

Example -

-

This is a basic example which shows you how to solve a common problem:

-
-library("slurmjobs")
-## basic example code
-

What is special about using README.Rmd instead of just README.md? You can include R chunks like so:

-
-summary(cars)
-#>      speed           dist       
-#>  Min.   : 4.0   Min.   :  2.00  
-#>  1st Qu.:12.0   1st Qu.: 26.00  
-#>  Median :15.0   Median : 36.00  
-#>  Mean   :15.4   Mean   : 42.98  
-#>  3rd Qu.:19.0   3rd Qu.: 56.00  
-#>  Max.   :25.0   Max.   :120.00
-

You’ll still need to render README.Rmd regularly, to keep README.md up-to-date.

-

You can also embed plots, for example:

-

-

In that case, don’t forget to commit and push the resulting figure files, so they display on GitHub!

-
-

Citation

Below is the citation output from using citation('slurmjobs') in R. Please run this yourself to check for any updates on how to cite slurmjobs.

-
+
 print(citation("slurmjobs"), bibtex = TRUE)
 #> To cite package 'slurmjobs' in publications use:
 #> 
diff --git a/pkgdown.yml b/pkgdown.yml
index 3df754a..fb079a0 100644
--- a/pkgdown.yml
+++ b/pkgdown.yml
@@ -3,5 +3,5 @@ pkgdown: 2.0.7
 pkgdown_sha: ~
 articles:
   slurmjobs: slurmjobs.html
-last_built: 2023-10-03T18:03Z
+last_built: 2023-10-03T19:38Z
 
diff --git a/reference/job_loop.html b/reference/job_loop.html
index bdee18f..a760bc1 100644
--- a/reference/job_loop.html
+++ b/reference/job_loop.html
@@ -175,6 +175,7 @@ 

Examples

#> #SBATCH -e logs/${SHORT}.txt #> #SBATCH --mail-type=ALL #> +#> set -e #> #> echo "**** Job starts ****" #> date @@ -198,7 +199,7 @@

Examples

#> echo "**** Job ends ****" #> date #> -#> ## This script was made using slurmjobs version testing +#> ## This script was made using slurmjobs version 0.99.0 #> ## available from http://research.libd.org/slurmjobs/ #> #> @@ -244,6 +245,7 @@

Examples

#> #SBATCH --mail-type=ALL #> #SBATCH --array=1-10%20 #> +#> set -e #> #> echo "**** Job starts ****" #> date @@ -267,7 +269,7 @@

Examples

#> echo "**** Job ends ****" #> date #> -#> ## This script was made using slurmjobs version testing +#> ## This script was made using slurmjobs version 0.99.0 #> ## available from http://research.libd.org/slurmjobs/ #> #> diff --git a/reference/job_report.html b/reference/job_report.html index a8ea7f0..bd58d44 100644 --- a/reference/job_report.html +++ b/reference/job_report.html @@ -92,8 +92,8 @@

Examples


 #    Must be run in a SLURM environment
 if (system("which sbatch") == 0) {
-    job_df = job_report('234904')
-    
+    job_df <- job_report("234904")
+
     #    Check max virtual memory reached by this job
     print(job_df$max_vmem_gb)
 }
diff --git a/reference/job_single.html b/reference/job_single.html
index 5d17992..60a2158 100644
--- a/reference/job_single.html
+++ b/reference/job_single.html
@@ -168,6 +168,7 @@ 

Examples

#> #SBATCH -e logs/jhpce_job.txt #> #SBATCH --mail-type=ALL #> +#> set -e #> #> echo "**** Job starts ****" #> date @@ -191,7 +192,7 @@

Examples

#> echo "**** Job ends ****" #> date #> -#> ## This script was made using slurmjobs version testing +#> ## This script was made using slurmjobs version 0.99.0 #> ## available from http://research.libd.org/slurmjobs/ #> @@ -209,6 +210,7 @@

Examples

#> #SBATCH -e logs/jhpce_job.txt #> #SBATCH --mail-type=ALL #> +#> set -e #> #> echo "**** Job starts ****" #> date @@ -232,7 +234,7 @@

Examples

#> echo "**** Job ends ****" #> date #> -#> ## This script was made using slurmjobs version testing +#> ## This script was made using slurmjobs version 0.99.0 #> ## available from http://research.libd.org/slurmjobs/ #> @@ -248,6 +250,7 @@

Examples

#> #SBATCH --mail-type=ALL #> #SBATCH --array=1-20%20 #> +#> set -e #> #> echo "**** Job starts ****" #> date @@ -271,7 +274,7 @@

Examples

#> echo "**** Job ends ****" #> date #> -#> ## This script was made using slurmjobs version testing +#> ## This script was made using slurmjobs version 0.99.0 #> ## available from http://research.libd.org/slurmjobs/ #>