From 70ec3da418760eb154dc351be3ff51bc3eb869ab Mon Sep 17 00:00:00 2001 From: jeffersonfparil Date: Sun, 23 Jun 2024 14:56:18 +1000 Subject: [PATCH] drafted + will need to update error codes and complete docs in README.md --- conda.yml | 268 ++++++++++++++++++ inst/exec_Rscript/0-submit.sh | 37 +++ ...1-jp3h-SunJun23062044AEST2024-RAND27285.sh | 121 ++++++++ inst/exec_Rscript/1-checks_and_submision.sh | 121 ++++++++ ...1-jp3h-SunJun23062044AEST2024-RAND27285.sh | 122 ++++++++ inst/exec_Rscript/2-gp_slurm_job.sh | 122 ++++++++ inst/exec_Rscript/config.txt | 23 ++ 7 files changed, 814 insertions(+) create mode 100644 conda.yml create mode 100755 inst/exec_Rscript/0-submit.sh create mode 100755 inst/exec_Rscript/1-checks_and_submision-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh create mode 100755 inst/exec_Rscript/1-checks_and_submision.sh create mode 100755 inst/exec_Rscript/2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh create mode 100755 inst/exec_Rscript/2-gp_slurm_job.sh create mode 100644 inst/exec_Rscript/config.txt diff --git a/conda.yml b/conda.yml new file mode 100644 index 0000000..f830034 --- /dev/null +++ b/conda.yml @@ -0,0 +1,268 @@ +name: genomic_selection +channels: + - conda-forge + - defaults +dependencies: + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_gnu + - _r-mutex=1.0.1=anacondar_1 + - bat=0.24.0=he8a937b_0 + - binutils_impl_linux-64=2.40=hf600244_0 + - bwidget=1.9.14=ha770c72_1 + - bzip2=1.0.8=h7f98852_4 + - c-ares=1.19.1=hd590300_0 + - ca-certificates=2024.3.11=h06a4308_0 + - cairo=1.16.0=h0c91306_1017 + - curl=8.3.0=hca28451_0 + - expat=2.5.0=hcb278e6_1 + - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 + - font-ttf-inconsolata=3.000=h77eed37_0 + - font-ttf-source-code-pro=2.038=h77eed37_0 + - font-ttf-ubuntu=0.83=hab24e00_0 + - fontconfig=2.14.2=h14ed4e7_0 + - fonts-conda-ecosystem=1=0 + - fonts-conda-forge=1=0 + - freetype=2.12.1=h267a509_2 + - fribidi=1.0.10=h36c2ea0_0 + - gcc_impl_linux-64=13.2.0=h338b0a0_2 + - 
gettext=0.21.1=h27087fc_0 + - gfortran_impl_linux-64=13.2.0=h76e1118_2 + - git=2.42.0=pl5321h86e50cf_0 + - graphite2=1.3.13=h58526e2_1001 + - gxx_impl_linux-64=13.2.0=h338b0a0_2 + - harfbuzz=8.2.1=h3d44ed6_0 + - helix=24.03=h1ffa460_0 + - htop=3.2.2=h8228510_0 + - icu=73.2=h59595ed_0 + - kernel-headers_linux-64=2.6.32=he073ed8_16 + - keyutils=1.6.1=h166bdaf_0 + - krb5=1.21.2=h659d440_0 + - ld_impl_linux-64=2.40=h41732ed_0 + - lerc=4.0.0=h27087fc_0 + - libblas=3.9.0=18_linux64_openblas + - libcurl=8.3.0=hca28451_0 + - libdeflate=1.19=hd590300_0 + - libedit=3.1.20191231=he28a2e2_2 + - libev=4.33=h516909a_1 + - libexpat=2.5.0=hcb278e6_1 + - libffi=3.4.2=h7f98852_5 + - libgcc-devel_linux-64=13.2.0=ha9c7c90_2 + - libgcc-ng=13.2.0=h807b86a_2 + - libgfortran-ng=13.2.0=h69a702a_2 + - libgfortran5=13.2.0=ha4646dd_2 + - libgit2=1.7.1=hca3a8ce_0 + - libglib=2.78.0=hebfc3b9_0 + - libgomp=13.2.0=h807b86a_2 + - libiconv=1.17=h166bdaf_0 + - libjpeg-turbo=3.0.0=hd590300_1 + - liblapack=3.9.0=18_linux64_openblas + - libnghttp2=1.52.0=h61bc06f_0 + - libnl=3.8.0=hd590300_0 + - libnsl=2.0.1=hd590300_0 + - libopenblas=0.3.24=pthreads_h413a1c8_0 + - libpng=1.6.39=h753d276_0 + - libsanitizer=13.2.0=h7e041cc_2 + - libssh2=1.11.0=h0841786_0 + - libstdcxx-devel_linux-64=13.2.0=ha9c7c90_2 + - libstdcxx-ng=13.2.0=h7e041cc_2 + - libtiff=4.6.0=ha9c0a0a_2 + - libuuid=2.38.1=h0b41bf4_0 + - libwebp-base=1.3.2=hd590300_0 + - libxcb=1.15=h0b41bf4_0 + - libxml2=2.12.5=h232c23b_0 + - libzlib=1.2.13=hd590300_5 + - make=4.3=hd18ef5c_1 + - micro=2.0.8=ha8f183a_1 + - ncurses=6.4=hcb278e6_0 + - openssl=3.2.1=hd590300_1 + - pandoc=2.12=h06a4308_3 + - pango=1.50.14=ha41ecd1_2 + - pcre2=10.40=hc3806b6_0 + - perl=5.32.1=4_hd590300_perl5 + - pixman=0.42.2=h59595ed_0 + - pthread-stubs=0.4=h36c2ea0_1001 + - r-anytime=0.3.9=r43h884c59f_0 + - r-ape=5.7_1=r43h08d816e_1 + - r-argparse=2.2.2=r43h6115d3f_0 + - r-askpass=1.2.0=r43h76d94ec_0 + - r-assertthat=0.2.1=r43hc72bb7e_4 + - r-base=4.3.1=h93585b2_6 + - 
r-base64enc=0.1_3=r43h57805ef_1006 + - r-bglr=1.1.0=r43h57805ef_2 + - r-bh=1.81.0_1=r43h6115d3f_0 + - r-bigmemory=4.6.1=r43ha503ecb_2 + - r-bigmemory.sri=0.1.6=r43hc72bb7e_1 + - r-bit=4.0.5=r43h76d94ec_0 + - r-bit64=4.0.5=r43h76d94ec_0 + - r-blob=1.2.4=r43h6115d3f_0 + - r-boot=1.3_28.1=r43h6115d3f_0 + - r-brew=1.0_10=r43hc72bb7e_0 + - r-brio=1.1.3=r43h57805ef_2 + - r-bslib=0.6.1=r43hc72bb7e_0 + - r-cachem=1.0.8=r43h57805ef_1 + - r-callr=3.7.3=r43hc72bb7e_1 + - r-cli=3.6.1=r43ha503ecb_1 + - r-clipr=0.8.0=r43hc72bb7e_2 + - r-cluster=2.1.4=r43h61816a4_1 + - r-codetools=0.2_19=r43hc72bb7e_1 + - r-colorspace=2.1_0=r43h76d94ec_0 + - r-commonmark=1.9.1=r43h57805ef_0 + - r-cpp11=0.4.7=r43hc72bb7e_0 + - r-crayon=1.5.2=r43hc72bb7e_2 + - r-credentials=2.0.1=r43h142f84f_0 + - r-crosstalk=1.2.0=r43h6115d3f_0 + - r-curl=5.1.0=r43hf9611b0_0 + - r-data.table=1.14.8=r43h76d94ec_0 + - r-dbi=1.1.3=r43h6115d3f_0 + - r-desc=1.4.2=r43hc72bb7e_2 + - r-devtools=2.4.5=r43hc72bb7e_2 + - r-diffobj=0.3.5=r43h57805ef_2 + - r-digest=0.6.33=r43ha503ecb_0 + - r-domc=1.3.8=r43ha770c72_2 + - r-doparallel=1.0.17=r43hc72bb7e_2 + - r-downlit=0.4.3=r43h6115d3f_0 + - r-dplyr=1.1.3=r43ha503ecb_0 + - r-ellipsis=0.3.2=r43h57805ef_2 + - r-evaluate=0.22=r43hc72bb7e_0 + - r-fansi=1.0.4=r43h57805ef_1 + - r-farver=2.1.1=r43h884c59f_0 + - r-fastmap=1.1.1=r43ha503ecb_1 + - r-findpython=1.0.8=r43h6115d3f_0 + - r-fontawesome=0.5.2=r43h6115d3f_0 + - r-foreach=1.5.2=r43hc72bb7e_2 + - r-fs=1.6.3=r43ha503ecb_0 + - r-generics=0.1.3=r43hc72bb7e_2 + - r-gert=2.0.1=r43hc25a090_0 + - r-ggplot2=3.4.4=r43h6115d3f_0 + - r-gh=1.4.0=r43hc72bb7e_1 + - r-gitcreds=0.1.2=r43hc72bb7e_2 + - r-glmnet=4.1_8=r43hcf54a89_0 + - r-glue=1.6.2=r43h57805ef_2 + - r-gtable=0.3.4=r43h6115d3f_0 + - r-highr=0.10=r43hc72bb7e_1 + - r-htmltools=0.5.7=r43ha503ecb_0 + - r-htmlwidgets=1.6.4=r43hc72bb7e_1 + - r-httpuv=1.6.14=r43ha503ecb_0 + - r-httr=1.4.7=r43h6115d3f_0 + - r-httr2=1.0.0=r43hc72bb7e_0 + - r-ini=0.3.1=r43hc72bb7e_1005 + - 
r-isoband=0.2.7=r43h884c59f_0 + - r-iterators=1.0.14=r43hc72bb7e_2 + - r-jquerylib=0.1.4=r43hc72bb7e_2 + - r-jsonlite=1.8.7=r43h57805ef_0 + - r-knitr=1.45=r43hc72bb7e_0 + - r-labeling=0.4.3=r43h6115d3f_0 + - r-later=1.3.2=r43ha503ecb_0 + - r-lattice=0.21_9=r43h57805ef_0 + - r-lazyeval=0.2.2=r43h76d94ec_0 + - r-lifecycle=1.0.3=r43hc72bb7e_2 + - r-lme4=1.1_34=r43h884c59f_0 + - r-magrittr=2.0.3=r43h57805ef_2 + - r-mass=7.3_60=r43h57805ef_1 + - r-matrix=1.6_1.1=r43h316c678_0 + - r-memoise=2.0.1=r43hc72bb7e_2 + - r-memuse=4.2_3=r43h57805ef_1 + - r-mgcv=1.9_0=r43h316c678_0 + - r-mime=0.12=r43h57805ef_2 + - r-miniui=0.1.1.1=r43hc72bb7e_1004 + - r-minqa=1.2.6=r43hb5eb8f6_0 + - r-munsell=0.5.0=r43h6115d3f_0 + - r-nlme=3.1_163=r43h61816a4_0 + - r-nloptr=2.0.3=r43hb5eb8f6_0 + - r-openssl=2.1.1=r43h76d94ec_0 + - r-permute=0.9_7=r43hc72bb7e_2 + - r-pillar=1.9.0=r43hc72bb7e_1 + - r-pinfsc50=1.2.0=r43ha770c72_2 + - r-pkgbuild=1.4.2=r43hc72bb7e_0 + - r-pkgconfig=2.0.3=r43hc72bb7e_3 + - r-pkgdown=2.0.7=r43hc72bb7e_1 + - r-pkgload=1.3.3=r43hc72bb7e_0 + - r-plogr=0.2.0=r43h6115d3f_0 + - r-plotly=4.10.2=r43h6115d3f_0 + - r-praise=1.0.0=r43hc72bb7e_1007 + - r-prettyunits=1.2.0=r43hc72bb7e_0 + - r-processx=3.8.2=r43h57805ef_0 + - r-profvis=0.3.8=r43h57805ef_3 + - r-promises=1.2.1=r43h884c59f_0 + - r-ps=1.7.5=r43h57805ef_1 + - r-purrr=1.0.2=r43h76d94ec_0 + - r-r6=2.5.1=r43hc72bb7e_2 + - r-ragg=1.2.7=r43h73ae6e3_0 + - r-rappdirs=0.3.3=r43h57805ef_2 + - r-rcmdcheck=1.4.0=r43h785f33e_2 + - r-rcolorbrewer=1.1_3=r43h6115d3f_1 + - r-rcpp=1.0.11=r43h7df8631_0 + - r-rcpparmadillo=0.12.6.4.0=r43h884c59f_0 + - r-rcppeigen=0.3.3.9.3=r43h08d816e_1 + - r-rcppprogress=0.4.2=r43h142f84f_0 + - r-rematch2=2.1.2=r43hc72bb7e_3 + - r-remotes=2.4.2.1=r43h142f84f_0 + - r-rgl=1.2.1=r43h884c59f_0 + - r-rlang=1.1.1=r43ha503ecb_1 + - r-rmarkdown=2.25=r43h6115d3f_0 + - r-roxygen2=7.3.1=r43ha503ecb_0 + - r-rprojroot=2.0.3=r43hc72bb7e_0 + - r-rrblup=4.6.2=r43hc72bb7e_1 + - r-rsqlite=2.3.1=r43h884c59f_0 + - 
r-rstudioapi=0.15.0=r43h6115d3f_0 + - r-rversions=2.1.2=r43hc72bb7e_2 + - r-sass=0.4.8=r43ha503ecb_0 + - r-scales=1.2.1=r43h6115d3f_0 + - r-sessioninfo=1.2.2=r43hc72bb7e_2 + - r-shape=1.4.6=r43ha770c72_2 + - r-shiny=1.8.0=r43h785f33e_0 + - r-shinycssloaders=1.0.0=r43h6115d3f_0 + - r-shinywidgets=0.8.0=r43h142f84f_0 + - r-sommer=4.3.2=r43h884c59f_0 + - r-sourcetools=0.1.7_1=r43ha503ecb_1 + - r-stringi=1.7.12=r43h9facbd6_3 + - r-stringr=1.5.0=r43h785f33e_1 + - r-survival=3.5_7=r43h57805ef_0 + - r-sys=3.4.2=r43h57805ef_1 + - r-systemfonts=1.0.5=r43h884c59f_0 + - r-testthat=3.2.0=r43ha503ecb_0 + - r-textshaping=0.3.7=r43h884c59f_0 + - r-tibble=3.2.1=r43h57805ef_2 + - r-tidyr=1.3.0=r43h884c59f_0 + - r-tidyselect=1.2.0=r43hc72bb7e_1 + - r-tinytex=0.49=r43hc72bb7e_1 + - r-truncnorm=1.0_9=r43h57805ef_1 + - r-txtplot=1.0_4=r43h142f84f_0 + - r-urlchecker=1.0.1=r43hc72bb7e_2 + - r-usethis=2.2.3=r43hc72bb7e_0 + - r-utf8=1.2.3=r43h57805ef_1 + - r-uuid=1.1_1=r43h57805ef_0 + - r-vcfr=1.14.0=r43h33b523d_1 + - r-vctrs=0.6.3=r43ha503ecb_0 + - r-vegan=2.6_4=r43hd9ac46e_1 + - r-viridislite=0.4.2=r43hc72bb7e_1 + - r-waldo=0.5.1=r43hc72bb7e_1 + - r-whisker=0.4.1=r43hc72bb7e_1 + - r-withr=2.5.1=r43hc72bb7e_0 + - r-xfun=0.42=r43ha503ecb_0 + - r-xml2=1.3.6=r43hbfba7a4_1 + - r-xopen=1.0.0=r43hc72bb7e_1005 + - r-xtable=1.8_4=r43hc72bb7e_5 + - r-yaml=2.3.8=r43h57805ef_0 + - r-zip=2.3.1=r43h57805ef_0 + - readline=8.2=h8228510_1 + - sed=4.8=he412f7d_0 + - sysroot_linux-64=2.12=he073ed8_16 + - tk=8.6.13=h2797004_0 + - tktable=2.10=h0c5db8f_5 + - xorg-kbproto=1.0.7=h7f98852_1002 + - xorg-libice=1.1.1=hd590300_0 + - xorg-libsm=1.2.4=h7391055_0 + - xorg-libx11=1.8.6=h8ee46fc_0 + - xorg-libxau=1.0.11=hd590300_0 + - xorg-libxdmcp=1.1.3=h7f98852_0 + - xorg-libxext=1.3.4=h0b41bf4_2 + - xorg-libxrender=0.9.11=hd590300_0 + - xorg-libxt=1.3.0=hd590300_1 + - xorg-renderproto=0.11.1=h7f98852_1002 + - xorg-xextproto=7.3.0=h0b41bf4_1003 + - xorg-xproto=7.0.31=h7f98852_1007 + - xz=5.2.6=h166bdaf_0 + - 
zlib=1.2.13=hd590300_5
+  - zstd=1.5.5=hfc55251_0
+prefix: /home/jp3h/.conda/envs/genomic_selection
diff --git a/inst/exec_Rscript/0-submit.sh b/inst/exec_Rscript/0-submit.sh
new file mode 100755
index 0000000..b32880f
--- /dev/null
+++ b/inst/exec_Rscript/0-submit.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+### Generate run-specific copies of the template scripts `1-checks_and_submision.sh` and `2-gp_slurm_job.sh`
+### using the settings in `config.txt`, then run the generated checks script which submits the Slurm array job.
+### `config.txt` is read by line number: (1) genotype file, (2) phenotype file, (3) number of folds, (4) number of replications,
+### (5) output directory, (6) job name, (7) account name, (8) number of tasks, (9) cpus per task, (10) memory, and (11) time limit.
+### Each line replaces the matching default text in the templates verbatim, hence it must not contain
+### `|` (the sed delimiter used below) or an unescaped `&` (which sed expands to the matched text).
+set -euo pipefail
+### Work from the directory of this script so that config.txt and the templates are found regardless of where it is called from
+cd "$(dirname "$0")"
+for f in config.txt 1-checks_and_submision.sh 2-gp_slurm_job.sh
+do
+    if [ ! -f "$f" ]
+    then
+        echo "Error: $f does not exist in $(pwd)." >&2
+        exit 100
+    fi
+done
+### Unique name of the run given a specific instance of config.txt
+RUN_NAME="$(hostname)-${USER:-$(id -un)}-$(date | tr -d ' :')-RAND${RANDOM}"
+### Print line $1 of config.txt with double quotes converted into single quotes
+config_line() {
+    sed -n "${1}p" config.txt | sed "s/\"/'/g"
+}
+### Extract config variables
+CONFIG_GENO=$(config_line 1)
+CONFIG_PHENO=$(config_line 2)
+CONFIG_KFOLDS=$(config_line 3)
+CONFIG_NREPS=$(config_line 4)
+CONFIG_DIR_OUT=$(config_line 5)
+CONFIG_JOB_NAME=$(config_line 6)
+CONFIG_ACCOUNT_NAME=$(config_line 7)
+CONFIG_NTASKS=$(config_line 8)
+CONFIG_NCPUS=$(config_line 9)
+CONFIG_MEM=$(config_line 10)
+CONFIG_TIME_LIMIT=$(config_line 11)
+### Create the checks and submission scripts using the config variables
+sed -e "s|GENOTYPE_DATA_RDS=\${DIR_SRC}/input/test_geno.Rds|$CONFIG_GENO|g" \
+    -e "s|PHENOTYPE_DATA_TSV=\${DIR_SRC}/input/test_pheno.tsv|$CONFIG_PHENO|g" \
+    -e "s|KFOLDS=5|$CONFIG_KFOLDS|g" \
+    -e "s|NREPS=3|$CONFIG_NREPS|g" \
+    -e "s|DIR_OUT=\${DIR_SRC}|$CONFIG_DIR_OUT|g" \
+    -e "s|2-gp_slurm_job\.sh|2-gp_slurm_job-${RUN_NAME}.sh|g" \
+    1-checks_and_submision.sh > "1-checks_and_submision-${RUN_NAME}.sh"
+### Create the slurm job scripts using the config variables
+sed -e "s|SBATCH --job-name='GS'|$CONFIG_JOB_NAME|g" \
+    -e "s|SBATCH --account='dbiopast1'|$CONFIG_ACCOUNT_NAME|g" \
+    -e "s|SBATCH --ntasks=1|$CONFIG_NTASKS|g" \
+    -e "s|SBATCH --cpus-per-task=16|$CONFIG_NCPUS|g" \
+    -e "s|SBATCH --mem=100G|$CONFIG_MEM|g" \
+    -e "s|SBATCH --time=1-0:0:00|$CONFIG_TIME_LIMIT|g" \
+    2-gp_slurm_job.sh > "2-gp_slurm_job-${RUN_NAME}.sh"
+### Check input and submit the slurm job (the exit code of the checks script becomes the exit code of this script)
+chmod +x "1-checks_and_submision-${RUN_NAME}.sh" "2-gp_slurm_job-${RUN_NAME}.sh"
+"./1-checks_and_submision-${RUN_NAME}.sh"
+### Clean-up after tests
+# rm *RAND*.sh
diff --git a/inst/exec_Rscript/1-checks_and_submision-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh b/inst/exec_Rscript/1-checks_and_submision-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh
new file mode 100755
index 0000000..1703865
--- /dev/null
+++ b/inst/exec_Rscript/1-checks_and_submision-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+### Full path to the directory containing the executable Rscript `gp.R`, which should be co-located with this script and the Slurm job script it submits.
+DIR_SRC=$(dirname "$0")
+cd "$DIR_SRC" || exit 1
+DIR_SRC=$(pwd)
+##################################
+### Load the conda environment ###
+##################################
+echo "Loading the conda environment..."
+module load Miniconda3/22.11.1-1
+conda init bash
+source ~/.bashrc
+if [ "$(conda env list | grep -c "^genomic_selection ")" -gt 0 ]
+then
+    conda activate genomic_selection
+else
+    echo "Installing the conda environment..."
+    conda env create -f "${DIR_SRC}/../../conda.yml"
+    conda activate genomic_selection
+fi
+#######################################
+### Install gp if not installed yet ###
+#######################################
+echo "Checking in the R library gp exists and installing it if not..."
+Rscript -e 'if (!require("gp", character.only = TRUE)) {install.packages("devtools", repos="https://cloud.r-project.org"); devtools::install_github("jeffersonfparil/gp")}'
+################################################################
+### TOP-LEVEL SLURM ARRAY JOB SUBMISSION SCRIPT
+### Please edit the input variables below to match your dataset:
+################################################################
+### Input variables (use the absolute path to files to be precise)
+### (1) R matrix object with n rows corresponding to samples, and p columns corresponding to the markers or loci.
+###     - The genotype data can be coded as any numeric range of values, e.g. (0,1,2), (-1,0,1), and (0.00,0.25,0.50,0.75,1.00) or as biallelic characters, e.g. for diploids: "AA", "AB", "BB", and for tetraploids: "AAAA", "AAAB", "AABB", "ABBB", and "BBBB". It is recommended that this data should be filtered and imputed beforehand.
+###     - The rows are expected to have names of the samples corresponding to the names in the phenotype file.
+###     - The columns are expected to contain the loci names but do need to follow a specific format: chromosome name and position separated by a tab character (`\t`) and an optional allele identifier, e.g. `chr-1\t12345\tallele_A`
+GENOTYPE_DATA_RDS=${DIR_SRC}/input_tmp/test_geno.Rds
+### (2) Tab-delimited phenotype file where column 1: sample names, column 2: population name, columns 3 and so on refers to the phenotype values of one trait per column.
+###     - Headers for the columns should be named appropriately, e.g. ID, POP, TRAIT1, TRAIT2, etc.
+###     - Missing values are allowed for samples whose phenotypes will be predicted by the best model identified within the population they belong to.
+###     - Missing values may be coded as empty cells, -, NA, na, NaN, missing, and/or MISSING.
+PHENOTYPE_DATA_TSV=${DIR_SRC}/input_tmp/test_pheno.tsv
+### (3) Number of folds for k-fold cross-validation.
+KFOLDS=2
+### (4) Number of replications of the k-fold cross-validation each representing a random sorting of the samples hence yielding different ways of partitioning the data.
+NREPS=2
+### (5) Output directory where the output/ folder will be created
+DIR_OUT=${DIR_SRC}/output_tmp
+
+### Check if the genotype file exists
+if [ ! -f "$GENOTYPE_DATA_RDS" ]
+then
+    echo "Error: The genotype file: $GENOTYPE_DATA_RDS does not exist. Are you specifying the full path? Is the name correct?"
+    exit 101
+else
+    echo "Passed: The genotype file: $GENOTYPE_DATA_RDS exists."
+fi
+### Check if the phenotype file exists
+if [ ! -f "$PHENOTYPE_DATA_TSV" ]
+then
+    echo "Error: The phenotype file: $PHENOTYPE_DATA_TSV does not exist. Are you specifying the full path? Is the name correct?"
+    exit 102
+else
+    echo "Passed: The phenotype file: $PHENOTYPE_DATA_TSV exists."
+fi
+### Check if the genotype file is a valid Rds file (the R helper prints a line containing "Error" if readRDS fails; one or more such lines fails the check)
+echo 'args = commandArgs(trailingOnly=TRUE)
+geno = suppressWarnings(tryCatch(readRDS(args[1]), error=function(e){print("Error loading genotype file.")}))
+' > test_geno_rds.R
+if [ "$(Rscript test_geno_rds.R "$GENOTYPE_DATA_RDS" | grep -ci "error")" -ge 1 ]
+then
+    echo "Error: The genotype file: $GENOTYPE_DATA_RDS is not an Rds file."
+    exit 103
+else
+    echo "Passed: The genotype file: $GENOTYPE_DATA_RDS is an Rds file."
+fi
+rm test_geno_rds.R
+### Check if the phenotype file is formatted according to the required specifications (same approach as the genotype check, using read.delim)
+echo 'args = commandArgs(trailingOnly=TRUE)
+pheno = suppressWarnings(tryCatch(read.delim(args[1], sep="\t", header=TRUE), error=function(e){print("Error loading phenotype file.")}))
+' > test_pheno_rds.R
+if [ "$(Rscript test_pheno_rds.R "$PHENOTYPE_DATA_TSV" | grep -ci "error")" -ge 1 ]
+then
+    echo "Error: The phenotype file: $PHENOTYPE_DATA_TSV is not formatted according to specifications. It should be tab-delimited and a header line must be present."
+    exit 104
+else
+    echo "Passed: The phenotype file: $PHENOTYPE_DATA_TSV is tab-delimited with a header line."
+fi
+rm test_pheno_rds.R
+### Check if the output directory is writable by the user
+if [ ! -w "$DIR_OUT" ]
+then
+    echo "Error: You do not have permission to write in the output directory: $DIR_OUT. Please use an output directory you have write acess to."
+    exit 106
+else
+    echo "Passed: You have permission to write in the output directory: $DIR_OUT."
+fi
+### Check if the configured slurm array job script exists
+if [ ! -f "${DIR_SRC}/2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh" ]
+then
+    echo "Error: The executable code directory: $DIR_SRC does not contain the script: 2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh. Are you sure this is the genomic_selection repo directory?"
+    exit 107
+else
+    echo "Passed: The executable code directory: $DIR_SRC contains the script: 2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh."
+fi
+### Initialise the output directory which will contain all the output Rds files across populations and traits
+if [ ! -d "${DIR_OUT}/output" ]
+then
+    mkdir "${DIR_OUT}/output"
+fi
+### Submit an array of jobs: one job per trait-population combination (header fields are counted on tabs to agree with the `cut -f` calls)
+cd "$DIR_SRC/"
+N_TRAITS=$(echo "$(head -n1 "$PHENOTYPE_DATA_TSV" | awk -F'\t' '{print NF}') - 2" | bc)
+N_POPS=$(tail -n+2 "$PHENOTYPE_DATA_TSV" | cut -f2 - | sort | uniq | wc -l)
+sbatch --array "1-$(echo "${N_TRAITS} * ${N_POPS}" | bc)" \
+    2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh \
+    "${GENOTYPE_DATA_RDS}" \
+    "${PHENOTYPE_DATA_TSV}" \
+    "${KFOLDS}" \
+    "${NREPS}" \
+    "${DIR_SRC}" \
+    "${DIR_OUT}"
\ No newline at end of file
diff --git a/inst/exec_Rscript/1-checks_and_submision.sh b/inst/exec_Rscript/1-checks_and_submision.sh
new file mode 100755
index 0000000..2791fc1
--- /dev/null
+++ b/inst/exec_Rscript/1-checks_and_submision.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+### Full path to the directory containing the executable Rscript `gp.R`, which should be co-located with this script and the Slurm job script it submits.
+DIR_SRC=$(dirname "$0")
+cd "$DIR_SRC" || exit 1
+DIR_SRC=$(pwd)
+##################################
+### Load the conda environment ###
+##################################
+echo "Loading the conda environment..."
+module load Miniconda3/22.11.1-1
+conda init bash
+source ~/.bashrc
+if [ "$(conda env list | grep -c "^genomic_selection ")" -gt 0 ]
+then
+    conda activate genomic_selection
+else
+    echo "Installing the conda environment..."
+    conda env create -f "${DIR_SRC}/../../conda.yml"
+    conda activate genomic_selection
+fi
+#######################################
+### Install gp if not installed yet ###
+#######################################
+echo "Checking if the R library gp exists and installing it, if not..."
+Rscript -e 'if (!require("gp", character.only = TRUE)) {install.packages("devtools", repos="https://cloud.r-project.org"); devtools::install_github("jeffersonfparil/gp")}'
+################################################################
+### TOP-LEVEL SLURM ARRAY JOB SUBMISSION SCRIPT
+### Please edit the input variables below to match your dataset:
+################################################################
+### Input variables (use the absolute path to files to be precise)
+### (1) R matrix object with n rows corresponding to samples, and p columns corresponding to the markers or loci.
+###     - The genotype data can be coded as any numeric range of values, e.g. (0,1,2), (-1,0,1), and (0.00,0.25,0.50,0.75,1.00) or as biallelic characters, e.g. for diploids: "AA", "AB", "BB", and for tetraploids: "AAAA", "AAAB", "AABB", "ABBB", and "BBBB". It is recommended that this data should be filtered and imputed beforehand.
+###     - The rows are expected to have names of the samples corresponding to the names in the phenotype file.
+###     - The columns are expected to contain the loci names but do need to follow a specific format: chromosome name and position separated by a tab character (`\t`) and an optional allele identifier, e.g. `chr-1\t12345\tallele_A`
+GENOTYPE_DATA_RDS=${DIR_SRC}/input/test_geno.Rds
+### (2) Tab-delimited phenotype file where column 1: sample names, column 2: population name, columns 3 and so on refers to the phenotype values of one trait per column.
+###     - Headers for the columns should be named appropriately, e.g. ID, POP, TRAIT1, TRAIT2, etc.
+###     - Missing values are allowed for samples whose phenotypes will be predicted by the best model identified within the population they belong to.
+###     - Missing values may be coded as empty cells, -, NA, na, NaN, missing, and/or MISSING.
+PHENOTYPE_DATA_TSV=${DIR_SRC}/input/test_pheno.tsv
+### (3) Number of folds for k-fold cross-validation.
+KFOLDS=5
+### (4) Number of replications of the k-fold cross-validation each representing a random sorting of the samples hence yielding different ways of partitioning the data.
+NREPS=3
+### (5) Output directory where the output/ folder will be created
+DIR_OUT=${DIR_SRC}
+
+### Check if the genotype file exists
+if [ ! -f "$GENOTYPE_DATA_RDS" ]
+then
+    echo "Error: The genotype file: $GENOTYPE_DATA_RDS does not exist. Are you specifying the full path? Is the name correct?"
+    exit 101
+else
+    echo "Passed: The genotype file: $GENOTYPE_DATA_RDS exists."
+fi
+### Check if the phenotype file exists
+if [ ! -f "$PHENOTYPE_DATA_TSV" ]
+then
+    echo "Error: The phenotype file: $PHENOTYPE_DATA_TSV does not exist. Are you specifying the full path? Is the name correct?"
+    exit 102
+else
+    echo "Passed: The phenotype file: $PHENOTYPE_DATA_TSV exists."
+fi
+### Check if the genotype file is a valid Rds file (the R helper prints a line containing "Error" if readRDS fails; one or more such lines fails the check)
+echo 'args = commandArgs(trailingOnly=TRUE)
+geno = suppressWarnings(tryCatch(readRDS(args[1]), error=function(e){print("Error loading genotype file.")}))
+' > test_geno_rds.R
+if [ "$(Rscript test_geno_rds.R "$GENOTYPE_DATA_RDS" | grep -ci "error")" -ge 1 ]
+then
+    echo "Error: The genotype file: $GENOTYPE_DATA_RDS is not an Rds file."
+    exit 103
+else
+    echo "Passed: The genotype file: $GENOTYPE_DATA_RDS is an Rds file."
+fi
+rm test_geno_rds.R
+### Check if the phenotype file is formatted according to the required specifications (same approach as the genotype check, using read.delim)
+echo 'args = commandArgs(trailingOnly=TRUE)
+pheno = suppressWarnings(tryCatch(read.delim(args[1], sep="\t", header=TRUE), error=function(e){print("Error loading phenotype file.")}))
+' > test_pheno_rds.R
+if [ "$(Rscript test_pheno_rds.R "$PHENOTYPE_DATA_TSV" | grep -ci "error")" -ge 1 ]
+then
+    echo "Error: The phenotype file: $PHENOTYPE_DATA_TSV is not formatted according to specifications. It should be tab-delimited and a header line must be present."
+    exit 104
+else
+    echo "Passed: The phenotype file: $PHENOTYPE_DATA_TSV is tab-delimited with a header line."
+fi
+rm test_pheno_rds.R
+### Check if the output directory is writable by the user
+if [ ! -w "$DIR_OUT" ]
+then
+    echo "Error: You do not have permission to write in the output directory: $DIR_OUT. Please use an output directory you have write acess to."
+    exit 106
+else
+    echo "Passed: You have permission to write in the output directory: $DIR_OUT."
+fi
+### Check if the configured slurm array job script exists
+if [ ! -f "${DIR_SRC}/2-gp_slurm_job.sh" ]
+then
+    echo "Error: The executable code directory: $DIR_SRC does not contain the script: 2-gp_slurm_job.sh. Are you sure this is the genomic_selection repo directory?"
+    exit 107
+else
+    echo "Passed: The executable code directory: $DIR_SRC contains the script: 2-gp_slurm_job.sh."
+fi
+### Initialise the output directory which will contain all the output Rds files across populations and traits
+if [ ! -d "${DIR_OUT}/output" ]
+then
+    mkdir "${DIR_OUT}/output"
+fi
+### Submit an array of jobs: one job per trait-population combination (header fields are counted on tabs to agree with the `cut -f` calls)
+cd "$DIR_SRC/"
+N_TRAITS=$(echo "$(head -n1 "$PHENOTYPE_DATA_TSV" | awk -F'\t' '{print NF}') - 2" | bc)
+N_POPS=$(tail -n+2 "$PHENOTYPE_DATA_TSV" | cut -f2 - | sort | uniq | wc -l)
+sbatch --array "1-$(echo "${N_TRAITS} * ${N_POPS}" | bc)" \
+    2-gp_slurm_job.sh \
+    "${GENOTYPE_DATA_RDS}" \
+    "${PHENOTYPE_DATA_TSV}" \
+    "${KFOLDS}" \
+    "${NREPS}" \
+    "${DIR_SRC}" \
+    "${DIR_OUT}"
\ No newline at end of file
diff --git a/inst/exec_Rscript/2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh b/inst/exec_Rscript/2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh
new file mode 100755
index 0000000..7b3b3a9
--- /dev/null
+++ b/inst/exec_Rscript/2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+#SBATCH --job-name='test'
+#SBATCH --account='dbiopast2' ### EDIT ME: Pick the appropriate account name, e.g.
dbiopast1 or dbiopast2
+#SBATCH --ntasks=1 ### LEAVE ME: Request a single task as we will be submitting this as an array job where each job corresponds to a trait-population combination
+#SBATCH --cpus-per-task=8 ### EDIT ME: Parallelisation across replications, folds and models (more cpu means faster execution time but probably longer time to wait for the Slurm scheduler to find resources to allocate to the job)
+#SBATCH --mem=64G ### EDIT ME: Proportional to the input data (will need to test the appropriate memory required, hint use `seff ${JOBID}`)
+#SBATCH --time=0-2:0:00 ### EDIT ME: Proportional to the input data, number of folds, replications, and models to be used
+###################################################################################################
+### Edit the Slurm settings above to match your requirements.
+###################################################################################################
+
+###################################################################################################
+### The variables below are positional arguments passed by the `sbatch` call in the checks and submission script:
+###################################################################################################
+### Input variables (use the absolute path to files to be precise)
+### (1) R matrix object with n rows corresponding to samples, and p columns corresponding to the markers or loci.
+###     Should have no missing data or else will be imputed via mean value imputation.
+GENOTYPE_DATA_RDS=$1
+### (2) Tab-delimited phenotype file where column 1: sample names, column 2: population name, columns 3 and so on refers to the phenotype values of one trait per column.
+###     Headers for the columns should be named appropriately, e.g. ID, POP, TRAIT1, TRAIT2, etc.
+###     Missing values are allowed for samples whose phenotypes will be predicted by the best model identified within the population they belong to.
+###     Missing values may be coded as empty cells, -, NA, na, NaN, missing, and/or MISSING.
+PHENOTYPE_DATA_TSV=$2
+### (3) Number of folds for k-fold cross-validation.
+KFOLDS=$3
+### (4) Number of replications of the k-fold cross-validation each representing a random sorting of the samples hence yielding different ways of partitioning the data.
+NREPS=$4
+### (5) Full path to the location of the executable Rscript gp.R
+DIR_SRC=$5
+### (6) Output directory where the output/ folder is located
+DIR_OUT=$6
+
+###################################################################################################
+### Edit the code below, if and only if you have read the documentation or are familiar with `src/*.R`:
+###################################################################################################
+### Define the trait and population to include (array task IDs run through all populations of trait 1, then all populations of trait 2, and so on)
+N_POPS=$(tail -n+2 "$PHENOTYPE_DATA_TSV" | cut -f2 - | sort | uniq | wc -l)
+TRAIT_IDX=$(echo "((${SLURM_ARRAY_TASK_ID}-1) / ${N_POPS}) + 1" | bc)
+POP_IDX=$(echo "${SLURM_ARRAY_TASK_ID} % ${N_POPS}" | bc)
+if [ "${POP_IDX}" -eq 0 ]
+then
+    POP_IDX=${N_POPS}
+fi
+COLUMN_ID=$(echo 2 + ${TRAIT_IDX} | bc)
+TRAIT=$(head -n1 "$PHENOTYPE_DATA_TSV" | cut -f${COLUMN_ID})
+POP=$(tail -n+2 "$PHENOTYPE_DATA_TSV" | cut -f2 - | sort | uniq | head -n${POP_IDX} | tail -n1)
+### Skip leave-one-population-out cross-validation if there is only one population, otherwise request it only from the job of the first population of each trait
+if [ "${N_POPS}" -eq 1 ]
+then
+    BOOL_ACROSS=FALSE
+else
+    if [ "${POP_IDX}" -eq 1 ]
+    then
+        BOOL_ACROSS=TRUE
+    else
+        BOOL_ACROSS=FALSE
+    fi
+fi
+### Output directories (`mkdir -p` so that array tasks starting at the same time and re-runs do not fail on existing directories)
+DIR_OUT_MAIN=${DIR_OUT}/output
+DIR_OUT_SUB=${DIR_OUT_MAIN}/output-${TRAIT}-${POP}
+if [ ! -d "$DIR_OUT_MAIN" ]
+then
+    mkdir -p "$DIR_OUT_MAIN"
+fi
+mkdir -p "$DIR_OUT_SUB"
+### Log messages
+echo "JOB_${SLURM_ARRAY_TASK_ID}-TRAIT_${TRAIT}-POP_${POP}" > "${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log"
+echo "==========================================
+-------------------------------------------
+ Job Info
+-------------------------------------------
+SLURM_JOB_ID = $SLURM_JOB_ID
+SLURM_JOB_NAME = $SLURM_JOB_NAME
+SLURM_JOB_NODELIST = $SLURM_JOB_NODELIST
+SLURM_SUBMIT_HOST = $SLURM_SUBMIT_HOST
+SLURM_SUBMIT_DIR = $SLURM_SUBMIT_DIR
+SLURM_NTASKS = $SLURM_NTASKS
+SLURM_ARRAY_TASK_ID = $SLURM_ARRAY_TASK_ID
+SLURM_MEM_PER_NODE = $(echo "$SLURM_MEM_PER_NODE / (2^10)" | bc) GB
+SLURM_CPUS_PER_TASK = $SLURM_CPUS_PER_TASK
+-------------------------------------------
+ Variables
+-------------------------------------------
+GENOTYPE_DATA_RDS : $GENOTYPE_DATA_RDS
+PHENOTYPE_DATA_TSV : $PHENOTYPE_DATA_TSV
+KFOLDS : $KFOLDS
+NREPS : $NREPS
+TRAIT : $TRAIT
+POPULATION : $POP
+-------------------------------------------
+ Output directory
+-------------------------------------------
+${DIR_OUT_SUB}
+==========================================" >> "${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log"
+
+### Load the conda environment
+module load Miniconda3/22.11.1-1
+conda init bash
+source ~/.bashrc
+conda activate genomic_selection
+
+### Run within and across population replicated k-fold cross-validation and prediction of missing phenotypes
+time \
+Rscript "${DIR_SRC}/gp.R" \
+    --fname-geno "$GENOTYPE_DATA_RDS" \
+    --fname-pheno "$PHENOTYPE_DATA_TSV" \
+    --population "$POP" \
+    --dir-output "$DIR_OUT_SUB" \
+    --pheno-idx-col-y $COLUMN_ID \
+    --bool-within TRUE \
+    --bool-across $BOOL_ACROSS \
+    --n-folds $KFOLDS \
+    --n-reps $NREPS \
+    --bool-parallel TRUE \
+    --max-mem-Gb $(echo "$SLURM_MEM_PER_NODE / (2^10)" | bc) \
+    --n-threads $SLURM_CPUS_PER_TASK \
+    --verbose TRUE >> "${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log"
+### Clean-up (move the results and the log into the main output directory, then remove the job-specific sub-directory)
+mv "${DIR_OUT_SUB}"/GENOMIC_PREDICTIONS_OUTPUT-*.Rds "${DIR_OUT_MAIN}"
+mv "${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log" "${DIR_OUT_MAIN}"
+rm -R "${DIR_OUT_SUB}"
\ No newline at end of file
diff --git a/inst/exec_Rscript/2-gp_slurm_job.sh b/inst/exec_Rscript/2-gp_slurm_job.sh
new file mode 100755
index 0000000..e8315f4
--- /dev/null
+++ b/inst/exec_Rscript/2-gp_slurm_job.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+#SBATCH --job-name='GS'
+#SBATCH --account='dbiopast1' ### EDIT ME: Pick the appropriate account name, e.g. dbiopast1 or dbiopast2
+#SBATCH --ntasks=1 ### LEAVE ME: Request a single task as we will be submitting this as an array job where each job corresponds to a trait-population combination
+#SBATCH --cpus-per-task=16 ### EDIT ME: Parallelisation across replications, folds and models (more cpu means faster execution time but probably longer time to wait for the Slurm scheduler to find resources to allocate to the job)
+#SBATCH --mem=100G ### EDIT ME: Proportional to the input data (will need to test the appropriate memory required, hint use `seff ${JOBID}`)
+#SBATCH --time=1-0:0:00 ### EDIT ME: Proportional to the input data, number of folds, replications, and models to be used
+###################################################################################################
+### Edit the Slurm settings above to match your requirements.
+###################################################################################################
+
+###################################################################################################
+### The variables below are positional arguments passed by the `sbatch` call in the checks and submission script:
+###################################################################################################
+### Input variables (use the absolute path to files to be precise)
+### (1) R matrix object with n rows corresponding to samples, and p columns corresponding to the markers or loci.
+###     Should have no missing data or else will be imputed via mean value imputation.
+GENOTYPE_DATA_RDS=$1 +### (2) Tab-delimited phenotype file where column 1: sample names, column 2: population name, columns 3 and so on refers to the phenotype values of one trait per column. +### Headers for the columns should be named appropriately, e.g. ID, POP, TRAIT1, TRAIT2, etc. +### Missing values are allowed for samples whose phenotypes will be predicted by the best model identified within the population they belong to. +### Missing values may be coded as empty cells, -, NA, na, NaN, missing, and/or MISSING. +PHENOTYPE_DATA_TSV=$2 +### (3) Number of folds for k-fold cross-validation. +KFOLDS=$3 +### (4) Number of replications of the k-fold cross-validation each representing a random sorting of the samples hence yielding different ways of partitioning the data. +NREPS=$4 +### (5) Full path to the location of the executable Rscript gp.R +DIR_SRC=$5 +### (5) Full path to the location of the executable Rscript gp.R +DIR_OUT=$6 + +################################################################################################### +### Edit the code below, if and only if you have read the documentation or familiar with `src/*.R`: +################################################################################################### +### Define the trait and population to include +N_POPS=$(tail -n+2 $PHENOTYPE_DATA_TSV | cut -f2 - | sort | uniq | wc -l) +TRAIT_IDX=$(echo "((${SLURM_ARRAY_TASK_ID}-1) / ${N_POPS}) + 1" | bc) +POP_IDX=$(echo "${SLURM_ARRAY_TASK_ID} % ${N_POPS}" | bc) +if [ "${POP_IDX}" -eq 0 ] +then + POP_IDX=${N_POPS} +fi +COLUMN_ID=$(echo 2 + ${TRAIT_IDX} | bc) +TRAIT=$(head -n1 $PHENOTYPE_DATA_TSV | cut -f${COLUMN_ID}) +POP=$(tail -n+2 $PHENOTYPE_DATA_TSV | cut -f2 - | sort | uniq | head -n${POP_IDX} | tail -n1) +### Skip leave-one-population-out cross-validation if there is only one population +if [ "${N_POPS}" -eq 1 ] +then + BOOL_ACROSS=FALSE +else + if [ "${POP_IDX}" -eq 1 ] + then + BOOL_ACROSS=TRUE + else + BOOL_ACROSS=FALSE + fi +fi +### 
Output directories +DIR_OUT_MAIN=${DIR_OUT}/output +DIR_OUT_SUB=${DIR_OUT_MAIN}/output-${TRAIT}-${POP} +if [ ! -d DIR_OUT_MAIN ] +then + mkdir $DIR_OUT_MAIN +fi +mkdir $DIR_OUT_SUB +### Log messages +echo JOB_${SLURM_ARRAY_TASK_ID}-TRAIT_${TRAIT}-POP_${POP} > ${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log +echo "========================================== +------------------------------------------- + Job Info +------------------------------------------- +SLURM_JOB_ID = $SLURM_JOB_ID +SLURM_JOB_NAME = $SLURM_JOB_NAME +SLURM_JOB_NODELIST = $SLURM_JOB_NODELIST +SLURM_SUBMIT_HOST = $SLURM_SUBMIT_HOST +SLURM_SUBMIT_DIR = $SLURM_SUBMIT_DIR +SLURM_NTASKS = $SLURM_NTASKS +SLURM_ARRAY_TASK_ID = $SLURM_ARRAY_TASK_ID +SLURM_MEM_PER_NODE = $(echo "$SLURM_MEM_PER_NODE / (2^10)" | bc) GB +SLURM_CPUS_PER_TASK = $SLURM_CPUS_PER_TASK +------------------------------------------- + Variables +------------------------------------------- +GENOTYPE_DATA_RDS : $GENOTYPE_DATA_RDS +PHENOTYPE_DATA_TSV : $PHENOTYPE_DATA_TSV +KFOLDS : $KFOLDS +NREPS : $NREPS +TRAIT : $TRAIT +POPULATION : $POP +------------------------------------------- + Output directory +------------------------------------------- +${DIR_OUT_SUB} +==========================================" >> ${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log + +### Load the conda environment +module load Miniconda3/22.11.1-1 +conda init bash +source ~/.bashrc +conda activate genomic_selection + +### Run within and across population replicated k-fold cross-validation and prediction of missing phenotypes +time \ +Rscript ${DIR_SRC}/gp.R \ + --fname-geno $GENOTYPE_DATA_RDS \ + --fname-pheno $PHENOTYPE_DATA_TSV \ + --population $POP \ + --dir-output $DIR_OUT_SUB \ + --pheno-idx-col-y $COLUMN_ID \ + --bool-within TRUE \ + --bool-across $BOOL_ACROSS \ + --n-folds $KFOLDS \ + --n-reps $NREPS \ + --bool-parallel TRUE \ + --max-mem-Gb $(echo "$SLURM_MEM_PER_NODE / (2^10)" | bc) \ + --n-threads $SLURM_CPUS_PER_TASK \ + --verbose TRUE >> 
${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log +### Clean-up +mv ${DIR_OUT_SUB}/GENOMIC_PREDICTIONS_OUTPUT-*.Rds ${DIR_OUT_MAIN} +mv ${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log ${DIR_OUT_MAIN} +rm -R ${DIR_OUT_SUB} \ No newline at end of file diff --git a/inst/exec_Rscript/config.txt b/inst/exec_Rscript/config.txt new file mode 100644 index 0000000..2863ff2 --- /dev/null +++ b/inst/exec_Rscript/config.txt @@ -0,0 +1,23 @@ +GENOTYPE_DATA_RDS=${DIR_SRC}/input_tmp/test_geno.Rds +PHENOTYPE_DATA_TSV=${DIR_SRC}/input_tmp/test_pheno.tsv +KFOLDS=2 +NREPS=2 +DIR_OUT=${DIR_SRC}/output_tmp +SBATCH --job-name="test" +SBATCH --account="dbiopast2" +SBATCH --ntasks=1 +SBATCH --cpus-per-task=8 +SBATCH --mem=64G +SBATCH --time=0-2:0:00 + +# GENOTYPE_DATA_RDS=${DIR_SRC}/input/test_geno.Rds +# PHENOTYPE_DATA_TSV=${DIR_SRC}/input/test_pheno.tsv +# KFOLDS=2 +# NREPS=2 +# DIR_OUT=${DIR_SRC} +# SBATCH --job-name="test" +# SBATCH --account="dbiopast2" +# SBATCH --ntasks=1 +# SBATCH --cpus-per-task=8 +# SBATCH --mem=64G +# SBATCH --time=0-2:0:00 \ No newline at end of file