From 70ec3da418760eb154dc351be3ff51bc3eb869ab Mon Sep 17 00:00:00 2001
From: jeffersonfparil <jeffersonparil@gmail.com>
Date: Sun, 23 Jun 2024 14:56:18 +1000
Subject: [PATCH] drafted + will need to update error codes and complete docs
 in README.md

---
 conda.yml                                     | 268 ++++++++++++++++++
 inst/exec_Rscript/0-submit.sh                 |  37 +++
 ...1-jp3h-SunJun23062044AEST2024-RAND27285.sh | 121 ++++++++
 inst/exec_Rscript/1-checks_and_submision.sh   | 121 ++++++++
 ...1-jp3h-SunJun23062044AEST2024-RAND27285.sh | 122 ++++++++
 inst/exec_Rscript/2-gp_slurm_job.sh           | 122 ++++++++
 inst/exec_Rscript/config.txt                  |  23 ++
 7 files changed, 814 insertions(+)
 create mode 100644 conda.yml
 create mode 100755 inst/exec_Rscript/0-submit.sh
 create mode 100755 inst/exec_Rscript/1-checks_and_submision-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh
 create mode 100755 inst/exec_Rscript/1-checks_and_submision.sh
 create mode 100755 inst/exec_Rscript/2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh
 create mode 100755 inst/exec_Rscript/2-gp_slurm_job.sh
 create mode 100644 inst/exec_Rscript/config.txt

diff --git a/conda.yml b/conda.yml
new file mode 100644
index 0000000..f830034
--- /dev/null
+++ b/conda.yml
@@ -0,0 +1,268 @@
+name: genomic_selection
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=conda_forge
+  - _openmp_mutex=4.5=2_gnu
+  - _r-mutex=1.0.1=anacondar_1
+  - bat=0.24.0=he8a937b_0
+  - binutils_impl_linux-64=2.40=hf600244_0
+  - bwidget=1.9.14=ha770c72_1
+  - bzip2=1.0.8=h7f98852_4
+  - c-ares=1.19.1=hd590300_0
+  - ca-certificates=2024.3.11=h06a4308_0
+  - cairo=1.16.0=h0c91306_1017
+  - curl=8.3.0=hca28451_0
+  - expat=2.5.0=hcb278e6_1
+  - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
+  - font-ttf-inconsolata=3.000=h77eed37_0
+  - font-ttf-source-code-pro=2.038=h77eed37_0
+  - font-ttf-ubuntu=0.83=hab24e00_0
+  - fontconfig=2.14.2=h14ed4e7_0
+  - fonts-conda-ecosystem=1=0
+  - fonts-conda-forge=1=0
+  - freetype=2.12.1=h267a509_2
+  - fribidi=1.0.10=h36c2ea0_0
+  - gcc_impl_linux-64=13.2.0=h338b0a0_2
+  - gettext=0.21.1=h27087fc_0
+  - gfortran_impl_linux-64=13.2.0=h76e1118_2
+  - git=2.42.0=pl5321h86e50cf_0
+  - graphite2=1.3.13=h58526e2_1001
+  - gxx_impl_linux-64=13.2.0=h338b0a0_2
+  - harfbuzz=8.2.1=h3d44ed6_0
+  - helix=24.03=h1ffa460_0
+  - htop=3.2.2=h8228510_0
+  - icu=73.2=h59595ed_0
+  - kernel-headers_linux-64=2.6.32=he073ed8_16
+  - keyutils=1.6.1=h166bdaf_0
+  - krb5=1.21.2=h659d440_0
+  - ld_impl_linux-64=2.40=h41732ed_0
+  - lerc=4.0.0=h27087fc_0
+  - libblas=3.9.0=18_linux64_openblas
+  - libcurl=8.3.0=hca28451_0
+  - libdeflate=1.19=hd590300_0
+  - libedit=3.1.20191231=he28a2e2_2
+  - libev=4.33=h516909a_1
+  - libexpat=2.5.0=hcb278e6_1
+  - libffi=3.4.2=h7f98852_5
+  - libgcc-devel_linux-64=13.2.0=ha9c7c90_2
+  - libgcc-ng=13.2.0=h807b86a_2
+  - libgfortran-ng=13.2.0=h69a702a_2
+  - libgfortran5=13.2.0=ha4646dd_2
+  - libgit2=1.7.1=hca3a8ce_0
+  - libglib=2.78.0=hebfc3b9_0
+  - libgomp=13.2.0=h807b86a_2
+  - libiconv=1.17=h166bdaf_0
+  - libjpeg-turbo=3.0.0=hd590300_1
+  - liblapack=3.9.0=18_linux64_openblas
+  - libnghttp2=1.52.0=h61bc06f_0
+  - libnl=3.8.0=hd590300_0
+  - libnsl=2.0.1=hd590300_0
+  - libopenblas=0.3.24=pthreads_h413a1c8_0
+  - libpng=1.6.39=h753d276_0
+  - libsanitizer=13.2.0=h7e041cc_2
+  - libssh2=1.11.0=h0841786_0
+  - libstdcxx-devel_linux-64=13.2.0=ha9c7c90_2
+  - libstdcxx-ng=13.2.0=h7e041cc_2
+  - libtiff=4.6.0=ha9c0a0a_2
+  - libuuid=2.38.1=h0b41bf4_0
+  - libwebp-base=1.3.2=hd590300_0
+  - libxcb=1.15=h0b41bf4_0
+  - libxml2=2.12.5=h232c23b_0
+  - libzlib=1.2.13=hd590300_5
+  - make=4.3=hd18ef5c_1
+  - micro=2.0.8=ha8f183a_1
+  - ncurses=6.4=hcb278e6_0
+  - openssl=3.2.1=hd590300_1
+  - pandoc=2.12=h06a4308_3
+  - pango=1.50.14=ha41ecd1_2
+  - pcre2=10.40=hc3806b6_0
+  - perl=5.32.1=4_hd590300_perl5
+  - pixman=0.42.2=h59595ed_0
+  - pthread-stubs=0.4=h36c2ea0_1001
+  - r-anytime=0.3.9=r43h884c59f_0
+  - r-ape=5.7_1=r43h08d816e_1
+  - r-argparse=2.2.2=r43h6115d3f_0
+  - r-askpass=1.2.0=r43h76d94ec_0
+  - r-assertthat=0.2.1=r43hc72bb7e_4
+  - r-base=4.3.1=h93585b2_6
+  - r-base64enc=0.1_3=r43h57805ef_1006
+  - r-bglr=1.1.0=r43h57805ef_2
+  - r-bh=1.81.0_1=r43h6115d3f_0
+  - r-bigmemory=4.6.1=r43ha503ecb_2
+  - r-bigmemory.sri=0.1.6=r43hc72bb7e_1
+  - r-bit=4.0.5=r43h76d94ec_0
+  - r-bit64=4.0.5=r43h76d94ec_0
+  - r-blob=1.2.4=r43h6115d3f_0
+  - r-boot=1.3_28.1=r43h6115d3f_0
+  - r-brew=1.0_10=r43hc72bb7e_0
+  - r-brio=1.1.3=r43h57805ef_2
+  - r-bslib=0.6.1=r43hc72bb7e_0
+  - r-cachem=1.0.8=r43h57805ef_1
+  - r-callr=3.7.3=r43hc72bb7e_1
+  - r-cli=3.6.1=r43ha503ecb_1
+  - r-clipr=0.8.0=r43hc72bb7e_2
+  - r-cluster=2.1.4=r43h61816a4_1
+  - r-codetools=0.2_19=r43hc72bb7e_1
+  - r-colorspace=2.1_0=r43h76d94ec_0
+  - r-commonmark=1.9.1=r43h57805ef_0
+  - r-cpp11=0.4.7=r43hc72bb7e_0
+  - r-crayon=1.5.2=r43hc72bb7e_2
+  - r-credentials=2.0.1=r43h142f84f_0
+  - r-crosstalk=1.2.0=r43h6115d3f_0
+  - r-curl=5.1.0=r43hf9611b0_0
+  - r-data.table=1.14.8=r43h76d94ec_0
+  - r-dbi=1.1.3=r43h6115d3f_0
+  - r-desc=1.4.2=r43hc72bb7e_2
+  - r-devtools=2.4.5=r43hc72bb7e_2
+  - r-diffobj=0.3.5=r43h57805ef_2
+  - r-digest=0.6.33=r43ha503ecb_0
+  - r-domc=1.3.8=r43ha770c72_2
+  - r-doparallel=1.0.17=r43hc72bb7e_2
+  - r-downlit=0.4.3=r43h6115d3f_0
+  - r-dplyr=1.1.3=r43ha503ecb_0
+  - r-ellipsis=0.3.2=r43h57805ef_2
+  - r-evaluate=0.22=r43hc72bb7e_0
+  - r-fansi=1.0.4=r43h57805ef_1
+  - r-farver=2.1.1=r43h884c59f_0
+  - r-fastmap=1.1.1=r43ha503ecb_1
+  - r-findpython=1.0.8=r43h6115d3f_0
+  - r-fontawesome=0.5.2=r43h6115d3f_0
+  - r-foreach=1.5.2=r43hc72bb7e_2
+  - r-fs=1.6.3=r43ha503ecb_0
+  - r-generics=0.1.3=r43hc72bb7e_2
+  - r-gert=2.0.1=r43hc25a090_0
+  - r-ggplot2=3.4.4=r43h6115d3f_0
+  - r-gh=1.4.0=r43hc72bb7e_1
+  - r-gitcreds=0.1.2=r43hc72bb7e_2
+  - r-glmnet=4.1_8=r43hcf54a89_0
+  - r-glue=1.6.2=r43h57805ef_2
+  - r-gtable=0.3.4=r43h6115d3f_0
+  - r-highr=0.10=r43hc72bb7e_1
+  - r-htmltools=0.5.7=r43ha503ecb_0
+  - r-htmlwidgets=1.6.4=r43hc72bb7e_1
+  - r-httpuv=1.6.14=r43ha503ecb_0
+  - r-httr=1.4.7=r43h6115d3f_0
+  - r-httr2=1.0.0=r43hc72bb7e_0
+  - r-ini=0.3.1=r43hc72bb7e_1005
+  - r-isoband=0.2.7=r43h884c59f_0
+  - r-iterators=1.0.14=r43hc72bb7e_2
+  - r-jquerylib=0.1.4=r43hc72bb7e_2
+  - r-jsonlite=1.8.7=r43h57805ef_0
+  - r-knitr=1.45=r43hc72bb7e_0
+  - r-labeling=0.4.3=r43h6115d3f_0
+  - r-later=1.3.2=r43ha503ecb_0
+  - r-lattice=0.21_9=r43h57805ef_0
+  - r-lazyeval=0.2.2=r43h76d94ec_0
+  - r-lifecycle=1.0.3=r43hc72bb7e_2
+  - r-lme4=1.1_34=r43h884c59f_0
+  - r-magrittr=2.0.3=r43h57805ef_2
+  - r-mass=7.3_60=r43h57805ef_1
+  - r-matrix=1.6_1.1=r43h316c678_0
+  - r-memoise=2.0.1=r43hc72bb7e_2
+  - r-memuse=4.2_3=r43h57805ef_1
+  - r-mgcv=1.9_0=r43h316c678_0
+  - r-mime=0.12=r43h57805ef_2
+  - r-miniui=0.1.1.1=r43hc72bb7e_1004
+  - r-minqa=1.2.6=r43hb5eb8f6_0
+  - r-munsell=0.5.0=r43h6115d3f_0
+  - r-nlme=3.1_163=r43h61816a4_0
+  - r-nloptr=2.0.3=r43hb5eb8f6_0
+  - r-openssl=2.1.1=r43h76d94ec_0
+  - r-permute=0.9_7=r43hc72bb7e_2
+  - r-pillar=1.9.0=r43hc72bb7e_1
+  - r-pinfsc50=1.2.0=r43ha770c72_2
+  - r-pkgbuild=1.4.2=r43hc72bb7e_0
+  - r-pkgconfig=2.0.3=r43hc72bb7e_3
+  - r-pkgdown=2.0.7=r43hc72bb7e_1
+  - r-pkgload=1.3.3=r43hc72bb7e_0
+  - r-plogr=0.2.0=r43h6115d3f_0
+  - r-plotly=4.10.2=r43h6115d3f_0
+  - r-praise=1.0.0=r43hc72bb7e_1007
+  - r-prettyunits=1.2.0=r43hc72bb7e_0
+  - r-processx=3.8.2=r43h57805ef_0
+  - r-profvis=0.3.8=r43h57805ef_3
+  - r-promises=1.2.1=r43h884c59f_0
+  - r-ps=1.7.5=r43h57805ef_1
+  - r-purrr=1.0.2=r43h76d94ec_0
+  - r-r6=2.5.1=r43hc72bb7e_2
+  - r-ragg=1.2.7=r43h73ae6e3_0
+  - r-rappdirs=0.3.3=r43h57805ef_2
+  - r-rcmdcheck=1.4.0=r43h785f33e_2
+  - r-rcolorbrewer=1.1_3=r43h6115d3f_1
+  - r-rcpp=1.0.11=r43h7df8631_0
+  - r-rcpparmadillo=0.12.6.4.0=r43h884c59f_0
+  - r-rcppeigen=0.3.3.9.3=r43h08d816e_1
+  - r-rcppprogress=0.4.2=r43h142f84f_0
+  - r-rematch2=2.1.2=r43hc72bb7e_3
+  - r-remotes=2.4.2.1=r43h142f84f_0
+  - r-rgl=1.2.1=r43h884c59f_0
+  - r-rlang=1.1.1=r43ha503ecb_1
+  - r-rmarkdown=2.25=r43h6115d3f_0
+  - r-roxygen2=7.3.1=r43ha503ecb_0
+  - r-rprojroot=2.0.3=r43hc72bb7e_0
+  - r-rrblup=4.6.2=r43hc72bb7e_1
+  - r-rsqlite=2.3.1=r43h884c59f_0
+  - r-rstudioapi=0.15.0=r43h6115d3f_0
+  - r-rversions=2.1.2=r43hc72bb7e_2
+  - r-sass=0.4.8=r43ha503ecb_0
+  - r-scales=1.2.1=r43h6115d3f_0
+  - r-sessioninfo=1.2.2=r43hc72bb7e_2
+  - r-shape=1.4.6=r43ha770c72_2
+  - r-shiny=1.8.0=r43h785f33e_0
+  - r-shinycssloaders=1.0.0=r43h6115d3f_0
+  - r-shinywidgets=0.8.0=r43h142f84f_0
+  - r-sommer=4.3.2=r43h884c59f_0
+  - r-sourcetools=0.1.7_1=r43ha503ecb_1
+  - r-stringi=1.7.12=r43h9facbd6_3
+  - r-stringr=1.5.0=r43h785f33e_1
+  - r-survival=3.5_7=r43h57805ef_0
+  - r-sys=3.4.2=r43h57805ef_1
+  - r-systemfonts=1.0.5=r43h884c59f_0
+  - r-testthat=3.2.0=r43ha503ecb_0
+  - r-textshaping=0.3.7=r43h884c59f_0
+  - r-tibble=3.2.1=r43h57805ef_2
+  - r-tidyr=1.3.0=r43h884c59f_0
+  - r-tidyselect=1.2.0=r43hc72bb7e_1
+  - r-tinytex=0.49=r43hc72bb7e_1
+  - r-truncnorm=1.0_9=r43h57805ef_1
+  - r-txtplot=1.0_4=r43h142f84f_0
+  - r-urlchecker=1.0.1=r43hc72bb7e_2
+  - r-usethis=2.2.3=r43hc72bb7e_0
+  - r-utf8=1.2.3=r43h57805ef_1
+  - r-uuid=1.1_1=r43h57805ef_0
+  - r-vcfr=1.14.0=r43h33b523d_1
+  - r-vctrs=0.6.3=r43ha503ecb_0
+  - r-vegan=2.6_4=r43hd9ac46e_1
+  - r-viridislite=0.4.2=r43hc72bb7e_1
+  - r-waldo=0.5.1=r43hc72bb7e_1
+  - r-whisker=0.4.1=r43hc72bb7e_1
+  - r-withr=2.5.1=r43hc72bb7e_0
+  - r-xfun=0.42=r43ha503ecb_0
+  - r-xml2=1.3.6=r43hbfba7a4_1
+  - r-xopen=1.0.0=r43hc72bb7e_1005
+  - r-xtable=1.8_4=r43hc72bb7e_5
+  - r-yaml=2.3.8=r43h57805ef_0
+  - r-zip=2.3.1=r43h57805ef_0
+  - readline=8.2=h8228510_1
+  - sed=4.8=he412f7d_0
+  - sysroot_linux-64=2.12=he073ed8_16
+  - tk=8.6.13=h2797004_0
+  - tktable=2.10=h0c5db8f_5
+  - xorg-kbproto=1.0.7=h7f98852_1002
+  - xorg-libice=1.1.1=hd590300_0
+  - xorg-libsm=1.2.4=h7391055_0
+  - xorg-libx11=1.8.6=h8ee46fc_0
+  - xorg-libxau=1.0.11=hd590300_0
+  - xorg-libxdmcp=1.1.3=h7f98852_0
+  - xorg-libxext=1.3.4=h0b41bf4_2
+  - xorg-libxrender=0.9.11=hd590300_0
+  - xorg-libxt=1.3.0=hd590300_1
+  - xorg-renderproto=0.11.1=h7f98852_1002
+  - xorg-xextproto=7.3.0=h0b41bf4_1003
+  - xorg-xproto=7.0.31=h7f98852_1007
+  - xz=5.2.6=h166bdaf_0
+  - zlib=1.2.13=hd590300_5
+  - zstd=1.5.5=hfc55251_0
+# The machine-specific `prefix:` entry is intentionally omitted; the environment is created and activated by the `name:` declared at the top of this file.
diff --git a/inst/exec_Rscript/0-submit.sh b/inst/exec_Rscript/0-submit.sh
new file mode 100755
index 0000000..b32880f
--- /dev/null
+++ b/inst/exec_Rscript/0-submit.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+### Work from this script's own directory: config.txt, the two template scripts and the generated scripts are all addressed via relative paths
+cd "$(dirname "$0")" || exit 1
+### Unique name of the run given a specific instance of config.txt (hostname, user, date with spaces and colons removed, and a random suffix)
+RUN_NAME=$(hostname)-${USER}-$(date | sed 's/ //g' | sed 's/://g')-RAND${RANDOM}
+### Extract config variables: each one is a single line of config.txt (lines 1 to 11) with double quotes converted to single quotes
+CONFIG_GENO=$(sed "s/\"/'/g" config.txt | sed -n '1p')
+CONFIG_PHENO=$(sed "s/\"/'/g" config.txt | sed -n '2p')
+CONFIG_KFOLDS=$(sed "s/\"/'/g" config.txt | sed -n '3p')
+CONFIG_NREPS=$(sed "s/\"/'/g" config.txt | sed -n '4p')
+CONFIG_DIR_OUT=$(sed "s/\"/'/g" config.txt | sed -n '5p')
+CONFIG_JOB_NAME=$(sed "s/\"/'/g" config.txt | sed -n '6p')
+CONFIG_ACCOUNT_NAME=$(sed "s/\"/'/g" config.txt | sed -n '7p')
+CONFIG_NTASKS=$(sed "s/\"/'/g" config.txt | sed -n '8p')
+CONFIG_NCPUS=$(sed "s/\"/'/g" config.txt | sed -n '9p')
+CONFIG_MEM=$(sed "s/\"/'/g" config.txt | sed -n '10p')
+CONFIG_TIME_LIMIT=$(sed "s/\"/'/g" config.txt | sed -n '11p')
+### Create the checks and submission script by replacing the template defaults with the config lines and pointing it at the run-specific slurm job script
+sed "s|GENOTYPE_DATA_RDS=\${DIR_SRC}/input/test_geno.Rds|$CONFIG_GENO|g" 1-checks_and_submision.sh | \
+    sed "s|PHENOTYPE_DATA_TSV=\${DIR_SRC}/input/test_pheno.tsv|$CONFIG_PHENO|g" | \
+    sed "s|KFOLDS=5|$CONFIG_KFOLDS|g" | \
+    sed "s|NREPS=3|$CONFIG_NREPS|g" | \
+    sed "s|DIR_OUT=\${DIR_SRC}|$CONFIG_DIR_OUT|g" | \
+    sed "s|2-gp_slurm_job.sh|2-gp_slurm_job-${RUN_NAME}.sh|g" \
+> 1-checks_and_submision-${RUN_NAME}.sh
+### Create the slurm job script by replacing the template SBATCH defaults with the config lines
+sed "s|SBATCH --job-name='GS'|$CONFIG_JOB_NAME|g" 2-gp_slurm_job.sh | \
+    sed "s|SBATCH --account='dbiopast1'|$CONFIG_ACCOUNT_NAME|g" | \
+    sed "s|SBATCH --ntasks=1|$CONFIG_NTASKS|g" | \
+    sed "s|SBATCH --cpus-per-task=16|$CONFIG_NCPUS|g" | \
+    sed "s|SBATCH --mem=100G|$CONFIG_MEM|g" | \
+    sed "s|SBATCH --time=1-0:0:00|$CONFIG_TIME_LIMIT|g" \
+> 2-gp_slurm_job-${RUN_NAME}.sh
+### Make both generated scripts executable, then check the input and submit the slurm job
+chmod +x 1-checks_and_submision-${RUN_NAME}.sh 2-gp_slurm_job-${RUN_NAME}.sh
+./1-checks_and_submision-${RUN_NAME}.sh
+# rm *RAND*.sh ### Clean-up after tests: uncomment to remove the generated run-specific scripts
\ No newline at end of file
diff --git a/inst/exec_Rscript/1-checks_and_submision-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh b/inst/exec_Rscript/1-checks_and_submision-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh
new file mode 100755
index 0000000..1703865
--- /dev/null
+++ b/inst/exec_Rscript/1-checks_and_submision-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+### Full path to the directory containing the executable Rscript `gp.R`, which should be co-located with this script (generated from `1-checks_and_submision.sh`) and with the Slurm job script it submits (generated from `2-gp_slurm_job.sh`).
+DIR_SRC=$(dirname $0)
+cd $DIR_SRC
+DIR_SRC=$(pwd)
+##################################
+### Load the conda environment ###
+##################################
+echo "Loading the conda environment..."
+module load Miniconda3/22.11.1-1
+conda init bash
+source ~/.bashrc
+if [ $(conda env list | grep "^genomic_selection " | wc -l) -gt 0 ]
+then
+    conda activate genomic_selection
+else
+    echo "Installing the conda environment..."
+    conda env create -f ${DIR_SRC}/../../conda.yml
+    conda activate genomic_selection
+fi
+#######################################
+### Install gp if not installed yet ###
+#######################################
+echo "Checking in the R library gp exists and installing it if not..."
+Rscript -e 'if (!require("gp", character.only = TRUE)) {install.packages("devtools", repos="https://cloud.r-project.org"); devtools::install_github("jeffersonfparil/gp")}'
+################################################################
+### TOP-LEVEL SLURM ARRAY JOB SUBMISSION SCRIPT
+### Please edit the input variables below to match your dataset:
+################################################################
+### Input variables (use the absolute path to files to be precise)
+### (1) R matrix object with n rows corresponding to samples, and p columns corresponding to the markers or loci. 
+###     - The genotype data can be coded as any numeric range of values, e.g. (0,1,2), (-1,0,1), and (0.00,0.25,0.50,0.75,1.00) or as biallelic characters, e.g. for diploids: "AA", "AB", "BB", and for tetraploids: "AAAA", "AAAB", "AABB", "ABBB", and "BBBB".. It is recommended that this data should be filtered and imputed beforehand.
+###     - The rows are expected to have names of the samples corresponding to the names in the phenotype file.
+###     - The columns are expected to contain the loci names but does need to follow a specific format: chromosome name and position separated by a tab character (`\t`) and an optional allele identifier, e.g. `chr-1\t12345\tallele_A`
+GENOTYPE_DATA_RDS=${DIR_SRC}/input_tmp/test_geno.Rds
+### (2) Tab-delimited phenotype file where column 1: sample names, column 2: population name, columns 3 and so on refers to the phenotype values of one trait per column.
+###     - Headers for the columns should be named appropriately, e.g. ID, POP, TRAIT1, TRAIT2, etc.
+###     - Missing values are allowed for samples whose phenotypes will be predicted by the best model identified within the population they belong to.
+###     - Missing values may be coded as empty cells, -, NA, na, NaN, missing, and/or MISSING.
+PHENOTYPE_DATA_TSV=${DIR_SRC}/input_tmp/test_pheno.tsv
+### (3) Number of folds for k-fold cross-validation.
+KFOLDS=2
+### (4) Number of replications of the k-fold cross-validation each representing a random sorting of the samples hence yielding different ways of partitioning the data.
+NREPS=2
+### (5) Output directory where the output/ folder will be created
+DIR_OUT=${DIR_SRC}/output_tmp
+
+### Check if the genotype file exists
+if [ ! -f $GENOTYPE_DATA_RDS ]
+then
+    echo "Error: The genotype file: $GENOTYPE_DATA_RDS does not exist. Are you specifying the full path? Is the name correct?"
+    exit 101
+else
+    echo "Passed: The genotype file: $GENOTYPE_DATA_RDS exists."
+fi
+### Check if the phenotype file exists
+if [ ! -f $PHENOTYPE_DATA_TSV ]
+then
+    echo "Error: The phenotype file: $PHENOTYPE_DATA_TSV does not exist. Are you specifying the full path? Is the name correct?"
+    exit 102
+else
+    echo "Passed: The phenotype file: $PHENOTYPE_DATA_TSV exists."
+fi
+### Check if the genotype file is a valid Rds file
+echo 'args = commandArgs(trailingOnly=TRUE)
+geno = suppressWarnings(tryCatch(readRDS(args[1]), error=function(e){print("Error loading genotype file.")}))
+' > test_geno_rds.R
+if [ $(Rscript test_geno_rds.R $GENOTYPE_DATA_RDS | grep -i "error" | wc -l) -eq 1 ]
+then
+    echo "Error: The genotype file: $GENOTYPE_DATA_RDS is not an Rds file."
+    exit 103
+else
+    echo "Passed: The genotype file: $GENOTYPE_DATA_RDS is an Rds file."
+fi
+rm test_geno_rds.R
+### Check if the phenotype file is formatted according to the required specifications, i.e. R's read.delim can load it as tab-delimited text with a header line
+echo 'args = commandArgs(trailingOnly=TRUE)
+pheno = suppressWarnings(tryCatch(read.delim(args[1], sep="\t", header=TRUE), error=function(e){print("Error loading phenotype file.")}))
+' > test_pheno_rds.R
+if [ $(Rscript test_pheno_rds.R $PHENOTYPE_DATA_TSV | grep -i "error" | wc -l) -eq 1 ]
+then
+    echo "Error: The phenotype file: $PHENOTYPE_DATA_TSV is not formatted according to specifications. It should be tab-delimited and a header line must be present."
+    exit 104
+else
+    echo "Passed: The phenotype file: $PHENOTYPE_DATA_TSV is tab-delimited with a header line."
+fi
+rm test_pheno_rds.R
+### Check if the output directory is writable by the user (the -w test also fails when the directory does not exist)
+if [ ! -w $DIR_OUT ]
+then
+    echo "Error: You do not have permission to write in the output directory: $DIR_OUT. Please use an output directory you have write access to."
+    exit 106
+else
+    echo "Passed: You have permission to write in the output directory: $DIR_OUT."
+fi
+### Check if the configured slurm array job script exists
+if [ ! -f ${DIR_SRC}/2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh ]
+then
+    echo "Error: The executable code directory: $DIR_SRC does not contain the script: 2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh. Are you sure this is the genomic_selection repo directory?"
+    exit 107
+else
+    echo "Passed: The executable code directory: $DIR_SRC contains the script: 2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh."
+fi
+### Initialise the output directory which will contain all the output Rds files across populations and traits
+if [ ! -d ${DIR_OUT}/output ]
+then
+    mkdir ${DIR_OUT}/output
+fi
+### Submit an array of jobs, one per trait-by-population combination in the phenotype file (header fields are counted on tabs so that column names containing spaces are not miscounted)
+cd $DIR_SRC/
+N_TRAITS=$(echo $(head -n1 $PHENOTYPE_DATA_TSV | awk -F'\t' '{print NF}') - 2 | bc)
+N_POPS=$(tail -n+2 $PHENOTYPE_DATA_TSV | cut -f2 - | sort | uniq | wc -l)
+sbatch --array 1-$(echo "${N_TRAITS} * ${N_POPS}" | bc) \
+    2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh \
+        ${GENOTYPE_DATA_RDS} \
+        ${PHENOTYPE_DATA_TSV} \
+        ${KFOLDS} \
+        ${NREPS} \
+        ${DIR_SRC} \
+        ${DIR_OUT}
\ No newline at end of file
diff --git a/inst/exec_Rscript/1-checks_and_submision.sh b/inst/exec_Rscript/1-checks_and_submision.sh
new file mode 100755
index 0000000..2791fc1
--- /dev/null
+++ b/inst/exec_Rscript/1-checks_and_submision.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+### Full path to the directory containing the executable Rscript `gp.R`, which should be co-located with this script (`1-checks_and_submision.sh`) and with `2-gp_slurm_job.sh`.
+DIR_SRC=$(dirname $0)
+cd $DIR_SRC
+DIR_SRC=$(pwd)
+##################################
+### Load the conda environment ###
+##################################
+echo "Loading the conda environment..."
+module load Miniconda3/22.11.1-1
+conda init bash
+source ~/.bashrc
+if [ $(conda env list | grep "^genomic_selection " | wc -l) -gt 0 ]
+then
+    conda activate genomic_selection
+else
+    echo "Installing the conda environment..."
+    conda env create -f ${DIR_SRC}/../../conda.yml
+    conda activate genomic_selection
+fi
+#######################################
+### Install gp if not installed yet ###
+#######################################
+echo "Checking if the R library gp exists and installing it, if not..."
+Rscript -e 'if (!require("gp", character.only = TRUE)) {install.packages("devtools", repos="https://cloud.r-project.org"); devtools::install_github("jeffersonfparil/gp")}'
+################################################################
+### TOP-LEVEL SLURM ARRAY JOB SUBMISSION SCRIPT
+### Please edit the input variables below to match your dataset:
+################################################################
+### Input variables (use the absolute path to files to be precise)
+### (1) R matrix object with n rows corresponding to samples, and p columns corresponding to the markers or loci. 
+###     - The genotype data can be coded as any numeric range of values, e.g. (0,1,2), (-1,0,1), and (0.00,0.25,0.50,0.75,1.00) or as biallelic characters, e.g. for diploids: "AA", "AB", "BB", and for tetraploids: "AAAA", "AAAB", "AABB", "ABBB", and "BBBB".. It is recommended that this data should be filtered and imputed beforehand.
+###     - The rows are expected to have names of the samples corresponding to the names in the phenotype file.
+###     - The columns are expected to contain the loci names but does need to follow a specific format: chromosome name and position separated by a tab character (`\t`) and an optional allele identifier, e.g. `chr-1\t12345\tallele_A`
+GENOTYPE_DATA_RDS=${DIR_SRC}/input/test_geno.Rds
+### (2) Tab-delimited phenotype file where column 1: sample names, column 2: population name, columns 3 and so on refers to the phenotype values of one trait per column.
+###     - Headers for the columns should be named appropriately, e.g. ID, POP, TRAIT1, TRAIT2, etc.
+###     - Missing values are allowed for samples whose phenotypes will be predicted by the best model identified within the population they belong to.
+###     - Missing values may be coded as empty cells, -, NA, na, NaN, missing, and/or MISSING.
+PHENOTYPE_DATA_TSV=${DIR_SRC}/input/test_pheno.tsv
+### (3) Number of folds for k-fold cross-validation.
+KFOLDS=5
+### (4) Number of replications of the k-fold cross-validation each representing a random sorting of the samples hence yielding different ways of partitioning the data.
+NREPS=3
+### (5) Output directory where the output/ folder will be created
+DIR_OUT=${DIR_SRC}
+
+### Check if the genotype file exists
+if [ ! -f $GENOTYPE_DATA_RDS ]
+then
+    echo "Error: The genotype file: $GENOTYPE_DATA_RDS does not exist. Are you specifying the full path? Is the name correct?"
+    exit 101
+else
+    echo "Passed: The genotype file: $GENOTYPE_DATA_RDS exists."
+fi
+### Check if the phenotype file exists
+if [ ! -f $PHENOTYPE_DATA_TSV ]
+then
+    echo "Error: The phenotype file: $PHENOTYPE_DATA_TSV does not exist. Are you specifying the full path? Is the name correct?"
+    exit 102
+else
+    echo "Passed: The phenotype file: $PHENOTYPE_DATA_TSV exists."
+fi
+### Check if the genotype file is a valid Rds file
+echo 'args = commandArgs(trailingOnly=TRUE)
+geno = suppressWarnings(tryCatch(readRDS(args[1]), error=function(e){print("Error loading genotype file.")}))
+' > test_geno_rds.R
+if [ $(Rscript test_geno_rds.R $GENOTYPE_DATA_RDS | grep -i "error" | wc -l) -eq 1 ]
+then
+    echo "Error: The genotype file: $GENOTYPE_DATA_RDS is not an Rds file."
+    exit 103
+else
+    echo "Passed: The genotype file: $GENOTYPE_DATA_RDS is an Rds file."
+fi
+rm test_geno_rds.R
+### Check if the phenotype file is formatted according to the required specifications, i.e. R's read.delim can load it as tab-delimited text with a header line
+echo 'args = commandArgs(trailingOnly=TRUE)
+pheno = suppressWarnings(tryCatch(read.delim(args[1], sep="\t", header=TRUE), error=function(e){print("Error loading phenotype file.")}))
+' > test_pheno_rds.R
+if [ $(Rscript test_pheno_rds.R $PHENOTYPE_DATA_TSV | grep -i "error" | wc -l) -eq 1 ]
+then
+    echo "Error: The phenotype file: $PHENOTYPE_DATA_TSV is not formatted according to specifications. It should be tab-delimited and a header line must be present."
+    exit 104
+else
+    echo "Passed: The phenotype file: $PHENOTYPE_DATA_TSV is tab-delimited with a header line."
+fi
+rm test_pheno_rds.R
+### Check if the output directory is writable by the user (the -w test also fails when the directory does not exist)
+if [ ! -w $DIR_OUT ]
+then
+    echo "Error: You do not have permission to write in the output directory: $DIR_OUT. Please use an output directory you have write access to."
+    exit 106
+else
+    echo "Passed: You have permission to write in the output directory: $DIR_OUT."
+fi
+### Check if the configured slurm array job script exists
+if [ ! -f ${DIR_SRC}/2-gp_slurm_job.sh ]
+then
+    echo "Error: The executable code directory: $DIR_SRC does not contain the script: 2-gp_slurm_job.sh. Are you sure this is the genomic_selection repo directory?"
+    exit 107
+else
+    echo "Passed: The executable code directory: $DIR_SRC contains the script: 2-gp_slurm_job.sh."
+fi
+### Initialise the output directory which will contain all the output Rds files across populations and traits
+if [ ! -d ${DIR_OUT}/output ]
+then
+    mkdir ${DIR_OUT}/output
+fi
+### Submit an array of jobs, one per trait-by-population combination in the phenotype file (header fields are counted on tabs so that column names containing spaces are not miscounted)
+cd $DIR_SRC/
+N_TRAITS=$(echo $(head -n1 $PHENOTYPE_DATA_TSV | awk -F'\t' '{print NF}') - 2 | bc)
+N_POPS=$(tail -n+2 $PHENOTYPE_DATA_TSV | cut -f2 - | sort | uniq | wc -l)
+sbatch --array 1-$(echo "${N_TRAITS} * ${N_POPS}" | bc) \
+    2-gp_slurm_job.sh \
+        ${GENOTYPE_DATA_RDS} \
+        ${PHENOTYPE_DATA_TSV} \
+        ${KFOLDS} \
+        ${NREPS} \
+        ${DIR_SRC} \
+        ${DIR_OUT}
\ No newline at end of file
diff --git a/inst/exec_Rscript/2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh b/inst/exec_Rscript/2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh
new file mode 100755
index 0000000..7b3b3a9
--- /dev/null
+++ b/inst/exec_Rscript/2-gp_slurm_job-dev1-jp3h-SunJun23062044AEST2024-RAND27285.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+#SBATCH --job-name='test'
+#SBATCH --account='dbiopast2'   ### EDIT ME: Pick the appropriate account name, e.g. dbiopast1 or dbiopast2
+#SBATCH --ntasks=1              ### LEAVE ME:Request a single task as we will be submitting this as an array job where each job corresponds to a trait
+#SBATCH --cpus-per-task=8      ### EDIT ME: Parallelisation across replications, folds and models (more cpu means faster execution time but probably longer time to wait for the Slurm scheduler to find resources to allocate to the job)
+#SBATCH --mem=64G              ### EDIT ME: Proportional to the input data (will need to test the appropriate memory required, hint use `seff ${JOBID}`)
+#SBATCH --time=0-2:0:00         ### EDIT ME: Proportional to the input data, number of folds, replications, and models to be used
+###################################################################################################
+### Edit the Slurm settings above to match your requirements. 
+###################################################################################################
+
+###################################################################################################
+### The variables below are received as positional arguments ($1 to $6) from the `sbatch` call in the checks-and-submission script:
+###################################################################################################
+### Input variables (use the absolute path to files to be precise)
+### (1) R matrix object with n rows corresponding to samples, and p columns corresponding to the markers or loci. 
+###     Should have no missing data or else will be imputed via mean value imputation.
+GENOTYPE_DATA_RDS=$1
+### (2) Tab-delimited phenotype file where column 1: sample names, column 2: population name, columns 3 and so on refers to the phenotype values of one trait per column.
+###     Headers for the columns should be named appropriately, e.g. ID, POP, TRAIT1, TRAIT2, etc.
+###     Missing values are allowed for samples whose phenotypes will be predicted by the best model identified within the population they belong to.
+###     Missing values may be coded as empty cells, -, NA, na, NaN, missing, and/or MISSING.
+PHENOTYPE_DATA_TSV=$2
+### (3) Number of folds for k-fold cross-validation.
+KFOLDS=$3
+### (4) Number of replications of the k-fold cross-validation each representing a random sorting of the samples hence yielding different ways of partitioning the data.
+NREPS=$4
+### (5) Full path to the location of the executable Rscript gp.R
+DIR_SRC=$5
+### (6) Output directory where the output/ folder will be created
+DIR_OUT=$6
+
+###################################################################################################
+### Edit the code below, if and only if you have read the documentation or familiar with `src/*.R`:
+###################################################################################################
+### Define the trait and population to include
+N_POPS=$(tail -n+2 $PHENOTYPE_DATA_TSV | cut -f2 - | sort | uniq | wc -l)
+TRAIT_IDX=$(echo "((${SLURM_ARRAY_TASK_ID}-1) / ${N_POPS}) + 1" | bc)
+POP_IDX=$(echo "${SLURM_ARRAY_TASK_ID} % ${N_POPS}" | bc)
+if [ "${POP_IDX}" -eq 0 ]
+then
+    POP_IDX=${N_POPS}
+fi
+COLUMN_ID=$(echo 2 + ${TRAIT_IDX} | bc)
+TRAIT=$(head -n1 $PHENOTYPE_DATA_TSV | cut -f${COLUMN_ID})
+POP=$(tail -n+2 $PHENOTYPE_DATA_TSV | cut -f2 - | sort | uniq | head -n${POP_IDX} | tail -n1)
+### Skip leave-one-population-out cross-validation if there is only one population
+if [ "${N_POPS}" -eq 1 ]
+then
+    BOOL_ACROSS=FALSE
+else
+    if [ "${POP_IDX}" -eq 1 ]
+    then
+        BOOL_ACROSS=TRUE
+    else
+        BOOL_ACROSS=FALSE
+    fi
+fi
+### Output directories: a shared output/ folder plus a per-trait-per-population working sub-folder (mkdir -p tolerates directories that already exist, e.g. created by another array task)
+DIR_OUT_MAIN=${DIR_OUT}/output
+DIR_OUT_SUB=${DIR_OUT_MAIN}/output-${TRAIT}-${POP}
+if [ ! -d "$DIR_OUT_MAIN" ]
+then
+    mkdir -p "$DIR_OUT_MAIN"
+fi
+mkdir -p "$DIR_OUT_SUB"
+### Log messages
+echo JOB_${SLURM_ARRAY_TASK_ID}-TRAIT_${TRAIT}-POP_${POP} > ${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log
+echo "==========================================
+-------------------------------------------
+	    Job Info
+-------------------------------------------
+SLURM_JOB_ID = $SLURM_JOB_ID
+SLURM_JOB_NAME = $SLURM_JOB_NAME
+SLURM_JOB_NODELIST = $SLURM_JOB_NODELIST
+SLURM_SUBMIT_HOST = $SLURM_SUBMIT_HOST
+SLURM_SUBMIT_DIR = $SLURM_SUBMIT_DIR
+SLURM_NTASKS = $SLURM_NTASKS
+SLURM_ARRAY_TASK_ID = $SLURM_ARRAY_TASK_ID
+SLURM_MEM_PER_NODE = $(echo "$SLURM_MEM_PER_NODE / (2^10)" | bc) GB
+SLURM_CPUS_PER_TASK = $SLURM_CPUS_PER_TASK
+-------------------------------------------
+	    Variables
+-------------------------------------------
+GENOTYPE_DATA_RDS	: $GENOTYPE_DATA_RDS
+PHENOTYPE_DATA_TSV	: $PHENOTYPE_DATA_TSV
+KFOLDS		        : $KFOLDS
+NREPS		        : $NREPS
+TRAIT		        : $TRAIT
+POPULATION          : $POP
+-------------------------------------------
+	    Output directory
+-------------------------------------------
+${DIR_OUT_SUB}
+==========================================" >> ${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log
+
+### Load the conda environment
+module load Miniconda3/22.11.1-1
+conda init bash
+source ~/.bashrc
+conda activate genomic_selection
+
+### Run within and across population replicated k-fold cross-validation and prediction of missing phenotypes
+time \
+Rscript ${DIR_SRC}/gp.R \
+    --fname-geno $GENOTYPE_DATA_RDS \
+    --fname-pheno $PHENOTYPE_DATA_TSV \
+    --population $POP \
+    --dir-output $DIR_OUT_SUB \
+    --pheno-idx-col-y $COLUMN_ID \
+    --bool-within TRUE \
+    --bool-across $BOOL_ACROSS \
+    --n-folds $KFOLDS \
+    --n-reps $NREPS \
+    --bool-parallel TRUE \
+    --max-mem-Gb $(echo "$SLURM_MEM_PER_NODE / (2^10)" | bc) \
+    --n-threads $SLURM_CPUS_PER_TASK \
+    --verbose TRUE >> ${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log
+### Clean-up
+mv ${DIR_OUT_SUB}/GENOMIC_PREDICTIONS_OUTPUT-*.Rds ${DIR_OUT_MAIN}
+mv ${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log ${DIR_OUT_MAIN}
+rm -R ${DIR_OUT_SUB}
\ No newline at end of file
diff --git a/inst/exec_Rscript/2-gp_slurm_job.sh b/inst/exec_Rscript/2-gp_slurm_job.sh
new file mode 100755
index 0000000..e8315f4
--- /dev/null
+++ b/inst/exec_Rscript/2-gp_slurm_job.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+#SBATCH --job-name='GS'
+#SBATCH --account='dbiopast1'   ### EDIT ME: Pick the appropriate account name, e.g. dbiopast1 or dbiopast2
+#SBATCH --ntasks=1              ### LEAVE ME: Request a single task as we will be submitting this as an array job where each job corresponds to a trait-population combination
+#SBATCH --cpus-per-task=16      ### EDIT ME: Parallelisation across replications, folds and models (more cpu means faster execution time but probably longer time to wait for the Slurm scheduler to find resources to allocate to the job)
+#SBATCH --mem=100G              ### EDIT ME: Proportional to the input data (will need to test the appropriate memory required, hint use `seff ${JOBID}`)
+#SBATCH --time=1-0:0:00         ### EDIT ME: Proportional to the input data, number of folds, replications, and models to be used
+###################################################################################################
+### Edit the Slurm settings above to match your requirements. 
+###################################################################################################
+
+###################################################################################################
+### The variables below will be exported from `00_gs_slurm_job_wrapper.sh`:
+###################################################################################################
+### Input variables (use the absolute path to files to be precise)
+### (1) R matrix object with n rows corresponding to samples, and p columns corresponding to the markers or loci.
+###     Should have no missing data or else will be imputed via mean value imputation.
+GENOTYPE_DATA_RDS=$1
+### (2) Tab-delimited phenotype file where column 1: sample names, column 2: population name, columns 3 and so on refers to the phenotype values of one trait per column.
+###     Headers for the columns should be named appropriately, e.g. ID, POP, TRAIT1, TRAIT2, etc.
+###     Missing values are allowed for samples whose phenotypes will be predicted by the best model identified within the population they belong to.
+###     Missing values may be coded as empty cells, -, NA, na, NaN, missing, and/or MISSING.
+PHENOTYPE_DATA_TSV=$2
+### (3) Number of folds for k-fold cross-validation.
+KFOLDS=$3
+### (4) Number of replications of the k-fold cross-validation each representing a random sorting of the samples hence yielding different ways of partitioning the data.
+NREPS=$4
+### (5) Full path to the directory containing the executable Rscript gp.R
+DIR_SRC=$5
+### (6) Full path to the directory in which the `output` folder holding the results will be created
+DIR_OUT=$6
+
+###################################################################################################
+### Edit the code below, if and only if you have read the documentation or are familiar with `src/*.R`:
+###################################################################################################
+### Define the trait and population to include
+N_POPS=$(tail -n+2 $PHENOTYPE_DATA_TSV | cut -f2 - | sort | uniq | wc -l)
+TRAIT_IDX=$(echo "((${SLURM_ARRAY_TASK_ID}-1) / ${N_POPS}) + 1" | bc)
+POP_IDX=$(echo "${SLURM_ARRAY_TASK_ID} % ${N_POPS}" | bc)
+if [ "${POP_IDX}" -eq 0 ]
+then
+    POP_IDX=${N_POPS}
+fi
+COLUMN_ID=$(echo 2 + ${TRAIT_IDX} | bc)
+TRAIT=$(head -n1 $PHENOTYPE_DATA_TSV | cut -f${COLUMN_ID})
+POP=$(tail -n+2 $PHENOTYPE_DATA_TSV | cut -f2 - | sort | uniq | head -n${POP_IDX} | tail -n1)
+### Skip leave-one-population-out cross-validation if there is only one population
+if [ "${N_POPS}" -eq 1 ]
+then
+    BOOL_ACROSS=FALSE
+else
+    if [ "${POP_IDX}" -eq 1 ]
+    then
+        BOOL_ACROSS=TRUE
+    else
+        BOOL_ACROSS=FALSE
+    fi
+fi
+### Output directories
+DIR_OUT_MAIN=${DIR_OUT}/output
+DIR_OUT_SUB=${DIR_OUT_MAIN}/output-${TRAIT}-${POP}
+### Create the main output directory if it does not exist yet. `mkdir -p` is
+### used instead of a test-then-create because it succeeds when the directory
+### is already present, e.g. when another array task has just created it.
+mkdir -p "$DIR_OUT_MAIN"
+mkdir "$DIR_OUT_SUB"
+### Log messages
+echo JOB_${SLURM_ARRAY_TASK_ID}-TRAIT_${TRAIT}-POP_${POP} > ${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log
+echo "==========================================
+-------------------------------------------
+	    Job Info
+-------------------------------------------
+SLURM_JOB_ID = $SLURM_JOB_ID
+SLURM_JOB_NAME = $SLURM_JOB_NAME
+SLURM_JOB_NODELIST = $SLURM_JOB_NODELIST
+SLURM_SUBMIT_HOST = $SLURM_SUBMIT_HOST
+SLURM_SUBMIT_DIR = $SLURM_SUBMIT_DIR
+SLURM_NTASKS = $SLURM_NTASKS
+SLURM_ARRAY_TASK_ID = $SLURM_ARRAY_TASK_ID
+SLURM_MEM_PER_NODE = $(echo "$SLURM_MEM_PER_NODE / (2^10)" | bc) GB
+SLURM_CPUS_PER_TASK = $SLURM_CPUS_PER_TASK
+-------------------------------------------
+	    Variables
+-------------------------------------------
+GENOTYPE_DATA_RDS	: $GENOTYPE_DATA_RDS
+PHENOTYPE_DATA_TSV	: $PHENOTYPE_DATA_TSV
+KFOLDS		        : $KFOLDS
+NREPS		        : $NREPS
+TRAIT		        : $TRAIT
+POPULATION          : $POP
+-------------------------------------------
+	    Output directory
+-------------------------------------------
+${DIR_OUT_SUB}
+==========================================" >> ${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log
+
+### Load the conda environment
+module load Miniconda3/22.11.1-1
+### Enable `conda activate` in this non-interactive batch shell via the shell hook, without modifying or depending on ~/.bashrc
+eval "$(conda shell.bash hook)"
+conda activate genomic_selection
+
+### Run within and across population replicated k-fold cross-validation and prediction of missing phenotypes.
+### Expansions are quoted so that each value reaches gp.R as exactly one argument; only standard output is appended to the job log.
+time Rscript "${DIR_SRC}/gp.R" \
+    --fname-geno "$GENOTYPE_DATA_RDS" \
+    --fname-pheno "$PHENOTYPE_DATA_TSV" \
+    --population "$POP" \
+    --dir-output "$DIR_OUT_SUB" \
+    --pheno-idx-col-y "$COLUMN_ID" \
+    --bool-within TRUE \
+    --bool-across "$BOOL_ACROSS" \
+    --n-folds "$KFOLDS" \
+    --n-reps "$NREPS" \
+    --bool-parallel TRUE \
+    --max-mem-Gb "$(echo "$SLURM_MEM_PER_NODE / (2^10)" | bc)" \
+    --n-threads "$SLURM_CPUS_PER_TASK" \
+    --verbose TRUE >> "${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log"
+### Clean-up: move the prediction outputs and the job log into the main output directory, then remove the per-job sub-directory
+mv "${DIR_OUT_SUB}"/GENOMIC_PREDICTIONS_OUTPUT-*.Rds "${DIR_OUT_MAIN}"
+mv "${DIR_OUT_SUB}/job_info-${TRAIT}-${POP}.log" "${DIR_OUT_MAIN}"
+rm -R "${DIR_OUT_SUB}"
\ No newline at end of file
diff --git a/inst/exec_Rscript/config.txt b/inst/exec_Rscript/config.txt
new file mode 100644
index 0000000..2863ff2
--- /dev/null
+++ b/inst/exec_Rscript/config.txt
@@ -0,0 +1,23 @@
+GENOTYPE_DATA_RDS=${DIR_SRC}/input_tmp/test_geno.Rds
+PHENOTYPE_DATA_TSV=${DIR_SRC}/input_tmp/test_pheno.tsv
+KFOLDS=2
+NREPS=2
+DIR_OUT=${DIR_SRC}/output_tmp
+SBATCH --job-name="test"
+SBATCH --account="dbiopast2"
+SBATCH --ntasks=1
+SBATCH --cpus-per-task=8
+SBATCH --mem=64G
+SBATCH --time=0-2:0:00
+
+# GENOTYPE_DATA_RDS=${DIR_SRC}/input/test_geno.Rds
+# PHENOTYPE_DATA_TSV=${DIR_SRC}/input/test_pheno.tsv
+# KFOLDS=2
+# NREPS=2
+# DIR_OUT=${DIR_SRC}
+# SBATCH --job-name="test"
+# SBATCH --account="dbiopast2"
+# SBATCH --ntasks=1
+# SBATCH --cpus-per-task=8
+# SBATCH --mem=64G
+# SBATCH --time=0-2:0:00
\ No newline at end of file