From 3056d890b4fecdeee66e5b7cab541a338b07684a Mon Sep 17 00:00:00 2001
From: jeffersonfparil <jeffersonparil@gmail.com>
Date: Thu, 6 Jun 2024 15:46:25 +1000
Subject: [PATCH] more tests

---
 .Rbuildignore                                 |  2 +-
 .gitignore                                    |  3 +-
 R/main.R                                      |  2 +-
 exec/tests/test.sh                            | 45 ++++++++++++++++++-
 ...ross_validation_across_populations_bulk.Rd |  2 +
 ...ross_validation_across_populations_lopo.Rd |  2 +
 ..._validation_across_populations_pairwise.Rd |  2 +
 man/fn_cross_validation_within_population.Rd  |  2 +
 man/fn_cv_1.Rd                                |  2 +
 man/gp.Rd                                     |  8 ++++
 10 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index c58864e..d48e14e 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1,2 +1,2 @@
 .github/
-exec/tests/
\ No newline at end of file
+exec/
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 3fd71d6..4436bd5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,4 +49,5 @@ po/*~
 rsconnect/
 
 # Test files
-exec/tests/outdir/
\ No newline at end of file
+exec/tests/outdir/
+exec/tests/logfile
\ No newline at end of file
diff --git a/R/main.R b/R/main.R
index 8496a61..12f1aad 100644
--- a/R/main.R
+++ b/R/main.R
@@ -473,7 +473,7 @@ gp = function(args) {
             GENOMIC_PREDICTIONS = NA
         } else {
             ### Find the best model in the args$population
-            df_agg = aggregate(corr ~ model, data=METRICS_WITHIN_POP, FUN=mean, na.rm=TRUE)
+            df_agg = stats::aggregate(corr ~ model, data=METRICS_WITHIN_POP, FUN=mean, na.rm=TRUE)
             idx = which(df_agg$corr == max(df_agg$corr, na.rm=TRUE))[1]
             model = df_agg$model[idx]
             ### Define additional model input/s
diff --git a/exec/tests/test.sh b/exec/tests/test.sh
index be9b9f0..2596cd2 100644
--- a/exec/tests/test.sh
+++ b/exec/tests/test.sh
@@ -1,4 +1,8 @@
 #!/bin/bash
+
+#############
+### GRAPE ###
+#############
 DIR='gp/exec/tests/'
 cd $DIR
 time \
@@ -17,7 +21,9 @@ Rscript ../gp.R \
 --n-threads 32 \
 --verbose TRUE
 
-
+################
+### RYEGRASS ###
+################
 DIR='/group/pasture/Jeff/gp/exec/tests'
 cd $DIR
 fname_geno='/group/pasture/Jeff/ryegrass/workdir/STR_NUE_WUE_HS-1717536141.3435302.3200855812-IMPUTED.tsv'
@@ -47,6 +53,43 @@ tail logfile
 grep -A1 "ERROR:" logfile
 ls -lhtr outdir/*.Rds
 
+
+###############
+### LUCERNE ###
+###############
+# Run gp.R once for each phenotype column (from column 3 to the last) and append the output to a single log file.
+DIR='/group/pasture/Jeff/gp/exec/tests'
+cd "$DIR" || exit 1  # Stop instead of running the tests from the wrong directory
+fname_geno='/group/pasture/Jeff/lucerne/workdir/FINAL-IMPUTED-noTrailingAllele-filteredSNPlist.Rds'
+fname_pheno='/group/pasture/Jeff/lucerne/workdir/Lucerne_PhenomicsDB_2024-05-27-BiomassPredicted.tsv'
+# The phenotype file is a .tsv: split the header on tabs only so names containing spaces are not counted as extra columns
+n_traits=$(head -n1 "$fname_pheno" | awk -F'\t' '{print NF}')
+mkdir -p outdir/lucerne && touch outdir/lucerne/logfile  # The output directory may not exist yet
+time \
+for idx_pheno in $(seq 3 "$n_traits"); do
+    time Rscript ../gp.R \
+        --fname-geno "$fname_geno" \
+        --fname-pheno "$fname_pheno" \
+        --population "DB-MS-31-22-001" \
+        --dir-output outdir/lucerne \
+        --pheno-idx-col-y "$idx_pheno" \
+        --bool-within TRUE \
+        --bool-across TRUE \
+        --n-folds 5 \
+        --n-reps 1 \
+        --bool-parallel TRUE \
+        --max-mem-Gb 60 \
+        --n-threads 32 \
+        --verbose TRUE >> outdir/lucerne/logfile
+done
+tail outdir/lucerne/logfile
+grep -A1 "ERROR:" outdir/lucerne/logfile
+ls -lhtr outdir/lucerne/*.Rds
+
+
+########################################
+### COMPLETE SET OF INPUT PARAMETERS ###
+########################################
 # Rscript ../gp.R \
 # --fname-geno= \
 # --fname-pheno= \
diff --git a/man/fn_cross_validation_across_populations_bulk.Rd b/man/fn_cross_validation_across_populations_bulk.Rd
index 645a91a..c2e0dad 100644
--- a/man/fn_cross_validation_across_populations_bulk.Rd
+++ b/man/fn_cross_validation_across_populations_bulk.Rd
@@ -85,6 +85,8 @@ https://link.springer.com/protocol/10.1007/978-1-62703-447-0_13
 \item $model: genomic prediction model name
 \item $pop_training: population/s used in the training set (separated by commas if more than 1)
 \item $pop_validation: population/s used in the validation set (separated by commas if more than 1)
+\item $n_training: number of samples/entries/pools in the training set
+\item $n_validation: number of samples/entries/pools in the validation set
 \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies
 \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16)
 \item $mbe: mean bias error
diff --git a/man/fn_cross_validation_across_populations_lopo.Rd b/man/fn_cross_validation_across_populations_lopo.Rd
index 66d4ba0..c00f662 100644
--- a/man/fn_cross_validation_across_populations_lopo.Rd
+++ b/man/fn_cross_validation_across_populations_lopo.Rd
@@ -75,6 +75,8 @@ https://link.springer.com/protocol/10.1007/978-1-62703-447-0_13
 \item $model: genomic prediction model name
 \item $pop_training: population/s used in the training set (separated by commas if more than 1)
 \item $pop_validation: population/s used in the validation set (separated by commas if more than 1)
+\item $n_training: number of samples/entries/pools in the training set
+\item $n_validation: number of samples/entries/pools in the validation set
 \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies
 \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16)
 \item $mbe: mean bias error
diff --git a/man/fn_cross_validation_across_populations_pairwise.Rd b/man/fn_cross_validation_across_populations_pairwise.Rd
index e69e9e1..c1934fb 100644
--- a/man/fn_cross_validation_across_populations_pairwise.Rd
+++ b/man/fn_cross_validation_across_populations_pairwise.Rd
@@ -75,6 +75,8 @@ https://link.springer.com/protocol/10.1007/978-1-62703-447-0_13
 \item $model: genomic prediction model name
 \item $pop_training: population/s used in the training set (separated by commas if more than 1)
 \item $pop_validation: population/s used in the validation set (separated by commas if more than 1)
+\item $n_training: number of samples/entries/pools in the training set
+\item $n_validation: number of samples/entries/pools in the validation set
 \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies
 \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16)
 \item $mbe: mean bias error
diff --git a/man/fn_cross_validation_within_population.Rd b/man/fn_cross_validation_within_population.Rd
index 9350048..31ec425 100644
--- a/man/fn_cross_validation_within_population.Rd
+++ b/man/fn_cross_validation_within_population.Rd
@@ -85,6 +85,8 @@ https://link.springer.com/protocol/10.1007/978-1-62703-447-0_13
 \item $model: genomic prediction model name
 \item $pop_training: population/s used in the training set (separated by commas if more than 1)
 \item $pop_validation: population/s used in the validation set (separated by commas if more than 1)
+\item $n_training: number of samples/entries/pools in the training set
+\item $n_validation: number of samples/entries/pools in the validation set
 \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies
 \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16)
 \item $mbe: mean bias error
diff --git a/man/fn_cv_1.Rd b/man/fn_cv_1.Rd
index a03d340..0398a01 100644
--- a/man/fn_cv_1.Rd
+++ b/man/fn_cv_1.Rd
@@ -64,6 +64,8 @@ i.e. prefix (which can include an existing directory) of Bayesian (BGLR) model t
 \item $model: genomic prediction model name
 \item $pop_training: population/s used in the training set (separated by commas if more than 1)
 \item $pop_validation: population/s used in the validation set (separated by commas if more than 1)
+\item $n_training: number of samples/entries/pools in the training set
+\item $n_validation: number of samples/entries/pools in the validation set
 \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies
 \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16)
 \item $mbe: mean bias error
diff --git a/man/gp.Rd b/man/gp.Rd
index bc7f4b3..b786e1f 100644
--- a/man/gp.Rd
+++ b/man/gp.Rd
@@ -90,6 +90,8 @@ in within population cross-validation (see ?fn_cross_validation_within_populatio
 \item $model: genomic prediction model name
 \item $pop_training: population/s used in the training set (separated by commas if more than 1)
 \item $pop_validation: population/s used in the validation set (separated by commas if more than 1)
+\item $n_training: number of samples/entries/pools in the training set
+\item $n_validation: number of samples/entries/pools in the validation set
 \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies
 \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16)
 \item $mbe: mean bias error
@@ -121,6 +123,8 @@ in within population cross-validation (see ?fn_cross_validation_within_populatio
 \item $model: genomic prediction model name
 \item $pop_training: population/s used in the training set (separated by commas if more than 1)
 \item $pop_validation: population/s used in the validation set (separated by commas if more than 1)
+\item $n_training: number of samples/entries/pools in the training set
+\item $n_validation: number of samples/entries/pools in the validation set
 \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies
 \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16)
 \item $mbe: mean bias error
@@ -152,6 +156,8 @@ in within population cross-validation (see ?fn_cross_validation_within_populatio
 \item $model: genomic prediction model name
 \item $pop_training: population/s used in the training set (separated by commas if more than 1)
 \item $pop_validation: population/s used in the validation set (separated by commas if more than 1)
+\item $n_training: number of samples/entries/pools in the training set
+\item $n_validation: number of samples/entries/pools in the validation set
 \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies
 \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16)
 \item $mbe: mean bias error
@@ -183,6 +189,8 @@ in within population cross-validation (see ?fn_cross_validation_within_populatio
 \item $model: genomic prediction model name
 \item $pop_training: population/s used in the training set (separated by commas if more than 1)
 \item $pop_validation: population/s used in the validation set (separated by commas if more than 1)
+\item $n_training: number of samples/entries/pools in the training set
+\item $n_validation: number of samples/entries/pools in the validation set
 \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies
 \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16)
 \item $mbe: mean bias error