From 3056d890b4fecdeee66e5b7cab541a338b07684a Mon Sep 17 00:00:00 2001 From: jeffersonfparil Date: Thu, 6 Jun 2024 15:46:25 +1000 Subject: [PATCH] more tests --- .Rbuildignore | 2 +- .gitignore | 3 +- R/main.R | 2 +- exec/tests/test.sh | 45 ++++++++++++++++++- ...ross_validation_across_populations_bulk.Rd | 2 + ...ross_validation_across_populations_lopo.Rd | 2 + ..._validation_across_populations_pairwise.Rd | 2 + man/fn_cross_validation_within_population.Rd | 2 + man/fn_cv_1.Rd | 2 + man/gp.Rd | 8 ++++ 10 files changed, 66 insertions(+), 4 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index c58864e..d48e14e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,2 +1,2 @@ .github/ -exec/tests/ \ No newline at end of file +exec/ \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3fd71d6..4436bd5 100644 --- a/.gitignore +++ b/.gitignore @@ -49,4 +49,5 @@ po/*~ rsconnect/ # Test files -exec/tests/outdir/ \ No newline at end of file +exec/tests/outdir/ +exec/tests/logfile \ No newline at end of file diff --git a/R/main.R b/R/main.R index 8496a61..12f1aad 100644 --- a/R/main.R +++ b/R/main.R @@ -473,7 +473,7 @@ gp = function(args) { GENOMIC_PREDICTIONS = NA } else { ### Find the best model in the args$population - df_agg = aggregate(corr ~ model, data=METRICS_WITHIN_POP, FUN=mean, na.rm=TRUE) + df_agg = stats::aggregate(corr ~ model, data=METRICS_WITHIN_POP, FUN=mean, na.rm=TRUE) idx = which(df_agg$corr == max(df_agg$corr, na.rm=TRUE))[1] model = df_agg$model[idx] ### Define additional model input/s diff --git a/exec/tests/test.sh b/exec/tests/test.sh index be9b9f0..2596cd2 100644 --- a/exec/tests/test.sh +++ b/exec/tests/test.sh @@ -1,4 +1,8 @@ #!/bin/bash + +############# +### GRAPE ### +############# DIR='gp/exec/tests/' cd $DIR time \ @@ -17,7 +21,9 @@ Rscript ../gp.R \ --n-threads 32 \ --verbose TRUE - +################ +### RYEGRASS ### +################ DIR='/group/pasture/Jeff/gp/exec/tests' cd $DIR fname_geno='/group/pasture/Jeff/ryegrass/workdir/STR_NUE_WUE_HS-1717536141.3435302.3200855812-IMPUTED.tsv' @@ -47,6 +53,43 @@ tail logfile grep -A1 "ERROR:" logfile ls -lhtr outdir/*.Rds + +############### +### LUCERNE ### +############### +DIR='/group/pasture/Jeff/gp/exec/tests' +cd $DIR +fname_geno='/group/pasture/Jeff/lucerne/workdir/FINAL-IMPUTED-noTrailingAllele-filteredSNPlist.Rds' +fname_pheno='/group/pasture/Jeff/lucerne/workdir/Lucerne_PhenomicsDB_2024-05-27-BiomassPredicted.tsv' +n_traits=$(head -n1 $fname_pheno | awk '{print NF}') +touch outdir/lucerne/logfile +time \ +for idx_pheno in $(seq 3 $n_traits) +do + time \ + Rscript ../gp.R \ + --fname-geno $fname_geno \ + --fname-pheno $fname_pheno \ + --population "DB-MS-31-22-001" \ + --dir-output outdir/lucerne \ + --pheno-idx-col-y $idx_pheno \ + --bool-within TRUE \ + --bool-across TRUE \ + --n-folds 5 \ + --n-reps 1 \ + --bool-parallel TRUE \ + --max-mem-Gb 60 \ + --n-threads 32 \ + --verbose TRUE >> outdir/lucerne/logfile +done +tail outdir/lucerne/logfile +grep -A1 "ERROR:" outdir/lucerne/logfile +ls -lhtr outdir/lucerne/*.Rds + + +######################################## +### COMPLETE SET OF INPUT PARAMETERS ### +######################################## # Rscript ../gp.R \ # --fname-geno= \ # --fname-pheno= \ diff --git a/man/fn_cross_validation_across_populations_bulk.Rd b/man/fn_cross_validation_across_populations_bulk.Rd index 645a91a..c2e0dad 100644 --- a/man/fn_cross_validation_across_populations_bulk.Rd +++ b/man/fn_cross_validation_across_populations_bulk.Rd @@ -85,6 +85,8 @@ https://link.springer.com/protocol/10.1007/978-1-62703-447-0_13 \item $model: genomic prediction model name \item $pop_training: population/s used in the training set (separated by commas if more than 1) \item $pop_validation: population/s used in the validation set (separated by commas if more than 1) +\item $n_training: number of samples/entries/pools in the training set +\item $n_validation: number of samples/entries/pools in the validation set \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16) \item $mbe: mean bias error diff --git a/man/fn_cross_validation_across_populations_lopo.Rd b/man/fn_cross_validation_across_populations_lopo.Rd index 66d4ba0..c00f662 100644 --- a/man/fn_cross_validation_across_populations_lopo.Rd +++ b/man/fn_cross_validation_across_populations_lopo.Rd @@ -75,6 +75,8 @@ https://link.springer.com/protocol/10.1007/978-1-62703-447-0_13 \item $model: genomic prediction model name \item $pop_training: population/s used in the training set (separated by commas if more than 1) \item $pop_validation: population/s used in the validation set (separated by commas if more than 1) +\item $n_training: number of samples/entries/pools in the training set +\item $n_validation: number of samples/entries/pools in the validation set \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16) \item $mbe: mean bias error diff --git a/man/fn_cross_validation_across_populations_pairwise.Rd b/man/fn_cross_validation_across_populations_pairwise.Rd index e69e9e1..c1934fb 100644 --- a/man/fn_cross_validation_across_populations_pairwise.Rd +++ b/man/fn_cross_validation_across_populations_pairwise.Rd @@ -75,6 +75,8 @@ https://link.springer.com/protocol/10.1007/978-1-62703-447-0_13 \item $model: genomic prediction model name \item $pop_training: population/s used in the training set (separated by commas if more than 1) \item $pop_validation: population/s used in the validation set (separated by commas if more than 1) +\item $n_training: number of samples/entries/pools in the training set +\item $n_validation: number of samples/entries/pools in the validation set \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16) \item $mbe: mean bias error diff --git a/man/fn_cross_validation_within_population.Rd b/man/fn_cross_validation_within_population.Rd index 9350048..31ec425 100644 --- a/man/fn_cross_validation_within_population.Rd +++ b/man/fn_cross_validation_within_population.Rd @@ -85,6 +85,8 @@ https://link.springer.com/protocol/10.1007/978-1-62703-447-0_13 \item $model: genomic prediction model name \item $pop_training: population/s used in the training set (separated by commas if more than 1) \item $pop_validation: population/s used in the validation set (separated by commas if more than 1) +\item $n_training: number of samples/entries/pools in the training set +\item $n_validation: number of samples/entries/pools in the validation set \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16) \item $mbe: mean bias error diff --git a/man/fn_cv_1.Rd b/man/fn_cv_1.Rd index a03d340..0398a01 100644 --- a/man/fn_cv_1.Rd +++ b/man/fn_cv_1.Rd @@ -64,6 +64,8 @@ i.e. prefix (which can include an existing directory) of Bayesian (BGLR) model t \item $model: genomic prediction model name \item $pop_training: population/s used in the training set (separated by commas if more than 1) \item $pop_validation: population/s used in the validation set (separated by commas if more than 1) +\item $n_training: number of samples/entries/pools in the training set +\item $n_validation: number of samples/entries/pools in the validation set \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16) \item $mbe: mean bias error diff --git a/man/gp.Rd b/man/gp.Rd index bc7f4b3..b786e1f 100644 --- a/man/gp.Rd +++ b/man/gp.Rd @@ -90,6 +90,8 @@ in within population cross-validation (see ?fn_cross_validation_within_populatio \item $model: genomic prediction model name \item $pop_training: population/s used in the training set (separated by commas if more than 1) \item $pop_validation: population/s used in the validation set (separated by commas if more than 1) +\item $n_training: number of samples/entries/pools in the training set +\item $n_validation: number of samples/entries/pools in the validation set \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16) \item $mbe: mean bias error @@ -121,6 +123,8 @@ in within population cross-validation (see ?fn_cross_validation_within_populatio \item $model: genomic prediction model name \item $pop_training: population/s used in the training set (separated by commas if more than 1) \item $pop_validation: population/s used in the validation set (separated by commas if more than 1) +\item $n_training: number of samples/entries/pools in the training set +\item $n_validation: number of samples/entries/pools in the validation set \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16) \item $mbe: mean bias error @@ -152,6 +156,8 @@ in within population cross-validation (see ?fn_cross_validation_within_populatio \item $model: genomic prediction model name \item $pop_training: population/s used in the training set (separated by commas if more than 1) \item $pop_validation: population/s used in the validation set (separated by commas if more than 1) +\item $n_training: number of samples/entries/pools in the training set +\item $n_validation: number of samples/entries/pools in the validation set \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16) \item $mbe: mean bias error @@ -183,6 +189,8 @@ in within population cross-validation (see ?fn_cross_validation_within_populatio \item $model: genomic prediction model name \item $pop_training: population/s used in the training set (separated by commas if more than 1) \item $pop_validation: population/s used in the validation set (separated by commas if more than 1) +\item $n_training: number of samples/entries/pools in the training set +\item $n_validation: number of samples/entries/pools in the validation set \item $duration_mins: time taken in minutes to fit the genomic prediction model and assess the prediction accuracies \item $n_non_zero: number of non-zero estimated effects (effects greater than machine epsilon ~2.2e-16) \item $mbe: mean bias error