diff --git a/R/main.R b/R/main.R
index 24634b9..0cd78f5 100644
--- a/R/main.R
+++ b/R/main.R
@@ -38,7 +38,7 @@
 #' (see ?fn_load_phenotype for details)
 #' - $pheno_idx_col_y: column number in the phenotype file corresponding to the numeric phenotype data
 #' (see ?fn_load_phenotype for details)
-#' - $pheno_na_strings: strings of characters corresponding to missing data in the phenotype file
+#' - $pheno_vec_na_strings: strings of characters corresponding to missing data in the phenotype file
 #' (see ?fn_load_phenotype for details)
 #' - $pheno_bool_remove_outliers: remove outliers from the phenotype file?
 #' - $pheno_bool_remove_NA: remove samples missing phenotype data in the phenotype file?
@@ -236,7 +236,7 @@
 #' pheno_idx_col_id=1,
 #' pheno_idx_col_pop=2,
 #' pheno_idx_col_y=3,
-#' pheno_na_strings=c("", "-", "NA", "na", "NaN", "missing", "MISSING"),
+#' pheno_vec_na_strings=c("", "-", "NA", "na", "NaN", "missing", "MISSING"),
 #' pheno_bool_remove_outliers=TRUE,
 #' pheno_bool_remove_NA=FALSE,
 #' bool_within=TRUE,
@@ -286,7 +286,7 @@ gp = function(args) {
     # pheno_idx_col_id=1,
     # pheno_idx_col_pop=2,
     # pheno_idx_col_y=3,
-    # pheno_na_strings=c("", "-", "NA", "na", "NaN", "missing", "MISSING"),
+    # pheno_vec_na_strings=c("", "-", "NA", "na", "NaN", "missing", "MISSING"),
     # pheno_bool_remove_outliers=FALSE,
     # pheno_bool_remove_NA=FALSE,
     # bool_within=TRUE,
@@ -318,7 +318,7 @@ gp = function(args) {
         idx_col_id=args$pheno_idx_col_id,
         idx_col_pop=args$pheno_idx_col_pop,
         idx_col_y=args$pheno_idx_col_y,
-        na_strings=args$pheno_na_strings,
+        na_strings=args$pheno_vec_na_strings,
         verbose=args$verbose
     )
     if (methods::is(list_pheno, "gpError")) {return(list_pheno)}
diff --git a/inst/exec_Rscript/0-submit.sh b/inst/exec_Rscript/0-submit.sh
index b32880f..443fb9b 100755
--- a/inst/exec_Rscript/0-submit.sh
+++ b/inst/exec_Rscript/0-submit.sh
@@ -9,10 +9,10 @@ CONFIG_NREPS=$(sed "s/\"/'/g" config.txt | sed -n '4p')
 CONFIG_DIR_OUT=$(sed "s/\"/'/g" config.txt | sed -n '5p')
 CONFIG_JOB_NAME=$(sed "s/\"/'/g" config.txt | sed -n '6p')
 CONFIG_ACCOUNT_NAME=$(sed "s/\"/'/g" config.txt | sed -n '7p')
-CONFIG_NTASKS=$(sed "s/\"/'/g" config.txt | sed -n '8p')
-CONFIG_NCPUS=$(sed "s/\"/'/g" config.txt | sed -n '9p')
-CONFIG_MEM=$(sed "s/\"/'/g" config.txt | sed -n '10p')
-CONFIG_TIME_LIMIT=$(sed "s/\"/'/g" config.txt | sed -n '11p')
+CONFIG_NCPUS=$(sed "s/\"/'/g" config.txt | sed -n '8p')
+CONFIG_MEM=$(sed "s/\"/'/g" config.txt | sed -n '9p')
+CONFIG_TIME_LIMIT=$(sed "s/\"/'/g" config.txt | sed -n '10p')
+CONFIG_MODELS=$(sed "s/\"/'/g" config.txt | sed -n '11p')
 ### Create the checks and submission scripts using the config variables
 sed "s|GENOTYPE_DATA_RDS=\${DIR_SRC}/input/test_geno.Rds|$CONFIG_GENO|g" 1-checks_and_submision.sh | \
     sed "s|PHENOTYPE_DATA_TSV=\${DIR_SRC}/input/test_pheno.tsv|$CONFIG_PHENO|g" | \
@@ -24,10 +24,10 @@ sed "s|GENOTYPE_DATA_RDS=\${DIR_SRC}/input/test_geno.Rds|$CONFIG_GENO|g" 1-check
 ### Create the slurm job scripts using the config variables
 sed "s|SBATCH --job-name='GS'|$CONFIG_JOB_NAME|g" 2-gp_slurm_job.sh | \
     sed "s|SBATCH --account='dbiopast1'|$CONFIG_ACCOUNT_NAME|g" | \
-    sed "s|SBATCH --ntasks=1|$CONFIG_NTASKS|g" | \
     sed "s|SBATCH --cpus-per-task=16|$CONFIG_NCPUS|g" | \
     sed "s|SBATCH --mem=100G|$CONFIG_MEM|g" | \
-    sed "s|SBATCH --time=1-0:0:00|$CONFIG_TIME_LIMIT|g" \
+    sed "s|SBATCH --time=1-0:0:00|$CONFIG_TIME_LIMIT|g" | \
+    sed "s|--vec-models-to-test ridge,lasso,elastic_net,Bayes_A,Bayes_B,Bayes_C,gBLUP|--vec-models-to-test $CONFIG_MODELS|g" \
     > 2-gp_slurm_job-${RUN_NAME}.sh
 ### Check input and submit the slurm job
 chmod +x 1-checks_and_submision-${RUN_NAME}.sh
diff --git a/inst/exec_Rscript/2-gp_slurm_job.sh b/inst/exec_Rscript/2-gp_slurm_job.sh
index e8315f4..7359981 100755
--- a/inst/exec_Rscript/2-gp_slurm_job.sh
+++ b/inst/exec_Rscript/2-gp_slurm_job.sh
@@ -110,6 +110,7 @@ Rscript ${DIR_SRC}/gp.R \
     --pheno-idx-col-y $COLUMN_ID \
     --bool-within TRUE \
     --bool-across $BOOL_ACROSS \
+    --vec-models-to-test ridge,lasso,elastic_net,Bayes_A,Bayes_B,Bayes_C,gBLUP \
     --n-folds $KFOLDS \
     --n-reps $NREPS \
     --bool-parallel TRUE \
diff --git a/inst/exec_Rscript/config.txt b/inst/exec_Rscript/config.txt
index 9aaf480..536872c 100644
--- a/inst/exec_Rscript/config.txt
+++ b/inst/exec_Rscript/config.txt
@@ -5,7 +5,7 @@ NREPS=2
 DIR_OUT=${DIR_SRC}
 SBATCH --job-name="test"
 SBATCH --account="dbiopast2"
-SBATCH --ntasks=1
 SBATCH --cpus-per-task=4
 SBATCH --mem=10G
-SBATCH --time=0-0:10:00
\ No newline at end of file
+SBATCH --time=0-0:10:00
+ridge,Bayes_A,Bayes_B,Bayes_C,gBLUP
\ No newline at end of file
diff --git a/inst/exec_Rscript/gp.R b/inst/exec_Rscript/gp.R
index f4d1aae..9823c84 100644
--- a/inst/exec_Rscript/gp.R
+++ b/inst/exec_Rscript/gp.R
@@ -28,7 +28,7 @@ parser$add_argument("--pheno-header", dest="p
 parser$add_argument("--pheno-idx-col-id", dest="pheno_idx_col_id", type="integer", default=1, help="Column number in the phenotype file corresponding to the sample names [default=1].")
 parser$add_argument("--pheno-idx-col-pop", dest="pheno_idx_col_pop", type="integer", default=2, help="Column number in the phenotype file corresponding to the population/grouping names [default=2].")
 parser$add_argument("--pheno-idx-col-y", dest="pheno_idx_col_y", type="integer", default=3, help="Column number in the phenotype file corresponding to the numeric phenotype data [default=3].")
-parser$add_argument("--pheno-na-strings", dest="pheno_na_strings", type="character", default=c("", "-", "NA", "na", "NaN", "missing", "MISSING"), help="Strings of characters corresponding to missing data in the phenotype file [default=c('', '-', 'NA', 'na', 'NaN', 'missing', 'MISSING')].")
+parser$add_argument("--pheno-na-strings", dest="pheno_vec_na_strings", type="character", default=c("", "-", "NA", "na", "NaN", "missing", "MISSING"), help="Strings of characters corresponding to missing data in the phenotype file [default=c('', '-', 'NA', 'na', 'NaN', 'missing', 'MISSING')].")
 parser$add_argument("--pheno-bool-remove-outliers", dest="pheno_bool_remove_outliers", type="logical", default=FALSE, help="Remove outliers from the phenotype file [default=FALSE]?")
 parser$add_argument("--pheno-bool-remove-NA", dest="pheno_bool_remove_NA", type="logical", default=FALSE, help="Remove samples missing phenotype data in the phenotype file? [default=FALSE].")
 parser$add_argument("--bool-within", dest="bool_within", type="logical", default=TRUE, help="Perform within population k-fold cross-validation? [default=TRUE].")
@@ -63,7 +63,11 @@ print(paste0(" - with a total of ", args$n_threads, " threads available and
 print(paste0(" a total memory of ", args$max_mem_Gb, " Gb."))
 print(paste0("Start time: ", time_ini))
 print("Input parameters:")
+### Parse comma-separated input vectors (keys must match the parser `dest` names)
+args$vec_models_to_test = unlist(strsplit(gsub(" ", "", args$vec_models_to_test), ","))
+args$pheno_vec_na_strings = unlist(strsplit(gsub(" ", "", args$pheno_vec_na_strings), ","))
 print(args)
+### Run
 fname_out_Rds = gp::gp(args=args)
 time_fin = Sys.time()
 time_duration_minutes = as.numeric(difftime(time_fin, time_ini, units="min"))
diff --git a/man/gp.Rd b/man/gp.Rd
index 9b0164b..95c2dfe 100644
--- a/man/gp.Rd
+++ b/man/gp.Rd
@@ -46,7 +46,7 @@ the top most sparse samples will be removed (see ?fn_filter_genotype for details
 (see ?fn_load_phenotype for details)
 \item $pheno_idx_col_y: column number in the phenotype file corresponding to the numeric phenotype data
 (see ?fn_load_phenotype for details)
-\item $pheno_na_strings: strings of characters corresponding to missing data in the phenotype file
+\item $pheno_vec_na_strings: strings of characters corresponding to missing data in the phenotype file
 (see ?fn_load_phenotype for details)
 \item $pheno_bool_remove_outliers: remove outliers from the phenotype file?
 \item $pheno_bool_remove_NA: remove samples missing phenotype data in the phenotype file?
@@ -274,7 +274,7 @@ args = list(
     pheno_idx_col_id=1,
     pheno_idx_col_pop=2,
     pheno_idx_col_y=3,
-    pheno_na_strings=c("", "-", "NA", "na", "NaN", "missing", "MISSING"),
+    pheno_vec_na_strings=c("", "-", "NA", "na", "NaN", "missing", "MISSING"),
     pheno_bool_remove_outliers=TRUE,
     pheno_bool_remove_NA=FALSE,
     bool_within=TRUE,
diff --git a/tests/testthat/test-main.R b/tests/testthat/test-main.R
index d988cee..75d84c1 100644
--- a/tests/testthat/test-main.R
+++ b/tests/testthat/test-main.R
@@ -31,7 +31,7 @@ test_that("gp", {
         pheno_idx_col_id=1,
         pheno_idx_col_pop=2,
         pheno_idx_col_y=3,
-        pheno_na_strings=c("", "-", "NA", "na", "NaN", "missing", "MISSING"),
+        pheno_vec_na_strings=c("", "-", "NA", "na", "NaN", "missing", "MISSING"),
         pheno_bool_remove_outliers=TRUE,
         pheno_bool_remove_NA=FALSE,
         bool_within=TRUE,