diff --git a/R/load.R b/R/load.R index bd88ea3..5e4da86 100644 --- a/R/load.R +++ b/R/load.R @@ -133,6 +133,15 @@ fn_G_split_off_alternative_allele = function(G, verbose=FALSE) { } ### Extract sample/entry/pool, and loci names list_ids_chr_pos_all = fn_G_extract_names(mat_genotypes=G, verbose=verbose) + if (methods::is(list_ids_chr_pos_all, "gpError")) { + error = chain(list_ids_chr_pos_all, methods::new("gpError", + code=000, + message=paste0( + "Error in load::fn_G_split_off_alternative_allele(...). ", + "Error type returned by list_ids_chr_pos_all = fn_G_extract_names(mat_genotypes=G, verbose=verbose)." + ))) + return(error) + } ### Iterate across loci removing the trailing alternative allele vec_loci = sort(unique(paste0(list_ids_chr_pos_all$vec_chr, "\t", list_ids_chr_pos_all$vec_pos))) p = length(vec_loci) @@ -207,6 +216,15 @@ fn_G_numeric_to_non_numeric = function(G, ploidy=2, verbose=FALSE) { } ### Extract sample/entry/pool, and loci names list_ids_chr_pos_all = fn_G_extract_names(mat_genotypes=G, verbose=verbose) + if (methods::is(list_ids_chr_pos_all, "gpError")) { + error = chain(list_ids_chr_pos_all, methods::new("gpError", + code=000, + message=paste0( + "Error in load::fn_G_numeric_to_non_numeric(...). ", + "Error type returned by list_ids_chr_pos_all = fn_G_extract_names(mat_genotypes=G, verbose=verbose)." + ))) + return(error) + } ### Extract the unique loci and sort per chromosome df_loci = as.data.frame(matrix(unlist(strsplit(unique(paste0(list_ids_chr_pos_all$vec_chr, "\t", list_ids_chr_pos_all$vec_pos)), "\t")), byrow=TRUE, ncol=2)) colnames(df_loci) = c("chr", "pos") @@ -340,10 +358,19 @@ fn_G_non_numeric_to_numeric = function(G_non_numeric, retain_minus_one_alleles_p colnames(G) = vec_colnames ### Return numeric allele frequency genotype matrix with or without all the alleles if (retain_minus_one_alleles_per_locus) { - return(fn_G_split_off_alternative_allele(G=G, verbose=verbose)$G) - } else { - return(G) + list_G_G_alt = fn_G_split_off_alternative_allele(G=G, verbose=verbose) + if (methods::is(list_G_G_alt, "gpError")) { + error = chain(list_G_G_alt, methods::new("gpError", + code=000, + message=paste0( + "Error in load::fn_G_non_numeric_to_numeric(...). ", + "Error type returned by list_G_G_alt = fn_G_split_off_alternative_allele(G=G, verbose=verbose)." + ))) + return(error) + } } + ### Output + return(list_G_G_alt$G) } #' Convert numeric allele frequency matrix into a vcfR::vcf object @@ -384,12 +411,30 @@ fn_G_to_vcf = function(G, min_depth=100, max_depth=1000, verbose=FALSE) { } ### Split-off the alternative allele from the reference allele list_G_G_alt = fn_G_split_off_alternative_allele(G=G, verbose=verbose) + if (methods::is(list_G_G_alt, "gpError")) { + error = chain(list_G_G_alt, methods::new("gpError", + code=000, + message=paste0( + "Error in load::fn_G_to_vcf(...). ", + "Error type returned by list_G_G_alt = fn_G_split_off_alternative_allele(G=G, verbose=verbose)." + ))) + return(error) + } G_alt = list_G_G_alt$G_alt G = list_G_G_alt$G n = nrow(G) p = ncol(G) ### Extract the names of the entries/samples/pools and loci list_ids_chr_pos_all = fn_G_extract_names(mat_genotypes=G, verbose=verbose) + if (methods::is(list_ids_chr_pos_all, "gpError")) { + error = chain(list_ids_chr_pos_all, methods::new("gpError", + code=000, + message=paste0( + "Error in load::fn_G_to_vcf(...). ", + "Error type returned by list_ids_chr_pos_all = fn_G_extract_names(mat_genotypes=G, verbose=verbose)." + ))) + return(error) + } ### Make sure the input matrix is biallelic ### Also, convert the tabs in the loci-alleles names into dashes so as not to interfere with the VCF format vec_loci_names = paste0(list_ids_chr_pos_all$vec_chr, "_", list_ids_chr_pos_all$vec_pos) @@ -510,7 +555,10 @@ fn_vcf_to_G = function(vcf, min_depth=0, max_depth=Inf, retain_minus_one_alleles } vec_pool_names = colnames(vcf@gt)[-1] vec_elements = unique(vcf@gt[, 1]) - if (!((sum(grepl("AD", vec_elements)) == length(vec_elements)) | (sum(grepl("GT", vec_elements)) == length(vec_elements)))) { + ### Make sure that all the loci consistently have either the AD or GT field across all type of field combinations, e.g. c(GT:AD, AD, GT:PL:AD) where the AD field is consistenly present + bool_consistent_AD = sum(grepl("AD", vec_elements)) == length(vec_elements) + bool_consistent_GT = sum(grepl("GT", vec_elements)) == length(vec_elements) + if (!(bool_consistent_AD | bool_consistent_GT)) { error = methods::new("gpError", code=000, message=paste0( @@ -520,7 +568,9 @@ fn_vcf_to_G = function(vcf, min_depth=0, max_depth=Inf, retain_minus_one_alleles "Reformat your vcf file to have the same fields across loci.")) return(error) } - if ((!(sum(grepl("AD", vec_elements)==length(vec_elements))) & !(sum(grepl("GT", vec_elements))==length(vec_elements))) | !(sum(grepl("DP", vec_elements))==length(vec_elements))) { + ### Also make sure that the GP field is present so that we can filter by depth + bool_consistent_DP = sum(grepl("DP", vec_elements)) == length(vec_elements) + if (!bool_consistent_DP) { error = methods::new("gpError", code=000, message=paste0( @@ -705,10 +755,12 @@ fn_classify_allele_frequencies = function(G, ploidy=2, verbose=FALSE) { #' @param non_numeric_Rds save non-numeric Rds genotype file? (Default=FALSE) #' @param verbose show genotype and phenotype data simulation messages? (Default=FALSE) #' @returns -#' $fname_geno_vcf: filename of the simulated genotype data as a vcf file -#' $fname_geno_tsv: filename of the simulated genotype data as a tab-delimited allele frequency table file -#' $fname_geno_rds: filename of the simulated named genotype matrix as an Rds file -#' $fname_pheno_tsv: filename of the simulated phenotype data as a tab-delimited file +#' Ok: +#' $fname_geno_vcf: filename of the simulated genotype data as a vcf file +#' $fname_geno_tsv: filename of the simulated genotype data as a tab-delimited allele frequency table file +#' $fname_geno_rds: filename of the simulated named genotype matrix as an Rds file +#' $fname_pheno_tsv: filename of the simulated phenotype data as a tab-delimited file +#' Err: gpError #' @examples #' list_sim = fn_simulate_data(verbose=TRUE, #' save_geno_vcf=TRUE, save_geno_tsv=TRUE, save_geno_rds=TRUE, save_pheno_tsv=TRUE) @@ -769,13 +821,12 @@ fn_simulate_data = function(n=100, l=1000, ploidy=2, n_alleles=2, min_depth=5, m if (save_geno_vcf) { vcf = fn_G_to_vcf(G=G, min_depth=min_depth, max_depth=max_depth, verbose=verbose) if (methods::is(vcf, "gpError")) { - error = methods::new("gpError", + error = chain(vcf, methods::new("gpError", code=000, message=paste0( "Error in load::fn_simulate_data(...). ", "Please set n_alleles=2 as we can only convert biallelic loci into VCF format at the moment." - )) - error = chain(vcf, error) + ))) return(error) } # fname_geno_vcf = file.path(getwd(), paste0("simulated_genotype-", date, ".vcf.gz")) @@ -788,6 +839,15 @@ fn_simulate_data = function(n=100, l=1000, ploidy=2, n_alleles=2, min_depth=5, m } if (save_geno_tsv) { list_ids_chr_pos_all = fn_G_extract_names(mat_genotypes=G, verbose=verbose) + if (methods::is(list_ids_chr_pos_all, "gpError")) { + error = chain(list_ids_chr_pos_all, methods::new("gpError", + code=000, + message=paste0( + "Error in load::fn_simulate_data(...). ", + "Error type returned by list_ids_chr_pos_all = fn_G_extract_names(mat_genotypes=G, verbose=verbose)." + ))) + return(error) + } df_geno = data.frame(chr=list_ids_chr_pos_all$vec_chr, pos=list_ids_chr_pos_all$vec_pos, allele=list_ids_chr_pos_all$vec_all, t(G)) # fname_geno_tsv = file.path(getwd(), paste0("simulated_genotype-", date, ".tsv")) fname_geno_tsv = tempfile(fileext=".tsv") @@ -806,6 +866,15 @@ fn_simulate_data = function(n=100, l=1000, ploidy=2, n_alleles=2, min_depth=5, m saveRDS(G, file=fname_geno_rds) } else { G_non_numeric = fn_G_numeric_to_non_numeric(G=G, ploidy=ploidy, verbose=verbose) + if (methods::is(G_non_numeric, "gpError")) { + error = chain(G_non_numeric, methods::new("gpError", + code=000, + message=paste0( + "Error in load::fn_simulate_data(...). ", + "Error type returned by G_non_numeric = fn_G_numeric_to_non_numeric(G=G, ploidy=ploidy, verbose=verbose)." + ))) + return(error) + } saveRDS(G_non_numeric, file=fname_geno_rds) } if (verbose) { @@ -874,7 +943,16 @@ fn_load_genotype = function(fname_geno, ploidy=NULL, retain_minus_one_alleles_pe ################ G = readRDS(fname_geno) if (is.numeric(G)==FALSE) { - fn_G_non_numeric_to_numeric(G_non_numeric=G, verbose=verbose) + G = fn_G_non_numeric_to_numeric(G_non_numeric=G, verbose=verbose) + if (methods::is(G, "gpError")) { + error = chain(G, methods::new("gpError", + code=000, + message=paste0( + "Error in load::fn_load_genotype(...). ", + "Error type returned by G = fn_G_non_numeric_to_numeric(G_non_numeric=G, verbose=verbose)." + ))) + return(error) + } } if (verbose) {print("Genotype loaded from an RDS file.")} return(G) @@ -923,11 +1001,29 @@ fn_load_genotype = function(fname_geno, ploidy=NULL, retain_minus_one_alleles_pe }) ### Retain a-1 allele/s per locus, i.e. remove duplicates assuming all loci are biallelic if (retain_minus_one_alleles_per_locus==TRUE) { - G = fn_G_split_off_alternative_allele(G=G, verbose=verbose)$G + list_G_G_alt = fn_G_split_off_alternative_allele(G=G, verbose=verbose) + if (methods::is(list_G_G_alt, "gpError")) { + error = chain(list_G_G_alt, methods::new("gpError", + code=000, + message=paste0( + "Error in load::fn_load_genotype(...). ", + "Error type returned by list_G_G_alt = fn_G_split_off_alternative_allele(G=G, verbose=verbose)." + ))) + return(error) + } } ### Bin allele frequencies if (!is.null(ploidy)) { - G = fn_classify_allele_frequencies(G=G, ploidy=ploidy, verbose=verbose) + G = fn_classify_allele_frequencies(G=list_G_G_alt$G, ploidy=ploidy, verbose=verbose) + if (methods::is(G, "gpError")) { + error = chain(G, methods::new("gpError", + code=000, + message=paste0( + "Error in load::fn_load_genotype(...). ", + "Error type returned by G = fn_classify_allele_frequencies(G=list_G_G_alt$G, ploidy=ploidy, verbose=verbose)" + ))) + return(error) + } } ### Show the allele frequency stats if (verbose) { @@ -1508,7 +1604,7 @@ fn_merge_genotype_and_phenotype = function(G, list_pheno, COVAR=NULL, verbose=FA ### All samples with genotype data will be included and samples without phenotype data will be set to NA (all.x=TRUE) ### Samples with phenotype but without genotype data are omitted. M = merge( - data.frame(id=rownames(G), G), + data.frame(id=rownames(G), G, check.names=FALSE), data.frame(id=names(list_pheno$y), pop=list_pheno$pop, y=list_pheno$y), by="id", all.x=TRUE) if (sum(!is.na(M$y)) == 0) { diff --git a/R/metrics.R b/R/metrics.R index f9c28a9..8af7ac8 100644 --- a/R/metrics.R +++ b/R/metrics.R @@ -9,6 +9,7 @@ #' @param y_true numeric vector observed phenotype values #' @param y_pred numeric vector predicted phenotype values #' @returns +#' Ok: #' $mbe: mean bias error #' $mae: mean absolute error #' $rmse: root mean squared error @@ -31,6 +32,7 @@ #' - predicted trait values are due to purely additive effects, #' - variance in the predicted trait values is proportional to the additive genetic variance (~Va) #' - variance in the true trait values represent the total phenotypic variance (Vp) +#' Err: gpError #' @examples #' y_pred = stats::rnorm(100) #' y_true = y_pred + stats::rnorm(100) diff --git a/R/models.R b/R/models.R index eddce7e..ba0f532 100644 --- a/R/models.R +++ b/R/models.R @@ -73,13 +73,14 @@ fn_ols = function(list_merged, vec_idx_training, vec_idx_validation, other_param # other_params=list(diag_inflate=1e-6) # verbose = TRUE ################################################### + ### Input sanity check if (methods::is(list_merged, "gpError")) { error = chain(list_merged, methods::new("gpError", code=000, message=paste0( "Error in models::fn_ols(...). ", - "Input data is an error type." + "Input data (list_merged) is an error type." ))) return(error) } @@ -179,55 +180,82 @@ fn_ols = function(list_merged, vec_idx_training, vec_idx_validation, other_param n_non_zero=n_non_zero)) } -fn_ridge = function(G, y, vec_idx_training, vec_idx_validation, other_params=list(covariate=NULL)) { - # n = 100; n_alleles = 2 - # G = simquantgen::fn_simulate_genotypes(n=n, l=500, ploidy=2, n_alleles=n_alleles, min_allele_freq=0.001, n_chr=5, max_pos=135e6, dist_bp_at_50perc_r2=5e6, n_threads=2) - # y = simquantgen::fn_simulate_phenotypes(G, n_alleles=n_alleles, dist_effects="norm", n_effects=10, purely_additive=TRUE, n_networks=1, n_effects_per_network=50, h2=0.75, pheno_reps=1)$Y - # y = scale(y, center=TRUE, scale=TRUE) - # vec_idx_validation = c(1:round(n*0.1)) - # vec_idx_training = !(c(1:n) %in% vec_idx_validation) - # other_params=list(covariate=NULL) - ### Make sure that our genotype and phenotype data are labelled, i.e. have row and column names - if (is.null(rownames(G)) | is.null(colnames(G)) | is.null(rownames(y))) { - print("Error: Genotype and/or phenotype data have no labels. Please include the row names (entry names) and column names (loci names).") - return(-1) +fn_ridge = function(list_merged, vec_idx_training, vec_idx_validation, other_params=list(n_folds=10)) { + ################################################### + ### TEST + # list_sim = fn_simulate_data(n_pop=3, verbose=TRUE) + # G = fn_load_genotype(fname_geno=list_sim$fname_geno_vcf) + # list_pheno = fn_load_phenotype(fname_pheno=list_sim$fname_pheno_tsv) + # COVAR = G %*% t(G); # rownames(COVAR) = NULL + # list_merged = fn_merge_genotype_and_phenotype(G=G, list_pheno=list_pheno, COVAR=COVAR, verbose=TRUE) + # n = nrow(list_merged$G) + # vec_idx_training = sample(c(1:n), floor(n/2)) + # vec_idx_validation = c(1:n)[!(c(1:n) %in% vec_idx_training)] + # other_params=list(n_folds=10) + # verbose = TRUE + ################################################### + ### Input sanity check + if (methods::is(list_merged, "gpError")) { + error = chain(list_merged, + methods::new("gpError", + code=000, + message=paste0( + "Error in models::fn_ridge(...). ", + "Input data (list_merged) is an error type." + ))) + return(error) } - X_training = G[vec_idx_training, ] - y_training = y[vec_idx_training] - X_validation = G[vec_idx_validation, ] - y_validation = y[vec_idx_validation] + if (is.logical(c(vec_idx_training, vec_idx_training)) | + (sum(is.na(c(vec_idx_training, vec_idx_training))) > 0) | + (min(c(vec_idx_training, vec_idx_training)) < 0) | + (max(c(vec_idx_training, vec_idx_training)) > nrow(list_merged$G))) { + error = methods::new("gpError", + code=000, + message=paste0( + "Error in models::fn_ridge(...). ", + "Please make sure that the vector of indexes for the training and validation sets are not booleans. ", + "We require the indexes to be positive integers without missing values. ", + "Also, the maximum index (", max(c(vec_idx_training, vec_idx_training)), + ") should be less than or equal to the total number of samples/entries/pools in the input data: (", + nrow(list_merged$G), ").")) + return(error) + } + X_training = list_merged$G[vec_idx_training, ] + y_training = list_merged$list_pheno$y[vec_idx_training] + X_validation = list_merged$G[vec_idx_validation, ] + y_validation = list_merged$list_pheno$y[vec_idx_validation] ### Adding covariate to explanatory matrix - C = other_params$covariate - if (is.null(C) == FALSE) { - if (nrow(C) < max(c(vec_idx_training, vec_idx_validation))) { - print("Error: The covariate matrix is incompatible with the genotype data, i.e. there are less rows in the covariate matrix that the indices in vec_idx_training oridx_validation.") - return(-1) - } - X_training = cbind(C[vec_idx_training, ], X_training) - X_validation = cbind(C[vec_idx_validation, ], X_validation) + if (!is.null(list_merged$COVAR)) { + X_training = cbind(list_merged$COVAR[vec_idx_training, ], X_training) + X_validation = cbind(list_merged$COVAR[vec_idx_validation, ], X_validation) } - sol = glmnet::cv.glmnet(x=X_training, y=y_training, alpha=0) ### Ridge -> alpha = 0.0 - ### If the optimisation picks a lambda which does not provide estimates of the effects of the genotype data then we use the next best lambda - n_non_zero = sol$nzero[sol$lambda==sol$lambda.min] - if (n_non_zero == 0) { - idx = which(sol$lambda==sol$lambda.min) - for (i in 1:(length(sol$lambda)-1)) { - if (sol$nzero[idx] == 0) { - idx = idx + 1 - } - } - n_non_zero = sol$nzero[idx] - b_hat = c(sol$glmnet.fit$a0[idx], sol$glmnet.fit$beta[, idx]) + ### Solve via ridge regularisation + sol = glmnet::cv.glmnet(x=X_training, y=y_training, alpha=0, nfolds=other_params$n_folds, parallel=FALSE) ### Ridge -> alpha = 0.0 + ### Find the first lambda with the lowest squared error (deviance) while having non-zero SNP effects + for (i in 1:length(sol$lambda)) { + # i = 1 + # n_non_zero = sol$nzero[i] + b_hat = c(sol$glmnet.fit$a0[i], sol$glmnet.fit$beta[, i]) + names(b_hat) = c("intercept", colnames(X_training)) y_pred = cbind(rep(1, length(vec_idx_validation)), X_validation) %*% b_hat - } else { - y_pred = stats::predict(sol, newx=X_validation, s="lambda.min") + n_non_zero = sum(b_hat >= .Machine$double.eps) + if (n_non_zero <= 1) { + next + } else { + break + } } - colnames(y_pred) = "pred" - df_y = merge(data.frame(id=rownames(y)[vec_idx_validation], true=y_validation), data.frame(id=rownames(y_pred), pred=y_pred), by="id") - perf = fn_prediction_performance_metrics(y_true=df_y$true, y_pred=df_y$pred) - perf$y_pred = y_pred - perf$n_non_zero = n_non_zero; names(perf$n_non_zero) = NULL - return(perf) + df_y_validation = merge( + data.frame(id=names(y_validation), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=y_validation), + data.frame(id=rownames(y_pred), y_pred=y_pred), + by="id") + list_perf = fn_prediction_performance_metrics(y_true=df_y_validation$y_true, y_pred=df_y_validation$y_pred, verbose=verbose) + ### Output + return(list( + list_perf=list_perf, + df_y_validation=df_y_validation, + vec_effects=b, + n_non_zero=n_non_zero)) } fn_lasso = function(G, y, vec_idx_training, vec_idx_validation, other_params=list(covariate=NULL)) { @@ -524,113 +552,3 @@ fn_gBLUP = function(G, y, vec_idx_training, vec_idx_validation, other_params=lis perf$n_non_zero = NA; names(perf$n_non_zero) = NULL ### This is empty because individual alleles or markers were not used directly in the gBLUP model return(perf) } - -##################################################################### -############################### Tests ############################### -##################################################################### -tests_models = function() { - source(paste0(dirname_functions, "/simquantgen/R/sim.R")) - set.seed(123) - n = 100; n_alleles = 2 - G = simquantgen::fn_simulate_genotypes(n=n, l=500, ploidy=2, n_alleles=n_alleles, min_allele_freq=0.001, n_chr=5, max_pos=135e6, dist_bp_at_50perc_r2=5e6, n_threads=2, verbose=TRUE) - y = simquantgen::fn_simulate_phenotypes(G, n_alleles=n_alleles, dist_effects="norm", n_effects=10, purely_additive=TRUE, n_networks=1, n_effects_per_network=50, h2=0.5, pheno_reps=1, verbose=TRUE)$Y - z = fn_simulate_phenotypes(G, n_alleles=n_alleles, dist_effects="norm", n_effects=10, purely_additive=FALSE, n_networks=50, n_effects_per_network=500, h2=0.5, pheno_reps=1, verbose=TRUE)$Y - C = matrix(stats::rnorm(3*n), ncol=3) - vec_idx_validation = c(1:round(n*0.1)) - vec_idx_training = !(c(1:n) %in% vec_idx_validation) - test_that( - "fn_ols", { - print("fn_ols:") - perf = fn_ols(G, y, vec_idx_training, vec_idx_validation) - expect_equal(perf$corr > 0.0, TRUE) - expect_equal(perf$n_non_zero == 500, TRUE) - perf = fn_ols(G, y, vec_idx_training, vec_idx_validation, other_params=list(tol=.Machine$double.eps, covariate=C)) - expect_equal(perf$corr > 0.0, TRUE) - expect_equal(perf$n_non_zero == 503, TRUE) - } - ) - - test_that( - "fn_ridge", { - print("fn_ridge:") - perf = fn_ridge(G, y, vec_idx_training, vec_idx_validation) - expect_equal(perf$corr > 0.0, TRUE) - expect_equal(perf$n_non_zero == 500, TRUE) - perf = fn_ridge(G, y, vec_idx_training, vec_idx_validation, other_params=list(covariate=C)) - expect_equal(perf$corr > 0.0, TRUE) - expect_equal(perf$n_non_zero == 503, TRUE) - } - ) - - test_that( - "fn_lasso", { - print("fn_lasso:") - perf = fn_lasso(G, y, vec_idx_training, vec_idx_validation) - expect_equal(perf$corr > 0.1, TRUE) - expect_equal(perf$n_non_zero < 500, TRUE) - perf = fn_lasso(G, y, vec_idx_training, vec_idx_validation, other_params=list(covariate=C)) - expect_equal(perf$corr > 0.1, TRUE) - expect_equal(perf$n_non_zero < 503, TRUE) - } - ) - - test_that( - "fn_elastic_net", { - print("fn_elastic_net:") - perf = fn_elastic_net(G, y, vec_idx_training, vec_idx_validation) - expect_equal(perf$corr > 0.1, TRUE) - expect_equal(perf$n_non_zero < 500, TRUE) - perf = fn_elastic_net(G, y, vec_idx_training, vec_idx_validation, other_params=list(covariate=C)) - expect_equal(perf$corr > 0.1, TRUE) - expect_equal(perf$n_non_zero < 503, TRUE) - } - ) - - test_that( - "fn_Bayes_A", { - print("fn_Bayes_A:") - perf = fn_Bayes_A(G, y, vec_idx_training, vec_idx_validation, other_params=list(nIter=1000, burnIn=100, h2=0.5, out_prefix="bglr_bayesA")) - expect_equal(perf$corr > 0.1, TRUE) - expect_equal(perf$n_non_zero == 500, TRUE) - perf = fn_Bayes_A(G, y, vec_idx_training, vec_idx_validation, other_params=list(nIter=1000, burnIn=100, h2=0.5, out_prefix="bglr_bayesA", covariate=C)) - expect_equal(perf$corr > 0.1, TRUE) - expect_equal(perf$n_non_zero == 500, TRUE) ### Excludes the fixed covariate effects - } - ) - - test_that( - "fn_Bayes_B", { - print("fn_Bayes_B:") - perf = fn_Bayes_B(G, y, vec_idx_training, vec_idx_validation, other_params=list(nIter=1000, burnIn=100, h2=0.5, out_prefix="bglr_bayesB")) - expect_equal(perf$corr > 0.1, TRUE) - expect_equal(perf$n_non_zero == 500, TRUE) - perf = fn_Bayes_B(G, y, vec_idx_training, vec_idx_validation, other_params=list(nIter=1000, burnIn=100, h2=0.5, out_prefix="bglr_bayesB", covariate=C)) - expect_equal(perf$corr > 0.1, TRUE) - expect_equal(perf$n_non_zero == 500, TRUE) ### Excludes the fixed covariate effects - } - ) - - test_that( - "fn_Bayes_C", { - print("fn_Bayes_C:") - perf = fn_Bayes_C(G, y, vec_idx_training, vec_idx_validation, other_params=list(nIter=1000, burnIn=100, h2=0.5, out_prefix="bglr_bayesC")) - expect_equal(perf$corr > 0.1, TRUE) - expect_equal(perf$n_non_zero == 500, TRUE) - perf = fn_Bayes_C(G, y, vec_idx_training, vec_idx_validation, other_params=list(nIter=1000, burnIn=100, h2=0.5, out_prefix="bglr_bayesC", covariate=C)) - expect_equal(perf$corr > 0.1, TRUE) - expect_equal(perf$n_non_zero == 500, TRUE) ### Excludes the fixed covariate effects - } - ) - - test_that( - "fn_gBLUP", { - print("fn_gBLUP:") - perf = fn_gBLUP(G, y, vec_idx_training, vec_idx_validation) - expect_equal(perf$corr > 0.1, TRUE) - expect_equal(is.na(perf$n_non_zero), TRUE) - perf = fn_gBLUP(G, y, vec_idx_training, vec_idx_validation, other_params=list(covariate=C)) - expect_equal(perf$corr > 0.1, TRUE) - expect_equal(is.na(perf$n_non_zero), TRUE) ### Excludes the fixed covariate effects - } - ) -} diff --git a/README.md b/README.md index e1cc900..65f683d 100644 --- a/README.md +++ b/README.md @@ -174,6 +174,8 @@ devtools::document() ## Unit tests +On Nix: `nix-shell --run bash --pure`. See [shell.nix](./shell.nix). + Value modularity and write tests for each function definition. See the main tests function and each Rscript for individual unit tests: diff --git a/tests/testthat/test-load.R b/tests/testthat/test-load.R index da053ac..e4d4a36 100644 --- a/tests/testthat/test-load.R +++ b/tests/testthat/test-load.R @@ -1,5 +1,5 @@ # library(testthat) -# source("/group/pasture/Jeff/gp/R/load.R") +# source("R/load.R") test_that("fn_G_extract_names", { n = 100 diff --git a/tests/testthat/test-metrics.R b/tests/testthat/test-metrics.R index 9d24af7..d19c3a9 100644 --- a/tests/testthat/test-metrics.R +++ b/tests/testthat/test-metrics.R @@ -1,6 +1,6 @@ # library(testthat) -# source("/group/pasture/Jeff/gp/R/load.R") -# source("/group/pasture/Jeff/gp/R/metrics.R") +# source("R/load.R") +# source("R/metrics.R") test_that("fn_prediction_performance_metrics", { n = 100 diff --git a/tests/testthat/test-models.R b/tests/testthat/test-models.R index 7212ae5..cf17840 100644 --- a/tests/testthat/test-models.R +++ b/tests/testthat/test-models.R @@ -1,7 +1,7 @@ # library(testthat) -# source("/group/pasture/Jeff/gp/R/load.R") -# source("/group/pasture/Jeff/gp/R/metrics.R") -# source("/group/pasture/Jeff/gp/R/models.R") +# source("R/load.R") +# source("R/metrics.R") +# source("R/models.R") test_that("fn_ols", { set.seed(123)