diff --git a/R/main.R b/R/main.R
index 19092a1..4fb0e30 100644
--- a/R/main.R
+++ b/R/main.R
@@ -511,7 +511,6 @@ gp = function(args) {
         vec_idx_validation = which(is.na(list_merged$list_pheno$y))
         vec_idx_training = which(!is.na(list_merged$list_pheno$y))
         if (length(vec_idx_validation)==0) {
-            ADDITIVE_GENETIC_EFFECTS = NA
             GENOMIC_PREDICTIONS = NA
         } else {
             ### Find the best model in the args$population
@@ -529,13 +528,40 @@ gp = function(args) {
                 other_params = list(n_folds=10)
             }
             perf = eval(parse(text=paste0("fn_", model, "(list_merged=list_merged, vec_idx_training=vec_idx_training, vec_idx_validation=vec_idx_validation, other_params=other_params, verbose=args$verbose)")))
-            ADDITIVE_GENETIC_EFFECTS = list(b=perf$vec_effects, model=model)
             GENOMIC_PREDICTIONS = data.frame(perf$df_y_validation, model=model)
         }
     } else {
-        ADDITIVE_GENETIC_EFFECTS = NA
         GENOMIC_PREDICTIONS = NA
     }
+    ###############################################################################
+    ### EXTRACT WITHIN POPULATION ADDITIVE GENETIC EFFECTS OF EACH MODEL TESTED ###
+    ###############################################################################
+    ### Extract the additive genetic effects from penalised and Bayesian regression models, and sample BLUPs from gBLUP
+    if (args$bool_within) {
+        vec_idx_training = which(!is.na(list_merged$list_pheno$y))
+        ADDITIVE_GENETIC_EFFECTS = NULL
+        for (model in args$vec_models_to_test) {
+            # model = args$vec_models_to_test[1]
+            if ((grepl("Bayes", model)==TRUE) | (grepl("gBLUP", model)==TRUE)) {
+                if (is.null(args$dir_output)) {
+                    args$dir_output = dirname(tempfile())
+                }
+                vec_idx_validation = c()
+                other_params = list(nIter=12e3, burnIn=2e3, out_prefix=file.path(args$dir_output, paste0("bglr_", model)))
+            } else {
+                vec_idx_validation = vec_idx_training
+                other_params = list(n_folds=10)
+            }
+            perf = eval(parse(text=paste0("fn_", model, "(list_merged=list_merged, vec_idx_training=vec_idx_training, vec_idx_validation=vec_idx_validation, other_params=other_params, verbose=FALSE)")))
+            if (is.null(ADDITIVE_GENETIC_EFFECTS)) {
+                ADDITIVE_GENETIC_EFFECTS = eval(parse(text=paste0("list(`", model, "`=list(b=perf$vec_effects))")))
+            } else {
+                eval(parse(text=paste0("ADDITIVE_GENETIC_EFFECTS$`", model, "`=list(b=perf$vec_effects)")))
+            }
+        }
+    } else {
+        ADDITIVE_GENETIC_EFFECTS = NA
+    }
     ### Clean-up
     rm("list_merged")
     gc()
diff --git a/R/models.R b/R/models.R
index b0a9079..c8ad8a4 100644
--- a/R/models.R
+++ b/R/models.R
@@ -315,7 +315,7 @@ fn_ridge = function(list_merged, vec_idx_training, vec_idx_validation, other_par
         txtplot::txtplot(b_hat[!is.na(b_hat) & !is.infinite(b_hat)])
         print(paste0("Number of non-zero effects: ", n_non_zero, " (", round(100*n_non_zero/p), "%)"))
     }
-    ### Evalute prediction performance
+    ### Evaluate prediction performance
     df_y_validation = merge(
         data.frame(id=names(y_validation), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=y_validation),
         data.frame(id=rownames(y_pred), y_pred=y_pred),
@@ -459,7 +459,7 @@ fn_lasso = function(list_merged, vec_idx_training, vec_idx_validation, other_par
         txtplot::txtplot(b_hat[!is.na(b_hat) & !is.infinite(b_hat)])
         print(paste0("Number of non-zero effects: ", n_non_zero, " (", round(100*n_non_zero/p), "%)"))
     }
-    ### Evalute prediction performance
+    ### Evaluate prediction performance
     df_y_validation = merge(
         data.frame(id=names(y_validation), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=y_validation),
         data.frame(id=rownames(y_pred), y_pred=y_pred),
@@ -603,7 +603,7 @@ fn_elastic_net = function(list_merged, vec_idx_training, vec_idx_validation, oth
         txtplot::txtplot(b_hat[!is.na(b_hat) & !is.infinite(b_hat)])
         print(paste0("Number of non-zero effects: ", n_non_zero, " (", round(100*n_non_zero/p), "%)"))
     }
-    ### Evalute prediction performance
+    ### Evaluate prediction performance
     df_y_validation = merge(
         data.frame(id=names(y_validation), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=y_validation),
         data.frame(id=rownames(y_pred), y_pred=y_pred),
@@ -744,7 +744,7 @@ fn_Bayes_A = function(list_merged, vec_idx_training, vec_idx_validation,
         txtplot::txtplot(b_hat[!is.na(b_hat) & !is.infinite(b_hat)])
         print(paste0("Number of non-zero effects: ", n_non_zero, " (", round(100*n_non_zero/p), "%)"))
     }
-    ### Evalute prediction performance
+    ### Evaluate prediction performance
     df_y_validation = merge(
         data.frame(id=names(list_merged$list_pheno$y[vec_idx_validation]), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=list_merged$list_pheno$y[vec_idx_validation]),
         data.frame(id=names(yNA)[vec_idx_validation], y_pred=sol$yHat[vec_idx_validation]),
@@ -891,7 +891,7 @@ fn_Bayes_B = function(list_merged, vec_idx_training, vec_idx_validation,
         txtplot::txtplot(b_hat[!is.na(b_hat) & !is.infinite(b_hat)])
         print(paste0("Number of non-zero effects: ", n_non_zero, " (", round(100*n_non_zero/p), "%)"))
     }
-    ### Evalute prediction performance
+    ### Evaluate prediction performance
     df_y_validation = merge(
         data.frame(id=names(list_merged$list_pheno$y[vec_idx_validation]), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=list_merged$list_pheno$y[vec_idx_validation]),
         data.frame(id=names(yNA)[vec_idx_validation], y_pred=sol$yHat[vec_idx_validation]),
@@ -1038,7 +1038,7 @@ fn_Bayes_C = function(list_merged, vec_idx_training, vec_idx_validation,
         txtplot::txtplot(b_hat[!is.na(b_hat) & !is.infinite(b_hat)])
         print(paste0("Number of non-zero effects: ", n_non_zero, " (", round(100*n_non_zero/p), "%)"))
     }
-    ### Evalute prediction performance
+    ### Evaluate prediction performance
     df_y_validation = merge(
         data.frame(id=names(list_merged$list_pheno$y[vec_idx_validation]), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=list_merged$list_pheno$y[vec_idx_validation]),
         data.frame(id=names(yNA)[vec_idx_validation], y_pred=sol$yHat[vec_idx_validation]),
@@ -1190,7 +1190,7 @@ fn_gBLUP = function(list_merged, vec_idx_training, vec_idx_validation, other_par
         txtplot::txtdensity(u_hat[!is.na(u_hat) & !is.infinite(u_hat)])
         print(paste0("Number of non-zero effects: ", n_non_zero, " (", round(100*n_non_zero/p), "%)"))
     }
-    ### Evalute prediction performance
+    ### Evaluate prediction performance
     df_y_validation = merge(
         data.frame(id=names(list_merged$list_pheno$y[vec_idx_validation]), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=list_merged$list_pheno$y[vec_idx_validation]),
         data.frame(id=names(mod$U$`u:id`$y), y_pred=unlist(mod$U)),
diff --git a/exec/tests/test.sh b/exec/tests/test.sh
index bb48922..d384857 100644
--- a/exec/tests/test.sh
+++ b/exec/tests/test.sh
@@ -40,27 +40,52 @@ grep -A10 "Finished after" output/job_info-*.log
 # vec_correlations = c()
 # for (fname in vec_fnames_Rds) {
 #     # fname = vec_fnames_Rds[1]
-#     list_output = readRDS(fname, check.names=FALSE)
+#     list_output = readRDS(fname)
 #     vec_traits = c(vec_traits, list_output$TRAIT_NAME)
 #     vec_populations = c(vec_populations, unique(list_output$METRICS_WITHIN_POP$pop_training))
 #     vec_training_size = c(vec_training_size, mean(list_output$METRICS_WITHIN_POP$n_training))
 #     vec_validation_size = c(vec_validation_size, mean(list_output$METRICS_WITHIN_POP$n_validation, na.rm=TRUE))
 #     vec_n_folds = c(vec_n_folds, max(list_output$METRICS_WITHIN_POP$fold, na.rm=TRUE))
 #     vec_n_reps = c(vec_n_reps, max(list_output$METRICS_WITHIN_POP$rep, na.rm=TRUE))
-#     vec_correlations = c(vec_correlations, mean(list_output$METRICS_WITHIN_POP$corr, na.rm=TRUE))
+#     vec_idx = which(list_output$METRICS_WITHIN_POP$model == "Bayes_A")
+#     vec_correlations = c(vec_correlations, mean(list_output$METRICS_WITHIN_POP$corr[vec_idx], na.rm=TRUE))
 # }
 # df = data.frame(
 #     trait=vec_traits,
 #     population=vec_populations,
-#     training_siz=vec_training_size,
-#     validation_siz=vec_validation_size,
+#     training_size=vec_training_size,
+#     validation_size=vec_validation_size,
 #     n_fold=vec_n_folds,
 #     n_rep=vec_n_reps,
 #     correlation=vec_correlations)
 # df = df[order(df$trait), ]
 # vec_idx_sort_by_trait_name_length = unlist(lapply(df$trait, FUN=function(x){length(unlist(strsplit(x, "")))}))
 # df = df[order(vec_idx_sort_by_trait_name_length, decreasing=FALSE), ]
-# write.table(df, file="SUMMARY_WITHIN_POP_CORRELATIONS.tsv", sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
+# write.table(df, file="SUMMARY_WITHIN_POP_CORRELATIONS-Bayes_A.tsv", sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
+
+# ### Extract lucerne additive effects for Bayes B in traits:
+# cd output/
+# R
+# vec_fnames_Rds = list.files(path=".", pattern="*.Rds")
+# vec_requested_dates = c(
+#     "biomass_SUM_2024a_20240213",
+#     "biomass_AUT_2024_20240312",
+#     "biomass_LSP_2023_20231023")
+
+# for (date in vec_requested_dates) {
+#     # date = vec_requested_dates[1]
+#     vec_idx = which(grepl(date, vec_fnames_Rds))
+#     for (idx in vec_idx) {
+#         # idx = vec_idx[1]
+#         list_output = readRDS(vec_fnames_Rds[idx])
+#         if (!is.na(list_output$ADDITIVE_GENETIC_EFFECTS[1])[1]) {
+#             df = data.frame(loc=names(list_output$ADDITIVE_GENETIC_EFFECTS$Bayes_B$b), b=list_output$ADDITIVE_GENETIC_EFFECTS$Bayes_B$b)
+#             trait_pop = paste(rev(rev(unlist(strsplit(vec_fnames_Rds[idx], "-"))[-1])[-1]), collapse="-")
+#             fname_out = paste0("MARKER_EFFECTS-Bayes_B-", trait_pop, ".tsv")
+#             write.table(df, file=fname_out, sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
+#         }
+#     }
+# }

 ########################################
 ### COMPLETE SET OF INPUT PARAMETERS ###
diff --git a/app.R b/inst/plot_gs_gp/app.R
similarity index 97%
rename from app.R
rename to inst/plot_gs_gp/app.R
index 9d69882..bcc3352 100644
--- a/app.R
+++ b/inst/plot_gs_gp/app.R
@@ -41,7 +41,8 @@ ui <- page_fillable(
         shinyWidgets::pickerInput(inputId="model", label="Filter by model:", choices="", multiple=TRUE, options=list(`live-search`=TRUE, `actions-box`=TRUE)),
         shinyWidgets::pickerInput(inputId="metric", label="Use the genomic prediction accuracy metric:", choices="", multiple=FALSE, options=list(`live-search`=TRUE, `actions-box`=TRUE)),
         shinyWidgets::pickerInput(inputId="group_by", label="Group by:", choices=c("trait", "pop_training", "model"), selected=c("trait", "pop", "model"), multiple=TRUE, options=list(`live-search`=TRUE, `actions-box`=TRUE)),
-        shinyWidgets::pickerInput(inputId="sort_by", label="Sort by:", choices=c("increasing_mean", "decreasing_mean", "increasing_median", "decreasing_median", "alphabetical"), selected="increasing_mean", multiple=FALSE, options=list(`live-search`=TRUE, `actions-box`=TRUE))
+        shinyWidgets::pickerInput(inputId="sort_by", label="Sort by:", choices=c("increasing_mean", "decreasing_mean", "increasing_median", "decreasing_median", "alphabetical"), selected="increasing_mean", multiple=FALSE, options=list(`live-search`=TRUE, `actions-box`=TRUE)),
+        shinyWidgets::materialSwitch(inputId="within_box_with_labels", label="Mean-labelled boxplot", value=FALSE, status="primary", right=FALSE)
     ),
     mainPanel(
         width=750,
@@ -484,14 +485,37 @@ server <- function(input, output, session) {
             df$grouping = factor(df$grouping, levels=as.character(df_agg$grouping[order(df_agg$metric, decreasing=TRUE)]))
         }
         ### Plot
-        p = plot_ly(data=df,
-            y=~metric,
-            x=~grouping,
-            type='violin',
-            box=list(visible=TRUE),
-            meanline = list(visible=TRUE),
-            split=~grouping
-        )
+        if (!input$within_box_with_labels) {
+            p = plot_ly(data=df,
+                y=~metric,
+                x=~grouping,
+                type='violin',
+                box=list(visible=TRUE),
+                meanline = list(visible=TRUE),
+                split=~grouping
+            )
+        } else {
+            list_bp = boxplot(metric ~ grouping, data=df, plot=FALSE)
+            n_groups = ncol(list_bp$stats)
+            df_agg = aggregate(metric ~ grouping, data=df, FUN=mean, na.rm=TRUE)
+            colnames(df_agg)[2] = "metric_mean"
+            df = merge(merge(df, df_agg, sort=FALSE, by="grouping"),
+                data.frame(grouping=list_bp$names, pos_x=seq(0, n_groups+1, length=n_groups+1)[1:n_groups], pos_y=1.05*min(df$metric, na.rm=TRUE)),
+                sort=FALSE, by="grouping")
+            p = plot_ly(data=df,
+                y=~metric,
+                x=~grouping,
+                type='box',
+                boxmean=TRUE,
+                split=~grouping
+            )
+            p = p %>% add_annotations(
+                x=~grouping,
+                y=~pos_y,
+                text=~round(metric_mean, 4),
+                showarrow=FALSE
+            )
+        }
         p = p %>% layout(
             title=title,
             yaxis=list(title=input$metric),
@@ -668,7 +692,7 @@ server <- function(input, output, session) {
             p = plotly::plot_ly(data=df, x=~x, y=~y, type="scatter", mode='markers', text=~ID)
         } else {
             ### Prepare pop_validation x model matrix of GP metric data
-            vec_lopo_pop = sort(unique(df_metrics_across_pop$pop_validation))
+            vec_lopo_pop = sort(input$lopo_pop)
             n = length(vec_lopo_pop)
             m = length(input$lopo_model)
             M = matrix(NA, nrow=n, ncol=m)
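
With this patch, gp() saves ADDITIVE_GENETIC_EFFECTS as a named list keyed by model (e.g. ADDITIVE_GENETIC_EFFECTS$Bayes_B$b) instead of a single b/model pair, so downstream scripts read the marker effects per model rather than assuming one best model. The sketch below is not part of the patch: the mock list, the trait_pop prefix, and the output file names are illustrative assumptions that merely imitate the structure written to the .Rds outputs.

### Sketch only: traverse the per-model ADDITIVE_GENETIC_EFFECTS list and export one marker-effects table per model.
### In practice the list would come from readRDS() on an output .Rds file and trait_pop from its file name.
ADDITIVE_GENETIC_EFFECTS = list(
    `Bayes_B`=list(b=setNames(rnorm(5), paste0("locus_", 1:5))),
    `ridge`=list(b=setNames(rnorm(5), paste0("locus_", 1:5))))
trait_pop = "biomass_SUM_2024a_20240213-DUMMY_POP" ### assumed trait-population prefix
if (is.list(ADDITIVE_GENETIC_EFFECTS)) {
    for (model in names(ADDITIVE_GENETIC_EFFECTS)) {
        b = ADDITIVE_GENETIC_EFFECTS[[model]]$b
        df = data.frame(loc=names(b), b=b)
        fname_out = paste0("MARKER_EFFECTS-", model, "-", trait_pop, ".tsv")
        write.table(df, file=fname_out, sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
    }
}

A minimal, self-contained illustration of the mean-labelled boxplot toggled by the new within_box_with_labels switch is sketched below with made-up data; it mirrors the approach in inst/plot_gs_gp/app.R (plotly box trace with boxmean plus add_annotations) but is not the app code itself, and the grouping/metric columns stand in for the app's own data.

### Sketch only: plotly boxplot with per-group mean labels, on simulated data.
library(plotly)
df = data.frame(grouping=rep(c("pop_A", "pop_B"), each=30), metric=c(rnorm(30, 0.3, 0.1), rnorm(30, 0.5, 0.1)))
df_agg = aggregate(metric ~ grouping, data=df, FUN=mean, na.rm=TRUE)
colnames(df_agg)[2] = "metric_mean"
df = merge(df, df_agg, by="grouping", sort=FALSE)
df$pos_y = 1.05*min(df$metric, na.rm=TRUE) ### label position near the bottom of the y-range, as in the app
p = plot_ly(data=df, y=~metric, x=~grouping, type='box', boxmean=TRUE, split=~grouping)
p = add_annotations(p, x=~grouping, y=~pos_y, text=~round(metric_mean, 4), showarrow=FALSE)
p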