Skip to content

Commit

Permalink
emitting additive genetic effects and/or BLUPs for each model and pop…
Browse files Browse the repository at this point in the history
…ulation using the entire dataset
  • Loading branch information
jeffersonfparil committed Jun 14, 2024
1 parent 23a8b41 commit 7c4980f
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 25 deletions.
32 changes: 29 additions & 3 deletions R/main.R
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,6 @@ gp = function(args) {
vec_idx_validation = which(is.na(list_merged$list_pheno$y))
vec_idx_training = which(!is.na(list_merged$list_pheno$y))
if (length(vec_idx_validation)==0) {
ADDITIVE_GENETIC_EFFECTS = NA
GENOMIC_PREDICTIONS = NA
} else {
### Find the best model in the args$population
Expand All @@ -529,13 +528,40 @@ gp = function(args) {
other_params = list(n_folds=10)
}
perf = eval(parse(text=paste0("fn_", model, "(list_merged=list_merged, vec_idx_training=vec_idx_training, vec_idx_validation=vec_idx_validation, other_params=other_params, verbose=args$verbose)")))
ADDITIVE_GENETIC_EFFECTS = list(b=perf$vec_effects, model=model)
GENOMIC_PREDICTIONS = data.frame(perf$df_y_validation, model=model)
}
} else {
ADDITIVE_GENETIC_EFFECTS = NA
GENOMIC_PREDICTIONS = NA
}
###############################################################################
### EXTRACT WITHIN POPULATION ADDITIVE GENETIC EFFECTS OF EACH MODEL TESTED ###
###############################################################################
### Extract the additive genetic effects from penalised and Bayesian regression models, and sample BLUPs from gBLUP
if (args$bool_within) {
vec_idx_training = which(!is.na(list_merged$list_pheno$y))
ADDITIVE_GENETIC_EFFECTS = NULL
for (model in args$vec_models_to_test) {
# model = args$vec_models_to_test[1]
if ((grepl("Bayes", model)==TRUE) | (grepl("gBLUP", model)==TRUE)) {
if (is.null(args$dir_output)) {
args$dir_output = dirname(tempfile())
}
vec_idx_validation = c()
other_params = list(nIter=12e3, burnIn=2e3, out_prefix=file.path(args$dir_output, paste0("bglr_", model)))
} else {
vec_idx_validation = vec_idx_training
other_params = list(n_folds=10)
}
perf = eval(parse(text=paste0("fn_", model, "(list_merged=list_merged, vec_idx_training=vec_idx_training, vec_idx_validation=vec_idx_validation, other_params=other_params, verbose=FALSE)")))
if (is.null(ADDITIVE_GENETIC_EFFECTS)) {
ADDITIVE_GENETIC_EFFECTS = eval(parse(text=paste0("list(`", model, "`=list(b=perf$vec_effects))")))
} else {
eval(parse(text=paste0("ADDITIVE_GENETIC_EFFECTS$`", model, "`=list(b=perf$vec_effects)")))
}
}
} else {
ADDITIVE_GENETIC_EFFECTS = NA
}
### Clean-up
rm("list_merged")
gc()
Expand Down
14 changes: 7 additions & 7 deletions R/models.R
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ fn_ridge = function(list_merged, vec_idx_training, vec_idx_validation, other_par
txtplot::txtplot(b_hat[!is.na(b_hat) & !is.infinite(b_hat)])
print(paste0("Number of non-zero effects: ", n_non_zero, " (", round(100*n_non_zero/p), "%)"))
}
### Evalute prediction performance
### Evaluate prediction performance
df_y_validation = merge(
data.frame(id=names(y_validation), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=y_validation),
data.frame(id=rownames(y_pred), y_pred=y_pred),
Expand Down Expand Up @@ -459,7 +459,7 @@ fn_lasso = function(list_merged, vec_idx_training, vec_idx_validation, other_par
txtplot::txtplot(b_hat[!is.na(b_hat) & !is.infinite(b_hat)])
print(paste0("Number of non-zero effects: ", n_non_zero, " (", round(100*n_non_zero/p), "%)"))
}
### Evalute prediction performance
### Evaluate prediction performance
df_y_validation = merge(
data.frame(id=names(y_validation), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=y_validation),
data.frame(id=rownames(y_pred), y_pred=y_pred),
Expand Down Expand Up @@ -603,7 +603,7 @@ fn_elastic_net = function(list_merged, vec_idx_training, vec_idx_validation, oth
txtplot::txtplot(b_hat[!is.na(b_hat) & !is.infinite(b_hat)])
print(paste0("Number of non-zero effects: ", n_non_zero, " (", round(100*n_non_zero/p), "%)"))
}
### Evalute prediction performance
### Evaluate prediction performance
df_y_validation = merge(
data.frame(id=names(y_validation), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=y_validation),
data.frame(id=rownames(y_pred), y_pred=y_pred),
Expand Down Expand Up @@ -744,7 +744,7 @@ fn_Bayes_A = function(list_merged, vec_idx_training, vec_idx_validation,
txtplot::txtplot(b_hat[!is.na(b_hat) & !is.infinite(b_hat)])
print(paste0("Number of non-zero effects: ", n_non_zero, " (", round(100*n_non_zero/p), "%)"))
}
### Evalute prediction performance
### Evaluate prediction performance
df_y_validation = merge(
data.frame(id=names(list_merged$list_pheno$y[vec_idx_validation]), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=list_merged$list_pheno$y[vec_idx_validation]),
data.frame(id=names(yNA)[vec_idx_validation], y_pred=sol$yHat[vec_idx_validation]),
Expand Down Expand Up @@ -891,7 +891,7 @@ fn_Bayes_B = function(list_merged, vec_idx_training, vec_idx_validation,
txtplot::txtplot(b_hat[!is.na(b_hat) & !is.infinite(b_hat)])
print(paste0("Number of non-zero effects: ", n_non_zero, " (", round(100*n_non_zero/p), "%)"))
}
### Evalute prediction performance
### Evaluate prediction performance
df_y_validation = merge(
data.frame(id=names(list_merged$list_pheno$y[vec_idx_validation]), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=list_merged$list_pheno$y[vec_idx_validation]),
data.frame(id=names(yNA)[vec_idx_validation], y_pred=sol$yHat[vec_idx_validation]),
Expand Down Expand Up @@ -1038,7 +1038,7 @@ fn_Bayes_C = function(list_merged, vec_idx_training, vec_idx_validation,
txtplot::txtplot(b_hat[!is.na(b_hat) & !is.infinite(b_hat)])
print(paste0("Number of non-zero effects: ", n_non_zero, " (", round(100*n_non_zero/p), "%)"))
}
### Evalute prediction performance
### Evaluate prediction performance
df_y_validation = merge(
data.frame(id=names(list_merged$list_pheno$y[vec_idx_validation]), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=list_merged$list_pheno$y[vec_idx_validation]),
data.frame(id=names(yNA)[vec_idx_validation], y_pred=sol$yHat[vec_idx_validation]),
Expand Down Expand Up @@ -1190,7 +1190,7 @@ fn_gBLUP = function(list_merged, vec_idx_training, vec_idx_validation, other_par
txtplot::txtdensity(u_hat[!is.na(u_hat) & !is.infinite(u_hat)])
print(paste0("Number of non-zero effects: ", n_non_zero, " (", round(100*n_non_zero/p), "%)"))
}
### Evalute prediction performance
### Evaluate prediction performance
df_y_validation = merge(
data.frame(id=names(list_merged$list_pheno$y[vec_idx_validation]), pop=list_merged$list_pheno$pop[vec_idx_validation], y_true=list_merged$list_pheno$y[vec_idx_validation]),
data.frame(id=names(mod$U$`u:id`$y), y_pred=unlist(mod$U)),
Expand Down
35 changes: 30 additions & 5 deletions exec/tests/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,27 +40,52 @@ grep -A10 "Finished after" output/job_info-*.log
# vec_correlations = c()
# for (fname in vec_fnames_Rds) {
# # fname = vec_fnames_Rds[1]
# list_output = readRDS(fname, check.names=FALSE)
# list_output = readRDS(fname)
# vec_traits = c(vec_traits, list_output$TRAIT_NAME)
# vec_populations = c(vec_populations, unique(list_output$METRICS_WITHIN_POP$pop_training))
# vec_training_size = c(vec_training_size, mean(list_output$METRICS_WITHIN_POP$n_training))
# vec_validation_size = c(vec_validation_size, mean(list_output$METRICS_WITHIN_POP$n_validation, na.rm=TRUE))
# vec_n_folds = c(vec_n_folds, max(list_output$METRICS_WITHIN_POP$fold, na.rm=TRUE))
# vec_n_reps = c(vec_n_reps, max(list_output$METRICS_WITHIN_POP$rep, na.rm=TRUE))
# vec_correlations = c(vec_correlations, mean(list_output$METRICS_WITHIN_POP$corr, na.rm=TRUE))
# vec_idx = which(list_output$METRICS_WITHIN_POP$model == "Bayes_A")
# vec_correlations = c(vec_correlations, mean(list_output$METRICS_WITHIN_POP$corr[vec_idx], na.rm=TRUE))
# }
# df = data.frame(
# trait=vec_traits,
# population=vec_populations,
# training_siz=vec_training_size,
# validation_siz=vec_validation_size,
# training_size=vec_training_size,
# validation_size=vec_validation_size,
# n_fold=vec_n_folds,
# n_rep=vec_n_reps,
# correlation=vec_correlations)
# df = df[order(df$trait), ]
# vec_idx_sort_by_trait_name_length = unlist(lapply(df$trait, FUN=function(x){length(unlist(strsplit(x, "")))}))
# df = df[order(vec_idx_sort_by_trait_name_length, decreasing=FALSE), ]
# write.table(df, file="SUMMARY_WITHIN_POP_CORRELATIONS.tsv", sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
# write.table(df, file="SUMMARY_WITHIN_POP_CORRELATIONS-Bayes_A.tsv", sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)

# ### Extract lucerne additive effects for Bayes B in traits:
# cd output/
# R
# vec_fnames_Rds = list.files(path=".", pattern="*.Rds")
# vec_requested_dates = c(
# "biomass_SUM_2024a_20240213",
# "biomass_AUT_2024_20240312",
# "biomass_LSP_2023_20231023")

# for (date in vec_requested_dates) {
# # date = vec_requested_dates[1]
# vec_idx = which(grepl(date, vec_fnames_Rds))
# for (idx in vec_idx) {
# # idx = vec_idx[1]
# list_output = readRDS(vec_fnames_Rds[idx])
# if (!is.na(list_output$ADDITIVE_GENETIC_EFFECTS[1])[1]) {
# df = data.frame(loc=names(list_output$ADDITIVE_GENETIC_EFFECTS$b), b=list_output$ADDITIVE_GENETIC_EFFECTS$b)
# trait_pop = paste(rev(rev(unlist(strsplit(vec_fnames_Rds[idx], "-"))[-1])[-1]), collapse="-")
# fname_out = paste0("MARKER_EFFECTS-", list_output$ADDITIVE_GENETIC_EFFECTS$model, "-", trait_pop, ".tsv")
# write.table(df, file=fname_out, sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
# }
# }
# }

########################################
### COMPLETE SET OF INPUT PARAMETERS ###
Expand Down
44 changes: 34 additions & 10 deletions app.R → inst/plot_gs_gp/app.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ ui <- page_fillable(
shinyWidgets::pickerInput(inputId="model", label="Filter by model:", choices="", multiple=TRUE, options=list(`live-search`=TRUE, `actions-box`=TRUE)),
shinyWidgets::pickerInput(inputId="metric", label="Use the genomic prediction accuracy metric:", choices="", multiple=FALSE, options=list(`live-search`=TRUE, `actions-box`=TRUE)),
shinyWidgets::pickerInput(inputId="group_by", label="Group by:", choices=c("trait", "pop_training", "model"), selected=c("trait", "pop", "model"), multiple=TRUE, options=list(`live-search`=TRUE, `actions-box`=TRUE)),
shinyWidgets::pickerInput(inputId="sort_by", label="Sort by:", choices=c("increasing_mean", "decreasing_mean", "increasing_median", "decreasing_median", "alphabetical"), selected="increasing_mean", multiple=FALSE, options=list(`live-search`=TRUE, `actions-box`=TRUE))
shinyWidgets::pickerInput(inputId="sort_by", label="Sort by:", choices=c("increasing_mean", "decreasing_mean", "increasing_median", "decreasing_median", "alphabetical"), selected="increasing_mean", multiple=FALSE, options=list(`live-search`=TRUE, `actions-box`=TRUE)),
shinyWidgets::materialSwitch(inputId="within_box_with_labels", label="Mean-labelled boxplot", value=FALSE, status="primary", right=FALSE)
),
mainPanel(
width=750,
Expand Down Expand Up @@ -484,14 +485,37 @@ server <- function(input, output, session) {
df$grouping = factor(df$grouping, levels=as.character(df_agg$grouping[order(df_agg$metric, decreasing=TRUE)]))
}
### Plot
p = plot_ly(data=df,
y=~metric,
x=~grouping,
type='violin',
box=list(visible=TRUE),
meanline = list(visible=TRUE),
split=~grouping
)
if (!input$within_box_with_labels) {
p = plot_ly(data=df,
y=~metric,
x=~grouping,
type='violin',
box=list(visible=TRUE),
meanline = list(visible=TRUE),
split=~grouping
)
} else {
list_bp = boxplot(metric ~ grouping, data=df, plot=FALSE)
n_groups = ncol(list_bp$stats)
df_agg = aggregate(metric ~ grouping, data=df, FUN=mean, na.rm=TRUE)
colnames(df_agg)[2] = "metric_mean"
df = merge(merge(df, df_agg, sort=FALSE, by="grouping"),
data.frame(grouping=list_bp$names, pos_x=seq(0, n_groups+1, length=n_groups+1)[1:n_groups], pos_y=1.05*min(df$metric, na.rm=TRUE)),
sort=FALSE, by="grouping")
p = plot_ly(data=df,
y=~metric,
x=~grouping,
type='box',
boxmean=TRUE,
split=~grouping
)
p = p %>% add_annotations(
x=~grouping,
y=~pos_y,
text=~round(metric_mean, 4),
showarrow=FALSE
)
}
p = p %>% layout(
title=title,
yaxis=list(title=input$metric),
Expand Down Expand Up @@ -668,7 +692,7 @@ server <- function(input, output, session) {
p = plotly::plot_ly(data=df, x=~x, y=~y, type="scatter", mode='markers', text=~ID)
} else {
### Prepare pop_validation x model matrix of GP metric data
vec_lopo_pop = sort(unique(df_metrics_across_pop$pop_validation))
vec_lopo_pop = sort(input$lopo_pop)
n = length(vec_lopo_pop)
m = length(input$lopo_model)
M = matrix(NA, nrow=n, ncol=m)
Expand Down

0 comments on commit 7c4980f

Please sign in to comment.