From 0e68163cfc508f61029358895074280f40a3c7a9 Mon Sep 17 00:00:00 2001 From: fweber144 Date: Sat, 16 Jul 2022 17:53:50 +0200 Subject: [PATCH] Fix #342. --- NEWS.md | 1 + R/summary_funs.R | 30 ++++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 55f80f492..384548694 100644 --- a/NEWS.md +++ b/NEWS.md @@ -19,6 +19,7 @@ * `plot.vsel()` now draws the dashed red horizontal line for the reference model (and---if present---the dotted black horizontal line for the baseline model) first (i.e., before the submodel-specific graphical elements), to avoid overplotting. * Fix GitHub issue #339. (GitHub: #340) * Fix argument `d_test` of `varsel()`: Not only the predictive performance of the *reference model* needs to be evaluated on the test data, but also the predictive performance of the *submodels*. (GitHub: #341) +* Fix GitHub issue #342. # projpred 2.1.2 diff --git a/R/summary_funs.R b/R/summary_funs.R index da10db409..2ba61c093 100644 --- a/R/summary_funs.R +++ b/R/summary_funs.R @@ -52,6 +52,11 @@ summ_ref <- varsel$summaries$ref summ_sub <- varsel$summaries$sub + if (varsel$refmodel$family$family == "binomial" && + !all(varsel$d_test$weights == 1)) { + varsel$d_test$y_prop <- varsel$d_test$y / varsel$d_test$weights + } + ## fetch the mu and lppd for the baseline model if (is.null(nfeat_baseline)) { ## no baseline model, i.e, compute the statistics on the actual @@ -171,7 +176,11 @@ get_stat <- function(mu, lppd, d_test, stat, mu.bs = NULL, lppd.bs = NULL, sqrt(n_notna) * n_notna } } else if (stat == "mse") { - y <- d_test$y + if (is.null(d_test$y_prop)) { + y <- d_test$y + } else { + y <- d_test$y_prop + } if (!is.null(mu.bs)) { value <- mean(weights * ((mu - y)^2 - (mu.bs - y)^2), na.rm = TRUE) value.se <- weighted.sd((mu - y)^2 - (mu.bs - y)^2, weights, @@ -183,7 +192,11 @@ get_stat <- function(mu, lppd, d_test, stat, mu.bs = NULL, lppd.bs = NULL, sqrt(n_notna) } } else if (stat == "rmse") { - y <- d_test$y + if (is.null(d_test$y_prop)) { + y <- d_test$y + } else { + y <- d_test$y_prop + } if (!is.null(mu.bs)) { ## make sure the relative rmse is computed using only those points for ## which @@ -219,6 +232,19 @@ get_stat <- function(mu, lppd, d_test, stat, mu.bs = NULL, lppd.bs = NULL, } } else if (stat == "acc" || stat == "pctcorr") { y <- d_test$y + if (!is.null(d_test$y_prop)) { + y <- unlist(lapply(seq_along(y), function(i_short) { + c(rep(0L, d_test$weights[i_short] - y[i_short]), + rep(1L, y[i_short])) + })) + mu <- rep(mu, d_test$weights) + if (!is.null(mu.bs)) { + mu.bs <- rep(mu.bs, d_test$weights) + } + n_notna <- sum(d_test$weights) + weights <- rep(weights, d_test$weights) + weights <- n_notna * weights / sum(weights) + } if (!is.null(mu.bs)) { value <- mean(weights * ((round(mu) == y) - (round(mu.bs) == y)), na.rm = TRUE)