From 0e68163cfc508f61029358895074280f40a3c7a9 Mon Sep 17 00:00:00 2001
From: fweber144 <fweber144@protonmail.com>
Date: Sat, 16 Jul 2022 17:53:50 +0200
Subject: [PATCH] Fix #342.

---
 NEWS.md          |  1 +
 R/summary_funs.R | 30 ++++++++++++++++++++++++++++--
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 55f80f492..384548694 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -19,6 +19,7 @@
 * `plot.vsel()` now draws the dashed red horizontal line for the reference model (and---if present---the dotted black horizontal line for the baseline model) first (i.e., before the submodel-specific graphical elements), to avoid overplotting.
 * Fix GitHub issue #339. (GitHub: #340)
 * Fix argument `d_test` of `varsel()`: Not only the predictive performance of the *reference model* needs to be evaluated on the test data, but also the predictive performance of the *submodels*. (GitHub: #341)
+* Fix GitHub issue #342.
 
 # projpred 2.1.2
 
diff --git a/R/summary_funs.R b/R/summary_funs.R
index da10db409..2ba61c093 100644
--- a/R/summary_funs.R
+++ b/R/summary_funs.R
@@ -52,6 +52,11 @@
   summ_ref <- varsel$summaries$ref
   summ_sub <- varsel$summaries$sub
 
+  if (varsel$refmodel$family$family == "binomial" &&
+      !all(varsel$d_test$weights == 1)) {
+    varsel$d_test$y_prop <- varsel$d_test$y / varsel$d_test$weights
+  }
+
   ## fetch the mu and lppd for the baseline model
   if (is.null(nfeat_baseline)) {
     ## no baseline model, i.e, compute the statistics on the actual
@@ -171,7 +176,11 @@ get_stat <- function(mu, lppd, d_test, stat, mu.bs = NULL, lppd.bs = NULL,
         sqrt(n_notna) * n_notna
     }
   } else if (stat == "mse") {
-    y <- d_test$y
+    if (is.null(d_test$y_prop)) {
+      y <- d_test$y
+    } else {
+      y <- d_test$y_prop
+    }
     if (!is.null(mu.bs)) {
       value <- mean(weights * ((mu - y)^2 - (mu.bs - y)^2), na.rm = TRUE)
       value.se <- weighted.sd((mu - y)^2 - (mu.bs - y)^2, weights,
@@ -183,7 +192,11 @@ get_stat <- function(mu, lppd, d_test, stat, mu.bs = NULL, lppd.bs = NULL,
         sqrt(n_notna)
     }
   } else if (stat == "rmse") {
-    y <- d_test$y
+    if (is.null(d_test$y_prop)) {
+      y <- d_test$y
+    } else {
+      y <- d_test$y_prop
+    }
     if (!is.null(mu.bs)) {
       ## make sure the relative rmse is computed using only those points for
       ## which
@@ -219,6 +232,19 @@ get_stat <- function(mu, lppd, d_test, stat, mu.bs = NULL, lppd.bs = NULL,
     }
   } else if (stat == "acc" || stat == "pctcorr") {
     y <- d_test$y
+    if (!is.null(d_test$y_prop)) {
+      y <- unlist(lapply(seq_along(y), function(i_short) {
+        c(rep(0L, d_test$weights[i_short] - y[i_short]),
+          rep(1L, y[i_short]))
+      }))
+      mu <- rep(mu, d_test$weights)
+      if (!is.null(mu.bs)) {
+        mu.bs <- rep(mu.bs, d_test$weights)
+      }
+      n_notna <- sum(d_test$weights)
+      weights <- rep(weights, d_test$weights)
+      weights <- n_notna * weights / sum(weights)
+    }
     if (!is.null(mu.bs)) {
       value <- mean(weights * ((round(mu) == y) - (round(mu.bs) == y)),
                     na.rm = TRUE)