From 58581eb89f2599c8e3567f6c00f90a5c2439c1ac Mon Sep 17 00:00:00 2001
From: Frank Weber <55132727+fweber144@users.noreply.github.com>
Date: Mon, 11 Oct 2021 18:05:35 +0200
Subject: [PATCH] Miscellaneous modifications and fixes (again) (#234)

* Remove `as.matrix.list()` (neither used nor needed).

* Define the default `baseline` in the `vsel` methods directly (instead of

using `NULL`) and remove the `is.null()` case in `.validate_baseline()`.

* Docs: Mention that for argument `baseline`, the best submodel is decided in terms of `stats[1]`.

* Docs of `summary.vsel()`: Minor wording improvements for argument `type`.

* Docs of argument `baseline`: Mention the relevance.

* Simplify `cvfun()` in `get_refmodel.stanreg()`.

* Set a visible default for argument `K` of `cv_varsel()`.

* Improve y-axis label in `plot.vsel()`.

* There are no Gaussian-only `stats` (at least it isn't implemented this

way). Therefore, adapt the tests.

* Fix issue #210.

* Adapt the tests to the fix for issue #210.

* Harmonize definitions and usages of `%<name>%` binary infix operators.

* Unify the equivalent special binary (infix) operators `%||%` and `%ORifNULL%`.

* Avoid function definitions without curly braces because they're not shown in

RStudio's document outline.

* Remove `.get_proj_handle()` (not necessary and had an incorrect default for `regul`).

* `search_forward()`: Remove unused argument `increasing_order`.

* `projfun.R`: Enhance comments.

* `predict.subfit()`: Don't access `subfit$x` if not necessary.

* Minor cleaning.

* Remove more unused methods.

* `get_refmodel.stanreg()`: Throw an error if **rstanarm** is not installed.

* Replace "search path" by "solution path" (at least where relevant and easily possible) to

avoid a (purely optical) confusion with "search part".
---
 NAMESPACE                          |   6 --
 R/cv_varsel.R                      |  49 ++++++-------
 R/divergence_minimizers.R          |   3 +-
 R/gamm4.R                          |   4 +-
 R/methods.R                        | 106 ++++++++++++++---------------
 R/misc.R                           |  45 ++++++------
 R/project.R                        |   6 +-
 R/projfun.R                        |  43 ++++--------
 R/refmodel.R                       |  10 +--
 R/search.R                         |  15 ++--
 R/varsel.R                         |   4 +-
 man/cv_varsel.Rd                   |   6 +-
 man/plot.vsel.Rd                   |   6 +-
 man/suggest_size.Rd                |   6 +-
 man/summary.vsel.Rd                |  13 ++--
 tests/testthat/helpers/testers.R   |  80 +++++++++++++++++-----
 tests/testthat/setup.R             |   4 +-
 tests/testthat/test_datafit.R      |   2 +-
 tests/testthat/test_methods_vsel.R |   3 +-
 tests/testthat/test_varsel.R       |  10 +--
 20 files changed, 218 insertions(+), 203 deletions(-)
diff --git a/NAMESPACE b/NAMESPACE
index 69f9e6470..9f1e39b4b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,11 +3,9 @@
 S3method(as.matrix,gamm4)
 S3method(as.matrix,glm)
 S3method(as.matrix,glmerMod)
-S3method(as.matrix,list)
 S3method(as.matrix,lm)
 S3method(as.matrix,lmerMod)
 S3method(as.matrix,projection)
-S3method(as.matrix,ridgelm)
 S3method(as.matrix,subfit)
 S3method(coef,subfit)
 S3method(cv_varsel,default)
@@ -24,10 +22,6 @@ S3method(solution_terms,projection)
 S3method(solution_terms,vsel)
 S3method(suggest_size,vsel)
 S3method(summary,vsel)
-S3method(t,glm)
-S3method(t,list)
-S3method(t,lm)
-S3method(t,ridgelm)
 S3method(varsel,default)
 S3method(varsel,refmodel)
 export("%>%")
diff --git a/R/cv_varsel.R b/R/cv_varsel.R
index 1c641cd08..5c8a9d8fb 100644
--- a/R/cv_varsel.R
+++ b/R/cv_varsel.R
@@ -24,9 +24,7 @@
 #'   `NULL`, all observations are used, but for faster experimentation, one can
 #'   set this to a smaller value.
 #' @param K Only relevant if `cv_method == "kfold"`. Number of folds in the
-#'   \eqn{K}-fold CV. If `NULL`, then `5` is used for genuine reference models
-#'   (i.e., of class `refmodel`) and `10` for `datafit`s (that is, for penalized
-#'   maximum likelihood estimation).
+#'   \eqn{K}-fold CV.
 #' @param validate_search Only relevant if `cv_method == "LOO"`. A single
 #'   logical value indicating whether to cross-validate also the search part,
 #'   i.e., whether to run the search separately for each CV fold (`TRUE`) or not
@@ -103,17 +101,27 @@ cv_varsel.default <- function(object, ...) {
 #' @rdname cv_varsel
 #' @export
 cv_varsel.refmodel <- function(
-  object, method = NULL,
+  object,
+  method = NULL,
   cv_method = if (!inherits(object, "datafit")) "LOO" else "kfold",
-  ndraws = 20, nclusters = NULL,
-  ndraws_pred = 400, nclusters_pred = NULL,
+  ndraws = 20,
+  nclusters = NULL,
+  ndraws_pred = 400,
+  nclusters_pred = NULL,
   cv_search = !inherits(object, "datafit"),
-  nterms_max = NULL, penalty = NULL,
+  nterms_max = NULL,
+  penalty = NULL,
   verbose = TRUE,
-  nloo = NULL, K = NULL, lambda_min_ratio = 1e-5,
-  nlambda = 150, thresh = 1e-6, regul = 1e-4,
-  validate_search = TRUE, seed = NULL,
-  search_terms = NULL, ...
+  nloo = NULL,
+  K = if (!inherits(object, "datafit")) 5 else 10,
+  lambda_min_ratio = 1e-5,
+  nlambda = 150,
+  thresh = 1e-6,
+  regul = 1e-4,
+  validate_search = TRUE,
+  seed = NULL,
+  search_terms = NULL,
+  ...
 ) {
   refmodel <- object
   ## resolve the arguments similar to varsel
@@ -279,23 +287,16 @@ parse_args_cv_varsel <- function(refmodel, cv_method, K,
     cv_method <- "kfold"
   }
 
-  if (!is.null(K)) {
-    if (length(K) > 1 || !is.numeric(K) || K != round(K)) {
-      stop("K must be a single integer value.")
+  if (cv_method == "kfold") {
+    stopifnot(!is.null(K))
+    if (length(K) > 1 || !is.numeric(K) || !.is.wholenumber(K)) {
+      stop("`K` must be a single integer value.")
     }
     if (K < 2) {
-      stop("K must be at least 2.")
+      stop("`K` must be at least 2.")
     }
     if (K > NROW(refmodel$y)) {
-      stop("K cannot exceed the number of observations.")
-    }
-  }
-
-  if (cv_method == "kfold" && is.null(K)) {
-    if (inherits(refmodel, "datafit")) {
-      K <- 10
-    } else {
-      K <- 5
+      stop("`K` cannot exceed the number of observations.")
     }
   }
 
diff --git a/R/divergence_minimizers.R b/R/divergence_minimizers.R
index 9a5798f80..9c9fe27fd 100644
--- a/R/divergence_minimizers.R
+++ b/R/divergence_minimizers.R
@@ -272,12 +272,11 @@ subprd <- function(fit, newdata = NULL) {
 predict.subfit <- function(subfit, newdata = NULL) {
   beta <- subfit$beta
   alpha <- subfit$alpha
-  x <- subfit$x
   if (is.null(newdata)) {
     if (is.null(beta)) {
       return(as.matrix(rep(alpha, NROW(subfit$x))))
     } else {
-      return(x %*% rbind(alpha, beta))
+      return(subfit$x %*% rbind(alpha, beta))
     }
   } else {
     contrasts_arg <- get_contrasts_arg_list(subfit$formula, newdata)
diff --git a/R/gamm4.R b/R/gamm4.R
index eee88029c..ee84b257d 100644
--- a/R/gamm4.R
+++ b/R/gamm4.R
@@ -7,7 +7,7 @@ gamm4.setup <- function(formula, pterms, data = NULL, knots = NULL) {
     formula$response <- 1
     pterms$response <- 1
   }
-  gam.setup <- `%:::%`("mgcv", "gam.setup")
+  gam.setup <- "mgcv" %:::% "gam.setup"
 
   G <- gam.setup(formula, pterms,
                  data = data, knots = knots, sp = NULL, min.sp = NULL,
@@ -160,7 +160,7 @@ model.matrix.gamm4 <- function(formula, random = NULL, data = NULL,
   names(dl) <- vars ## list of all variables needed
   ## summarize the input data
 
-  variable.summary <- `%:::%`("mgcv", "variable.summary")
+  variable.summary <- "mgcv" %:::% "variable.summary"
   var.summary <- variable.summary(gp$pf, dl, nrow(mf))
 
   ## lmer offset handling work around...
diff --git a/R/methods.R b/R/methods.R
index 29e22d816..2347c1361 100644
--- a/R/methods.R
+++ b/R/methods.R
@@ -294,6 +294,9 @@ proj_predict_aux <- function(proj, mu, weights, ...) {
 #'
 #' @inheritParams summary.vsel
 #' @param x An object of class `vsel` (returned by [varsel()] or [cv_varsel()]).
+#' @param baseline Either `"ref"` or `"best"` indicating whether the baseline is
+#'   the reference model or the best submodel (in terms of `stats[1]`),
+#'   respectively.
 #'
 #' @examples
 #' if (requireNamespace("rstanarm", quietly = TRUE)) {
@@ -317,9 +320,15 @@ proj_predict_aux <- function(proj, mu, weights, ...) {
 #' }
 #'
 #' @export
-plot.vsel <- function(x, nterms_max = NULL, stats = "elpd",
-                      deltas = FALSE, alpha = 0.32, baseline = NULL,
-                      ...) {
+plot.vsel <- function(
+  x,
+  nterms_max = NULL,
+  stats = "elpd",
+  deltas = FALSE,
+  alpha = 0.32,
+  baseline = if (!inherits(x$refmodel, "datafit")) "ref" else "best",
+  ...
+) {
   object <- x
   .validate_vsel_object_stats(object, stats)
   baseline <- .validate_baseline(object$refmodel, baseline, deltas)
@@ -360,7 +369,16 @@ plot.vsel <- function(x, nterms_max = NULL, stats = "elpd",
   if (nterms_max < 1) {
     stop("nterms_max must be at least 1")
   }
-  ylab <- if (deltas) "Difference to the baseline" else "Value"
+  if (baseline == "ref") {
+    baseline_pretty <- "reference model"
+  } else {
+    baseline_pretty <- "best submodel"
+  }
+  if (deltas) {
+    ylab <- paste0("Difference vs.", baseline_pretty)
+  } else {
+    ylab <- "Value"
+  }
 
   # make sure that breaks on the x-axis are integers
   n_opts <- c(4, 5, 6)
@@ -434,18 +452,18 @@ plot.vsel <- function(x, nterms_max = NULL, stats = "elpd",
 #'   `"diff"`, and `"diff.se"` indicating which of these to compute (mean,
 #'   standard error, lower and upper credible bounds, difference to the
 #'   corresponding statistic of the reference model, and standard error of this
-#'   difference). The credible bounds are determined so that `1 - alpha` percent
-#'   of the probability mass falls between them. Items `"diff"` and `"diff.se"`
-#'   are only supported if `!deltas`.
+#'   difference, respectively). The credible bounds are determined so that `1 -
+#'   alpha` percent of the probability mass falls between them. Items `"diff"`
+#'   and `"diff.se"` are only supported if `deltas` is `FALSE`.
 #' @param deltas If `TRUE`, the submodel statistics are estimated relative to
 #'   the baseline model (see argument `baseline`) instead of estimating the
 #'   actual values of the statistics.
 #' @param alpha A number giving the desired coverage of the credible intervals.
 #'   For example, `alpha = 0.32` corresponds to 68% probability mass within the
 #'   intervals, that is, one-standard-error intervals.
-#' @param baseline Either `"ref"` or `"best"` indicating whether the baseline is
-#'   the reference model or the best submodel found, respectively. If `NULL`,
-#'   then `"ref"` is used, except for `datafit`s for which `"best"` is used.
+#' @param baseline Only relevant if `deltas` is `TRUE`. Either `"ref"` or
+#'   `"best"` indicating whether the baseline is the reference model or the best
+#'   submodel (in terms of `stats[1]`), respectively.
 #' @param ... Currently ignored.
 #'
 #' @examples
@@ -470,9 +488,16 @@ plot.vsel <- function(x, nterms_max = NULL, stats = "elpd",
 #' }
 #'
 #' @export
-summary.vsel <- function(object, nterms_max = NULL, stats = "elpd",
-                         type = c("mean", "se", "diff", "diff.se"),
-                         deltas = FALSE, alpha = 0.32, baseline = NULL, ...) {
+summary.vsel <- function(
+  object,
+  nterms_max = NULL,
+  stats = "elpd",
+  type = c("mean", "se", "diff", "diff.se"),
+  deltas = FALSE,
+  alpha = 0.32,
+  baseline = if (!inherits(object$refmodel, "datafit")) "ref" else "best",
+  ...
+) {
   .validate_vsel_object_stats(object, stats)
   baseline <- .validate_baseline(object$refmodel, baseline, deltas)
 
@@ -658,7 +683,6 @@ print.vsel <- function(x, ...) {
 #' results via [plot.vsel()] and/or [summary.vsel()] and make the final decision
 #' based on what is most appropriate for the given problem.
 #'
-#' @inheritParams summary.vsel
 #' @param object An object of class `vsel` (returned by [varsel()] or
 #'   [cv_varsel()]).
 #' @param stat Statistic used for the decision. See [summary.vsel()] for
@@ -673,6 +697,9 @@ print.vsel <- function(x, ...) {
 #' @param type Either `"upper"` or `"lower"` determining whether the decisions
 #'   are based on the upper or lower credible bounds, respectively. See section
 #'   "Details" for more information.
+#' @param baseline Either `"ref"` or `"best"` indicating whether the baseline is
+#'   the reference model or the best submodel (in terms of `stats[1]`),
+#'   respectively.
 #' @param warnings Whether to give warnings if automatic suggestion fails,
 #'   mainly for internal use. Usually there is no reason to set this to `FALSE`.
 #' @param ... Currently ignored.
@@ -735,9 +762,16 @@ suggest_size <- function(object, ...) {
 
 #' @rdname suggest_size
 #' @export
-suggest_size.vsel <- function(object, stat = "elpd", alpha = 0.32, pct = 0,
-                              type = "upper", baseline = NULL, warnings = TRUE,
-                              ...) {
+suggest_size.vsel <- function(
+  object,
+  stat = "elpd",
+  alpha = 0.32,
+  pct = 0,
+  type = "upper",
+  baseline = if (!inherits(object$refmodel, "datafit")) "ref" else "best",
+  warnings = TRUE,
+  ...
+) {
   .validate_vsel_object_stats(object, stat)
   if (length(stat) > 1) {
     stop("Only one statistic can be specified to suggest_size")
@@ -824,13 +858,6 @@ as.matrix.lm <- function(x, ...) {
            replace_population_names())
 }
 
-#' @method as.matrix ridgelm
-#' @keywords internal
-#' @export
-as.matrix.ridgelm <- function(x, ...) {
-  return(as.matrix.lm(x))
-}
-
 #' @method as.matrix subfit
 #' @keywords internal
 #' @export
@@ -979,37 +1006,6 @@ as.matrix.gamm4 <- function(x, ...) {
   return(as.matrix.lm(x, ...))
 }
 
-#' @method as.matrix list
-#' @keywords internal
-#' @export
-as.matrix.list <- function(x, ...) {
-  return(do.call(cbind, lapply(x, as.matrix.glm, ...)))
-}
-
-#' @keywords internal
-#' @export
-t.glm <- function(x, ...) {
-  return(t(as.matrix(x), ...))
-}
-
-#' @keywords internal
-#' @export
-t.lm <- function(x, ...) {
-  return(t(as.matrix(x), ...))
-}
-
-#' @keywords internal
-#' @export
-t.ridgelm <- function(x, ...) {
-  return(t(as.matrix(x), ...))
-}
-
-#' @keywords internal
-#' @export
-t.list <- function(x, ...) {
-  return(t(as.matrix(x), ...))
-}
-
 #' Extract projected parameter draws
 #'
 #' This is the [as.matrix()] method for `projection` objects (returned by
diff --git a/R/misc.R b/R/misc.R
index c154377a3..e83ed05bb 100644
--- a/R/misc.R
+++ b/R/misc.R
@@ -87,10 +87,9 @@ bootstrap <- function(x, fun = mean, b = 1000, oobfun = NULL, seed = NULL,
   }
 }
 
-# from rstanarm
-`%ORifNULL%` <- function(a, b) if (is.null(a)) b else a
-
-.is.wholenumber <- function(x) abs(x - round(x)) < .Machine$double.eps^0.5
+.is.wholenumber <- function(x) {
+  abs(x - round(x)) < .Machine$double.eps^0.5
+}
 
 .validate_num_folds <- function(k, n) {
   if (!is.numeric(k) || length(k) != 1 || !.is.wholenumber(k)) {
@@ -128,22 +127,15 @@ bootstrap <- function(x, fun = mean, b = 1000, oobfun = NULL, seed = NULL,
 }
 
 .validate_baseline <- function(refmodel, baseline, deltas) {
-  if (is.null(baseline)) {
-    if (inherits(refmodel, "datafit")) {
-      baseline <- "best"
-    } else {
-      baseline <- "ref"
-    }
-  } else {
-    if (!(baseline %in% c("ref", "best"))) {
-      stop("Argument 'baseline' must be either 'ref' or 'best'.")
-    }
-    if (baseline == "ref" && deltas == TRUE && inherits(refmodel, "datafit")) {
-      # no reference model (or the results missing for some other reason),
-      # so cannot compute differences between the reference model and submodels
-      stop("Cannot use deltas = TRUE and baseline = 'ref' when there is no ",
-           "reference model.")
-    }
+  stopifnot(!is.null(baseline))
+  if (!(baseline %in% c("ref", "best"))) {
+    stop("Argument 'baseline' must be either 'ref' or 'best'.")
+  }
+  if (baseline == "ref" && deltas == TRUE && inherits(refmodel, "datafit")) {
+    # no reference model (or the results missing for some other reason),
+    # so cannot compute differences between the reference model and submodels
+    stop("Cannot use deltas = TRUE and baseline = 'ref' when there is no ",
+         "reference model.")
   }
   return(baseline)
 }
@@ -344,8 +336,9 @@ nlist <- function(...) {
   dots
 }
 
-## ifelse operator
-"%||%" <- function(x, y) {
+# The `%||%` special binary (infix) operator from brms (equivalent to the
+# `%ORifNULL%` operator from rstanarm):
+`%||%` <- function(x, y) {
   if (is.null(x)) x <- y
   x
 }
@@ -405,7 +398,7 @@ eval2 <- function(expr, envir = parent.frame(), ...) {
   eval(expr, envir, ...)
 }
 
-# coerce 'x' to a single character string
+# coerce `x` to a single character string
 as_one_character <- function(x, allow_na = FALSE) {
   s <- substitute(x)
   x <- as.character(x)
@@ -443,7 +436,7 @@ magrittr::`%>%`
 # Function where() is not exported by package tidyselect, so redefine it here to
 # avoid a note in R CMD check which would occur for usage of
 # tidyselect:::where():
-where <- `%:::%`("tidyselect", "where")
+where <- "tidyselect" %:::% "where"
 
 get_as.matrix_cls_projpred <- function() {
   ### Only works when projpred is loaded via devtools::load_all():
@@ -463,4 +456,6 @@ get_as.matrix_cls_projpred <- function() {
 
 ## Helper function extract and combine mu and lppd from K lists with each
 ## n/K of the elements to one list with n elements
-hf <- function(x) as.list(do.call(rbind, x))
+hf <- function(x) {
+  as.list(do.call(rbind, x))
+}
diff --git a/R/project.R b/R/project.R
index 5f11289fd..8731faa57 100644
--- a/R/project.R
+++ b/R/project.R
@@ -135,9 +135,9 @@ project <- function(object, nterms = NULL, solution_terms = NULL,
       any(
         solution_terms(object)[seq_along(solution_terms)] != solution_terms
       )) {
-    warning("The given `solution_terms` are not part of the search path (from ",
-            "`solution_terms(object)`), so `cv_search` is automatically set ",
-            "to `TRUE`.")
+    warning("The given `solution_terms` are not part of the solution path ",
+            "(from `solution_terms(object)`), so `cv_search` is automatically ",
+            "set to `TRUE`.")
     cv_search <- TRUE
   }
 
diff --git a/R/projfun.R b/R/projfun.R
index d0e1eba8f..7374b879b 100644
--- a/R/projfun.R
+++ b/R/projfun.R
@@ -1,6 +1,7 @@
-## Function handles for the projection
-##
-
+# Function to project the reference model onto a single submodel with predictor
+# terms given in `solution_terms`. Note that "single submodel" does not refer to
+# a single fit (there are as many fits for this single submodel as there are
+# projected draws).
 project_submodel <- function(solution_terms, p_ref, refmodel, family, intercept,
                              regul = 1e-4) {
   validparams <- .validate_wobs_wsample(refmodel$wobs, p_ref$weights, p_ref$mu)
@@ -28,27 +29,11 @@ project_submodel <- function(solution_terms, p_ref, refmodel, family, intercept,
   ))
 }
 
-## function handle for the projection over samples
-.get_proj_handle <- function(refmodel, p_ref, family, regul = 1e-9,
-                             intercept = TRUE) {
-  return(function(solution_terms) {
-    project_submodel(
-      solution_terms = solution_terms, p_ref = p_ref, refmodel = refmodel,
-      family = family, intercept = intercept, regul = regul
-    )
-  })
-}
-
+# Function to project the reference model onto the submodels of given model
+# sizes `nterms`. Returns a list of submodels (each processed by
+# .init_submodel()).
 .get_submodels <- function(search_path, nterms, family, p_ref,
                            refmodel, intercept, regul, cv_search = FALSE) {
-  ##
-  ##
-  ## Project onto given model sizes nterms. Returns a list of submodels. If
-  ## cv_search=FALSE, submodels parameters will be as they were computed during
-  ## the search, so there is no need to project anything anymore, and this
-  ## function simply fetches the information from the search_path list, which
-  ## contains the parameter values.
-
   varorder <- search_path$solution_terms
 
   if (!cv_search) {
@@ -73,14 +58,15 @@ project_submodel <- function(solution_terms, p_ref, refmodel, family, intercept,
     }
   } else {
     ## need to project again for each submodel size
-    projfun <- .get_proj_handle(refmodel, p_ref, family, regul, intercept)
     fetch_submodel <- function(nterms) {
-      solution_terms <- utils::head(varorder, nterms)
-      return(projfun(solution_terms))
+      project_submodel(
+        solution_terms = utils::head(varorder, nterms), p_ref = p_ref,
+        refmodel = refmodel, family = family, intercept = intercept,
+        regul = regul
+      )
     }
   }
-  submodels <- lapply(nterms, fetch_submodel)
-  return(submodels)
+  return(lapply(nterms, fetch_submodel))
 }
 
 .validate_wobs_wsample <- function(ref_wobs, ref_wsample, ref_mu) {
@@ -120,6 +106,5 @@ project_submodel <- function(solution_terms, p_ref, refmodel, family, intercept,
     nlist(mu, dis)
   ), wsample)
   weights <- wsample
-  submodel <- nlist(dis, kl, weights, solution_terms, sub_fit)
-  return(submodel)
+  return(nlist(dis, kl, weights, solution_terms, sub_fit))
 }
diff --git a/R/refmodel.R b/R/refmodel.R
index f32e8baaf..b5ba668d5 100644
--- a/R/refmodel.R
+++ b/R/refmodel.R
@@ -363,6 +363,10 @@ get_refmodel.default <- function(object, formula, family = NULL, ...) {
 #' @rdname refmodel-init-get
 #' @export
 get_refmodel.stanreg <- function(object, ...) {
+  if (!requireNamespace("rstanarm", quietly = TRUE)) {
+    stop("Please install the 'rstanarm' package.")
+  }
+
   # Family ------------------------------------------------------------------
 
   family <- family(object)
@@ -495,10 +499,8 @@ get_refmodel.stanreg <- function(object, ...) {
   }
 
   cvfun <- function(folds, ...) {
-    cvres <- rstanarm::kfold(object, K = max(folds), save_fits = TRUE,
-                             folds = folds, ...)
-    fits <- cvres$fits[, "fit"]
-    return(fits)
+    rstanarm::kfold(object, K = max(folds), save_fits = TRUE,
+                    folds = folds, ...)$fits[, "fit"]
   }
 
   # Miscellaneous -----------------------------------------------------------
diff --git a/R/search.R b/R/search.R
index 6cf4cacb1..866b478b3 100644
--- a/R/search.R
+++ b/R/search.R
@@ -1,9 +1,5 @@
 search_forward <- function(p_ref, refmodel, family, intercept, nterms_max,
-                           verbose = TRUE, opt, search_terms = NULL,
-                           increasing_order = TRUE) {
-  # projfun() performs the projection:
-  projfun <- .get_proj_handle(refmodel, p_ref, family, opt$regul, intercept)
-
+                           verbose = TRUE, opt, search_terms = NULL) {
   formula <- refmodel$formula
   iq <- ceiling(quantile(seq_len(nterms_max), 1:10 / 10))
   if (is.null(search_terms)) {
@@ -22,7 +18,9 @@ search_forward <- function(p_ref, refmodel, family, intercept, nterms_max,
     if (is.null(cands))
       next
     full_cands <- lapply(cands, function(cand) c(chosen, cand))
-    subL <- lapply(full_cands, projfun)
+    subL <- lapply(full_cands, project_submodel,
+                   p_ref = p_ref, refmodel = refmodel, family = family,
+                   intercept = intercept, regul = opt$regul)
 
     ## select best candidate
     imin <- which.min(sapply(subL, "[[", "kl"))
@@ -162,6 +160,7 @@ search_L1 <- function(p_ref, refmodel, family, intercept, nterms_max, penalty,
     if (nterms == 0) {
       formula <- make_formula(c("1"))
       beta <- NULL
+      x <- x[, numeric(), drop = FALSE]
     } else {
       formula <- make_formula(solution_terms[seq_len(nterms)])
       variables <- unlist(lapply(
@@ -180,6 +179,10 @@ search_L1 <- function(p_ref, refmodel, family, intercept, nterms_max, penalty,
       ))
       indices <- match(variables, colnames(x)[search_path$solution_terms])
       beta <- search_path$beta[indices, max(indices) + 1, drop = FALSE]
+      # Also reduce `x` (important for coef.subfit(), for example); note that
+      # `x <- x[, variables, drop = FALSE]` should also be possible, but the
+      # re-use of `colnames(x)` should provide another sanity check:
+      x <- x[, colnames(x)[search_path$solution_terms[indices]], drop = FALSE]
     }
     sub <- nlist(
       alpha = search_path$alpha[nterms + 1],
diff --git a/R/varsel.R b/R/varsel.R
index c15791467..4aa4b8412 100644
--- a/R/varsel.R
+++ b/R/varsel.R
@@ -266,8 +266,8 @@ select <- function(method, p_sel, refmodel, family, intercept, nterms_max,
   ## last three
   ## are returned only if one cluster projection is used for selection):
   ##   solution_terms: the variable ordering
-  ##   beta: coefficients along the search path
-  ##   alpha: intercepts along the search path
+  ##   beta: coefficients along the solution path
+  ##   alpha: intercepts along the solution path
   ##   p_sel: the reference distribution used in the selection (the input
   ##   argument p_sel)
   ##
diff --git a/man/cv_varsel.Rd b/man/cv_varsel.Rd
index a406f1ede..34f674f05 100644
--- a/man/cv_varsel.Rd
+++ b/man/cv_varsel.Rd
@@ -23,7 +23,7 @@ cv_varsel(object, ...)
   penalty = NULL,
   verbose = TRUE,
   nloo = NULL,
-  K = NULL,
+  K = if (!inherits(object, "datafit")) 5 else 10,
   lambda_min_ratio = 1e-05,
   nlambda = 150,
   thresh = 1e-06,
@@ -100,9 +100,7 @@ faster computation but higher uncertainty in the evaluation part. If
 set this to a smaller value.}
 
 \item{K}{Only relevant if \code{cv_method == "kfold"}. Number of folds in the
-\eqn{K}-fold CV. If \code{NULL}, then \code{5} is used for genuine reference models
-(i.e., of class \code{refmodel}) and \code{10} for \code{datafit}s (that is, for penalized
-maximum likelihood estimation).}
+\eqn{K}-fold CV.}
 
 \item{lambda_min_ratio}{Only relevant if \code{method} turns out as \code{"L1"}. Ratio
 between the smallest and largest lambda in the L1-penalized search. This
diff --git a/man/plot.vsel.Rd b/man/plot.vsel.Rd
index 9891a3fa1..555dcc85b 100644
--- a/man/plot.vsel.Rd
+++ b/man/plot.vsel.Rd
@@ -10,7 +10,7 @@
   stats = "elpd",
   deltas = FALSE,
   alpha = 0.32,
-  baseline = NULL,
+  baseline = if (!inherits(x$refmodel, "datafit")) "ref" else "best",
   ...
 )
 }
@@ -44,8 +44,8 @@ For example, \code{alpha = 0.32} corresponds to 68\% probability mass within the
 intervals, that is, one-standard-error intervals.}
 
 \item{baseline}{Either \code{"ref"} or \code{"best"} indicating whether the baseline is
-the reference model or the best submodel found, respectively. If \code{NULL},
-then \code{"ref"} is used, except for \code{datafit}s for which \code{"best"} is used.}
+the reference model or the best submodel (in terms of \code{stats[1]}),
+respectively.}
 
 \item{...}{Currently ignored.}
 }
diff --git a/man/suggest_size.Rd b/man/suggest_size.Rd
index 869fef362..926eda589 100644
--- a/man/suggest_size.Rd
+++ b/man/suggest_size.Rd
@@ -13,7 +13,7 @@ suggest_size(object, ...)
   alpha = 0.32,
   pct = 0,
   type = "upper",
-  baseline = NULL,
+  baseline = if (!inherits(object$refmodel, "datafit")) "ref" else "best",
   warnings = TRUE,
   ...
 )
@@ -41,8 +41,8 @@ are based on the upper or lower credible bounds, respectively. See section
 "Details" for more information.}
 
 \item{baseline}{Either \code{"ref"} or \code{"best"} indicating whether the baseline is
-the reference model or the best submodel found, respectively. If \code{NULL},
-then \code{"ref"} is used, except for \code{datafit}s for which \code{"best"} is used.}
+the reference model or the best submodel (in terms of \code{stats[1]}),
+respectively.}
 
 \item{warnings}{Whether to give warnings if automatic suggestion fails,
 mainly for internal use. Usually there is no reason to set this to \code{FALSE}.}
diff --git a/man/summary.vsel.Rd b/man/summary.vsel.Rd
index fd39aeaea..8ade10b4f 100644
--- a/man/summary.vsel.Rd
+++ b/man/summary.vsel.Rd
@@ -11,7 +11,7 @@
   type = c("mean", "se", "diff", "diff.se"),
   deltas = FALSE,
   alpha = 0.32,
-  baseline = NULL,
+  baseline = if (!inherits(object$refmodel, "datafit")) "ref" else "best",
   ...
 )
 }
@@ -41,9 +41,8 @@ family only);
 \code{"diff"}, and \code{"diff.se"} indicating which of these to compute (mean,
 standard error, lower and upper credible bounds, difference to the
 corresponding statistic of the reference model, and standard error of this
-difference). The credible bounds are determined so that \code{1 - alpha} percent
-of the probability mass falls between them. Items \code{"diff"} and \code{"diff.se"}
-are only supported if \code{!deltas}.}
+difference, respectively). The credible bounds are determined so that \code{1 - alpha} percent of the probability mass falls between them. Items \code{"diff"}
+and \code{"diff.se"} are only supported if \code{deltas} is \code{FALSE}.}
 
 \item{deltas}{If \code{TRUE}, the submodel statistics are estimated relative to
 the baseline model (see argument \code{baseline}) instead of estimating the
@@ -53,9 +52,9 @@ actual values of the statistics.}
 For example, \code{alpha = 0.32} corresponds to 68\% probability mass within the
 intervals, that is, one-standard-error intervals.}
 
-\item{baseline}{Either \code{"ref"} or \code{"best"} indicating whether the baseline is
-the reference model or the best submodel found, respectively. If \code{NULL},
-then \code{"ref"} is used, except for \code{datafit}s for which \code{"best"} is used.}
+\item{baseline}{Only relevant if \code{deltas} is \code{TRUE}. Either \code{"ref"} or
+\code{"best"} indicating whether the baseline is the reference model or the best
+submodel (in terms of \code{stats[1]}), respectively.}
 
 \item{...}{Currently ignored.}
 }
diff --git a/tests/testthat/helpers/testers.R b/tests/testthat/helpers/testers.R
index fcc6f2d97..7a7fd3b70 100644
--- a/tests/testthat/helpers/testers.R
+++ b/tests/testthat/helpers/testers.R
@@ -241,7 +241,7 @@ refmodel_tester <- function(
           drop = FALSE
         ]
         ### TODO (GAMs and GAMMs): Do this manually:
-        mm_s <- `%:::%`("rstanarm", "pp_data")(refmod$fit)$x
+        mm_s <- ("rstanarm" %:::% "pp_data")(refmod$fit)$x
         mm_s <- mm_s[, grep("^s\\(", colnames(mm_s), value = TRUE),
                      drop = FALSE]
         ###
@@ -423,9 +423,9 @@ refmodel_tester <- function(
 #   expected to be of class `"gam"` or `"gamm4"` (depending on whether the
 #   submodel is non-multilevel or multilevel, respectively).
 # @param wobs_expected The expected numeric vector of observation weights.
-# @param ref_formul The formula of the reference model . Should only be needed
-#   if `sub_fit_totest` comes from the L1 `search_path` of an object of class
-#   `"vsel"`. Otherwise, use `NULL`.
+# @param solterms_vsel_L1_search If `sub_fit_totest` comes from the L1
+#   `search_path` of an object of class `"vsel"`, provide here the solution
+#   terms. Otherwise, use `NULL`.
 # @param with_offs A single logical value indicating whether `sub_fit_totest` is
 #   expected to include offsets (`TRUE`) or not (`FALSE`).
 # @param info_str A single character string giving information to be printed in
@@ -441,14 +441,14 @@ sub_fit_tester <- function(
   has_grp = formula_contains_group_terms(sub_formul[[1]]),
   has_add = formula_contains_additive_terms(sub_formul[[1]]),
   wobs_expected = wobs_tst,
-  ref_formul = NULL,
+  solterms_vsel_L1_search = NULL,
   with_offs = FALSE,
   info_str
 ) {
   expect_type(sub_fit_totest, "list")
   expect_length(sub_fit_totest, nprjdraws_expected)
 
-  from_vsel_L1_search <- !is.null(ref_formul)
+  from_vsel_L1_search <- !is.null(solterms_vsel_L1_search)
 
   seq_extensive_tests <- unique(round(
     seq(1, length(sub_fit_totest),
@@ -473,11 +473,7 @@ sub_fit_tester <- function(
     } else {
       ncoefs <- 0L
     }
-    if (!from_vsel_L1_search) {
-      formul_for_mm <- sub_formul[[1]]
-    } else {
-      formul_for_mm <- ref_formul
-    }
+    formul_for_mm <- sub_formul[[1]]
     sub_trms_for_mm <- labels(terms(formul_for_mm))
     if (length(sub_trms_for_mm) > 0) {
       if (any(grepl("xca\\.", sub_trms_for_mm))) {
@@ -497,6 +493,56 @@ sub_fit_tester <- function(
     sub_x_expected <- model.matrix(update(formul_for_mm, NULL ~ . + 0),
                                    data = sub_data,
                                    contrasts.arg = sub_contr)
+    if (from_vsel_L1_search) {
+      attr(sub_x_expected, "assign") <- NULL
+      attr(sub_x_expected, "contrasts") <- NULL
+      # Unfortunately, model.matrix() uses terms() and there seems to be no way
+      # to set `keep.order = TRUE` in that internal terms() call. Thus, we have
+      # to reorder the columns manually:
+      if (length(solterms_vsel_L1_search) > 0) {
+        contr_list <- get_contrasts_arg_list(
+          as.formula(paste("~",
+                           paste(solterms_vsel_L1_search, collapse = " + "))),
+          data = sub_data
+        )
+        terms_contr <- names(contr_list)
+        terms_contr_expd <- lapply(solterms_vsel_L1_search, function(term_crr) {
+          if (!term_crr %in% terms_contr) {
+            return(term_crr)
+          } else {
+            if (sum(terms_contr == term_crr) != 1) {
+              stop("The following code is not general enough. It needs to be ",
+                   "adapted.")
+            }
+            return(paste0(
+              terms_contr[terms_contr == term_crr],
+              rownames(contr_list[[which(terms_contr == term_crr)]])
+            ))
+          }
+        })
+        terms_contr_expd <- unlist(terms_contr_expd)
+        colnms_x <- colnames(sub_x_expected)
+        colnms_x <- unlist(lapply(colnms_x, function(colnm_x) {
+          if (!colnm_x %in% terms_contr_expd) {
+            colon_found <- gregexpr(":", colnm_x)
+            if (length(colon_found) != 1 || colon_found == -1) {
+              stop("The following code is not general enough. It needs to be ",
+                   "adapted.")
+            }
+            return(paste(rev(strsplit(colnm_x, ":")[[1]]), collapse = ":"))
+          } else {
+            return(colnm_x)
+          }
+        }))
+        colnames(sub_x_expected) <- colnms_x
+        sub_x_expected <- sub_x_expected[
+          ,
+          colnames(sub_x_expected)[order(match(colnames(sub_x_expected),
+                                               terms_contr_expd))],
+          drop = FALSE
+        ]
+      }
+    }
     if (from_vsel_L1_search) {
       subfit_nms <- setdiff(subfit_nms, "y")
     }
@@ -778,10 +824,10 @@ projection_tester <- function(p,
   }
   if (!from_vsel_L1_search) {
     y_nm <- as.character(p$refmodel$formula)[2]
-    ref_formul_crr <- NULL
+    solterms_vsel_L1_search_crr <- NULL
   } else {
     y_nm <- ""
-    ref_formul_crr <- p$refmodel$formula
+    solterms_vsel_L1_search_crr <- p$solution_terms
   }
   y_nms <- paste0(".", y_nm)
   # A preliminary check for `nprjdraws_expected` (doesn't work for "datafit"s
@@ -826,7 +872,7 @@ projection_tester <- function(p,
                  sub_data = sub_data_crr,
                  sub_fam = p$family$family,
                  wobs_expected = p$refmodel$wobs,
-                 ref_formul = ref_formul_crr,
+                 solterms_vsel_L1_search = solterms_vsel_L1_search_crr,
                  info_str = info_str)
 
   # dis
@@ -1088,10 +1134,10 @@ vsel_tester <- function(
   nprjdraws_expected <- ncol(clust_ref$mu)
   if (!from_vsel_L1_search) {
     y_nm <- as.character(vs$refmodel$formula)[2]
-    ref_formul_crr <- NULL
+    solterms_vsel_L1_search_crr <- NULL
   } else {
     y_nm <- ""
-    ref_formul_crr <- vs$refmodel$formula
+    solterms_vsel_L1_search_crr <- vs$solution_terms
   }
   y_nms <- paste0(".", y_nm)
   if (nprjdraws_expected > 1) {
@@ -1120,7 +1166,7 @@ vsel_tester <- function(
       sub_data = sub_data_crr,
       sub_fam = vs$family$family,
       wobs_expected = vs$refmodel$wobs,
-      ref_formul = ref_formul_crr,
+      solterms_vsel_L1_search = solterms_vsel_L1_search_crr,
       info_str = paste(info_str, i, sep = "__")
     )
   }
diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R
index 6645e95ac..55d942da9 100644
--- a/tests/testthat/setup.R
+++ b/tests/testthat/setup.R
@@ -537,11 +537,10 @@ cvmeth_tst <- list(
 )
 
 vsel_funs <- nlist("summary.vsel", "plot.vsel", "suggest_size.vsel")
-stats_common <- c("elpd", "mlpd")
+stats_common <- c("elpd", "mlpd", "mse", "rmse")
 stats_tst <- list(
   default_stats = list(),
   common_stats = list(stats = stats_common),
-  gauss_stats = list(stats = c(stats_common, c("mse", "rmse"))),
   binom_stats = list(stats = c(stats_common, c("acc", "auc")))
 )
 type_tst <- c("mean", "lower", "upper", "se")
@@ -851,7 +850,6 @@ cre_args_smmry_vsel <- function(tstsetups_smmry_vsel) {
     fam_crr <- args_obj[[tstsetup_vsel]]$fam_nm
     add_stats <- switch(mod_crr,
                         "glm" = switch(fam_crr,
-                                       "gauss" = "gauss_stats",
                                        "brnll" = "binom_stats",
                                        "binom" = "binom_stats",
                                        "common_stats"),
diff --git a/tests/testthat/test_datafit.R b/tests/testthat/test_datafit.R
index 8ffa2ccff..11fd6ca30 100644
--- a/tests/testthat/test_datafit.R
+++ b/tests/testthat/test_datafit.R
@@ -557,7 +557,7 @@ test_that(paste(
       stats_expected = stats_common,
       type_expected = type_tst,
       cv_method_expected =
-        args_cvvs_datafit[[tstsetup]]$cv_method %ORifNULL% "LOO",
+        args_cvvs_datafit[[tstsetup]]$cv_method %||% "LOO",
       solterms_expected = cvvss_datafit[[tstsetup]]$solution_terms
     )
     if (run_snaps) {
diff --git a/tests/testthat/test_methods_vsel.R b/tests/testthat/test_methods_vsel.R
index 09845bc49..5fb9f10a5 100644
--- a/tests/testthat/test_methods_vsel.R
+++ b/tests/testthat/test_methods_vsel.R
@@ -93,7 +93,7 @@ test_that(paste(
       type_expected = args_smmry_cvvs[[tstsetup]]$type,
       nterms_max_expected = args_smmry_cvvs[[tstsetup]]$nterms_max,
       cv_method_expected =
-        args_cvvs[[tstsetup_cvvs]]$cv_method %ORifNULL% "LOO",
+        args_cvvs[[tstsetup_cvvs]]$cv_method %||% "LOO",
       solterms_expected = cvvss[[tstsetup_cvvs]]$solution_terms
     )
   }
@@ -268,7 +268,6 @@ test_that("`stat` works", {
     tstsetup_vs <- args_smmry_vs[[tstsetup]]$tstsetup_vsel
     fam_crr <- args_vs[[tstsetup_vs]]$fam_nm
     stat_crr_nm <- switch(fam_crr,
-                          "gauss" = "gauss_stats",
                           "brnll" = "binom_stats",
                           "binom" = "binom_stats",
                           "common_stats")
diff --git a/tests/testthat/test_varsel.R b/tests/testthat/test_varsel.R
index 4350fcee2..522edec92 100644
--- a/tests/testthat/test_varsel.R
+++ b/tests/testthat/test_varsel.R
@@ -701,15 +701,15 @@ test_that("`validate_search` works", {
 
 test_that("invalid `K` fails", {
   expect_error(cv_varsel(refmods[[1]], cv_method = "kfold", K = 1),
-               "^K must be at least 2\\.$")
+               "^`K` must be at least 2\\.$")
   expect_error(cv_varsel(refmods[[1]], cv_method = "kfold", K = 1000),
-               "^K cannot exceed the number of observations\\.$")
+               "^`K` cannot exceed the number of observations\\.$")
   expect_error(cv_varsel(refmods[[1]], cv_method = "kfold", K = c(4, 9)),
-               "^K must be a single integer value\\.$")
+               "^`K` must be a single integer value\\.$")
   expect_error(cv_varsel(refmods[[1]], cv_method = "kfold", K = "a"),
-               "^K must be a single integer value\\.$")
+               "^`K` must be a single integer value\\.$")
   expect_error(cv_varsel(refmods[[1]], cv_method = "kfold", K = dat),
-               "^K must be a single integer value\\.$")
+               "^`K` must be a single integer value\\.$")
 })
 
 test_that("`cvfits` (actually passed to init_refmodel()) works", {