Miscellaneous modifications and fixes (again) (#234)

* Remove `as.matrix.list()` (neither used nor needed). * Define the default `baseline` in the `vsel` methods directly (instead of using `NULL`) and remove the `is.null()` case in `.validate_baseline()`. * Docs: Mention that for argument `baseline`, the best submodel is decided in terms of `stats[1]`. * Docs of `summary.vsel()`: Minor wording improvements for argument `type`. * Docs of argument `baseline`: Mention the relevance. * Simplify `cvfun()` in `get_refmodel.stanreg()`. * Set a visible default for argument `K` of `cv_varsel()`. * Improve y-axis label in `plot.vsel()`. * There are no Gaussian-only `stats` (at least it isn't implemented this way). Therefore, adapt the tests. * Fix issue #210. * Adapt the tests to the fix for issue #210. * Harmonize definitions and usages of `%<name>%` binary infix operators. * Unify the equivalent special binary (infix) operators `%||%` and `%ORifNULL%`. * Avoid function definitions without curly braces because they're not shown in RStudio's document outline. * Remove `.get_proj_handle()` (not necessary and had an incorrect default for `regul`). * `search_forward()`: Remove unused argument `increasing_order`. * `projfun.R`: Enhance comments. * `predict.subfit()`: Don't access `subfit$x` if not necessary. * Minor cleaning. * Remove more unused methods. * `get_refmodel.stanreg()`: Throw an error if **rstanarm** is not installed. * Replace "search path" by "solution path" (at least where relevant and easily possible) to avoid a (purely optical) confusion with "search part".
stan-dev · Oct 11, 2021 · 58581eb · 58581eb
1 parent dd4bc36
commit 58581eb
Show file tree

Hide file tree

Showing 20 changed files with 218 additions and 203 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -3,11 +3,9 @@
 S3method(as.matrix,gamm4)
 S3method(as.matrix,glm)
 S3method(as.matrix,glmerMod)
-S3method(as.matrix,list)
 S3method(as.matrix,lm)
 S3method(as.matrix,lmerMod)
 S3method(as.matrix,projection)
-S3method(as.matrix,ridgelm)
 S3method(as.matrix,subfit)
 S3method(coef,subfit)
 S3method(cv_varsel,default)
@@ -24,10 +22,6 @@ S3method(solution_terms,projection)
 S3method(solution_terms,vsel)
 S3method(suggest_size,vsel)
 S3method(summary,vsel)
-S3method(t,glm)
-S3method(t,list)
-S3method(t,lm)
-S3method(t,ridgelm)
 S3method(varsel,default)
 S3method(varsel,refmodel)
 export("%>%")

diff --git a/R/cv_varsel.R b/R/cv_varsel.R
@@ -24,9 +24,7 @@
 #'   `NULL`, all observations are used, but for faster experimentation, one can
 #'   set this to a smaller value.
 #' @param K Only relevant if `cv_method == "kfold"`. Number of folds in the
-#'   \eqn{K}-fold CV. If `NULL`, then `5` is used for genuine reference models
-#'   (i.e., of class `refmodel`) and `10` for `datafit`s (that is, for penalized
-#'   maximum likelihood estimation).
+#'   \eqn{K}-fold CV.
 #' @param validate_search Only relevant if `cv_method == "LOO"`. A single
 #'   logical value indicating whether to cross-validate also the search part,
 #'   i.e., whether to run the search separately for each CV fold (`TRUE`) or not
@@ -103,17 +101,27 @@ cv_varsel.default <- function(object, ...) {
 #' @rdname cv_varsel
 #' @export
 cv_varsel.refmodel <- function(
-  object, method = NULL,
+  object,
+  method = NULL,
   cv_method = if (!inherits(object, "datafit")) "LOO" else "kfold",
-  ndraws = 20, nclusters = NULL,
-  ndraws_pred = 400, nclusters_pred = NULL,
+  ndraws = 20,
+  nclusters = NULL,
+  ndraws_pred = 400,
+  nclusters_pred = NULL,
   cv_search = !inherits(object, "datafit"),
-  nterms_max = NULL, penalty = NULL,
+  nterms_max = NULL,
+  penalty = NULL,
   verbose = TRUE,
-  nloo = NULL, K = NULL, lambda_min_ratio = 1e-5,
-  nlambda = 150, thresh = 1e-6, regul = 1e-4,
-  validate_search = TRUE, seed = NULL,
-  search_terms = NULL, ...
+  nloo = NULL,
+  K = if (!inherits(object, "datafit")) 5 else 10,
+  lambda_min_ratio = 1e-5,
+  nlambda = 150,
+  thresh = 1e-6,
+  regul = 1e-4,
+  validate_search = TRUE,
+  seed = NULL,
+  search_terms = NULL,
+  ...
 ) {
   refmodel <- object
   ## resolve the arguments similar to varsel
@@ -279,23 +287,16 @@ parse_args_cv_varsel <- function(refmodel, cv_method, K,
     cv_method <- "kfold"
   }
 
-  if (!is.null(K)) {
-    if (length(K) > 1 || !is.numeric(K) || K != round(K)) {
-      stop("K must be a single integer value.")
+  if (cv_method == "kfold") {
+    stopifnot(!is.null(K))
+    if (length(K) > 1 || !is.numeric(K) || !.is.wholenumber(K)) {
+      stop("`K` must be a single integer value.")
     }
     if (K < 2) {
-      stop("K must be at least 2.")
+      stop("`K` must be at least 2.")
     }
     if (K > NROW(refmodel$y)) {
-      stop("K cannot exceed the number of observations.")
-    }
-  }
-
-  if (cv_method == "kfold" && is.null(K)) {
-    if (inherits(refmodel, "datafit")) {
-      K <- 10
-    } else {
-      K <- 5
+      stop("`K` cannot exceed the number of observations.")
     }
   }
 

diff --git a/R/divergence_minimizers.R b/R/divergence_minimizers.R
@@ -272,12 +272,11 @@ subprd <- function(fit, newdata = NULL) {
 predict.subfit <- function(subfit, newdata = NULL) {
   beta <- subfit$beta
   alpha <- subfit$alpha
-  x <- subfit$x
   if (is.null(newdata)) {
     if (is.null(beta)) {
       return(as.matrix(rep(alpha, NROW(subfit$x))))
     } else {
-      return(x %*% rbind(alpha, beta))
+      return(subfit$x %*% rbind(alpha, beta))
     }
   } else {
     contrasts_arg <- get_contrasts_arg_list(subfit$formula, newdata)

diff --git a/R/gamm4.R b/R/gamm4.R
@@ -7,7 +7,7 @@ gamm4.setup <- function(formula, pterms, data = NULL, knots = NULL) {
     formula$response <- 1
     pterms$response <- 1
   }
-  gam.setup <- `%:::%`("mgcv", "gam.setup")
+  gam.setup <- "mgcv" %:::% "gam.setup"
 
   G <- gam.setup(formula, pterms,
                  data = data, knots = knots, sp = NULL, min.sp = NULL,
@@ -160,7 +160,7 @@ model.matrix.gamm4 <- function(formula, random = NULL, data = NULL,
   names(dl) <- vars ## list of all variables needed
   ## summarize the input data
 
-  variable.summary <- `%:::%`("mgcv", "variable.summary")
+  variable.summary <- "mgcv" %:::% "variable.summary"
   var.summary <- variable.summary(gp$pf, dl, nrow(mf))
 
   ## lmer offset handling work around...

diff --git a/R/methods.R b/R/methods.R
@@ -294,6 +294,9 @@ proj_predict_aux <- function(proj, mu, weights, ...) {
 #'
 #' @inheritParams summary.vsel
 #' @param x An object of class `vsel` (returned by [varsel()] or [cv_varsel()]).
+#' @param baseline Either `"ref"` or `"best"` indicating whether the baseline is
+#'   the reference model or the best submodel (in terms of `stats[1]`),
+#'   respectively.
 #'
 #' @examples
 #' if (requireNamespace("rstanarm", quietly = TRUE)) {
@@ -317,9 +320,15 @@ proj_predict_aux <- function(proj, mu, weights, ...) {
 #' }
 #'
 #' @export
-plot.vsel <- function(x, nterms_max = NULL, stats = "elpd",
-                      deltas = FALSE, alpha = 0.32, baseline = NULL,
-                      ...) {
+plot.vsel <- function(
+  x,
+  nterms_max = NULL,
+  stats = "elpd",
+  deltas = FALSE,
+  alpha = 0.32,
+  baseline = if (!inherits(x$refmodel, "datafit")) "ref" else "best",
+  ...
+) {
   object <- x
   .validate_vsel_object_stats(object, stats)
   baseline <- .validate_baseline(object$refmodel, baseline, deltas)
@@ -360,7 +369,16 @@ plot.vsel <- function(x, nterms_max = NULL, stats = "elpd",
   if (nterms_max < 1) {
     stop("nterms_max must be at least 1")
   }
-  ylab <- if (deltas) "Difference to the baseline" else "Value"
+  if (baseline == "ref") {
+    baseline_pretty <- "reference model"
+  } else {
+    baseline_pretty <- "best submodel"
+  }
+  if (deltas) {
+    ylab <- paste0("Difference vs.", baseline_pretty)
+  } else {
+    ylab <- "Value"
+  }
 
   # make sure that breaks on the x-axis are integers
   n_opts <- c(4, 5, 6)
@@ -434,18 +452,18 @@ plot.vsel <- function(x, nterms_max = NULL, stats = "elpd",
 #'   `"diff"`, and `"diff.se"` indicating which of these to compute (mean,
 #'   standard error, lower and upper credible bounds, difference to the
 #'   corresponding statistic of the reference model, and standard error of this
-#'   difference). The credible bounds are determined so that `1 - alpha` percent
-#'   of the probability mass falls between them. Items `"diff"` and `"diff.se"`
-#'   are only supported if `!deltas`.
+#'   difference, respectively). The credible bounds are determined so that `1 -
+#'   alpha` percent of the probability mass falls between them. Items `"diff"`
+#'   and `"diff.se"` are only supported if `deltas` is `FALSE`.
 #' @param deltas If `TRUE`, the submodel statistics are estimated relative to
 #'   the baseline model (see argument `baseline`) instead of estimating the
 #'   actual values of the statistics.
 #' @param alpha A number giving the desired coverage of the credible intervals.
 #'   For example, `alpha = 0.32` corresponds to 68% probability mass within the
 #'   intervals, that is, one-standard-error intervals.
-#' @param baseline Either `"ref"` or `"best"` indicating whether the baseline is
-#'   the reference model or the best submodel found, respectively. If `NULL`,
-#'   then `"ref"` is used, except for `datafit`s for which `"best"` is used.
+#' @param baseline Only relevant if `deltas` is `TRUE`. Either `"ref"` or
+#'   `"best"` indicating whether the baseline is the reference model or the best
+#'   submodel (in terms of `stats[1]`), respectively.
 #' @param ... Currently ignored.
 #'
 #' @examples
@@ -470,9 +488,16 @@ plot.vsel <- function(x, nterms_max = NULL, stats = "elpd",
 #' }
 #'
 #' @export
-summary.vsel <- function(object, nterms_max = NULL, stats = "elpd",
-                         type = c("mean", "se", "diff", "diff.se"),
-                         deltas = FALSE, alpha = 0.32, baseline = NULL, ...) {
+summary.vsel <- function(
+  object,
+  nterms_max = NULL,
+  stats = "elpd",
+  type = c("mean", "se", "diff", "diff.se"),
+  deltas = FALSE,
+  alpha = 0.32,
+  baseline = if (!inherits(object$refmodel, "datafit")) "ref" else "best",
+  ...
+) {
   .validate_vsel_object_stats(object, stats)
   baseline <- .validate_baseline(object$refmodel, baseline, deltas)
 
@@ -658,7 +683,6 @@ print.vsel <- function(x, ...) {
 #' results via [plot.vsel()] and/or [summary.vsel()] and make the final decision
 #' based on what is most appropriate for the given problem.
 #'
-#' @inheritParams summary.vsel
 #' @param object An object of class `vsel` (returned by [varsel()] or
 #'   [cv_varsel()]).
 #' @param stat Statistic used for the decision. See [summary.vsel()] for
@@ -673,6 +697,9 @@ print.vsel <- function(x, ...) {
 #' @param type Either `"upper"` or `"lower"` determining whether the decisions
 #'   are based on the upper or lower credible bounds, respectively. See section
 #'   "Details" for more information.
+#' @param baseline Either `"ref"` or `"best"` indicating whether the baseline is
+#'   the reference model or the best submodel (in terms of `stats[1]`),
+#'   respectively.
 #' @param warnings Whether to give warnings if automatic suggestion fails,
 #'   mainly for internal use. Usually there is no reason to set this to `FALSE`.
 #' @param ... Currently ignored.
@@ -735,9 +762,16 @@ suggest_size <- function(object, ...) {
 
 #' @rdname suggest_size
 #' @export
-suggest_size.vsel <- function(object, stat = "elpd", alpha = 0.32, pct = 0,
-                              type = "upper", baseline = NULL, warnings = TRUE,
-                              ...) {
+suggest_size.vsel <- function(
+  object,
+  stat = "elpd",
+  alpha = 0.32,
+  pct = 0,
+  type = "upper",
+  baseline = if (!inherits(object$refmodel, "datafit")) "ref" else "best",
+  warnings = TRUE,
+  ...
+) {
   .validate_vsel_object_stats(object, stat)
   if (length(stat) > 1) {
     stop("Only one statistic can be specified to suggest_size")
@@ -824,13 +858,6 @@ as.matrix.lm <- function(x, ...) {
            replace_population_names())
 }
 
-#' @method as.matrix ridgelm
-#' @keywords internal
-#' @export
-as.matrix.ridgelm <- function(x, ...) {
-  return(as.matrix.lm(x))
-}
-
 #' @method as.matrix subfit
 #' @keywords internal
 #' @export
@@ -979,37 +1006,6 @@ as.matrix.gamm4 <- function(x, ...) {
   return(as.matrix.lm(x, ...))
 }
 
-#' @method as.matrix list
-#' @keywords internal
-#' @export
-as.matrix.list <- function(x, ...) {
-  return(do.call(cbind, lapply(x, as.matrix.glm, ...)))
-}
-
-#' @keywords internal
-#' @export
-t.glm <- function(x, ...) {
-  return(t(as.matrix(x), ...))
-}
-
-#' @keywords internal
-#' @export
-t.lm <- function(x, ...) {
-  return(t(as.matrix(x), ...))
-}
-
-#' @keywords internal
-#' @export
-t.ridgelm <- function(x, ...) {
-  return(t(as.matrix(x), ...))
-}
-
-#' @keywords internal
-#' @export
-t.list <- function(x, ...) {
-  return(t(as.matrix(x), ...))
-}
-
 #' Extract projected parameter draws
 #'
 #' This is the [as.matrix()] method for `projection` objects (returned by

diff --git a/R/misc.R b/R/misc.R
@@ -87,10 +87,9 @@ bootstrap <- function(x, fun = mean, b = 1000, oobfun = NULL, seed = NULL,
   }
 }
 
-# from rstanarm
-`%ORifNULL%` <- function(a, b) if (is.null(a)) b else a
-
-.is.wholenumber <- function(x) abs(x - round(x)) < .Machine$double.eps^0.5
+.is.wholenumber <- function(x) {
+  abs(x - round(x)) < .Machine$double.eps^0.5
+}
 
 .validate_num_folds <- function(k, n) {
   if (!is.numeric(k) || length(k) != 1 || !.is.wholenumber(k)) {
@@ -128,22 +127,15 @@ bootstrap <- function(x, fun = mean, b = 1000, oobfun = NULL, seed = NULL,
 }
 
 .validate_baseline <- function(refmodel, baseline, deltas) {
-  if (is.null(baseline)) {
-    if (inherits(refmodel, "datafit")) {
-      baseline <- "best"
-    } else {
-      baseline <- "ref"
-    }
-  } else {
-    if (!(baseline %in% c("ref", "best"))) {
-      stop("Argument 'baseline' must be either 'ref' or 'best'.")
-    }
-    if (baseline == "ref" && deltas == TRUE && inherits(refmodel, "datafit")) {
-      # no reference model (or the results missing for some other reason),
-      # so cannot compute differences between the reference model and submodels
-      stop("Cannot use deltas = TRUE and baseline = 'ref' when there is no ",
-           "reference model.")
-    }
+  stopifnot(!is.null(baseline))
+  if (!(baseline %in% c("ref", "best"))) {
+    stop("Argument 'baseline' must be either 'ref' or 'best'.")
+  }
+  if (baseline == "ref" && deltas == TRUE && inherits(refmodel, "datafit")) {
+    # no reference model (or the results missing for some other reason),
+    # so cannot compute differences between the reference model and submodels
+    stop("Cannot use deltas = TRUE and baseline = 'ref' when there is no ",
+         "reference model.")
   }
   return(baseline)
 }
@@ -344,8 +336,9 @@ nlist <- function(...) {
   dots
 }
 
-## ifelse operator
-"%||%" <- function(x, y) {
+# The `%||%` special binary (infix) operator from brms (equivalent to the
+# `%ORifNULL%` operator from rstanarm):
+`%||%` <- function(x, y) {
   if (is.null(x)) x <- y
   x
 }
@@ -405,7 +398,7 @@ eval2 <- function(expr, envir = parent.frame(), ...) {
   eval(expr, envir, ...)
 }
 
-# coerce 'x' to a single character string
+# coerce `x` to a single character string
 as_one_character <- function(x, allow_na = FALSE) {
   s <- substitute(x)
   x <- as.character(x)
@@ -443,7 +436,7 @@ magrittr::`%>%`
 # Function where() is not exported by package tidyselect, so redefine it here to
 # avoid a note in R CMD check which would occur for usage of
 # tidyselect:::where():
-where <- `%:::%`("tidyselect", "where")
+where <- "tidyselect" %:::% "where"
 
 get_as.matrix_cls_projpred <- function() {
   ### Only works when projpred is loaded via devtools::load_all():
@@ -463,4 +456,6 @@ get_as.matrix_cls_projpred <- function() {
 
 ## Helper function extract and combine mu and lppd from K lists with each
 ## n/K of the elements to one list with n elements
-hf <- function(x) as.list(do.call(rbind, x))
+hf <- function(x) {
+  as.list(do.call(rbind, x))
+}