Skip to content

Commit

Permalink
Miscellaneous modifications and fixes (again) (#234)
Browse files Browse the repository at this point in the history
* Remove `as.matrix.list()` (neither used nor needed).

* Define the default `baseline` in the `vsel` methods directly (instead of

using `NULL`) and remove the `is.null()` case in `.validate_baseline()`.

* Docs: Mention that for argument `baseline`, the best submodel is decided in terms of `stats[1]`.

* Docs of `summary.vsel()`: Minor wording improvements for argument `type`.

* Docs of argument `baseline`: Mention the relevance.

* Simplify `cvfun()` in `get_refmodel.stanreg()`.

* Set a visible default for argument `K` of `cv_varsel()`.

* Improve y-axis label in `plot.vsel()`.

* There are no Gaussian-only `stats` (at least it isn't implemented this

way). Therefore, adapt the tests.

* Fix issue #210.

* Adapt the tests to the fix for issue #210.

* Harmonize definitions and usages of `%<name>%` binary infix operators.

* Unify the equivalent special binary (infix) operators `%||%` and `%ORifNULL%`.

* Avoid function definitions without curly braces because they're not shown in

RStudio's document outline.

* Remove `.get_proj_handle()` (not necessary and had an incorrect default for `regul`).

* `search_forward()`: Remove unused argument `increasing_order`.

* `projfun.R`: Enhance comments.

* `predict.subfit()`: Don't access `subfit$x` if not necessary.

* Minor cleaning.

* Remove more unused methods.

* `get_refmodel.stanreg()`: Throw an error if **rstanarm** is not installed.

* Replace "search path" by "solution path" (at least where relevant and easily possible) to

avoid a (purely optical) confusion with "search part".
  • Loading branch information
fweber144 authored Oct 11, 2021
1 parent dd4bc36 commit 58581eb
Show file tree
Hide file tree
Showing 20 changed files with 218 additions and 203 deletions.
6 changes: 0 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@
S3method(as.matrix,gamm4)
S3method(as.matrix,glm)
S3method(as.matrix,glmerMod)
S3method(as.matrix,list)
S3method(as.matrix,lm)
S3method(as.matrix,lmerMod)
S3method(as.matrix,projection)
S3method(as.matrix,ridgelm)
S3method(as.matrix,subfit)
S3method(coef,subfit)
S3method(cv_varsel,default)
Expand All @@ -24,10 +22,6 @@ S3method(solution_terms,projection)
S3method(solution_terms,vsel)
S3method(suggest_size,vsel)
S3method(summary,vsel)
S3method(t,glm)
S3method(t,list)
S3method(t,lm)
S3method(t,ridgelm)
S3method(varsel,default)
S3method(varsel,refmodel)
export("%>%")
Expand Down
49 changes: 25 additions & 24 deletions R/cv_varsel.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@
#' `NULL`, all observations are used, but for faster experimentation, one can
#' set this to a smaller value.
#' @param K Only relevant if `cv_method == "kfold"`. Number of folds in the
#' \eqn{K}-fold CV. If `NULL`, then `5` is used for genuine reference models
#' (i.e., of class `refmodel`) and `10` for `datafit`s (that is, for penalized
#' maximum likelihood estimation).
#' \eqn{K}-fold CV.
#' @param validate_search Only relevant if `cv_method == "LOO"`. A single
#' logical value indicating whether to cross-validate also the search part,
#' i.e., whether to run the search separately for each CV fold (`TRUE`) or not
Expand Down Expand Up @@ -103,17 +101,27 @@ cv_varsel.default <- function(object, ...) {
#' @rdname cv_varsel
#' @export
cv_varsel.refmodel <- function(
object, method = NULL,
object,
method = NULL,
cv_method = if (!inherits(object, "datafit")) "LOO" else "kfold",
ndraws = 20, nclusters = NULL,
ndraws_pred = 400, nclusters_pred = NULL,
ndraws = 20,
nclusters = NULL,
ndraws_pred = 400,
nclusters_pred = NULL,
cv_search = !inherits(object, "datafit"),
nterms_max = NULL, penalty = NULL,
nterms_max = NULL,
penalty = NULL,
verbose = TRUE,
nloo = NULL, K = NULL, lambda_min_ratio = 1e-5,
nlambda = 150, thresh = 1e-6, regul = 1e-4,
validate_search = TRUE, seed = NULL,
search_terms = NULL, ...
nloo = NULL,
K = if (!inherits(object, "datafit")) 5 else 10,
lambda_min_ratio = 1e-5,
nlambda = 150,
thresh = 1e-6,
regul = 1e-4,
validate_search = TRUE,
seed = NULL,
search_terms = NULL,
...
) {
refmodel <- object
## resolve the arguments similar to varsel
Expand Down Expand Up @@ -279,23 +287,16 @@ parse_args_cv_varsel <- function(refmodel, cv_method, K,
cv_method <- "kfold"
}

if (!is.null(K)) {
if (length(K) > 1 || !is.numeric(K) || K != round(K)) {
stop("K must be a single integer value.")
if (cv_method == "kfold") {
stopifnot(!is.null(K))
if (length(K) > 1 || !is.numeric(K) || !.is.wholenumber(K)) {
stop("`K` must be a single integer value.")
}
if (K < 2) {
stop("K must be at least 2.")
stop("`K` must be at least 2.")
}
if (K > NROW(refmodel$y)) {
stop("K cannot exceed the number of observations.")
}
}

if (cv_method == "kfold" && is.null(K)) {
if (inherits(refmodel, "datafit")) {
K <- 10
} else {
K <- 5
stop("`K` cannot exceed the number of observations.")
}
}

Expand Down
3 changes: 1 addition & 2 deletions R/divergence_minimizers.R
Original file line number Diff line number Diff line change
Expand Up @@ -272,12 +272,11 @@ subprd <- function(fit, newdata = NULL) {
predict.subfit <- function(subfit, newdata = NULL) {
beta <- subfit$beta
alpha <- subfit$alpha
x <- subfit$x
if (is.null(newdata)) {
if (is.null(beta)) {
return(as.matrix(rep(alpha, NROW(subfit$x))))
} else {
return(x %*% rbind(alpha, beta))
return(subfit$x %*% rbind(alpha, beta))
}
} else {
contrasts_arg <- get_contrasts_arg_list(subfit$formula, newdata)
Expand Down
4 changes: 2 additions & 2 deletions R/gamm4.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ gamm4.setup <- function(formula, pterms, data = NULL, knots = NULL) {
formula$response <- 1
pterms$response <- 1
}
gam.setup <- `%:::%`("mgcv", "gam.setup")
gam.setup <- "mgcv" %:::% "gam.setup"

G <- gam.setup(formula, pterms,
data = data, knots = knots, sp = NULL, min.sp = NULL,
Expand Down Expand Up @@ -160,7 +160,7 @@ model.matrix.gamm4 <- function(formula, random = NULL, data = NULL,
names(dl) <- vars ## list of all variables needed
## summarize the input data

variable.summary <- `%:::%`("mgcv", "variable.summary")
variable.summary <- "mgcv" %:::% "variable.summary"
var.summary <- variable.summary(gp$pf, dl, nrow(mf))

## lmer offset handling work around...
Expand Down
106 changes: 51 additions & 55 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,9 @@ proj_predict_aux <- function(proj, mu, weights, ...) {
#'
#' @inheritParams summary.vsel
#' @param x An object of class `vsel` (returned by [varsel()] or [cv_varsel()]).
#' @param baseline Either `"ref"` or `"best"` indicating whether the baseline is
#' the reference model or the best submodel (in terms of `stats[1]`),
#' respectively.
#'
#' @examples
#' if (requireNamespace("rstanarm", quietly = TRUE)) {
Expand All @@ -317,9 +320,15 @@ proj_predict_aux <- function(proj, mu, weights, ...) {
#' }
#'
#' @export
plot.vsel <- function(x, nterms_max = NULL, stats = "elpd",
deltas = FALSE, alpha = 0.32, baseline = NULL,
...) {
plot.vsel <- function(
x,
nterms_max = NULL,
stats = "elpd",
deltas = FALSE,
alpha = 0.32,
baseline = if (!inherits(x$refmodel, "datafit")) "ref" else "best",
...
) {
object <- x
.validate_vsel_object_stats(object, stats)
baseline <- .validate_baseline(object$refmodel, baseline, deltas)
Expand Down Expand Up @@ -360,7 +369,16 @@ plot.vsel <- function(x, nterms_max = NULL, stats = "elpd",
if (nterms_max < 1) {
stop("nterms_max must be at least 1")
}
ylab <- if (deltas) "Difference to the baseline" else "Value"
if (baseline == "ref") {
baseline_pretty <- "reference model"
} else {
baseline_pretty <- "best submodel"
}
if (deltas) {
ylab <- paste0("Difference vs.", baseline_pretty)
} else {
ylab <- "Value"
}

# make sure that breaks on the x-axis are integers
n_opts <- c(4, 5, 6)
Expand Down Expand Up @@ -434,18 +452,18 @@ plot.vsel <- function(x, nterms_max = NULL, stats = "elpd",
#' `"diff"`, and `"diff.se"` indicating which of these to compute (mean,
#' standard error, lower and upper credible bounds, difference to the
#' corresponding statistic of the reference model, and standard error of this
#' difference). The credible bounds are determined so that `1 - alpha` percent
#' of the probability mass falls between them. Items `"diff"` and `"diff.se"`
#' are only supported if `!deltas`.
#' difference, respectively). The credible bounds are determined so that `1 -
#' alpha` percent of the probability mass falls between them. Items `"diff"`
#' and `"diff.se"` are only supported if `deltas` is `FALSE`.
#' @param deltas If `TRUE`, the submodel statistics are estimated relative to
#' the baseline model (see argument `baseline`) instead of estimating the
#' actual values of the statistics.
#' @param alpha A number giving the desired coverage of the credible intervals.
#' For example, `alpha = 0.32` corresponds to 68% probability mass within the
#' intervals, that is, one-standard-error intervals.
#' @param baseline Either `"ref"` or `"best"` indicating whether the baseline is
#' the reference model or the best submodel found, respectively. If `NULL`,
#' then `"ref"` is used, except for `datafit`s for which `"best"` is used.
#' @param baseline Only relevant if `deltas` is `TRUE`. Either `"ref"` or
#' `"best"` indicating whether the baseline is the reference model or the best
#' submodel (in terms of `stats[1]`), respectively.
#' @param ... Currently ignored.
#'
#' @examples
Expand All @@ -470,9 +488,16 @@ plot.vsel <- function(x, nterms_max = NULL, stats = "elpd",
#' }
#'
#' @export
summary.vsel <- function(object, nterms_max = NULL, stats = "elpd",
type = c("mean", "se", "diff", "diff.se"),
deltas = FALSE, alpha = 0.32, baseline = NULL, ...) {
summary.vsel <- function(
object,
nterms_max = NULL,
stats = "elpd",
type = c("mean", "se", "diff", "diff.se"),
deltas = FALSE,
alpha = 0.32,
baseline = if (!inherits(object$refmodel, "datafit")) "ref" else "best",
...
) {
.validate_vsel_object_stats(object, stats)
baseline <- .validate_baseline(object$refmodel, baseline, deltas)

Expand Down Expand Up @@ -658,7 +683,6 @@ print.vsel <- function(x, ...) {
#' results via [plot.vsel()] and/or [summary.vsel()] and make the final decision
#' based on what is most appropriate for the given problem.
#'
#' @inheritParams summary.vsel
#' @param object An object of class `vsel` (returned by [varsel()] or
#' [cv_varsel()]).
#' @param stat Statistic used for the decision. See [summary.vsel()] for
Expand All @@ -673,6 +697,9 @@ print.vsel <- function(x, ...) {
#' @param type Either `"upper"` or `"lower"` determining whether the decisions
#' are based on the upper or lower credible bounds, respectively. See section
#' "Details" for more information.
#' @param baseline Either `"ref"` or `"best"` indicating whether the baseline is
#' the reference model or the best submodel (in terms of `stats[1]`),
#' respectively.
#' @param warnings Whether to give warnings if automatic suggestion fails,
#' mainly for internal use. Usually there is no reason to set this to `FALSE`.
#' @param ... Currently ignored.
Expand Down Expand Up @@ -735,9 +762,16 @@ suggest_size <- function(object, ...) {

#' @rdname suggest_size
#' @export
suggest_size.vsel <- function(object, stat = "elpd", alpha = 0.32, pct = 0,
type = "upper", baseline = NULL, warnings = TRUE,
...) {
suggest_size.vsel <- function(
object,
stat = "elpd",
alpha = 0.32,
pct = 0,
type = "upper",
baseline = if (!inherits(object$refmodel, "datafit")) "ref" else "best",
warnings = TRUE,
...
) {
.validate_vsel_object_stats(object, stat)
if (length(stat) > 1) {
stop("Only one statistic can be specified to suggest_size")
Expand Down Expand Up @@ -824,13 +858,6 @@ as.matrix.lm <- function(x, ...) {
replace_population_names())
}

#' @method as.matrix ridgelm
#' @keywords internal
#' @export
as.matrix.ridgelm <- function(x, ...) {
return(as.matrix.lm(x))
}

#' @method as.matrix subfit
#' @keywords internal
#' @export
Expand Down Expand Up @@ -979,37 +1006,6 @@ as.matrix.gamm4 <- function(x, ...) {
return(as.matrix.lm(x, ...))
}

#' @method as.matrix list
#' @keywords internal
#' @export
as.matrix.list <- function(x, ...) {
return(do.call(cbind, lapply(x, as.matrix.glm, ...)))
}

#' @keywords internal
#' @export
t.glm <- function(x, ...) {
return(t(as.matrix(x), ...))
}

#' @keywords internal
#' @export
t.lm <- function(x, ...) {
return(t(as.matrix(x), ...))
}

#' @keywords internal
#' @export
t.ridgelm <- function(x, ...) {
return(t(as.matrix(x), ...))
}

#' @keywords internal
#' @export
t.list <- function(x, ...) {
return(t(as.matrix(x), ...))
}

#' Extract projected parameter draws
#'
#' This is the [as.matrix()] method for `projection` objects (returned by
Expand Down
45 changes: 20 additions & 25 deletions R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,9 @@ bootstrap <- function(x, fun = mean, b = 1000, oobfun = NULL, seed = NULL,
}
}

# from rstanarm
`%ORifNULL%` <- function(a, b) if (is.null(a)) b else a

.is.wholenumber <- function(x) abs(x - round(x)) < .Machine$double.eps^0.5
.is.wholenumber <- function(x) {
abs(x - round(x)) < .Machine$double.eps^0.5
}

.validate_num_folds <- function(k, n) {
if (!is.numeric(k) || length(k) != 1 || !.is.wholenumber(k)) {
Expand Down Expand Up @@ -128,22 +127,15 @@ bootstrap <- function(x, fun = mean, b = 1000, oobfun = NULL, seed = NULL,
}

.validate_baseline <- function(refmodel, baseline, deltas) {
if (is.null(baseline)) {
if (inherits(refmodel, "datafit")) {
baseline <- "best"
} else {
baseline <- "ref"
}
} else {
if (!(baseline %in% c("ref", "best"))) {
stop("Argument 'baseline' must be either 'ref' or 'best'.")
}
if (baseline == "ref" && deltas == TRUE && inherits(refmodel, "datafit")) {
# no reference model (or the results missing for some other reason),
# so cannot compute differences between the reference model and submodels
stop("Cannot use deltas = TRUE and baseline = 'ref' when there is no ",
"reference model.")
}
stopifnot(!is.null(baseline))
if (!(baseline %in% c("ref", "best"))) {
stop("Argument 'baseline' must be either 'ref' or 'best'.")
}
if (baseline == "ref" && deltas == TRUE && inherits(refmodel, "datafit")) {
# no reference model (or the results missing for some other reason),
# so cannot compute differences between the reference model and submodels
stop("Cannot use deltas = TRUE and baseline = 'ref' when there is no ",
"reference model.")
}
return(baseline)
}
Expand Down Expand Up @@ -344,8 +336,9 @@ nlist <- function(...) {
dots
}

## ifelse operator
"%||%" <- function(x, y) {
# The `%||%` special binary (infix) operator from brms (equivalent to the
# `%ORifNULL%` operator from rstanarm):
`%||%` <- function(x, y) {
if (is.null(x)) x <- y
x
}
Expand Down Expand Up @@ -405,7 +398,7 @@ eval2 <- function(expr, envir = parent.frame(), ...) {
eval(expr, envir, ...)
}

# coerce 'x' to a single character string
# coerce `x` to a single character string
as_one_character <- function(x, allow_na = FALSE) {
s <- substitute(x)
x <- as.character(x)
Expand Down Expand Up @@ -443,7 +436,7 @@ magrittr::`%>%`
# Function where() is not exported by package tidyselect, so redefine it here to
# avoid a note in R CMD check which would occur for usage of
# tidyselect:::where():
where <- `%:::%`("tidyselect", "where")
where <- "tidyselect" %:::% "where"

get_as.matrix_cls_projpred <- function() {
### Only works when projpred is loaded via devtools::load_all():
Expand All @@ -463,4 +456,6 @@ get_as.matrix_cls_projpred <- function() {

## Helper function extract and combine mu and lppd from K lists with each
## n/K of the elements to one list with n elements
hf <- function(x) as.list(do.call(rbind, x))
hf <- function(x) {
as.list(do.call(rbind, x))
}
Loading

0 comments on commit 58581eb

Please sign in to comment.