diff --git a/R/cv_varsel.R b/R/cv_varsel.R index a895acdf5..826e31a6b 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -372,12 +372,12 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, nterms_max = nterms_max, penalty = penalty, verbose = FALSE, opt = opt, search_terms = search_terms ) - solution_terms <- search_path$solution_terms ## project onto the selected models and compute the prediction accuracy for ## the full data submodels <- .get_submodels( - search_path = search_path, nterms = c(0, seq_along(solution_terms)), + search_path = search_path, + nterms = c(0, seq_along(search_path$solution_terms)), p_ref = p_pred, refmodel = refmodel, regul = opt$regul, cv_search = cv_search ) @@ -392,7 +392,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, ## compute approximate LOO with PSIS weights for (k in seq_along(submodels)) { - mu_k <- refmodel$family$mu_fun(submodels[[k]]$sub_fit, + mu_k <- refmodel$family$mu_fun(submodels[[k]]$submodl, obs = inds, offset = refmodel$offset[inds]) log_lik_sub <- t(refmodel$family$ll_fun( @@ -418,13 +418,14 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, data = refmodel$fetch_data(), add_main_effects = FALSE) ## with `match` we get the indices of the variables as they enter the - ## solution path in solution_terms - solution <- match(solution_terms, setdiff(candidate_terms, "1")) + ## solution path in `search_path$solution_terms` + solution <- match(search_path$solution_terms, + setdiff(candidate_terms, "1")) for (i in seq_len(n)) { solution_terms_mat[i, seq_along(solution)] <- solution } sel <- nlist(search_path, kl = sapply(submodels, function(x) x$kl), - solution_terms) + solution_terms = search_path$solution_terms) } else { if (verbose) { print(msg) @@ -451,12 +452,12 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, nterms_max = nterms_max, penalty = penalty, verbose = FALSE, opt = opt, search_terms = search_terms ) - solution_terms <- search_path$solution_terms ## project onto the selected models and compute the prediction accuracy ## for the left-out point submodels <- .get_submodels( - search_path = search_path, nterms = c(0, seq_along(solution_terms)), + search_path = search_path, + nterms = c(0, seq_along(search_path$solution_terms)), p_ref = p_pred, refmodel = refmodel, regul = opt$regul, cv_search = cv_search ) @@ -472,8 +473,9 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, data = refmodel$fetch_data(), add_main_effects = FALSE) ## with `match` we get the indices of the variables as they enter the - ## solution path in solution_terms - solution <- match(solution_terms, setdiff(candidate_terms, "1")) + ## solution path in `search_path$solution_terms` + solution <- match(search_path$solution_terms, + setdiff(candidate_terms, "1")) solution_terms_mat[i, seq_along(solution)] <- solution if (verbose) { diff --git a/R/divergence_minimizers.R b/R/divergence_minimizers.R index 6f3e8ba70..b04320036 100644 --- a/R/divergence_minimizers.R +++ b/R/divergence_minimizers.R @@ -412,8 +412,8 @@ check_conv <- function(fit) { # Prediction functions for submodels -------------------------------------- -subprd <- function(fit, newdata) { - return(do.call(cbind, lapply(fit, function(fit) { +subprd <- function(fits, newdata) { + return(do.call(cbind, lapply(fits, function(fit) { # Only pass argument `allow.new.levels` to the predict() generic if the fit # is multilevel: has_grp <- inherits(fit, c("lmerMod", "glmerMod")) @@ -446,6 +446,10 @@ predict.subfit <- function(subfit, newdata = NULL) { if (is.null(beta)) { return(as.matrix(rep(alpha, NROW(x)))) } else { + if (ncol(x) != length(beta) + 1L) { + stop("The number of columns in the model matrix (\"X\") doesn't match ", + "the number of coefficients.") + } return(x %*% rbind(alpha, beta)) } } diff --git a/R/methods.R b/R/methods.R index 4beeb94f2..6138136b7 100644 --- a/R/methods.R +++ b/R/methods.R @@ -159,13 +159,6 @@ proj_helper <- function(object, newdata, count_terms_chosen(proj$solution_terms, add_icpt = TRUE) }) - solution_terms <- list(...)$solution_terms - if (!is.null(solution_terms) && - length(solution_terms) > NCOL(newdata)) { - stop("The number of solution terms is greater than the number of columns ", - "in `newdata`.") - } - preds <- lapply(projs, function(proj) { w_o <- proj$refmodel$extract_model_data( proj$refmodel$fit, newdata = newdata, wrhs = weightsnew, orhs = offsetnew, @@ -179,7 +172,7 @@ proj_helper <- function(object, newdata, if (length(offsetnew) == 0) { offsetnew <- rep(0, NROW(newdata)) } - mu <- proj$refmodel$family$mu_fun(proj$sub_fit, + mu <- proj$refmodel$family$mu_fun(proj$submodl, newdata = newdata, offset = offsetnew) onesub_fun(proj, mu, weightsnew, offset = offsetnew, newdata = newdata, @@ -1053,7 +1046,7 @@ as.matrix.projection <- function(x, ...) { warning("Note that projection was performed using clustering and the ", "clusters might have different weights.") } - res <- do.call(rbind, lapply(x$sub_fit, get_subparams)) + res <- do.call(rbind, lapply(x$submodl, get_subparams)) if (x$refmodel$family$family == "gaussian") res <- cbind(res, sigma = x$dis) return(res) } diff --git a/R/project.R b/R/project.R index 6d20d4c71..cbb514122 100644 --- a/R/project.R +++ b/R/project.R @@ -59,7 +59,8 @@ #' \item{`solution_terms`}{A character vector of the submodel's #' predictor terms, ordered in the way in which the terms were added to the #' submodel.} -#' \item{`sub_fit`}{The submodel's fitted model object.} +#' \item{`submodl`}{A `list` containing the submodel fits (one fit per +#' projected draw).} #' \item{`p_type`}{A single logical value indicating whether the #' reference model's posterior draws have been clustered for the projection #' (`TRUE`) or not (`FALSE`).} @@ -135,6 +136,11 @@ project <- function(object, nterms = NULL, solution_terms = NULL, cv_search <- TRUE } + if (!cv_search) { + warning("Currently, `cv_search = FALSE` requires some caution, see GitHub ", + "issues #168 and #211.") + } + if (!is.null(solution_terms)) { ## if solution_terms is given, nterms is ignored ## (project only onto the given submodel) @@ -215,7 +221,7 @@ project <- function(object, nterms = NULL, solution_terms = NULL, search_path = nlist( solution_terms, p_sel = object$search_path$p_sel, - sub_fits = object$search_path$sub_fits + submodls = object$search_path$submodls ), nterms = nterms, p_ref = p_ref, refmodel = refmodel, regul = regul, cv_search = cv_search diff --git a/R/projfun.R b/R/projfun.R index 4af29d54a..f7b46a211 100644 --- a/R/projfun.R +++ b/R/projfun.R @@ -12,7 +12,7 @@ project_submodel <- function(solution_terms, p_ref, refmodel, regul = 1e-4) { data = refmodel$fetch_data(), y = p_ref$mu ) - sub_fit <- refmodel$div_minimizer( + submodl <- refmodel$div_minimizer( formula = flatten_formula(subset$formula), data = subset$data, family = refmodel$family, @@ -22,11 +22,11 @@ project_submodel <- function(solution_terms, p_ref, refmodel, regul = 1e-4) { ) if (isTRUE(getOption("projpred.check_conv", FALSE))) { - check_conv(sub_fit) + check_conv(submodl) } return(.init_submodel( - sub_fit = sub_fit, p_ref = p_ref, refmodel = refmodel, + submodl = submodl, p_ref = p_ref, refmodel = refmodel, solution_terms = solution_terms, wobs = wobs, wsample = wsample )) } @@ -37,25 +37,26 @@ project_submodel <- function(solution_terms, p_ref, refmodel, regul = 1e-4) { .get_submodels <- function(search_path, nterms, p_ref, refmodel, regul, cv_search = FALSE) { if (!cv_search) { - ## simply fetch the already computed quantities for each submodel size + # In this case, simply fetch the already computed projections, so don't + # project again. fetch_submodel <- function(nterms) { validparams <- .validate_wobs_wsample( refmodel$wobs, search_path$p_sel$weights, search_path$p_sel$mu ) wobs <- validparams$wobs wsample <- validparams$wsample - - ## reuse sub_fit as projected during search - sub_refit <- search_path$sub_fits[[nterms + 1]] - return(.init_submodel( - sub_fit = sub_refit, p_ref = search_path$p_sel, refmodel = refmodel, + # Re-use the submodel fits from the search: + submodl = search_path$submodls[[nterms + 1]], + p_ref = search_path$p_sel, + refmodel = refmodel, solution_terms = utils::head(search_path$solution_terms, nterms), - wobs = wobs, wsample = wsample + wobs = wobs, + wsample = wsample )) } } else { - ## need to project again for each submodel size + # In this case, project again. fetch_submodel <- function(nterms) { return(project_submodel( solution_terms = utils::head(search_path$solution_terms, nterms), @@ -83,7 +84,7 @@ project_submodel <- function(solution_terms, p_ref, refmodel, regul = 1e-4) { return(nlist(wobs, wsample)) } -.init_submodel <- function(sub_fit, p_ref, refmodel, solution_terms, wobs, +.init_submodel <- function(submodl, p_ref, refmodel, solution_terms, wobs, wsample) { p_ref$mu <- refmodel$family$linkinv( refmodel$family$linkfun(p_ref$mu) + refmodel$offset @@ -114,7 +115,7 @@ project_submodel <- function(solution_terms, p_ref, refmodel, regul = 1e-4) { ### } - mu <- refmodel$family$mu_fun(sub_fit, offset = refmodel$offset) + mu <- refmodel$family$mu_fun(submodl, offset = refmodel$offset) dis <- refmodel$family$dis_fun(p_ref, nlist(mu), wobs) kl <- weighted.mean( refmodel$family$kl(p_ref, @@ -122,5 +123,5 @@ project_submodel <- function(solution_terms, p_ref, refmodel, regul = 1e-4) { nlist(mu, dis)), wsample ) - return(nlist(dis, kl, weights = wsample, solution_terms, sub_fit)) + return(nlist(dis, kl, weights = wsample, solution_terms, submodl)) } diff --git a/R/refmodel.R b/R/refmodel.R index ea715c071..afae2b7c1 100644 --- a/R/refmodel.R +++ b/R/refmodel.R @@ -84,13 +84,13 @@ #' + `newdata` accepts either `NULL` (for using the original dataset, #' typically stored in `fit`) or data for new observations (at least in the #' form of a `data.frame`). -#' * `proj_predfun`: `proj_predfun(fit, newdata)` where: -#' + `fit` accepts a `list` of length \eqn{S_{\mbox{prj}}}{S_prj} containing -#' this number of submodel fits. This `list` is the same as that returned by -#' [project()] in its output element `sub_fit` (which in turn is the same as -#' the return value of `div_minimizer`, except if [project()] was used with -#' an `object` of class `vsel` based on an L1 search as well as with -#' `cv_search = FALSE`). +#' * `proj_predfun`: `proj_predfun(fits, newdata)` where: +#' + `fits` accepts a `list` of length \eqn{S_{\mbox{prj}}}{S_prj} +#' containing this number of submodel fits. This `list` is the same as that +#' returned by [project()] in its output element `submodl` (which in turn is +#' the same as the return value of `div_minimizer`, except if [project()] +#' was used with an `object` of class `vsel` based on an L1 search as well +#' as with `cv_search = FALSE`). #' + `newdata` accepts data for new observations (at least in the form of a #' `data.frame`). #' * `div_minimizer` does not need to have a specific prototype, but it needs to @@ -563,14 +563,14 @@ init_refmodel <- function(object, data, formula, family, ref_predfun = NULL, family <- extend_family(family) } - family$mu_fun <- function(fit, obs = NULL, newdata = NULL, offset = NULL) { + family$mu_fun <- function(fits, obs = NULL, newdata = NULL, offset = NULL) { newdata <- fetch_data(data, obs = obs, newdata = newdata) if (is.null(offset)) { offset <- rep(0, nrow(newdata)) } else { stopifnot(length(offset) %in% c(1L, nrow(newdata))) } - family$linkinv(proj_predfun(fit, newdata = newdata) + offset) + family$linkinv(proj_predfun(fits, newdata = newdata) + offset) } # Special case: `datafit` ------------------------------------------------- diff --git a/R/search.R b/R/search.R index 1aeb205db..5adffc697 100644 --- a/R/search.R +++ b/R/search.R @@ -25,7 +25,7 @@ search_forward <- function(p_ref, refmodel, nterms_max, verbose = TRUE, opt, chosen <- c(chosen, cands[imin]) ## append submodels - submodels <- c(submodels, list(subL[[imin]]$sub_fit)) + submodels <- c(submodels, list(subL[[imin]]$submodl)) if (verbose && count_terms_chosen(chosen) %in% iq) { print(paste0(names(iq)[max(which(count_terms_chosen(chosen) == iq))], @@ -35,7 +35,7 @@ search_forward <- function(p_ref, refmodel, nterms_max, verbose = TRUE, opt, ## reduce chosen to a list of non-redundant accumulated models return(list(solution_terms = setdiff(reduce_models(chosen), "1"), - sub_fits = submodels)) + submodls = submodels)) } # copied over from search until we resolve the TODO below @@ -150,7 +150,7 @@ search_L1 <- function(p_ref, refmodel, nterms_max, penalty, opt) { refmodel$formula, colnames(x)[search_path$solution_terms], refmodel$fetch_data() ) - sub_fits <- lapply(0:length(solution_terms), function(nterms) { + submodls <- lapply(0:length(solution_terms), function(nterms) { if (nterms == 0) { formula <- make_formula(c("1")) beta <- NULL @@ -186,5 +186,5 @@ search_L1 <- function(p_ref, refmodel, nterms_max, penalty, opt) { return(list(sub)) }) return(list(solution_terms = solution_terms[seq_len(nterms_max)], - sub_fits = sub_fits[seq_len(nterms_max + 1)])) + submodls = submodls[seq_len(nterms_max + 1)])) } diff --git a/R/summary_funs.R b/R/summary_funs.R index 076d3818b..cb7e49177 100644 --- a/R/summary_funs.R +++ b/R/summary_funs.R @@ -5,7 +5,7 @@ weights = refmodel$wobs[test_points]), family = refmodel$family, wsample = model$weights, - mu = refmodel$family$mu_fun(model$sub_fit, + mu = refmodel$family$mu_fun(model$submodl, obs = test_points, offset = refmodel$offset[test_points]), dis = model$dis diff --git a/man/project.Rd b/man/project.Rd index fe4dfa77e..b370a9c94 100644 --- a/man/project.Rd +++ b/man/project.Rd @@ -76,7 +76,8 @@ model.} \item{\code{solution_terms}}{A character vector of the submodel's predictor terms, ordered in the way in which the terms were added to the submodel.} -\item{\code{sub_fit}}{The submodel's fitted model object.} +\item{\code{submodl}}{A \code{list} containing the submodel fits (one fit per +projected draw).} \item{\code{p_type}}{A single logical value indicating whether the reference model's posterior draws have been clustered for the projection (\code{TRUE}) or not (\code{FALSE}).} diff --git a/man/refmodel-init-get.Rd b/man/refmodel-init-get.Rd index e976a8d0c..f5b4de3a1 100644 --- a/man/refmodel-init-get.Rd +++ b/man/refmodel-init-get.Rd @@ -139,14 +139,14 @@ following prototypes: typically stored in \code{fit}) or data for new observations (at least in the form of a \code{data.frame}). } -\item \code{proj_predfun}: \code{proj_predfun(fit, newdata)} where: +\item \code{proj_predfun}: \code{proj_predfun(fits, newdata)} where: \itemize{ -\item \code{fit} accepts a \code{list} of length \eqn{S_{\mbox{prj}}}{S_prj} containing -this number of submodel fits. This \code{list} is the same as that returned by -\code{\link[=project]{project()}} in its output element \code{sub_fit} (which in turn is the same as -the return value of \code{div_minimizer}, except if \code{\link[=project]{project()}} was used with -an \code{object} of class \code{vsel} based on an L1 search as well as with -\code{cv_search = FALSE}). +\item \code{fits} accepts a \code{list} of length \eqn{S_{\mbox{prj}}}{S_prj} +containing this number of submodel fits. This \code{list} is the same as that +returned by \code{\link[=project]{project()}} in its output element \code{submodl} (which in turn is +the same as the return value of \code{div_minimizer}, except if \code{\link[=project]{project()}} +was used with an \code{object} of class \code{vsel} based on an L1 search as well +as with \code{cv_search = FALSE}). \item \code{newdata} accepts data for new observations (at least in the form of a \code{data.frame}). } diff --git a/tests/testthat/helpers/testers.R b/tests/testthat/helpers/testers.R index 6db6bdf4a..8c7c4b60b 100644 --- a/tests/testthat/helpers/testers.R +++ b/tests/testthat/helpers/testers.R @@ -407,35 +407,35 @@ refmodel_tester <- function( return(invisible(TRUE)) } -# A helper function for testing the structure of a list of subfits (whose -# elements must not necessarily be of class `"subfit"`) for the same single -# submodel +# A helper function for testing the structure of a list of fits (each fit must +# not necessarily be of class `"subfit"`) for the same single submodel # -# @param sub_fit_totest The list of subfits to test. +# @param submodl_totest The `submodl` object (a list of fits for a single +# submodel, with one fit per projected draw) to test. # @param nprjdraws_expected A single numeric value giving the expected number of # projected draws. # @param sub_formul A list of formulas for the submodel (with one element per # projected draw). # @param sub_data The dataset used for fitting the submodel. # @param sub_fam A single character string giving the submodel's family. -# @param has_grp A single logical value indicating whether `sub_fit_obj` is -# expected to be of class `"lmerMod"` or `"glmerMod"` (if, at the same time, -# `has_add` is `FALSE`). -# @param has_add A single logical value indicating whether `sub_fit_obj` is -# expected to be of class `"gam"` or `"gamm4"` (depending on whether the -# submodel is non-multilevel or multilevel, respectively). +# @param has_grp A single logical value indicating whether the fits in +# `submodl_totest` are expected to be of class `"lmerMod"` or `"glmerMod"` +# (if, at the same time, `has_add` is `FALSE`). +# @param has_add A single logical value indicating whether the fits in +# `submodl_totest` are expected to be of class `"gam"` or `"gamm4"` (depending +# on whether the submodel is non-multilevel or multilevel, respectively). # @param wobs_expected The expected numeric vector of observation weights. -# @param solterms_vsel_L1_search If `sub_fit_totest` comes from the L1 +# @param solterms_vsel_L1_search If `submodl_totest` comes from the L1 # `search_path` of an object of class `"vsel"`, provide here the solution # terms. Otherwise, use `NULL`. -# @param with_offs A single logical value indicating whether `sub_fit_totest` is +# @param with_offs A single logical value indicating whether `submodl_totest` is # expected to include offsets (`TRUE`) or not (`FALSE`). # @param info_str A single character string giving information to be printed in # case of failure. # # @return `TRUE` (invisible). -sub_fit_tester <- function( - sub_fit_totest, +submodl_tester <- function( + submodl_totest, nprjdraws_expected, sub_formul, sub_data, @@ -447,14 +447,14 @@ sub_fit_tester <- function( with_offs = FALSE, info_str ) { - expect_type(sub_fit_totest, "list") - expect_length(sub_fit_totest, nprjdraws_expected) + expect_type(submodl_totest, "list") + expect_length(submodl_totest, nprjdraws_expected) from_vsel_L1_search <- !is.null(solterms_vsel_L1_search) seq_extensive_tests <- unique(round( - seq(1, length(sub_fit_totest), - length.out = min(length(sub_fit_totest), nclusters_pred_tst)) + seq(1, length(submodl_totest), + length.out = min(length(submodl_totest), nclusters_pred_tst)) )) if (!has_grp && !has_add) { @@ -549,70 +549,70 @@ sub_fit_tester <- function( if (from_vsel_L1_search) { subfit_nms <- setdiff(subfit_nms, "y") } - for (j in seq_along(sub_fit_totest)) { - expect_s3_class(sub_fit_totest[[!!j]], "subfit") - expect_type(sub_fit_totest[[!!j]], "list") - expect_named(sub_fit_totest[[!!j]], subfit_nms, info = info_str) + for (j in seq_along(submodl_totest)) { + expect_s3_class(submodl_totest[[!!j]], "subfit") + expect_type(submodl_totest[[!!j]], "list") + expect_named(submodl_totest[[!!j]], subfit_nms, info = info_str) if (j %in% seq_extensive_tests) { - expect_true(is.vector(sub_fit_totest[[!!j]]$alpha, "double"), + expect_true(is.vector(submodl_totest[[!!j]]$alpha, "double"), info = info_str) - expect_length(sub_fit_totest[[!!j]]$alpha, 1) + expect_length(submodl_totest[[!!j]]$alpha, 1) if (length(sub_trms) > 0 || !from_vsel_L1_search) { - expect_true(is.matrix(sub_fit_totest[[!!j]]$beta), info = info_str) - expect_true(is.numeric(sub_fit_totest[[!!j]]$beta), info = info_str) - expect_identical(dim(sub_fit_totest[[!!j]]$beta), c(ncoefs, 1L), + expect_true(is.matrix(submodl_totest[[!!j]]$beta), info = info_str) + expect_true(is.numeric(submodl_totest[[!!j]]$beta), info = info_str) + expect_identical(dim(submodl_totest[[!!j]]$beta), c(ncoefs, 1L), info = info_str) } else if (length(sub_trms) == 0) { - expect_null(sub_fit_totest[[!!j]]$beta, info = info_str) + expect_null(submodl_totest[[!!j]]$beta, info = info_str) } if (!from_vsel_L1_search) { - expect_true(is.matrix(sub_fit_totest[[!!j]]$w), info = info_str) - expect_type(sub_fit_totest[[!!j]]$w, "double") - expect_identical(dim(sub_fit_totest[[!!j]]$w), c(nobsv, 1L), + expect_true(is.matrix(submodl_totest[[!!j]]$w), info = info_str) + expect_type(submodl_totest[[!!j]]$w, "double") + expect_identical(dim(submodl_totest[[!!j]]$w), c(nobsv, 1L), info = info_str) } else { - expect_true(is.vector(sub_fit_totest[[!!j]]$w, "double"), + expect_true(is.vector(submodl_totest[[!!j]]$w, "double"), info = info_str) - expect_length(sub_fit_totest[[!!j]]$w, nobsv) + expect_length(submodl_totest[[!!j]]$w, nobsv) } - expect_true(all(sub_fit_totest[[!!j]]$w > 0), info = info_str) + expect_true(all(submodl_totest[[!!j]]$w > 0), info = info_str) - expect_s3_class(sub_fit_totest[[!!j]]$formula, "formula") - if (!grepl(":", as.character(sub_fit_totest[[j]]$formula)[3])) { - expect_equal(sub_fit_totest[[!!j]]$formula, sub_formul[[!!j]], + expect_s3_class(submodl_totest[[!!j]]$formula, "formula") + if (!grepl(":", as.character(submodl_totest[[j]]$formula)[3])) { + expect_equal(submodl_totest[[!!j]]$formula, sub_formul[[!!j]], info = info_str) } else { # The order of interactions might be changed in the reference model: - expect_equal(sub_fit_totest[[!!j]]$formula[[2]], + expect_equal(submodl_totest[[!!j]]$formula[[2]], sub_formul[[!!j]][[2]], info = info_str) - expect_equal(labels(terms(sub_fit_totest[[!!j]]$formula)), + expect_equal(labels(terms(submodl_totest[[!!j]]$formula)), labels(terms(sub_formul[[!!j]])), info = info_str) } - expect_identical(sub_fit_totest[[!!j]]$x, sub_x_expected, + expect_identical(submodl_totest[[!!j]]$x, sub_x_expected, info = info_str) if (!from_vsel_L1_search) { y_ch <- setNames(eval(str2lang(as.character(sub_formul[[j]])[2]), sub_data), seq_len(nobsv)) - expect_identical(sub_fit_totest[[!!j]]$y, y_ch, info = info_str) + expect_identical(submodl_totest[[!!j]]$y, y_ch, info = info_str) } } } } else if (has_grp && !has_add) { if (sub_fam == "gaussian") { - for (j in seq_along(sub_fit_totest)) { - expect_s4_class(sub_fit_totest[[!!j]], "lmerMod") + for (j in seq_along(submodl_totest)) { + expect_s4_class(submodl_totest[[!!j]], "lmerMod") } } else { - for (j in seq_along(sub_fit_totest)) { - expect_s4_class(sub_fit_totest[[!!j]], "glmerMod") + for (j in seq_along(submodl_totest)) { + expect_s4_class(submodl_totest[[!!j]], "glmerMod") } } @@ -646,7 +646,7 @@ sub_fit_tester <- function( } else { sub_formul_expected <- sub_formul[[j]] } - expect_equal(sub_fit_totest[[!!j]]@call[["formula"]], + expect_equal(submodl_totest[[!!j]]@call[["formula"]], sub_formul_expected, info = info_str) @@ -656,73 +656,73 @@ sub_fit_tester <- function( } else { offs_expected <- offs_tst } - expect_identical(sub_fit_totest[[!!j]]@resp$offset, + expect_identical(submodl_totest[[!!j]]@resp$offset, offs_expected, info = info_str) if (!is.null(wobs_expected)) { - expect_equal(sub_fit_totest[[!!j]]@resp$weights, + expect_equal(submodl_totest[[!!j]]@resp$weights, wobs_expected, info = info_str) } else { - expect_equal(sub_fit_totest[[!!j]]@resp$weights, + expect_equal(submodl_totest[[!!j]]@resp$weights, rep(1, nobsv), info = info_str) } - expect_equal(sub_fit_totest[[!!j]]@resp$y, + expect_equal(submodl_totest[[!!j]]@resp$y, eval(str2lang(as.character(sub_formul[[!!j]])[2]), sub_data), info = info_str) # frame - expect_identical(sub_fit_totest[[!!j]]@frame, - model.frame(sub_fit_totest[[!!j]]), + expect_identical(submodl_totest[[!!j]]@frame, + model.frame(submodl_totest[[!!j]]), info = info_str) expect_equal( - sub_fit_totest[[!!j]]@frame[[ - grep("y_|ybinprop", names(sub_fit_totest[[!!j]]@frame), value = TRUE) + submodl_totest[[!!j]]@frame[[ + grep("y_|ybinprop", names(submodl_totest[[!!j]]@frame), value = TRUE) ]], - sub_fit_totest[[!!j]]@resp$y, + submodl_totest[[!!j]]@resp$y, info = info_str ) if (!is.null(wobs_expected)) { - expect_equal(sub_fit_totest[[!!j]]@frame$`(weights)`, - sub_fit_totest[[!!j]]@resp$weights, + expect_equal(submodl_totest[[!!j]]@frame$`(weights)`, + submodl_totest[[!!j]]@resp$weights, info = info_str) } else { - expect_null(sub_fit_totest[[!!j]]@frame$`(weights)`, + expect_null(submodl_totest[[!!j]]@frame$`(weights)`, info = info_str) } if (with_offs) { - expect_equal(sub_fit_totest[[!!j]]@frame$`offset(offs_col)`, + expect_equal(submodl_totest[[!!j]]@frame$`offset(offs_col)`, offs_expected, info = info_str) } frame_nms <- grep("y_|ybinprop|^\\(weights\\)$|^offset\\(.*\\)$", - names(sub_fit_totest[[j]]@frame), + names(submodl_totest[[j]]@frame), value = TRUE, invert = TRUE) expect_setequal(frame_nms, names(sub_mf_expected)) expect_equal( - sub_fit_totest[[!!j]]@frame[frame_nms], + submodl_totest[[!!j]]@frame[frame_nms], sub_mf_expected[frame_nms], info = info_str ) # model.matrix() - expect_identical(model.matrix(sub_fit_totest[[!!j]]), mm_expected, + expect_identical(model.matrix(submodl_totest[[!!j]]), mm_expected, info = info_str) # flist - expect_type(sub_fit_totest[[!!j]]@flist, "list") - expect_length(sub_fit_totest[[!!j]]@flist, length(nlvl_ran)) - z_nms <- intersect(names(sub_fit_totest[[j]]@flist), - names(sub_fit_totest[[j]]@frame)) - expect_identical(sub_fit_totest[[!!j]]@flist[z_nms], - as.list(sub_fit_totest[[!!j]]@frame[z_nms]), + expect_type(submodl_totest[[!!j]]@flist, "list") + expect_length(submodl_totest[[!!j]]@flist, length(nlvl_ran)) + z_nms <- intersect(names(submodl_totest[[j]]@flist), + names(submodl_totest[[j]]@frame)) + expect_identical(submodl_totest[[!!j]]@flist[z_nms], + as.list(submodl_totest[[!!j]]@frame[z_nms]), info = info_str) # coef() - coefs_crr <- coef(sub_fit_totest[[!!j]]) + coefs_crr <- coef(submodl_totest[[!!j]]) expect_type(coefs_crr, "list") expect_length(coefs_crr, length(nlvl_ran)) for (zz in seq_len(length(nlvl_ran))) { @@ -736,13 +736,13 @@ sub_fit_tester <- function( } } } else if (!has_grp && has_add) { - for (j in seq_along(sub_fit_totest)) { - expect_s3_class(sub_fit_totest[[!!j]], "gam") + for (j in seq_along(submodl_totest)) { + expect_s3_class(submodl_totest[[!!j]], "gam") } # TODO (GAMs): Add more expectations for GAMs. } else if (has_grp && has_add) { - for (j in seq_along(sub_fit_totest)) { - expect_s3_class(sub_fit_totest[[!!j]], "gamm4") + for (j in seq_along(submodl_totest)) { + expect_s3_class(submodl_totest[[!!j]], "gamm4") } # TODO (GAMMs): Add more expectations for GAMMs. } @@ -789,7 +789,7 @@ projection_tester <- function(p, # would have to be updated: expect_named( p, - c("dis", "kl", "weights", "solution_terms", "sub_fit", "p_type", + c("dis", "kl", "weights", "solution_terms", "submodl", "p_type", "refmodel"), info = info_str ) @@ -809,7 +809,7 @@ projection_tester <- function(p, expect_identical(p$solution_terms, solterms_expected, info = info_str) } - # sub_fit + # submodl sub_trms_crr <- p$solution_terms if (length(sub_trms_crr) == 0) { sub_trms_crr <- as.character(as.numeric(p$refmodel$intercept)) @@ -857,7 +857,7 @@ projection_tester <- function(p, for (i in seq_len(nprjdraws_expected)) { sub_data_crr[[y_nms[i]]] <- clust_ref$mu[, i] } - sub_fit_tester(p$sub_fit, + submodl_tester(p$submodl, nprjdraws_expected = nprjdraws_expected, sub_formul = sub_formul_crr, sub_data = sub_data_crr, @@ -1113,12 +1113,12 @@ vsel_tester <- function( # search_path expect_type(vs$search_path, "list") - expect_named(vs$search_path, c("solution_terms", "sub_fits", "p_sel"), + expect_named(vs$search_path, c("solution_terms", "submodls", "p_sel"), info = info_str) expect_identical(vs$search_path$solution_terms, vs$solution_terms, info = info_str) - expect_type(vs$search_path$sub_fits, "list") - expect_length(vs$search_path$sub_fits, solterms_len_expected + 1) + expect_type(vs$search_path$submodls, "list") + expect_length(vs$search_path$submodls, solterms_len_expected + 1) from_vsel_L1_search <- method_expected == "l1" clust_ref <- .get_refdist(vs$refmodel, ndraws = ndraws_expected, @@ -1140,10 +1140,10 @@ vsel_tester <- function( for (i in seq_len(nprjdraws_expected)) { sub_data_crr[[y_nms[i]]] <- clust_ref$mu[, i] } - solterms_for_subfits <- c(as.character(as.numeric(vs$refmodel$intercept)), - vs$solution_terms) - for (i in seq_along(vs$search_path$sub_fits)) { - sub_trms_crr <- head(solterms_for_subfits, i) + solterms_for_sub <- c(as.character(as.numeric(vs$refmodel$intercept)), + vs$solution_terms) + for (i in seq_along(vs$search_path$submodls)) { + sub_trms_crr <- head(solterms_for_sub, i) if (length(sub_trms_crr) > 1) { sub_trms_crr <- setdiff(sub_trms_crr, "1") } @@ -1152,8 +1152,8 @@ vsel_tester <- function( y_nm_i, "~", paste(sub_trms_crr, collapse = " + ") )) }) - sub_fit_tester( - vs$search_path$sub_fits[[i]], + submodl_tester( + vs$search_path$submodls[[i]], nprjdraws_expected = nprjdraws_expected, sub_formul = sub_formul_crr, sub_data = sub_data_crr, diff --git a/tests/testthat/test_datafit.R b/tests/testthat/test_datafit.R index 695cff2a1..683ccccbe 100644 --- a/tests/testthat/test_datafit.R +++ b/tests/testthat/test_datafit.R @@ -169,11 +169,16 @@ if (run_vs) { }) prjs_vs_datafit <- lapply(args_prj_vs_datafit, function(args_prj_vs_i) { - do.call(project, c( - list(object = vss_datafit[[args_prj_vs_i$tstsetup_vsel]], - cv_search = FALSE), - excl_nonargs(args_prj_vs_i) - )) + expect_warning( + do.call(project, c( + list(object = vss_datafit[[args_prj_vs_i$tstsetup_vsel]], + cv_search = FALSE), + excl_nonargs(args_prj_vs_i) + )), + paste("^Currently, `cv_search = FALSE` requires some caution, see GitHub", + "issues #168 and #211\\.$"), + info = args_prj_vs_i$tstsetup_vsel + ) }) } @@ -687,12 +692,17 @@ test_that(paste( method = "l1", lambda_min_ratio = lambda_min_ratio, nlambda = nlambda, thresh = 1e-12 )) - pred1 <- proj_linpred(vs, - newdata = data.frame(x = x, offset = offset, - weights = weights), - nterms = 0:nterms, transform = FALSE, - offsetnew = ~offset, - cv_search = FALSE) + expect_warning( + pred1 <- proj_linpred(vs, + newdata = data.frame(x = x, offset = offset, + weights = weights), + nterms = 0:nterms, transform = FALSE, + offsetnew = ~offset, + cv_search = FALSE), + paste("^Currently, `cv_search = FALSE` requires some caution, see GitHub", + "issues #168 and #211\\.$"), + info = fam$family + ) # compute the results for the Lasso lasso <- glmnet::glmnet(x, y_glmnet, @@ -720,12 +730,12 @@ test_that(paste( # check that the coefficients are similar ind <- match(vs$solution_terms, setdiff(split_formula(formula), "1")) if (Sys.getenv("NOT_CRAN") == "true") { - betas <- sapply(vs$search_path$sub_fits, function(x) x[[1]]$beta %||% 0) + betas <- sapply(vs$search_path$submodls, function(x) x[[1]]$beta %||% 0) delta <- sapply(seq_len(nterms), function(i) { abs(t(betas[[i + 1]]) - lasso$beta[ind[1:i], lambdainds[i + 1]]) }) expect_true(median(unlist(delta)) < 6e-2) - expect_true(median(abs(sapply(vs$search_path$sub_fits, function(x) { + expect_true(median(abs(sapply(vs$search_path$submodls, function(x) { x[[1]]$alpha }) - lasso$a0[lambdainds])) < 1.5e-1) } else { diff --git a/tests/testthat/test_div_minimizer.R b/tests/testthat/test_div_minimizer.R index 969f1c818..e787fa3ee 100644 --- a/tests/testthat/test_div_minimizer.R +++ b/tests/testthat/test_div_minimizer.R @@ -62,7 +62,7 @@ test_that("all div_minimizer()s work", { } else { wobs_expected_crr <- NULL } - sub_fit_tester(divmin_res, + submodl_tester(divmin_res, nprjdraws_expected = 1L, sub_formul = list(args_fit_i$formula), sub_data = eval(args_fit_i$data), diff --git a/tests/testthat/test_proj_pred.R b/tests/testthat/test_proj_pred.R index d9acbc82f..db0a21fb7 100644 --- a/tests/testthat/test_proj_pred.R +++ b/tests/testthat/test_proj_pred.R @@ -192,19 +192,12 @@ test_that("invalid `newdata` fails", { proj_linpred(prjs, newdata = dat[, 1]), "must be a data\\.frame or a matrix" ) - expect_error( - proj_linpred(prjs, - solution_terms = rep_len(solterms_x, length.out = 1e4)), - paste("^The number of solution terms is greater than the number of", - "columns in `newdata`\\.$") - ) stopifnot(length(solterms_x) > 1) expect_error( - proj_linpred(prjs[[head(grep("\\.glm\\.gauss", names(prjs)), 1)]], - newdata = dat[, 1, drop = FALSE], - solution_terms = solterms_x), - paste("^The number of solution terms is greater than the number of", - "columns in `newdata`\\.$") + proj_linpred(prjs[[head(grep("\\.glm\\.gauss.*\\.solterms_x", names(prjs)), + 1)]], + newdata = dat[, head(solterms_x, -1), drop = FALSE]), + "^object 'xco\\.1' not found$" ) }) @@ -802,21 +795,14 @@ test_that("invalid `newdata` fails", { proj_predict(prjs, newdata = dat[, 1], .seed = seed2_tst), "must be a data\\.frame or a matrix" ) - expect_error( - proj_predict(prjs, - .seed = seed2_tst, - solution_terms = rep_len(solterms_x, length.out = 1e4)), - paste("^The number of solution terms is greater than the number of", - "columns in `newdata`\\.$") - ) stopifnot(length(solterms_x) > 1) expect_error( - proj_predict(prjs[[head(grep("\\.glm\\.gauss", names(prjs)), 1)]], - newdata = dat[, 1, drop = FALSE], + proj_predict(prjs[[head(grep("\\.glm\\.gauss.*\\.solterms_x", names(prjs)), + 1)]], + newdata = dat[, head(solterms_x, -1), drop = FALSE], .seed = seed2_tst, solution_terms = solterms_x), - paste("^The number of solution terms is greater than the number of", - "columns in `newdata`\\.$") + "^object 'xco\\.1' not found$" ) }) diff --git a/tests/testthat/test_project.R b/tests/testthat/test_project.R index 0aec7bb49..d32421946 100644 --- a/tests/testthat/test_project.R +++ b/tests/testthat/test_project.R @@ -33,6 +33,17 @@ test_that("invalid `solution_terms` warns or fails", { "`solution_terms`\\.$"), info = tstsetup ) + expect_error( + do.call(project, c( + list(object = refmods[[args_prj_i$tstsetup_ref]], + solution_terms = rep_len(args_prj_i$solution_terms, + length.out = 1e4)), + excl_nonargs(args_prj_i, nms_excl_add = "solution_terms") + )), + paste("^Argument 'solution_terms' contains more terms than the number of", + "terms in the reference model\\.$"), + info = tstsetup + ) for (solterms_crr in list(2, 1:3, "1", list(solterms_x, solterms_x))) { tstsetup_crr <- paste(tstsetup, paste(solterms_crr, collapse = ","), sep = "__") diff --git a/tests/testthat/test_varsel.R b/tests/testthat/test_varsel.R index 23187e542..04ac7feb9 100644 --- a/tests/testthat/test_varsel.R +++ b/tests/testthat/test_varsel.R @@ -275,17 +275,17 @@ test_that(paste( } for (m in seq_len(m_max)) { # Selection: - subfits_jm_regul <- vs_regul$search_path$sub_fits[[m]] + submodl_jm_regul <- vs_regul$search_path$submodls[[m]] if (ncl_crr == 1) { - subfits_jm_regul <- list(subfits_jm_regul) + submodl_jm_regul <- list(submodl_jm_regul) } else { - stopifnot(identical(ncl_crr, length(subfits_jm_regul))) + stopifnot(identical(ncl_crr, length(submodl_jm_regul))) } for (nn in seq_len(ncl_crr)) { - stopifnot(length(subfits_jm_regul[[nn]]$alpha) == 1) - ssq_regul_sel_alpha[j, m, nn] <- subfits_jm_regul[[nn]]$alpha^2 - if (length(subfits_jm_regul[[nn]]$beta) > 0) { - ssq_regul_sel_beta[j, m, nn] <- sum(subfits_jm_regul[[nn]]$beta^2) + stopifnot(length(submodl_jm_regul[[nn]]$alpha) == 1) + ssq_regul_sel_alpha[j, m, nn] <- submodl_jm_regul[[nn]]$alpha^2 + if (length(submodl_jm_regul[[nn]]$beta) > 0) { + ssq_regul_sel_beta[j, m, nn] <- sum(submodl_jm_regul[[nn]]$beta^2) } } # Prediction: