From ce3b2fdd5733a2ba6b337175afe0428a4737a9ef Mon Sep 17 00:00:00 2001 From: Frank Weber <55132727+fweber144@users.noreply.github.com> Date: Mon, 24 Jan 2022 17:25:14 +0100 Subject: [PATCH] Fix issue #156 (#269) * Use `t2()` instead of `s()` in the demonstration of a smooth term with multiple predictors. The reason is that `nitro` and `bv` measure different things (unlike spatial coordinates, for example) and `s()` uses a thin plate regression spline by default (which is *isotropic*). * Fix issue #156. --- R/formula.R | 10 ++-------- R/refmodel.R | 17 ++++++++++++++--- man/refmodel-init-get.Rd | 17 ++++++++++++++--- vignettes/projpred.Rmd | 6 +++--- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/R/formula.R b/R/formula.R index 1abb5b1e0..9be13e704 100644 --- a/R/formula.R +++ b/R/formula.R @@ -84,14 +84,8 @@ extract_response <- function(response) { return(response_name_ch) } -## Parse additive terms from a list of individual terms. These may include -## individual smoothers s(x), interaction smoothers s(x, z), smooth intercepts -## s(factor, bs = "re"), group level independent smoothers s(x, by = g), or -## shared smoothers for low dimensional factors s(x, g, bs = "fs"), interaction -## group level smoothers with same shape for high dimensional factors t2(x, z, -## g, bs = c(., ., "re")), interaction group level independent smoothers te(x, -## z, by = g), population level interaction smoothers te(x, z, bs = c(., .)), -## s(x, z). +## Parse additive terms (smooth terms) from a list of individual terms. See +## `?init_refmodel` for allowed smooth terms. ## @param terms list of terms to parse ## @return a vector of smooth terms parse_additive_terms <- function(terms) { diff --git a/R/refmodel.R b/R/refmodel.R index 00aa71931..e417ccecf 100644 --- a/R/refmodel.R +++ b/R/refmodel.R @@ -21,9 +21,10 @@ #' @param data Data used for fitting the reference model. #' @param formula Reference model's formula. For general information on formulas #' in \R, see [`formula`]. For multilevel formulas, see also package -#' \pkg{lme4}, in particular [lme4::lmer()] and [lme4::glmer()]. For additive -#' formulas, see also packages \pkg{mgcv}, in particular [mgcv::gam()], and -#' \pkg{gamm4}, in particular [gamm4::gamm4()]. +#' \pkg{lme4} (in particular, functions [lme4::lmer()] and [lme4::glmer()]). +#' For additive formulas, see also packages \pkg{mgcv} (in particular, +#' function [mgcv::gam()]) and \pkg{gamm4} (in particular, function +#' [gamm4::gamm4()]) as well as the notes in section "Formula terms" below. #' @param ref_predfun Prediction function for the linear predictor of the #' reference model, including offsets (if existing). See also section #' "Arguments `ref_predfun`, `proj_predfun`, and `div_minimizer`" below. If @@ -67,6 +68,16 @@ #' #' @details #' +#' # Formula terms +#' +#' For additive models (still an experimental feature), only [mgcv::s()] and +#' [mgcv::t2()] are currently supported as smooth terms. Furthermore, these need +#' to be called without any arguments apart from the predictor names (symbols). +#' For example, for smoothing the effect of a predictor `x`, only `s(x)` or +#' `t2(x)` are allowed. As another example, for smoothing the joint effect of +#' two predictors `x` and `z`, only `s(x, z)` or `t2(x, z)` are allowed (and +#' analogously for higher-order joint effects, e.g., of three predictors). +#' #' # Arguments `ref_predfun`, `proj_predfun`, and `div_minimizer` #' #' Arguments `ref_predfun`, `proj_predfun`, and `div_minimizer` may be `NULL` diff --git a/man/refmodel-init-get.Rd b/man/refmodel-init-get.Rd index dd9c2605b..0a3dd5281 100644 --- a/man/refmodel-init-get.Rd +++ b/man/refmodel-init-get.Rd @@ -52,9 +52,10 @@ arguments passed to the appropriate method. Else: ignored.} \item{formula}{Reference model's formula. For general information on formulas in \R, see \code{\link{formula}}. For multilevel formulas, see also package -\pkg{lme4}, in particular \code{\link[lme4:lmer]{lme4::lmer()}} and \code{\link[lme4:glmer]{lme4::glmer()}}. For additive -formulas, see also packages \pkg{mgcv}, in particular \code{\link[mgcv:gam]{mgcv::gam()}}, and -\pkg{gamm4}, in particular \code{\link[gamm4:gamm4]{gamm4::gamm4()}}.} +\pkg{lme4} (in particular, functions \code{\link[lme4:lmer]{lme4::lmer()}} and \code{\link[lme4:glmer]{lme4::glmer()}}). +For additive formulas, see also packages \pkg{mgcv} (in particular, +function \code{\link[mgcv:gam]{mgcv::gam()}}) and \pkg{gamm4} (in particular, function +\code{\link[gamm4:gamm4]{gamm4::gamm4()}}) as well as the notes in section "Formula terms" below.} \item{family}{A \code{\link{family}} object representing the observational model (i.e., the distributional family for the response).} @@ -120,6 +121,16 @@ also be used directly without a call to \code{\link[=get_refmodel]{get_refmodel( for \eqn{K}-fold cross-validation (\eqn{K}-fold CV) only; see \code{\link[=cv_varsel]{cv_varsel()}} for the use of \eqn{K}-fold CV in \pkg{projpred}. } +\section{Formula terms}{ +For additive models (still an experimental feature), only \code{\link[mgcv:s]{mgcv::s()}} and +\code{\link[mgcv:t2]{mgcv::t2()}} are currently supported as smooth terms. Furthermore, these need +to be called without any arguments apart from the predictor names (symbols). +For example, for smoothing the effect of a predictor \code{x}, only \code{s(x)} or +\code{t2(x)} are allowed. As another example, for smoothing the joint effect of +two predictors \code{x} and \code{z}, only \code{s(x, z)} or \code{t2(x, z)} are allowed (and +analogously for higher-order joint effects, e.g., of three predictors). +} + \section{Arguments \code{ref_predfun}, \code{proj_predfun}, and \code{div_minimizer}}{ Arguments \code{ref_predfun}, \code{proj_predfun}, and \code{div_minimizer} may be \code{NULL} for using an internal default. Otherwise, let \eqn{N} denote the number of diff --git a/vignettes/projpred.Rmd b/vignettes/projpred.Rmd index d68999abb..b8191c426 100755 --- a/vignettes/projpred.Rmd +++ b/vignettes/projpred.Rmd @@ -252,7 +252,7 @@ This PPPC shows that our final projection is able to generate predictions simila ## Supported types of reference models {#refmodtypes} -Apart from the `gaussian()` response family used in this vignette, **projpred** also supports the `binomial()`^[The `binomial()` family includes the `brms::bernoulli()` family as a special case which may only be used via the **brms** package.] and the `poisson()` family. On the side of the predictors, **projpred** not only supports linear main effects as shown in this vignette, but also interactions, multilevel^[Multilevel models are also known as *hierarchical* models or models with *partially pooled*, *group-level*, or---in frequentist terms---*random* effects.], and---as an experimental feature---additive^[Additive terms are also known as *smoothing* terms.] terms. +Apart from the `gaussian()` response family used in this vignette, **projpred** also supports the `binomial()`^[The `binomial()` family includes the `brms::bernoulli()` family as a special case which may only be used via the **brms** package.] and the `poisson()` family. On the side of the predictors, **projpred** not only supports linear main effects as shown in this vignette, but also interactions, multilevel^[Multilevel models are also known as *hierarchical* models or models with *partially pooled*, *group-level*, or---in frequentist terms---*random* effects.], and---as an experimental feature---additive^[Additive terms are also known as *smooth* terms.] terms. Transferring this vignette to such more complex reference models is straightforward: Basically, only the code for fitting the reference model via **rstanarm** or **brms** needs to be adapted. The **projpred** code stays almost the same. Only note that in case of multilevel or additive reference models, @@ -274,10 +274,10 @@ As an example for an additive (non-multilevel) reference model, consider the `la data("lasrosas.corn", package = "agridat") # Convert `year` to a `factor` (this could also be solved by using # `factor(year)` in the formula, but we avoid that here to put more emphasis on -# the demonstration of the smoothing term): +# the demonstration of the smooth term): lasrosas.corn$year <- as.factor(lasrosas.corn$year) refm_fit <- stan_gamm4( - yield ~ year + topo + s(nitro, bv), + yield ~ year + topo + t2(nitro, bv), family = gaussian(), data = lasrosas.corn, seed = 4919670, QR = TRUE, refresh = 0