From ce3b2fdd5733a2ba6b337175afe0428a4737a9ef Mon Sep 17 00:00:00 2001
From: Frank Weber <55132727+fweber144@users.noreply.github.com>
Date: Mon, 24 Jan 2022 17:25:14 +0100
Subject: [PATCH] Fix issue #156 (#269)

* Use `t2()` instead of `s()` in the demonstration of

a smooth term with multiple predictors. The reason is that `nitro` and `bv` measure different things (unlike spatial coordinates, for example) and `s()` uses a thin plate regression spline by default (which is *isotropic*).

* Fix issue #156.
---
 R/formula.R              | 10 ++--------
 R/refmodel.R             | 17 ++++++++++++++---
 man/refmodel-init-get.Rd | 17 ++++++++++++++---
 vignettes/projpred.Rmd   |  6 +++---
 4 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/R/formula.R b/R/formula.R
index 1abb5b1e0..9be13e704 100644
--- a/R/formula.R
+++ b/R/formula.R
@@ -84,14 +84,8 @@ extract_response <- function(response) {
   return(response_name_ch)
 }
 
-## Parse additive terms from a list of individual terms. These may include
-## individual smoothers s(x), interaction smoothers s(x, z), smooth intercepts
-## s(factor, bs = "re"), group level independent smoothers s(x, by = g), or
-## shared smoothers for low dimensional factors s(x, g, bs = "fs"), interaction
-## group level smoothers with same shape for high dimensional factors t2(x, z,
-## g, bs = c(., ., "re")), interaction group level independent smoothers te(x,
-## z, by = g), population level interaction smoothers te(x, z, bs = c(., .)),
-## s(x, z).
+## Parse additive terms (smooth terms) from a list of individual terms. See
+## `?init_refmodel` for allowed smooth terms.
 ## @param terms list of terms to parse
 ## @return a vector of smooth terms
 parse_additive_terms <- function(terms) {
diff --git a/R/refmodel.R b/R/refmodel.R
index 00aa71931..e417ccecf 100644
--- a/R/refmodel.R
+++ b/R/refmodel.R
@@ -21,9 +21,10 @@
 #' @param data Data used for fitting the reference model.
 #' @param formula Reference model's formula. For general information on formulas
 #'   in \R, see [`formula`]. For multilevel formulas, see also package
-#'   \pkg{lme4}, in particular [lme4::lmer()] and [lme4::glmer()]. For additive
-#'   formulas, see also packages \pkg{mgcv}, in particular [mgcv::gam()], and
-#'   \pkg{gamm4}, in particular [gamm4::gamm4()].
+#'   \pkg{lme4} (in particular, functions [lme4::lmer()] and [lme4::glmer()]).
+#'   For additive formulas, see also packages \pkg{mgcv} (in particular,
+#'   function [mgcv::gam()]) and \pkg{gamm4} (in particular, function
+#'   [gamm4::gamm4()]) as well as the notes in section "Formula terms" below.
 #' @param ref_predfun Prediction function for the linear predictor of the
 #'   reference model, including offsets (if existing). See also section
 #'   "Arguments `ref_predfun`, `proj_predfun`, and `div_minimizer`" below. If
@@ -67,6 +68,16 @@
 #'
 #' @details
 #'
+#' # Formula terms
+#'
+#' For additive models (still an experimental feature), only [mgcv::s()] and
+#' [mgcv::t2()] are currently supported as smooth terms. Furthermore, these need
+#' to be called without any arguments apart from the predictor names (symbols).
+#' For example, for smoothing the effect of a predictor `x`, only `s(x)` or
+#' `t2(x)` are allowed. As another example, for smoothing the joint effect of
+#' two predictors `x` and `z`, only `s(x, z)` or `t2(x, z)` are allowed (and
+#' analogously for higher-order joint effects, e.g., of three predictors).
+#'
 #' # Arguments `ref_predfun`, `proj_predfun`, and `div_minimizer`
 #'
 #' Arguments `ref_predfun`, `proj_predfun`, and `div_minimizer` may be `NULL`
diff --git a/man/refmodel-init-get.Rd b/man/refmodel-init-get.Rd
index dd9c2605b..0a3dd5281 100644
--- a/man/refmodel-init-get.Rd
+++ b/man/refmodel-init-get.Rd
@@ -52,9 +52,10 @@ arguments passed to the appropriate method. Else: ignored.}
 
 \item{formula}{Reference model's formula. For general information on formulas
 in \R, see \code{\link{formula}}. For multilevel formulas, see also package
-\pkg{lme4}, in particular \code{\link[lme4:lmer]{lme4::lmer()}} and \code{\link[lme4:glmer]{lme4::glmer()}}. For additive
-formulas, see also packages \pkg{mgcv}, in particular \code{\link[mgcv:gam]{mgcv::gam()}}, and
-\pkg{gamm4}, in particular \code{\link[gamm4:gamm4]{gamm4::gamm4()}}.}
+\pkg{lme4} (in particular, functions \code{\link[lme4:lmer]{lme4::lmer()}} and \code{\link[lme4:glmer]{lme4::glmer()}}).
+For additive formulas, see also packages \pkg{mgcv} (in particular,
+function \code{\link[mgcv:gam]{mgcv::gam()}}) and \pkg{gamm4} (in particular, function
+\code{\link[gamm4:gamm4]{gamm4::gamm4()}}) as well as the notes in section "Formula terms" below.}
 
 \item{family}{A \code{\link{family}} object representing the observational model (i.e.,
 the distributional family for the response).}
@@ -120,6 +121,16 @@ also be used directly without a call to \code{\link[=get_refmodel]{get_refmodel(
 for \eqn{K}-fold cross-validation (\eqn{K}-fold CV) only; see \code{\link[=cv_varsel]{cv_varsel()}}
 for the use of \eqn{K}-fold CV in \pkg{projpred}.
 }
+\section{Formula terms}{
+For additive models (still an experimental feature), only \code{\link[mgcv:s]{mgcv::s()}} and
+\code{\link[mgcv:t2]{mgcv::t2()}} are currently supported as smooth terms. Furthermore, these need
+to be called without any arguments apart from the predictor names (symbols).
+For example, for smoothing the effect of a predictor \code{x}, only \code{s(x)} or
+\code{t2(x)} are allowed. As another example, for smoothing the joint effect of
+two predictors \code{x} and \code{z}, only \code{s(x, z)} or \code{t2(x, z)} are allowed (and
+analogously for higher-order joint effects, e.g., of three predictors).
+}
+
 \section{Arguments \code{ref_predfun}, \code{proj_predfun}, and \code{div_minimizer}}{
 Arguments \code{ref_predfun}, \code{proj_predfun}, and \code{div_minimizer} may be \code{NULL}
 for using an internal default. Otherwise, let \eqn{N} denote the number of
diff --git a/vignettes/projpred.Rmd b/vignettes/projpred.Rmd
index d68999abb..b8191c426 100755
--- a/vignettes/projpred.Rmd
+++ b/vignettes/projpred.Rmd
@@ -252,7 +252,7 @@ This PPPC shows that our final projection is able to generate predictions simila
 
 ## Supported types of reference models {#refmodtypes}
 
-Apart from the `gaussian()` response family used in this vignette, **projpred** also supports the `binomial()`^[The `binomial()` family includes the `brms::bernoulli()` family as a special case which may only be used via the **brms** package.] and the `poisson()` family. On the side of the predictors, **projpred** not only supports linear main effects as shown in this vignette, but also interactions, multilevel^[Multilevel models are also known as *hierarchical* models or models with *partially pooled*, *group-level*, or---in frequentist terms---*random* effects.], and---as an experimental feature---additive^[Additive terms are also known as *smoothing* terms.] terms.
+Apart from the `gaussian()` response family used in this vignette, **projpred** also supports the `binomial()`^[The `binomial()` family includes the `brms::bernoulli()` family as a special case which may only be used via the **brms** package.] and the `poisson()` family. On the side of the predictors, **projpred** not only supports linear main effects as shown in this vignette, but also interactions, multilevel^[Multilevel models are also known as *hierarchical* models or models with *partially pooled*, *group-level*, or---in frequentist terms---*random* effects.], and---as an experimental feature---additive^[Additive terms are also known as *smooth* terms.] terms.
 
 Transferring this vignette to such more complex reference models is straightforward: Basically, only the code for fitting the reference model via **rstanarm** or **brms** needs to be adapted. The **projpred** code stays almost the same. Only note that in case of multilevel or additive reference models,
 <!-- the parallelization of the projection is not recommended and that -->
@@ -274,10 +274,10 @@ As an example for an additive (non-multilevel) reference model, consider the `la
 data("lasrosas.corn", package = "agridat")
 # Convert `year` to a `factor` (this could also be solved by using
 # `factor(year)` in the formula, but we avoid that here to put more emphasis on
-# the demonstration of the smoothing term):
+# the demonstration of the smooth term):
 lasrosas.corn$year <- as.factor(lasrosas.corn$year)
 refm_fit <- stan_gamm4(
-  yield ~ year + topo + s(nitro, bv),
+  yield ~ year + topo + t2(nitro, bv),
   family = gaussian(),
   data = lasrosas.corn,
   seed = 4919670, QR = TRUE, refresh = 0