Skip to content

Commit

Permalink
Merge pull request #5 from MarselScheer/release/v0.1.0
Browse files Browse the repository at this point in the history
Release/v0.1.0
  • Loading branch information
MarselScheer authored Sep 2, 2021
2 parents 9510843 + c8d2f80 commit 7090de3
Show file tree
Hide file tree
Showing 99 changed files with 10,063 additions and 157 deletions.
7 changes: 6 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
^GOF\.Rproj$
^renv$
^renv\.lock$
^bootGOF\.Rproj$
^\.Rproj\.user$
^_pkgdown\.yml$
^docs$
^pkgdown$
^docker$
^README\.Rmd$
^design\.plantuml$
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
.Rhistory
.RData
.Ruserdata
inst/doc
43 changes: 35 additions & 8 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: GOF
Title: Bootstrap based goodness-of-fit tests
Version: 0.0.0.9000
Package: bootGOF
Title: Bootstrap Based Goodness-of-Fit Tests
Version: 0.1.0
Authors@R:
c(person(given = "Marsel",
family = "Scheer",
Expand All @@ -10,10 +10,37 @@ Authors@R:
family = "Dikta",
role = c("aut"),
email = "[email protected]"))
Description: Bootstrap based goodness-of-fit tests.
Description: Bootstrap based goodness-of-fit tests. It allows
performing rigorous statistical tests to check if a chosen
model family is correct based on the marked empirical
process. The implemented algorithms are described in
(Dikta and Scheer (2021) <doi:10.1007/978-3-030-73480-0>)
and can be applied
to generalized linear models without any further implementation
effort. As far as certain linearity conditions are fulfilled
the resampling schemes are also applicable beyond generalized
linear models. This is reflected in the software architecture
which allows reusing the resampling scheme by implementing
only certain interfaces for models that are not supported
natively by the package.
Imports:
checkmate (>= 2.0.0),
R6 (>= 2.4.1)
License: GPL-3
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.0.2
URL: https://github.com/MarselScheer/GOF
BugReports: https://github.com/MarselScheer/GOF/issues
RoxygenNote: 7.1.0
URL: https://github.com/MarselScheer/bootGOF
BugReports: https://github.com/MarselScheer/bootGOF/issues
Suggests:
testthat,
covr,
roxygen2,
pkgdown,
devtools,
tinytest,
mockery,
knitr,
rmarkdown,
minpack.lm,
MASS
VignetteBuilder: knitr
16 changes: 16 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,2 +1,18 @@
# Generated by roxygen2: do not edit by hand

export(GOF_glm_info_extractor)
export(GOF_glm_sim_param)
export(GOF_glm_trainer)
export(GOF_lm_info_extractor)
export(GOF_lm_sim_param)
export(GOF_lm_trainer)
export(GOF_model)
export(GOF_model_info_extractor)
export(GOF_model_resample)
export(GOF_model_simulator)
export(GOF_model_test)
export(GOF_model_trainer)
export(GOF_sim_wild_rademacher)
export(Rn1_CvM)
export(Rn1_KS)
export(Rn1_statistic)
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Version 0.1.0
=========================

* initial release
* classical linear models and GLMs can be tested
out of the box
* interfaces defined and documented in order to
reuse the resampling schemes for other models
115 changes: 115 additions & 0 deletions R/GOF_model.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
##' @title Convenience function for creating a GOF-test for statistical models
##'
##' @description Simplifies the creation of an instance of
##'   \link{GOF_model_test}, the actual work horse for performing a
##'   goodness-of-fit-test.
##' @param model of class 'lm' or 'glm'. Caution with MASS::glm.nb, see
##'   vignette 'New-Models' for more details.
##' @param data see \link{GOF_model_test}
##' @param nmb_boot_samples see \link{GOF_model_test}
##' @param simulator_type a single string, either "parametric" or
##'   "semi_parametric_rademacher". The latter is rejected with an error
##'   for models inheriting from 'glm'.
##' @param y_name see \link{GOF_model_test}
##' @param Rn1_statistic see \link{GOF_model_test}
##' @param gof_model_resample_class no need to change this parameter. Here the
##'   class used for resampling the model (\link{GOF_model_resample})
##'   is injected. This parameter simply makes it easier to test the
##'   convenience function properly.
##' @param gof_model_test_class no need to change this parameter. Here the
##'   class used for performing the GOF test (\link{GOF_model_test})
##'   is injected. This parameter simply makes it easier to test the
##'   convenience function properly.
##' @export
##' @return instance of \link{GOF_model_test}
##' @examples
##' set.seed(1)
##' N <- 100
##' X1 <- rnorm(N)
##' X2 <- rnorm(N)
##' d <- data.frame(
##'   y = rpois(n = N, lambda = exp(4 + X1 * 2 + X2 * 6)),
##'   x1 = X1,
##'   x2 = X2)
##' fit <- glm(y ~ x1, data = d, family = poisson())
##' mt <- GOF_model(
##'   model = fit,
##'   data = d,
##'   nmb_boot_samples = 100,
##'   simulator_type = "parametric",
##'   y_name = "y",
##'   Rn1_statistic = Rn1_KS$new())
##' mt$get_pvalue()
##' fit <- glm(y ~ x1 + x2, data = d, family = poisson())
##' mt <- GOF_model(
##'   model = fit,
##'   data = d,
##'   nmb_boot_samples = 100,
##'   simulator_type = "parametric",
##'   y_name = "y",
##'   Rn1_statistic = Rn1_KS$new())
##' mt$get_pvalue()
GOF_model <- function(model,
                      data,
                      nmb_boot_samples,
                      simulator_type,
                      y_name,
                      Rn1_statistic,
                      gof_model_resample_class = GOF_model_resample,
                      gof_model_test_class = GOF_model_test
                      ) {
  # assert_choice enforces exactly one of the allowed strings. assert_subset
  # would also accept character(0) or several values, which only fail later
  # with hard-to-read errors when used in `if` / `[[`.
  checkmate::assert_choice(
    x = simulator_type,
    choices = c("parametric", "semi_parametric_rademacher"))
  checkmate::assert_multi_class(x = model, classes = c("lm", "glm"))
  if (inherits(x = model, what = "negbin")) {
    warning("The GOF-test requires to refit the model. Refitting MASS::glm.nb can be problematic, see vignette New-Models")
  }

  # "glm" is checked before "lm" because fits created by glm() inherit from
  # both classes.
  if (inherits(x = model, what = "glm")) {
    if (simulator_type == "semi_parametric_rademacher") {
      stop(
        paste(
          "Ordinary Least Square estimate necessary for semi_parametric_rademacher.",
          "But MLE is used for GLMs."
        ))
    }
    ms <- GOF_glm_sim_param$new()
    mt <- GOF_glm_trainer$new()
    mie <- GOF_glm_info_extractor$new()
  } else {
    # assert_multi_class above guarantees that the model inherits from "lm"
    # whenever it does not inherit from "glm".
    mt <- GOF_lm_trainer$new()
    mie <- GOF_lm_info_extractor$new()

    if (simulator_type == "parametric") {
      ms <- GOF_lm_sim_param$new()
    } else {
      # the wild bootstrap simulator is constructed with the info extractor
      ms <- GOF_sim_wild_rademacher$new(gof_model_info_extractor = mie)
    }
  }

  model_resample <- gof_model_resample_class$new(
    gof_model_simulator = ms,
    gof_model_trainer = mt
  )

  gof_model_test_class$new(
    model = model,
    data = data,
    nmb_boot_samples = nmb_boot_samples,
    y_name = y_name,
    Rn1_statistic = Rn1_statistic,
    gof_model_info_extractor = mie,
    gof_model_resample = model_resample)
}
89 changes: 89 additions & 0 deletions R/GOF_model_info_extractor.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
##' @title R6 Class representing model information
##'
##' @description R6 has no notion of interfaces, so this class only
##'   declares the methods an info extractor has to provide. Every
##'   method defined here stops with an error and is meant to be
##'   overridden by a subclass.
##' @export
GOF_model_info_extractor <- R6::R6Class(
  classname = "GOF_model_info_extractor",
  public = list(

    ##' @description Abstract method: estimate/prediction of the
    ##'   dependent variable in \code{model}
    ##' @param model fitted model
    ##' @return estimate/prediction of the dependent variable
    ##'   fitted by \code{model}
    yhat = function(model) {
      stop("Abstract method. Needs to be implemented")
    },

    ##' @description Abstract method: residuals on the scale of the
    ##'   dependent variable.
    ##' @param model fitted model
    ##' @return residuals on the scale of the dependent variable
    y_minus_yhat = function(model) {
      stop("Abstract method. Needs to be implemented")
    },

    ##' @description Abstract method: inner product of the estimated
    ##'   parameters and the independent variables.
    ##' @param model fitted model
    ##' @return inner product of the estimated parameters and the
    ##'   independent variables.
    beta_x_covariates = function(model) {
      stop("Abstract method. Needs to be implemented")
    }
  )
)

##' @title Implements the "interface" GOF_model_info_extractor
##'   for linear models
##' @description This class is specialized in extracting various information
##'   from an object of class "lm"
##' @export
GOF_lm_info_extractor <- R6::R6Class(
  classname = "GOF_lm_info_extractor",
  inherit = GOF_model_info_extractor,
  public = list(
    ##' @description Predictions of \code{model}, obtained by calling
    ##'   \code{predict.lm} without new data.
    ##'   See \link{GOF_model_info_extractor}
    ##' @param model see \link{GOF_model_info_extractor}
    ##' @return see \link{GOF_model_info_extractor}
    yhat = function(model) {
      predict.lm(object = model)
    },
    ##' @description Residuals of \code{model}, obtained by calling
    ##'   \code{residuals.lm}. See \link{GOF_model_info_extractor}
    ##' @param model see \link{GOF_model_info_extractor}
    ##' @return see \link{GOF_model_info_extractor}
    y_minus_yhat = function(model) {
      residuals.lm(object = model)
    },
    ##' @description Uses the same \code{predict.lm} call as \code{yhat}.
    ##'   See \link{GOF_model_info_extractor}
    ##' @param model see \link{GOF_model_info_extractor}
    ##' @return see \link{GOF_model_info_extractor}
    beta_x_covariates = function(model) {
      predict.lm(object = model)
    }
  )
)


##' @title Implements the "interface" GOF_model_info_extractor
##'   for generalized linear models
##' @description This class is specialized in extracting various information
##'   from an object of class "glm"
##' @export
GOF_glm_info_extractor <- R6::R6Class(
  classname = "GOF_glm_info_extractor",
  inherit = GOF_model_info_extractor,
  public = list(
    ##' @description Predictions of \code{model} obtained by calling
    ##'   \code{predict.glm} with \code{type = "response"}.
    ##'   See \link{GOF_model_info_extractor}
    ##' @param model see \link{GOF_model_info_extractor}
    ##' @return see \link{GOF_model_info_extractor}
    yhat = function(model) {
      predict.glm(object = model, type = "response")
    },
    ##' @description Residuals of \code{model} obtained by calling
    ##'   \code{residuals.glm} with \code{type = "response"}.
    ##'   See \link{GOF_model_info_extractor}
    ##' @param model see \link{GOF_model_info_extractor}
    ##' @return see \link{GOF_model_info_extractor}
    y_minus_yhat = function(model) {
      residuals.glm(object = model, type = "response")
    },
    ##' @description Predictions of \code{model} obtained by calling
    ##'   \code{predict.glm} with \code{type = "link"}.
    ##'   See \link{GOF_model_info_extractor}
    ##' @param model see \link{GOF_model_info_extractor}
    ##' @return see \link{GOF_model_info_extractor}
    beta_x_covariates = function(model) {
      predict.glm(object = model, type = "link")
    }
  )
)
33 changes: 33 additions & 0 deletions R/GOF_model_resample.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
##' @title R6 Class representing the resampling scheme for
##'   Goodness-of-fit-tests for (linear) models
##'
##' @description Resamples a model fit: the dependent variable is replaced
##'   by newly simulated values and the model is refitted to the resulting
##'   data set.
##' @export
GOF_model_resample <- R6::R6Class(
  classname = "GOF_model_resample",
  private = list(
    model_simulator = NULL,
    model_trainer = NULL
  ),
  public = list(

    ##' @description Stores the simulator and the trainer that are used
    ##'   by \code{resample}.
    ##' @param gof_model_simulator an instance that implements
    ##'   \link{GOF_model_simulator}
    ##' @param gof_model_trainer an instance that implements
    ##'   \link{GOF_model_trainer}
    ##' @return No explicit return
    initialize = function(gof_model_simulator, gof_model_trainer) {
      private$model_simulator <- gof_model_simulator
      private$model_trainer <- gof_model_trainer
    },

    ##' @description resamples the dependent variable in \code{data} and
    ##'   refits \code{model} to that new data set
    ##' @param model fitted model based on \code{data}
    ##' @param data used to fit \code{model}
    ##' @param y_name string specifying the name of the dependent variable
    ##' @return a resampled version of \code{model}
    resample = function(model, data, y_name) {
      # work on a copy whose dependent variable holds the simulated values
      resampled_data <- data
      resampled_data[[y_name]] <-
        private$model_simulator$resample_y(model = model)

      refitted <- private$model_trainer$refit(
        model = model,
        data = resampled_data
      )
      refitted
    }
  )
)
Loading

0 comments on commit 7090de3

Please sign in to comment.