From 6b5237d76676302770ea405ef4f2010029777a41 Mon Sep 17 00:00:00 2001 From: Ben Schneider Date: Sat, 9 Mar 2024 11:29:40 -0500 Subject: [PATCH] Keeping up with 'survey' 4.3: allow the user to specify a `degf` argument in `as_survey_rep()`. --- NEWS.md | 3 +++ R/as_survey_rep.r | 10 ++++++++-- man/as_survey_rep.Rd | 7 +++++++ tests/testthat/test_as_survey_rep.r | 15 +++++++++++++++ 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index c29f165..6538ff4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +# (development) +* `as_survey_rep()` now has an argument `degf`, corresponding to the same argument in the survey function `svrepdesign()`. This argument can be useful for large data sets, since specifying a value for `degf` avoids a calculation which can be slow for very large data sets. + # srvyr 1.2.0 * `survey_prop()` now uses proportions as the default, which should confidence interval improve coverage, but does mean results may slightly change (#141, #142, thanks @szimmer) * New function `survey_corr()` calculates the correlation between 2 variables, (#150, #151, thanks @szimmer & @bschneidr) diff --git a/R/as_survey_rep.r b/R/as_survey_rep.r index 19ec236..e001f05 100644 --- a/R/as_survey_rep.r +++ b/R/as_survey_rep.r @@ -32,6 +32,10 @@ #' @param fpctype Finite population correction information #' @param mse if \code{TRUE}, compute variances based on sum of squares #' around the point estimate, rather than the mean of the replicates +#' @param degf Design degrees of freedom: a single number, or \code{NULL}, +#' in which case a value will be computed automatically, which can be slow +#' for very large data sets. See \code{\link[survey]{svrepdesign}} +#' for more details. #' @param ... 
ignored #' @param compress if \code{TRUE}, store replicate weights in compressed form #' (if converting from design) @@ -73,7 +77,7 @@ as_survey_rep.data.frame <- "other"), combined_weights = TRUE, rho = NULL, bootstrap_average = NULL, scale = NULL, rscales = NULL, fpc = NULL, fpctype = c("fraction", "correction"), - mse = getOption("survey.replicates.mse"), ...) { + mse = getOption("survey.replicates.mse"), degf = NULL, ...) { variables <- srvyr_select_vars(rlang::enquo(variables), .data) repweights <- srvyr_select_vars(rlang::enquo(repweights), .data) weights <- srvyr_select_vars(rlang::enquo(weights), .data) @@ -85,6 +89,7 @@ as_survey_rep.data.frame <- repweights = repweights, weights = weights, data = .data, + degf = degf, type = type, combined.weights = combined_weights, rho = rho, @@ -112,7 +117,7 @@ as_survey_rep.tbl_lazy <- "other"), combined_weights = TRUE, rho = NULL, bootstrap_average = NULL, scale = NULL, rscales = NULL, fpc = NULL, fpctype = c("fraction", "correction"), - mse = getOption("survey.replicates.mse"), ...) { + mse = getOption("survey.replicates.mse"), degf = NULL, ...) { variables <- rlang::enquo(variables) repweights <- rlang::enquo(repweights) @@ -134,6 +139,7 @@ as_survey_rep.tbl_lazy <- repweights = repweights, weights = weights, data = survey_vars_local, + degf = degf, type = type, combined.weights = combined_weights, rho = rho, diff --git a/man/as_survey_rep.Rd b/man/as_survey_rep.Rd index 00682f0..369f98a 100644 --- a/man/as_survey_rep.Rd +++ b/man/as_survey_rep.Rd @@ -26,6 +26,7 @@ as_survey_rep(.data, ...) fpc = NULL, fpctype = c("fraction", "correction"), mse = getOption("survey.replicates.mse"), + degf = NULL, ... ) @@ -44,6 +45,7 @@ as_survey_rep(.data, ...) fpc = NULL, fpctype = c("fraction", "correction"), mse = getOption("survey.replicates.mse"), + degf = NULL, ... 
) @@ -105,6 +107,11 @@ weights have been averaged, gives the number of iterations averaged over.} \item{mse}{if \code{TRUE}, compute variances based on sum of squares around the point estimate, rather than the mean of the replicates} +\item{degf}{Design degrees of freedom: a single number, or \code{NULL}, +in which case a value will be computed automatically, which can be slow +for very large data sets. See \code{\link[survey]{svrepdesign}} +for more details.} + \item{compress}{if \code{TRUE}, store replicate weights in compressed form (if converting from design)} } diff --git a/tests/testthat/test_as_survey_rep.r b/tests/testthat/test_as_survey_rep.r index 7662faf..6c6456c 100644 --- a/tests/testthat/test_as_survey_rep.r +++ b/tests/testthat/test_as_survey_rep.r @@ -228,3 +228,18 @@ test_that("as_survey_rep works when using SDR/ACS method of replicate weights", expect_equal(c(out_survey[[1]], sqrt(attr(out_survey, "var"))), c(out_srvyr_acs[[1]][[1]], out_srvyr_acs[[2]][[1]])) }) + +# ------------------------------------------------------------------ +# Test user-specified degrees of freedom +# ------------------------------------------------------------------ + +sdr_srvyr <- cbind(sdr_sample, as.data.frame(sdr_factors)) %>% + as_survey_rep(repweights = starts_with("REP_"), + weights = "weights", + type = "successive-difference", + combined_weights = FALSE, + degf = 4) + +test_that("as_survey_rep accepts user-specified degf", { + expect_equal(degf(sdr_srvyr), 4) +})