-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from ucsf-bhhi/sampling-tools
initial sampling tools
- Loading branch information
Showing
41 changed files
with
1,125 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
Package: statewide.survey.tools | ||
Title: Statewide Survey Tools | ||
Version: 0.1.1 | ||
Version: 0.2.0 | ||
Authors@R: | ||
person("Eve", "Perry", , "[email protected]", role = c("aut", "cre")) | ||
Description: Helper tools for the Statewide Survey. | ||
|
@@ -11,14 +11,24 @@ Roxygen: list(markdown = TRUE) | |
RoxygenNote: 7.1.2 | ||
Imports: | ||
airtabler (>= 0.1.6), | ||
assertthat, | ||
dplyr, | ||
glue, | ||
httr, | ||
jsonlite, | ||
lubridate, | ||
magrittr, | ||
purrr, | ||
REDCapR, | ||
rlang, | ||
RStata (>= 1.2.0), | ||
tidyr | ||
tidyr, | ||
tidyselect | ||
Remotes: | ||
eveyp/RStata, | ||
bergant/airtabler | ||
URL: https://github.com/ucsf-bhhi/statewide.survey.tools | ||
BugReports: https://github.com/ucsf-bhhi/statewide.survey.tools/issues | ||
Suggests: | ||
testthat (>= 3.0.0) | ||
Config/testthat/edition: 3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,19 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export("%>%") | ||
export(adjust_new_sites) | ||
export(adjust_sheltered_unsheltered) | ||
export(connect_to_airtable) | ||
export(draw_sample) | ||
export(fetch_redcap_data) | ||
export(format_stata_date) | ||
export(insert_airtable_records) | ||
export(prepare_sample_for_airtable) | ||
export(pull_from_airtable) | ||
export(redcap_api_url) | ||
export(redcap_token) | ||
export(run_stata_cleaning) | ||
importFrom(assertthat,has_name) | ||
importFrom(magrittr,"%>%") | ||
importFrom(rlang,"!!") | ||
importFrom(rlang,abort) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#' Adjust New Sites' Sampling Weights | ||
#' | ||
#' Increases the sampling weights of new venues. New venues are ones have not | ||
#' been eligible to be selected in a previous sample. | ||
#' | ||
#' @param sampling_weight The name of the variable with the sampling weights. | ||
#' @param samples The name of the variable containing the samples the venue has been eligible for. | ||
#' @param new_venue_factor The scaling factor for new venues. | ||
#' | ||
#' @return A vector of adjusted sampling weights. | ||
#' @export | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' venues %>% | ||
#' mutate(sampling_weight = adjust_new_sites(sampling_weight, samples, 2)) | ||
#' } | ||
adjust_new_sites = function(sampling_weight, samples, new_venue_factor) { | ||
assertthat::assert_that(is.numeric(sampling_weight)) | ||
|
||
if (!is.list(samples)) | ||
abort("'samples' is not a list-column. Did you use the correct variable?") | ||
|
||
if (length(sampling_weight) != length(samples)) | ||
abort("'sampling_weight' and 'samples' must be the same length.") | ||
|
||
assertthat::assert_that(assertthat::is.number(new_venue_factor)) | ||
|
||
if (new_venue_factor <= 0) | ||
abort("'new_venue_factor' must be positive.") | ||
|
||
purrr::map2_dbl(sampling_weight, samples, ~ ifelse(is.null(.y), .x * new_venue_factor, .x)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
#' Adjust PEH Counts for Sheltered/Unsheltered Share | ||
#' | ||
#' Makes the adjustment to sampling weights so that the weighted average share | ||
#' of residential venue (ie. emergency shelters or encampments) PEH who are | ||
#' sheltered matches a target (usually from PIT counts). It's designed for use | ||
#' within a [dplyr::mutate()] pipeline when calculating the sampling weights. | ||
#' | ||
#' @param peh_count A numeric vector of venue PEH counts. | ||
#' @param site_category A character vector of venue site categories. | ||
#' @param sheltered_share The target sheltered share as a single numeric. | ||
#' | ||
#' @return A vector of adjusted PEH counts. | ||
#' @export | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' venue_data %>% | ||
#' mutate( | ||
#' sampling_weight = adjust_sheltered_unsheltered( | ||
#' final_peh_estimate, | ||
#' site_category, | ||
#' 0.45 | ||
#' ) | ||
#' ) | ||
#' } | ||
adjust_sheltered_unsheltered = function( | ||
peh_count, site_category, sheltered_share | ||
) { | ||
# check inputs | ||
assertthat::assert_that(is.numeric(peh_count)) | ||
assertthat::assert_that(is.character(site_category)) | ||
assertthat::assert_that( | ||
any(c("Emergency Shelter", "Encampment") %in% unique(site_category)), | ||
msg = "'site_category' doesn't have any sheltered or unsheltered venues. | ||
Is this the correct variable?" | ||
) | ||
assertthat::assert_that(assertthat::is.number(sheltered_share)) | ||
assertthat::assert_that( | ||
sheltered_share >= 0 & sheltered_share <= 1, | ||
msg = "'sheltered_share' must be between 0 and 1." | ||
) | ||
|
||
# turn site_category into a sheltered/unsheltered indicator | ||
sheltered_indicator = sheltered_or_unsheltered(site_category) | ||
|
||
# calculate observed sheltered share in the venue data | ||
observed_sheltered_share = observed_sheltered_share( | ||
sheltered_indicator, | ||
peh_count | ||
) | ||
|
||
# calculate the adjuster | ||
adjuster = calculate_adjuster(sheltered_share, observed_sheltered_share) | ||
|
||
# apply the adjuster | ||
purrr::map2_dbl(sheltered_indicator, peh_count, adjust_count, adjuster) | ||
} | ||
|
||
sheltered_or_unsheltered = function(site_category) { | ||
dplyr::case_when( | ||
site_category == "Emergency Shelter" ~ "Sheltered", | ||
site_category == "Encampment" ~ "Unsheltered", | ||
TRUE ~ NA_character_ | ||
) | ||
} | ||
|
||
observed_sheltered_share = function(sheltered_indicator, peh_count) { | ||
sheltered = sum( | ||
peh_count[sheltered_indicator == "Sheltered"], | ||
na.rm = TRUE | ||
) | ||
sheltered_or_unsheltered = sum(peh_count[!is.na(sheltered_indicator)]) | ||
|
||
return(sheltered / sheltered_or_unsheltered) | ||
} | ||
|
||
calculate_adjuster = function(sheltered_share, observed_sheltered_share) { | ||
sheltered_share / observed_sheltered_share | ||
} | ||
|
||
adjust_count = function(sheltered_indicator, peh_count, adjuster) { | ||
dplyr::case_when( | ||
sheltered_indicator == "Sheltered" ~ peh_count * adjuster, | ||
sheltered_indicator == "Unsheltered" ~ peh_count * 1 / adjuster, | ||
TRUE ~ as.numeric(peh_count) | ||
) | ||
} |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
#' Draw Venue Sample | ||
#' | ||
#' Draws a weighted, without replacement sample of venues. Supports both | ||
#' stratified and unstratified samples. | ||
#' | ||
#' @param data Data frame with venues. | ||
#' @param weights Name of the weighting variable. | ||
#' @param n Either a single number of venues to select, or a data frame with | ||
#' stratification plan (see below). | ||
#' | ||
#' @section Stratification Plan: | ||
#' To draw a stratified sample, include a data frame with the stratification | ||
#' plan. | ||
#' | ||
#' The data frame should include a row for each stratum with the values of | ||
#' the stratification variables and the number of venues to sample. The | ||
#' stratification variables must have the same name and coding as in the | ||
#' venue data. The variable with the the number of venues to sample must be | ||
#' named `n`. | ||
#' | ||
#' For example: | ||
#' | ||
#' | **dv_shelter** | **zone** | **n** | | ||
#' | -------------- | -------------- | ----- | | ||
#' | TRUE | Eastern Placer | 1 | | ||
#' | TRUE | Central Placer | 2 | | ||
#' | TRUE | South Placer | 3 | | ||
#' | FALSE | Eastern Placer | 4 | | ||
#' | FALSE | Central Placer | 5 | | ||
#' | FALSE | South Placer | 6 | | ||
#' | ||
#' This can be generated with the following R code: | ||
#' ``` | ||
#' tibble::tribble( | ||
#' ~dv_shelter, ~zone , ~n, | ||
#' TRUE , "Eastern Placer", 1, | ||
#' TRUE , "Central Placer", 2, | ||
#' TRUE , "South Placer" , 3, | ||
#' FALSE , "Eastern Placer", 4, | ||
#' FALSE , "Central Placer", 5, | ||
#' FALSE , "South Placer" , 6 | ||
#' ) | ||
#' ``` | ||
#' | ||
#' @return The venue data frame with the variable `sampled` added, which | ||
#' indicates whether the venue was selected. | ||
#' @export | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' draw_sample(venues, sampling_weight, 5) | ||
#' | ||
#' strata = tibble::tribble( | ||
#' ~zone , ~n, | ||
#' "Eastern Placer", 1, | ||
#' "Central Placer", 3, | ||
#' "South Placer" , 5 | ||
#' ) | ||
#' draw_sample(venues, sampling_weight, strata) | ||
#' } | ||
draw_sample = function(data, weights, n) { | ||
# make sure the main df doesn't already have a variable called sampled | ||
if (assertthat::has_name(data, "sampled")) | ||
abort("Venue data already has a variable named 'sampled'. Please rename or remove this variable.") | ||
|
||
# make sure that the main df has the weights variable | ||
weighting_var_name = rlang::as_string(rlang::ensym(weights)) | ||
if (!assertthat::has_name(data, weighting_var_name)) { | ||
abort(glue::glue("Venue data is missing the weighting variable: '{weighting_var_name}'.")) | ||
} | ||
|
||
n_int = assertthat::is.count(n) | ||
n_df = is.data.frame(n) | ||
|
||
if (!(n_int | n_df)) | ||
abort("n must be a single positive integer or a data frame with strata.") | ||
data = dplyr::mutate(data, .id = dplyr::row_number()) | ||
|
||
if (n_df) { | ||
# make sure the strata df has a column named n | ||
if (!assertthat::has_name(n, "n")) | ||
abort("Strata data frame is missing 'n' column.") | ||
|
||
# make sure the main df has all of the stratification variables | ||
strata_vars = names(n)[names(n) != "n"] | ||
purrr::walk( | ||
strata_vars, | ||
~ if ((!assertthat::has_name(data, .x))) | ||
abort( | ||
glue::glue( | ||
"Venue data is missing the stratification variable: '{.x}'." | ||
) | ||
) | ||
) | ||
|
||
results = vector("list", nrow(n)) | ||
for (i in seq_len(nrow(n))) { | ||
results[[i]] = dplyr::semi_join( | ||
data, | ||
n[i,], | ||
by = strata_vars | ||
) %>% | ||
dplyr::slice_sample(n = n$n[i], weight_by = {{ weights }}) %>% | ||
dplyr::pull(.id) | ||
} | ||
ids = purrr::flatten_chr(results) | ||
} | ||
|
||
if (n_int) { | ||
ids = dplyr::slice_sample(data, n = n, weight_by = {{ weights }}) %>% | ||
dplyr::pull(.id) | ||
} | ||
|
||
dplyr::mutate(data, sampled = .id %in% ids) %>% | ||
dplyr::select(-.id) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#' Downloads REDCap Data | ||
#' | ||
#' Downloads data from REDCap by wrapping [REDCapR::redcap_read()]. The API URL | ||
#' is filled automatically, but can be overridden. The token for the main | ||
#' project is also filled automatically, but can be overridden or replaced with | ||
#' the RDS project token. | ||
#' | ||
#' @param ... Options passed to [REDCapR::redcap_read()]. | ||
#' @param redcap_uri REDCap API URL. Defaults to [redcap_api_url()]. | ||
#' @param token REDCap API Token. Defaults to main project token. Use RDS | ||
#' project token with `redcap_token("rds")`. | ||
#' @param verbose Should messages be printed to the R console during the | ||
#' operation. The verbose output might contain sensitive information (e.g. | ||
#' PHI), so turn this off if the output might be visible somewhere public. | ||
#' | ||
#' @return A tibble with the requested data. | ||
#' @export | ||
#' @seealso [redcap_token()] | ||
fetch_redcap_data = function( | ||
..., | ||
redcap_uri = redcap_api_url(), | ||
token = redcap_token(), | ||
verbose = FALSE | ||
) { | ||
REDCapR::redcap_read_oneshot( | ||
..., | ||
redcap_uri = redcap_uri, | ||
token = token, | ||
verbose = verbose | ||
)$data %>% | ||
dplyr::as_tibble() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#' Convert a Stata date to text | ||
#' | ||
#' Converts a Stata date to a character string. | ||
#' | ||
#' @param date Stata date. | ||
#' @param format Format string for character representation. See | ||
#' [base::strptime()] for options. Defaults to formatting like January 1, | ||
#' 2022. | ||
#' | ||
#' @return A character string with the formatted date. | ||
#' @export | ||
format_stata_date = function(date, format = "%B %e, %Y") { | ||
lubridate::dmy(date) %>% | ||
as.character(format) | ||
} |
Oops, something went wrong.