From c272441ec2dfc41eef430de48581eaa42c924c1b Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Fri, 21 Jan 2022 16:32:51 -0800 Subject: [PATCH 01/16] mark internal --- R/stata.R | 2 ++ man/default_cleaning_do_file.Rd | 1 + man/get_stata_path.Rd | 1 + 3 files changed, 4 insertions(+) diff --git a/R/stata.R b/R/stata.R index c6f568c..6bcf9bf 100644 --- a/R/stata.R +++ b/R/stata.R @@ -29,6 +29,7 @@ run_stata_cleaning = function( #' Provides path to included Stata data cleaning code. #' #' @return Path to cleaning do file. +#' @keywords internal default_cleaning_do_file = function() { system.file("stata", "data_cleaning.do", package = "statewide.survey.tools") } @@ -36,6 +37,7 @@ default_cleaning_do_file = function() { #' Returns path to Stata binary #' #' @return Path to Stata binary. +#' @keywords internal get_stata_path = function() { path = getOption("RStata.StataPath") if (is.null(path)) { diff --git a/man/default_cleaning_do_file.Rd b/man/default_cleaning_do_file.Rd index f043b2e..88888fd 100644 --- a/man/default_cleaning_do_file.Rd +++ b/man/default_cleaning_do_file.Rd @@ -12,3 +12,4 @@ Path to cleaning do file. \description{ Provides path to included Stata data cleaning code. } +\keyword{internal} diff --git a/man/get_stata_path.Rd b/man/get_stata_path.Rd index 1ffc3e2..43bc52c 100644 --- a/man/get_stata_path.Rd +++ b/man/get_stata_path.Rd @@ -12,3 +12,4 @@ Path to Stata binary. \description{ Returns path to Stata binary } +\keyword{internal} From 3f7081dd2cb9447694e619547a465dd8e3d61c03 Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Fri, 21 Jan 2022 16:34:41 -0800 Subject: [PATCH 02/16] airtable uploads --- DESCRIPTION | 2 + NAMESPACE | 2 + R/airtable.R | 148 +++++++++++++++++++++++++++++ man/chunk_data.Rd | 20 ++++ man/insert_airtable_records.Rd | 33 +++++++ man/json_for_airtable.Rd | 16 ++++ man/post_to_airtable.Rd | 20 ++++ man/prepare_sample_for_airtable.Rd | 56 +++++++++++ 8 files changed, 297 insertions(+) create mode 100644 man/chunk_data.Rd create mode 100644 man/insert_airtable_records.Rd create mode 100644 man/json_for_airtable.Rd create mode 100644 man/post_to_airtable.Rd create mode 100644 man/prepare_sample_for_airtable.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 693c331..7c3a918 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,8 +12,10 @@ RoxygenNote: 7.1.2 Imports: airtabler (>= 0.1.6), dplyr, + httr, lubridate, magrittr, + purrr, REDCapR, RStata (>= 1.2.0), tidyr diff --git a/NAMESPACE b/NAMESPACE index f2ba316..879022a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,8 @@ export("%>%") export(connect_to_airtable) export(fetch_redcap_data) export(format_stata_date) +export(insert_airtable_records) +export(prepare_sample_for_airtable) export(redcap_api_url) export(redcap_token) export(run_stata_cleaning) diff --git a/R/airtable.R b/R/airtable.R index 6aa808c..a07f6e1 100644 --- a/R/airtable.R +++ b/R/airtable.R @@ -21,3 +21,151 @@ connect_to_airtable = function( ) { airtabler::airtable(base = base, tables = tables) } + +#' Prepare sampling data for Airtable +#' +#' Formats the sampling data for submission to Airtable. It selects only the +#' variables that will be sent to Airtable (internal Airtable venue id, sampling +#' weight, & selected indicator) and adds the sample metadata (ie. county, zone, +#' and round). +#' +#' @param sample Data frame with the full sample (ie. both selected and not +#' selected sites). +#' @param venue_id Name of the column with the internal Airtable venue ID (different than +#' the venue_id we commonly use like placer_123). +#' @param sampling_weight Name of the column with the sampling weights. +#' @param selected_indicator Name of the column which indicates whether the +#' venue was selected. +#' @param county Character string with the county name of the sample. +#' @param zone Character string with the zone of the sample. +#' @param round Character string or number with the round of the sample. +#' +#' @return A data frame ready to be passed to [insert_airtable_records()]. +#' @export +#' +#' @examples +#' \dontrun{ +#' prepare_sample_for_airtable( +#' final_sample, +#' id, +#' sampling_weight, +#' sampled, +#' "Sonoma", +#' "Santa Rosa", +#' 2 +#' ) +#' } +prepare_sample_for_airtable = function( + sample, + venue_id, + sampling_weight, + sampled_indicator, + county, + zone, + round +) { + sample %>% + select( + Venue = {{ venue_id }}, + {{ sampling_weight }}, + sampled = {{ selected_indicator }} + ) %>% + mutate( + Venue = as.list(Venue), + sampling_weight = as.numeric(sampling_weight), + sampled = as.integer(sampled), + sample_county = county, + sample_zone = zone, + sample_round = as.character(round) + ) +} + +#' Upload Data to Airtable +#' +#' Uploads a properly named and formatted data frame to Airtable. It handles +#' converting the data frame to JSON that Airtable understands, and splitting +#' the data into chunks of no more than 10 rows to comply with Airtable limits. +#' +#' @param data Data frame of properly named and formatted data. +#' @param base Airtable Base ID. +#' @param table Name of the Airtable table. +#' @param chunk_size Number of rows in each data chunk. Airtable accepts a +#' maximum of 10 rows in one request, so this cannot be greater than 10. +#' Default is 10. +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' insert_airtable_records( +#' data_for_airtable, +#' "appOLCptG2wxvoGtH", +#' "Samples" +#' ) +#' } +insert_airtable_records = function( + data, + base, + table, + chunk_size = 10 +) { + # break data into chunks of 10 rows each b/c the airtable api will only + # take that many in one request + chunk_data(data, chunk_size = 10) %>% + purrr::walk(post_to_airtable, base = base, table = table) +} + +#' Split Data Frame into Chunks +#' +#' Breaks a data frame into equal sized chunks for upload to Airtable. +#' +#' @param data A data frame or tibble. +#' @param chunk_size The number of rows in each chunk. +#' +#' @return A list of data frames or tibbles. +#' +#' @keywords internal +chunk_data = function(data, chunk_size) { + dplyr::group_split(data, group_id = (dplyr::row_number() - 1) %/% chunk_size, .keep = FALSE) +} + +#' Send Upload Request to Airtable +#' +#' Sends an HTTP POST request to Airtable. It handles converting the data frame +#' to JSON, and creating and submitting the request. +#' +#' @param data A data frame or tibble with no more than 10 rows. +#' @param base Airtable Base ID. +#' @param table Airtable table name. +#' +#' @keywords internal +post_to_airtable = function(data, base, table) { + httr::POST( + url = glue::glue("https://api.airtable.com/v0/{base}/{URLencode(table)}"), + httr::add_headers( + Authorization = paste("Bearer", Sys.getenv("AIRTABLE_API_KEY")), + `Content-Type` = "application/json" + ), + body = json_for_airtable(data), + encode = "raw" + ) +} + +#' Convert Data Frame to Airtable JSON +#' +#' Converts a data frame or tibble to the JSON data format accepted by the +#' Airtable API. +#' +#' @param data Data frame/tibble with properly named and formatted columns. +#' +#' +#' @keywords internal +json_for_airtable = function(data) { + # turn the data frame into a list of its rows + rows = split(data, seq(nrow(data))) %>% + stats::setNames(rep(NULL, nrow(data))) %>% + purrr::map(~ list(fields = jsonlite::unbox(.x))) + + list(records = rows, typecast = jsonlite::unbox(TRUE)) %>% + jsonlite::toJSON(pretty = T) +} diff --git a/man/chunk_data.Rd b/man/chunk_data.Rd new file mode 100644 index 0000000..536a572 --- /dev/null +++ b/man/chunk_data.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/airtable.R +\name{chunk_data} +\alias{chunk_data} +\title{Split Data Frame into Chunks} +\usage{ +chunk_data(data, chunk_size) +} +\arguments{ +\item{data}{A data frame or tibble.} + +\item{chunk_size}{The number of rows in each chunk.} +} +\value{ +A list of data frames or tibbles. +} +\description{ +Breaks a data frame into equal sized chunks for upload to Airtable. +} +\keyword{internal} diff --git a/man/insert_airtable_records.Rd b/man/insert_airtable_records.Rd new file mode 100644 index 0000000..734795e --- /dev/null +++ b/man/insert_airtable_records.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/airtable.R +\name{insert_airtable_records} +\alias{insert_airtable_records} +\title{Upload Data to Airtable} +\usage{ +insert_airtable_records(data, base, table, chunk_size = 10) +} +\arguments{ +\item{data}{Data frame of properly named and formatted data.} + +\item{base}{Airtable Base ID.} + +\item{table}{Name of the Airtable table.} + +\item{chunk_size}{Number of rows in each data chunk. Airtable accepts a +maximum of 10 rows in one request, so this cannot be greater than 10. +Default is 10.} +} +\description{ +Uploads a properly named and formatted data frame to Airtable. It handles +converting the data frame to JSON that Airtable understands, and splitting +the data into chunks of no more than 10 rows to comply with Airtable limits. +} +\examples{ +\dontrun{ + insert_airtable_records( + data_for_airtable, + "appOLCptG2wxvoGtH", + "Samples" + ) +} +} diff --git a/man/json_for_airtable.Rd b/man/json_for_airtable.Rd new file mode 100644 index 0000000..a4865f2 --- /dev/null +++ b/man/json_for_airtable.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/airtable.R +\name{json_for_airtable} +\alias{json_for_airtable} +\title{Convert Data Frame to Airtable JSON} +\usage{ +json_for_airtable(data) +} +\arguments{ +\item{data}{Data frame/tibble with properly named and formatted columns.} +} +\description{ +Converts a data frame or tibble to the JSON data format accepted by the +Airtable API. +} +\keyword{internal} diff --git a/man/post_to_airtable.Rd b/man/post_to_airtable.Rd new file mode 100644 index 0000000..46f7fb4 --- /dev/null +++ b/man/post_to_airtable.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/airtable.R +\name{post_to_airtable} +\alias{post_to_airtable} +\title{Send Upload Request to Airtable} +\usage{ +post_to_airtable(data, base, table) +} +\arguments{ +\item{data}{A data frame or tibble with no more than 10 rows.} + +\item{base}{Airtable Base ID.} + +\item{table}{Airtable table name.} +} +\description{ +Sends an HTTP POST request to Airtable. It handles converting the data frame +to JSON, and creating and submitting the request. +} +\keyword{internal} diff --git a/man/prepare_sample_for_airtable.Rd b/man/prepare_sample_for_airtable.Rd new file mode 100644 index 0000000..aec175c --- /dev/null +++ b/man/prepare_sample_for_airtable.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/airtable.R +\name{prepare_sample_for_airtable} +\alias{prepare_sample_for_airtable} +\title{Prepare sampling data for Airtable} +\usage{ +prepare_sample_for_airtable( + sample, + venue_id, + sampling_weight, + sampled_indicator, + county, + zone, + round +) +} +\arguments{ +\item{sample}{Data frame with the full sample (ie. both selected and not +selected sites).} + +\item{venue_id}{Name of the column with the internal Airtable venue ID (different than +the venue_id we commonly use like placer_123).} + +\item{sampling_weight}{Name of the column with the sampling weights.} + +\item{county}{Character string with the county name of the sample.} + +\item{zone}{Character string with the zone of the sample.} + +\item{round}{Character string or number with the round of the sample.} + +\item{selected_indicator}{Name of the column which indicates whether the +venue was selected.} +} +\value{ +A data frame ready to be passed to \code{\link[=insert_airtable_records]{insert_airtable_records()}}. +} +\description{ +Formats the sampling data for submission to Airtable. It selects only the +variables that will be sent to Airtable (internal Airtable venue id, sampling +weight, & selected indicator) and adds the sample metadata (ie. county, zone, +and round). +} +\examples{ +\dontrun{ + prepare_sample_for_airtable( + final_sample, + id, + sampling_weight, + sampled, + "Sonoma", + "Santa Rosa", + 2 + ) +} +} From ac551003830a80601f0fac81577a6141390af2fc Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Fri, 21 Jan 2022 16:35:20 -0800 Subject: [PATCH 03/16] airtable downloads --- NAMESPACE | 1 + R/airtable.R | 57 +++++++++++++++++++++++++++++++++++++++ man/fix_names.Rd | 15 +++++++++++ man/pull_from_airtable.Rd | 42 +++++++++++++++++++++++++++++ 4 files changed, 115 insertions(+) create mode 100644 man/fix_names.Rd create mode 100644 man/pull_from_airtable.Rd diff --git a/NAMESPACE b/NAMESPACE index 879022a..d295b08 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(fetch_redcap_data) export(format_stata_date) export(insert_airtable_records) export(prepare_sample_for_airtable) +export(pull_from_airtable) export(redcap_api_url) export(redcap_token) export(run_stata_cleaning) diff --git a/R/airtable.R b/R/airtable.R index a07f6e1..33c771a 100644 --- a/R/airtable.R +++ b/R/airtable.R @@ -169,3 +169,60 @@ json_for_airtable = function(data) { list(records = rows, typecast = jsonlite::unbox(TRUE)) %>% jsonlite::toJSON(pretty = T) } + +#' Download Airtable Data +#' +#' Downloads data from Airtable and peforms some basic cleaning. +#' +#' The cleaning steps are: +#' * Make variable names lowercase and replace spaces with underscores +#' * Convert specified list-columns to regular columns (these should only be list-columns of length 1) +#' * In logical variables, replace NA with FALSE +#' * In character variables, remove leading and trailing whitespace +#' +#' @param base Airtable Base object +#' @param table Airtable Table names +#' @param unnest_cols +#' [List-columns](https://jennybc.github.io/purrr-tutorial/ls13_list-columns.html) +#' to convert in +#' [tidyselect](https://tidyselect.r-lib.org/reference/language.html) +#' format. +#' @param ... Options to pass to [airtabler::air_select()]. +#' +#' @return A tibble with the cleaned data. +#' @export +#' +#' @examples +#' \dontrun{ +#' base = connect_to_airtable() +#' pull_from_airtable(base, "Samples") +#' pull_from_airtable(base, "Venues", unnest_cols = final_peh_estimate) +#' } +pull_from_airtable = function(base, table, unnest_cols, ...) { + # download all the records from the table + base[[table]][["select_all"]](...) %>% + # clean up the variable names (lowercase and spaces to underscores) + dplyr::rename_with(fix_names) %>% + # convert requested list columns + tidyr::unnest({{ unnest_cols }}) %>% + dplyr::mutate( + # in logical variables replace NA with FALSE + dplyr::across(tidyselect::vars_select_helpers$where(is.logical), tidyr::replace_na, FALSE), + # trim leading and trailing whitespace in all character variables + dplyr::across(tidyselect::vars_select_helpers$where(is.character), trimws) + ) +} + +#' Fix Variable Names +#' +#' Make variable names lower case and switch spaces to underscores. +#' +#' @param name Variable name as a string. +#' +#' @keywords internal +fix_names = function(name) { + tolower( + # swap spaces for underscores + gsub(" ", "_", name) + ) +} diff --git a/man/fix_names.Rd b/man/fix_names.Rd new file mode 100644 index 0000000..66fda65 --- /dev/null +++ b/man/fix_names.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/airtable.R +\name{fix_names} +\alias{fix_names} +\title{Fix Variable Names} +\usage{ +fix_names(name) +} +\arguments{ +\item{name}{Variable name as a string.} +} +\description{ +Make variable names lower case and switch spaces to underscores. +} +\keyword{internal} diff --git a/man/pull_from_airtable.Rd b/man/pull_from_airtable.Rd new file mode 100644 index 0000000..3456afd --- /dev/null +++ b/man/pull_from_airtable.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/airtable.R +\name{pull_from_airtable} +\alias{pull_from_airtable} +\title{Download Airtable Data} +\usage{ +pull_from_airtable(base, table, unnest_cols, ...) +} +\arguments{ +\item{base}{Airtable Base object} + +\item{table}{Airtable Table names} + +\item{unnest_cols}{\href{https://jennybc.github.io/purrr-tutorial/ls13_list-columns.html}{List-columns} +to convert in +\href{https://tidyselect.r-lib.org/reference/language.html}{tidyselect} +format.} + +\item{...}{Options to pass to \code{\link[airtabler:air_select]{airtabler::air_select()}}.} +} +\value{ +A tibble with the cleaned data. +} +\description{ +Downloads data from Airtable and peforms some basic cleaning. +} +\details{ +The cleaning steps are: +\itemize{ +\item Make variable names lowercase and replace spaces with underscores +\item Convert specified list-columns to regular columns (these should only be list-columns of length 1) +\item In logical variables, replace NA with FALSE +\item In character variables, remove leading and trailing whitespace +} +} +\examples{ +\dontrun{ + base = connect_to_airtable() + pull_from_airtable(base, "Samples") + pull_from_airtable(base, "Venues", unnest_cols = final_peh_estimate) +} +} From 26acd7010bfcc0f4a7201f5a9e40ed9ef4dea163 Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Wed, 26 Jan 2022 13:07:40 -0800 Subject: [PATCH 04/16] fix names --- R/airtable.R | 8 ++++---- man/prepare_sample_for_airtable.Rd | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/airtable.R b/R/airtable.R index 33c771a..668c29d 100644 --- a/R/airtable.R +++ b/R/airtable.R @@ -34,7 +34,7 @@ connect_to_airtable = function( #' @param venue_id Name of the column with the internal Airtable venue ID (different than #' the venue_id we commonly use like placer_123). #' @param sampling_weight Name of the column with the sampling weights. -#' @param selected_indicator Name of the column which indicates whether the +#' @param sampled_indicator Name of the column which indicates whether the #' venue was selected. #' @param county Character string with the county name of the sample. #' @param zone Character string with the zone of the sample. @@ -65,12 +65,12 @@ prepare_sample_for_airtable = function( round ) { sample %>% - select( + dplyr::select( Venue = {{ venue_id }}, {{ sampling_weight }}, - sampled = {{ selected_indicator }} + sampled = {{ sampled_indicator }} ) %>% - mutate( + dplyr::mutate( Venue = as.list(Venue), sampling_weight = as.numeric(sampling_weight), sampled = as.integer(sampled), diff --git a/man/prepare_sample_for_airtable.Rd b/man/prepare_sample_for_airtable.Rd index aec175c..a199b0d 100644 --- a/man/prepare_sample_for_airtable.Rd +++ b/man/prepare_sample_for_airtable.Rd @@ -23,14 +23,14 @@ the venue_id we commonly use like placer_123).} \item{sampling_weight}{Name of the column with the sampling weights.} +\item{sampled_indicator}{Name of the column which indicates whether the +venue was selected.} + \item{county}{Character string with the county name of the sample.} \item{zone}{Character string with the zone of the sample.} \item{round}{Character string or number with the round of the sample.} - -\item{selected_indicator}{Name of the column which indicates whether the -venue was selected.} } \value{ A data frame ready to be passed to \code{\link[=insert_airtable_records]{insert_airtable_records()}}. From 226ddc081a5d8a302ac4aa25657330cc069e916b Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Wed, 26 Jan 2022 13:08:05 -0800 Subject: [PATCH 05/16] update dependencies --- DESCRIPTION | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7c3a918..8fa7529 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,14 +11,19 @@ Roxygen: list(markdown = TRUE) RoxygenNote: 7.1.2 Imports: airtabler (>= 0.1.6), + assertthat, dplyr, + glue, httr, + jsonlite, lubridate, magrittr, purrr, REDCapR, + rlang, RStata (>= 1.2.0), - tidyr + tidyr, + tidyselect Remotes: eveyp/RStata, bergant/airtabler From 116b1b11d069cf0c2e1d41d05fad9491a5f54e25 Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Wed, 26 Jan 2022 13:08:25 -0800 Subject: [PATCH 06/16] import functions --- NAMESPACE | 3 +++ R/statewide.survey.tools.R | 8 ++++++++ man/statewide.survey.tools.Rd | 8 ++++++++ 3 files changed, 19 insertions(+) create mode 100644 R/statewide.survey.tools.R create mode 100644 man/statewide.survey.tools.Rd diff --git a/NAMESPACE b/NAMESPACE index d295b08..8011e1e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,4 +10,7 @@ export(pull_from_airtable) export(redcap_api_url) export(redcap_token) export(run_stata_cleaning) +importFrom(assertthat,has_name) importFrom(magrittr,"%>%") +importFrom(rlang,"!!") +importFrom(rlang,abort) diff --git a/R/statewide.survey.tools.R b/R/statewide.survey.tools.R new file mode 100644 index 0000000..b2381c1 --- /dev/null +++ b/R/statewide.survey.tools.R @@ -0,0 +1,8 @@ +#' statewide.survey.tools +#' +#' @importFrom assertthat has_name +#' @importFrom rlang abort +#' @importFrom rlang !! +#' +#' @name statewide.survey.tools +NULL diff --git a/man/statewide.survey.tools.Rd b/man/statewide.survey.tools.Rd new file mode 100644 index 0000000..a194d34 --- /dev/null +++ b/man/statewide.survey.tools.Rd @@ -0,0 +1,8 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/statewide.survey.tools.R +\name{statewide.survey.tools} +\alias{statewide.survey.tools} +\title{statewide.survey.tools} +\description{ +statewide.survey.tools +} From dd14673f49c66bcaab355923b905b836f3d1113a Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Wed, 26 Jan 2022 13:09:15 -0800 Subject: [PATCH 07/16] add draw_sample function --- NAMESPACE | 1 + R/sampling.R | 116 +++++++++++++++++++++++++++++++++++++++++++++ man/draw_sample.Rd | 71 +++++++++++++++++++++++++++ 3 files changed, 188 insertions(+) create mode 100644 R/sampling.R create mode 100644 man/draw_sample.Rd diff --git a/NAMESPACE b/NAMESPACE index 8011e1e..7dadaa7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export("%>%") export(connect_to_airtable) +export(draw_sample) export(fetch_redcap_data) export(format_stata_date) export(insert_airtable_records) diff --git a/R/sampling.R b/R/sampling.R new file mode 100644 index 0000000..2aee58c --- /dev/null +++ b/R/sampling.R @@ -0,0 +1,116 @@ +#' Draw Venue Sample +#' +#' Draws a weighted, without replacement sample of venues. Supports both +#' stratified and unstratified samples. +#' +#' @param data Data frame with venues. +#' @param weights Name of the weighting variable. +#' @param n Either a single number of venues to select, or a data frame with +#' stratification plan (see below). +#' +#' @section Stratification Plan: +#' To draw a stratified sample, include a data frame with the stratification +#' plan. +#' +#' The data frame should include a row for each stratum with the values of +#' the stratification variables and the number of venues to sample. The +#' stratification variables must have the same name and coding as in the +#' venue data. The variable with the the number of venues to sample must be +#' named `n`. +#' +#' For example: +#' +#' | **dv_shelter** | **zone** | **n** | +#' | -------------- | -------------- | ----- | +#' | TRUE | Eastern Placer | 1 | +#' | TRUE | Central Placer | 2 | +#' | TRUE | South Placer | 3 | +#' | FALSE | Eastern Placer | 4 | +#' | FALSE | Central Placer | 5 | +#' | FALSE | South Placer | 6 | +#' +#' This can be generated with the following R code: +#' ``` +#' tibble::tribble( +#' ~dv_shelter, ~zone , ~n, +#' TRUE , "Eastern Placer", 1, +#' TRUE , "Central Placer", 2, +#' TRUE , "South Placer" , 3, +#' FALSE , "Eastern Placer", 4, +#' FALSE , "Central Placer", 5, +#' FALSE , "South Placer" , 6 +#' ) +#' ``` +#' +#' @return The venue data frame with the variable `sampled` added, which +#' indicates whether the venue was selected. +#' @export +#' +#' @examples +#' \dontrun{ +#' draw_sample(venues, sampling_weight, 5) +#' +#' strata = tibble::tribble( +#' ~zone , ~n, +#' "Eastern Placer", 1, +#' "Central Placer", 3, +#' "South Placer" , 5 +#' ) +#' draw_sample(venues, sampling_weight, strata) +#' } +draw_sample = function(data, weights, n) { + # make sure the main df doesn't already have a variable called sampled + if (has_name(data, "sampled")) + abort("Venue data already has a variable named 'sampled'. Please rename or remove this variable.") + + # make sure that the main df has the weights variable + weighting_var_name = rlang::as_string(rlang::ensym(weights)) + if (!has_name(data, weighting_var_name)) { + abort(glue::glue("Venue data is missing the weighting variable: '{weighting_var_name}'.")) + } + + n_int = assertthat::is.count(n) + n_df = is.data.frame(n) + + if (!(n_int | n_df)) + abort("n must be a single positive integer or a data frame with strata.") + data = dplyr::mutate(data, .id = dplyr::row_number()) + + if (n_df) { + # make sure the strata df has a column named n + if (!has_name(n, "n")) + abort("Strata data frame is missing 'n' column.") + + # make sure the main df has all of the stratification variables + strata_vars = names(n)[names(n) != "n"] + purrr::walk( + strata_vars, + ~ if ((!has_name(data, .x))) + abort( + glue::glue( + "Venue data is missing the stratification variable: '{.x}'." + ) + ) + ) + + results = vector("list", nrow(n)) + for (i in seq_len(nrow(n))) { + results[[i]] = dplyr::semi_join( + data, + n[i,], + by = strata_vars + ) %>% + dplyr::slice_sample(n = n$n[i], weight_by = {{ weights }}) %>% + dplyr::pull(.id) + } + ids = purrr::flatten_chr(results) + } + + if (n_int) { + ids = dplyr::slice_sample(data, n = n, weight_by = {{ weights }}) %>% + dplyr::pull(.id) + } + + dplyr::mutate(data, sampled = .id %in% ids) %>% + dplyr::select(-.id) +} diff --git a/man/draw_sample.Rd b/man/draw_sample.Rd new file mode 100644 index 0000000..3ed58da --- /dev/null +++ b/man/draw_sample.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sampling.R +\name{draw_sample} +\alias{draw_sample} +\title{Draw Venue Sample} +\usage{ +draw_sample(data, weights, n) +} +\arguments{ +\item{data}{Data frame with venues.} + +\item{weights}{Name of the weighting variable.} + +\item{n}{Either a single number of venues to select, or a data frame with +stratification plan (see below).} +} +\value{ +The venue data frame with the variable \code{sampled} added, which +indicates whether the venue was selected. +} +\description{ +Draws a weighted, without replacement sample of venues. Supports both +stratified and unstratified samples. +} +\section{Stratification Plan}{ + +To draw a stratified sample, include a data frame with the stratification +plan. + +The data frame should include a row for each stratum with the values of +the stratification variables and the number of venues to sample. The +stratification variables must have the same name and coding as in the +venue data. The variable with the the number of venues to sample must be +named \code{n}. + +For example:\tabular{lll}{ + \strong{dv_shelter} \tab \strong{zone} \tab \strong{n} \cr + TRUE \tab Eastern Placer \tab 1 \cr + TRUE \tab Central Placer \tab 2 \cr + TRUE \tab South Placer \tab 3 \cr + FALSE \tab Eastern Placer \tab 4 \cr + FALSE \tab Central Placer \tab 5 \cr + FALSE \tab South Placer \tab 6 \cr +} + + +This can be generated with the following R code:\preformatted{tibble::tribble( + ~dv_shelter, ~zone , ~n, + TRUE , "Eastern Placer", 1, + TRUE , "Central Placer", 2, + TRUE , "South Placer" , 3, + FALSE , "Eastern Placer", 4, + FALSE , "Central Placer", 5, + FALSE , "South Placer" , 6 +) +} +} + +\examples{ +\dontrun{ + draw_sample(venues, sampling_weight, 5) + + strata = tibble::tribble( + ~zone , ~n, + "Eastern Placer", 1, + "Central Placer", 3, + "South Placer" , 5 + ) + draw_sample(venues, sampling_weight, strata) +} +} From e8a871b1b843512b7cbb227e892332e82ce27a02 Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Thu, 27 Jan 2022 17:27:36 -0800 Subject: [PATCH 08/16] split out functions to their own files --- R/airtable.R | 228 ---------------------------- R/connect_to_airtable.R | 23 +++ R/{sampling.R => draw_sample.R} | 16 +- R/fetch_redcap_data.R | 32 ++++ R/format_stata_date.R | 15 ++ R/insert_airtable_records.R | 89 +++++++++++ R/prepare_sample_for_airtable.R | 57 +++++++ R/pull_from_airtable.R | 57 +++++++ R/{redcap.R => redcap_utils.R} | 33 ---- R/{stata.R => run_stata_cleaning.R} | 16 -- man/chunk_data.Rd | 2 +- man/connect_to_airtable.Rd | 2 +- man/default_cleaning_do_file.Rd | 2 +- man/draw_sample.Rd | 2 +- man/fetch_redcap_data.Rd | 2 +- man/fix_names.Rd | 2 +- man/format_stata_date.Rd | 2 +- man/get_stata_path.Rd | 2 +- man/get_stata_version.Rd | 2 +- man/insert_airtable_records.Rd | 2 +- man/json_for_airtable.Rd | 2 +- man/post_to_airtable.Rd | 2 +- man/prepare_sample_for_airtable.Rd | 2 +- man/pull_from_airtable.Rd | 2 +- man/redcap_api_url.Rd | 2 +- man/redcap_token.Rd | 2 +- man/run_stata_cleaning.Rd | 2 +- 27 files changed, 298 insertions(+), 302 deletions(-) delete mode 100644 R/airtable.R create mode 100644 R/connect_to_airtable.R rename R/{sampling.R => draw_sample.R} (91%) create mode 100644 R/fetch_redcap_data.R create mode 100644 R/format_stata_date.R create mode 100644 R/insert_airtable_records.R create mode 100644 R/prepare_sample_for_airtable.R create mode 100644 R/pull_from_airtable.R rename R/{redcap.R => redcap_utils.R} (55%) rename R/{stata.R => run_stata_cleaning.R} (76%) diff --git a/R/airtable.R b/R/airtable.R deleted file mode 100644 index 668c29d..0000000 --- a/R/airtable.R +++ /dev/null @@ -1,228 +0,0 @@ -#' Connect to the Airtable API -#' -#' Makes a connection to the Airtable API using the [airtabler::airtabler()] -#' package. -#' -#' Expects an API key in an environment variable `AIRTABLE_API_KEY`. To start R -#' session with the initialized environment variable create an .Renviron file -#' with a line like this: `AIRTABLE_API_KEY=`. -#' -#' See https://airtable.com/appOLCptG2wxvoGtH/api/docs#curl/authentication for -#' details on obtaining an Airtable API key. -#' -#' @param base ID for the airtable base. -#' @param tables Names of the tables to include. -#' -#' @return An Airtable base object from [airtabler::airtable()]. -#' @export -connect_to_airtable = function( - base = "appOLCptG2wxvoGtH", - tables = c("Venues", "Samples", "Visit Check-Out") -) { - airtabler::airtable(base = base, tables = tables) -} - -#' Prepare sampling data for Airtable -#' -#' Formats the sampling data for submission to Airtable. It selects only the -#' variables that will be sent to Airtable (internal Airtable venue id, sampling -#' weight, & selected indicator) and adds the sample metadata (ie. county, zone, -#' and round). -#' -#' @param sample Data frame with the full sample (ie. both selected and not -#' selected sites). -#' @param venue_id Name of the column with the internal Airtable venue ID (different than -#' the venue_id we commonly use like placer_123). -#' @param sampling_weight Name of the column with the sampling weights. -#' @param sampled_indicator Name of the column which indicates whether the -#' venue was selected. -#' @param county Character string with the county name of the sample. -#' @param zone Character string with the zone of the sample. -#' @param round Character string or number with the round of the sample. -#' -#' @return A data frame ready to be passed to [insert_airtable_records()]. -#' @export -#' -#' @examples -#' \dontrun{ -#' prepare_sample_for_airtable( -#' final_sample, -#' id, -#' sampling_weight, -#' sampled, -#' "Sonoma", -#' "Santa Rosa", -#' 2 -#' ) -#' } -prepare_sample_for_airtable = function( - sample, - venue_id, - sampling_weight, - sampled_indicator, - county, - zone, - round -) { - sample %>% - dplyr::select( - Venue = {{ venue_id }}, - {{ sampling_weight }}, - sampled = {{ sampled_indicator }} - ) %>% - dplyr::mutate( - Venue = as.list(Venue), - sampling_weight = as.numeric(sampling_weight), - sampled = as.integer(sampled), - sample_county = county, - sample_zone = zone, - sample_round = as.character(round) - ) -} - -#' Upload Data to Airtable -#' -#' Uploads a properly named and formatted data frame to Airtable. It handles -#' converting the data frame to JSON that Airtable understands, and splitting -#' the data into chunks of no more than 10 rows to comply with Airtable limits. -#' -#' @param data Data frame of properly named and formatted data. -#' @param base Airtable Base ID. -#' @param table Name of the Airtable table. -#' @param chunk_size Number of rows in each data chunk. Airtable accepts a -#' maximum of 10 rows in one request, so this cannot be greater than 10. -#' Default is 10. -#' -#' @export -#' -#' @examples -#' \dontrun{ -#' insert_airtable_records( -#' data_for_airtable, -#' "appOLCptG2wxvoGtH", -#' "Samples" -#' ) -#' } -insert_airtable_records = function( - data, - base, - table, - chunk_size = 10 -) { - # break data into chunks of 10 rows each b/c the airtable api will only - # take that many in one request - chunk_data(data, chunk_size = 10) %>% - purrr::walk(post_to_airtable, base = base, table = table) -} - -#' Split Data Frame into Chunks -#' -#' Breaks a data frame into equal sized chunks for upload to Airtable. -#' -#' @param data A data frame or tibble. -#' @param chunk_size The number of rows in each chunk. -#' -#' @return A list of data frames or tibbles. -#' -#' @keywords internal -chunk_data = function(data, chunk_size) { - dplyr::group_split(data, group_id = (dplyr::row_number() - 1) %/% chunk_size, .keep = FALSE) -} - -#' Send Upload Request to Airtable -#' -#' Sends an HTTP POST request to Airtable. It handles converting the data frame -#' to JSON, and creating and submitting the request. -#' -#' @param data A data frame or tibble with no more than 10 rows. -#' @param base Airtable Base ID. -#' @param table Airtable table name. -#' -#' @keywords internal -post_to_airtable = function(data, base, table) { - httr::POST( - url = glue::glue("https://api.airtable.com/v0/{base}/{URLencode(table)}"), - httr::add_headers( - Authorization = paste("Bearer", Sys.getenv("AIRTABLE_API_KEY")), - `Content-Type` = "application/json" - ), - body = json_for_airtable(data), - encode = "raw" - ) -} - -#' Convert Data Frame to Airtable JSON -#' -#' Converts a data frame or tibble to the JSON data format accepted by the -#' Airtable API. -#' -#' @param data Data frame/tibble with properly named and formatted columns. -#' -#' -#' @keywords internal -json_for_airtable = function(data) { - # turn the data frame into a list of its rows - rows = split(data, seq(nrow(data))) %>% - stats::setNames(rep(NULL, nrow(data))) %>% - purrr::map(~ list(fields = jsonlite::unbox(.x))) - - list(records = rows, typecast = jsonlite::unbox(TRUE)) %>% - jsonlite::toJSON(pretty = T) -} - -#' Download Airtable Data -#' -#' Downloads data from Airtable and peforms some basic cleaning. -#' -#' The cleaning steps are: -#' * Make variable names lowercase and replace spaces with underscores -#' * Convert specified list-columns to regular columns (these should only be list-columns of length 1) -#' * In logical variables, replace NA with FALSE -#' * In character variables, remove leading and trailing whitespace -#' -#' @param base Airtable Base object -#' @param table Airtable Table names -#' @param unnest_cols -#' [List-columns](https://jennybc.github.io/purrr-tutorial/ls13_list-columns.html) -#' to convert in -#' [tidyselect](https://tidyselect.r-lib.org/reference/language.html) -#' format. -#' @param ... Options to pass to [airtabler::air_select()]. -#' -#' @return A tibble with the cleaned data. -#' @export -#' -#' @examples -#' \dontrun{ -#' base = connect_to_airtable() -#' pull_from_airtable(base, "Samples") -#' pull_from_airtable(base, "Venues", unnest_cols = final_peh_estimate) -#' } -pull_from_airtable = function(base, table, unnest_cols, ...) { - # download all the records from the table - base[[table]][["select_all"]](...) %>% - # clean up the variable names (lowercase and spaces to underscores) - dplyr::rename_with(fix_names) %>% - # convert requested list columns - tidyr::unnest({{ unnest_cols }}) %>% - dplyr::mutate( - # in logical variables replace NA with FALSE - dplyr::across(tidyselect::vars_select_helpers$where(is.logical), tidyr::replace_na, FALSE), - # trim leading and trailing whitespace in all character variables - dplyr::across(tidyselect::vars_select_helpers$where(is.character), trimws) - ) -} - -#' Fix Variable Names -#' -#' Make variable names lower case and switch spaces to underscores. -#' -#' @param name Variable name as a string. -#' -#' @keywords internal -fix_names = function(name) { - tolower( - # swap spaces for underscores - gsub(" ", "_", name) - ) -} diff --git a/R/connect_to_airtable.R b/R/connect_to_airtable.R new file mode 100644 index 0000000..6aa808c --- /dev/null +++ b/R/connect_to_airtable.R @@ -0,0 +1,23 @@ +#' Connect to the Airtable API +#' +#' Makes a connection to the Airtable API using the [airtabler::airtabler()] +#' package. +#' +#' Expects an API key in an environment variable `AIRTABLE_API_KEY`. To start R +#' session with the initialized environment variable create an .Renviron file +#' with a line like this: `AIRTABLE_API_KEY=`. +#' +#' See https://airtable.com/appOLCptG2wxvoGtH/api/docs#curl/authentication for +#' details on obtaining an Airtable API key. +#' +#' @param base ID for the airtable base. +#' @param tables Names of the tables to include. +#' +#' @return An Airtable base object from [airtabler::airtable()]. +#' @export +connect_to_airtable = function( + base = "appOLCptG2wxvoGtH", + tables = c("Venues", "Samples", "Visit Check-Out") +) { + airtabler::airtable(base = base, tables = tables) +} diff --git a/R/sampling.R b/R/draw_sample.R similarity index 91% rename from R/sampling.R rename to R/draw_sample.R index 2aee58c..8636ccb 100644 --- a/R/sampling.R +++ b/R/draw_sample.R @@ -60,12 +60,12 @@ #' } draw_sample = function(data, weights, n) { # make sure the main df doesn't already have a variable called sampled - if (has_name(data, "sampled")) + if (assertthat::has_name(data, "sampled")) abort("Venue data already has a variable named 'sampled'. Please rename or remove this variable.") # make sure that the main df has the weights variable weighting_var_name = rlang::as_string(rlang::ensym(weights)) - if (!has_name(data, weighting_var_name)) { + if (!assertthat::has_name(data, weighting_var_name)) { abort(glue::glue("Venue data is missing the weighting variable: '{weighting_var_name}'.")) } @@ -78,19 +78,19 @@ draw_sample = function(data, weights, n) { if (n_df) { # make sure the strata df has a column named n - if (!has_name(n, "n")) + if (!assertthat::has_name(n, "n")) abort("Strata data frame is missing 'n' column.") # make sure the main df has all of the stratification variables strata_vars = names(n)[names(n) != "n"] purrr::walk( strata_vars, - ~ if ((!has_name(data, .x))) - abort( - glue::glue( - "Venue data is missing the stratification variable: '{.x}'." - ) + ~ if ((!assertthat::has_name(data, .x))) + abort( + glue::glue( + "Venue data is missing the stratification variable: '{.x}'." ) + ) ) results = vector("list", nrow(n)) diff --git a/R/fetch_redcap_data.R b/R/fetch_redcap_data.R new file mode 100644 index 0000000..cd4ca4a --- /dev/null +++ b/R/fetch_redcap_data.R @@ -0,0 +1,32 @@ +#' Downloads REDCap Data +#' +#' Downloads data from REDCap by wrapping [REDCapR::redcap_read()]. The API URL +#' is filled automatically, but can be overridden. The token for the main +#' project is also filled automatically, but can be overridden or replaced with +#' the RDS project token. +#' +#' @param ... Options passed to [REDCapR::redcap_read()]. +#' @param redcap_uri REDCap API URL. Defaults to [redcap_api_url()]. +#' @param token REDCap API Token. Defaults to main project token. Use RDS +#' project token with `redcap_token("rds")`. +#' @param verbose Should messages be printed to the R console during the +#' operation. The verbose output might contain sensitive information (e.g. +#' PHI), so turn this off if the output might be visible somewhere public. +#' +#' @return A tibble with the requested data. +#' @export +#' @seealso [redcap_token()] +fetch_redcap_data = function( + ..., + redcap_uri = redcap_api_url(), + token = redcap_token(), + verbose = FALSE +) { + REDCapR::redcap_read_oneshot( + ..., + redcap_uri = redcap_uri, + token = token, + verbose = verbose + )$data %>% + dplyr::as_tibble() +} diff --git a/R/format_stata_date.R b/R/format_stata_date.R new file mode 100644 index 0000000..002df16 --- /dev/null +++ b/R/format_stata_date.R @@ -0,0 +1,15 @@ +#' Convert a Stata date to text +#' +#' Converts a Stata date to a character string. +#' +#' @param date Stata date. +#' @param format Format string for character representation. See +#' [base::strptime()] for options. Defaults to formatting like January 1, +#' 2022. +#' +#' @return A character string with the formatted date. +#' @export +format_stata_date = function(date, format = "%B %e, %Y") { + lubridate::dmy(date) %>% + as.character(format) +} diff --git a/R/insert_airtable_records.R b/R/insert_airtable_records.R new file mode 100644 index 0000000..f51d72b --- /dev/null +++ b/R/insert_airtable_records.R @@ -0,0 +1,89 @@ +#' Upload Data to Airtable +#' +#' Uploads a properly named and formatted data frame to Airtable. It handles +#' converting the data frame to JSON that Airtable understands, and splitting +#' the data into chunks of no more than 10 rows to comply with Airtable limits. +#' +#' @param data Data frame of properly named and formatted data. +#' @param base Airtable Base ID. +#' @param table Name of the Airtable table. +#' @param chunk_size Number of rows in each data chunk. Airtable accepts a +#' maximum of 10 rows in one request, so this cannot be greater than 10. +#' Default is 10. +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' insert_airtable_records( +#' data_for_airtable, +#' "appOLCptG2wxvoGtH", +#' "Samples" +#' ) +#' } +insert_airtable_records = function( + data, + base, + table, + chunk_size = 10 +) { + # break data into chunks of 10 rows each b/c the airtable api will only + # take that many in one request + chunk_data(data, chunk_size = 10) %>% + purrr::walk(post_to_airtable, base = base, table = table) +} + +#' Split Data Frame into Chunks +#' +#' Breaks a data frame into equal sized chunks for upload to Airtable. +#' +#' @param data A data frame or tibble. +#' @param chunk_size The number of rows in each chunk. +#' +#' @return A list of data frames or tibbles. +#' +#' @keywords internal +chunk_data = function(data, chunk_size) { + dplyr::group_split(data, group_id = (dplyr::row_number() - 1) %/% chunk_size, .keep = FALSE) +} + +#' Send Upload Request to Airtable +#' +#' Sends an HTTP POST request to Airtable. It handles converting the data frame +#' to JSON, and creating and submitting the request. +#' +#' @param data A data frame or tibble with no more than 10 rows. +#' @param base Airtable Base ID. +#' @param table Airtable table name. +#' +#' @keywords internal +post_to_airtable = function(data, base, table) { + httr::POST( + url = glue::glue("https://api.airtable.com/v0/{base}/{URLencode(table)}"), + httr::add_headers( + Authorization = paste("Bearer", Sys.getenv("AIRTABLE_API_KEY")), + `Content-Type` = "application/json" + ), + body = json_for_airtable(data), + encode = "raw" + ) +} + +#' Convert Data Frame to Airtable JSON +#' +#' Converts a data frame or tibble to the JSON data format accepted by the +#' Airtable API. +#' +#' @param data Data frame/tibble with properly named and formatted columns. +#' +#' +#' @keywords internal +json_for_airtable = function(data) { + # turn the data frame into a list of its rows + rows = split(data, seq(nrow(data))) %>% + stats::setNames(rep(NULL, nrow(data))) %>% + purrr::map(~ list(fields = jsonlite::unbox(.x))) + + list(records = rows, typecast = jsonlite::unbox(TRUE)) %>% + jsonlite::toJSON(pretty = T) +} diff --git a/R/prepare_sample_for_airtable.R b/R/prepare_sample_for_airtable.R new file mode 100644 index 0000000..603900e --- /dev/null +++ b/R/prepare_sample_for_airtable.R @@ -0,0 +1,57 @@ +#' Prepare sampling data for Airtable +#' +#' Formats the sampling data for submission to Airtable. It selects only the +#' variables that will be sent to Airtable (internal Airtable venue id, sampling +#' weight, & selected indicator) and adds the sample metadata (ie. county, zone, +#' and round). +#' +#' @param sample Data frame with the full sample (ie. both selected and not +#' selected sites). +#' @param venue_id Name of the column with the internal Airtable venue ID (different than +#' the venue_id we commonly use like placer_123). +#' @param sampling_weight Name of the column with the sampling weights. +#' @param sampled_indicator Name of the column which indicates whether the +#' venue was selected. +#' @param county Character string with the county name of the sample. +#' @param zone Character string with the zone of the sample. +#' @param round Character string or number with the round of the sample. +#' +#' @return A data frame ready to be passed to [insert_airtable_records()]. +#' @export +#' +#' @examples +#' \dontrun{ +#' prepare_sample_for_airtable( +#' final_sample, +#' id, +#' sampling_weight, +#' sampled, +#' "Sonoma", +#' "Santa Rosa", +#' 2 +#' ) +#' } +prepare_sample_for_airtable = function( + sample, + venue_id, + sampling_weight, + sampled_indicator, + county, + zone, + round +) { + sample %>% + dplyr::select( + Venue = {{ venue_id }}, + {{ sampling_weight }}, + sampled = {{ sampled_indicator }} + ) %>% + dplyr::mutate( + Venue = as.list(Venue), + sampling_weight = as.numeric(sampling_weight), + sampled = as.integer(sampled), + sample_county = county, + sample_zone = zone, + sample_round = as.character(round) + ) +} diff --git a/R/pull_from_airtable.R b/R/pull_from_airtable.R new file mode 100644 index 0000000..4b28d25 --- /dev/null +++ b/R/pull_from_airtable.R @@ -0,0 +1,57 @@ +#' Download Airtable Data +#' +#' Downloads data from Airtable and peforms some basic cleaning. +#' +#' The cleaning steps are: +#' * Make variable names lowercase and replace spaces with underscores +#' * Convert specified list-columns to regular columns (these should only be list-columns of length 1) +#' * In logical variables, replace NA with FALSE +#' * In character variables, remove leading and trailing whitespace +#' +#' @param base Airtable Base object +#' @param table Airtable Table names +#' @param unnest_cols +#' [List-columns](https://jennybc.github.io/purrr-tutorial/ls13_list-columns.html) +#' to convert in +#' [tidyselect](https://tidyselect.r-lib.org/reference/language.html) +#' format. +#' @param ... Options to pass to [airtabler::air_select()]. +#' +#' @return A tibble with the cleaned data. +#' @export +#' +#' @examples +#' \dontrun{ +#' base = connect_to_airtable() +#' pull_from_airtable(base, "Samples") +#' pull_from_airtable(base, "Venues", unnest_cols = final_peh_estimate) +#' } +pull_from_airtable = function(base, table, unnest_cols, ...) { + # download all the records from the table + base[[table]][["select_all"]](...) %>% + # clean up the variable names (lowercase and spaces to underscores) + dplyr::rename_with(fix_names) %>% + # convert requested list columns + tidyr::unnest({{ unnest_cols }}) %>% + dplyr::mutate( + # in logical variables replace NA with FALSE + dplyr::across(tidyselect::vars_select_helpers$where(is.logical), tidyr::replace_na, FALSE), + # trim leading and trailing whitespace in all character variables + dplyr::across(tidyselect::vars_select_helpers$where(is.character), trimws) + ) +} + +#' Fix Variable Names +#' +#' Make variable names lower case and switch spaces to underscores. +#' +#' @param name Variable name as a string. +#' +#' @keywords internal +fix_names = function(name) { + tolower( + # swap spaces for underscores + gsub(" ", "_", name) + ) +} + diff --git a/R/redcap.R b/R/redcap_utils.R similarity index 55% rename from R/redcap.R rename to R/redcap_utils.R index fe89f56..422b7da 100644 --- a/R/redcap.R +++ b/R/redcap_utils.R @@ -40,36 +40,3 @@ redcap_token = function(project = "main") { NULL } } - -#' Downloads REDCap Data -#' -#' Downloads data from REDCap by wrapping [REDCapR::redcap_read()]. The API URL -#' is filled automatically, but can be overridden. The token for the main -#' project is also filled automatically, but can be overridden or replaced with -#' the RDS project token. -#' -#' @param ... Options passed to [REDCapR::redcap_read()]. -#' @param redcap_uri REDCap API URL. Defaults to [redcap_api_url()]. -#' @param token REDCap API Token. Defaults to main project token. Use RDS -#' project token with `redcap_token("rds")`. -#' @param verbose Should messages be printed to the R console during the -#' operation. The verbose output might contain sensitive information (e.g. -#' PHI), so turn this off if the output might be visible somewhere public. -#' -#' @return A tibble with the requested data. -#' @export -#' @seealso [redcap_token()] -fetch_redcap_data = function( - ..., - redcap_uri = redcap_api_url(), - token = redcap_token(), - verbose = FALSE -) { - REDCapR::redcap_read_oneshot( - ..., - redcap_uri = redcap_uri, - token = token, - verbose = verbose - )$data %>% - dplyr::as_tibble() -} diff --git a/R/stata.R b/R/run_stata_cleaning.R similarity index 76% rename from R/stata.R rename to R/run_stata_cleaning.R index 6bcf9bf..c12acf0 100644 --- a/R/stata.R +++ b/R/run_stata_cleaning.R @@ -58,19 +58,3 @@ get_stata_version = function() { version } - -#' Convert a Stata date to text -#' -#' Converts a Stata date to a character string. -#' -#' @param date Stata date. -#' @param format Format string for character representation. See -#' [base::strptime()] for options. Defaults to formatting like January 1, -#' 2022. -#' -#' @return A character string with the formatted date. -#' @export -format_stata_date = function(date, format = "%B %e, %Y") { - lubridate::dmy(date) %>% - as.character(format) -} diff --git a/man/chunk_data.Rd b/man/chunk_data.Rd index 536a572..e85eb4a 100644 --- a/man/chunk_data.Rd +++ b/man/chunk_data.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/airtable.R +% Please edit documentation in R/insert_airtable_records.R \name{chunk_data} \alias{chunk_data} \title{Split Data Frame into Chunks} diff --git a/man/connect_to_airtable.Rd b/man/connect_to_airtable.Rd index d0e991e..0e14ffa 100644 --- a/man/connect_to_airtable.Rd +++ b/man/connect_to_airtable.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/airtable.R +% Please edit documentation in R/connect_to_airtable.R \name{connect_to_airtable} \alias{connect_to_airtable} \title{Connect to the Airtable API} diff --git a/man/default_cleaning_do_file.Rd b/man/default_cleaning_do_file.Rd index 88888fd..9fbabcc 100644 --- a/man/default_cleaning_do_file.Rd +++ b/man/default_cleaning_do_file.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/stata.R +% Please edit documentation in R/run_stata_cleaning.R \name{default_cleaning_do_file} \alias{default_cleaning_do_file} \title{Provides path to included Stata data cleaning code.} diff --git a/man/draw_sample.Rd b/man/draw_sample.Rd index 3ed58da..deaa55a 100644 --- a/man/draw_sample.Rd +++ b/man/draw_sample.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sampling.R +% Please edit documentation in R/draw_sample.R \name{draw_sample} \alias{draw_sample} \title{Draw Venue Sample} diff --git a/man/fetch_redcap_data.Rd b/man/fetch_redcap_data.Rd index fd7714c..7a07edd 100644 --- a/man/fetch_redcap_data.Rd +++ b/man/fetch_redcap_data.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/redcap.R +% Please edit documentation in R/fetch_redcap_data.R \name{fetch_redcap_data} \alias{fetch_redcap_data} \title{Downloads REDCap Data} diff --git a/man/fix_names.Rd b/man/fix_names.Rd index 66fda65..0ee0eaa 100644 --- a/man/fix_names.Rd +++ b/man/fix_names.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/airtable.R +% Please edit documentation in R/pull_from_airtable.R \name{fix_names} \alias{fix_names} \title{Fix Variable Names} diff --git a/man/format_stata_date.Rd b/man/format_stata_date.Rd index 14ef0d7..5dcf9be 100644 --- a/man/format_stata_date.Rd +++ b/man/format_stata_date.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/stata.R +% Please edit documentation in R/format_stata_date.R \name{format_stata_date} \alias{format_stata_date} \title{Convert a Stata date to text} diff --git a/man/get_stata_path.Rd b/man/get_stata_path.Rd index 43bc52c..d0a808f 100644 --- a/man/get_stata_path.Rd +++ b/man/get_stata_path.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/stata.R +% Please edit documentation in R/run_stata_cleaning.R \name{get_stata_path} \alias{get_stata_path} \title{Returns path to Stata binary} diff --git a/man/get_stata_version.Rd b/man/get_stata_version.Rd index 794a6a8..258f798 100644 --- a/man/get_stata_version.Rd +++ b/man/get_stata_version.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/stata.R +% Please edit documentation in R/run_stata_cleaning.R \name{get_stata_version} \alias{get_stata_version} \title{Returns Stata version} diff --git a/man/insert_airtable_records.Rd b/man/insert_airtable_records.Rd index 734795e..c18e870 100644 --- a/man/insert_airtable_records.Rd +++ b/man/insert_airtable_records.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/airtable.R +% Please edit documentation in R/insert_airtable_records.R \name{insert_airtable_records} \alias{insert_airtable_records} \title{Upload Data to Airtable} diff --git a/man/json_for_airtable.Rd b/man/json_for_airtable.Rd index a4865f2..429ff9a 100644 --- a/man/json_for_airtable.Rd +++ b/man/json_for_airtable.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/airtable.R +% Please edit documentation in R/insert_airtable_records.R \name{json_for_airtable} \alias{json_for_airtable} \title{Convert Data Frame to Airtable JSON} diff --git a/man/post_to_airtable.Rd b/man/post_to_airtable.Rd index 46f7fb4..5e16511 100644 --- a/man/post_to_airtable.Rd +++ b/man/post_to_airtable.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/airtable.R +% Please edit documentation in R/insert_airtable_records.R \name{post_to_airtable} \alias{post_to_airtable} \title{Send Upload Request to Airtable} diff --git a/man/prepare_sample_for_airtable.Rd b/man/prepare_sample_for_airtable.Rd index a199b0d..bfc3eba 100644 --- a/man/prepare_sample_for_airtable.Rd +++ b/man/prepare_sample_for_airtable.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/airtable.R +% Please edit documentation in R/prepare_sample_for_airtable.R \name{prepare_sample_for_airtable} \alias{prepare_sample_for_airtable} \title{Prepare sampling data for Airtable} diff --git a/man/pull_from_airtable.Rd b/man/pull_from_airtable.Rd index 3456afd..e3b809f 100644 --- a/man/pull_from_airtable.Rd +++ b/man/pull_from_airtable.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/airtable.R +% Please edit documentation in R/pull_from_airtable.R \name{pull_from_airtable} \alias{pull_from_airtable} \title{Download Airtable Data} diff --git a/man/redcap_api_url.Rd b/man/redcap_api_url.Rd index 3d458a0..81e6a05 100644 --- a/man/redcap_api_url.Rd +++ b/man/redcap_api_url.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/redcap.R +% Please edit documentation in R/redcap_utils.R \name{redcap_api_url} \alias{redcap_api_url} \title{REDCap API URL} diff --git a/man/redcap_token.Rd b/man/redcap_token.Rd index c5d927e..cd3ea19 100644 --- a/man/redcap_token.Rd +++ b/man/redcap_token.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/redcap.R +% Please edit documentation in R/redcap_utils.R \name{redcap_token} \alias{redcap_token} \title{Fetch appropriate REDCap API Token} diff --git a/man/run_stata_cleaning.Rd b/man/run_stata_cleaning.Rd index 6927c8b..31775cd 100644 --- a/man/run_stata_cleaning.Rd +++ b/man/run_stata_cleaning.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/stata.R +% Please edit documentation in R/run_stata_cleaning.R \name{run_stata_cleaning} \alias{run_stata_cleaning} \title{Runs Stata REDCap data cleaning code.} From ad5d2f0c127ceaead8c922cb34e6a08d26a2104a Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Thu, 27 Jan 2022 17:28:06 -0800 Subject: [PATCH 09/16] adjust_sheltered_unsheltered function --- NAMESPACE | 1 + R/adjust_sheltered_unsheltered.R | 72 ++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 R/adjust_sheltered_unsheltered.R diff --git a/NAMESPACE b/NAMESPACE index 7dadaa7..184d498 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export("%>%") +export(adjust_sheltered_unsheltered) export(connect_to_airtable) export(draw_sample) export(fetch_redcap_data) diff --git a/R/adjust_sheltered_unsheltered.R b/R/adjust_sheltered_unsheltered.R new file mode 100644 index 0000000..55deb43 --- /dev/null +++ b/R/adjust_sheltered_unsheltered.R @@ -0,0 +1,72 @@ +#' Adjust PEH Counts for Sheltered/Unsheltered Share +#' +#' @param peh_count A numeric vector of venue PEH counts. +#' @param site_category A character vector of venue site categories. +#' @param sheltered_share The target sheltered share as a single numeric. +#' +#' @return A vector of adjusted PEH counts. +#' @export +#' +#' @examples +adjust_sheltered_unsheltered = function( + peh_count, site_category, sheltered_share +) { + # check inputs + assertthat::assert_that(is.numeric(peh_count)) + assertthat::assert_that(is.character(site_category)) + assertthat::assert_that( + any(c("Emergency Shelter", "Encampment") %in% unique(site_category)), + msg = "'site_category' doesn't have any sheltered or unsheltered venues. + Is this the correct variable?" + ) + assertthat::assert_that(assertthat::is.number(sheltered_share)) + assertthat::assert_that( + sheltered_share >= 0 & sheltered_share <= 1, + msg = "'sheltered_share' must be between 0 and 1." + ) + + # turn site_category into a sheltered/unsheltered indicator + sheltered_indicator = sheltered_or_unsheltered(site_category) + + # calculate observed sheltered share in the venue data + observed_sheltered_share = observed_sheltered_share( + sheltered_indicator, + peh_count + ) + + # calculate the adjuster + adjuster = calculate_adjuster(sheltered_share, observed_sheltered_share) + + # apply the adjuster + purrr::map2_dbl(sheltered_indicator, peh_count, adjust_count, adjuster) +} + +sheltered_or_unsheltered = function(site_category) { + dplyr::case_when( + site_category == "Emergency Shelter" ~ "Sheltered", + site_category == "Encampment" ~ "Unsheltered", + TRUE ~ NA_character_ + ) +} + +observed_sheltered_share = function(sheltered_indicator, peh_count) { + sheltered = sum( + peh_count[sheltered_indicator == "Sheltered"], + na.rm = TRUE + ) + sheltered_or_unsheltered = sum(peh_count[!is.na(sheltered_indicator)]) + + return(sheltered / sheltered_or_unsheltered) +} + +calculate_adjuster = function(sheltered_share, observed_sheltered_share) { + sheltered_share / observed_sheltered_share +} + +adjust_count = function(sheltered_indicator, peh_count, adjuster) { + dplyr::case_when( + sheltered_indicator == "Sheltered" ~ peh_count * adjuster, + sheltered_indicator == "Unsheltered" ~ peh_count * 1 / adjuster, + TRUE ~ peh_count + ) +} From e4e5b4ce7ae5a3d36992980bb1592e5f42e2b51b Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Thu, 27 Jan 2022 17:28:19 -0800 Subject: [PATCH 10/16] setup tests --- DESCRIPTION | 3 +++ tests/testthat.R | 4 ++++ 2 files changed, 7 insertions(+) create mode 100644 tests/testthat.R diff --git a/DESCRIPTION b/DESCRIPTION index 8fa7529..4865339 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,3 +29,6 @@ Remotes: bergant/airtabler URL: https://github.com/ucsf-bhhi/statewide.survey.tools BugReports: https://github.com/ucsf-bhhi/statewide.survey.tools/issues +Suggests: + testthat (>= 3.0.0) +Config/testthat/edition: 3 diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..a64b065 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(statewide.survey.tools) + +test_check("statewide.survey.tools") From 0d83fdaad8261999f781bb9e4a05f25fe2cba6ec Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Thu, 27 Jan 2022 17:28:41 -0800 Subject: [PATCH 11/16] adjust_sheltered_unsheltered tests --- .../_snaps/adjust_sheltered_unsheltered.md | 8 ++ .../test-adjust_sheltered_unsheltered.R | 77 +++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 tests/testthat/_snaps/adjust_sheltered_unsheltered.md create mode 100644 tests/testthat/test-adjust_sheltered_unsheltered.R diff --git a/tests/testthat/_snaps/adjust_sheltered_unsheltered.md b/tests/testthat/_snaps/adjust_sheltered_unsheltered.md new file mode 100644 index 0000000..9acd84e --- /dev/null +++ b/tests/testthat/_snaps/adjust_sheltered_unsheltered.md @@ -0,0 +1,8 @@ +# adjust_sheltered_unsheltered works + + { + "type": "double", + "attributes": {}, + "value": [6.27273, 9.40909, 10, 2, 2.55072, 7.65217, 0.63768, 6.37681, 18.81818, 12.75362] + } + diff --git a/tests/testthat/test-adjust_sheltered_unsheltered.R b/tests/testthat/test-adjust_sheltered_unsheltered.R new file mode 100644 index 0000000..55443c8 --- /dev/null +++ b/tests/testthat/test-adjust_sheltered_unsheltered.R @@ -0,0 +1,77 @@ +peh_count = c(4, 6, 10, 2, 4, 12, 1, 10, 12, 20) +site_category = c( + "Emergency Shelter", "Emergency Shelter", "Non Shelter Venue", "Hotspot", + "Encampment", "Encampment", "Encampment", "Encampment", + "Emergency Shelter", "Encampment" +) +no_sheltered_unsheltered = c("Non Shelter Venue", "Hotspot") + +test_that("sheltered_or_unsheltered works", { + expect_equal(sheltered_or_unsheltered("Emergency Shelter"), "Sheltered") + expect_equal(sheltered_or_unsheltered("Encampment"), "Unsheltered") + expect_equal(sheltered_or_unsheltered("Non Shelter Venue"), NA_character_) + expect_equal(sheltered_or_unsheltered("Hotspot"), NA_character_) + expect_equal(sheltered_or_unsheltered(""), NA_character_) + expect_equal(sheltered_or_unsheltered(NA_character_), NA_character_) +}) + +test_that("observed_sheltered_share works", { + sheltered_indicator = sheltered_or_unsheltered(site_category) + + expect_equal( + observed_sheltered_share(sheltered_indicator, peh_count), + 22 / (22 + 47) + ) +}) + +test_that("calculate_adjuster works", { + expect_equal(calculate_adjuster(0.75, 0.25), 3) +}) + +test_that("adjust_count works", { + expect_equal(adjust_count("Sheltered", 10, 2), 20) + expect_equal(adjust_count("Unsheltered", 10, 2), 5) + expect_equal(adjust_count(NA_character_, 10, 2), 10) +}) + +test_that("adjust_sheltered_unsheltered input checking works", { + expect_error( + adjust_sheltered_unsheltered(site_category, peh_count, 0.5), + "peh_count is not a numeric or integer vector" + ) + + expect_error( + adjust_sheltered_unsheltered(peh_count, peh_count, 0.5), + "site_category is not a character vector" + ) + + expect_error( + adjust_sheltered_unsheltered(peh_count, no_sheltered_unsheltered, 0.5), + "'site_category' doesn't have any sheltered or unsheltered venues. + Is this the correct variable?" + ) + + expect_error( + adjust_sheltered_unsheltered(peh_count, site_category, 1.1), + "'sheltered_share' must be between 0 and 1." + ) + + # something seems broken with testing of assertthat::is.number so skipping + # this test + # expect_error( + # adjust_sheltered_unsheltered(peh_count, site_category, c(0.25, 0.5)), + # "sheltered_share is not a number (a length one numeric vector)." + # ) + + expect_error( + adjust_sheltered_unsheltered(peh_count, site_category, -0.1), + "'sheltered_share' must be between 0 and 1." + ) +}) + +test_that("adjust_sheltered_unsheltered works", { + expect_snapshot_value( + round(adjust_sheltered_unsheltered(peh_count, site_category, 0.5), 5), + style = "json2" + ) +}) From baf2517b13d7d6ecfdaddbd65100672227acda2a Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Thu, 27 Jan 2022 17:44:01 -0800 Subject: [PATCH 12/16] adjust_sheltered_unsheltered help --- R/adjust_sheltered_unsheltered.R | 15 ++++++++++++ man/adjust_sheltered_unsheltered.Rd | 36 +++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 man/adjust_sheltered_unsheltered.Rd diff --git a/R/adjust_sheltered_unsheltered.R b/R/adjust_sheltered_unsheltered.R index 55deb43..5f68a15 100644 --- a/R/adjust_sheltered_unsheltered.R +++ b/R/adjust_sheltered_unsheltered.R @@ -1,5 +1,10 @@ #' Adjust PEH Counts for Sheltered/Unsheltered Share #' +#' Makes the adjustment to sampling weights so that the weighted average share +#' of residential venue (ie. emergency shelters or encampments) PEH who are +#' sheltered matches a target (usually from PIT counts). It's designed for use +#' within a [dplyr::mutate()] pipeline when calculating the sampling weights. +#' #' @param peh_count A numeric vector of venue PEH counts. #' @param site_category A character vector of venue site categories. #' @param sheltered_share The target sheltered share as a single numeric. @@ -8,6 +13,16 @@ #' @export #' #' @examples +#' \dontrun{ +#' venue_data %>% +#' mutate( +#' sampling_weight = adjust_sheltered_unsheltered( +#' final_peh_estimate, +#' site_category, +#' 0.45 +#' ) +#' ) +#' } adjust_sheltered_unsheltered = function( peh_count, site_category, sheltered_share ) { diff --git a/man/adjust_sheltered_unsheltered.Rd b/man/adjust_sheltered_unsheltered.Rd new file mode 100644 index 0000000..796d9b2 --- /dev/null +++ b/man/adjust_sheltered_unsheltered.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/adjust_sheltered_unsheltered.R +\name{adjust_sheltered_unsheltered} +\alias{adjust_sheltered_unsheltered} +\title{Adjust PEH Counts for Sheltered/Unsheltered Share} +\usage{ +adjust_sheltered_unsheltered(peh_count, site_category, sheltered_share) +} +\arguments{ +\item{peh_count}{A numeric vector of venue PEH counts.} + +\item{site_category}{A character vector of venue site categories.} + +\item{sheltered_share}{The target sheltered share as a single numeric.} +} +\value{ +A vector of adjusted PEH counts. +} +\description{ +Makes the adjustment to sampling weights so that the weighted average share +of residential venue (ie. emergency shelters or encampments) PEH who are +sheltered matches a target (usually from PIT counts). It's designed for use +within a \code{\link[dplyr:mutate]{dplyr::mutate()}} pipeline when calculating the sampling weights. +} +\examples{ +\dontrun{ + venue_data \%>\% + mutate( + sampling_weight = adjust_sheltered_unsheltered( + final_peh_estimate, + site_category, + 0.45 + ) + ) +} +} From 88936b4086ad51fbb848cfc82dbe7cc5092b00cf Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Wed, 2 Feb 2022 16:38:38 -0800 Subject: [PATCH 13/16] fix type --- R/adjust_sheltered_unsheltered.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/adjust_sheltered_unsheltered.R b/R/adjust_sheltered_unsheltered.R index 5f68a15..2aba343 100644 --- a/R/adjust_sheltered_unsheltered.R +++ b/R/adjust_sheltered_unsheltered.R @@ -82,6 +82,6 @@ adjust_count = function(sheltered_indicator, peh_count, adjuster) { dplyr::case_when( sheltered_indicator == "Sheltered" ~ peh_count * adjuster, sheltered_indicator == "Unsheltered" ~ peh_count * 1 / adjuster, - TRUE ~ peh_count + TRUE ~ as.numeric(peh_count) ) } From c5f89a650e3b470cc0ae2ab3680187febd07ae9e Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Wed, 2 Feb 2022 16:39:13 -0800 Subject: [PATCH 14/16] adjust_new_sites function --- NAMESPACE | 1 + R/adjust_new_sites.R | 33 ++++++++++++++++++++++++++ man/adjust_new_sites.Rd | 28 ++++++++++++++++++++++ tests/testthat/test-adjust_new_sites.R | 9 +++++++ 4 files changed, 71 insertions(+) create mode 100644 R/adjust_new_sites.R create mode 100644 man/adjust_new_sites.Rd create mode 100644 tests/testthat/test-adjust_new_sites.R diff --git a/NAMESPACE b/NAMESPACE index 184d498..49aa43b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export("%>%") +export(adjust_new_sites) export(adjust_sheltered_unsheltered) export(connect_to_airtable) export(draw_sample) diff --git a/R/adjust_new_sites.R b/R/adjust_new_sites.R new file mode 100644 index 0000000..28b3a1e --- /dev/null +++ b/R/adjust_new_sites.R @@ -0,0 +1,33 @@ +#' Adjust New Sites' Sampling Weights +#' +#' Increases the sampling weights of new venues. New venues are ones have not +#' been eligible to be selected in a previous sample. +#' +#' @param sampling_weight The name of the variable with the sampling weights. +#' @param samples The name of the variable containing the samples the venue has been eligible for. +#' @param new_venue_factor The scaling factor for new venues. +#' +#' @return A vector of adjusted sampling weights. +#' @export +#' +#' @examples +#' \dontrun{ +#' venues %>% +#' mutate(sampling_weight = adjust_new_sites(sampling_weight, samples, 2)) +#' } +adjust_new_sites = function(sampling_weight, samples, new_venue_factor) { + assertthat::assert_that(is.numeric(sampling_weight)) + + if (!is.list(samples)) + abort("'samples' is not a list-column. Did you use the correct variable?") + + if (length(sampling_weight) != length(samples)) + abort("'sampling_weight' and 'samples' must be the same length.") + + assertthat::assert_that(assertthat::is.number(new_venue_factor)) + + if (new_venue_factor <= 0) + abort("'new_venue_factor' must be positive.") + + purrr::map2_dbl(sampling_weight, samples, ~ ifelse(is.null(.y), .x * new_venue_factor, .x)) +} diff --git a/man/adjust_new_sites.Rd b/man/adjust_new_sites.Rd new file mode 100644 index 0000000..5d249c4 --- /dev/null +++ b/man/adjust_new_sites.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/adjust_new_sites.R +\name{adjust_new_sites} +\alias{adjust_new_sites} +\title{Adjust New Sites' Sampling Weights} +\usage{ +adjust_new_sites(sampling_weight, samples, new_venue_factor) +} +\arguments{ +\item{sampling_weight}{The name of the variable with the sampling weights.} + +\item{samples}{The name of the variable containing the samples the venue has been eligible for.} + +\item{new_venue_factor}{The scaling factor for new venues.} +} +\value{ +A vector of adjusted sampling weights. +} +\description{ +Increases the sampling weights of new venues. New venues are ones have not +been eligible to be selected in a previous sample. +} +\examples{ +\dontrun{ +venues \%>\% + mutate(sampling_weight = adjust_new_sites(sampling_weight, samples, 2)) +} +} diff --git a/tests/testthat/test-adjust_new_sites.R b/tests/testthat/test-adjust_new_sites.R new file mode 100644 index 0000000..57a9a7b --- /dev/null +++ b/tests/testthat/test-adjust_new_sites.R @@ -0,0 +1,9 @@ +test_that("adjust_new_sites works", { + sampling_weights = c(1, 3.3, 4, 5, 6.1) + samples = list(c("xyz", "zyx"), NULL, NULL, "xyz", "zyx") + + expect_equal( + adjust_new_sites(sampling_weights, samples, 2), + c(1.0, 6.6, 8.0, 5.0, 6.1) + ) +}) From 78c49a1f29a10be88894105b44ae7700ec4d371f Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Wed, 2 Feb 2022 16:39:29 -0800 Subject: [PATCH 15/16] draw_sample tests --- tests/testthat/_snaps/draw_sample.md | 100 ++++++++++++++++++ tests/testthat/test-draw_sample.R | 54 ++++++++++ .../test_data/test_sampling_weights.rds | Bin 0 -> 2731 bytes 3 files changed, 154 insertions(+) create mode 100644 tests/testthat/_snaps/draw_sample.md create mode 100644 tests/testthat/test-draw_sample.R create mode 100644 tests/testthat/test_data/test_sampling_weights.rds diff --git a/tests/testthat/_snaps/draw_sample.md b/tests/testthat/_snaps/draw_sample.md new file mode 100644 index 0000000..55ab5aa --- /dev/null +++ b/tests/testthat/_snaps/draw_sample.md @@ -0,0 +1,100 @@ +# no strata works + + { + "type": "list", + "attributes": { + "names": { + "type": "character", + "attributes": {}, + "value": ["id", "site_category", "zone", "sampling_weight", "sampled"] + }, + "row.names": { + "type": "integer", + "attributes": {}, + "value": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] + }, + "class": { + "type": "character", + "attributes": {}, + "value": ["tbl_df", "tbl", "data.frame"] + } + }, + "value": [ + { + "type": "character", + "attributes": {}, + "value": ["rec0BqSxHG0it6r1z", "rec0jfaSaeInI0D9K", "rec0km4bt4aloj58u", "rec68EYwqQI7SN89z", "rec8Ilxtpe54Ogzl0", "rec9ZDOrv9bV8j5mV", "recANm0miaBQ3TO5u", "recBj8S9nZsQiH2v3", "recC6pIakfvfg75La", "recJtIlwyhvB6By7m", "recKDonju7LPCbIl1", "recMTwwOG92qpFqAY", "recMuusZ6Jl3zFDJS", "recOsnomUWAbAugie", "recPeT9RJa0f8TqMF", "recSyWbTiLCKSrmDP", "recUrFO9hxaItcj1l", "recV4Qf9TZVOU0Byn", "recWGVLCpdAjvdtC3", "recYt4yp6HnzN5v5E", "recavxiVHAIHluDON", "recbZUZbQxg0rd48H", "reccecEwN40yXoAb0", "recdKOmYLrH2j8NBk", "recdoxUOenK5q2ifW", "recgWq4sJsfS1ggSa", "recgdkp2T4uoqELux", "reclYdsIHjkHzAH9O", "recsNrwdUId8inAMH", "rect15vaq6KhWuj7Q", "recujyDuqbdmDKoGn", "recuqLP9bmKRyPJhg", "recwdCDzWdAPab8sZ"] + }, + { + "type": "character", + "attributes": {}, + "value": ["Encampment", "Non Shelter Venue", "Non Shelter Venue", "Non Shelter Venue", "Encampment", "Encampment", "Encampment", "Non Shelter Venue", "Encampment", "Hotspot", "Non Shelter Venue", "Non Shelter Venue", "Emergency Shelter", "Encampment", "Non Shelter Venue", "Non Shelter Venue", "Encampment", "Encampment", "Encampment", "Encampment", "Hotspot", "Encampment", "Emergency Shelter", "Hotspot", "Non Shelter Venue", "Emergency Shelter", "Emergency Shelter", "Emergency Shelter", "Encampment", "Hotspot", "Non Shelter Venue", "Emergency Shelter", "Encampment"] + }, + { + "type": "character", + "attributes": {}, + "value": ["South Placer", "South Placer", "Central Placer", "South Placer", "South Placer", "South Placer", "South Placer", "South Placer", "South Placer", "Central Placer", "Central Placer", "South Placer", "Eastern Placer", "South Placer", "Central Placer", "South Placer", "South Placer", "South Placer", "South Placer", "South Placer", "Central Placer", "South Placer", "Eastern Placer", "Central Placer", "South Placer", "Central Placer", "Eastern Placer", "South Placer", "South Placer", "South Placer", "South Placer", "Central Placer", "South Placer"] + }, + { + "type": "double", + "attributes": {}, + "value": [2.68562401, 30, 15, 60, 40.28436019, 5.37124803, 2.68562401, 1, 8.95208004, 2, 225, 54, 6.70235294, 18.79936809, 54, 60, 17.00895208, 2.68562401, 32.22748815, 8.95208004, 10, 53.71248025, 40.21411765, 4, 30, 4.46823529, 5.58529412, 78.19411765, 13.42812006, 2, 30, 5.58529412, 3.58083202] + }, + { + "type": "logical", + "attributes": {}, + "value": [false, false, false, true, true, false, false, false, false, true, true, false, false, false, true, true, false, false, true, false, false, false, true, false, false, false, false, true, false, false, false, true, false] + } + ] + } + +# stratified sample works + + { + "type": "list", + "attributes": { + "names": { + "type": "character", + "attributes": {}, + "value": ["id", "site_category", "zone", "sampling_weight", "sampled"] + }, + "row.names": { + "type": "integer", + "attributes": {}, + "value": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] + }, + "class": { + "type": "character", + "attributes": {}, + "value": ["tbl_df", "tbl", "data.frame"] + } + }, + "value": [ + { + "type": "character", + "attributes": {}, + "value": ["rec0BqSxHG0it6r1z", "rec0jfaSaeInI0D9K", "rec0km4bt4aloj58u", "rec68EYwqQI7SN89z", "rec8Ilxtpe54Ogzl0", "rec9ZDOrv9bV8j5mV", "recANm0miaBQ3TO5u", "recBj8S9nZsQiH2v3", "recC6pIakfvfg75La", "recJtIlwyhvB6By7m", "recKDonju7LPCbIl1", "recMTwwOG92qpFqAY", "recMuusZ6Jl3zFDJS", "recOsnomUWAbAugie", "recPeT9RJa0f8TqMF", "recSyWbTiLCKSrmDP", "recUrFO9hxaItcj1l", "recV4Qf9TZVOU0Byn", "recWGVLCpdAjvdtC3", "recYt4yp6HnzN5v5E", "recavxiVHAIHluDON", "recbZUZbQxg0rd48H", "reccecEwN40yXoAb0", "recdKOmYLrH2j8NBk", "recdoxUOenK5q2ifW", "recgWq4sJsfS1ggSa", "recgdkp2T4uoqELux", "reclYdsIHjkHzAH9O", "recsNrwdUId8inAMH", "rect15vaq6KhWuj7Q", "recujyDuqbdmDKoGn", "recuqLP9bmKRyPJhg", "recwdCDzWdAPab8sZ"] + }, + { + "type": "character", + "attributes": {}, + "value": ["Encampment", "Non Shelter Venue", "Non Shelter Venue", "Non Shelter Venue", "Encampment", "Encampment", "Encampment", "Non Shelter Venue", "Encampment", "Hotspot", "Non Shelter Venue", "Non Shelter Venue", "Emergency Shelter", "Encampment", "Non Shelter Venue", "Non Shelter Venue", "Encampment", "Encampment", "Encampment", "Encampment", "Hotspot", "Encampment", "Emergency Shelter", "Hotspot", "Non Shelter Venue", "Emergency Shelter", "Emergency Shelter", "Emergency Shelter", "Encampment", "Hotspot", "Non Shelter Venue", "Emergency Shelter", "Encampment"] + }, + { + "type": "character", + "attributes": {}, + "value": ["South Placer", "South Placer", "Central Placer", "South Placer", "South Placer", "South Placer", "South Placer", "South Placer", "South Placer", "Central Placer", "Central Placer", "South Placer", "Eastern Placer", "South Placer", "Central Placer", "South Placer", "South Placer", "South Placer", "South Placer", "South Placer", "Central Placer", "South Placer", "Eastern Placer", "Central Placer", "South Placer", "Central Placer", "Eastern Placer", "South Placer", "South Placer", "South Placer", "South Placer", "Central Placer", "South Placer"] + }, + { + "type": "double", + "attributes": {}, + "value": [2.68562401, 30, 15, 60, 40.28436019, 5.37124803, 2.68562401, 1, 8.95208004, 2, 225, 54, 6.70235294, 18.79936809, 54, 60, 17.00895208, 2.68562401, 32.22748815, 8.95208004, 10, 53.71248025, 40.21411765, 4, 30, 4.46823529, 5.58529412, 78.19411765, 13.42812006, 2, 30, 5.58529412, 3.58083202] + }, + { + "type": "logical", + "attributes": {}, + "value": [false, false, false, true, true, false, false, false, false, false, true, true, false, true, false, false, false, false, false, false, false, true, false, false, false, false, true, false, false, false, false, true, false] + } + ] + } + diff --git a/tests/testthat/test-draw_sample.R b/tests/testthat/test-draw_sample.R new file mode 100644 index 0000000..b6037da --- /dev/null +++ b/tests/testthat/test-draw_sample.R @@ -0,0 +1,54 @@ +test_sampling_weights = readr::read_rds("test_data/test_sampling_weights.rds") + +test_that("no strata works", { + withr::local_seed(510) + + test_n = 10 + + test_sample = draw_sample(test_sampling_weights, sampling_weight, test_n) + + # make sure sample has same number of rows as venue data + expect_equal(nrow(test_sample), nrow(test_sampling_weights)) + # make sure the requested number of sites were selected + expect_equal(sum(test_sample$sampled), test_n) + + # final check to just make sure it does the same thing each time + expect_snapshot_value(test_sample, style = "json2") +}) + +test_that("stratified sample works", { + withr::local_seed(510) + + test_strata = dplyr::tribble( + ~ zone, ~ site_category, ~ n, + "Eastern Placer", "Emergency Shelter", 1, + "Central Placer", "Emergency Shelter", 1, + "Central Placer", "Non Shelter Venue", 1, + "South Placer", "Encampment", 3, + "South Placer", "Non Shelter Venue", 2 + ) + + test_sample = draw_sample( + test_sampling_weights, sampling_weight, test_strata + ) + + # make sure sample has same number of rows as venue data + expect_equal(nrow(test_sample), nrow(test_sampling_weights)) + # make sure the requested number of sites were selected + expect_equal(sum(test_sample$sampled), sum(test_strata$n)) + + # make sure the requested number of sites per strata were selected + purrr::pwalk(test_strata, function(zone_s, site_category_s, n) { + expect_equal( + test_sample %>% + dplyr::filter(zone == zone_s, site_category == site_category_s) %>% + dplyr::pull(sampled) %>% + sum() + , + n + ) + }) + + # final check to just make sure it does the same thing each time + expect_snapshot_value(test_sample, style = "json2") +}) diff --git a/tests/testthat/test_data/test_sampling_weights.rds b/tests/testthat/test_data/test_sampling_weights.rds new file mode 100644 index 0000000000000000000000000000000000000000..e4559eccd08523f4dbe8f6a2c429a8a7800cb0ca GIT binary patch literal 2731 zcmb_eOK;;;6i$;CrqxWDFd8I4ppG<)VHQ=A#)-WE?8Is9B#o1C9!)mY^&^fQKjK#$ zM=aPg{DavcSRsA^8}_``uo!koNJ#9FD7RCmToOmqQm$m*d(U^i@0{c7Yu}TdL?V$) zY;GhI$rK!>?!nt8yzav50UT|^KJCb2reyOo<|w0j1t&X40c{9)AhRZ$p@|v_jE%hD z<$-Q#815rL0k8A}yTumy22P+-IIGXRiHzkrWv*us1~H&HXG#cNT*Hhm0#FSjW2m6i zI%snk8l$A)JYf#pmdfO&2MD8Fn6O|xoDP*DRtE@B^;kU!N7GWF6c!BxsL__G`Nev( zEUxo)cM}Tg=H6+@DF4xJFLK6XcN8NgPB2n5@ z^2+EV>3jKbQeez^1Dj$MgaM{AwaZW}qx&@1KmcLT83?VJl5wOw&LDs&i%P)i|SHcbJ@pH%eoKAz8ByC!00Q&Z)y50w_H@?^fL*pH&ndT_{BwPjcKIMe%VJYt7iy!PuN7Me!`DLEw*|?dYt|9^ z5PiGeBm|@OBy3hk3c?7aGCl7}R1*leJ6PI4I~7v|#>9|KZ$W8T<{m$ibx(Hoy0YnC zz75v~R>iMkNw15uN`95GJa=NJmz7`0H+tvogyf&jzh-8mZ$@Fkw+>D^ol3wr_wN*rQj?Y?nuA``$Grj4(0y(8{{5#c>n+a literal 0 HcmV?d00001 From c3fcfb5d55b0beecb8b37943b670663b731e77c6 Mon Sep 17 00:00:00 2001 From: Eve Perry Date: Wed, 2 Feb 2022 16:39:55 -0800 Subject: [PATCH 16/16] bump version number --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4865339..7cb13f2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: statewide.survey.tools Title: Statewide Survey Tools -Version: 0.1.1 +Version: 0.2.0 Authors@R: person("Eve", "Perry", , "eve.perry@ucsf.edu", role = c("aut", "cre")) Description: Helper tools for the Statewide Survey.