diff --git a/NAMESPACE b/NAMESPACE index 1afdb8d..1d612ed 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(get_prism_cutpoints) export(inferencedata_to_tidy_draws) export(location_lookup) export(nhsn_soda_query) +export(nullable_comparison) export(pivot_hubverse_quantiles_wider) export(plot_coverage_by_date) export(plot_forecast_quantiles) @@ -31,7 +32,9 @@ export(sample_aggregated_trajectories) export(scale_color_prism) export(scale_colour_prism) export(scale_fill_prism) -export(soql_is_in) +export(soql_nullable_is_in) +export(soql_nullable_select) +export(soql_nullable_where) export(target_end_dates_from_horizons) export(theme_forecasttools) export(to_location_table_column) diff --git a/R/filter_for_scoring.R b/R/filter_for_scoring.R index 63082dd..74d5e31 100644 --- a/R/filter_for_scoring.R +++ b/R/filter_for_scoring.R @@ -32,6 +32,23 @@ filter_for_scoring <- function(forecasts_and_targets, models = NULL) { to_score <- forecasts_and_targets + loc_codes <- if (!is.null(locations)) { + us_loc_abbr_to_code(locations) + } else { + NULL + } + min_forecast_date <- if (!is.null(min_forecast_date)) { + as.Date(min_forecast_date) + } else { + NULL + } + + max_forecast_date <- if (!is.null(max_forecast_date)) { + as.Date(max_forecast_date) + } else { + NULL + } + ## strip NA values from ## not yet scored tables if ("true_value" %in% names(to_score)) { @@ -39,49 +56,22 @@ filter_for_scoring <- function(forecasts_and_targets, "Filtering out forecasts without corresponding truth data..." ) to_score <- to_score |> - dplyr::filter(!is.na(true_value)) + dplyr::filter(!is.na(.data$true_value)) } - # filter forecasts to score - if (!is.null(min_forecast_date)) { - cli::cli_inform( - "Filtering out forecast dates before {min_forecast_date}" - ) - to_score <- to_score |> - dplyr::filter(forecast_date >= as.Date(!!min_forecast_date)) - } - if (!is.null(max_forecast_date)) { - cli::cli_inform( - "Filtering out forecast dates after {max_forecast_date}" - ) - to_score <- to_score |> - dplyr::filter(forecast_date <= as.Date(!!max_forecast_date)) - } - if (!is.null(horizons)) { - cli::cli_inform( - "Filtering out horizons not in {horizons}" + to_score <- to_score |> + dplyr::filter( + nullable_comparison( + .data$forecast_date, ">=", + !!min_forecast_date + ), + nullable_comparison( + .data$forecast_date, "<=", + !!max_forecast_date + ), + nullable_comparison(.data$horizon, "%in%", !!horizons), + nullable_comparison(.data$location, "%in%", !!loc_codes), + nullable_comparison(.data$model, "%in%", !!models) ) - to_score <- to_score |> - dplyr::filter(horizon %in% c(!!horizons)) - } - if (!is.null(locations)) { - cli::cli_inform( - "Filtering out locations not in {locations}" - ) - - loc_codes <- us_loc_abbr_to_code( - locations - ) - to_score <- to_score |> - dplyr::filter(location %in% c(!!loc_codes)) - } - if (!is.null(models)) { - cli::cli_inform( - "Filtering out models not in {models}" - ) - to_score <- to_score |> - dplyr::filter(model %in% c(!!models)) - } - return(to_score) } diff --git a/R/pull_nhsn.R b/R/pull_nhsn.R index fdce926..7efcd4a 100644 --- a/R/pull_nhsn.R +++ b/R/pull_nhsn.R @@ -128,29 +128,11 @@ pull_nhsn <- function(api_endpoint = return(df) } -#' Return a [soql::soql_where()] construct -#' for a given column being in a list of values -#' -#' @param soql_list A `soql` query object, which -#' can be piped in. If one hasn't been -#' created yet, use or pipe in [soql::soql()]. -#' @param column The column to filter on -#' @param match_values A vector of values that column -#' must match -#' @return A new soql object with the filter added, -#' for use in other functions. -#' @export -soql_is_in <- function(soql_list, column, match_values) { - query <- stringr::str_glue( - "{column}='{match_values}'" - ) |> - paste(collapse = " OR ") - return(soql::soql_where(soql_list, query)) -} #' Construct a Socrata open data -#' API (SODA) query for the NSHN -#' dataset +#' API (SODA) query for the NHSN Hospital Respiratory +#' Data set. +#' #' @param api_endpoint Base API endpoint URL to use #' when constructing the query. #' @param start_date Pull only rows with dates @@ -187,41 +169,24 @@ nhsn_soda_query <- function(api_endpoint, ), desc = FALSE, ...) { - query <- soql::soql() |> - soql::soql_add_endpoint(api_endpoint) - - if (!is.null(columns)) { - query <- query |> - soql::soql_select(paste( - unique( - c("jurisdiction", "weekendingdate", columns) - ), - collapse = "," - )) - } - - if (!is.null(start_date)) { - query <- query |> - soql::soql_where( - stringr::str_glue("weekendingdate >= '{start_date}'") - ) - } - - if (!is.null(end_date)) { - query <- query |> - soql::soql_where( - stringr::str_glue("weekendingdate <= '{end_date}'") - ) - } - - if (!is.null(jurisdictions)) { - query <- query |> - soql_is_in( - "jurisdiction", jurisdictions - ) + cols <- if (!is.null(columns)) { + c("jurisdiction", "weekendingdate", columns) + } else { + NULL } - query <- query |> + query <- soql::soql() |> + soql::soql_add_endpoint(api_endpoint) |> + soql_nullable_select(cols) |> + soql_nullable_where( + "weekendingdate", ">=", start_date + ) |> + soql_nullable_where( + "weekendingdate", "<=", end_date + ) |> + soql_nullable_is_in( + "jurisdiction", jurisdictions + ) |> soql::soql_order( paste(unique(order_by), collapse = "," diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..2eafefe --- /dev/null +++ b/R/utils.R @@ -0,0 +1,139 @@ +#' Compare two quantities `a` and `b` with a given +#' `comparison operator`, +#' returning a single `TRUE` if `b` is +#' `NULL`. +#' +#' Useful for letting `NULL` reflect "all values" +#' if [dplyr::filter()] calls. Internal function. +#' +#' @param a First set of values for the comparison +#' @param comparison_operator Comparison operator for the comparison, +#' as a string, e.g. `"=="`, `">="` `"%in%"`. +#' @param b Second set of values for the comparison, or `NULL`. +#' @return A logical vector. Equivalent to `b {comparison_operator} a` +#' if `b` is not `NULL` and to `TRUE` if `b` is `NULL`. +#' #' +#' @examples +#' +#' x <- 6 +#' nullable_comparison(5, ">", x) +#' +#' x <- NULL +#' nullable_comparison(5, ">", x) +#' +#' df <- tibble::tibble(y = 1:6) +#' x <- 3 +#' df |> dplyr::filter( +#' nullable_comparison(y, ">", !!x), +#' y < 5 +#' ) +#' x <- NULL +#' df |> dplyr::filter( +#' nullable_comparison(y, ">", !!x), +#' y < 5 +#' ) +#' +#' @export +nullable_comparison <- function(a, + comparison_operator, + b) { + comparison_func <- getFunction(comparison_operator) + return( + if (!is.null(b)) { + comparison_func(a, b) + } else { + TRUE + } + ) +} + + +#' Add a [soql::soql_where()] statement to a SOQL query, +#' or return the original query if `where_value` is `NULL`. +#' +#' @param soql_list A `soql` query object, which +#' can be piped in. If one hasn't been +#' created yet, use or pipe in [soql::soql()]. +#' @param column column name for the [soql::soql_where()] component +#' of the query, as a string. +#' @param comparison_operator comparison operator for the +#' [soql::soql_where()] component of the query, as a string. +#' @param where_value A value for the comparison, or `NULL` +#' for no filtering. +#' @return A new [soql::soql()] object with the filter added, or +#' simply the input object if `where_value` is `NULL`. +#' @export +soql_nullable_where <- function(soql_list, + column, + comparison_operator, + where_value) { + return( + if (!is.null(where_value)) { + soql::soql_where( + soql_list, + glue::glue(paste0( + "{column} ", + "{comparison_operator} ", + "'{where_value}'" + )) + ) + } else { + soql_list + } + ) +} + + +#' Add an "is in" statement to a SOQL query, or return the original +#' query if `match_values` is `NULL`. +#' +#' An is in statement is a [soql::soql_where()] statement +#' that requires the values of a given column to match one of the +#' entries of a vector of `match_values`. +#' +#' @param soql_list A `soql` query object, which +#' can be piped in. If one hasn't been +#' created yet, use or pipe in [soql::soql()]. +#' @param column The column to filter on +#' @param match_values A vector of values that column +#' must match, or `NULL` for no filtering. +#' @return A new [soql::soql()] object with the filter added, +#' or simply the input object if `match_value` is `NULL`. +#' @export +soql_nullable_is_in <- function(soql_list, column, match_values) { + if (is.null(match_values)) { + return(soql_list) + } else { + query <- glue::glue( + "{column}='{unique(match_values)}'" + ) |> + paste(collapse = " OR ") + return(soql::soql_where(soql_list, query)) + } +} + +#' Add a [soql::soql_select()] statement to a query +#' to select a set of columns `columns`, or return +#' the original query if `columns` is `NULL`. +#' +#' @param soql_list A `soql` query object, which +#' can be piped in. If one hasn't been +#' created yet, use or pipe in [soql::soql()]. +#' @param columns The columns to select, or `NULL`. +#' @return A new [soql::soql()] object with the selection statement +#' added, or the input object if `columns` is `NULL`. +#' @export +soql_nullable_select <- function(soql_list, columns) { + return( + if (!is.null(columns)) { + soql::soql_select( + soql_list, + paste(unique(columns), + collapse = "," + ) + ) + } else { + soql_list + } + ) +} diff --git a/man/nhsn_soda_query.Rd b/man/nhsn_soda_query.Rd index bca3e4b..ca052f2 100644 --- a/man/nhsn_soda_query.Rd +++ b/man/nhsn_soda_query.Rd @@ -3,8 +3,8 @@ \name{nhsn_soda_query} \alias{nhsn_soda_query} \title{Construct a Socrata open data -API (SODA) query for the NSHN -dataset} +API (SODA) query for the NHSN Hospital Respiratory +Data set.} \usage{ nhsn_soda_query( api_endpoint, @@ -55,6 +55,6 @@ the query as \code{\link[soql:soql]{soql::soql()}} output } \description{ Construct a Socrata open data -API (SODA) query for the NSHN -dataset +API (SODA) query for the NHSN Hospital Respiratory +Data set. } diff --git a/man/nullable_comparison.Rd b/man/nullable_comparison.Rd new file mode 100644 index 0000000..c637ddf --- /dev/null +++ b/man/nullable_comparison.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{nullable_comparison} +\alias{nullable_comparison} +\title{Compare two quantities \code{a} and \code{b} with a given +\verb{comparison operator}, +returning a single \code{TRUE} if \code{b} is +\code{NULL}.} +\usage{ +nullable_comparison(a, comparison_operator, b) +} +\arguments{ +\item{a}{First set of values for the comparison} + +\item{comparison_operator}{Comparison operator for the comparison, +as a string, e.g. \code{"=="}, \code{">="} \code{"\%in\%"}.} + +\item{b}{Second set of values for the comparison, or \code{NULL}.} +} +\value{ +A logical vector. Equivalent to \verb{b \{comparison_operator\} a} +if \code{b} is not \code{NULL} and to \code{TRUE} if \code{b} is \code{NULL}. +#' +} +\description{ +Useful for letting \code{NULL} reflect "all values" +if \code{\link[dplyr:filter]{dplyr::filter()}} calls. Internal function. +} +\examples{ + +x <- 6 +nullable_comparison(5, ">", x) + +x <- NULL +nullable_comparison(5, ">", x) + +df <- tibble::tibble(y = 1:6) +x <- 3 +df |> dplyr::filter( + nullable_comparison(y, ">", !!x), + y < 5 +) +x <- NULL +df |> dplyr::filter( + nullable_comparison(y, ">", !!x), + y < 5 +) + +} diff --git a/man/soql_is_in.Rd b/man/soql_is_in.Rd deleted file mode 100644 index 1edd395..0000000 --- a/man/soql_is_in.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pull_nhsn.R -\name{soql_is_in} -\alias{soql_is_in} -\title{Return a \code{\link[soql:soql_queries]{soql::soql_where()}} construct -for a given column being in a list of values} -\usage{ -soql_is_in(soql_list, column, match_values) -} -\arguments{ -\item{soql_list}{A \code{soql} query object, which -can be piped in. If one hasn't been -created yet, use or pipe in \code{\link[soql:soql]{soql::soql()}}.} - -\item{column}{The column to filter on} - -\item{match_values}{A vector of values that column -must match} -} -\value{ -A new soql object with the filter added, -for use in other functions. -} -\description{ -Return a \code{\link[soql:soql_queries]{soql::soql_where()}} construct -for a given column being in a list of values -} diff --git a/man/soql_nullable_is_in.Rd b/man/soql_nullable_is_in.Rd new file mode 100644 index 0000000..c31fff9 --- /dev/null +++ b/man/soql_nullable_is_in.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{soql_nullable_is_in} +\alias{soql_nullable_is_in} +\title{Add an "is in" statement to a SOQL query, or return the original +query if \code{match_values} is \code{NULL}.} +\usage{ +soql_nullable_is_in(soql_list, column, match_values) +} +\arguments{ +\item{soql_list}{A \code{soql} query object, which +can be piped in. If one hasn't been +created yet, use or pipe in \code{\link[soql:soql]{soql::soql()}}.} + +\item{column}{The column to filter on} + +\item{match_values}{A vector of values that column +must match, or \code{NULL} for no filtering.} +} +\value{ +A new \code{\link[soql:soql]{soql::soql()}} object with the filter added, +or simply the input object if \code{match_value} is \code{NULL}. +} +\description{ +An is in statement is a \code{\link[soql:soql_queries]{soql::soql_where()}} statement +that requires the values of a given column to match one of the +entries of a vector of \code{match_values}. +} diff --git a/man/soql_nullable_select.Rd b/man/soql_nullable_select.Rd new file mode 100644 index 0000000..f3bb9d8 --- /dev/null +++ b/man/soql_nullable_select.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{soql_nullable_select} +\alias{soql_nullable_select} +\title{Add a \code{\link[soql:soql_queries]{soql::soql_select()}} statement to a query +to select a set of columns \code{columns}, or return +the original query if \code{columns} is \code{NULL}.} +\usage{ +soql_nullable_select(soql_list, columns) +} +\arguments{ +\item{soql_list}{A \code{soql} query object, which +can be piped in. If one hasn't been +created yet, use or pipe in \code{\link[soql:soql]{soql::soql()}}.} + +\item{columns}{The columns to select, or \code{NULL}.} +} +\value{ +A new \code{\link[soql:soql]{soql::soql()}} object with the selection statement +added, or the input object if \code{columns} is \code{NULL}. +} +\description{ +Add a \code{\link[soql:soql_queries]{soql::soql_select()}} statement to a query +to select a set of columns \code{columns}, or return +the original query if \code{columns} is \code{NULL}. +} diff --git a/man/soql_nullable_where.Rd b/man/soql_nullable_where.Rd new file mode 100644 index 0000000..0ea98e7 --- /dev/null +++ b/man/soql_nullable_where.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{soql_nullable_where} +\alias{soql_nullable_where} +\title{Add a \code{\link[soql:soql_queries]{soql::soql_where()}} statement to a SOQL query, +or return the original query if \code{where_value} is \code{NULL}.} +\usage{ +soql_nullable_where(soql_list, column, comparison_operator, where_value) +} +\arguments{ +\item{soql_list}{A \code{soql} query object, which +can be piped in. If one hasn't been +created yet, use or pipe in \code{\link[soql:soql]{soql::soql()}}.} + +\item{column}{column name for the \code{\link[soql:soql_queries]{soql::soql_where()}} component +of the query, as a string.} + +\item{comparison_operator}{comparison operator for the +\code{\link[soql:soql_queries]{soql::soql_where()}} component of the query, as a string.} + +\item{where_value}{A value for the comparison, or \code{NULL} +for no filtering.} +} +\value{ +A new \code{\link[soql:soql]{soql::soql()}} object with the filter added, or +simply the input object if \code{where_value} is \code{NULL}. +} +\description{ +Add a \code{\link[soql:soql_queries]{soql::soql_where()}} statement to a SOQL query, +or return the original query if \code{where_value} is \code{NULL}. +} diff --git a/tests/testthat/test_nullable_comparison.R b/tests/testthat/test_nullable_comparison.R new file mode 100644 index 0000000..a3c01e8 --- /dev/null +++ b/tests/testthat/test_nullable_comparison.R @@ -0,0 +1,37 @@ +test_that("nullable_comparison() returns expected results", { + test_vec <- c(1, 32, -5, 3, -5.23, 1, 1.0001) + equal_length_vec <- c(1, -2, 3, 5.2, 5, 1, 10.5) + shorter_length_vec <- c(1, 3) + single_value <- 3 + null_vec <- NULL + + expect_equal( + nullable_comparison(test_vec, "%in%", single_value), + test_vec %in% single_value + ) + expect_equal( + nullable_comparison(test_vec, "%in%", shorter_length_vec), + test_vec %in% shorter_length_vec + ) + expect_equal( + nullable_comparison(test_vec, "%in%", null_vec), + TRUE + ) + + for (operator in c("==", "<", ">", "<=", ">=")) { + expect_equal( + nullable_comparison(test_vec, operator, single_value), + getFunction(operator)(test_vec, single_value) + ) + + expect_equal( + nullable_comparison(test_vec, operator, equal_length_vec), + getFunction(operator)(test_vec, equal_length_vec) + ) + + expect_equal( + nullable_comparison(test_vec, operator, null_vec), + TRUE + ) + } +}) diff --git a/vignettes/pull-nhsn.Rmd b/vignettes/pull-nhsn.Rmd index 47e8ce4..def83c3 100644 --- a/vignettes/pull-nhsn.Rmd +++ b/vignettes/pull-nhsn.Rmd @@ -30,7 +30,7 @@ Make sure to record your secret key somewhere safe but _not_ tracked by Git (e.g One place to store secrets is as [environment variables](https://en.wikipedia.org/wiki/Environment_variable). If you don't provide one explicitly, `pull_nhsn()` looks for a valid NHSN API key ID in an environment variable named `NHSN_API_KEY_ID` and a corresponding secret in an environment variable named `NHSN_API_KEY_SECRET`. ## Getting all the data -Our workhorse function for getting NHSN data is called simply `pull_nhsn()`. If you provide it with no arguments, it will simply fetch you the entire dataset as a `tibble::tibble()`, up to the specified maximum number of rows (default 10,0000). +Our workhorse function for getting NHSN data is called simply `pull_nhsn()`. If you provide it with no arguments, it will simply fetch you the entire dataset as a `tibble::tibble()`, up to the specified maximum number of rows (default 100,000). You can increase by setting the `limit` argument of `pull_nhsn()` a larger value. To protect you against accidentally pulling incomplete datasets, `pull_nhsn()` errors by default if the number of rows retrieved hits the `limit`. You can suppress that error by setting `error_on_limit = FALSE`.