generated from CDCgov/template
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
289 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(add_two_numbers) | ||
export(download_from_azure_blob) | ||
export(fetch_blob_container) | ||
export(fetch_credential_from_env_var) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
# CFAEpiNow2Pipeline (development version) | ||
|
||
* Azure Blob file download utilities | ||
* CI running on Ubuntu only & working pkgdown deploy to GitHub Pages | ||
* Initial R package with checks running in CI |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
#' Download specified blobs from Blob Storage and save them in a local dir
#'
#' Connects to `container_name` via [fetch_blob_container()] and then downloads
#' each entry of `blob_names` in turn, saving it to
#' `file.path(local_dest, blob)`.
#'
#' Note that it might be wise to instead specify a blob prefix, list the
#' blobs, and download all the listed blobs. This would allow more
#' flexibility with downloading whole remote directories (like delta tables).
#'
#' @param blob_names A vector of blobs to download from `container_name`
#' @param local_dest The path to the local directory to save the files in
#' @param container_name The Azure Blob Storage container with `blob_names`
#'
#' @return `NULL`, invisibly, on success. Aborts with a chained error if the
#'   container connection or any individual blob download fails.
#' @export
download_from_azure_blob <- function(blob_names, local_dest, container_name) {
  # Re-throw connection failures with actionable hints, keeping the original
  # error attached as the parent so the root cause is still visible.
  blob_container <- rlang::try_fetch(
    fetch_blob_container(container_name),
    error = function(con) {
      cli::cli_abort(
        c(
          "Unable to authenticate connection to Blob endpoint",
          "!" = "Check correct credentials are present as env variables",
          "!" = "Check container {.var {container_name}} is correct"
        ),
        parent = con
      )
    }
  )

  for (blob in blob_names) {
    local_file_path <- file.path(local_dest, blob)
    rlang::try_fetch(
      download_file_from_container(
        blob,
        blob_container,
        local_file_path
      ),
      error = function(con) {
        cli::cli_abort(
          c(
            "Error downloading blob {.path {blob}}",
            "Using container {.path {container_name}}",
            # The inner braces are required: without them cli prints the
            # literal text "local_file_path" rather than the variable's value.
            "Writing to local file path {.path {local_file_path}}"
          ),
          parent = con
        )
      }
    )
  }
  cli::cli_alert_success("Blobs {.path {blob_names}} downloaded successfully")
  invisible(NULL)
}
|
||
#' Download a single blob from a container to a local file
#'
#' Internal wrapper around `AzureStor::download_blob()` that reports progress
#' through `cli` before and after the transfer. An existing file at
#' `local_file_path` is overwritten.
#'
#' @param blob_storage_path Path of the blob inside the container
#' @param container The container object passed on to
#'   `AzureStor::download_blob()`
#' @param local_file_path Local path the blob is written to
#'
#' @return `local_file_path`, invisibly
#' @noRd
download_file_from_container <- function(blob_storage_path,
                                         container,
                                         local_file_path) {
  cli::cli_alert_info(
    "Downloading blob {.path {blob_storage_path}} to {.path {local_file_path}}"
  )

  # Always replace any previous local copy of the blob
  AzureStor::download_blob(
    container,
    src = blob_storage_path,
    dest = local_file_path,
    overwrite = TRUE
  )

  cli::cli_alert_success(
    "Blob {.path {blob_storage_path}} downloaded successfully"
  )

  invisible(local_file_path)
}
|
||
#' Connect to an Azure Blob Storage container using env var credentials
#'
#' This **impure** function depends on the environment variables:
#' * TENANT_ID
#' * SUBSCRIPTION
#' * RESOURCE_GROUP
#' * STORAGE_ACCOUNT
#'
#' It will error out if any of the above is not set. The storage account is
#' looked up through the Azure login for the tenant, and the account's `key1`
#' access key is used to open the blob endpoint.
#'
#' @param container_name The Azure Blob Storage container associated with the
#'   credentials
#' @return The container object created by `AzureStor::storage_container()`
#'   for `container_name`
#' @export
fetch_blob_container <- function(container_name) {
  cli::cli_alert_info(
    "Attempting to connect to container {.var {container_name}}"
  )

  # Read every credential up front so a missing one fails before any Azure call
  cli::cli_alert_info("Loading Azure credentials from env vars")
  tenant_id <- fetch_credential_from_env_var("TENANT_ID")
  subscription_id <- fetch_credential_from_env_var("SUBSCRIPTION")
  resource_group_name <- fetch_credential_from_env_var("RESOURCE_GROUP")
  storage_account_name <- fetch_credential_from_env_var("STORAGE_ACCOUNT")
  cli::cli_alert_success("Credentials loaded successfully")

  # Walk login -> subscription -> resource group -> storage account
  cli::cli_alert_info("Authenticating with loaded credentials")
  login <- AzureRMR::get_azure_login(tenant_id)
  account <- login$
    get_subscription(subscription_id)$
    get_resource_group(resource_group_name)$
    get_storage_account(storage_account_name)

  # Open the blob endpoint with the account's first access key
  blob_endpoint <- AzureStor::blob_endpoint(
    account$properties$primaryEndpoints$blob,
    key = account$list_keys()[["key1"]]
  )

  container <- AzureStor::storage_container(blob_endpoint, container_name)
  cli::cli_alert_success("Authenticated connection to {.var {container_name}}")

  container
}
|
||
#' Fetch Azure credential from environment variable
#'
#' And throw an informative error if credential is not found
#'
#' @param env_var A character, the name of the environment variable holding
#'   the credential to fetch
#'
#' @return The value of the environment variable. Aborts with an error of
#'   class `CFA_Rt` if the variable is unset or set to the empty string.
#' @export
fetch_credential_from_env_var <- function(env_var) {
  # Sys.getenv() returns "" for an unset variable, so unset and empty are
  # handled identically here.
  credential <- Sys.getenv(env_var)

  if (credential == "") {
    # No `parent` is supplied: this is the originating error, so there is no
    # upstream condition to chain to.
    cli::cli_abort(
      c(
        "Error loading Azure credentials from environment variables",
        "!" = "Environment variable {.envvar {env_var}} not specified or empty"
      ),
      class = "CFA_Rt"
    )
  }

  credential
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# NOTE: these tests don't test the happy path because they don't interact with | ||
# Azure resources and mocking a full Azure Blob interface is hard. Instead, they | ||
# test that expected errors are thrown and that if Azure access is mocked, the | ||
# core download function runs all the way through. The function | ||
# `download_file_from_container` isn't tested because it's a simple wrapper | ||
# around `AzureStor::download_blob()` and `testthat::with_mocked_bindings()` | ||
# advises mocking wrappers for tests rather than injecting the mock into the | ||
# external lib. | ||
test_that("Downloading file smoke test", {
  # Both the Azure connection and the per-blob download are mocked, so this
  # only checks that the top-level function runs through and returns NULL.
  fake_blob <- "not_a_real_file.ext"
  result <- testthat::with_mocked_bindings(
    fetch_blob_container = function(...) "test-container",
    download_file_from_container = function(...) fake_blob,
    code = withr::with_tempdir(
      download_from_azure_blob(
        blob_names = fake_blob,
        local_dest = ".",
        container_name = "test_container"
      )
    )
  )

  expect_null(result)
})
|
||
test_that("Download fail throws informative error", {
  # Errors on fetching credentials. The Azure env vars are explicitly unset so
  # the outcome does not depend on the machine running the tests (otherwise a
  # configured environment would attempt a real Azure login here).
  withr::with_envvar(
    c(
      TENANT_ID = NA_character_,
      SUBSCRIPTION = NA_character_,
      RESOURCE_GROUP = NA_character_,
      STORAGE_ACCOUNT = NA_character_
    ),
    {
      expect_error(
        download_from_azure_blob(
          blob_names = c("test.json"),
          local_dest = "./",
          container_name = "test_container"
        )
      )
    }
  )

  # Credentials mocked, errors on downloading file
  testthat::with_mocked_bindings(
    {
      withr::with_tempdir({
        expect_error(
          download_from_azure_blob(
            blob_names = c("not_a_real_file.ext"),
            local_dest = ".",
            container_name = "test_container"
          )
        )
      })
    },
    fetch_blob_container = function(...) "test-container"
  )
})
|
||
test_that("Credential fetched successfully from env var", {
  # The variable is set only for the duration of this test
  withr::local_envvar(c("KEY" = "VALUE"))
  fetched <- fetch_credential_from_env_var("KEY")
  expect_equal(fetched, "VALUE")
})
|
||
test_that("Missing credential fails", {
  # A variable that is present but empty must be rejected
  withr::local_envvar(c("MISSING_KEY" = ""))
  expect_error(fetch_credential_from_env_var("MISSING_KEY"))

  # So must a variable name that this test never sets
  expect_error(fetch_credential_from_env_var("NOT_A_REAL_KEY"))
})