Skip to content

Commit

Permalink
Add Azure file download (#5)
Browse files Browse the repository at this point in the history
* Add Azure file download

* Drop roxygen2 render in pre-commit

Because of a weird bug where the render was breaking with this error:

ℹ Loading CFAEpiNow2Pipeline
Error in `precommit::roxygenize_with_cache()`:
! The packages "AzureRMR" and "AzureStor" are required.
Backtrace:
    ▆
 1. └─precommit::roxygenize_with_cache(key = wd, dirs = path_relative_cache)
 2.   └─rlang::abort(conditionMessage(out)) at repo_114h0o4/R/roxygen2.R:103:5
Execution halted

* Update R/azure.R

Co-authored-by: Nate McIntosh <[email protected]>

* Update R/azure.R

Co-authored-by: Nate McIntosh <[email protected]>

* Drop leftover from rlang refactor

* Rename Azure authentication intermediates

To match https://github.com/CDCgov/cfazuR/tree/kg-expand-readme

Co-authored-by: Katie Gostic (she/her) <[email protected]>

* Run pre-commit over GH suggestions

---------

Co-authored-by: Nate McIntosh <[email protected]>
Co-authored-by: Katie Gostic (she/her) <[email protected]>
  • Loading branch information
3 people authored Aug 26, 2024
1 parent bab879c commit 0caa4fa
Show file tree
Hide file tree
Showing 9 changed files with 292 additions and 4 deletions.
3 changes: 1 addition & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
repos:
# R
- repo: https://github.com/lorenzwalthert/precommit
rev: v0.4.2
rev: v0.4.3
hooks:
- id: style-files
args: [--style_pkg=styler, --style_fun=tidyverse_style]
- id: roxygenize
- id: use-tidy-description
- id: lintr
- id: readme-rmd-rendered
Expand Down
10 changes: 8 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,14 @@ Description: Add logging, metadata handling, and data handling
License: Apache License (>= 2)
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Suggests:
testthat (>= 3.0.0)
testthat (>= 3.0.0),
withr
Config/testthat/edition: 3
Imports:
AzureRMR,
AzureStor,
cli,
rlang
URL: https://cdcgov.github.io/cfa-epinow2-pipeline/
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(add_two_numbers)
export(download_from_azure_blob)
export(fetch_blob_container)
export(fetch_credential_from_env_var)
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# CFAEpiNow2Pipeline (development version)

* Azure Blob file download utilities
* CI running on Ubuntu only & working pkgdown deploy to GitHub Pages
* Initial R package with checks running in CI
* Updated DESCRIPTION and added guidelines for package authorship
146 changes: 146 additions & 0 deletions R/azure.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#' Download specified blobs from Blob Storage and save them in a local dir
#'
#' Note that I think it might be wise to instead specify a blob prefix, list the
#' blobs, and download all the listed blobs. This would let us have some more
#' flexibility with downloading whole remote directories (like delta tables)
#'
#' The container is looked up with [fetch_blob_container()] and each blob is
#' then written to `file.path(local_dest, blob)`. A failure at either step is
#' re-thrown with extra context, keeping the original error as the parent.
#'
#' @param blob_names A vector of blobs to download from `container_name`
#' @param local_dest The path to the local directory to save the files in
#' @param container_name The Azure Blob Storage container with `blob_names`
#'
#' @return NULL (invisibly) on success
#' @export
download_from_azure_blob <- function(blob_names, local_dest, container_name) {
  # Attempt to connect to the storage container
  blob_container <- rlang::try_fetch(
    fetch_blob_container(container_name),
    error = function(con) {
      cli::cli_abort(
        c(
          "Unable to authenticate connection to Blob endpoint",
          "!" = "Check correct credentials are present as env variables",
          "!" = "Check container {.var {container_name}} is correct"
        ),
        parent = con
      )
    }
  )

  # Attempt to save each blob into local storage
  for (blob in blob_names) {
    local_file_path <- file.path(local_dest, blob)
    rlang::try_fetch(
      download_file_from_container(
        blob,
        blob_container,
        local_file_path
      ),
      error = function(con) {
        cli::cli_abort(
          c(
            "Error downloading blob {.path {blob}}",
            "Using container {.path {container_name}}",
            # The inner braces are required for cli to interpolate the value;
            # without them the literal text "local_file_path" is printed
            "Writing to local file path {.path {local_file_path}}"
          ),
          parent = con
        )
      }
    )
  }
  cli::cli_alert_success("Blobs {.path {blob_names}} downloaded successfully")
  invisible(NULL)
}

#' Download a single blob from a container to a local file
#'
#' Thin wrapper around `AzureStor::download_blob()` that reports progress with
#' `cli` and overwrites any existing file at `local_file_path`.
#'
#' @param blob_storage_path Path of the blob within the container
#' @param container The container passed on to `AzureStor::download_blob()`
#' @param local_file_path Local path the blob is written to
#'
#' @return `local_file_path`, invisibly
#' @noRd
download_file_from_container <- function(
    blob_storage_path,
    container,
    local_file_path) {
  # Announce the transfer before starting it
  cli::cli_alert_info(
    "Downloading blob {.path {blob_storage_path}} to {.path {local_file_path}}"
  )

  # An existing local copy is replaced rather than treated as an error
  AzureStor::download_blob(
    src = blob_storage_path,
    dest = local_file_path,
    container = container,
    overwrite = TRUE
  )

  # Only reached when the download above did not error
  cli::cli_alert_success(
    "Blob {.path {blob_storage_path}} downloaded successfully"
  )

  invisible(local_file_path)
}

#' Load Azure Blob container using credentials in environment variables
#'
#' This **impure** function depends on the environment variables:
#' * az_tenant_id
#' * az_subscription_id
#' * az_resource_group
#' * az_storage_account
#'
#' It will error out if any of the above is not set.
#'
#' The credentials are used to log in with `AzureRMR`, walk down to the storage
#' account, read its first access key and open the Blob endpoint with that key.
#'
#' @param container_name The Azure Blob Storage container associated with the
#'   credentials
#' @return The storage container object created by
#'   `AzureStor::storage_container()` for `container_name`
#' @export
fetch_blob_container <- function(container_name) {
  cli::cli_alert_info(
    "Attempting to connect to container {.var {container_name}}"
  )
  cli::cli_alert_info("Loading Azure credentials from env vars")
  # nolint start: object_name_linter
  # The names must match the documented variables exactly: a stray space in
  # the name would look up a different (unset) variable and always abort
  az_tenant_id <- fetch_credential_from_env_var("az_tenant_id")
  az_subscription_id <- fetch_credential_from_env_var("az_subscription_id")
  az_resource_group <- fetch_credential_from_env_var("az_resource_group")
  az_storage_account <- fetch_credential_from_env_var("az_storage_account")
  # nolint end: object_name_linter
  cli::cli_alert_success("Credentials loaded successfully")


  cli::cli_alert_info("Authenticating with loaded credentials")
  az <- AzureRMR::get_azure_login(az_tenant_id)
  subscription <- az$get_subscription(az_subscription_id)
  resource_group <- subscription$get_resource_group(az_resource_group)
  storage_account <- resource_group$get_storage_account(az_storage_account)

  # Getting the access key
  keys <- storage_account$list_keys()
  access_key <- keys[["key1"]]

  endpoint <- AzureStor::blob_endpoint(
    storage_account$properties$primaryEndpoints$blob,
    key = access_key
  )

  container <- AzureStor::storage_container(endpoint, container_name)
  cli::cli_alert_success("Authenticated connection to {.var {container_name}}")

  container
}

#' Fetch Azure credential from environment variable
#'
#' Reads `env_var` from the environment and aborts with an informative error
#' (class `CFA_Rt`) when the variable is unset or empty
#'
#' @param env_var A character, the name of the environment variable holding
#'   the credential
#'
#' @return The value of the environment variable
#' @export
fetch_credential_from_env_var <- function(env_var) {
  credential <- Sys.getenv(env_var)

  # `Sys.getenv()` yields "" for an unset variable, so a single emptiness
  # check covers both the unset and the set-but-empty case
  is_missing <- !nzchar(credential)
  if (is_missing) {
    cli::cli_abort(
      c(
        "Error loading Azure credentials from environment variables",
        "!" = "Environment variable {.envvar {env_var}} not specified or empty"
      ),
      class = "CFA_Rt"
    )
  }

  credential
}
23 changes: 23 additions & 0 deletions man/download_from_azure_blob.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions man/fetch_blob_container.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/fetch_credential_from_env_var.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

66 changes: 66 additions & 0 deletions tests/testthat/test-azure.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# NOTE: these tests don't test the happy path because they don't interact with
# Azure resources and mocking a full Azure Blob interface is hard. Instead, they
# test that expected errors are thrown and that if Azure access is mocked, the
# core download function runs all the way through. The function
# `download_file_from_container` isn't tested because it's a simple wrapper
# around `AzureStor::download_blob()` and `testthat::with_mocked_bindings()`
# advises mocking wrappers for tests rather than injecting the mock into the
# external lib.
test_that("Downloading file smoke test", {
  # Both the container lookup and the per-file download are mocked, so this
  # only checks that the orchestration runs end to end and returns NULL
  fake_blob <- "not_a_real_file.ext"
  result <- testthat::with_mocked_bindings(
    withr::with_tempdir(
      download_from_azure_blob(
        blob_names = fake_blob,
        local_dest = ".",
        container_name = "test_container"
      )
    ),
    fetch_blob_container = function(...) "test-container",
    download_file_from_container = function(...) fake_blob
  )

  expect_null(result)
})

test_that("Download fail throws informative error", {
  # Errors on fetching credentials. The credential env vars are unset for the
  # duration of the call so the failure does not depend on the machine running
  # the tests (and a real Azure login is never attempted).
  withr::with_envvar(
    c(
      az_tenant_id = NA_character_,
      az_subscription_id = NA_character_,
      az_resource_group = NA_character_,
      az_storage_account = NA_character_
    ),
    expect_error(
      download_from_azure_blob(
        blob_names = c("test.json"),
        local_dest = "./",
        container_name = "test_container"
      )
    )
  )

  # Credentials mocked, errors on downloading file
  testthat::with_mocked_bindings(
    {
      withr::with_tempdir({
        expect_error(
          download_from_azure_blob(
            blob_names = c("not_a_real_file.ext"),
            local_dest = ".",
            container_name = "test_container"
          )
        )
      })
    },
    fetch_blob_container = function(...) "test-container"
  )
})

test_that("Credential fetched successfully from env var", {
  # The variable is set only for the lifetime of this test
  withr::local_envvar(c("KEY" = "VALUE"))
  expect_equal(fetch_credential_from_env_var("KEY"), "VALUE")
})

test_that("Missing credential fails", {
  # Variable is set but empty. Pin the error class so an unrelated failure
  # (e.g. a typo in the function name) cannot satisfy the expectation.
  withr::with_envvar(c("MISSING_KEY" = ""), {
    expect_error(
      fetch_credential_from_env_var("MISSING_KEY"),
      class = "CFA_Rt"
    )
  })

  # Variable is not set at all; unset it explicitly in case the host has it
  withr::with_envvar(c("NOT_A_REAL_KEY" = NA_character_), {
    expect_error(
      fetch_credential_from_env_var("NOT_A_REAL_KEY"),
      class = "CFA_Rt"
    )
  })
})

0 comments on commit 0caa4fa

Please sign in to comment.