Skip to content

Commit

Permalink
refactor: tweak download and list functions (#5)
Browse files Browse the repository at this point in the history
Merge branch 'master' into dev

# Conflicts:
#	R/main.R
  • Loading branch information
salvafern committed Jun 23, 2023
2 parents 543641b + 4e92f69 commit 29013dc
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 88 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Description: Environmental ocean data modeled both for the present and for futur
License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1
RoxygenNote: 7.2.3
Suggests:
httr,
testthat (>= 3.0.0)
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Generated by roxygen2: do not edit by hand

export(download_griddap_dataset)
export(download_dataset)
export(list_layers)
160 changes: 90 additions & 70 deletions R/main.R
Original file line number Diff line number Diff line change
@@ -1,63 +1,9 @@
.list_layers <- function(..., simplify = TRUE){

# Retrieve either full list of layers or loop-up by free-text search
if(missing(...)){
layers <- rerddap::ed_datasets(which = "griddap", url = erddap.bio_oracle.org())
}else{
layers <- rerddap::ed_search_adv(..., url = erddap.bio_oracle.org())
layers <- dplyr::bind_rows(layers$alldata)
attr(layers, "class")
}

# Aftermath
layers <- colnames_normalize(layers)
if(simplify) layers <- simplify(layers)

layers
}

colnames_normalize <- function(df){
colnames(df) <- df %>% colnames() %>% tolower()
colnames(df) <- gsub(".","_", colnames(df), fixed = TRUE)
df
}

simplify <- function(df){
cols_desired <- c("dataset_id", "title", "summary")
cols_desired_exist <- all(cols_desired %in% colnames(df))

if(cols_desired_exist){
df <- df[, cols_desired]
}

df
}

#' List the layers in the Bio-Oracle dataset
#'
#' @param ... Free text search or pass params to [rerddap::ed_search_adv]
#'
#' @return a data frame with the dataset ID that can be later passed to [download_layers]
#' and further information
#' @export
#'
#' @seealso [download_layers]
#'
#' @examples \dontrun{
#' list_layers()
#' list_layers("Ocean Temperature 2100")
#' list_layers("thetao_ssp119_2020_2100_depthmean")
#' }
list_layers <- memoise::memoise(.list_layers)



#' Downloads a griddap dataset from an ERDDAP server
#'
#' @param dataset
#' @param variables
#' @param constraints
#' @param response
#' @param fmt
#' @param directory
#' @param verbose
#'
Expand All @@ -66,38 +12,51 @@ list_layers <- memoise::memoise(.list_layers)
#'
#' @seealso [list_layers]
#'
#' @examples
#'
#' @examples \dontrun{
#' # Test variables
#' dataset_id = "tas_baseline_2000_2020_depthsurf"
#' datasetid = "tas_baseline_2000_2020_depthsurf"
#' time = c('2001-01-01T00:00:00Z', '2010-01-01T00:00:00Z')
#' latitude = c(10, 20)
#' longitude = c(120, 130)
#' variables = NULL
#' variables = c("tas_max", "tas_min")
#' constraints = list(time, latitude, longitude)
#' names(constraints) = c("time", "latitude", "longitude")
#'
#' # Test call
#' download_layers(dataset_id, variables, constraints, debug = TRUE, directory = "./cache/")
download_layers = function(dataset, variables, constraints, response = "nc",
directory = "./", verbose = TRUE, debug = FALSE) {
#' download_dataset(datasetid, variables, constraints)
#' }
download_dataset = function(dataset,
variables,
constraints,
fmt="nc",
directory=FALSE,
verbose=TRUE,
debug=FALSE
) {

printer = function(message, verbose=parent.frame()$verbose) {
if(verbose) { message(message) }
}

printer = function(message, verbose = parent.frame()$verbose) {
if(verbose) { message(message)}
if(fmt == "raster"){
is_raster <- TRUE
fmt <- "nc"
}

# Args to be passed to griddap call later on
docallargs = list()
out = rerddap::info(datasetid = dataset, url = erddap.bio_oracle.org())
docallargs[["fmt"]] = fmt
out = rerddap::info(datasetid=dataset, url = erddap.bio_oracle.org())
docallargs[["datasetx"]] = out

printer(sprintf("Selected dataset %s.", dataset))
printer(sprintf("Dataset info available at: %s/griddap/%s.html", out$base_url, dataset))


# Setting constraints that match TODO: raise warning if constraint does not exist
# Setting constraints that match
for (constraint in names(constraints)) {
if (constraint %in% names(out$alldata)) {
docallargs[[constraint]] = get(constraint, constraints)
docallargs[[constraint]]= get(constraint, constraints)
}
}

Expand All @@ -113,9 +72,12 @@ download_layers = function(dataset, variables, constraints, response = "nc",
printer(sprintf("Selected %s variables: %s", length(valid_variables), toString(valid_variables)))

# Set directory for storing data
cache_dir <- hoardr::hoard()
cache_dir$cache_path_set(full_path=directory)
docallargs[["store"]] = rerddap::disk(cache_dir$cache_path_get())

if (!isFALSE(directory)) {
cache_dir <- hoardr::hoard()
cache_dir$cache_path_set(full_path=directory)
docallargs[["store"]] = rerddap::disk(cache_dir$cache_path_get())
}

# Debug flag to check args if needed
if (debug) {
Expand All @@ -124,8 +86,66 @@ download_layers = function(dataset, variables, constraints, response = "nc",

# Call
res = do.call(rerddap::griddap, docallargs)

if(is_raster) res <- griddap_to_terra(res)

return(res)
}

griddap_to_terra <- function(res){
terra::rast(x$summary$filename)
}



.list_layers <- function(..., simplify = TRUE){

# Retrieve either full list of layers or loop-up by free-text search
if(missing(...)){
layers <- rerddap::ed_datasets(which = "griddap", url = erddap.bio_oracle.org())
}else{
layers <- rerddap::ed_search_adv(..., url = erddap.bio_oracle.org())
layers <- dplyr::bind_rows(layers$alldata)
attr(layers, "class")
}

# Aftermath
layers <- colnames_normalize(layers)
if(simplify) layers <- simplify(layers)

layers
}

colnames_normalize <- function(df){
colnames(df) <- df %>% colnames() %>% tolower()
colnames(df) <- gsub(".","_", colnames(df), fixed = TRUE)
df
}

simplify <- function(df){
cols_desired <- c("dataset_id", "title", "summary")
cols_desired_exist <- all(cols_desired %in% colnames(df))

if(cols_desired_exist){
df <- df[, cols_desired]
}

df
}

#' List the layers in the Bio-Oracle dataset
#'
#' @param ... Free text search or pass params to [rerddap::ed_search_adv]
#'
#' @return a data frame with the dataset ID that can be later passed to [download_layers]
#' and further information
#' @export
#'
#' @seealso [download_layers]
#'
#' @examples \dontrun{
#' list_layers()
#' list_layers("Ocean Temperature 2100")
#' list_layers("thetao_ssp119_2020_2100_depthmean")
#' }
list_layers <- memoise::memoise(.list_layers)
33 changes: 17 additions & 16 deletions man/download_griddap_dataset.Rd → man/download_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions man/list_layers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 29013dc

Please sign in to comment.