From 7748b606d7b50057f49c417a815c59ff6884468e Mon Sep 17 00:00:00 2001 From: Insang Song Date: Tue, 19 Nov 2024 16:41:40 +0900 Subject: [PATCH 1/3] 1.2.0 candidate - future and future.apply dependency removed: calculate_modis_par[sic] and calculate_nlcd - Downstream changes per dropping future dependency: - Examples - Tests for the two functions are modified per dropping `nthreads` argument - calculate_covariates fixed - calculate_modis_daily is moved to auxiliary and keywords were changed likewise --- DESCRIPTION | 4 +- NAMESPACE | 8 +- NEWS.md | 10 + R/calculate_covariates.R | 245 ++---------------- R/calculate_covariates_auxiliary.R | 160 ++++++++++++ man/calculate_covariates.Rd | 2 +- ...culate_modis_par.Rd => calculate_modis.Rd} | 25 +- man/calculate_modis_daily.Rd | 6 +- man/calculate_nlcd.Rd | 3 - tests/testthat/test-modis.R | 65 ++--- tests/testthat/test-nlcd.R | 24 +- 11 files changed, 227 insertions(+), 325 deletions(-) create mode 100644 NEWS.md rename man/{calculate_modis_par.Rd => calculate_modis.Rd} (88%) diff --git a/DESCRIPTION b/DESCRIPTION index f99bfc4..966dcb1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: amadeus Title: Accessing and Analyzing Large-Scale Environmental Data -Version: 1.1.7 +Version: 1.2.0 Authors@R: c( person(given = "Mitchell", family = "Manware", role = c("aut", "ctb"), comment = c(ORCID = "0009-0003-6440-6106")), person(given = "Insang", family = "Song", role = c("aut", "ctb"), comment = c(ORCID = "0000-0001-8732-3256")), @@ -13,7 +13,7 @@ Authors@R: c( Maintainer: Kyle Messier Description: Functions are designed to facilitate access to and utility with large scale, publicly available environmental data in R. The package contains functions for downloading raw data files from web URLs (download_data()), processing the raw data files into clean spatial objects (process_covariates()), and extracting values from the spatial data objects at point and polygon locations (calculate_covariates()). 
These functions call a series of source-specific functions which are tailored to each data sources/datasets particular URL structure, data format, and spatial/temporal resolution. The functions are tested, versioned, and open source and open access. For sum_edc() method details, see Messier, Akita, and Serre (2012) . Depends: R (>= 4.1.0) -Imports: dplyr, sf, sftime, stats, terra, methods, data.table, httr, rvest, exactextractr, utils, stringi, testthat (>= 3.0.0), parallelly, stars, future, future.apply, tidyr, rlang, nhdplusTools, archive, collapse, Rdpack +Imports: dplyr, sf, sftime, stats, terra, methods, data.table, httr, rvest, exactextractr, utils, stringi, testthat (>= 3.0.0), parallelly, stars, tidyr, rlang, nhdplusTools, archive, collapse, Rdpack Suggests: covr, withr, diff --git a/NAMESPACE b/NAMESPACE index 44e43b2..39f712c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,8 +18,8 @@ export(calculate_hms) export(calculate_koppen_geiger) export(calculate_lagged) export(calculate_merra2) +export(calculate_modis) export(calculate_modis_daily) -export(calculate_modis_par) export(calculate_narr) export(calculate_nei) export(calculate_nlcd) @@ -148,12 +148,6 @@ importFrom(dplyr,select) importFrom(dplyr,summarize) importFrom(dplyr,ungroup) importFrom(exactextractr,exact_extract) -importFrom(future,cluster) -importFrom(future,multicore) -importFrom(future,plan) -importFrom(future,sequential) -importFrom(future.apply,future_Map) -importFrom(future.apply,future_lapply) importFrom(httr,GET) importFrom(httr,HEAD) importFrom(methods,is) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..6c02cac --- /dev/null +++ b/NEWS.md @@ -0,0 +1,10 @@ +# 1.2 +- `future` and `future.apply` dependencies were removed + - `nthreads` argument is removed from `calculate_modis_par()` and `calculate_nlcd()` +- `calculate_modis_par()` is renamed to `calculate_modis()` + +# 1.1 +- `calc_*()` functions are renamed to `calculate_*()` per naming convention of other function 
family in the package + +# 1.0 +- First CRAN release (v.1.0.0) \ No newline at end of file diff --git a/R/calculate_covariates.R b/R/calculate_covariates.R index 03f657d..b30194f 100644 --- a/R/calculate_covariates.R +++ b/R/calculate_covariates.R @@ -18,7 +18,7 @@ #' function. #' @note `covariate` argument value is converted to lowercase. #' @seealso -#' * \code{\link{calculate_modis_par}}: "modis", "MODIS" +#' * \code{\link{calculate_modis}}: "modis", "MODIS" #' * \code{\link{calculate_koppen_geiger}}: "koppen-geiger", "koeppen-geiger", "koppen" #' * \code{\link{calculate_ecoregion}}: "ecoregion", "ecoregions" #' * \code{\link{calculate_temporal_dummies}}: "dummies", "Dummies" @@ -75,7 +75,7 @@ calculate_covariates <- # select function to run what_to_run <- switch(covariate, - modis = calculate_modis_par, + modis = calculate_modis, ecoregion = calculate_ecoregion, ecoregions = calculate_ecoregion, koppen = calculate_koppen_geiger, @@ -285,7 +285,6 @@ calculate_koppen_geiger <- #' @param geom FALSE/"sf"/"terra".. Should the function return with geometry? #' Default is `FALSE`, options with geometry are "sf" or "terra". The #' coordinate reference system of the `sf` or `SpatVector` is that of `from.` -#' @param nthreads integer(1). Number of threads to be used #' @param ... Placeholders. #' @note NLCD is available in U.S. only. Users should be aware of #' the spatial extent of the data. 
The results are different depending @@ -298,19 +297,10 @@ calculate_koppen_geiger <- #' @return a data.frame or SpatVector object #' @importFrom utils read.csv #' @importFrom methods is -#' @importFrom terra rast -#' @importFrom terra project -#' @importFrom terra vect -#' @importFrom terra crs -#' @importFrom terra set.crs -#' @importFrom terra buffer -#' @importFrom sf st_union -#' @importFrom sf st_geometry -#' @importFrom terra intersect -#' @importFrom terra metags +#' @importFrom terra rast project vect crs set.crs buffer +#' @importFrom sf st_union st_geometry +#' @importFrom terra intersect metags #' @importFrom exactextractr exact_extract -#' @importFrom future plan multicore sequential -#' @importFrom future.apply future_Map #' @importFrom collapse rowbind #' @examples #' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large @@ -334,7 +324,6 @@ calculate_nlcd <- function( radius = 1000, max_cells = 5e7, geom = FALSE, - nthreads = 1L, ... ) { # check inputs @@ -349,12 +338,6 @@ calculate_nlcd <- function( if (!methods::is(from, "SpatRaster")) { stop("from is not a SpatRaster.") } - if (nthreads > 1L) { - stopifnot(Sys.info()["sysname"] != "Windows") - future::plan(future::multicore, workers = nthreads) - } else { - future::plan(future::sequential) - } # prepare locations locs_prepared <- calc_prepare_locs( @@ -380,15 +363,14 @@ calculate_nlcd <- function( # terra mode class_query <- "names" # extract land cover class in each buffer - nlcd_at_bufs <- future.apply::future_Map( + nlcd_at_bufs <- Map( function(i) { terra::freq( from, zones = bufs_pol[i, ], wide = TRUE ) - }, seq_len(nrow(bufs_pol)), - future.seed = TRUE + }, seq_len(nrow(bufs_pol)) ) nlcd_at_bufs <- collapse::rowbind(nlcd_at_bufs, fill = TRUE) nlcd_at_bufs <- nlcd_at_bufs[, -seq(1, 2)] @@ -401,7 +383,7 @@ calculate_nlcd <- function( bufs_polx <- bufs_pol[terra::ext(from), ] |> sf::st_as_sf() - nlcd_at_bufs <- future.apply::future_Map( + nlcd_at_bufs <- Map( function(i) { 
exactextractr::exact_extract( from, @@ -412,8 +394,7 @@ calculate_nlcd <- function( append_cols = locs_id, max_cells_in_memory = max_cells ) - }, seq_len(nrow(bufs_polx)), - future.seed = TRUE + }, seq_len(nrow(bufs_polx)) ) nlcd_at_bufs <- collapse::rowbind(nlcd_at_bufs, fill = TRUE) # select only the columns of interest @@ -452,7 +433,6 @@ calculate_nlcd <- function( geom = geom, crs = terra::crs(from) ) - future::plan(future::sequential) return(new_data_vect) } @@ -567,171 +547,6 @@ calculate_ecoregion <- } -#' A single-date MODIS worker for parallelization -#' @param from SpatRaster. Preprocessed objects. -#' @param locs SpatVector/sf/sftime object. Locations where MODIS values -#' are summarized. -#' @param locs_id character(1). Field name where unique site identifiers -#' are stored. Default is `"site_id"` -#' @param radius numeric. Radius to generate circular buffers. -#' @param date Date(1). date to query. -#' @param name_extracted character. Names of calculated covariates. -#' @param fun_summary function. Summary function for -#' multilayer rasters. Passed to `foo`. See [`exactextractr::exact_extract`] -#' for details. -#' @param max_cells integer(1). Maximum number of cells to be read at once. -#' Higher values will expedite processing, but will increase memory usage. -#' Maximum possible value is `2^31 - 1`. -#' @param geom FALSE/"sf"/"terra".. Should the function return with geometry? -#' Default is `FALSE`, options with geometry are "sf" or "terra". The -#' coordinate reference system of the `sf` or `SpatVector` is that of `from.` -#' See [`exactextractr::exact_extract`] for details. -#' @param ... Placeholders. -#' @description The function operates at MODIS/VIIRS products -#' on a daily basis. Given that the raw hdf files are downloaded from -#' NASA, standard file names include a data retrieval date flag starting -#' with letter "A". Leveraging that piece of information, the function will -#' select files of scope on the date of interest. 
-#' Please note that this function does not provide a function to filter -#' swaths or tiles, so it is strongly recommended to check and pre-filter -#' the file names at users' discretion. -#' @seealso -#' * Preprocessing: [process_modis_merge()], [process_modis_swath()], -#' [process_blackmarble()] -#' * Parallelization: [calculate_modis_par()] -#' @author Insang Song -#' @return a data.frame or SpatVector object. -#' @importFrom terra extract -#' @importFrom terra project -#' @importFrom terra vect -#' @importFrom terra nlyr -#' @importFrom terra describe -#' @importFrom methods is -#' @importFrom sf st_as_sf -#' @importFrom sf st_drop_geometry -#' @examples -#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large -#' ## amount of data which is not included in the package. -#' \dontrun{ -#' locs <- data.frame(lon = -78.8277, lat = 35.95013, id = "001") -#' calculate_modis_daily( -#' from = mod06l2_warp, # dervied from process_modis() example -#' locs = locs, -#' locs_id = "id", -#' radius = 0, -#' date = "2024-01-01", -#' name_extracted = "cloud_fraction_0", -#' fun_summary = "mean", -#' max_cells = 3e7 -#' ) -#' } -#' @export -calculate_modis_daily <- function( - from = NULL, - locs = NULL, - locs_id = "site_id", - radius = 0L, - date = NULL, - name_extracted = NULL, - fun_summary = "mean", - max_cells = 3e7, - geom = FALSE, - ... 
-) { - if (!methods::is(locs, "SpatVector")) { - locs <- try(terra::vect(locs)) - if (inherits(locs, "try-error")) { - stop("locs should be a SpatVector or convertible object.") - } - } - if (!locs_id %in% names(locs)) { - stop(sprintf("locs should include columns named %s.\n", - locs_id)) - } - - extract_with_buffer <- function( - points, - surf, - radius, - id, - func = "mean", - maxcells = NULL - ) { - # generate buffers - if (radius == 0) radius <- 1e-6 # approximately 1 meter in degree - bufs <- terra::buffer(points, width = radius, quadsegs = 180L) - bufs <- terra::project(bufs, terra::crs(surf)) - # extract raster values - surf_at_bufs <- - exactextractr::exact_extract( - x = surf, - y = sf::st_as_sf(bufs), - fun = func, - force_df = TRUE, - stack_apply = TRUE, - append_cols = id, - progress = FALSE, - max_cells_in_memory = maxcells - ) - return(surf_at_bufs) - } - - ## NaN to NA - from[is.nan(from)] <- NA - - # raster used to be vrt_today - extracted <- - extract_with_buffer( - points = locs, - surf = from, - id = locs_id, - radius = radius, - func = fun_summary, - maxcells = max_cells - ) - # cleaning names - # assuming that extracted is a data.frame - name_offset <- terra::nlyr(from) - # multiple columns will get proper names - name_range <- seq(ncol(extracted) - name_offset + 1, ncol(extracted), 1) - colnames(extracted)[name_range] <- name_extracted - extracted$time <- as.POSIXlt(date) - check_geom(geom) - if (geom %in% c("sf", "terra")) { - # convert to base date, as terra::vect does not like class "POSIXlt" - extracted$time <- as.Date.POSIXlt(extracted$time) - # location ID with geometry - locs_geom_id <- suppressMessages(calc_prepare_locs( - from = from, - locs = locs, - locs_id = locs_id, - radius = radius, - geom = geom - )[[2]] - ) - # merge - extracted_merge <- merge( - locs_geom_id, - extracted, - by = locs_id - ) - # re-convert to POSIXlt after creating the vect - extracted_merge$time <- as.POSIXlt(extracted_merge$time) - extracted_return <- 
calc_return_locs( - covar = extracted_merge, - POSIXt = TRUE, - geom = geom, - crs = terra::crs(from) - ) - } else { - calc_check_time(covar = extracted, POSIXt = TRUE) - extracted_return <- extracted - } - gc() - return(extracted_return) -} - - #' Calculate MODIS product covariates in multiple CPU threads #' @param from character. List of paths to MODIS/VIIRS files. #' @param locs sf/SpatVector object. Unique locs where covariates @@ -750,8 +565,6 @@ calculate_modis_daily <- function( #' Find detail usage of the argument in notes. #' @param fun_summary character or function. Function to summarize #' extracted raster values. -#' @param nthreads integer(1). Number of threads to be used -#' to calculate covariates. #' @param package_list_add character. A vector with package names to load #' these in each thread. Note that `sf`, `terra`, `exactextractr`, #' `doParallel`, `parallelly` and `dplyr` are the default packages to be @@ -767,7 +580,7 @@ calculate_modis_daily <- function( #' coordinate reference system of the `sf` or `SpatVector` is that of `from.` #' @param ... Arguments passed to `preprocess`. # nolint start -#' @description `calculate_modis_par` essentially runs [`calculate_modis_daily`] function +#' @description `calculate_modis` essentially runs [`calculate_modis_daily`] function #' in each thread (subprocess). Based on daily resolution, each day's workload #' will be distributed to each thread. With `product` argument, #' the files are processed by a customized function where the unique structure @@ -803,12 +616,7 @@ calculate_modis_daily <- function( #' * `attr(., "dates_dropped")`: Dates with insufficient tiles. #' Note that the dates mean the dates with insufficient tiles, #' not the dates without available tiles. 
-#' @seealso See details for setting parallelization: -#' * [`future::plan()`] -#' * [`future.apply::future_lapply()`] -#' * [`parallelly::makeClusterPSOCK()`] -#' * [`parallelly::availableCores()`] -#' +#' @seealso #' This function leverages the calculation of single-day MODIS #' covariates: #' * [`calculate_modis_daily()`] @@ -818,15 +626,10 @@ calculate_modis_daily <- function( #' * [`process_modis_swath()`] #' * [`process_blackmarble()`] #' @importFrom methods is -#' @importFrom sf st_as_sf -#' @importFrom sf st_drop_geometry +#' @importFrom sf st_as_sf st_drop_geometry #' @importFrom terra nlyr -#' @importFrom dplyr bind_rows -#' @importFrom dplyr left_join +#' @importFrom dplyr bind_rows left_join #' @importFrom rlang inject -#' @importFrom future plan -#' @importFrom future cluster -#' @importFrom future.apply future_lapply #' @importFrom parallelly availableWorkers #' @examples #' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large @@ -834,7 +637,7 @@ calculate_modis_daily <- function( #' \dontrun{ #' locs <- data.frame(lon = -78.8277, lat = 35.95013, id = "001") #' locs <- terra::vect(locs, geom = c("lon", "lat"), crs = "EPSG:4326") -#' calculate_modis_par( +#' calculate_modis( #' from = #' list.files("./data", pattern = "VNP46A2.", full.names = TRUE), #' locs = locs, @@ -843,12 +646,11 @@ calculate_modis_daily <- function( #' preprocess = process_modis_merge, #' name_covariates = "cloud_fraction_0", #' subdataset = "Cloud_Fraction", -#' fun_summary = "mean", -#' nthreads = 1 +#' fun_summary = "mean" #' ) #' } #' @export -calculate_modis_par <- +calculate_modis <- function( from = NULL, locs = NULL, @@ -858,7 +660,6 @@ calculate_modis_par <- name_covariates = NULL, subdataset = NULL, fun_summary = "mean", - nthreads = floor(length(parallelly::availableWorkers()) / 2), package_list_add = NULL, export_list_add = NULL, max_cells = 3e7, @@ -914,8 +715,7 @@ process_modis_swath, or process_blackmarble.") export_list <- c() package_list <- 
c("sf", "terra", "exactextractr", "data.table", "stars", - "dplyr", "parallelly", "rlang", "amadeus", "future", - "future.apply") + "dplyr", "parallelly", "rlang", "amadeus") if (!is.null(export_list_add)) { export_list <- append(export_list, export_list_add) } @@ -924,17 +724,11 @@ process_modis_swath, or process_blackmarble.") } # make clusters - # doParallel::registerDoParallel(cores = nthreads) - if (nthreads == 1) { - future::plan(future::sequential) - } else { - future::plan(future::multicore, workers = nthreads) - } idx_date_available <- seq_along(dates_available) list_date_available <- split(idx_date_available, idx_date_available) calc_results <- - future.apply::future_lapply( + lapply( list_date_available, FUN = function(datei) { options(sf_use_s2 = FALSE) @@ -1002,8 +796,7 @@ process_modis_swath, or process_blackmarble.") res0) return(res) - }, - future.seed = TRUE + } ) calc_results <- do.call(dplyr::bind_rows, calc_results) if (geom %in% c("sf", "terra")) { diff --git a/R/calculate_covariates_auxiliary.R b/R/calculate_covariates_auxiliary.R index db92210..3976b79 100644 --- a/R/calculate_covariates_auxiliary.R +++ b/R/calculate_covariates_auxiliary.R @@ -564,3 +564,163 @@ check_geom <- function(geom) { stop("`geom` must be one of FALSE, 'sf', or 'terra'.") } } + + +#' A single-date MODIS worker +#' @param from SpatRaster. Preprocessed objects. +#' @param locs SpatVector/sf/sftime object. Locations where MODIS values +#' are summarized. +#' @param locs_id character(1). Field name where unique site identifiers +#' are stored. Default is `"site_id"` +#' @param radius numeric. Radius to generate circular buffers. +#' @param date Date(1). date to query. +#' @param name_extracted character. Names of calculated covariates. +#' @param fun_summary function. Summary function for +#' multilayer rasters. Passed to `foo`. See [`exactextractr::exact_extract`] +#' for details. +#' @param max_cells integer(1). Maximum number of cells to be read at once. 
+#' Higher values will expedite processing, but will increase memory usage. +#' Maximum possible value is `2^31 - 1`. +#' @param geom FALSE/"sf"/"terra".. Should the function return with geometry? +#' Default is `FALSE`, options with geometry are "sf" or "terra". The +#' coordinate reference system of the `sf` or `SpatVector` is that of `from.` +#' See [`exactextractr::exact_extract`] for details. +#' @param ... Placeholders. +#' @description The function operates at MODIS/VIIRS products +#' on a daily basis. Given that the raw hdf files are downloaded from +#' NASA, standard file names include a data retrieval date flag starting +#' with letter "A". Leveraging that piece of information, the function will +#' select files of scope on the date of interest. +#' Please note that this function does not provide a function to filter +#' swaths or tiles, so it is strongly recommended to check and pre-filter +#' the file names at users' discretion. +#' @seealso +#' * Preprocessing: [process_modis_merge()], [process_modis_swath()], +#' [process_blackmarble()] +#' @keywords auxiliary +#' @author Insang Song +#' @return a data.frame or SpatVector object. +#' @importFrom terra extract project vect nlyr describe +#' @importFrom methods is +#' @importFrom sf st_as_sf st_drop_geometry +#' @examples +#' ## NOTE: Example is wrapped in `\dontrun{}` as function requires a large +#' ## amount of data which is not included in the package. 
+#' \dontrun{ +#' locs <- data.frame(lon = -78.8277, lat = 35.95013, id = "001") +#' calculate_modis_daily( +#' from = mod06l2_warp, # derived from process_modis() example +#' locs = locs, +#' locs_id = "id", +#' radius = 0, +#' date = "2024-01-01", +#' name_extracted = "cloud_fraction_0", +#' fun_summary = "mean", +#' max_cells = 3e7 +#' ) +#' } +#' @export +calculate_modis_daily <- function( + from = NULL, + locs = NULL, + locs_id = "site_id", + radius = 0L, + date = NULL, + name_extracted = NULL, + fun_summary = "mean", + max_cells = 3e7, + geom = FALSE, + ... +) { + if (!methods::is(locs, "SpatVector")) { + locs <- try(terra::vect(locs)) + if (inherits(locs, "try-error")) { + stop("locs should be a SpatVector or convertible object.") + } + } + if (!locs_id %in% names(locs)) { + stop(sprintf("locs should include columns named %s.\n", + locs_id)) + } + + extract_with_buffer <- function( + points, + surf, + radius, + id, + func = "mean", + maxcells = NULL + ) { + # generate buffers + if (radius == 0) radius <- 1e-6 # avoid zero-width buffers; width is in terra::buffer units (meters for lon/lat CRS) + bufs <- terra::buffer(points, width = radius, quadsegs = 180L) + bufs <- terra::project(bufs, terra::crs(surf)) + # extract raster values + surf_at_bufs <- + exactextractr::exact_extract( + x = surf, + y = sf::st_as_sf(bufs), + fun = func, + force_df = TRUE, + stack_apply = TRUE, + append_cols = id, + progress = FALSE, + max_cells_in_memory = maxcells + ) + return(surf_at_bufs) + } + + ## NaN to NA + from[is.nan(from)] <- NA + + # raster used to be vrt_today + extracted <- + extract_with_buffer( + points = locs, + surf = from, + id = locs_id, + radius = radius, + func = fun_summary, + maxcells = max_cells + ) + # cleaning names + # assuming that extracted is a data.frame + name_offset <- terra::nlyr(from) + # multiple columns will get proper names + name_range <- seq(ncol(extracted) - name_offset + 1, ncol(extracted), 1) + colnames(extracted)[name_range] <- name_extracted + extracted$time <- as.POSIXlt(date) +
check_geom(geom) + if (geom %in% c("sf", "terra")) { + # convert to base date, as terra::vect does not like class "POSIXlt" + extracted$time <- as.Date.POSIXlt(extracted$time) + # location ID with geometry + locs_geom_id <- suppressMessages(calc_prepare_locs( + from = from, + locs = locs, + locs_id = locs_id, + radius = radius, + geom = geom + )[[2]] + ) + # merge + extracted_merge <- merge( + locs_geom_id, + extracted, + by = locs_id + ) + # re-convert to POSIXlt after creating the vect + extracted_merge$time <- as.POSIXlt(extracted_merge$time) + extracted_return <- calc_return_locs( + covar = extracted_merge, + POSIXt = TRUE, + geom = geom, + crs = terra::crs(from) + ) + } else { + calc_check_time(covar = extracted, POSIXt = TRUE) + extracted_return <- extracted + } + gc() + return(extracted_return) +} diff --git a/man/calculate_covariates.Rd b/man/calculate_covariates.Rd index 8fbf5f5..30f2bdd 100644 --- a/man/calculate_covariates.Rd +++ b/man/calculate_covariates.Rd @@ -60,7 +60,7 @@ calculate_covariates( } \seealso{ \itemize{ -\item \code{\link{calculate_modis_par}}: "modis", "MODIS" +\item \code{\link{calculate_modis}}: "modis", "MODIS" \item \code{\link{calculate_koppen_geiger}}: "koppen-geiger", "koeppen-geiger", "koppen" \item \code{\link{calculate_ecoregion}}: "ecoregion", "ecoregions" \item \code{\link{calculate_temporal_dummies}}: "dummies", "Dummies" diff --git a/man/calculate_modis_par.Rd b/man/calculate_modis.Rd similarity index 88% rename from man/calculate_modis_par.Rd rename to man/calculate_modis.Rd index 04e9394..d79dc4b 100644 --- a/man/calculate_modis_par.Rd +++ b/man/calculate_modis.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/calculate_covariates.R -\name{calculate_modis_par} -\alias{calculate_modis_par} +\name{calculate_modis} +\alias{calculate_modis} \title{Calculate MODIS product covariates in multiple CPU threads} \usage{ -calculate_modis_par( +calculate_modis( from = NULL, locs = 
NULL, locs_id = "site_id", @@ -13,7 +13,6 @@ calculate_modis_par( name_covariates = NULL, subdataset = NULL, fun_summary = "mean", - nthreads = floor(length(parallelly::availableWorkers())/2), package_list_add = NULL, export_list_add = NULL, max_cells = 3e+07, @@ -47,9 +46,6 @@ Find detail usage of the argument in notes.} \item{fun_summary}{character or function. Function to summarize extracted raster values.} -\item{nthreads}{integer(1). Number of threads to be used -to calculate covariates.} - \item{package_list_add}{character. A vector with package names to load these in each thread. Note that \code{sf}, \code{terra}, \code{exactextractr}, \code{doParallel}, \code{parallelly} and \code{dplyr} are the default packages to be @@ -78,7 +74,7 @@ not the dates without available tiles. } } \description{ -\code{calculate_modis_par} essentially runs \code{\link{calculate_modis_daily}} function +\code{calculate_modis} essentially runs \code{\link{calculate_modis_daily}} function in each thread (subprocess). Based on daily resolution, each day's workload will be distributed to each thread. With \code{product} argument, the files are processed by a customized function where the unique structure @@ -120,7 +116,7 @@ insufficient tiles. 
\dontrun{ locs <- data.frame(lon = -78.8277, lat = 35.95013, id = "001") locs <- terra::vect(locs, geom = c("lon", "lat"), crs = "EPSG:4326") -calculate_modis_par( +calculate_modis( from = list.files("./data", pattern = "VNP46A2.", full.names = TRUE), locs = locs, @@ -129,20 +125,11 @@ calculate_modis_par( preprocess = process_modis_merge, name_covariates = "cloud_fraction_0", subdataset = "Cloud_Fraction", - fun_summary = "mean", - nthreads = 1 + fun_summary = "mean" ) } } \seealso{ -See details for setting parallelization: -\itemize{ -\item \code{\link[future:plan]{future::plan()}} -\item \code{\link[future.apply:future_lapply]{future.apply::future_lapply()}} -\item \code{\link[parallelly:makeClusterPSOCK]{parallelly::makeClusterPSOCK()}} -\item \code{\link[parallelly:availableCores]{parallelly::availableCores()}} -} - This function leverages the calculation of single-day MODIS covariates: \itemize{ diff --git a/man/calculate_modis_daily.Rd b/man/calculate_modis_daily.Rd index 3181670..5132b81 100644 --- a/man/calculate_modis_daily.Rd +++ b/man/calculate_modis_daily.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/calculate_covariates.R +% Please edit documentation in R/calculate_covariates_auxiliary.R \name{calculate_modis_daily} \alias{calculate_modis_daily} -\title{A single-date MODIS worker for parallelization} +\title{A single-date MODIS worker} \usage{ calculate_modis_daily( from = NULL, @@ -81,9 +81,9 @@ calculate_modis_daily( \itemize{ \item Preprocessing: \code{\link[=process_modis_merge]{process_modis_merge()}}, \code{\link[=process_modis_swath]{process_modis_swath()}}, \code{\link[=process_blackmarble]{process_blackmarble()}} -\item Parallelization: \code{\link[=calculate_modis_par]{calculate_modis_par()}} } } \author{ Insang Song } +\keyword{auxiliary} diff --git a/man/calculate_nlcd.Rd b/man/calculate_nlcd.Rd index feaa10b..3035d9f 100644 --- a/man/calculate_nlcd.Rd +++ b/man/calculate_nlcd.Rd @@ -12,7 
+12,6 @@ calculate_nlcd( radius = 1000, max_cells = 5e+07, geom = FALSE, - nthreads = 1L, ... ) } @@ -40,8 +39,6 @@ See \code{\link[exactextractr:exact_extract]{exactextractr::exact_extract}} for Default is \code{FALSE}, options with geometry are "sf" or "terra". The coordinate reference system of the \code{sf} or \code{SpatVector} is that of \code{from.}} -\item{nthreads}{integer(1). Number of threads to be used} - \item{...}{Placeholders.} } \value{ diff --git a/tests/testthat/test-modis.R b/tests/testthat/test-modis.R index 2c8d7c7..0032e8f 100644 --- a/tests/testthat/test-modis.R +++ b/tests/testthat/test-modis.R @@ -631,15 +631,14 @@ testthat::test_that("process_modis (expected errors)", { ################################################################################ ##### calc_modis* -testthat::test_that("calculate_modis_par", { +testthat::test_that("calculate_modis", { withr::local_package("sf") withr::local_package("terra") withr::local_package("stars") withr::local_package("lwgeom") withr::local_options( list( - sf_use_s2 = FALSE, - future.resolve.recursive = 2L + sf_use_s2 = FALSE ) ) @@ -677,13 +676,12 @@ testthat::test_that("calculate_modis_par", { testthat::expect_no_error( suppressWarnings( calc_mod11 <- - calculate_modis_par( + calculate_modis( from = path_mod11, locs = sf::st_as_sf(site_faux), preprocess = process_modis_merge, name_covariates = c("MOD_LSTNT_0_", "MOD_LSTDY_0_"), - subdataset = "(LST_)", - nthreads = 1L + subdataset = "(LST_)" ) ) ) @@ -694,15 +692,14 @@ testthat::test_that("calculate_modis_par", { testthat::expect_no_error( suppressWarnings( calc_mod11 <- - calculate_modis_par( + calculate_modis( from = path_mod11, locs = sf::st_as_sf(site_faux), preprocess = process_modis_merge, package_list_add = c("MASS"), export_list_add = c("aux"), name_covariates = c("MOD_LSTNT_0_", "MOD_LSTDY_0_"), - subdataset = "(LST_)", - nthreads = 1L + subdataset = "(LST_)" ) ) ) @@ -711,7 +708,7 @@ testthat::test_that("calculate_modis_par", { 
testthat::expect_no_error( suppressWarnings( calc_mod11_terra <- - calculate_modis_par( + calculate_modis( from = path_mod11, locs = sf::st_as_sf(site_faux), preprocess = process_modis_merge, @@ -719,7 +716,6 @@ testthat::test_that("calculate_modis_par", { export_list_add = c("aux"), name_covariates = c("MOD_LSTNT_0_", "MOD_LSTDY_0_"), subdataset = "(LST_)", - nthreads = 1L, geom = "terra" ) ) @@ -730,7 +726,7 @@ testthat::test_that("calculate_modis_par", { testthat::expect_no_error( suppressWarnings( calc_mod11_sf <- - calculate_modis_par( + calculate_modis( from = path_mod11, locs = sf::st_as_sf(site_faux), preprocess = process_modis_merge, @@ -738,7 +734,6 @@ testthat::test_that("calculate_modis_par", { export_list_add = c("aux"), name_covariates = c("MOD_LSTNT_0_", "MOD_LSTDY_0_"), subdataset = "(LST_)", - nthreads = 1L, geom = "sf" ) ) @@ -747,7 +742,7 @@ testthat::test_that("calculate_modis_par", { # with geometry error testthat::expect_error( - calculate_modis_par( + calculate_modis( from = path_mod11, locs = sf::st_as_sf(site_faux), preprocess = process_modis_merge, @@ -755,7 +750,6 @@ testthat::test_that("calculate_modis_par", { export_list_add = c("aux"), name_covariates = c("MOD_LSTNT_0_", "MOD_LSTDY_0_"), subdataset = "(LST_)", - nthreads = 1L, geom = TRUE ) ) @@ -780,13 +774,12 @@ testthat::test_that("calculate_modis_par", { testthat::expect_no_error( suppressWarnings( calc_mod06 <- - calculate_modis_par( + calculate_modis( from = path_mod06, locs = site_faux, subdataset = c("Cloud_Fraction_Day", "Cloud_Fraction_Night"), preprocess = process_modis_swath, - name_covariates = c("MOD_CLFRN_0_", "MOD_CLFRD_0_"), - nthreads = 1 + name_covariates = c("MOD_CLFRN_0_", "MOD_CLFRD_0_") ) ) ) @@ -796,13 +789,12 @@ testthat::test_that("calculate_modis_par", { testthat::expect_no_error( suppressWarnings( calc_mod06_terra <- - calculate_modis_par( + calculate_modis( from = path_mod06, locs = site_faux, subdataset = c("Cloud_Fraction_Day", "Cloud_Fraction_Night"), 
preprocess = process_modis_swath, name_covariates = c("MOD_CLFRN_0_", "MOD_CLFRD_0_"), - nthreads = 1, geom = "terra" ) ) @@ -813,13 +805,12 @@ testthat::test_that("calculate_modis_par", { testthat::expect_no_error( suppressWarnings( calc_mod06_sf <- - calculate_modis_par( + calculate_modis( from = path_mod06, locs = site_faux, subdataset = c("Cloud_Fraction_Day", "Cloud_Fraction_Night"), preprocess = process_modis_swath, name_covariates = c("MOD_CLFRN_0_", "MOD_CLFRD_0_"), - nthreads = 1, geom = "sf" ) ) @@ -828,13 +819,12 @@ testthat::test_that("calculate_modis_par", { # with geometry error testthat::expect_error( - calculate_modis_par( + calculate_modis( from = path_mod06, locs = site_faux, subdataset = c("Cloud_Fraction_Day", "Cloud_Fraction_Night"), preprocess = process_modis_swath, name_covariates = c("MOD_CLFRN_0_", "MOD_CLFRD_0_"), - nthreads = 1, geom = TRUE ) ) @@ -857,13 +847,12 @@ testthat::test_that("calculate_modis_par", { testthat::expect_no_error( suppressWarnings( calc_vnp46 <- - calculate_modis_par( + calculate_modis( from = path_vnp46, locs = site_faux, preprocess = process_blackmarble, name_covariates = c("MOD_NITLT_0_"), subdataset = 3L, - nthreads = 1, tile_df = process_blackmarble_corners(c(9, 10), c(5, 5)) ) ) @@ -874,13 +863,12 @@ testthat::test_that("calculate_modis_par", { testthat::expect_no_error( suppressWarnings( calc_vnp46_terra <- - calculate_modis_par( + calculate_modis( from = path_vnp46, locs = site_faux, preprocess = process_blackmarble, name_covariates = c("MOD_NITLT_0_"), subdataset = 3L, - nthreads = 1, tile_df = process_blackmarble_corners(c(9, 10), c(5, 5)), geom = "terra" ) @@ -893,13 +881,12 @@ testthat::test_that("calculate_modis_par", { testthat::expect_no_error( suppressWarnings( calc_vnp46_sf <- - calculate_modis_par( + calculate_modis( from = path_vnp46, locs = sf::st_as_sf(site_faux), preprocess = process_blackmarble, name_covariates = c("MOD_NITLT_0_"), subdataset = 3L, - nthreads = 1, tile_df = 
process_blackmarble_corners(c(9, 10), c(5, 5)), geom = "sf" ) @@ -909,13 +896,12 @@ testthat::test_that("calculate_modis_par", { # with geometry error testthat::expect_error( - calculate_modis_par( + calculate_modis( from = path_vnp46, locs = sf::st_as_sf(site_faux), preprocess = process_blackmarble, name_covariates = c("MOD_NITLT_0_"), subdataset = 3L, - nthreads = 1, tile_df = process_blackmarble_corners(c(9, 10), c(5, 5)), geom = TRUE ) @@ -1031,40 +1017,37 @@ testthat::test_that("calculate_modis_par", { testthat::expect_true("sf" %in% class(calc_mod_sf)) testthat::expect_error( - calculate_modis_par(from = site_faux) + calculate_modis(from = site_faux) ) testthat::expect_error( - calculate_modis_par(from = path_mod11, product = "MOD11A1", locs = list(1, 2, 3)) + calculate_modis(from = path_mod11, product = "MOD11A1", locs = list(1, 2, 3)) ) testthat::expect_error( - calculate_modis_par( + calculate_modis( from = path_vnp46, locs = site_faux, preprocess = "fountain", name_covariates = c("MOD_NITLT_0_", "MOD_K1_"), - subdataset = 3L, - nthreads = 1 + subdataset = 3L ) ) testthat::expect_warning( - calculate_modis_par( + calculate_modis( from = path_vnp46, locs = site_faux, preprocess = process_blackmarble, name_covariates = c("MOD_NITLT_0_", "MOD_K1_"), subdataset = 3L, - nthreads = 2, tile_df = process_blackmarble_corners(c(9, 10), c(5, 5)) ) ) testthat::expect_warning( - flushed <- calculate_modis_par( + flushed <- calculate_modis( from = path_vnp46, locs = site_faux, name_covariates = c("MOD_NITLT_0_"), preprocess = process_blackmarble, subdataset = 3L, - nthreads = 1, radius = c(-1000, 0L) ) ) diff --git a/tests/testthat/test-nlcd.R b/tests/testthat/test-nlcd.R index acbc353..c75eb8d 100644 --- a/tests/testthat/test-nlcd.R +++ b/tests/testthat/test-nlcd.R @@ -122,10 +122,8 @@ testthat::test_that("calculate_nlcd", { withr::local_package("terra") withr::local_package("exactextractr") withr::local_package("sf") - withr::local_package("future") - 
withr::local_package("future.apply") withr::local_options( - list(sf_use_s2 = FALSE, future.resolve.recursive = 2L) + list(sf_use_s2 = FALSE) ) point_us1 <- cbind(lon = -114.7, lat = 38.9, site_id = 1) @@ -191,26 +189,6 @@ testthat::test_that("calculate_nlcd", { radius = 300 ) ) - # -- multicore mode works properly - testthat::expect_no_error( - calculate_nlcd( - locs = eg_data, - from = nlcdras, - mode = "exact", - radius = 1000, - nthreads = 2L - ) - ) - testthat::expect_no_error( - calculate_nlcd( - locs = eg_data, - from = nlcdras, - mode = "terra", - radius = 1000, - nthreads = 2L - ) - ) - # -- year is numeric testthat::expect_error( From b4d179e85649291f62f1bc88fa83bb2d3c556f9d Mon Sep 17 00:00:00 2001 From: Insang Song Date: Tue, 19 Nov 2024 16:44:12 +0900 Subject: [PATCH 2/3] minor documentation fix - calculate_modis(): `nthreads` removal --- R/calculate_covariates.R | 5 +---- man/calculate_modis.Rd | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/R/calculate_covariates.R b/R/calculate_covariates.R index b30194f..f3c3700 100644 --- a/R/calculate_covariates.R +++ b/R/calculate_covariates.R @@ -584,10 +584,7 @@ calculate_ecoregion <- #' in each thread (subprocess). Based on daily resolution, each day's workload #' will be distributed to each thread. With `product` argument, #' the files are processed by a customized function where the unique structure -#' and/or characteristics of the products are considered. `nthreads` -#' argument should be carefully selected in consideration of the machine's -#' CPU and memory capacities as products have their own memory pressure. -#' `locs` should be `sf` object as it is exportable to parallel workers. +#' and/or characteristics of the products are considered. 
# nolint end #' @note Overall, this function and dependent routines assume that the file #' system can handle concurrent access to the (network) disk by multiple diff --git a/man/calculate_modis.Rd b/man/calculate_modis.Rd index d79dc4b..86cf48e 100644 --- a/man/calculate_modis.Rd +++ b/man/calculate_modis.Rd @@ -78,10 +78,7 @@ not the dates without available tiles. in each thread (subprocess). Based on daily resolution, each day's workload will be distributed to each thread. With \code{product} argument, the files are processed by a customized function where the unique structure -and/or characteristics of the products are considered. \code{nthreads} -argument should be carefully selected in consideration of the machine's -CPU and memory capacities as products have their own memory pressure. -\code{locs} should be \code{sf} object as it is exportable to parallel workers. +and/or characteristics of the products are considered. } \note{ Overall, this function and dependent routines assume that the file From dc5768daea2bfa6106de892dec02913602006f87 Mon Sep 17 00:00:00 2001 From: Insang Song Date: Tue, 19 Nov 2024 21:46:36 +0900 Subject: [PATCH 3/3] fix invalid roxygen reference - process_modis_sds --- R/process.R | 2 +- man/process_modis_sds.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/process.R b/R/process.R index cc533e1..779b80c 100644 --- a/R/process.R +++ b/R/process.R @@ -152,7 +152,7 @@ process_covariates <- #' @author Insang Song #' @return A character object that conforms to the regular #' expression. Details of regular expression in R can be found in [regexp]. -#' @seealso [calculate_modis_par] +#' @seealso [calculate_modis] #' @examples #' process_modis_sds(product = "MOD09GA") #' @export diff --git a/man/process_modis_sds.Rd b/man/process_modis_sds.Rd index 84b494a..9a5e486 100644 --- a/man/process_modis_sds.Rd +++ b/man/process_modis_sds.Rd @@ -48,7 +48,7 @@ Name" = MCD12C1.006, then \code{product = "MCD12C1"}. 
process_modis_sds(product = "MOD09GA") } \seealso{ -\link{calculate_modis_par} +\link{calculate_modis} } \author{ Insang Song