Skip to content

Commit

Permalink
improve speed of GFW calculations (#225)
Browse files Browse the repository at this point in the history
* improve speed of GFW calculations
  • Loading branch information
goergen95 authored Jan 19, 2024
1 parent 5b361b1 commit e63b861
Show file tree
Hide file tree
Showing 15 changed files with 424 additions and 627 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Depends: R (>= 3.5.0)
SystemRequirements: GDAL (>= 3.0.0), PROJ (>= 4.8.0)
Imports: curl, dplyr, furrr, httr, magrittr, progressr, purrr, rvest, R.utils,
sf, stringr, terra, tibble, tidyr, tidyselect
Suggests: exactextractr, future, knitr, rmarkdown, rstac, SPEI,
Suggests: exactextractr, future, knitr, landscapemetrics, rmarkdown, rstac, SPEI,
testthat (>= 3.0.0)
VignetteBuilder: knitr
Config/Needs/website: DiagrammeR, ggplot2, lubridate, manipulateWidget, rgl, spatstat,
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
- Rasters are now cropped to the spatial extent of an asset with setting
`snap="out"`, thus delivering a slightly bigger extent (#212).

- Speed improvements for GFW indicators (up to x10 for larger rasters) now
require R package `exactextractr` to be installed. Also, it is advised
to have the R package `landscapemetrics` installed to gain the full computation
speed improvement.

## Bug Fixes

- `calc_indicators()` checks for 0-length tibbles (#196, #199, #215).
Expand Down
261 changes: 154 additions & 107 deletions R/calc_treecover_area.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,147 +79,194 @@ NULL
verbose = TRUE,
...) {
# initial argument checks
.check_namespace("exactextractr")
# check additional arguments
.gfw_check_min_cover(min_cover, "treecover_area")
.gfw_check_min_size(min_size, "treecover_area")

# handling of return value if resources are missing, e.g. no overlap
if (any(is.null(gfw_treecover), is.null(gfw_lossyear))) {
return(NA)
}
# retrieve years from portfolio
years <- attributes(x)$years
years <- .gfw_check_years(years, "treecover_area")
if (length(years) == 0) {
return(tibble::tibble(years = NA, treecover = NA))
}
# check if gfw_treecover only contains 0s, e.g. on the ocean
if (.gfw_empty_raster(gfw_treecover)) {
return(tibble::tibble(years = years, treecover = 0))
}

# prepare gfw rasters
gfw <- .gfw_prep_rasters(x, gfw_treecover, gfw_lossyear, cover = min_cover)

# apply extraction routine
gfw_stats <- exactextractr::exact_extract(
gfw, x, function(data, min_size) {
# retain only forest pixels and set area to ha
data <- .prep_gfw_data(data, min_size)
losses <- .sum_gfw(data, "coverage_area")
names(losses)[2] <- "loss"
org_coverage <- sum(data[["coverage_area"]])

if (all(losses[["loss"]] == 0)) {
result <- data.frame(
years = years,
treecover = org_coverage
)
return(result)
}

previous_losses <- sum(losses[["loss"]][losses[["years"]] < years[1]])

if (previous_losses != 0) {
org_coverage <- org_coverage - previous_losses
}

losses <- losses[losses[["years"]] %in% years, ]
losses[["treecover"]] <- org_coverage
losses[["treecover"]] <- losses[["treecover"]] - cumsum(losses[["loss"]])
losses[, c("years", "treecover")]
},
min_size = min_size, coverage_area = TRUE, summarize_df = TRUE
)

rm(gfw)
gc()
tibble::as_tibble(gfw_stats)
}


.prep_gfw_data <- function(data, min_size) {
# retain only forest pixels and set area to ha
data <- data[data[["treecover"]] == 1, ]
data[["coverage_area"]] <- data[["coverage_area"]] / 10000 # to ha

# exclude patches smaller than threshold
patch_sizes <- by(data[["coverage_area"]], data[["patches"]], sum)
keep_patches <- names(patch_sizes)[which(patch_sizes > min_size)]
keep_patches <- as.numeric(keep_patches)

data[data[["patches"]] %in% keep_patches, ]
}


.sum_gfw <- function(data, what = "coverage_area") {
# calculate loss area by year
df <- data.frame(years = 2000:2022, var = 0)
names(df)[2] <- what
my_sum <- by(data[[what]], data[["lossyear"]], sum, na.rm = TRUE)
sum_years <- as.numeric(names(my_sum))
sum_years <- sum_years + 2000

index <- match(sum_years, df[["years"]])
df[[what]][index] <- as.numeric(my_sum)

df
}

.gfw_check_years <- function(years, indicator) {
if (any(years < 2000)) {
warning(paste("Cannot calculate treecover statistics ",
msg <- paste("Cannot calculate %s statistics ",
"for years smaller than 2000.",
sep = ""
))
years <- years[years >= 2000]
if (length(years) == 0) {
return(tibble(years = NA, treecover = NA))
}
)
msg <- sprintf(msg, indicator)
warning(msg)
}
years[years >= 2000]
}

# check if gfw_treecover only contains 0s, e.g. on the ocean
minmax_gfw_treecover <- unique(as.vector(minmax(gfw_treecover)))
if (length(minmax_gfw_treecover) == 1) {
if (minmax_gfw_treecover == 0 | is.nan(minmax_gfw_treecover)) {
return(tibble(years = years, treecover = rep(0, length(years))))
}
.gfw_empty_raster <- function(gfw) {
minmax <- unique(as.vector(terra::minmax(gfw)))
if (length(minmax) > 1) {
return(FALSE)
}
if (minmax == 0 || is.nan(minmax)) {
return(TRUE)
}
}

# check additional arguments
min_cover_msg <- paste("Argument 'min_cover' for indicator 'treecover' ",
.gfw_check_min_cover <- function(min_cover, indicator) {
msg <- paste("Argument 'min_cover' for indicator '%s' ",
"must be a numeric value between 0 and 100.",
sep = ""
)

if (is.numeric(min_cover)) {
min_cover <- as.integer(round(min_cover))
} else {
stop(min_cover_msg, call. = FALSE)
}
msg <- sprintf(msg, indicator)
if (length(min_cover) != 1) stop(msg, call. = FALSE)
if (!is.numeric(min_cover)) stop(msg, call. = FALSE)
if (min_cover < 0 || min_cover > 100) {
stop(min_cover_msg, call. = FALSE)
stop(msg, call. = FALSE)
}
}

min_size_msg <- paste("Argument 'min_size' for indicator 'treecover' must be ",
"anumeric value greater 0.",
.gfw_check_min_size <- function(min_size, indicator) {
msg <- paste(
"Argument 'min_size' for indicator '%s' must be ",
"a numeric value greater 0.",
sep = ""
)
if (!is.numeric(min_size) | min_size <= 0) stop(min_size_msg, call. = FALSE)
msg <- sprintf(msg, indicator)
if (length(min_size) != 1) stop(msg, call. = FALSE)
if (!is.numeric(min_size) || min_size <= 0) stop(msg, call. = FALSE)
}

.gfw_calc_patches <- function(gfw) {
if (!requireNamespace("landscapemetrics", quietly = TRUE)) {
msg <- paste("Could not load package 'landscapemetrics'.\n",
"Consider installing it for better performance with:\n",
"install.packages('landscapemetrics)'",
sep = ""
)
message(msg)
patched <- terra::patches(gfw, directions = 4, zeroAsNA = TRUE)
} else {
patched <- landscapemetrics::get_patches(gfw, class = 1, direction = 4)[[1]][[1]]
}
patched
}

# skip if the area of the

#----------------------------------------------------------------------------
# start calculation if everything is set up correctly
# retrieve an area raster
arearaster <- cellSize(
gfw_treecover,
unit = "ha"
)
# rasterize the polygon
polyraster <- rasterize(
vect(x), gfw_treecover,
field = 1, touches = TRUE
)
.gfw_prep_rasters <- function(x, treecover, lossyear, emissions, cover) {
# mask gfw_treecover
gfw_treecover <- mask(
gfw_treecover, polyraster
treecover <- terra::mask(treecover, x)
# binarize the treecover layer based on mcover argument
binary_treecover <- terra::classify(treecover,
rcl = matrix(c(
NA, NA, 0,
0, cover, 0,
cover, 100, 1
), ncol = 3, byrow = TRUE),
include.lowest = TRUE
)

# mask lossyear
gfw_lossyear <- mask(
gfw_lossyear, polyraster
)
# binarize the gfw_treecover layer based on min_cover argument
binary_gfw_treecover <- classify(
gfw_treecover,
rcl = matrix(c(0, min_cover, 0, min_cover, 100, 1), ncol = 3, byrow = TRUE),
include.lowest = TRUE
)
# retrieve patches of comprehensive forest areas
patched <- patches(
binary_gfw_treecover,
directions = 4, zeroAsNA = TRUE
)
lossyear <- terra::mask(lossyear, binary_treecover)
lossyear <- terra::ifel(lossyear == 0, NA, lossyear)
# create patches
patched <- .gfw_calc_patches(binary_treecover)

unique_vals <- unique(as.vector(minmax(patched)))
if (length(unique_vals) == 1) {
if (is.nan(unique_vals)) {
return(tibble(years = years, treecover = rep(0, length(years))))
}
}
# get the sizes of the patches
patchsizes <- zonal(
arearaster, patched, sum,
as.raster = TRUE
)
# remove patches smaller than threshold
binary_gfw_treecover <- ifel(
patchsizes < min_size, 0, binary_gfw_treecover
)
# return 0 if binary gfw_treecover only consists of 0 or nan
minmax_gfw_treecover <- unique(as.vector(minmax(binary_gfw_treecover)))
if (length(minmax_gfw_treecover) == 1) {
if (minmax_gfw_treecover == 0 | is.nan(minmax_gfw_treecover)) {
return(
tibble(
years = years,
treecover = rep(0, length(years))
)
if (!missing(emissions)) {
if (terra::ncell(emissions) != terra::ncell(treecover)) {
emissions <- terra::resample(
emissions, treecover,
method = "bilinear"
)
}
emissions <- terra::mask(emissions, lossyear)
gfw <- c(binary_treecover, lossyear, patched, emissions)
names(gfw) <- c("treecover", "lossyear", "patches", "emissions")
} else {
# prepare return raster
gfw <- c(binary_treecover, lossyear, patched)
names(gfw) <- c("treecover", "lossyear", "patches")
}
# set no loss occurrences to NA
gfw_lossyear <- ifel(
gfw_lossyear == 0, NA, gfw_lossyear
)
# exclude non-tree pixels from lossyear layer
gfw_lossyear <- mask(
gfw_lossyear, binary_gfw_treecover
)

# get forest cover statistics for each year
yearly_cover_values <- lapply(years, function(y) {
y <- y - 2000
current_gfw_treecover <- ifel(
gfw_lossyear <= y, 0, binary_gfw_treecover
)
current_arearaster <- mask(
arearaster, current_gfw_treecover,
maskvalues = c(NA, 0)
)
ha_sum_gfw_treecover <- zonal(
current_arearaster, polyraster, sum,
na.rm = TRUE
)[2]
as.numeric(ha_sum_gfw_treecover)
})

# memory clean up
rm(arearaster, binary_gfw_treecover, patchsizes, patched, polyraster)
# return a data-frame
tibble(years = years, treecover = as.vector(unlist(yearly_cover_values)))
gfw
}


register_indicator(
name = "treecover_area",
resources = list(
Expand Down
Loading

0 comments on commit e63b861

Please sign in to comment.