Skip to content

Commit

Permalink
solves pagination and rebuilds parsing to adapt to lists of pages.
Browse files Browse the repository at this point in the history
ö: search "id" needs to be checked.
  • Loading branch information
AndreaSanchezTapia committed Nov 8, 2024
1 parent 7165ef8 commit b0e0526
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 25 deletions.
107 changes: 84 additions & 23 deletions R/get_vessel_info.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#' changed one or more identity properties}
#' \item{`"ALL"`}{The registryInfo array will return the same number of objects that rows we have in the vessel database}
#' }
#' @param quiet Boolean. Whether to print the number of events returned by the
#' request and progress
#' @param key Authorization token. Can be obtained with `gfw_auth()` function
#' @param print_request Boolean. Whether to print the request, for debugging
#' purposes. When contacting the GFW team it will be useful to send this string
Expand Down Expand Up @@ -73,6 +75,7 @@ get_vessel_info <- function(query = NULL,
registries_info_data = c("ALL"),
search_type = "search",
key = gfw_auth(),
quiet = FALSE,
print_request = FALSE,
...) {

Expand Down Expand Up @@ -145,32 +148,35 @@ endpoint <- base %>%
httr2::req_url_path_append(path_append) %>%
httr2::req_url_query(!!!args)

limit <- 50

request <- endpoint %>%
httr2::req_url_query(`limit` = limit) %>%
httr2::req_headers(Authorization = paste("Bearer", key, sep = " ")) %>%
#httr2::req_error(., body = gist_error_body) %>%
httr2::req_user_agent(gfw_user_agent())

response <- httr2::req_perform(request) %>%
response <- request %>%
httr2::req_perform() %>%
httr2::resp_body_json(simplifyVector = TRUE, check_type = TRUE)
# stop if not found
if (response$total == 0) return(message("No vessel was found with that identifier"))

###PAGINATION
# List to store responses
limit <- 50
responses <- list()
responses[[1]] <- response

# Current page values
total <- response$total

if (quiet == FALSE) message(paste( total, "total vessels"))
n_entries <- length(response$entries)
next_since <- response$since

while (!is.null(next_since)) {
while (n_entries != 0) {
# # API call for next page
next_response <- request %>%
httr2::req_url_query(`since` = response$since) %>%
httr2::req_url_query(`since` = next_since) %>%
httr2::req_perform() %>%
httr2::resp_body_json(simplifyVector = TRUE, check_type = TRUE)

Expand All @@ -179,31 +185,86 @@ endpoint <- base %>%

# Pull out next_since of latest API response
next_since <- next_response$since
n_entries <- length(next_response$entries)
if (quiet == FALSE) {
total_requests <- floor(total/limit)
current_request <- length(responses) + 1
message(paste(floor(current_request*100/total_requests), "%" ))
}
}
all_entries <- purrr::map(responses, purrr::pluck, 'entries') %>%
purrr::flatten(.)
# format tibbles
combinedSourcesInfo <- dplyr::bind_rows(purrr::map(response$entries$combinedSourcesInfo, tibble::tibble))
# format tibbles
all_entries <- purrr::map(responses, purrr::pluck, 'entries')
# dataset (same but good to have for length)

# 1/8 dataset
dataset <- purrr::map(all_entries, purrr::pluck, 'dataset') %>%
unlist(recursive = F) %>%
tibble::tibble(dataset = .)

# 2/8 registryinfototalrecords
# one row per vessel. Those who have registry info will show up with 1, those are the ones that have their vesselRecord id available under registryInfo$id
#most vessels with registry will show up first but sometimes there are vessels with registry down the list
registryInfoTotalRecords <-
purrr::map(all_entries, purrr::pluck, 'registryInfoTotalRecords') %>%
unlist(recursive = F) %>%
tibble::tibble(registryInfoTotalRecords = .)

# 3/8 registryInfo -only for those who have it
registryInfo <- purrr::map(all_entries, purrr::pluck, 'registryInfo') %>%
unlist(recursive = FALSE) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index)) %>%
dplyr::rename(vesselRecord = id) %>%
# dplyr::select(-`<list>`) %>%
# unnest geartypes
tidyr::unnest(geartypes, keep_empty = TRUE)

# 4/8 registryOwners #has all records with and without registry but may have a different
#dimension than registryInfo due to lack of data
registryOwners <- purrr::map(all_entries, purrr::pluck, "registryOwners") %>%
unlist(recursive= FALSE) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index))
# dplyr::select(-`<list>`)

# 5/8 registryPublicAuthorizations
registryPublicAuthorizations <- purrr::map(all_entries, purrr::pluck, 'registryPublicAuthorizations') %>%
unlist(recursive = F) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index))
#tidyr::unnest(sourceCode, keep_empty = TRUE)
# dplyr::select(-`<list>`)


if (!is.null(combinedSourcesInfo$geartypes))
combinedSourcesInfo <- combinedSourcesInfo %>%
tidyr::unnest(.data$geartypes, names_sep = "_geartype_", keep_empty = TRUE)
#6/8 combinedSourcesInfo joins vesselId, geartypes and shiptypes.
combinedSourcesInfo <- purrr::map(all_entries, purrr::pluck, 'combinedSourcesInfo') %>%
unlist(recursive = F) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index)) %>% #after indexing we can unnest
tidyr::unnest(geartypes, names_sep = "_geartype_", keep_empty = TRUE) %>%
tidyr::unnest(shiptypes, names_sep = "_shiptype_", keep_empty = TRUE)

if (!is.null(combinedSourcesInfo$shiptypes))
combinedSourcesInfo <- combinedSourcesInfo %>%
tidyr::unnest(.data$shiptypes, names_sep = "_shiptype_", keep_empty = TRUE)
# 7/8 selfReportedInfo this is AIS
selfReportedInfo <- purrr::map(all_entries, purrr::pluck, 'selfReportedInfo') %>%
unlist(recursive = F) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index))

# build output list
output <- list(
dataset = tibble::tibble(dataset = response$entries$dataset),
registryInfoTotalRecords = tibble::tibble(registryInfoTotalRecords = response$entries$registryInfoTotalRecords),
registryInfo = dplyr::bind_rows(purrr::map(response$entries$registryInfo, tibble::tibble)),
registryOwners = dplyr::bind_rows(purrr::map(response$entries$registryOwners, tibble::tibble)),
registryPublicAuthorizations = dplyr::bind_rows(purrr::map(response$entries$registryPublicAuthorizations, tibble::tibble)),
dataset = dataset,
registryInfoTotalRecords = registryInfoTotalRecords,
registryInfo = registryInfo,
registryOwners = registryOwners,
registryPublicAuthorizations = registryPublicAuthorizations,
combinedSourcesInfo = combinedSourcesInfo,
selfReportedInfo = dplyr::bind_rows(purrr::map(response$entries$selfReportedInfo, tibble::tibble))
)
selfReportedInfo = selfReportedInfo)

output$selfReportedInfo <- output$selfReportedInfo %>% dplyr::rename(vesselId = id)
output$selfReportedInfo <- output$selfReportedInfo
return(output)
}
7 changes: 6 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,12 @@ sf_to_geojson <- function(sf_shape, endpoint = 'raster') {
}

globalVariables(c("."))
globalVariables(c("<list>"))
globalVariables(c("geartypes"))
globalVariables(c("id"))
globalVariables(c("value"))
globalVariables(c("includes"))
globalVariables(c("index"))
globalVariables(c("shiptypes"))
globalVariables(c("sourceCode"))
globalVariables(c("value"))
globalVariables(c("registries_info_data"))
6 changes: 5 additions & 1 deletion man/get_vessel_info.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b0e0526

Please sign in to comment.