Skip to content

Commit

Permalink
pagination solved + NEWS. all examples run. closes #176
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreaSanchezTapia committed Nov 8, 2024
1 parent b0e0526 commit c9140cc
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 108 deletions.
19 changes: 19 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
# gfwr 2.0.1

# Major changes

- We have updated the parsing of the output of `get_vessel_info()` to handle
long, paginated responses. Most simple queries do not need pagination, but
some advanced queries using `where` return long responses that are split
across several pages. This issue is now solved, and the number of results should
equal the number of vessels returned by Vessel Viewer on our map.

# New features

- We added an `index` column to indicate which `vesselIds` are grouped together
and belong to the same vessel. This should help associate information from the
returned datasets and guide the user when requesting events for a single boat or
a group of boats.



# gfwr 2.0.0

__`gfwr` was updated to work with version 3 of our APIs.__ This results in various breaking changes in
Expand Down
221 changes: 113 additions & 108 deletions R/get_vessel_info.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ get_vessel_info <- function(query = NULL,
quiet = FALSE,
print_request = FALSE,
...) {

if (search_type %in% c("advanced", "basic")) {
# Signal the deprecation to the user
warning("basic or advanced search are no longer in use. Options are 'search' or 'id'")
Expand All @@ -93,24 +92,37 @@ get_vessel_info <- function(query = NULL,
#)
#

# gets endpoint here ---------
# gets endpoint here ---------

# API endpoint specific parameters from ...
args <- list(...)
for (i in seq_len(length(args))) {
assign(names(args[i]), args[[i]])
}
# API endpoint specific parameters from ...
args <- list(...)
for (i in seq_len(length(args))) {
assign(names(args[i]), args[[i]])
}


base <- httr2::request("https://gateway.api.globalfishingwatch.org/v3/")
base <- httr2::request("https://gateway.api.globalfishingwatch.org/v3/")

# Only one dataset ID for selected API
dataset <- "public-global-vessel-identity:latest"
dataset <- vector_to_array(dataset, type = "datasets")
args <- c(args, dataset)
# Only one dataset ID for selected API
dataset <- "public-global-vessel-identity:latest"
dataset <- vector_to_array(dataset, type = "datasets")
args <- c(args, dataset)

#Default is search
if (search_type == "search") {
#Default is search
# Search id - ID search now receives a vector
if (search_type == "id" & is.null(ids)) stop("parameter 'ids' must be specified when search_type = 'id'")
if (!is.null("ids") & is.null(where) & is.null(query) & search_type == "search") stop("search_type must be 'id' when ids are specified")
if (!is.null("ids") & search_type == "id") {
path_append <- "vessels"
ids <- vector_to_array(ids, type = "ids")
args <- c(args, ids)
if (!is.null(registries_info_data)) {
reg_info <- c(`registries-info-data` = registries_info_data)
args <- c(args, reg_info)
}
}
# search search
if (search_type == "search") {
if (is.null(query) & is.null(where)) stop("either 'query' or 'where' must be specified when search_type = 'search'")
if (!is.null(query) & !is.null(where)) stop("specify either 'query' or 'where', but not both when search_type = 'search'")
if (!is.null(query)) {
Expand All @@ -129,131 +141,124 @@ args <- c(args, dataset)
args <- c(args, incl)
}
}
# ID search now receives a vector
if (search_type == "id" & is.null(ids)) stop("parameter 'ids' must be specified when search_type = 'id'")

if (!is.null("ids") & is.null(where) & is.null(query) & search_type == "search") stop("search_type must be 'id' when ids are specified")

if (!is.null("ids") & search_type == "id") {
path_append <- "vessels"
ids <- vector_to_array(ids, type = "ids")
args <- c(args, ids)
if (!is.null(registries_info_data)) {
reg_info <- c(`registries-info-data` = registries_info_data)
args <- c(args, reg_info)
}
}
endpoint <- base %>%
httr2::req_url_path_append(path_append) %>%
httr2::req_url_query(!!!args)

endpoint <- base %>%
httr2::req_url_path_append(path_append) %>%
httr2::req_url_query(!!!args)

limit <- 50

request <- endpoint %>%
httr2::req_url_query(`limit` = limit) %>%
request <- endpoint %>%
httr2::req_headers(Authorization = paste("Bearer", key, sep = " ")) %>%
#httr2::req_error(., body = gist_error_body) %>%
httr2::req_user_agent(gfw_user_agent())

response <- request %>%
httr2::req_perform() %>%
# pagination in search
if (search_type == "search") {
limit <- 50
request <- request %>%
httr2::req_url_query(`limit` = limit)

}
# performs request
response <- request %>%
httr2::req_perform() %>%
httr2::resp_body_json(simplifyVector = TRUE, check_type = TRUE)
# stop if not found
if (response$total == 0) return(message("No vessel was found with that identifier"))
# stop if not found
if (response$total == 0) return(message("No vessel was found with that identifier"))

###PAGINATION
# List to store responses
responses <- list()
responses[[1]] <- response
# List to store responses
responses <- list()
responses[[1]] <- response

# Current page values
total <- response$total
if (quiet == FALSE) message(paste( total, "total vessels"))
n_entries <- length(response$entries)
next_since <- response$since
# Current page values
total <- response$total
if (quiet == FALSE) message(paste( total, "total vessels"))
n_entries <- length(response$entries)
next_since <- response$since

while (n_entries != 0) {
# Pagination
while (!is.null(next_since) && n_entries != 0) {
# # API call for next page
next_response <- request %>%
httr2::req_url_query(`since` = next_since) %>%
httr2::req_perform() %>%
httr2::resp_body_json(simplifyVector = TRUE, check_type = TRUE)

# Append response to list
responses[[length(responses) + 1]] <- next_response
# Append response to list
responses[[length(responses) + 1]] <- next_response

# Pull out next_since of latest API response
next_since <- next_response$since
n_entries <- length(next_response$entries)
if (quiet == FALSE) {
# Pull out next_since of latest API response
next_since <- next_response$since
n_entries <- length(next_response$entries)
if (quiet == FALSE) {
total_requests <- floor(total/limit)
current_request <- length(responses) + 1
message(paste(floor(current_request*100/total_requests), "%" ))
}
message(paste(floor(current_request*100/total_requests), "%" ))
}
# format tibbles
all_entries <- purrr::map(responses, purrr::pluck, 'entries')
# dataset (same but good to have for length)
}
# format tibbles
all_entries <- purrr::map(responses, purrr::pluck, 'entries')

# 1/8 dataset
dataset <- purrr::map(all_entries, purrr::pluck, 'dataset') %>%
unlist(recursive = F) %>%
tibble::tibble(dataset = .)
# 1/8 dataset
dataset <- purrr::map(all_entries, purrr::pluck, 'dataset') %>%
unlist(recursive = F) %>%
tibble::tibble(dataset = .)

# 2/8 registryinfototalrecords
# one row per vessel. Those who have registry info will show up with 1, those are the ones that have their vesselRecord id available under registryInfo$id
#most vessels with registry will show up first but sometimes there are vessels with registry down the list
registryInfoTotalRecords <-
purrr::map(all_entries, purrr::pluck, 'registryInfoTotalRecords') %>%
unlist(recursive = F) %>%
tibble::tibble(registryInfoTotalRecords = .)
# 2/8 registryinfototalrecords
# one row per vessel. Those who have registry info will show up with 1,
# those are the ones that have their vesselRecord id available under registryInfo$id
# most vessels with registry will show up first but sometimes there are vessels with registry down the list
registryInfoTotalRecords <-
purrr::map(all_entries, purrr::pluck, 'registryInfoTotalRecords') %>%
unlist(recursive = F) %>%
tibble::tibble(registryInfoTotalRecords = .)

# 3/8 registryInfo -only for those who have it
registryInfo <- purrr::map(all_entries, purrr::pluck, 'registryInfo') %>%
unlist(recursive = FALSE) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index)) %>%
dplyr::rename(vesselRecord = id) %>%
# 3/8 registryInfo -only for those who have it
registryInfo <- purrr::map(all_entries, purrr::pluck, 'registryInfo') %>%
unlist(recursive = FALSE) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index)) %>%
dplyr::rename(vesselRecord = id) %>%
# dplyr::select(-`<list>`) %>%
# unnest geartypes
tidyr::unnest(geartypes, keep_empty = TRUE)

# 4/8 registryOwners #has all records with and without registry but may have a different
#dimension than registryInfo due to lack of data
registryOwners <- purrr::map(all_entries, purrr::pluck, "registryOwners") %>%
unlist(recursive= FALSE) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index))
# dplyr::select(-`<list>`)
tidyr::unnest(geartypes, keep_empty = TRUE)

# 5/8 registryPublicAuthorizations
registryPublicAuthorizations <- purrr::map(all_entries, purrr::pluck, 'registryPublicAuthorizations') %>%
unlist(recursive = F) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index))
#tidyr::unnest(sourceCode, keep_empty = TRUE)
# dplyr::select(-`<list>`)
# 4/8 registryOwners #has all records with and without registry but may have a different
#dimension than registryInfo due to lack of data
registryOwners <- purrr::map(all_entries, purrr::pluck, "registryOwners") %>%
unlist(recursive= FALSE) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index))
# dplyr::select(-`<list>`)

# 5/8 registryPublicAuthorizations
registryPublicAuthorizations <- purrr::map(all_entries, purrr::pluck, 'registryPublicAuthorizations') %>%
unlist(recursive = F) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index))
#tidyr::unnest(sourceCode, keep_empty = TRUE)
# dplyr::select(-`<list>`)

#6/8 combinedSourcesInfo joins vesselId, geartypes and shiptypes.
combinedSourcesInfo <- purrr::map(all_entries, purrr::pluck, 'combinedSourcesInfo') %>%
unlist(recursive = F) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index)) %>% #after indexing we can unnest
tidyr::unnest(geartypes, names_sep = "_geartype_", keep_empty = TRUE) %>%
tidyr::unnest(shiptypes, names_sep = "_shiptype_", keep_empty = TRUE)
# 6/8 combinedSourcesInfo joins vesselId, geartypes and shiptypes.
combinedSourcesInfo <- purrr::map(all_entries, purrr::pluck, 'combinedSourcesInfo') %>%
unlist(recursive = F) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index)) %>% #after indexing we can unnest
tidyr::unnest(geartypes, names_sep = "_geartype_", keep_empty = TRUE) %>%
tidyr::unnest(shiptypes, names_sep = "_shiptype_", keep_empty = TRUE)

# 7/8 selfReportedInfo this is AIS
selfReportedInfo <- purrr::map(all_entries, purrr::pluck, 'selfReportedInfo') %>%
unlist(recursive = F) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index))
# 7/8 selfReportedInfo this is AIS
selfReportedInfo <- purrr::map(all_entries, purrr::pluck, 'selfReportedInfo') %>%
unlist(recursive = F) %>%
purrr::map(., tibble::tibble) %>%
dplyr::bind_rows(.id = "index") %>%
dplyr::mutate(index = as.numeric(index)) %>%
dplyr::rename(vesselId = id)

# build output list
output <- list(
Expand Down

0 comments on commit c9140cc

Please sign in to comment.