diff --git a/R/ids_get.R b/R/ids_get.R index 2305fc8..de5a2c2 100644 --- a/R/ids_get.R +++ b/R/ids_get.R @@ -1,79 +1,119 @@ -#' Fetch Debt Statistics from the World Bank International Debt Statistics API -#' -#' This function returns a tibble with debt statistics data fetched from the -#' World Bank International Debt Statistics (IDS) API. The data can be filtered -#' by geographies, series, counterparts, and time periods. -#' -#' @param geographies A character vector representing the geographic codes -#' (e.g., "ZMB" for Zambia). This argument is required and cannot contain NA -#' values. -#' @param series A character vector representing the series codes (e.g., -#' "DT.DOD.DPPG.CD"). This argument is required and cannot contain NA values. -#' @param counterparts An optional character vector representing counterpart -#' areas (e.g., "all", "001"). This argument cannot contain NA values -#' (default: "all"). -#' @param start_date An optional numeric value representing the starting year -#' (e.g., 2015). It must be greater than or equal to 1970. If not provided, the -#' entire time range is used. -#' @param end_date An optional numeric value representing the ending year (e.g., -#' 2020). It must be greater than or equal to 1970 and cannot be earlier than -#' `start_date`. If not provided, the entire available time range is used. -#' @param progress A logical value indicating whether to display a progress -#' message during the request process (default: `FALSE`). Must be either `TRUE` -#' or `FALSE`. +#' Fetch Data from the World Bank International Debt Statistics (IDS) API +#' +#' Retrieves standardized debt statistics from the World Bank's International +#' Debt Statistics (IDS) database, which provides comprehensive data on the +#' external debt of low and middle-income countries. The function handles +#' country identification, data validation, and unit standardization, making it +#' easier to conduct cross-country debt analysis and monitoring. 
+#' +#' @param geographies A character vector of geography identifiers representing +#' debtor countries and aggregates. Must use `geography_id` from +#' \link{ids_list_geographies}: +#' * For individual countries, use ISO3C codes (e.g., "GHA" for Ghana) +#' * For aggregates, use World Bank codes (e.g., "LIC" for low income +#' countries) +#' The IDS database covers low and middle-income countries and related +#' aggregates only. Cannot contain NA values. +#' +#' @param series A character vector of debt statistics series identifiers that +#' must match the `series_id` column from \link{ids_list_series}. Each +#' series represents a specific debt statistic (e.g., "DT.DOD.DECT.CD" for +#' total external debt stocks, "DT.TDS.DECT.CD" for debt service payments). +#' Cannot contain NA values. +#' +#' @param counterparts A character vector of creditor identifiers that must +#' match the `counterpart_id` column from \link{ids_list_counterparts}. The +#' default "WLD" returns aggregated global totals across all creditors. +#' Common options: +#' * "WLD" - World total (aggregated across all creditors) +#' * "all" - Retrieve data broken down by all creditors +#' * All identifiers are strings, but some are string-formatted numbers +#' (e.g., "730" for China, "907" for IMF), while others are alphabetic +#' codes (e.g., "BND" for bondholders) +#' Cannot contain NA values. +#' +#' @param start_date A numeric value representing the starting year (default: +#' 2000). This default is intended to reduce data volume. For historical +#' analysis, explicitly set to 1970 (the earliest year of data available). +#' +#' @param end_date A numeric value representing the ending year (default: NULL). +#' Must be >= 1970 and cannot be earlier than start_date. If NULL, returns +#' data through the most recent available year. Some debt service-related +#' series include projections of debt service. For the 2024 data release, +#' debt service projections are available through 2031. 
+#' +#' @param progress A logical value indicating whether to display progress +#' messages during data retrieval (default: FALSE). #' #' @return A tibble containing debt statistics with the following columns: #' \describe{ -#' \item{geography_id}{The unique identifier for the geography (e.g., "ZMB").} -#' \item{series_id}{The unique identifier for the series (e.g., -#' "DT.DOD.DPPG.CD").} -#' \item{counterpart_id}{The unique identifier for the counterpart (e.g., -#' "all").} -#' \item{year}{The year corresponding to the data (e.g., 2020).} -#' \item{value}{The numeric value representing the statistic for the given -#' geography, series, counterpart, and year.} +#' \item{geography_id}{The identifier for the debtor geography (e.g., "GHA" +#' for Ghana, "LIC" for low income countries)} +#' \item{series_id}{The identifier for the debt statistic series (e.g., +#' "DT.DOD.DECT.CD" for total external debt stocks)} +#' \item{counterpart_id}{The identifier for the creditor (e.g., "WLD" for +#' world total, "730" for China)} +#' \item{year}{The year of the observation} +#' \item{value}{The numeric value of the debt statistic, standardized to the +#' units specified in the series definition (typically current US dollars)} #' } #' -#' @export +#' @section Data Coverage and Validation: +#' The IDS database provides detailed debt statistics for low and middle-income +#' countries, including: +#' * Debt stocks and flows +#' * Debt service and interest payments +#' * Creditor composition +#' * Terms and conditions of new commitments #' -#' @examplesIf curl::has_internet() -#' \donttest{ -#' # Fetch data for a series without specifying a time range or counterpart -#' ids_get( -#' geographies = "ZMB", -#' series = "DT.DOD.DPPG.CD", -#' ) +#' To ensure valid queries: +#' * Use \link{ids_list_geographies} to find valid debtor geography codes +#' * Use \link{ids_list_series} to explore available debt statistics +#' * Use \link{ids_list_counterparts} to see available creditor codes #' 
-#' # Fetch specific debt statistics for Zambia from 2015 to 2020 -#' ids_get( -#' geographies = "ZMB", -#' series = c("DT.DOD.DPPG.CD", "BM.GSR.TOTL.CD"), -#' start_date = 2015, -#' end_date = 2020 +#' @examples +#' \donttest{ +#' # Get total external debt stocks for a single country from 2000 onward +#' ghana_debt <- ids_get( +#' geographies = "GHA", +#' series = "DT.DOD.DECT.CD" # External debt stocks, total #' ) #' -#' # Fetch data for specific counterparts -#' ids_get( -#' geographies = "ZMB", -#' series = "DT.DOD.DPPG.CD", -#' counterparts = c("216", "231") +#' # Compare debt service metrics across income groups +#' income_groups <- ids_get( +#' geographies = c("LIC", "LMC", "UMC"), # Income group aggregates +#' series = "DT.TDS.DECT.CD", # Total debt service +#' start_date = 2010 #' ) #' -#' # Fetch data for multiple geographies and counterparts -#' ids_get( -#' geographies = c("ZMB", "CHN"), -#' series = "DT.DOD.DPPG.CD", -#' counterparts = c("216", "231"), -#' start_date = 2019, -#' end_date = 2020 +#' # Analyze debt composition by major creditors +#' creditor_analysis <- ids_get( +#' geographies = c("KEN", "ETH"), # Kenya and Ethiopia +#' series = c( +#' "DT.DOD.DECT.CD", # Total external debt +#' "DT.TDS.DECT.CD" # Total debt service +#' ), +#' counterparts = c( +#' "WLD", # World total +#' "730", # China +#' "907", # IMF +#' "BND" # Bondholders +#' ), +#' start_date = 2015 #' ) #' } #' +#' @seealso +#' * `ids_list_geographies()` for available debtor geography codes +#' * `ids_list_series()` for available debt statistics series codes +#' * `ids_list_counterparts()` for available creditor codes +#' +#' @export ids_get <- function( geographies, series, - counterparts = "all", - start_date = NULL, + counterparts = "WLD", + start_date = 2000, end_date = NULL, progress = FALSE ) { @@ -94,6 +134,9 @@ ids_get <- function( # Process debt statistics debt_statistics <- process_debt_statistics(debt_statistics_raw) + # Apply specific filtering logic for years beyond 
#' Drop Empty Rows Beyond the Latest Observed Year
#'
#' Rows dated after the latest observed year only carry information for
#' series that include projections. This helper removes every such row whose
#' `value` is `NA` and leaves all other rows untouched. The decision is made
#' row by row, so a result that mixes series with and without projections
#' does not keep the empty future rows of the series that have none.
#'
#' @param data A data frame (or tibble) with a numeric `year` column and a
#'   `value` column.
#' @param latest_year A single year; rows with `year` greater than this are
#'   treated as lying beyond the observed data. Defaults to the package-level
#'   `latest_year_observed`.
#'
#' @return `data` without the rows where `year > latest_year` and `value` is
#'   `NA`. Columns and the order of the remaining rows are unchanged.
#'
#' @noRd
#' @keywords internal
filter_post_actual_na <- function(data, latest_year = latest_year_observed) {
  # `!is.na(data$year)` keeps the mask free of NA, so rows with a missing
  # year are retained instead of being turned into all-NA rows by the
  # logical subsetting below.
  empty_future_row <- !is.na(data$year) &
    data$year > latest_year &
    is.na(data$value)

  data[!empty_future_row, , drop = FALSE]
}
Each series +represents a specific debt statistic (e.g., "DT.DOD.DECT.CD" for total +external debt stocks, "DT.TDS.DECT.CD" for debt service payments). Cannot +contain NA values.} -\item{counterparts}{An optional character vector representing counterpart -areas (e.g., "all", "001"). This argument cannot contain NA values -(default: "all").} +\item{counterparts}{A character vector of creditor identifiers that must +match the \code{counterpart_id} column from \code{ids_list_counterparts()}. The +default "WLD" returns aggregated global totals across all creditors. +Common options: +\itemize{ +\item "WLD" - World total (aggregated across all creditors) +\item "all" - Retrieve data broken down by all creditors +\item Individual creditors use numeric codes (e.g., "730" for China) +\item Special creditors have text codes (e.g., "907" for IMF, "BND" for +bondholders) +Cannot contain NA values. +}} -\item{start_date}{An optional numeric value representing the starting year -(e.g., 2015). It must be greater than or equal to 1970. If not provided, the -entire time range is used.} +\item{start_date}{A numeric value representing the starting year (default: +2000). Must be >= 1970. The default focuses on modern data while reducing +data volume. For historical analysis, explicitly set to 1970.} -\item{end_date}{An optional numeric value representing the ending year (e.g., -2020). It must be greater than or equal to 1970 and cannot be earlier than -\code{start_date}. If not provided, the entire available time range is used.} +\item{end_date}{A numeric value representing the ending year (default: NULL). +Must be >= 1970 and cannot be earlier than start_date. If NULL, returns +data through the most recent available year. Some debt service related +series include projections of debt service. 
For the 2024 data release, +debt service projections available through 2031.} -\item{progress}{A logical value indicating whether to display a progress -message during the request process (default: \code{FALSE}). Must be either \code{TRUE} -or \code{FALSE}.} +\item{progress}{A logical value indicating whether to display progress +messages during data retrieval (default: FALSE).} } \value{ A tibble containing debt statistics with the following columns: \describe{ -\item{geography_id}{The unique identifier for the geography (e.g., "ZMB").} -\item{series_id}{The unique identifier for the series (e.g., -"DT.DOD.DPPG.CD").} -\item{counterpart_id}{The unique identifier for the counterpart (e.g., -"all").} -\item{year}{The year corresponding to the data (e.g., 2020).} -\item{value}{The numeric value representing the statistic for the given -geography, series, counterpart, and year.} +\item{geography_id}{The identifier for the debtor geography (e.g., "GHA" +for Ghana, "LIC" for low income countries)} +\item{series_id}{The identifier for the debt statistic series (e.g., +"DT.DOD.DECT.CD" for total external debt stocks)} +\item{counterpart_id}{The identifier for the creditor (e.g., "WLD" for +world total, "730" for China)} +\item{year}{The year of the observation} +\item{value}{The numeric value of the debt statistic, standardized to the +units specified in the series definition (typically current US dollars)} } } \description{ -This function returns a tibble with debt statistics data fetched from the -World Bank International Debt Statistics (IDS) API. The data can be filtered -by geographies, series, counterparts, and time periods. +Retrieves standardized debt statistics from the World Bank's International +Debt Statistics (IDS) database, which provides comprehensive data on the +external debt of low and middle-income countries. 
The function handles +country identification, data validation, and unit standardization, making it +easier to conduct cross-country debt analysis and monitoring. } +\section{Data Coverage and Validation}{ + +The IDS database provides detailed debt statistics for low and middle-income +countries, including: +\itemize{ +\item Debt stocks and flows +\item Debt service and interest payments +\item Creditor composition +\item Terms and conditions of new commitments +} + +To ensure valid queries: +\itemize{ +\item Use \code{ids_list_geographies()} to find valid debtor geography codes +\item Use \code{ids_list_series()} to explore available debt statistics +\item Use \code{ids_list_counterparts()} to see available creditor codes +} +} + \examples{ -\dontshow{if (curl::has_internet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ -# Fetch data for a series without specifying a time range or counterpart -ids_get( - geographies = "ZMB", - series = "DT.DOD.DPPG.CD", +# Get total external debt stocks for a single country from 2000 onward +ghana_debt <- ids_get( + geographies = "GHA", + series = "DT.DOD.DECT.CD" # External debt stocks, total ) -# Fetch specific debt statistics for Zambia from 2015 to 2020 -ids_get( - geographies = "ZMB", - series = c("DT.DOD.DPPG.CD", "BM.GSR.TOTL.CD"), - start_date = 2015, - end_date = 2020 +# Compare debt service metrics across income groups +income_groups <- ids_get( + geographies = c("LIC", "LMC", "UMC"), # Income group aggregates + series = "DT.TDS.DECT.CD", # Total debt service + start_date = 2010 ) -# Fetch data for specific counterparts -ids_get( - geographies = "ZMB", - series = "DT.DOD.DPPG.CD", - counterparts = c("216", "231") +# Analyze debt composition by major creditors +creditor_analysis <- ids_get( + geographies = c("KEN", "ETH"), # Kenya and Ethiopia + series = c( + "DT.DOD.DECT.CD", # Total external debt + "DT.TDS.DECT.CD" # Total debt service + ), + counterparts = c( + "WLD", # World total 
test_that("ids_get uses new default parameters correctly", {
  # Rely on the defaults: counterparts = "WLD" and start_date = 2000
  default_result <- ids_get(
    geographies = "GHA",
    series = "DT.DOD.DECT.CD"
  )

  # All records should have counterpart_id = "WLD"
  expect_true(all(default_result$counterpart_id == "WLD"))

  # All years should be >= 2000 (the new default start_date)
  expect_true(all(default_result$year >= 2000))
})

test_that("ids_get filters post-observed-year NAs correctly", {
  result <- ids_get(
    geographies = "GHA",
    series = "DT.DOD.DECT.CD"
  )

  # Every row is either within the observed range or carries a value
  expect_true(
    all(result$year <= latest_year_observed | !is.na(result$value))
  )
})

test_that("ids_get correctly applies default years for projection series", {
  result <- ids_get(
    geographies = "GHA",
    series = "DT.TDS.DECT.CD" # Projection series
  )

  # Years must lie between the default start and the last projected year
  expect_true(
    all(result$year >= 2000 & result$year <= latest_year_projections)
  )
})

test_that("filter_post_actual_na drops empty and keeps populated years", {
  # Build the fixture from the release constants so the test keeps working
  # when `latest_year_observed` / `latest_year_projections` are bumped.
  years <- seq(latest_year_observed - 3, latest_year_projections)
  n_years <- length(years)
  is_observed <- years <= latest_year_observed

  build_fixture <- function(value) {
    tibble::tibble(
      geography_id = rep("GHA", n_years),
      series_id = rep("DT.DOD.DECT.CD", n_years),
      counterpart_id = rep("WLD", n_years),
      year = years,
      value = value
    )
  }

  # Series without projections: only years <= latest_year_observed remain
  without_projections <- build_fixture(
    ifelse(is_observed, seq_along(years), NA_real_)
  )
  expect_equal(
    filter_post_actual_na(without_projections)$year,
    years[is_observed]
  )

  # Series with a value for every projected year: nothing is removed
  with_projections <- build_fixture(as.numeric(seq_along(years)))
  expect_equal(filter_post_actual_na(with_projections)$year, years)
})

test_that("ids_get handles valid geography codes correctly", {
  # Test individual country code (ISO3C)
  expect_silent(ids_get(
    geographies = "GHA",
    series = "DT.DOD.DECT.CD",
    start_date = 2020,
    end_date = 2020
  ))

  # Test income group aggregate code
  expect_silent(ids_get(
    geographies = "LIC",
    series = "DT.DOD.DECT.CD",
    start_date = 2020,
    end_date = 2020
  ))

  # Test multiple geography types together
  expect_silent(ids_get(
    geographies = c("GHA", "LIC"),
    series = "DT.DOD.DECT.CD",
    start_date = 2020,
    end_date = 2020
  ))
})

test_that("ids_get handles valid counterpart codes correctly", {
  # Test default world aggregate
  expect_silent(ids_get(
    geographies = "GHA",
    series = "DT.DOD.DECT.CD",
    counterparts = "WLD",
    start_date = 2020,
    end_date = 2020
  ))

  # Test a string-formatted numeric creditor code
  expect_silent(ids_get(
    geographies = "GHA",
    series = "DT.DOD.DECT.CD",
    counterparts = "730", # China
    start_date = 2020,
    end_date = 2020
  ))

  # Test several creditor codes at once (numeric-style and alphabetic)
  expect_silent(ids_get(
    geographies = "GHA",
    series = "DT.DOD.DECT.CD",
    counterparts = c("907", "BND"), # IMF and bondholders
    start_date = 2020,
    end_date = 2020
  ))

  # Test requesting all counterparts
  expect_silent(ids_get(
    geographies = "GHA",
    series = "DT.DOD.DECT.CD",
    counterparts = "all",
    start_date = 2020,
    end_date = 2020
  ))
})

test_that("ids_get returns expected data structure", {
  result <- ids_get(
    geographies = "GHA",
    series = "DT.DOD.DECT.CD",
    start_date = 2020,
    end_date = 2020
  )

  # Check tibble structure
  expect_s3_class(result, "tbl_df")

  # Verify column names
  expected_columns <- c(
    "geography_id",
    "series_id",
    "counterpart_id",
    "year",
    "value"
  )
  expect_named(result, expected_columns)

  # Check data types
  expect_type(result$geography_id, "character")
  expect_type(result$series_id, "character")
  expect_type(result$counterpart_id, "character")
  expect_type(result$year, "integer")
  expect_type(result$value, "double")
})