diff --git a/DESCRIPTION b/DESCRIPTION index aee8d9b..7a96d7c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,7 +47,8 @@ Imports: RColorBrewer, remotes, scales, - shiny + shiny, + arrow Depends: R (>= 4.1.1), stats, diff --git a/NAMESPACE b/NAMESPACE index 9863c7e..10d415f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,3 +30,4 @@ importFrom("propvacc", "get_beta_params") importFrom("tidyr", "pivot_longer") importFrom("stats", "aggregate", "cor", "median", "optim", "quantile") importFrom("utils", "install.packages", "installed.packages") +importFrom("arrow", "write_parquet", "read_parquet") diff --git a/R/download_climate_data.R b/R/download_climate_data.R index 2d11966..04c7e63 100644 --- a/R/download_climate_data.R +++ b/R/download_climate_data.R @@ -1,6 +1,6 @@ -#' Download and Save Climate Data for Multiple Countries +#' Download and Save Climate Data for Multiple Countries (Parquet Format) #' -#' This function downloads daily climate data for a list of specified countries, saving the data as CSV files. The data includes both historical and future climate variables at grid points within each country. +#' This function downloads daily climate data for a list of specified countries, saving the data as Parquet files. The data includes both historical and future climate variables at grid points within each country. #' #' @param PATHS A list containing paths where raw and processed data are stored. #' PATHS is typically the output of the `get_paths()` function and should include: @@ -14,9 +14,9 @@ #' @param date_start A character string representing the start date for the climate data (in "YYYY-MM-DD" format). #' @param date_stop A character string representing the end date for the climate data (in "YYYY-MM-DD" format). #' -#' @return The function does not return a value. It downloads the climate data for each country and saves the results as CSV files in the specified directory. +#' @return The function does not return a value. 
It downloads the climate data for each country and saves the results as Parquet files in the specified directory. #' -#' @details This function uses country shapefiles to generate a grid of points within each country, at which climate data is downloaded. The function retrieves climate data for the specified date range (`date_start` to `date_stop`). The data is saved for each country in a CSV file named `climate_data___.csv`. +#' @details This function uses country shapefiles to generate a grid of points within each country, at which climate data is downloaded. The function retrieves climate data for the specified date range (`date_start` to `date_stop`). The data is saved for each country in a Parquet file named `climate_data_{date_start}_{date_stop}_{iso_code}.parquet`. #' #' The climate data variables include temperature, wind speed, cloud cover, precipitation, and more. The function retrieves data from multiple climate models, including MRI and EC Earth models. #' @@ -24,8 +24,7 @@ #' @importFrom lubridate year month week yday #' @importFrom sf st_read st_coordinates #' @importFrom glue glue -#' @importFrom utils write.csv -#' @importFrom MOSAIC convert_iso_to_country generate_country_grid_n get_climate_future +#' @importFrom arrow write_parquet #' @examples #' \dontrun{ #' # Define paths for raw and processed data using get_paths() @@ -43,14 +42,10 @@ #'} #' #' @export -download_climate_data <- function(PATHS, iso_codes, api_key, n_points = 3, date_start = "1970-01-01", date_stop = "2030-12-31") { +download_climate_data <- function(PATHS, iso_codes, api_key, n_points, date_start, date_stop) { - # Ensure output directory exists, if not, create it - if (!dir.exists(PATHS$DATA_CLIMATE)) { - dir.create(PATHS$DATA_CLIMATE, recursive = TRUE) - } + if (!dir.exists(PATHS$DATA_CLIMATE)) dir.create(PATHS$DATA_CLIMATE, recursive = TRUE) - # List of climate variables for both historical and future data climate_variables_historical_and_future <- c( "temperature_2m_mean", "temperature_2m_max", 
"temperature_2m_min", "wind_speed_10m_mean", "wind_speed_10m_max", "cloud_cover_mean", @@ -96,10 +91,9 @@ download_climate_data <- function(PATHS, iso_codes, api_key, n_points = 3, date_ climate_data ) - # Save climate data as CSV - write.csv(climate_data, - file = file.path(PATHS$DATA_CLIMATE, paste0("climate_data_", date_start, "_", date_stop, "_", country_iso_code, ".csv")), - row.names = FALSE) + # Save climate data as Parquet + arrow::write_parquet(climate_data, + sink = file.path(PATHS$DATA_CLIMATE, paste0("climate_data_", date_start, "_", date_stop, "_", country_iso_code, ".parquet"))) } message(glue::glue("Climate data saved for all countries here: {PATHS$DATA_CLIMATE}")) diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index ca17dc1..d23549c 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -2,7 +2,7 @@ pandoc: 3.1.12.3 pkgdown: 2.0.9 pkgdown_sha: ~ articles: {} -last_built: 2024-09-22T20:52Z +last_built: 2024-09-22T21:33Z urls: reference: institutefordiseasemodeling.github.io/MOSAIC-pkg/reference article: institutefordiseasemodeling.github.io/MOSAIC-pkg/articles diff --git a/src/mosiac_setup.sh b/src/mosiac_setup.sh index 696b044..0ce5fa7 100644 --- a/src/mosiac_setup.sh +++ b/src/mosiac_setup.sh @@ -16,7 +16,7 @@ cd "$MOSAIC_DIR" # Clone the MOSAIC-data repository if [ ! -d "MOSAIC-data" ]; then - git clone https://github.com/InstituteforDiseaseModeling/MOSAIC-data.git + git clone git@github.com:InstituteforDiseaseModeling/MOSAIC-data.git echo "Cloned MOSAIC-data repository." else echo "MOSAIC-data repository already exists." @@ -24,7 +24,7 @@ fi # Clone the MOSAIC-pkg repository if [ ! -d "MOSAIC-pkg" ]; then - git clone https://github.com/InstituteforDiseaseModeling/MOSAIC-pkg.git + git clone git@github.com:InstituteforDiseaseModeling/MOSAIC-pkg.git echo "Cloned MOSAIC-pkg repository." else echo "MOSAIC-pkg repository already exists." @@ -32,7 +32,7 @@ fi # Clone the MOSAIC-docs repository if [ ! 
-d "MOSAIC-docs" ]; then - git clone https://github.com/InstituteforDiseaseModeling/MOSAIC-docs.git + git clone git@github.com:InstituteforDiseaseModeling/MOSAIC-docs.git echo "Cloned MOSAIC-docs repository." else echo "MOSAIC-docs repository already exists."