Environment for level2 creation

firms-gta · Nov 5, 2024 · d45b1b3 · d45b1b3
1 parent ad4029f
commit d45b1b3
Show file tree

Hide file tree

Showing 2 changed files with 167 additions and 0 deletions.
diff --git a/Create_lvl2_IRD_dataset.R b/Create_lvl2_IRD_dataset.R
@@ -0,0 +1,105 @@
+# Load and Set Up Project Environment
+# ------------------------------------
+
+# Load 'renv' for project-specific environments
+# If not already installed, install 'renv'
+# require() returns FALSE (with a warning) instead of erroring when the
+# package is missing, which is what lets the install fallback run here.
+if (!require("renv")) install.packages("renv")
+library(renv)
+
+# Activate the project environment and restore the library if using 'renv'
+# Both calls are made without arguments, so they rely on renv's defaults.
+renv::activate()
+renv::restore()
+# Attach 'here' before the package list below is processed. require() only
+# warns if 'here' is unavailable at this point; "here" is also listed in
+# required_packages, where it is installed if missing.
+require(here)
+# Define required packages
+required_packages <- c(
+  "remotes", "tinytex", "googledrive", "gsheet", "readr", "plotrix", "janitor", 
+  "dotenv", "data.table", "here", "xfun", "RPostgreSQL", "RPostgres", "DBI", 
+  "rpostgis", "terra", "sf", "RSQLite", "webshot", "usethis", "ows4R", "sp", 
+  "flextable", "dplyr", "stringr", "tibble", "bookdown", "knitr", 
+  "purrr", "readxl", "odbc", "rlang", "kableExtra", "tidyr", "ggplot2", 
+  "stats", "RColorBrewer", "cowplot", "tmap", "curl", "officer", 
+  "gdata", "R3port", "reshape2", "tools", "plogr", "futile.logger", "lubridate"
+)
+
+# Install and load packages
+#
+# install_and_load(package): attach one package, installing it first if needed.
+#   package  a single package name given as a character string
+# The package is attached with library(), which raises an error when the
+# package still cannot be loaded after the installation attempt.
+install_and_load <- function(package) {
+  # requireNamespace() only tests availability; attaching is left to library()
+  # so that a missing package is an error rather than a silent FALSE.
+  if (!requireNamespace(package, quietly = TRUE)) {
+    install.packages(package)
+  }
+  library(package, character.only = TRUE)
+}
+# Called for its side effects only: lapply() has a stable return type and
+# invisible() keeps the list of results from being printed at top level.
+invisible(lapply(required_packages, install_and_load))
+# geoflow is not in required_packages, so it is not installed by this script.
+# library() stops here with a clear error if it is missing, instead of
+# require() returning FALSE and the script failing later at initWorkflow().
+library(geoflow)
+
+# Workflow Execution and Configurations
+# -------------------------------------
+
+# Initialize and execute the workflow
+# NOTE(review): initWorkflow() and executeWorkflow() are not defined in this
+# script; presumably they come from the geoflow package attached above.
+config <- initWorkflow(here::here("tunaatlas_qa_global_datasets_catch.json"))
+# Recursively delete the path stored in config$job; the workflow is started
+# again from the same JSON file by executeWorkflow() below.
+unlink(config$job, recursive = TRUE)
+# Keep config$software$output$dbi (presumably the output database connection
+# -- confirm); it is the connectionDB argument of the commented-out calls
+# further down in this script.
+con <- config$software$output$dbi
+
+# Execute the workflow and keep its return value
+# The value is used later as a directory path (main_dir, file.path(...)), so
+# it is presumably the job directory -- confirm against geoflow.
+tunaatlas_qa_global_datasets_catch_path <- executeWorkflow(here::here("tunaatlas_qa_global_datasets_catch.json"))
+
+# Summarizing Step
+# ----------------
+
+# Load the summarising function; the summarising call itself is disabled below
+# source() evaluates the remote script from the master branch at run time, so
+# this line needs network access and follows whatever is currently on master.
+source("https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/Analysis_markdown/functions/Summarising_step.R")
+# Commented out: only the sourced script is evaluated, the step is not run.
+# Summarising_step(main_dir = tunaatlas_qa_global_datasets_catch_path, connectionDB = con, config = config, sizepdf = "middle", savestep = TRUE, usesave = FALSE)
+
+
+# NetCDF Creation
+# ---------------
+
+# # Load function to convert to NetCDF and process entities
+# source("https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/tunaatlas_actions/convert_to_netcdf.R")
+# entity_dirs <- list.dirs(file.path(tunaatlas_qa_global_datasets_catch_path, "entities"), full.names = TRUE, recursive = FALSE)
+# wd <- getwd()
+# 
+# # Iterate through entities to convert each to NetCDF format
+# for (entitynumber in seq_along(config$metadata$content$entities)) {
+#   entity <- config$metadata$content$entities[[entitynumber]]
+#   dataset_pid <- entity$identifiers[["id"]]
+#   setwd(file.path(tunaatlas_qa_global_datasets_catch_path, "entities", dataset_pid))
+#   action <- entity$data$actions[[1]]
+#   convert_to_netcdf(action, config, entity, uploadgoogledrive = FALSE)
+# }
+# 
+# setwd(wd)
+# 
+# 
+# # DOI Processing
+# # --------------
+# 
+# # Process entities for DOI
+# source("https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/tunaatlas_actions/process_entities_for_DOI.R")
+# process_entities_for_DOI(tunaatlas_qa_global_datasets_catch_path, "~/firms-gta/geoflow-tunaatlas/jobs/processed_entities_for_DOI")
+# 
+# 
+# # Irregular Data Extraction
+# # -------------------------
+# 
+# # Extract data irregularities between nominal and georeferenced datasets
+# source("https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/Analysis_markdown/functions/strata_in_georef_but_not_in_nominal_report_launching.R")
+# upgraded_nominal <- strata_in_georef_but_not_in_nominal_report_launching("~/blue-cloud-dataspace/GlobalFisheriesAtlas/data", connectionDB = con)
+# 
+# 
+# # CPUE Analysis
+# # -------------
+# 
+# # Perform analysis on Catch Per Unit Effort (CPUE)
+# source("https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/Analysis_markdown/functions/strata_with_catches_without_effort.R")
+# CPUE <- strata_with_catches_without_effort(tunaatlas_qa_global_datasets_catch_path, connectionDB = con)
+# 
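+# # Filter CPUE data for cases with catch but no effort, and vice versa
+# # NOTE: `&` binds tighter than `|` in R, so each filter below keeps every row
+# # whose first measure is NA, whatever the value of the other measure. Add
+# # parentheses around the `|` part if the other measure must also be non-zero.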
+# catch_without_effort <- CPUE %>% dplyr::filter(is.na(measurement_value_effort) | measurement_value_effort == 0 & measurement_value_catch != 0)
+# effort_without_catch <- CPUE %>% dplyr::filter(is.na(measurement_value_catch) | measurement_value_catch == 0 & measurement_value_effort != 0)
+# 
+# 
+# # Deployment to GeoServer, GeoNetwork, and Zenodo
+# # -----------------------------------------------
+# 
+# # Initialize and deploy datasets on geoserver
+# tunaatlas_qa_services <- initWorkflow("tunaatlas_qa_services.json")
diff --git a/geoflow_entities_tuna_global_datasets_IRD_level1_2022 - IRD_level2.csv b/geoflow_entities_tuna_global_datasets_IRD_level1_2022 - IRD_level2.csv
@@ -0,0 +1,62 @@
+Identifier,Title,Description,Subject,Creator,Date,Type,Language,SpatialCoverage,TemporalCoverage,Relation,Rights,Provenance,Format,Data
+id:global_catch_ird_level2_rf1,"title:""Global monthly catch of tuna and tuna-like species (%temporal_extent:start% - %temporal_extent:end%) aggregated by statistical squares of 1° or 5° longitude and latitude (IRD level 2)""_","abstract: This dataset presents the global, monthly-spatially aggregated catch of tuna and tuna-like species (including billfish, bonitos, and mackerel) from %temporal_extent:start% to %temporal_extent:end%. The data were compiled using public domain georeferenced catch-and-effort datasets released by the five tuna Regional Fisheries Management Organizations (t-RFMOs): the Commission for the Conservation of Southern Bluefin Tuna (CCSBT), the Inter-American Tropical Tuna Commission (IATTC), the International Commission for the Conservation of Atlantic Tunas (ICCAT), the Indian Ocean Tuna Commission (IOTC), and the Western and Central Pacific Fisheries Commission (WCPFC).
+
+The original datasets were accessed via the FIRMS Global Tuna Atlas data collection framework and harmonized according to the FAO Coordinating Working Party on Fishery Statistics (CWP) standards. The IRD level 2 dataset was generated by converting the number of fish to tons and adjusting the data based on total catch provided by the t-RFMOs. The final dataset is stratified by year, month, reporting/fishing fleet, fishing gear, fishing mode (type of school association), and area (statistical squares of 1° or 5° longitude and latitude).
+
+t-RFMO specific descriptions of the original input data sets can be found at the following links:
+- CCSBT: https://www.ccsbt.org/en/content/sbt-data
+- IATTC: https://www.iattc.org/PublicDomainData/IATTC-Catch-by-species1.htm
+- ICCAT: https://www.iccat.int/en/accesingdb.html
+- IOTC: https://iotc.org/data/datasets/latest/CEAll
+- WCPFC: https://www.wcpfc.int/public-domain
+
+The processes applied to produce this IRD level 2 dataset at global scale consist of a series of steps.
+
+More details on the processes are provided in the lineage section._
+edition: 2024.1.0
+",,"owner:[email protected]_
+custodian:[email protected]_
+pointOfContact:[email protected]_
+publisher:[email protected]_
+Data structure:[email protected]_
+principalInvestigator:[email protected]_
+metadata:[email protected],[email protected]_
+processor:[email protected],:[email protected],:[email protected],[email protected]","creation:2021-12-01_
+edition:2023-06-26",dataset,eng,"SRID=4326;POLYGON((-180 -90,-180 90,180 90,180 -90,-180 -90))","2024-01-01","thumbnail:Map overview@https://lh3.google.com/u/1/d/16knpvudY_FkHWWz-k7nSIpRMwI6mtEqB=w1680-h858-iv1_
+http:codelists[Source codelists]@https://drive.google.com/open?id=1Hi0oHdZjZWYwLnhdiq_Y-gvFzu6d32hH",,"statement:Data management workflow_
+process: Public domain datasets from IOTC were collated (through the RFMO website). Their structure (i.e. column organization and names) was harmonized and they were loaded in the Tuna atlas database._
+process: Public domain datasets from ICCAT were collated (through the RFMO website). Their structure (i.e. column organization and names) was harmonized and they were loaded in the Tuna atlas database._
+process: Public domain datasets from IATTC were collated (through the RFMO website or sent by e-mail). Their structure (i.e. column organization and names) was harmonized and they were loaded in the Tuna atlas database._
+process: Public domain datasets from WCPFC were collated (through the RFMO website). Their structure (i.e. column organization and names) was harmonized and they were loaded in the Tuna atlas database._
+process: Public domain datasets from CCSBT were collated (through the RFMO website). Their structure (i.e. column organization and names) was harmonized and they were loaded in the Tuna atlas database._
+process: Concerns ICCAT purse seine datasets : ICCAT delivers two catch-and-efforts datasets for purse seiners: one that gives the detail of the type of school (Fad|Free school) for purse seine fisheries and that starts in 1994 (called Task II catch|effort by operation mode Fad|Free school) and one that does not give the information of the type of school and that covers all the time period (from 1950) (called Task II catch|effort). These data are redundant (i.e. the data from the dataset Task II catch|effort by operation mode are also available in the dataset Task II catch|effort) but in the latter, the information on the type of school is not available. Both datasets were combined to produce a dataset that covers the whole time period, with fishing mode information (Fad | free school)._
+process: Concerns IATTC purse seine datasets : For confidentiality policies, information on flag and school type for the geo-referenced catches is available in separate files for the eastern Pacific Ocean purse seine datasets. For each stratum, the catch from the flag-detailed dataset was raised to the catch from the school type-detailed dataset to get an estimation of the catches by flag and school type in each stratum._
+process: All the datasets were merged._
+process: Concerns IATTC and WCPFC data. IATTC and WCPFC have an overlapping area in their respective area of competence. Data from both RFMOs may be redundant in this overlapping zone. In the overlapping area, data from IATTC were kept (i.e. for this specific zone, data from WCPFC was removed). Information regarding the data in the IATTC / WCPFC overlapping area: after the eventual other corrections applied, e.g. raisings, catch units conversions, etc., the ratio between the catches from IATTC and those from WCPFC was of: 18.09 for the catches expressed in weight and 12.65 for the catches expressed in number._
+process: Concerns Southern Bluefin Tuna (SBF) data: SBF tuna data do exist in both CCSBT data and the other tuna RFMOs data. Data from CCSBT and the other RFMOs may be redundant. For the Southern Bluefin Tuna, data from CCSBT were kept (i.e. data from the other RFMOs for SBF were removed). Information regarding the SBF data: after the eventual other corrections applied, e.g. raisings, catch units conversions, etc., the ratio between the catches from CCSBT and those from the other RFMOs for SBF was of: 1.56 for the catches expressed in weight. A total of 17706660.62 fishes were available in the CCSBT datasets - while no data in number were available in the other RFMOs datasets - and were thus integrated in this dataset._
+process: Coding systems and nomenclatures used to describe the data (e.g. gears, flags, species) may differ according to tRFMOs. Codes used by the tuna RFMOs in their respective datasets were mapped to global code lists (ISSCFG, ISO3 country codes, ASFIS). These mappings have been done with the collaboration of the Secretariats. Some codes could not be mapped to standard code lists, namely tRFMO-specific codes that are usually aggregations of existing codes (e.g. flag “IDPH” - Indonesia and Philippines – for WCPFC; species “Otun” - other tuna – for ICCAT). In those cases, the code for species was set to UNK (Unknown). However, these codes have been mapped with more aggregated code lists – i.e. groups of species. Information regarding the data that have species set to Unknown (i.e. data for which raw species do not have any correspondence in ASFIS): the catches that have species set to Unknown represent 4.17 % of the catches expressed in weight and 7.98 % of the catches expressed in number of fishes._
+process: The CSV file was integrated into the Tuna atlas database.",,"access:zenodo_
+source:10.5281/zenodo.13757369_
+sourceType:other_
+uploadSource:fact_tables.catch_
+uploadType:dbtable_
+featureType:catch_
+action:create_global_tuna_atlas_dataset.R[R generation script]@./tunaatlas_scripts/generation/create_global_tuna_atlas_dataset_v2023.R_
+action_option_fact:catch_
+action_option_dataset_level:2_
+action_option_recap_each_step:true_
+action_option_from_level0:false_
+action_option_geom_table:area.grid_area_labels_
+action_option_unit_conversion_convert:false_
+action_option_raising_georef_to_nominal:true_
+action_option_level2RF2:false_
+action_option_doigeoref:10.5281/zenodo.11460074_
+action_option_keygeoref:global_catch_firms_level0_harmonized.csv_
+action_option_doinominal: 10.5281/zenodo.11410529_
+action_option_keynominal:global_nominal_catch_firms_level0_harmonized.csv_
+action_option_aggregate_on_5deg_data_with_resolution_inferior_to_5deg:false_
+action_option_disaggregate_on_5deg_data_with_resolution_superior_to_5deg:none_
+action_option_disaggregate_on_1deg_data_with_resolution_superior_to_1deg:none_
+action_option_curation_absurd_converted_data:true_
+action_option_decrease_when_rf_inferior_to_one:false_
+"