Skip to content

Commit

Permalink
Environment for level2 creation
Browse files Browse the repository at this point in the history
  • Loading branch information
bastienird committed Nov 5, 2024
1 parent ad4029f commit d45b1b3
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 0 deletions.
105 changes: 105 additions & 0 deletions Create_lvl2_IRD_dataset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Load and Set Up Project Environment
# ------------------------------------

# Bootstrap 'renv' for project-specific environments.
# requireNamespace() only checks that the package is available, so it is
# installed when missing and then attached exactly once by library().
if (!requireNamespace("renv", quietly = TRUE)) {
  install.packages("renv")
}
library(renv)

# Activate the project environment and restore the library if using 'renv'
renv::activate()
renv::restore()

# here::here() is used further down to locate the workflow JSON file.
# library() stops with an error when the package cannot be attached, whereas
# require() would only warn and return FALSE.
if (!requireNamespace("here", quietly = TRUE)) {
  install.packages("here")
}
library(here)
# Packages to install (when missing) and attach, in attachment order
required_packages <- c(
  "remotes", "tinytex", "googledrive", "gsheet", "readr", "plotrix",
  "janitor", "dotenv", "data.table", "here", "xfun", "RPostgreSQL",
  "RPostgres", "DBI", "rpostgis", "terra", "sf", "RSQLite",
  "webshot", "usethis", "ows4R", "sp", "flextable", "dplyr",
  "stringr", "tibble", "bookdown", "knitr", "purrr", "readxl",
  "odbc", "rlang", "kableExtra", "tidyr", "ggplot2", "stats",
  "RColorBrewer", "cowplot", "tmap", "curl", "officer", "gdata",
  "R3port", "reshape2", "tools", "plogr", "futile.logger", "lubridate"
)

# Attach a package, installing it first when it is not available.
#
# package: name of the package, as a character string.
# Returns NULL (invisibly) when the package could be attached directly;
# otherwise returns the result of library() called after installation.
install_and_load <- function(package) {
  attached <- require(package, character.only = TRUE)
  if (attached) {
    # Nothing to install: the package is now on the search path
    return(invisible(NULL))
  }
  install.packages(package)
  library(package, character.only = TRUE)
}
# Install (when missing) and attach every required package. The calls are
# made for their side effects only, so the per-package results are discarded
# rather than simplified and printed as sapply() would do.
invisible(lapply(required_packages, install_and_load))

# 'geoflow' is not listed in required_packages, so it is attached separately
# (presumably it provides initWorkflow()/executeWorkflow() used below).
# library() fails immediately when the package is missing; require() would
# only warn and let the script run on until the first geoflow call.
library(geoflow)

# Workflow Execution and Configurations
# -------------------------------------

# Workflow definition file, resolved relative to the project root by 'here'.
# It is defined once so that initialisation and execution always use the
# same file.
workflow_file <- here::here("tunaatlas_qa_global_datasets_catch.json")

# Initialise the workflow to obtain its configuration
config <- initWorkflow(workflow_file)
# Remove the path stored in config$job (presumably the job directory created
# by the initialisation above), since the workflow is executed again below
unlink(config$job, recursive = TRUE)
# Keep the output database connection from the configuration; the later
# (currently commented-out) steps pass it as connectionDB
con <- config$software$output$dbi

# Execute the workflow; the returned value is what the later steps use as
# their main directory
tunaatlas_qa_global_datasets_catch_path <- executeWorkflow(workflow_file)

# Summarizing Step
# ----------------

# Load the summarising-step code by sourcing it directly from GitHub.
# NOTE(review): the URL points at the 'master' branch, so the code evaluated
# here can change between runs; consider pinning it to a commit or tag.
# The call that runs the step is currently disabled (commented out below), so
# only the top-level code of the sourced file is evaluated.
source("https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/Analysis_markdown/functions/Summarising_step.R")
# Summarising_step(main_dir = tunaatlas_qa_global_datasets_catch_path, connectionDB = con, config = config, sizepdf = "middle", savestep = TRUE, usesave = FALSE)


# NetCDF Creation
# ---------------

# # Load function to convert to NetCDF and process entities
# source("https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/tunaatlas_actions/convert_to_netcdf.R")
# entity_dirs <- list.dirs(file.path(tunaatlas_qa_global_datasets_catch_path, "entities"), full.names = TRUE, recursive = FALSE)
# wd <- getwd()
#
# # Iterate through entities to convert each to NetCDF format
# for (entitynumber in seq_along(config$metadata$content$entities)) {
# entity <- config$metadata$content$entities[[entitynumber]]
# dataset_pid <- entity$identifiers[["id"]]
# setwd(file.path(tunaatlas_qa_global_datasets_catch_path, "entities", dataset_pid))
# action <- entity$data$actions[[1]]
# convert_to_netcdf(action, config, entity, uploadgoogledrive = FALSE)
# }
#
# setwd(wd)
#
#
# # DOI Processing
# # --------------
#
# # Process entities for DOI
# source("https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/tunaatlas_actions/process_entities_for_DOI.R")
# process_entities_for_DOI(tunaatlas_qa_global_datasets_catch_path, "~/firms-gta/geoflow-tunaatlas/jobs/processed_entities_for_DOI")
#
#
# # Irregular Data Extraction
# # -------------------------
#
# # Extract data irregularities between nominal and georeferenced datasets
# source("https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/Analysis_markdown/functions/strata_in_georef_but_not_in_nominal_report_launching.R")
# upgraded_nominal <- strata_in_georef_but_not_in_nominal_report_launching("~/blue-cloud-dataspace/GlobalFisheriesAtlas/data", connectionDB = con)
#
#
# # CPUE Analysis
# # -------------
#
# # Perform analysis on Catch Per Unit Effort (CPUE)
# source("https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/Analysis_markdown/functions/strata_with_catches_without_effort.R")
# CPUE <- strata_with_catches_without_effort(tunaatlas_qa_global_datasets_catch_path, connectionDB = con)
#
# # Filter CPUE data for cases with catch but no effort, and vice versa
# catch_without_effort <- CPUE %>% dplyr::filter((is.na(measurement_value_effort) | measurement_value_effort == 0) & measurement_value_catch != 0)
# effort_without_catch <- CPUE %>% dplyr::filter((is.na(measurement_value_catch) | measurement_value_catch == 0) & measurement_value_effort != 0)
#
#
# # Deployment to GeoServer, GeoNetwork, and Zenodo
# # -----------------------------------------------
#
# # Initialize and deploy datasets on geoserver
# tunaatlas_qa_services <- initWorkflow("tunaatlas_qa_services.json")
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
Identifier,Title,Description,Subject,Creator,Date,Type,Language,SpatialCoverage,TemporalCoverage,Relation,Rights,Provenance,Format,Data
id:global_catch_ird_level2_rf1,"title:""Global monthly catch of tuna and tuna-like species (%temporal_extent:start% - %temporal_extent:end%) aggregated by statistical squares of 1° or 5° longitude and latitude (IRD level 2)""_","abstract: This dataset presents the global, monthly-spatially aggregated catch of tuna and tuna-like species (including billfish, bonitos, and mackerel) from %temporal_extent:start% to %temporal_extent:end%. The data were compiled using public domain georeferenced catch-and-effort datasets released by the five tuna Regional Fisheries Management Organizations (t-RFMOs): the Commission for the Conservation of Southern Bluefin Tuna (CCSBT), the Inter-American Tropical Tuna Commission (IATTC), the International Commission for the Conservation of Atlantic Tunas (ICCAT), the Indian Ocean Tuna Commission (IOTC), and the Western and Central Pacific Fisheries Commission (WCPFC).

The original datasets were accessed via the FIRMS Global Tuna Atlas data collection framework and harmonized according to the FAO Coordinating Working Party on Fishery Statistics (CWP) standards. The IRD level 2 dataset was generated by converting the number of fish to tons and adjusting the data based on total catch provided by the t-RFMOs. The final dataset is stratified by year, month, reporting/fishing fleet, fishing gear, fishing mode (type of school association), and area (statistical squares of 1° or 5° longitude and latitude).

t-RFMO specific descriptions of the original input data sets can be found at the following links:
- CCSBT: https://www.ccsbt.org/en/content/sbt-data
- IATTC: https://www.iattc.org/PublicDomainData/IATTC-Catch-by-species1.htm
- ICCAT: https://www.iccat.int/en/accesingdb.html
- IOTC: https://iotc.org/data/datasets/latest/CEAll
- WCPFC: https://www.wcpfc.int/public-domain

The processes applied to produce this IRD level 2 dataset at global scale consist of a series of steps.

More details on the processes are provided in the lineage section._
edition: 2024.1.0
",,"owner:[email protected]_
custodian:[email protected]_
pointOfContact:[email protected]_
publisher:[email protected]_
Data structure:[email protected]_
principalInvestigator:[email protected]_
metadata:[email protected],[email protected]_
processor:[email protected],:[email protected],:[email protected],[email protected]","creation:2021-12-01_
edition:2023-06-26",dataset,eng,"SRID=4326;POLYGON((-180 -90,-180 90,180 90,180 -90,-180 -90))","2024-01-01","thumbnail:Map overview@https://lh3.google.com/u/1/d/16knpvudY_FkHWWz-k7nSIpRMwI6mtEqB=w1680-h858-iv1_
http:codelists[Source codelists]@https://drive.google.com/open?id=1Hi0oHdZjZWYwLnhdiq_Y-gvFzu6d32hH",,"statement:Data management workflow_
process: Public domain datasets from IOTC were collated (through the RFMO website). Their structure (i.e. column organization and names) was harmonized and they were loaded in the Tuna atlas database._
process: Public domain datasets from ICCAT were collated (through the RFMO website). Their structure (i.e. column organization and names) was harmonized and they were loaded in the Tuna atlas database._
process: Public domain datasets from IATTC were collated (through the RFMO website or sent by e-mail). Their structure (i.e. column organization and names) was harmonized and they were loaded in the Tuna atlas database._
process: Public domain datasets from WCPFC were collated (through the RFMO website). Their structure (i.e. column organization and names) was harmonized and they were loaded in the Tuna atlas database._
process: Public domain datasets from CCSBT were collated (through the RFMO website). Their structure (i.e. column organization and names) was harmonized and they were loaded in the Tuna atlas database._
process: Concerns ICCAT purse seine datasets : ICCAT delivers two catch-and-efforts datasets for purse seiners: one that gives the detail of the type of school (Fad|Free school) for purse seine fisheries and that starts in 1994 (called Task II catch|effort by operation mode Fad|Free school) and one that does not give the information of the type of school and that covers all the time period (from 1950) (called Task II catch|effort). These data are redundant (i.e. the data from the dataset Task II catch|effort by operation mode are also available in the dataset Task II catch|effort) but in the latter, the information on the type of school is not available. Both datasets were combined to produce a dataset that covers the whole time period, with fishing mode information (Fad | free school)._
process: Concerns IATTC purse seine datasets : For confidentiality policies, information on flag and school type for the geo-referenced catches is available in separate files for the eastern Pacific Ocean purse seine datasets. For each stratum, the catch from the flag-detailed dataset was raised to the catch from the school type-detailed dataset to get an estimation of the catches by flag and school type in each stratum._
process: All the datasets were merged._
process: Concerns IATTC and WCPFC data. IATTC and WCPFC have an overlapping area in their respective area of competence. Data from both RFMOs may be redundant in this overlapping zone. In the overlapping area, data from IATTC were kept (i.e. for this specific zone, data from WCPFC was removed). Information regarding the data in the IATTC / WCPFC overlapping area: after the eventual other corrections applied, e.g. raisings, catch units conversions, etc., the ratio between the catches from IATTC and those from WCPFC was of: 18.09 for the catches expressed in weight and 12.65 for the catches expressed in number._
process: Concerns Southern Bluefin Tuna (SBF) data: SBF tuna data do exist in both CCSBT data and the other tuna RFMOs data. Data from CCSBT and the other RFMOs may be redundant. For the Southern Bluefin Tuna, data from CCSBT were kept (i.e. data from the other RFMOs for SBF were removed). Information regarding the SBF data: after the eventual other corrections applied, e.g. raisings, catch units conversions, etc., the ratio between the catches from CCSBT and those from the other RFMOs for SBF was of: 1.56 for the catches expressed in weight. A total of 17706660.62 fishes were available in the CCSBT datasets - while no data in number were available in the other RFMOs datasets - and were thus integrated in this dataset._
process: Coding systems and nomenclatures used to describe the data (e.g. gears, flags, species) may differ according to tRFMOs. Codes used by the tuna RFMOs in their respective datasets were mapped to global code lists (ISSCFG, ISO3 countries codes, ASFIS). These mappings have been done with the collaboration of the Secretariats. Some codes could not have been mapped to standard code lists, for some tRFMOs own-defined codes that usually are aggregation of existing codes (e.g. flag “IDPH” - Indonesia and Philippines – for WCPFC; species “Otun” - other tuna – for ICCAT). In those cases, the code for species was set to UNK (Unknown). However, these codes have been mapped with more aggregated code lists – i.e. group of species. Information regarding the data that have species set to Unknown (i.e. data for which raw species do not have any correspondence in ASFIS): the catches that have species set to Unknown represent 4.17 % of the catches expressed in weight and 7.98 % of the catches expressed in number of fishes._
process: The CSV file was integrated into the Tuna atlas database.",,"access:zenodo_
source:10.5281/zenodo.13757369_
sourceType:other_
uploadSource:fact_tables.catch_
uploadType:dbtable_
featureType:catch_
action:create_global_tuna_atlas_dataset.R[R generation script]@./tunaatlas_scripts/generation/create_global_tuna_atlas_dataset_v2023.R_
action_option_fact:catch_
action_option_dataset_level:2_
action_option_recap_each_step:true_
action_option_from_level0:false_
action_option_geom_table:area.grid_area_labels_
action_option_unit_conversion_convert:false_
action_option_raising_georef_to_nominal:true_
action_option_level2RF2:false_
action_option_doigeoref:10.5281/zenodo.11460074_
action_option_keygeoref:global_catch_firms_level0_harmonized.csv_
action_option_doinominal:10.5281/zenodo.11410529_
action_option_keynominal:global_nominal_catch_firms_level0_harmonized.csv_
action_option_aggregate_on_5deg_data_with_resolution_inferior_to_5deg:false_
action_option_disaggregate_on_5deg_data_with_resolution_superior_to_5deg:none_
action_option_disaggregate_on_1deg_data_with_resolution_superior_to_1deg:none_
action_option_curation_absurd_converted_data:true_
action_option_decrease_when_rf_inferior_to_one:false_
"

0 comments on commit d45b1b3

Please sign in to comment.