From 6c8f20edfd797156ab53bb475bb4ebc1bbd1e3fa Mon Sep 17 00:00:00 2001 From: pwoods25443 Date: Fri, 3 May 2024 15:42:25 -0400 Subject: [PATCH 1/4] Add some data documentation and supplementary data for the public bigquery data sets --- README.md | 1 + data/README.md | 8 +++++ data/structures_meta.sql | 67 ++++++++++++++++++++++++++++++++++++++++ data/structures_view.sql | 15 +++++++++ 4 files changed, 91 insertions(+) create mode 100644 data/README.md create mode 100644 data/structures_meta.sql create mode 100644 data/structures_view.sql diff --git a/README.md b/README.md index fa79725..7d91927 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ Code repository for the paper "Satellite mapping reveals extensive industrial ac ├── README.md # this file ├── LICENSE # usage and distribution conditions ├── analysis # code and notebooks of analyses and figures + ├── data # links to public data sets ├── detector # code of SAR detection system in Earth Engine └── nnets # code of Deep Learning models and training diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000..1a6e14a --- /dev/null +++ b/data/README.md @@ -0,0 +1,8 @@ +# Public Dataset + +## Bigquery + +Data sets from this paper are available as public bigquery tables + +`global-fishing-watch.paper_industrial_activity.offshore_infrastructure_v20231106` +`global-fishing-watch.paper_industrial_activity.vessels_v20231013` diff --git a/data/structures_meta.sql b/data/structures_meta.sql new file mode 100644 index 0000000..62a74df --- /dev/null +++ b/data/structures_meta.sql @@ -0,0 +1,67 @@ +CREATE OR REPLACE TABLE +global-fishing-watch.paper_industrial_activity.offshore_infrastructure_meta_v20231106 ( + structure_id INTEGER OPTIONS (description = 'Unique identifier for all detections of the same structure'), + elevation_m FLOAT64 OPTIONS (description = 'Vertical distance from mean sea level in meters'), + distance_from_shore_m FLOAT64 OPTIONS (description = 'Distance from shore in meters'), 
+ MRGID_EEZ INT64 OPTIONS (description = 'Marine Regions identifier'), + TERRITORY1 STRING OPTIONS (description = 'Marine Regions territory name'), + ISO_TER1 STRING OPTIONS (description = 'Marine Regions ISO country identifier'), + POL_TYPE STRING OPTIONS (description = 'Marine Regions political unit type'), + `UNION` STRING OPTIONS (description = 'Marine Regions union') +) OPTIONS ( + description = """Extended metadata for structures based on the lat/lon location for each structure id. + +elevation_m is from the GEBCO gridded bathymetry data set + https://globalfishingwatch.org/data-download/datasets/public-bathymetry-v1 + https://www.gebco.net/data_and_products/gridded_bathymetry_data/ + +distance_from_shore_m is from this data set + https://globalfishingwatch.org/data-download/datasets/public-distance-from-shore-v1 + https://pae-paha.pacioos.hawaii.edu/thredds/dist2coast.html?dataset=dist2coast_1deg + +The eez fields come from Marine Regions, Flanders Marine Institute (2019). Maritime Boundaries Geodatabase: Maritime Boundaries and Exclusive Economic Zones (200NM), version 11. Available online at https://www.marineregions.org/. 
https://doi.org/10.14284/386 + +NB: There are 4 structures that appear twice in the meta data table because Marine Regions has overlapping boundaries for Peru and Ecuador and these +structures are located in the overlap region + """ +); + +INSERT INTO +global-fishing-watch.paper_industrial_activity.offshore_infrastructure_meta_v20231106 +with structures as ( + SELECT + *, + format("lon:%+07.2f_lat:%+07.2f", round(lon/0.01)*0.01, round(lat/0.01)*0.01) as gridcode, + ST_GEOGPOINT(lon, lat) as geometry + FROM `global-fishing-watch.paper_industrial_activity.offshore_infrastructure_v20231106` +), +sparial_measures as ( + SELECT + gridcode, elevation_m, distance_from_shore_m + FROM `world-fishing-827.pipe_static.spatial_measures_20201105` +), +gridded_eez as ( + SELECT distinct + CAST(eez as INT64) as MRGID_EEZ, + gridcode + FROM `world-fishing-827.pipe_static.spatial_measures_20201105` + cross join unnest(regions.eez) as eez +), +marine_regions as ( + select + MRGID_EEZ, TERRITORY1, ISO_TER1, POL_TYPE, `UNION`, geometry + from `world-fishing-827.pipe_regions_layers.EEZ_land_union_v3_202003` +), +structures_meta as ( + SELECT + s.structure_id, + m.* except(gridcode), + r.* except(geometry) + FROM structures s + LEFT JOIN sparial_measures m + USING (gridcode) + LEFT JOIN marine_regions r + ON ST_CONTAINS(r.geometry, s.geometry) +) + +SELECT distinct * FROM structures_meta diff --git a/data/structures_view.sql b/data/structures_view.sql new file mode 100644 index 0000000..4519199 --- /dev/null +++ b/data/structures_view.sql @@ -0,0 +1,15 @@ +CREATE OR REPLACE VIEW `global-fishing-watch.paper_industrial_activity.combined_offshore_infrastructure_v20231106` AS + +WITH structures as ( + SELECT * FROM `global-fishing-watch.paper_industrial_activity.offshore_infrastructure_v20231106` +), +structures_meta as ( + SELECT * FROM `global-fishing-watch.paper_industrial_activity.offshore_infrastructure_meta_v20231106` + WHERE POL_TYPE IS NULL OR POL_TYPE NOT IN ('Joint regime (EEZ)') -- Exclude duplicate 
entries in the Ecuador/Peru joint management regime +) + +SELECT * +FROM structures +LEFT JOIN structures_meta +USING (structure_id) + From a2a97c12fb62f2e37627ef71998a6ce4d8f1461a Mon Sep 17 00:00:00 2001 From: pwoods25443 Date: Wed, 5 Jun 2024 09:28:59 -0400 Subject: [PATCH 2/4] Update sql to generate public bigquery tables --- data/README.md | 13 +++++++++++-- data/structures_view.sql | 2 +- data/vessels_meta.sql | 37 +++++++++++++++++++++++++++++++++++++ data/vessels_view.sql | 18 ++++++++++++++++++ 4 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 data/vessels_meta.sql create mode 100644 data/vessels_view.sql diff --git a/data/README.md b/data/README.md index 1a6e14a..d990250 100644 --- a/data/README.md +++ b/data/README.md @@ -4,5 +4,14 @@ Data sets from this paper are available as public bigquery tables -`global-fishing-watch.paper_industrial_activity.offshore_infrastructure_v20231106` -`global-fishing-watch.paper_industrial_activity.vessels_v20231013` +Bigquery data set: `global-fishing-watch.paper_industrial_activity` + +Tables +`offshore_infrastructure_v20231106` +`offshore_infrastructure_meta_v20231106` +`vessels_v20231013` +`vessels_meta_v20231013` + +Views +`offshore_infrastructure` +`vessels` diff --git a/data/structures_view.sql b/data/structures_view.sql index 4519199..d60bf8a 100644 --- a/data/structures_view.sql +++ b/data/structures_view.sql @@ -1,4 +1,4 @@ -CREATE OR REPLACE VIEW `global-fishing-watch.paper_industrial_activity.combined_offshore_infrastructure_v20231106` AS +CREATE OR REPLACE VIEW `global-fishing-watch.paper_industrial_activity.offshore_infrastructure` AS WITH structures as ( SELECT * FROM `global-fishing-watch.paper_industrial_activity.offshore_infrastructure_v20231106` diff --git a/data/vessels_meta.sql b/data/vessels_meta.sql new file mode 100644 index 0000000..9a98c23 --- /dev/null +++ b/data/vessels_meta.sql @@ -0,0 +1,37 @@ +CREATE OR REPLACE TABLE 
+`global-fishing-watch.paper_industrial_activity.vessels_meta_v20231106` ( + mmsi STRING OPTIONS (description = 'Mobile Maritime Service Identifier. This is the primary ID that the vessel broadcasts'), + year INT64 OPTIONS (description = 'Calendar year this record applies to'), + flag_iso STRING OPTIONS (description = 'Three letter ISO code for the flag state. This is the country that issued the MMSI'), + shipname STRING OPTIONS (description = 'Name of the vessel'), + callsign STRING OPTIONS (description = 'Radio callsign'), + imo STRING OPTIONS (description = 'IMO number') +) OPTIONS ( + description = """Extended metadata for vessels keyed by MMSI. + + Note that many vessels will change their identifiers over the 5-year study period, and the only identifier that + is unchangeable is the IMO number. This table contains the best identifiers associated with each MMSI over the + course of a calendar year. However these can change and it is possible for more than one vessel to use the same + MMSI at the same time. + + Not all MMSI will have all the associated identity fields. The AIS messages that contain the position information + (lat, lon, timestamp) that are used to match to the SAR detects only contain MMSI. The other fields are matched + to the MMSI from other non-location messages. 
+ """ +); + +INSERT INTO + global-fishing-watch.paper_industrial_activity.vessel_meta_v20231106 +SELECT + ssvid as mmsi, + year, + best.best_flag as flag, + ais_identity.n_shipname_mostcommon.value as shipname, + ais_identity.n_callsign_mostcommon.value as callsign, + ais_identity.n_imo_mostcommon.value as imo + +FROM `world-fishing-827.pipe_ais_v3_published.vi_ssvid_byyear_v20240401` +WHERE year in (2017, 2018, 2019, 2020, 2021) + + + diff --git a/data/vessels_view.sql b/data/vessels_view.sql new file mode 100644 index 0000000..23997ef --- /dev/null +++ b/data/vessels_view.sql @@ -0,0 +1,18 @@ +CREATE OR REPLACE VIEW `global-fishing-watch.paper_industrial_activity.vessels` AS + +WITH vessels as ( + SELECT * + FROM `global-fishing-watch.paper_industrial_activity.vessels_v20231013` +), +vessels_meta as ( + SELECT * except(mmsi), mmsi as mmsi_str + FROM `global-fishing-watch.paper_industrial_activity.vessels_meta_v20231106` +) + +SELECT * except(mmsi_str) +FROM vessels +LEFT JOIN vessels_meta +ON( + CAST(mmsi AS STRING) = mmsi_str + AND EXTRACT(YEAR FROM vessels.timestamp) = vessels_meta.year + ) From 5746b53e642f0486d02ac44c70dd4dabc6218f9c Mon Sep 17 00:00:00 2001 From: pwoods25443 Date: Thu, 6 Jun 2024 19:50:37 -0400 Subject: [PATCH 3/4] Update vessels meta data to include EEZ --- data/structures_meta.sql | 4 ++-- data/vessels_meta.sql | 92 +++++++++++++++++++++++++++++++++------- data/vessels_view.sql | 9 ++-- 3 files changed, 81 insertions(+), 24 deletions(-) diff --git a/data/structures_meta.sql b/data/structures_meta.sql index 62a74df..f5fe4cf 100644 --- a/data/structures_meta.sql +++ b/data/structures_meta.sql @@ -35,7 +35,7 @@ with structures as ( ST_GEOGPOINT(lon, lat) as geometry FROM `global-fishing-watch.paper_industrial_activity.offshore_infrastructure_v20231106` ), -sparial_measures as ( +spatial_measures as ( SELECT gridcode, elevation_m, distance_from_shore_m FROM `world-fishing-827.pipe_static.spatial_measures_20201105` @@ -58,7 +58,7 @@ structures_meta as ( m.* except(gridcode), r.* except(geometry) FROM structures s - LEFT JOIN sparial_measures m + LEFT JOIN spatial_measures m USING (gridcode) LEFT JOIN marine_regions r ON ST_CONTAINS(r.geometry, s.geometry) diff --git 
a/data/vessels_meta.sql b/data/vessels_meta.sql index 9a98c23..bbd3867 100644 --- a/data/vessels_meta.sql +++ b/data/vessels_meta.sql @@ -1,11 +1,17 @@ CREATE OR REPLACE TABLE -`global-fishing-watch.paper_industrial_activity.vessels_meta_v20231106` ( - mmsi STRING OPTIONS (description = 'Mobile Maritime Service Identifier. This is the primary ID that the vessel broadcasts'), - year INT64 OPTIONS (description = 'Calendar year this record applies to'), +`global-fishing-watch.paper_industrial_activity.vessels_meta_v20240605` ( + detect_id INTEGER OPTIONS (description = 'Unique identifier for each detection'), flag_iso STRING OPTIONS (description = 'Three letter ISO code for the flag state. This is the country that issued the MMSI'), shipname STRING OPTIONS (description = 'Name of the vessel'), callsign STRING OPTIONS (description = 'Radio callsign'), - imo STRING OPTIONS (description = 'IMO number') + imo STRING OPTIONS (description = 'IMO number'), + elevation_m FLOAT64 OPTIONS (description = 'Vertical distance from mean sea level in meters'), + distance_from_shore_m FLOAT64 OPTIONS (description = 'Distance from shore in meters'), + MRGID_EEZ INT64 OPTIONS (description = 'Marine Regions identifier'), + TERRITORY1 STRING OPTIONS (description = 'Marine Regions territory name'), + ISO_TER1 STRING OPTIONS (description = 'Marine Regions ISO country identifier'), + POL_TYPE STRING OPTIONS (description = 'Marine Regions political unit type'), + `UNION` STRING OPTIONS (description = 'Marine Regions union') ) OPTIONS ( - description = """Extended metadata for vessels keyed by MMSI. + description = """Extended metadata for vessel detections keyed by detect_id. @@ -17,21 +23,75 @@ CREATE OR REPLACE TABLE Not all MMSI will have all the associated identity fields. The AIS messages that contain the position information (lat, lon, timestamp) that are used to match to the SAR detects only contain MMSI. The other fields are matched to the MMSI from other non-location messages. 
+ + elevation_m is from the GEBCO gridded bathymetry data set + https://globalfishingwatch.org/data-download/datasets/public-bathymetry-v1 + https://www.gebco.net/data_and_products/gridded_bathymetry_data/ + + distance_from_shore_m is from this data set + https://globalfishingwatch.org/data-download/datasets/public-distance-from-shore-v1 + https://pae-paha.pacioos.hawaii.edu/thredds/dist2coast.html?dataset=dist2coast_1deg + + The eez fields come from Marine Regions, Flanders Marine Institute (2019). Maritime Boundaries Geodatabase: Maritime Boundaries and Exclusive Economic Zones (200NM), version 11. Available online at https://www.marineregions.org/. https://doi.org/10.14284/386 + + NB: There are about 13k detect_ids in this table that occur more than once because of overlapping EEZ areas which represent competing claims and joint management areas. """ ); INSERT INTO - global-fishing-watch.paper_industrial_activity.vessel_meta_v20231106 -SELECT - ssvid as mmsi, - year, - best.best_flag as flag, - ais_identity.n_shipname_mostcommon.value as shipname, - ais_identity.n_callsign_mostcommon.value as callsign, - ais_identity.n_imo_mostcommon.value as imo - -FROM `world-fishing-827.pipe_ais_v3_published.vi_ssvid_byyear_v20240401` -WHERE year in (2017, 2018, 2019, 2020, 2021) - + global-fishing-watch.paper_industrial_activity.vessels_meta_v20240605 +WITH vessels as ( + SELECT + *, + format("lon:%+07.2f_lat:%+07.2f", round(lon/0.01)*0.01, round(lat/0.01)*0.01) as gridcode, + ST_GEOGPOINT(lon, lat) as geometry + FROM `global-fishing-watch.paper_industrial_activity.vessels_v20231013` +), +spatial_measures as ( + SELECT + gridcode, elevation_m, distance_from_shore_m + FROM `world-fishing-827.pipe_static.spatial_measures_20201105` +), +gridded_eez as ( + SELECT distinct + CAST(eez as INT64) as MRGID_EEZ, + gridcode + FROM `world-fishing-827.pipe_static.spatial_measures_20201105` + cross join unnest(regions.eez) as eez +), +marine_regions as ( + select + MRGID_EEZ, TERRITORY1, 
ISO_TER1, POL_TYPE, `UNION`, geometry + from `world-fishing-827.pipe_regions_layers.EEZ_land_union_v3_202003` +), +vessel_identity as ( + SELECT + ssvid as mmsi_str, + year, + best.best_flag as flag_iso, + ais_identity.n_shipname_mostcommon.value as shipname, + ais_identity.n_callsign_mostcommon.value as callsign, + ais_identity.n_imo_mostcommon.value as imo + FROM `world-fishing-827.pipe_ais_v3_published.vi_ssvid_byyear_v20240401` + WHERE year in (2017, 2018, 2019, 2020, 2021) +), +vessels_meta as ( + SELECT + v.detect_id, + i.* except(mmsi_str, year), + m.* except(gridcode), + r.* except(geometry) + FROM vessels v + LEFT JOIN spatial_measures m + USING (gridcode) + LEFT JOIN marine_regions r + ON ST_CONTAINS(r.geometry, v.geometry) + LEFT JOIN vessel_identity i + ON( + CAST(mmsi AS STRING) = mmsi_str + AND EXTRACT(YEAR FROM v.timestamp) = i.year + ) +) +SELECT * from vessels_meta diff --git a/data/vessels_view.sql b/data/vessels_view.sql index 23997ef..f4365e9 100644 --- a/data/vessels_view.sql +++ b/data/vessels_view.sql @@ -5,14 +5,11 @@ WITH vessels as ( FROM `global-fishing-watch.paper_industrial_activity.vessels_v20231013` ), vessels_meta as ( - SELECT * except(mmsi), mmsi as mmsi_str - FROM `global-fishing-watch.paper_industrial_activity.vessels_meta_v20231106` + SELECT * + FROM `global-fishing-watch.paper_industrial_activity.vessels_meta_v20240605` ) -SELECT * except(mmsi_str) +SELECT * FROM vessels LEFT JOIN vessels_meta -ON( - CAST(mmsi AS STRING) = mmsi_str - AND EXTRACT(YEAR FROM vessels.timestamp) = vessels_meta.year - ) +USING (detect_id) From 294ca49cbfd93bd4d274fef810d9aae3b7cb18de Mon Sep 17 00:00:00 2001 From: pwoods25443 Date: Thu, 6 Jun 2024 19:55:41 -0400 Subject: [PATCH 4/4] Update README for public bigquery table names --- data/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/data/README.md b/data/README.md index d990250..c0dad10 100644 --- a/data/README.md +++ b/data/README.md @@ -10,8 +10,10 @@ Tables `offshore_infrastructure_v20231106` `offshore_infrastructure_meta_v20231106` 
`vessels_v20231013` -`vessels_meta_v20231013` +`vessels_meta_v20240605` Views `offshore_infrastructure` `vessels` + +