Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Public bigquery data #14

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Code repository for the paper "Satellite mapping reveals extensive industrial ac
├── README.md # this file
├── LICENSE # usage and distribution conditions
├── analysis # code and notebooks of analyses and figures
├── data # links to public data sets
├── detector # code of SAR detection system in Earth Engine
└── nnets # code of Deep Learning models and training

Expand Down
19 changes: 19 additions & 0 deletions data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Public Dataset

## BigQuery

The data sets from this paper are available as public BigQuery tables.

BigQuery data set: `global-fishing-watch.paper_industrial_activity`

Tables

- `offshore_infrastructure_v20231106`
- `offshore_infrastructure_meta_v20231106`
- `vessels_v20231013`
- `vessels_meta_v20240605`

Views

- `offshore_infrastructure`
- `vessels`


67 changes: 67 additions & 0 deletions data/structures_meta.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
-- Schema and documentation for the offshore infrastructure metadata table.
-- CREATE OR REPLACE replaces any existing table of the same name, so this
-- statement must be followed by the INSERT that repopulates it.
-- Column names in upper case mirror the Marine Regions attribute names.
-- `UNION` is a reserved word and therefore has to stay backtick-quoted.
CREATE OR REPLACE TABLE
  `global-fishing-watch.paper_industrial_activity.offshore_infrastructure_meta_v20231106` (
    structure_id INTEGER OPTIONS (description = 'Unique identifier for all detections of the same structure'),
    elevation_m FLOAT64 OPTIONS (description = 'Vertical distance from mean sea level in meters'),
    distance_from_shore_m FLOAT64 OPTIONS (description = 'Distance from shore in meters'),
    MRGID_EEZ INT64 OPTIONS (description = 'Marine Regions identifier'),
    TERRITORY1 STRING OPTIONS (description = 'Marine Regions territory name'),
    ISO_TER1 STRING OPTIONS (description = 'Marine Regions ISO country identifier'),
    POL_TYPE STRING OPTIONS (description = 'Marine Regions political unit type'),
    `UNION` STRING OPTIONS (description = 'Marine Regions union')
  ) OPTIONS (
    -- The description text is kept flush-left because leading whitespace
    -- inside the triple-quoted string would become part of the description.
    description = """Extended metadata for structures based on the lat/lon location for each structure id.

elevation_m is from the GEBCO gridded bathymetry data set
https://globalfishingwatch.org/data-download/datasets/public-bathymetry-v1
https://www.gebco.net/data_and_products/gridded_bathymetry_data/

distance_from_shore_m is from this data set
https://globalfishingwatch.org/data-download/datasets/public-distance-from-shore-v1
https://pae-paha.pacioos.hawaii.edu/thredds/dist2coast.html?dataset=dist2coast_1deg

The eez fields come from Marine Regions, Flanders Marine Institute (2019). Maritime Boundaries Geodatabase: Maritime Boundaries and Exclusive Economic Zones (200NM), version 11. Available online at https://www.marineregions.org/. https://doi.org/10.14284/386

NB: There are 4 structures that appear twice in the meta data table because Marine Regions has overlapping boundaries for Peru and Ecuador and these
structures are located in the overlap region
"""
  );

-- Populates the offshore infrastructure metadata table.
-- Each structure row is enriched with:
--   * gridded elevation and distance-from-shore values, matched on gridcode
--   * the attributes of every Marine Regions polygon that contains the point
-- The target column list is explicit so the load does not depend on the
-- positional order of the SELECT matching the table definition by accident.
INSERT INTO
  `global-fishing-watch.paper_industrial_activity.offshore_infrastructure_meta_v20231106` (
    structure_id,
    elevation_m,
    distance_from_shore_m,
    MRGID_EEZ,
    TERRITORY1,
    ISO_TER1,
    POL_TYPE,
    `UNION`
  )
WITH
-- Structure positions plus the two join keys derived from lon/lat:
-- a gridcode string built from the coordinates rounded to 0.01 degrees,
-- and a geography point for the polygon containment test.
structures AS (
  SELECT
    structure_id,
    FORMAT("lon:%+07.2f_lat:%+07.2f", ROUND(lon/0.01)*0.01, ROUND(lat/0.01)*0.01) AS gridcode,
    ST_GEOGPOINT(lon, lat) AS geometry
  FROM `global-fishing-watch.paper_industrial_activity.offshore_infrastructure_v20231106`
),
-- Gridded static measures, one lookup value per gridcode.
spatial_measures AS (
  SELECT
    gridcode,
    elevation_m,
    distance_from_shore_m
  FROM `world-fishing-827.pipe_static.spatial_measures_20201105`
),
-- Marine Regions polygons and the attributes copied into the metadata table.
marine_regions AS (
  SELECT
    MRGID_EEZ,
    TERRITORY1,
    ISO_TER1,
    POL_TYPE,
    `UNION`,
    geometry
  FROM `world-fishing-827.pipe_regions_layers.EEZ_land_union_v3_202003`
),
-- LEFT JOINs keep every structure: a missing gridcode match or the absence
-- of a containing polygon leaves the corresponding columns NULL.
-- A point inside overlapping polygons produces one row per polygon.
structures_meta AS (
  SELECT
    s.structure_id,
    m.elevation_m,
    m.distance_from_shore_m,
    r.MRGID_EEZ,
    r.TERRITORY1,
    r.ISO_TER1,
    r.POL_TYPE,
    r.`UNION`
  FROM structures AS s
  LEFT JOIN spatial_measures AS m
    USING (gridcode)
  LEFT JOIN marine_regions AS r
    ON ST_CONTAINS(r.geometry, s.geometry)
)

-- DISTINCT collapses rows that are identical across all output columns.
SELECT DISTINCT
  structure_id,
  elevation_m,
  distance_from_shore_m,
  MRGID_EEZ,
  TERRITORY1,
  ISO_TER1,
  POL_TYPE,
  `UNION`
FROM structures_meta
15 changes: 15 additions & 0 deletions data/structures_view.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Public view joining the offshore infrastructure detections to their
-- extended metadata on structure_id.
CREATE OR REPLACE VIEW `global-fishing-watch.paper_industrial_activity.offshore_infrastructure` AS

WITH structures AS (
  SELECT *
  FROM `global-fishing-watch.paper_industrial_activity.offshore_infrastructure_v20231106`
),
structures_meta AS (
  SELECT *
  FROM `global-fishing-watch.paper_industrial_activity.offshore_infrastructure_meta_v20231106`
  -- Exclude duplicate entries in the Ecuador/Peru joint management regime.
  -- The NULL check is required: a bare NOT IN / != comparison evaluates to
  -- NULL when POL_TYPE is NULL, which would silently drop the metadata rows
  -- (elevation, distance from shore) of structures that matched no
  -- Marine Regions polygon.
  WHERE POL_TYPE IS NULL
    OR POL_TYPE != 'Joint regime (EEZ)'
)

-- LEFT JOIN keeps every structure row even when no metadata row exists.
SELECT *
FROM structures
LEFT JOIN structures_meta
  USING (structure_id)

97 changes: 97 additions & 0 deletions data/vessels_meta.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
-- Schema and documentation for the vessel detection metadata table.
-- CREATE OR REPLACE replaces any existing table of the same name, so this
-- statement must be followed by the INSERT that repopulates it.
-- Column names in upper case mirror the Marine Regions attribute names.
-- `UNION` is a reserved word and therefore has to stay backtick-quoted.
CREATE OR REPLACE TABLE
  `global-fishing-watch.paper_industrial_activity.vessels_meta_v20240605` (
    detect_id INTEGER OPTIONS (description = 'Unique identifier for each detection'),
    flag_iso STRING OPTIONS (description = 'Three letter ISO code for the flag state. This is the country that issued the MMSI'),
    shipname STRING OPTIONS (description = 'Name of the vessel'),
    callsign STRING OPTIONS (description = 'Radio callsign'),
    imo STRING OPTIONS (description = 'IMO number'),
    elevation_m FLOAT64 OPTIONS (description = 'Vertical distance from mean sea level in meters'),
    distance_from_shore_m FLOAT64 OPTIONS (description = 'Distance from shore in meters'),
    MRGID_EEZ INT64 OPTIONS (description = 'Marine Regions identifier'),
    TERRITORY1 STRING OPTIONS (description = 'Marine Regions territory name'),
    ISO_TER1 STRING OPTIONS (description = 'Marine Regions ISO country identifier'),
    POL_TYPE STRING OPTIONS (description = 'Marine Regions political unit type'),
    `UNION` STRING OPTIONS (description = 'Marine Regions union')
  ) OPTIONS (
    -- The description text is kept flush-left because leading whitespace
    -- inside the triple-quoted string would become part of the description.
    description = """Extended metadata for vessels keyed by MMSI.

Note that many vessels will change their identifiers over the 5-year study period, and the only identifier that
is unchangeable is the IMO number. This table contains the best identifiers associated with each MMSI over the
course of a calendar year. However these can change and it is possible for more than one vessel to use the same
MMSI at the same time.

Not all MMSI will have all the associated identity fields. The AIS messages that contain the position information
(lat, lon, timestamp) that are used to match to the SAR detects only contain MMSI. The other fields are matched
to the MMSI from other non-location messages.

elevation_m is from the GEBCO gridded bathymetry data set
https://globalfishingwatch.org/data-download/datasets/public-bathymetry-v1
https://www.gebco.net/data_and_products/gridded_bathymetry_data/

distance_from_shore_m is from this data set
https://globalfishingwatch.org/data-download/datasets/public-distance-from-shore-v1
https://pae-paha.pacioos.hawaii.edu/thredds/dist2coast.html?dataset=dist2coast_1deg

The eez fields come from Marine Regions, Flanders Marine Institute (2019). Maritime Boundaries Geodatabase: Maritime Boundaries and Exclusive Economic Zones (200NM), version 11. Available online at https://www.marineregions.org/. https://doi.org/10.14284/386

NB: There are about 13k detect_ids in this table that occur more than once because of overlapping EEZ areas which represent competing claims and joint management areas.
"""
  );

-- Populates the vessel detection metadata table.
-- Each detection is enriched with:
--   * gridded elevation and distance-from-shore values, matched on gridcode
--   * the attributes of every Marine Regions polygon that contains the point
--   * the yearly AIS identity of the detection's MMSI
-- INSERT without a column list maps values by position. The target columns
-- are therefore listed explicitly and the SELECT below follows the same
-- order, so the identity fields land in flag_iso/shipname/callsign/imo and
-- the spatial fields in their own columns.
INSERT INTO
  `global-fishing-watch.paper_industrial_activity.vessels_meta_v20240605` (
    detect_id,
    flag_iso,
    shipname,
    callsign,
    imo,
    elevation_m,
    distance_from_shore_m,
    MRGID_EEZ,
    TERRITORY1,
    ISO_TER1,
    POL_TYPE,
    `UNION`
  )
WITH
-- Detections plus the join keys derived from them:
-- a gridcode string built from lon/lat rounded to 0.01 degrees,
-- and a geography point for the polygon containment test.
vessels AS (
  SELECT
    detect_id,
    mmsi,
    timestamp,
    FORMAT("lon:%+07.2f_lat:%+07.2f", ROUND(lon/0.01)*0.01, ROUND(lat/0.01)*0.01) AS gridcode,
    ST_GEOGPOINT(lon, lat) AS geometry
  FROM `global-fishing-watch.paper_industrial_activity.vessels_v20231013`
),
-- Gridded static measures, one lookup value per gridcode.
spatial_measures AS (
  SELECT
    gridcode,
    elevation_m,
    distance_from_shore_m
  FROM `world-fishing-827.pipe_static.spatial_measures_20201105`
),
-- Marine Regions polygons and the attributes copied into the metadata table.
marine_regions AS (
  SELECT
    MRGID_EEZ,
    TERRITORY1,
    ISO_TER1,
    POL_TYPE,
    `UNION`,
    geometry
  FROM `world-fishing-827.pipe_regions_layers.EEZ_land_union_v3_202003`
),
-- Per-MMSI, per-year identity fields, restricted to the years 2017-2021.
vessel_identity AS (
  SELECT
    ssvid AS mmsi_str,
    year,
    best.best_flag AS flag,
    ais_identity.n_shipname_mostcommon.value AS shipname,
    ais_identity.n_callsign_mostcommon.value AS callsign,
    ais_identity.n_imo_mostcommon.value AS imo
  FROM `world-fishing-827.pipe_ais_v3_published.vi_ssvid_byyear_v20240401`
  WHERE year IN (2017, 2018, 2019, 2020, 2021)
),
-- LEFT JOINs keep every detection: a missing gridcode match, no containing
-- polygon, or no identity record leaves the corresponding columns NULL.
-- A point inside overlapping polygons produces one row per polygon.
vessels_meta AS (
  SELECT
    v.detect_id,
    i.flag AS flag_iso,
    i.shipname,
    i.callsign,
    i.imo,
    m.elevation_m,
    m.distance_from_shore_m,
    r.MRGID_EEZ,
    r.TERRITORY1,
    r.ISO_TER1,
    r.POL_TYPE,
    r.`UNION`
  FROM vessels AS v
  LEFT JOIN spatial_measures AS m
    USING (gridcode)
  LEFT JOIN marine_regions AS r
    ON ST_CONTAINS(r.geometry, v.geometry)
  LEFT JOIN vessel_identity AS i
    -- The identity table's ssvid is compared as a string, hence the cast;
    -- the identity record is picked for the year of the detection.
    ON CAST(v.mmsi AS STRING) = i.mmsi_str
    AND EXTRACT(YEAR FROM v.timestamp) = i.year
)

SELECT
  detect_id,
  flag_iso,
  shipname,
  callsign,
  imo,
  elevation_m,
  distance_from_shore_m,
  MRGID_EEZ,
  TERRITORY1,
  ISO_TER1,
  POL_TYPE,
  `UNION`
FROM vessels_meta
15 changes: 15 additions & 0 deletions data/vessels_view.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Public view joining the vessel detections to their extended metadata
-- on detect_id.
CREATE OR REPLACE VIEW `global-fishing-watch.paper_industrial_activity.vessels` AS

WITH vessels AS (
  SELECT *
  FROM `global-fishing-watch.paper_industrial_activity.vessels_v20231013`
),
vessels_meta AS (
  -- Reads the v20240605 metadata table, which is the version created by
  -- data/vessels_meta.sql and listed in data/README.md.
  SELECT *
  FROM `global-fishing-watch.paper_industrial_activity.vessels_meta_v20240605`
)

-- LEFT JOIN keeps every detection even when no metadata row exists.
-- A detect_id that occurs more than once in the metadata table yields
-- one output row per metadata row.
SELECT *
FROM vessels
LEFT JOIN vessels_meta
  USING (detect_id)