From c1787a76395293df09968af20349f04c45fcb543 Mon Sep 17 00:00:00 2001 From: Pieter Provoost Date: Wed, 9 Oct 2024 20:04:28 +0200 Subject: [PATCH] speciesgrids docs --- README.md | 13 +- notebooks/Python/speciesgrids_demo.ipynb | 292 +++++++++++++++++++++++ 2 files changed, 300 insertions(+), 5 deletions(-) create mode 100644 notebooks/Python/speciesgrids_demo.ipynb diff --git a/README.md b/README.md index e075fe8..7646874 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,6 @@ Here we outline the various tools, demos, and resources that can be used to acce - [Demo Notebooks](#notebook-demos) - [Other resources](#other-resources) - ## Access OBIS data There are several options available to download data from OBIS, some of which include: @@ -19,7 +18,7 @@ There are several options available to download data from OBIS, some of which in - [Full data exports](#full-data-exports) - [OBIS homepage search](https://obis.org/) or [advanced dataset search](https://obis.org/datasets) - [OBIS Mapper](https://mapper.obis.org/) - +- [speciesgrids](#speciesgrids) ### robis @@ -65,7 +64,7 @@ seamount_datasets <- datasets[ grepl(paste(search_terms, collapse = "|"), datasets$abstract, ignore.case = TRUE),] ``` -## Full data exports +### Full data exports [links to jupyter notebook for full data export?] @@ -75,15 +74,19 @@ A full data export of OBIS data is available for download as a Parquet file, [he - The exported file will be a single, flattened Occurrence table - The table includes all provided Event and Occurrence data, as well as 68 fields added by the OBIS Quality Control Pipeline, including taxonomic information obtained from WoRMS -## OBIS homepage +### OBIS homepage From the OBIS homepage, you can search for data in the search bar in the middle of the page. You can search by particular taxonomic groups, common names, dataset names, OBIS nodes, institute name, areas (e.g., Exclusive Economic Zone (EEZ)), or by the data provider’s country. 
See [here](https://manual.obis.org/access.html#obis-homepage-and-dataset-pages) for more details. -## OBIS Mapper +### OBIS Mapper The [OBIS Mapper](https://mapper.obis.org) lets you visualize and filter OBIS data by taxonomy, location, time, and data quality, with options to combine layers and download them as CSV. For more details, see the [OBIS manual](https://manual.obis.org/access.html#mapper). +### speciesgrids + +[speciesgrids](https://github.com/iobis/speciesgrids) is a Python package to build WoRMS-aligned species distribution datasets that combine OBIS and GBIF data. The resulting dataset is available in a few resolutions on AWS S3. The dataset can be downloaded locally for best performance, or queried directly from the S3 bucket. For more details about downloading and using the dataset, see the [speciesgrids README](https://github.com/iobis/speciesgrids) or the [notebook](notebooks/Python/speciesgrids_demo.ipynb). + ## Notebook demos We have prepared several JupyterHub Notebooks that can be used for reference, see: https://github.com/iobis/hackathon/tree/master/notebooks. The notebooks cover several topics including OBIS data access, data cleaning, environmental information extraction, and data visualization. diff --git a/notebooks/Python/speciesgrids_demo.ipynb b/notebooks/Python/speciesgrids_demo.ipynb new file mode 100644 index 0000000..68bdd15 --- /dev/null +++ b/notebooks/Python/speciesgrids_demo.ipynb @@ -0,0 +1,292 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# speciesgrids demo\n", + "\n", + "This notebook demonstrates querying the speciesgrids data product using geopandas and duckdb. The examples use the `s3://obis-products/speciesgrids/h3_7/` remote data source by default. To query a local copy instead, use the local file path.\n", + "\n", + "## Regional species list using duckdb\n", + "\n", + "In the example below we use duckdb to query the gridded product with H3 resolution 7 to obtain a regional species list. 
The [geometry for our region of interest](https://wktmap.com/?b40bc054) is encoded as WKT and used in the duckdb query." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4e208851c92f4f51b4aa4c3bddd86d27", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
speciesrecordsmin_yearmax_yearsource_obissource_gbif
0Larus argentatus1118392.01837.02024.0TrueTrue
1Larus fuscus84912.01964.02024.0TrueTrue
2Larus canus11298.01953.02024.0TrueTrue
3Larus ridibundus9528.01911.02024.0TrueTrue
4Phalacrocorax carbo8610.01910.02024.0TrueTrue
.....................
1620Wandonia haliotis1.02015.02015.0FalseTrue
1621Raja undulata1.02000.02000.0FalseTrue
1622Akashiwo sanguinea1.02021.02021.0TrueFalse
1623Pelagostrobilidium paraepacrum1.02014.02014.0FalseTrue
1624Slabberia halterata1.01972.01972.0FalseTrue
\n", + "

1625 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " species records min_year max_year \\\n", + "0 Larus argentatus 1118392.0 1837.0 2024.0 \n", + "1 Larus fuscus 84912.0 1964.0 2024.0 \n", + "2 Larus canus 11298.0 1953.0 2024.0 \n", + "3 Larus ridibundus 9528.0 1911.0 2024.0 \n", + "4 Phalacrocorax carbo 8610.0 1910.0 2024.0 \n", + "... ... ... ... ... \n", + "1620 Wandonia haliotis 1.0 2015.0 2015.0 \n", + "1621 Raja undulata 1.0 2000.0 2000.0 \n", + "1622 Akashiwo sanguinea 1.0 2021.0 2021.0 \n", + "1623 Pelagostrobilidium paraepacrum 1.0 2014.0 2014.0 \n", + "1624 Slabberia halterata 1.0 1972.0 1972.0 \n", + "\n", + " source_obis source_gbif \n", + "0 True True \n", + "1 True True \n", + "2 True True \n", + "3 True True \n", + "4 True True \n", + "... ... ... \n", + "1620 False True \n", + "1621 False True \n", + "1622 True False \n", + "1623 False True \n", + "1624 False True \n", + "\n", + "[1625 rows x 6 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import duckdb\n", + "import pyarrow.dataset as ds\n", + "\n", + "# Region of interest as WKT; it is bound to the query as a parameter below.\n", + "wkt = \"POLYGON ((2.694397 51.187951, 2.694397 51.271367, 3.013 51.271367, 3.013 51.187951, 2.694397 51.187951))\"\n", + "\n", + "con = duckdb.connect()\n", + "# The spatial extension provides the st_* functions used in the query.\n", + "con.sql(\"\"\"\n", + " install spatial;\n", + " load spatial;\n", + "\"\"\")\n", + "# Expose the remote parquet dataset to duckdb under the table name \"dataset\".\n", + "dataset = ds.dataset(\"s3://obis-products/speciesgrids/h3_7/\", format=\"parquet\")\n", + "con.register(\"dataset\", dataset)\n", + "\n", + "# Aggregate per species over all rows whose geometry intersects the region.\n", + "# The WKT is passed as a bound parameter (?) instead of being spliced into the SQL text.\n", + "df = con.execute(\"\"\"\n", + " select\n", + " species,\n", + " sum(records) as records,\n", + " min(min_year) as min_year,\n", + " max(max_year) as max_year,\n", + " max(source_obis) as source_obis,\n", + " max(source_gbif) as source_gbif\n", + " from dataset\n", + " where st_intersects(st_geomfromwkb(geometry), st_geomfromtext(?))\n", + " group by species\n", + " order by sum(records) desc\n", + "\"\"\", [wkt]).fetchdf()\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Species 
distributions using geopandas\n", + "\n", + "This example uses geopandas to query the gridded product with H3 resolution 7 to obtain species distributions for the genus Abra, and lonboard to visualize the distributions on a map." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas\n", + "import lonboard\n", + "import seaborn as sns\n", + "\n", + "# Push the genus filter down to the parquet reader and keep only the columns used below.\n", + "filters = [(\"genus\", \"==\", \"Abra\")]\n", + "gdf = geopandas.read_parquet(\"s3://obis-products/speciesgrids/h3_7/\", filters=filters)[[\"cell\", \"records\", \"geometry\", \"species\"]]\n", + "\n", + "def generate_colors(unique_species, species=None):\n", + "    \"\"\"Return per-row fill colours for lonboard, one palette colour per species.\n", + "\n", + "    unique_species: distinct species names; each is mapped to a colour from the \"Paired\" palette.\n", + "    species: per-row species values to colour; defaults to gdf[\"species\"] so existing one-argument calls keep working.\n", + "    Returns the result of lonboard.colormap.apply_categorical_cmap for those values.\n", + "    \"\"\"\n", + "    if species is None:\n", + "        species = gdf[\"species\"]\n", + "    palette = sns.color_palette(\"Paired\", len(unique_species))\n", + "    # seaborn yields floats in [0, 1]; convert them to 0-255 integer RGB triples.\n", + "    rgb_colors = [[int(r * 255), int(g * 255), int(b * 255)] for r, g, b in palette]\n", + "    color_map = dict(zip(unique_species, rgb_colors))\n", + "    return lonboard.colormap.apply_categorical_cmap(species, color_map)\n", + "\n", + "point_layer = lonboard.ScatterplotLayer.from_geopandas(gdf)\n", + "point_layer.get_radius = 10000\n", + "point_layer.radius_max_pixels = 2\n", + "point_layer.get_fill_color = generate_colors(gdf[\"species\"].unique(), gdf[\"species\"])\n", + "lonboard.Map([point_layer])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "3.12.0", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}