Commit to reload on local machine
michaeldenes committed May 27, 2024
1 parent 5bf09a8 commit ab14c55
Showing 1 changed file with 254 additions and 0 deletions.
254 changes: 254 additions & 0 deletions testing.ipynb
@@ -0,0 +1,254 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import xarray as xr\n",
"import numpy as np\n",
"import pandas as pd\n",
"import urllib.request as urlr\n",
"import cartopy.io.shapereader as shpreader\n",
"import geopandas as gpd\n",
"import glob\n",
"from scipy import spatial\n",
"from scipy.interpolate import RegularGridInterpolator\n",
"\n",
"from plasticparcels.utils import distance, get_coords_from_polygon"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Function definitions\n",
"def create_coastal_mpw_jambeck_release_map(mask_coast_filepath, coords_filepath, gpw_filepath,\n",
" distance_threshhold=50., grid_range=0.083,\n",
" gpw_column_name='Population Density, v4.11 (2000, 2005, 2010, 2015, 2020): 2.5 arc-minutes',\n",
" gpw_raster_number=4,\n",
" jambeck_filepath=None,\n",
" jambeck_url='https://www.science.org/doi/suppl/10.1126/science.1260352/suppl_file/1260352_supportingfile_suppl._excel_seq1_v1.xlsx',\n",
" jambeck_url_backup='http://jambeck.engr.uga.edu/wp-content/uploads/2015/01/JambeckSData.xlsx'\n",
" ):\n",
" \"\"\"\n",
" Description\n",
" ----------\n",
"\n",
" A function to create a particle release map based on the Mismanaged Plastice Waste data from Jambeck et al. (2015).\n",
" We first load the Natural Earth coastline vector dataset. For each vertex in the vector dataset, we identify any\n",
" associated coastal cell within distance_threshhold km. We then assign a population density from the GPW dataset,\n",
" computed as the maximum population density within grid_range degrees. We then left-join the Jambeck mismanaged\n",
" plastic waste data to compute the total mismanaged plastic waste in kg/day per coastal grid cell.\n",
"\n",
"\n",
" Parameters\n",
" ----------\n",
"\n",
" mask_coast_filepath : str\n",
" File path to the coastal mask generated in ...py TODO\n",
" coords_filepath : str\n",
" File path to the model grid coordinates.\n",
" gpw_filepath : str\n",
" File path to the 'Gridded Population of the World (GPW)' dataset.\n",
" distance_threshhold : float, optional\n",
" The threshhold distance used to find coastal grid cells associated to Natural Earth vector vertices, by default 50\n",
" grid_range : float, optional\n",
" The approximate grid width to search for the maximum population density surrounding a coastal grid cell, by default 0.083\n",
" gpw_column_name : str, optional\n",
" The column name of the GPW dataset that corresponds to the population density, by default 'Population Density, v4.11 (2000, 2005, 2010, 2015, 2020): 2.5 arc-minutes'\n",
" gpw_raster_number : int, optional\n",
" The raster number of the dataset to use. When using the GPW v4.11 data, raster = 4 provides the 2020 population density.\n",
" jambeck_filepath : str, optional\n",
" The filepath to the Jambeck dataset if downloaded manually.\n",
" jambeck_url : str, optional\n",
" The URL of the Jambeck dataset from the Science article.\n",
" jambeck_url_backup : str, optional\n",
" The URL of the Jambeck dataset from Jenna Jambeck's website.\n",
"\n",
" Returns\n",
" -------\n",
" coastal_density_mpw_df\n",
" A pandas dataframe containing the coastal grid cells, associated countries, and associated mismanaged plastic waste in kg/day.\n",
" \"\"\"\n",
"\n",
" # Open the Natural Earth coastline vector dataset\n",
" shpfilename = shpreader.natural_earth(resolution='50m',\n",
" category='cultural',\n",
" name='admin_0_countries')\n",
"\n",
" reader = shpreader.Reader(shpfilename)\n",
" countries = reader.records()\n",
"\n",
" # Open the GPW dataset, and set NaNs to zeros (i.e. zero population density in the ocean)\n",
" gpw = xr.open_dataset(gpw_filepath)\n",
" gpw = gpw.fillna(0.)\n",
"\n",
" # Load in coast mask and model coordinates\n",
" data_mask_coast = xr.open_dataset(mask_coast_filepath)\n",
" coords = xr.open_dataset(coords_filepath, decode_cf=False)\n",
"\n",
" lats_coast = data_mask_coast['lat'].data[np.where(data_mask_coast['mask_coast'])]\n",
" lons_coast = data_mask_coast['lon'].data[np.where(data_mask_coast['mask_coast'])]\n",
"\n",
" # Compute the area of each grid cell\n",
" cell_areas = coords['e1t'][0] * coords['e2t'][0]/10e6 # in km**2\n",
" coastal_cell_areas = cell_areas.data[np.where(data_mask_coast['mask_coast'])]\n",
"\n",
" # Loop through all countries from Natural Earth dataset\n",
" coastal_density_list = []\n",
" for country in countries:\n",
" # Country information\n",
" continent = country.attributes['CONTINENT']\n",
" region_un = country.attributes['REGION_UN']\n",
" subregion = country.attributes['SUBREGION']\n",
" country_name = country.attributes['NAME_LONG']\n",
"\n",
" # Extract longitude and latitude of the coastal point\n",
" country_coords = get_coords_from_polygon(country.geometry)\n",
" country_lons, country_lats = country_coords[:, 0], country_coords[:, 1]\n",
"\n",
" # Loop through country points to find coastal points within distance_threshhold km\n",
" all_coastal_indices = []\n",
" for i in range(len(country_lons)):\n",
" distances = distance(np.repeat(country_lons[i], len(lons_coast)), np.repeat(country_lats[i], len(lats_coast)), lons_coast, lats_coast)\n",
" coastal_indices = np.where(distances <= distance_threshhold)[0] # Coastal indices are those that are within the thresshold distance\n",
" all_coastal_indices.append(coastal_indices)\n",
"\n",
" # Concatenate into one list and identify the unique coastal cells\n",
" all_coastal_indices = np.unique(np.hstack(all_coastal_indices))\n",
"\n",
" # For all coastal points assigned to the country, find the maximum population density around that point\n",
" country_coastal_density_list = []\n",
" for i in range(len(all_coastal_indices)):\n",
" coastal_id = all_coastal_indices[i]\n",
" coastal_point = [lons_coast[coastal_id], lats_coast[coastal_id]]\n",
" coastal_cell_area = coastal_cell_areas[coastal_id]\n",
"\n",
" # Compute the population density as the maximum population density around the coastal cell\n",
" # Because the grid is ordered longitude ascending, latitude descending, the slice ordering is swapped for lat\n",
" population_density = gpw[gpw_column_name].sel(longitude=slice(coastal_point[0] - grid_range, coastal_point[0] + grid_range),\n",
" latitude=slice(coastal_point[1] + grid_range, coastal_point[1] - grid_range),\n",
" raster=gpw_raster_number).max()\n",
"\n",
" population_density = np.float32(population_density)\n",
"\n",
" country_coastal_density_list.append({'Continent': continent,\n",
" 'Region': region_un,\n",
" 'Subregion': subregion,\n",
" 'Country': country_name,\n",
" 'Longitude': coastal_point[0],\n",
" 'Latitude': coastal_point[1],\n",
" 'Area[km2]': coastal_cell_area,\n",
" 'PopulationDensity': population_density})\n",
"\n",
" country_coastal_density_df = pd.DataFrame.from_records(country_coastal_density_list)\n",
" coastal_density_list.append(country_coastal_density_df)\n",
"\n",
" # Concatenate all the information into one dataframe\n",
" coastal_density_df = pd.concat(coastal_density_list)\n",
"\n",
" # Open the Jambeck dataset\n",
" if jambeck_filepath is not None:\n",
" MPW = pd.read_excel(jambeck_filepath, 'Jambeck et al. (2014)')\n",
" else:\n",
" try:\n",
" socket = urlr.urlopen(jambeck_url)\n",
" except:\n",
" socket = urlr.urlopen(jambeck_url_backup)\n",
" spreadsheet_from_url = pd.ExcelFile(socket.read())\n",
" MPW = pd.read_excel(spreadsheet_from_url, 'Jambeck et al. (2014)')\n",
"\n",
" # Perform some data cleaning steps\n",
" # 1. Rename the columns to make it easier to work with\n",
" MPW = MPW.rename(columns={'Economic status1': 'Economic status',\n",
" 'Mismanaged plastic waste [kg/person/day]7': 'Mismanaged plastic waste [kg/person/day]'})\n",
"\n",
" # 2. Set the bottom rows with black info to blank strings and only keep data with valid country names\n",
" MPW = MPW.fillna('')\n",
" MPW = MPW[MPW['Country'] != '']\n",
"\n",
" # 3. Remove all sub/superscripts and standardise the 'and' and ampersands.\n",
" MPW['Country'] = MPW['Country'].replace(regex='[0-9]', value='').str.replace('&', 'and')\n",
"\n",
" # 4. Manually rename some countries to match the Natural Earth Dataset\n",
" MPW['Country'] = MPW['Country'].replace('Brunei', 'Brunei Darussalam')\n",
" MPW['Country'] = MPW['Country'].replace('Curacao', 'Curaçao')\n",
" MPW['Country'] = MPW['Country'].replace('Congo, Dem rep. of', 'Democratic Republic of the Congo')\n",
" MPW['Country'] = MPW['Country'].replace('Korea, North', 'Dem. Rep. Korea')\n",
" MPW['Country'] = MPW['Country'].replace('Korea, South (Republic of Korea)', 'Republic of Korea')\n",
" MPW['Country'] = MPW['Country'].replace('Burma/Myanmar', 'Myanmar')\n",
" MPW['Country'] = MPW['Country'].replace('Micronesia', 'Federated States of Micronesia')\n",
" MPW['Country'] = MPW['Country'].replace('Faroe Islands', 'Faeroe Islands')\n",
" MPW['Country'] = MPW['Country'].replace('Falkland Islands', 'Falkland Islands / Malvinas')\n",
" MPW['Country'] = MPW['Country'].replace('Cote d\\'Ivoire', 'Côte d\\'Ivoire')\n",
" MPW['Country'] = MPW['Country'].replace('East Timor', 'Timor-Leste')\n",
" MPW['Country'] = MPW['Country'].replace('Russia', 'Russian Federation')\n",
" MPW['Country'] = MPW['Country'].replace('Saint Pierre', 'Saint Pierre and Miquelon')\n",
" MPW['Country'] = MPW['Country'].replace('Congo Rep of', 'Republic of the Congo')\n",
" MPW['Country'] = MPW['Country'].replace('Palestine (Gaza Strip is only part on the coast)', 'Palestine')\n",
" MPW['Country'] = MPW['Country'].replace('Saint Maarten, DWI', 'Sint Maarten')\n",
" MPW['Country'] = MPW['Country'].replace('USVI', 'United States Virgin Islands')\n",
" MPW['Country'] = MPW['Country'].replace('Sao Tome and Principe', 'São Tomé and Principe')\n",
"\n",
" # TODO: Sort out these countries...\n",
" # Below commented out because it obviously doubles up rows when left-joining.\n",
" # MPW['Country'] = MPW['Country'].replace('Northern Cyprus', 'Cyprus') # Combines the two sets\n",
" # MPW['Country'] = MPW['Country'].replace('Gibraltar', 'United Kingdom') # Add to UK\n",
" # MPW['Country'] = MPW['Country'].replace('French Guiana', 'France')\n",
" # MPW['Country'] = MPW['Country'].replace('Guadeloupe', 'France')\n",
" # MPW['Country'] = MPW['Country'].replace('Martinique', 'France')\n",
" # MPW['Country'] = MPW['Country'].replace('Christmas Island', 'Australia')\n",
" # MPW['Country'] = MPW['Country'].replace('Reunion', 'France')\n",
" # MPW['Country'] = MPW['Country'].replace('Netherlands Antilles', 'Netherlands')\n",
"\n",
" # Combine the coastal cell density data with the mismanaged plastic waste data, performing a left-join on the country column\n",
" coastal_density_mpw_df = pd.merge(coastal_density_df, MPW[['Country', 'Economic status', 'Mismanaged plastic waste [kg/person/day]']], on=\"Country\", how=\"left\")\n",
"\n",
" # Compute the mismanaged plastic waste in units of kg/day\n",
" coastal_density_mpw_df['MPW_Cell'] = coastal_density_mpw_df['Area[km2]']*coastal_density_mpw_df['PopulationDensity']*coastal_density_mpw_df['Mismanaged plastic waste [kg/person/day]']\n",
"\n",
" return coastal_density_mpw_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mask_land_filepath = '/Users/denes001/Research/Projects/PlasticParcels/PlasticParcels/data/output_data/masks/mask_land_NEMO0083.nc'\n",
"mask_coast_filepath = '/Users/denes001/Research/Projects/PlasticParcels/PlasticParcels/data/output_data/masks/mask_coast_NEMO0083.nc'\n",
"coords_filepath = '/Users/denes001/Research/Projects/PlasticParcels/PlasticParcels/data/input_data/MOi/domain_ORCA0083-N006/coordinates.nc'\n",
"\n",
"# Create coastal release data\n",
"gpw_filepath = '/Users/denes001/Research/Projects/PlasticParcels/PlasticParcels/data/release/GPWv4/gpw-v4-population-density-rev11_totpop_2pt5_min_nc/gpw_v4_population_density_rev11_2pt5_min.nc'"
]
},
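{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Example usage (a sketch, not run here): build the coastal mismanaged plastic waste\n",
"# release map from the file paths defined in the previous cell. This assumes the mask,\n",
"# coordinate and GPW files exist at those paths; the Jambeck spreadsheet is fetched from\n",
"# the default URLs unless a local jambeck_filepath is supplied.\n",
"coastal_density_mpw_df = create_coastal_mpw_jambeck_release_map(mask_coast_filepath, coords_filepath, gpw_filepath)\n",
"coastal_density_mpw_df.head()"
]
}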
],
"metadata": {
"kernelspec": {
"display_name": "parcels-v3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
