Commit to reload on local machine
michaeldenes committed May 27, 2024
1 parent 5bf09a8 commit ab14c55
Showing 1 changed file with 254 additions and 0 deletions.
254 changes: 254 additions & 0 deletions testing.ipynb
@@ -0,0 +1,254 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import xarray as xr\n",
"import numpy as np\n",
"import pandas as pd\n",
"import urllib.request as urlr\n",
"import cartopy.io.shapereader as shpreader\n",
"import geopandas as gpd\n",
"import glob\n",
"from scipy import spatial\n",
"from scipy.interpolate import RegularGridInterpolator\n",
"\n",
"from plasticparcels.utils import distance, get_coords_from_polygon"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Function definitions\n",
"def create_coastal_mpw_jambeck_release_map(mask_coast_filepath, coords_filepath, gpw_filepath,\n",
" distance_threshhold=50., grid_range=0.083,\n",
" gpw_column_name='Population Density, v4.11 (2000, 2005, 2010, 2015, 2020): 2.5 arc-minutes',\n",
" gpw_raster_number=4,\n",
" jambeck_filepath=None,\n",
" jambeck_url='https://www.science.org/doi/suppl/10.1126/science.1260352/suppl_file/1260352_supportingfile_suppl._excel_seq1_v1.xlsx',\n",
" jambeck_url_backup='http://jambeck.engr.uga.edu/wp-content/uploads/2015/01/JambeckSData.xlsx'\n",
" ):\n",
" \"\"\"\n",
" Description\n",
" ----------\n",
"\n",
" A function to create a particle release map based on the Mismanaged Plastice Waste data from Jambeck et al. (2015).\n",
" We first load the Natural Earth coastline vector dataset. For each vertex in the vector dataset, we identify any\n",
" associated coastal cell within distance_threshhold km. We then assign a population density from the GPW dataset,\n",
" computed as the maximum population density within grid_range degrees. We then left-join the Jambeck mismanaged\n",
" plastic waste data to compute the total mismanaged plastic waste in kg/day per coastal grid cell.\n",
"\n",
"\n",
" Parameters\n",
" ----------\n",
"\n",
" mask_coast_filepath : str\n",
" File path to the coastal mask generated in ...py TODO\n",
" coords_filepath : str\n",
" File path to the model grid coordinates.\n",
" gpw_filepath : str\n",
" File path to the 'Gridded Population of the World (GPW)' dataset.\n",
" distance_threshhold : float, optional\n",
" The threshhold distance used to find coastal grid cells associated to Natural Earth vector vertices, by default 50\n",
" grid_range : float, optional\n",
" The approximate grid width to search for the maximum population density surrounding a coastal grid cell, by default 0.083\n",
" gpw_column_name : str, optional\n",
" The column name of the GPW dataset that corresponds to the population density, by default 'Population Density, v4.11 (2000, 2005, 2010, 2015, 2020): 2.5 arc-minutes'\n",
" gpw_raster_number : int, optional\n",
" The raster number of the dataset to use. When using the GPW v4.11 data, raster = 4 provides the 2020 population density.\n",
" jambeck_filepath : str, optional\n",
" The filepath to the Jambeck dataset if downloaded manually.\n",
" jambeck_url : str, optional\n",
" The URL of the Jambeck dataset from the Science article.\n",
" jambeck_url_backup : str, optional\n",
" The URL of the Jambeck dataset from Jenna Jambeck's website.\n",
"\n",
" Returns\n",
" -------\n",
" coastal_density_mpw_df\n",
" A pandas dataframe containing the coastal grid cells, associated countries, and associated mismanaged plastic waste in kg/day.\n",
" \"\"\"\n",
"\n",
" # Open the Natural Earth coastline vector dataset\n",
" shpfilename = shpreader.natural_earth(resolution='50m',\n",
" category='cultural',\n",
" name='admin_0_countries')\n",
"\n",
" reader = shpreader.Reader(shpfilename)\n",
" countries = reader.records()\n",
"\n",
" # Open the GPW dataset, and set NaNs to zeros (i.e. zero population density in the ocean)\n",
" gpw = xr.open_dataset(gpw_filepath)\n",
" gpw = gpw.fillna(0.)\n",
"\n",
" # Load in coast mask and model coordinates\n",
" data_mask_coast = xr.open_dataset(mask_coast_filepath)\n",
" coords = xr.open_dataset(coords_filepath, decode_cf=False)\n",
"\n",
" lats_coast = data_mask_coast['lat'].data[np.where(data_mask_coast['mask_coast'])]\n",
" lons_coast = data_mask_coast['lon'].data[np.where(data_mask_coast['mask_coast'])]\n",
"\n",
" # Compute the area of each grid cell\n",
" cell_areas = coords['e1t'][0] * coords['e2t'][0]/10e6 # in km**2\n",
" coastal_cell_areas = cell_areas.data[np.where(data_mask_coast['mask_coast'])]\n",
"\n",
" # Loop through all countries from Natural Earth dataset\n",
" coastal_density_list = []\n",
" for country in countries:\n",
" # Country information\n",
" continent = country.attributes['CONTINENT']\n",
" region_un = country.attributes['REGION_UN']\n",
" subregion = country.attributes['SUBREGION']\n",
" country_name = country.attributes['NAME_LONG']\n",
"\n",
" # Extract longitude and latitude of the coastal point\n",
" country_coords = get_coords_from_polygon(country.geometry)\n",
" country_lons, country_lats = country_coords[:, 0], country_coords[:, 1]\n",
"\n",
" # Loop through country points to find coastal points within distance_threshhold km\n",
" all_coastal_indices = []\n",
" for i in range(len(country_lons)):\n",
" distances = distance(np.repeat(country_lons[i], len(lons_coast)), np.repeat(country_lats[i], len(lats_coast)), lons_coast, lats_coast)\n",
" coastal_indices = np.where(distances <= distance_threshhold)[0] # Coastal indices are those that are within the thresshold distance\n",
" all_coastal_indices.append(coastal_indices)\n",
"\n",
" # Concatenate into one list and identify the unique coastal cells\n",
" all_coastal_indices = np.unique(np.hstack(all_coastal_indices))\n",
"\n",
" # For all coastal points assigned to the country, find the maximum population density around that point\n",
" country_coastal_density_list = []\n",
" for i in range(len(all_coastal_indices)):\n",
" coastal_id = all_coastal_indices[i]\n",
" coastal_point = [lons_coast[coastal_id], lats_coast[coastal_id]]\n",
" coastal_cell_area = coastal_cell_areas[coastal_id]\n",
"\n",
" # Compute the population density as the maximum population density around the coastal cell\n",
" # Because the grid is ordered longitude ascending, latitude descending, the slice ordering is swapped for lat\n",
" population_density = gpw[gpw_column_name].sel(longitude=slice(coastal_point[0] - grid_range, coastal_point[0] + grid_range),\n",
" latitude=slice(coastal_point[1] + grid_range, coastal_point[1] - grid_range),\n",
" raster=gpw_raster_number).max()\n",
"\n",
" population_density = np.float32(population_density)\n",
"\n",
" country_coastal_density_list.append({'Continent': continent,\n",
" 'Region': region_un,\n",
" 'Subregion': subregion,\n",
" 'Country': country_name,\n",
" 'Longitude': coastal_point[0],\n",
" 'Latitude': coastal_point[1],\n",
" 'Area[km2]': coastal_cell_area,\n",
" 'PopulationDensity': population_density})\n",
"\n",
" country_coastal_density_df = pd.DataFrame.from_records(country_coastal_density_list)\n",
" coastal_density_list.append(country_coastal_density_df)\n",
"\n",
" # Concatenate all the information into one dataframe\n",
" coastal_density_df = pd.concat(coastal_density_list)\n",
"\n",
" # Open the Jambeck dataset\n",
" if jambeck_filepath is not None:\n",
" MPW = pd.read_excel(jambeck_filepath, 'Jambeck et al. (2014)')\n",
" else:\n",
" try:\n",
" socket = urlr.urlopen(jambeck_url)\n",
" except:\n",
" socket = urlr.urlopen(jambeck_url_backup)\n",
" spreadsheet_from_url = pd.ExcelFile(socket.read())\n",
" MPW = pd.read_excel(spreadsheet_from_url, 'Jambeck et al. (2014)')\n",
"\n",
" # Perform some data cleaning steps\n",
" # 1. Rename the columns to make it easier to work with\n",
" MPW = MPW.rename(columns={'Economic status1': 'Economic status',\n",
" 'Mismanaged plastic waste [kg/person/day]7': 'Mismanaged plastic waste [kg/person/day]'})\n",
"\n",
" # 2. Set the bottom rows with black info to blank strings and only keep data with valid country names\n",
" MPW = MPW.fillna('')\n",
" MPW = MPW[MPW['Country'] != '']\n",
"\n",
" # 3. Remove all sub/superscripts and standardise the 'and' and ampersands.\n",
" MPW['Country'] = MPW['Country'].replace(regex='[0-9]', value='').str.replace('&', 'and')\n",
"\n",
" # 4. Manually rename some countries to match the Natural Earth Dataset\n",
" MPW['Country'] = MPW['Country'].replace('Brunei', 'Brunei Darussalam')\n",
" MPW['Country'] = MPW['Country'].replace('Curacao', 'Curaçao')\n",
" MPW['Country'] = MPW['Country'].replace('Congo, Dem rep. of', 'Democratic Republic of the Congo')\n",
" MPW['Country'] = MPW['Country'].replace('Korea, North', 'Dem. Rep. Korea')\n",
" MPW['Country'] = MPW['Country'].replace('Korea, South (Republic of Korea)', 'Republic of Korea')\n",
" MPW['Country'] = MPW['Country'].replace('Burma/Myanmar', 'Myanmar')\n",
" MPW['Country'] = MPW['Country'].replace('Micronesia', 'Federated States of Micronesia')\n",
" MPW['Country'] = MPW['Country'].replace('Faroe Islands', 'Faeroe Islands')\n",
" MPW['Country'] = MPW['Country'].replace('Falkland Islands', 'Falkland Islands / Malvinas')\n",
" MPW['Country'] = MPW['Country'].replace('Cote d\\'Ivoire', 'Côte d\\'Ivoire')\n",
" MPW['Country'] = MPW['Country'].replace('East Timor', 'Timor-Leste')\n",
" MPW['Country'] = MPW['Country'].replace('Russia', 'Russian Federation')\n",
" MPW['Country'] = MPW['Country'].replace('Saint Pierre', 'Saint Pierre and Miquelon')\n",
" MPW['Country'] = MPW['Country'].replace('Congo Rep of', 'Republic of the Congo')\n",
" MPW['Country'] = MPW['Country'].replace('Palestine (Gaza Strip is only part on the coast)', 'Palestine')\n",
" MPW['Country'] = MPW['Country'].replace('Saint Maarten, DWI', 'Sint Maarten')\n",
" MPW['Country'] = MPW['Country'].replace('USVI', 'United States Virgin Islands')\n",
" MPW['Country'] = MPW['Country'].replace('Sao Tome and Principe', 'São Tomé and Principe')\n",
"\n",
" # TODO: Sort out these countries...\n",
" # Below commented out because it obviously doubles up rows when left-joining.\n",
" # MPW['Country'] = MPW['Country'].replace('Northern Cyprus', 'Cyprus') # Combines the two sets\n",
" # MPW['Country'] = MPW['Country'].replace('Gibraltar', 'United Kingdom') # Add to UK\n",
" # MPW['Country'] = MPW['Country'].replace('French Guiana', 'France')\n",
" # MPW['Country'] = MPW['Country'].replace('Guadeloupe', 'France')\n",
" # MPW['Country'] = MPW['Country'].replace('Martinique', 'France')\n",
" # MPW['Country'] = MPW['Country'].replace('Christmas Island', 'Australia')\n",
" # MPW['Country'] = MPW['Country'].replace('Reunion', 'France')\n",
" # MPW['Country'] = MPW['Country'].replace('Netherlands Antilles', 'Netherlands')\n",
"\n",
" # Combine the coastal cell density data with the mismanaged plastic waste data, performing a left-join on the country column\n",
" coastal_density_mpw_df = pd.merge(coastal_density_df, MPW[['Country', 'Economic status', 'Mismanaged plastic waste [kg/person/day]']], on=\"Country\", how=\"left\")\n",
"\n",
" # Compute the mismanaged plastic waste in units of kg/day\n",
" coastal_density_mpw_df['MPW_Cell'] = coastal_density_mpw_df['Area[km2]']*coastal_density_mpw_df['PopulationDensity']*coastal_density_mpw_df['Mismanaged plastic waste [kg/person/day]']\n",
"\n",
" return coastal_density_mpw_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mask_land_filepath = '/Users/denes001/Research/Projects/PlasticParcels/PlasticParcels/data/output_data/masks/mask_land_NEMO0083.nc'\n",
"mask_coast_filepath = '/Users/denes001/Research/Projects/PlasticParcels/PlasticParcels/data/output_data/masks/mask_coast_NEMO0083.nc'\n",
"coords_filepath = '/Users/denes001/Research/Projects/PlasticParcels/PlasticParcels/data/input_data/MOi/domain_ORCA0083-N006/coordinates.nc'\n",
"\n",
"# Create coastal release data\n",
"gpw_filepath = '/Users/denes001/Research/Projects/PlasticParcels/PlasticParcels/data/release/GPWv4/gpw-v4-population-density-rev11_totpop_2pt5_min_nc/gpw_v4_population_density_rev11_2pt5_min.nc'"
]
},
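{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Example usage (a sketch, not run here): build the coastal mismanaged plastic waste\n",
"# release map from the file paths defined in the previous cell. This assumes the mask,\n",
"# coordinate and GPW files exist at those paths; the Jambeck spreadsheet is fetched from\n",
"# the default URLs unless a local jambeck_filepath is supplied.\n",
"coastal_density_mpw_df = create_coastal_mpw_jambeck_release_map(mask_coast_filepath, coords_filepath, gpw_filepath)\n",
"coastal_density_mpw_df.head()"
]
}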
],
"metadata": {
"kernelspec": {
"display_name": "parcels-v3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
