diff --git a/notebooks/dommel/get_data.py b/notebooks/dommel/get_data.py
new file mode 100644
index 0000000..db734a4
--- /dev/null
+++ b/notebooks/dommel/get_data.py
@@ -0,0 +1,9 @@
+# %%
+from ribasim_nl import CloudStorage
+
+cloud = CloudStorage()
+
+dommel_url = cloud.joinurl("dommel", "verwerkt")
+
+# %%
+cloud.download_content(dommel_url)
diff --git a/notebooks/vrij_afwaterend/hydamo_0_analyse_data_waterboard.ipynb b/notebooks/vrij_afwaterend/hydamo_0_analyse_data_waterboard.ipynb
new file mode 100644
index 0000000..0f26f2b
--- /dev/null
+++ b/notebooks/vrij_afwaterend/hydamo_0_analyse_data_waterboard.ipynb
@@ -0,0 +1,298 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import pandas as pd\n",
+    "from pandas_xlsx_tables import xlsx_tables_to_dfs\n",
+    "\n",
+    "sys.path.append(\"Ribasim-NL\\\\src\\\\hydamo\")\n",
+    "from hydamo.datamodel import HyDAMO\n",
+    "from ribasim_lumping_tools.LHM_data_bewerking_analyse_utils import (\n",
+    "    check_ids_hydamo_data,\n",
+    "    check_if_object_on_hydroobject,\n",
+    "    read_original_data,\n",
+    "    translate_data_to_hydamo_format,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2",
+   "metadata": {},
+   "source": [
+    "Translate the original data to HyDAMO data as defined in the table hydamo_data_format.xlsx"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "base_dir = \"..\\\\\"\n",
+    "\n",
+    "waterboard = \"AAenMaas\"\n",
+    "waterboard_code = 1\n",
+    "path_hydamo_format = None  # Daniel Tollenaar: added 14/8 for pre-commit\n",
+    "hydamo_format = None  # Daniel Tollenaar: added 14/8 for pre-commit"
+   ]
+  },
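Note: the next cell overwrites these placeholders by reading the HyDAMO format workbook with pandas_xlsx_tables. As a quick check, the tables that such a workbook defines (and thus which HyDAMO layers are mapped) can be listed; a minimal sketch, assuming the workbook sits next to the notebook:

    from pandas_xlsx_tables import xlsx_tables_to_dfs

    # xlsx_tables_to_dfs returns a dict of DataFrames keyed by Excel table name
    tables = xlsx_tables_to_dfs("HyDAMO_format_AAenMaas.xlsx")
    print(sorted(tables.keys()))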
pre-commit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "waterboard_dir = Path(base_dir, waterboard, \"verwerkt\")\n", + "path_hydamo_format_table = Path(waterboard_dir, \"HyDAMO_format_AAenMaas.xlsx\")\n", + "hydamo_format_table = xlsx_tables_to_dfs(path_hydamo_format)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# eerst inlezen hydroobject, vertalen naar hydamo\n", + "hydamo_object = \"hydroobject\"\n", + "hydamo_translate_table, data_original = read_original_data(waterboard_dir, hydamo_format, hydamo_object, waterboard)\n", + "hydroobject = translate_data_to_hydamo_format(hydamo_translate_table, data_original)\n", + "\n", + "# maak een created_date aan indien nodig\n", + "if \"created_date\" not in data_original.columns:\n", + " hydroobject[\"created_date\"] = pd.NaT\n", + "# transformeer created_date waardes indien nodig\n", + "hydroobject[\"created_date\"] = hydroobject[\"created_date\"].replace(\"\", pd.NaT)\n", + "\n", + "# hydroobject.loc[hydroobject['code'].duplicated(keep=False), 'data_issue'] = 'duplicate_id'\n", + "data_hydamo_dict = dict(hydroobject=hydroobject.set_crs(28992)) # NOQA (FIXME: voor passen precommit)\n", + "\n", + "# geometry hydroobject bufferen met 10 cm voor de spatial join\n", + "hydroobject[\"buffer\"] = hydroobject.copy().buffer(5) # 5 meter buffer omdat anders relevante gemalen wegvallen\n", + "hydroobject_buffered = hydroobject.set_geometry(\"buffer\").set_crs(28992)" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "Specificeer welke HyDAMO data je wilt omzetten" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "hydamo_objects = [\n", + " \"stuw\",\n", + " \"gemaal\",\n", + " \"afvoergebiedaanvoergebied\",\n", + " \"pomp\",\n", + " ##'peilgebiedvigerend',\n", + " ##'peilgebiedpraktijk',\n", + " ##'streefpeil',\n", + " \"duikersifonhevel\",\n", + " ##'afsluiter',\n", + " ##'sluis',\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "for hydamo_object in hydamo_objects:\n", + " # lees aangeleverde data en hydamo tabel voor gegeven kunstwerk en waterschap\n", + " table_hydamo, data_original = read_original_data(waterboard_dir, hydamo_format, hydamo_object, waterboard)\n", + " if data_original is None:\n", + " data_hydamo_dict[hydamo_object] = None\n", + " else:\n", + " # vertaal data naar hydamo-ribasim format\n", + " data_hydamo = translate_data_to_hydamo_format(table_hydamo, data_original)\n", + "\n", + " # maak een created_date aan indien nodig\n", + " if \"created_date\" not in data_original.columns and hydamo_object != \"sluis\":\n", + " hydroobject[\"created_date\"] = pd.NaT\n", + " if \"last_edited_date\" not in data_original.columns and hydamo_object == \"afsluiter\":\n", + " hydroobject[\"last_edited_date\"] = pd.NaT\n", + " if \"lvpublicatiedatum\" not in data_original.columns and hydamo_object == \"afsluiter\":\n", + " hydroobject[\"lvpublicatiedatum\"] = pd.NaT\n", + "\n", + " # transformeer created_date waardes indien nodig\n", + " if hydamo_object != \"sluis\":\n", + " data_hydamo[\"created_date\"] = data_hydamo[\"created_date\"].replace(\"\", pd.NaT)\n", + " if hydamo_object == \"afsluiter\":\n", + " data_hydamo[\"last_edited_date\"] 
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "for hydamo_object in hydamo_objects:\n",
+    "    # read the supplied data and the HyDAMO table for the given structure and water board\n",
+    "    table_hydamo, data_original = read_original_data(waterboard_dir, hydamo_format, hydamo_object, waterboard)\n",
+    "    if data_original is None:\n",
+    "        data_hydamo_dict[hydamo_object] = None\n",
+    "    else:\n",
+    "        # translate data to the hydamo-ribasim format\n",
+    "        data_hydamo = translate_data_to_hydamo_format(table_hydamo, data_original)\n",
+    "\n",
+    "        # create date columns if needed\n",
+    "        if \"created_date\" not in data_original.columns and hydamo_object != \"sluis\":\n",
+    "            data_hydamo[\"created_date\"] = pd.NaT\n",
+    "        if \"last_edited_date\" not in data_original.columns and hydamo_object == \"afsluiter\":\n",
+    "            data_hydamo[\"last_edited_date\"] = pd.NaT\n",
+    "        if \"lvpublicatiedatum\" not in data_original.columns and hydamo_object == \"afsluiter\":\n",
+    "            data_hydamo[\"lvpublicatiedatum\"] = pd.NaT\n",
+    "\n",
+    "        # transform date values if needed\n",
+    "        if hydamo_object != \"sluis\":\n",
+    "            data_hydamo[\"created_date\"] = data_hydamo[\"created_date\"].replace(\"\", pd.NaT)\n",
+    "        if hydamo_object == \"afsluiter\":\n",
+    "            data_hydamo[\"last_edited_date\"] = data_hydamo[\"last_edited_date\"].replace(\"\", pd.NaT)\n",
+    "            data_hydamo[\"lvpublicatiedatum\"] = data_hydamo[\"lvpublicatiedatum\"].replace(\"\", pd.NaT)\n",
+    "\n",
+    "        # check duplicate ids\n",
+    "        if hydamo_object not in [\"streefpeil\"]:  # streefpeil has no code, only globalid etc.\n",
+    "            data_hydamo.loc[data_hydamo[\"code\"].duplicated(keep=False), \"data_issue\"] = \"duplicate_id\"\n",
+    "            # TODO: checking on 'code' seems more logical because that column is used more often,\n",
+    "            # but at WDOD, for example, it is not filled in. Check on globalid after all?\n",
+    "        # check whether the structure lies on a hydroobject\n",
+    "        if hydamo_object in [\"stuw\", \"gemaal\", \"duikersifonhevel\", \"sluis\"]:\n",
+    "            data_hydamo = check_if_object_on_hydroobject(\n",
+    "                data_hydamo=data_hydamo, hydroobject_buffered=hydroobject_buffered\n",
+    "            )\n",
+    "            # remove structures that do not lie on a hydroobject\n",
+    "            data_hydamo = data_hydamo[data_hydamo[\"code_hydroobject\"] != \"niet op hydroobject\"]\n",
+    "            data_hydamo = data_hydamo.reset_index()\n",
+    "        # add to the hydamo dataset\n",
+    "        data_hydamo_dict[hydamo_object] = data_hydamo"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9",
+   "metadata": {},
+   "source": [
+    "Water-board-specific actions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "10",
+   "metadata": {},
+   "source": [
+    "Export to geopackage (disabled)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "11",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# for hydamo_object in ['hydroobject'] + hydamo_objects:\n",
+    "#     # export to geopackage\n",
+    "#     export_to_geopackage(\n",
+    "#         data_hydamo=data_hydamo_dict[hydamo_object],\n",
+    "#         hydamo_format=hydamo_format,\n",
+    "#         waterboard=waterboard,\n",
+    "#         hydamo_object=hydamo_object\n",
+    "#     )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "12",
+   "metadata": {},
+   "source": [
+    "### ribasim-nl hydamo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "13",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "hydamo = HyDAMO(version=\"2.2.1_sweco\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "14",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for hydamo_object in [\"hydroobject\"] + hydamo_objects:\n",
+    "    data_hydamo = data_hydamo_dict[hydamo_object]\n",
+    "    if hydamo_object == \"stuw\":\n",
+    "        data_hydamo = data_hydamo.drop(columns=[\"code_hydroobject\", \"data_issue\"])  # ,'index_right'\n",
+    "    data_hydamo = check_ids_hydamo_data(data_hydamo, waterboard_code, hydamo_object)\n",
+    "    setattr(hydamo, hydamo_object, data_hydamo)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "hydamo.to_geopackage(\"..\\\\hydamo.gpkg\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "ribasim_lumping_venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
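Note: the geopackage written by this notebook can be sanity-checked before moving on; a minimal sketch using the path written above (layer set depends on the waterboard):

    import fiona
    import geopandas as gpd

    path = "..\\hydamo.gpkg"
    for layer in fiona.listlayers(path):
        # print each layer name with its feature count
        print(layer, len(gpd.read_file(path, layer=layer)))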
diff --git a/notebooks/vrij_afwaterend/hydamo_1_process_hydamo_data.ipynb b/notebooks/vrij_afwaterend/hydamo_1_process_hydamo_data.ipynb
new file mode 100644
index 0000000..ccf6472
--- /dev/null
+++ b/notebooks/vrij_afwaterend/hydamo_1_process_hydamo_data.ipynb
@@ -0,0 +1,246 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Process changes to HyDAMO geopackages into a new HyDAMO input geopackage\n",
+    "\n",
+    "Contact person: Harm Nomden (Sweco)\n",
+    "\n",
+    "Last update: 15-03-2024"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import warnings\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import fiona\n",
+    "import geopandas as gpd\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from hydamo_preprocessing.preprocessing import preprocess_hydamo_hydroobjects\n",
+    "\n",
+    "warnings.simplefilter(action=\"ignore\", category=UserWarning)\n",
+    "warnings.simplefilter(action=\"ignore\", category=FutureWarning)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def process_hydamo_changes(\n",
+    "    dir_waterschap, dir_hydamo_preprocess, dir_hydamo_changes, dir_hydamo_processed, sel_layers=None\n",
+    "):\n",
+    "    # process hydamo changes (additions and removals) into a new hydamo geopackage\n",
+    "    path_hydamo_gpkg_preprocess = Path(dir_hydamo_preprocess, \"hydamo.gpkg\")\n",
+    "    path_hydamo_gpkg_processed = Path(dir_hydamo_processed, \"hydamo.gpkg\")\n",
+    "    path_hydamo_gpkg_remove = Path(dir_hydamo_changes, \"hydamo_verwijderen.gpkg\")\n",
+    "    path_hydamo_gpkg_add = Path(dir_hydamo_changes, \"hydamo_toevoegen.gpkg\")\n",
+    "\n",
+    "    if sel_layers is None or sel_layers == []:\n",
+    "        sel_layers = fiona.listlayers(path_hydamo_gpkg_preprocess)\n",
+    "    print(sel_layers)\n",
+    "    for layer in sel_layers:\n",
+    "        if layer == \"layer_styles\":\n",
+    "            continue\n",
+    "        print(f\"  - {layer}\")\n",
+    "        # read original hydamo gpkg (from specified region)\n",
+    "        gdf = gpd.read_file(str(path_hydamo_gpkg_preprocess), layer=layer, crs=28992)\n",
+    "\n",
+    "        # remove objects\n",
+    "        if layer in fiona.listlayers(path_hydamo_gpkg_remove):\n",
+    "            gdf_remove = gpd.read_file(path_hydamo_gpkg_remove, layer=layer, crs=28992)\n",
+    "            try:\n",
+    "                gdf = gdf.loc[~np.isin(gdf[\"code\"], gdf_remove[\"code\"])]\n",
+    "            except KeyError:\n",
+    "                gdf = gdf.loc[~np.isin(gdf[\"globalid\"], gdf_remove[\"globalid\"])]\n",
+    "        # add new objects\n",
+    "        if layer in fiona.listlayers(path_hydamo_gpkg_add):\n",
+    "            gdf_add = gpd.read_file(path_hydamo_gpkg_add, layer=layer, crs=28992)\n",
+    "            gdf_add = gdf_add.to_crs(28992)\n",
+    "            gdf = gdf.to_crs(28992)\n",
+    "            gdf = gpd.GeoDataFrame(pd.concat([gdf, gdf_add])).reset_index()\n",
+    "\n",
+    "        # save to new hydamo gpkg\n",
+    "        layer_options = \"ASPATIAL_VARIANT=GPKG_ATTRIBUTES\"\n",
+    "        if gdf.geometry.isnull().all():\n",
+    "            gdf.to_file(str(path_hydamo_gpkg_processed), layer=layer, driver=\"GPKG\", layer_options=layer_options)\n",
+    "        else:\n",
+    "            gdf.to_file(str(path_hydamo_gpkg_processed), layer=layer, driver=\"GPKG\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "main_dir = Path(\"..\\\\Ribasim modeldata\")  # a Path, so the / operator in the cells below works"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "waterschappen = [\n",
+    "    # \"AaenMaas\",\n",
+    "    # \"BrabantseDelta\",\n",
+    "    \"DeDommel\",\n",
+    "    # \"DrentsOverijsselseDelta\",\n",
+    "    # \"HunzeenAas\",\n",
+    "    # \"Limburg\",\n",
+    "    # \"RijnenIJssel\",\n",
+    "    # \"StichtseRijnlanden\",\n",
+    "    # \"ValleienVeluwe\",\n",
+    "    # 
\"Vechtstromen\"\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "optional: preprocess the hydro-objects (check and adapt endpoints)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "preprocess_hydroobjects = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if preprocess_hydroobjects:\n", + " for waterschap in waterschappen:\n", + " dir_waterschap = main_dir / waterschap / \"verwerkt\"\n", + " dir_hydamo_preprocess = dir_waterschap / \"2_voorbewerking\"\n", + " dir_hydamo_processed = dir_waterschap / \"4_ribasim\"\n", + "\n", + " hydroobjects = gpd.read_file(Path(dir_hydamo_preprocess, \"hydamo.gpkg\"), layer=\"hydroobject\")\n", + " wfd_lines = gpd.read_file(Path(dir_hydamo_processed, \"krw.gpkg\"), layer=\"krw_line\")\n", + " wfd_polygons = gpd.read_file(Path(dir_hydamo_processed, \"krw.gpkg\"), layer=\"krw_polygon\")\n", + "\n", + " hydroobject_new = preprocess_hydamo_hydroobjects(\n", + " hydroobjects,\n", + " wfd_lines=wfd_lines,\n", + " wfd_polygons=wfd_polygons,\n", + " buffer_distance_endpoints=0.5,\n", + " wfd_id_column=\"owmident\",\n", + " buffer_distance_wfd=10,\n", + " overlap_ratio_wfd=0.9,\n", + " )\n", + "\n", + " hydroobject_new.to_file(Path(dir_hydamo_preprocess, \"hydamo.gpkg\"), layer=\"hydroobject\", driver=\"GPKG\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sel_layers = [\n", + " \"hydroobject\",\n", + " # 'stuw',\n", + " # 'gemaal',\n", + " # 'afvoergebiedaanvoergebied',\n", + " # 'pomp',\n", + " # 'peilgebiedvigerend',\n", + " # 'peilgebiedpraktijk',\n", + " # 'streefpeil',\n", + " # 'duikersifonhevel',\n", + " # 'afsluiter',\n", + " # 'sluis',\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for waterschap in waterschappen:\n", + " print(f\"Waterschap {waterschap}\")\n", + " dir_waterschap = Path(main_dir, waterschap, \"verwerkt\")\n", + " dir_hydamo_preprocess = Path(dir_waterschap, \"2_voorbewerking\")\n", + " dir_hydamo_changes = Path(dir_waterschap, \"3_input\")\n", + " dir_hydamo_processed = Path(dir_waterschap, \"4_ribasim\")\n", + "\n", + " process_hydamo_changes(\n", + " dir_waterschap=dir_waterschap,\n", + " dir_hydamo_preprocess=dir_hydamo_preprocess,\n", + " dir_hydamo_changes=dir_hydamo_changes,\n", + " dir_hydamo_processed=dir_hydamo_processed,\n", + " sel_layers=sel_layers,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ribasim_lumping", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + }, + "vscode": { + "interpreter": { + "hash": "a036bb1803af6fe22f064fcf42d66cd9fc5247b5d3b121167c30abfc8c1c6b18" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/vrij_afwaterend/hydamo_2_run_ribasim_lumping_waterboard.ipynb 
b/notebooks/vrij_afwaterend/hydamo_2_run_ribasim_lumping_waterboard.ipynb new file mode 100644 index 0000000..64c0795 --- /dev/null +++ b/notebooks/vrij_afwaterend/hydamo_2_run_ribasim_lumping_waterboard.ipynb @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "from ribasim_lumping_tools.run_ribasim_lumping_waterboard import run_ribasim_lumping_for_waterboard\n", + "\n", + "warnings.simplefilter(\"ignore\")\n", + "pd.options.mode.chained_assignment = None\n", + "warnings.simplefilter(action=\"ignore\", category=UserWarning)\n", + "warnings.simplefilter(action=\"ignore\", category=FutureWarning)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "base_dir = Path(\"..\\\\..\\\\Ribasim modeldata\")\n", + "dx = 250.0\n", + "\n", + "waterschappen = [\n", + " \"HunzeenAas\",\n", + " \"DrentsOverijsselseDelta\",\n", + " \"Vechtstromen\",\n", + " \"RijnenIJssel\",\n", + " \"ValleienVeluwe\",\n", + " \"StichtseRijnlanden\",\n", + " \"BrabantseDelta\",\n", + " \"DeDommel\",\n", + " \"AaenMaas\",\n", + " \"Limburg\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for waterschap in waterschappen:\n", + " run_ribasim_lumping_for_waterboard(\n", + " base_dir=base_dir,\n", + " waterschap=waterschap,\n", + " dx=dx,\n", + " buffer_distance=1.0,\n", + " assign_unassigned_areas_to_basins=False if waterschap == \"ValleienVeluwe\" else True,\n", + " remove_isolated_basins=True,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ribasim_lumping_venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/vrij_afwaterend/hydamo_3_create_ribasim_model_networks.ipynb b/notebooks/vrij_afwaterend/hydamo_3_create_ribasim_model_networks.ipynb new file mode 100644 index 0000000..ee3f8e3 --- /dev/null +++ b/notebooks/vrij_afwaterend/hydamo_3_create_ribasim_model_networks.ipynb @@ -0,0 +1,313 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "from pathlib import Path\n", + "\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "\n", + "warnings.simplefilter(action=\"ignore\", category=UserWarning)\n", + "warnings.simplefilter(action=\"ignore\", category=FutureWarning)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def generate_ribasim_network(waterschap_path, split_nodes_type_conversion, 
split_nodes_id_conversion=None):\n",
+    "    ribasim_network_dir = Path(waterschap_path, \"4_ribasim\")\n",
+    "    ribasim_network_path = Path(ribasim_network_dir, \"ribasim_network.gpkg\")\n",
+    "    # ribasim_foreign_input_path = Path(ribasim_network_dir, \"foreign_input.gpkg\")  # Daniel Tollenaar: removed for pre-commit\n",
+    "    ribasim_model_path = Path(ribasim_network_dir, \"ribasim_model.gpkg\")\n",
+    "\n",
+    "    # Ribasim basins\n",
+    "    basins = gpd.read_file(ribasim_network_path, layer=\"basins\")\n",
+    "    basins = basins[[\"basin\", \"basin_node_id\", \"geometry\"]].rename(\n",
+    "        columns={\"basin\": \"meta_id\", \"basin_node_id\": \"node_id\"}\n",
+    "    )\n",
+    "    basins[\"node_type\"] = \"Basin\"\n",
+    "    basin_areas = gpd.read_file(ribasim_network_path, layer=\"basin_areas\")\n",
+    "    basin_areas.basin_node_id = basin_areas.basin_node_id.astype(int)\n",
+    "    basin_areas[\"meta_area\"] = basin_areas.geometry.area\n",
+    "    basins = basins.merge(\n",
+    "        basin_areas[[\"basin_node_id\", \"meta_area\"]].rename(columns={\"basin_node_id\": \"node_id\"}),\n",
+    "        how=\"left\",\n",
+    "        on=\"node_id\",\n",
+    "    )\n",
+    "    basin_areas = basin_areas[[\"basin_node_id\", \"meta_area\", \"geometry\"]]\n",
+    "    basins[\"name\"] = \"Basin\"\n",
+    "\n",
+    "    # Ribasim boundaries\n",
+    "    boundaries = gpd.read_file(ribasim_network_path, layer=\"boundaries\")\n",
+    "    boundaries[\"node_id\"] = boundaries[\"boundary_id\"]\n",
+    "    boundaries = boundaries[[\"node_id\", \"boundary_id\", \"boundary_name\", \"boundary_type\", \"geometry\"]].rename(\n",
+    "        columns={\"boundary_id\": \"meta_id\", \"boundary_name\": \"meta_name\", \"boundary_type\": \"node_type\"}\n",
+    "    )\n",
+    "    level_boundaries = boundaries.loc[boundaries[\"node_type\"] == \"LevelBoundary\", :].copy()\n",
+    "    level_boundaries[\"meta_water_level\"] = -10.0\n",
+    "\n",
+    "    # flow_boundaries = boundaries.loc[boundaries[\"node_type\"] == \"FlowBoundary\", :].copy()  # removed for pre-commit (14/08)\n",
+    "\n",
+    "    # RIBASIM LUMPING split nodes\n",
+    "    split_nodes = gpd.read_file(ribasim_network_path, layer=\"split_nodes\")\n",
+    "    split_nodes = split_nodes.rename(\n",
+    "        columns={\"object_type\": \"meta_object_type\", \"object_function\": \"meta_object_function\"}\n",
+    "    )\n",
+    "\n",
+    "    # Conversion split_node from object-type + object-function to node_type\n",
+    "    if \"meta_object_function\" in split_nodes.columns:\n",
+    "        split_nodes = split_nodes.merge(\n",
+    "            split_nodes_type_conversion, how=\"left\", on=[\"meta_object_type\", \"meta_object_function\"]\n",
+    "        )\n",
+    "    else:\n",
+    "        # no object functions available (D-HYDRO case): merge the conversion table on object type only\n",
+    "        split_nodes = split_nodes.merge(split_nodes_type_conversion, how=\"left\", on=\"meta_object_type\")\n",
+    "\n",
+    "    if isinstance(split_nodes_id_conversion, dict):\n",
+    "        for key, value in split_nodes_id_conversion.items():\n",
+    "            if len(split_nodes[split_nodes[\"split_node_id\"] == key]) == 0:\n",
+    "                print(f\"  * split_node type conversion id={key} (type={value}) does not exist\")\n",
+    "            split_nodes.loc[split_nodes[\"split_node_id\"] == key, \"meta_object_function\"] = value\n",
+    "\n",
+    "    # define final split_nodes\n",
+    "    split_nodes = split_nodes.loc[split_nodes.status & (split_nodes.meta_object_function != \"harde_knip\"), :].copy()\n",
+    "    if \"meta_object_function\" not in split_nodes:\n",
+    "        split_nodes[\"meta_object_function\"] = \"\"\n",
+    "    split_nodes = split_nodes[\n",
+    "        [\n",
+    "            \"split_node\",\n",
+    "            \"split_node_id\",\n",
+    "            \"split_node_node_id\",\n",
+    "            \"node_type\",\n",
+    "            \"meta_object_type\",\n",
+    "            \"meta_object_function\",\n",
+    "            \"geometry\",\n",
+    "        ]\n",
+    "    ]\n",
+    "    split_nodes = split_nodes.rename(\n",
+    "        columns={\"split_node\": \"meta_split_node_id\", \"split_node_id\": \"name\", \"split_node_node_id\": \"node_id\"}\n",
+    "    )\n",
+    "\n",
+    "    # Combine all nodes\n",
+    "    nodes = gpd.GeoDataFrame(pd.concat([boundaries, split_nodes, basins]), crs=28992).reset_index(drop=True)\n",
+    "\n",
+    "    # Combine all edges\n",
+    "    basin_connections = gpd.read_file(ribasim_network_path, layer=\"basin_connections\")\n",
+    "    boundary_connections = gpd.read_file(ribasim_network_path, layer=\"boundary_connections\")\n",
+    "\n",
+    "    edges_columns = [\"from_node_id\", \"to_node_id\", \"connection\", \"geometry\"]\n",
+    "    edges = gpd.GeoDataFrame(\n",
+    "        pd.concat([basin_connections[edges_columns], boundary_connections[edges_columns]]),\n",
+    "        geometry=\"geometry\",\n",
+    "        crs=28992,\n",
+    "    )\n",
+    "\n",
+    "    edges = edges[[\"from_node_id\", \"to_node_id\", \"geometry\"]]\n",
+    "\n",
+    "    edges = edges.merge(\n",
+    "        nodes[[\"node_id\", \"node_type\"]].rename(columns={\"node_id\": \"from_node_id\", \"node_type\": \"from_node_type\"}),\n",
+    "        how=\"left\",\n",
+    "        on=\"from_node_id\",\n",
+    "    )\n",
+    "    edges = edges.merge(\n",
+    "        nodes[[\"node_id\", \"node_type\"]].rename(columns={\"node_id\": \"to_node_id\", \"node_type\": \"to_node_type\"}),\n",
+    "        how=\"left\",\n",
+    "        on=\"to_node_id\",\n",
+    "    )\n",
+    "    edges[\"edge_type\"] = \"flow\"\n",
+    "\n",
+    "    # Export nodes and edges\n",
+    "    nodes.drop_duplicates(keep=\"first\").to_file(ribasim_model_path, layer=\"Node\")\n",
+    "    edges.drop_duplicates(keep=\"first\").to_file(ribasim_model_path, layer=\"Edge\")\n",
+    "\n",
+    "    print(f\"  - no of nodes: {len(nodes)}\")\n",
+    "    print(f\"  - no of edges: {len(edges)}\")\n",
+    "    return nodes, edges, split_nodes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+ "metadata": {}, + "outputs": [], + "source": [ + "base_dir = Path(\"..\\\\Ribasim modeldata\")\n", + "\n", + "waterschappen = [\n", + " \"Noorderzijlvest\",\n", + " \"HunzeenAas\",\n", + " \"DrentsOverijsselseDelta\",\n", + " \"Vechtstromen\",\n", + " \"RijnenIJssel\",\n", + " \"ValleienVeluwe\",\n", + " \"StichtseRijnlanden\",\n", + " \"BrabantseDelta\",\n", + " \"DeDommel\",\n", + " \"AaenMaas\",\n", + " \"Limburg\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# HYDAMO (10 waterschappen)\n", + "split_nodes_type_conversion_hydamo = pd.DataFrame(\n", + " columns=[\"meta_object_type\", \"meta_object_function\", \"node_type\"],\n", + " data=[\n", + " [\"stuw\", \"\", \"TabulatedRatingCurve\"],\n", + " [\"stuw\", \"afwaterend\", \"TabulatedRatingCurve\"],\n", + " [\"stuw\", \"inlaat\", \"Outlet\"],\n", + " [\"afsluitmiddel\", \"\", \"Outlet\"],\n", + " [\"afsluitmiddel\", \"inlaat\", \"Outlet\"],\n", + " [\"afsluitmiddel\", \"uitlaat\", \"Outlet\"],\n", + " [\"duikersifonhevel\", \"\", \"ManningResistance\"],\n", + " [\"duikersifonhevel\", \"inlaat\", \"Outlet\"],\n", + " [\"duikersifonhevel\", \"afwaterend\", \"TabulatedRatingCurve\"],\n", + " [\"duikersifonhevel\", \"open verbinding\", \"ManningResistance\"],\n", + " [\"openwater\", \"\", \"ManningResistance\"],\n", + " [\"openwater\", \"open verbinding\", \"ManningResistance\"],\n", + " [\"openwater\", \"afwaterend\", \"TabulatedRatingCurve\"],\n", + " [\"gemaal\", \"\", \"Pump\"],\n", + " [\"gemaal\", \"afvoer\", \"Pump\"],\n", + " [\"gemaal\", \"aanvoer\", \"Pump\"],\n", + " [\"gemaal\", \"aanvoer/afvoer\", \"Pump\"],\n", + " [\"sluis\", \"\", \"Outlet\"],\n", + " [\"sluis\", \"schut- en lekverlies\", \"Outlet\"],\n", + " [\"sluis\", \"spui\", \"Outlet\"],\n", + " [\"sluis\", \"keersluis\", \"Outlet\"],\n", + " ],\n", + ")\n", + "\n", + "# DHYDRO (NOORDERZIJLVEST)\n", + "split_nodes_type_conversion_dhydro = pd.DataFrame(\n", + " columns=[\"meta_object_type\", \"node_type\"],\n", + " data=[\n", + " [\"weir\", \"TabulatedRatingCurve\"],\n", + " [\"uniweir\", \"TabulatedRatingCurve\"],\n", + " [\"universalWeir\", \"TabulatedRatingCurve\"],\n", + " [\"pump\", \"Pump\"],\n", + " [\"culvert\", \"ManningResistance\"],\n", + " [\"openwater\", \"ManningResistance\"],\n", + " [\"orifice\", \"Outlet\"],\n", + " ],\n", + ")\n", + "\n", + "te_verwijderen_aanvoergemalen = [\n", + " \"iKGM004\",\n", + " \"iKGM036\",\n", + " \"iKGM069\",\n", + " \"iKGM073\",\n", + " \"iKGM086\",\n", + " \"iKGM101\",\n", + " \"iKGM102\",\n", + " \"iKGM129\",\n", + " \"iKGM157\",\n", + " \"iKGM163\",\n", + " \"iKGM165\",\n", + " \"iKGM189\",\n", + " \"iKGM190\",\n", + " \"iKGM192\",\n", + " \"iKGM194\",\n", + " \"iKGM198\",\n", + " \"iKGM206\",\n", + " \"iKGM214\",\n", + " \"iKGM226\",\n", + " \"iKGM241\",\n", + " \"iKGM248\",\n", + " \"iKGM260\",\n", + " \"iKGM265\",\n", + " \"iKGM295\",\n", + " \"iKGM302\",\n", + " \"iKST0163\",\n", + " \"iKST0470\",\n", + " \"iKST0569\",\n", + " \"iKST0572\",\n", + " \"iKST0624\",\n", + " \"iKST0707\",\n", + " \"iKST6330\",\n", + " \"iKST6352\",\n", + " \"iKST6386\",\n", + " \"iKST6388\",\n", + " \"iKST6415\",\n", + " \"iKST6622\",\n", + " \"iKST9950\",\n", + "]\n", + "split_nodes_id_conversion_dhydro = {g: \"harde_knip\" for g in te_verwijderen_aanvoergemalen}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for waterschap in waterschappen:\n", + " print(f\"Waterschap: 
{waterschap}\")\n", + " waterschap_path = Path(base_dir, waterschap, \"verwerkt\")\n", + " if waterschap == \"Noorderzijlvest\":\n", + " nodes, edges, split_nodes = generate_ribasim_network(\n", + " waterschap_path, split_nodes_type_conversion_dhydro, split_nodes_id_conversion_dhydro\n", + " )\n", + " else:\n", + " nodes, edges, split_nodes = generate_ribasim_network(waterschap_path, split_nodes_type_conversion_hydamo)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ribasim_lumping_venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/vrij_afwaterend/hydamo_4_create_dummy_ribasim_models.ipynb b/notebooks/vrij_afwaterend/hydamo_4_create_dummy_ribasim_models.ipynb new file mode 100644 index 0000000..0e1693f --- /dev/null +++ b/notebooks/vrij_afwaterend/hydamo_4_create_dummy_ribasim_models.ipynb @@ -0,0 +1,174 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "from pathlib import Path\n", + "\n", + "import fiona\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "from ribasim_lumping_tools.default_model import DEFAULTS, default_model\n", + "\n", + "warnings.simplefilter(action=\"ignore\", category=UserWarning)\n", + "warnings.simplefilter(action=\"ignore\", category=FutureWarning)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Read RIBASIM model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "base_dir = Path(\"..\\\\Ribasim modeldata\\\\\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "waterschappen = {\n", + " \"AaenMaas\": \"NL38\",\n", + " \"BrabantseDelta\": \"NL25\",\n", + " \"DeDommel\": \"NL27\",\n", + " \"DrentsOverijsselseDelta\": \"NL59\",\n", + " \"HunzeenAas\": \"NL33\",\n", + " \"Limburg\": \"NL60\",\n", + " \"Noorderzijlvest\": \"NL34\",\n", + " \"RijnenIJssel\": \"NL7\",\n", + " \"StichtseRijnlanden\": \"NL14\",\n", + " \"ValleienVeluwe\": \"NL8\",\n", + " \"Vechtstromen\": \"NL44\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "new_model_dir_string = \"..\\\\modellen\\\\WATERBOARD\\\\modellen\\\\WATERBOARD_2024_6_3\"\n", + "\n", + "for waterschap, waterschap_code in waterschappen.items():\n", + " print(waterschap)\n", + " new_model_dir = Path(new_model_dir_string.replace(\"WATERBOARD\", waterschap))\n", + " print(new_model_dir)\n", + "\n", + " if not new_model_dir.exists():\n", + " os.makedirs(new_model_dir)\n", + "\n", + " # gpkg\n", + " old_ribasim_model_gpkg = Path(base_dir, waterschap, \"verwerkt\", \"4_ribasim\", \"ribasim_model.gpkg\")\n", + " old_krw_gpkg = Path(base_dir, waterschap, 
\"verwerkt\", \"4_ribasim\", \"krw.gpkg\")\n", + "\n", + " # read nodes\n", + " node_df = gpd.read_file(old_ribasim_model_gpkg, layer=\"Node\", engine=\"pyogrio\", fid_as_index=True)\n", + " node_df = node_df.rename(columns={\"type\": \"node_type\"})\n", + " node_df[\"meta_code\"] = waterschap_code\n", + "\n", + " # read edges\n", + " edge_df = gpd.read_file(old_ribasim_model_gpkg, layer=\"Edge\", engine=\"pyogrio\", fid_as_index=True)\n", + "\n", + " # read basin areas\n", + " basin_areas = gpd.read_file(\n", + " str(old_ribasim_model_gpkg).replace(\"ribasim_model.gpkg\", \"ribasim_network.gpkg\"), layer=\"basin_areas\"\n", + " )\n", + " basin_areas = basin_areas[[\"basin_node_id\", \"geometry\"]].rename(columns={\"basin_node_id\": \"node_id\"})\n", + " basin_areas.node_id = basin_areas.node_id.astype(int)\n", + "\n", + " # read krw\n", + " krw = gpd.GeoDataFrame()\n", + " krw_layers = fiona.listlayers(str(old_krw_gpkg))\n", + " if \"krw_line\" in krw_layers:\n", + " krw_line = gpd.read_file(str(old_krw_gpkg), layer=\"krw_line\").explode(index_parts=True)\n", + " krw_line.geometry = krw_line.geometry.buffer(10, join_style=\"bevel\")\n", + " krw = pd.concat([krw, krw_line])\n", + " if \"krw_vlak\" in krw_layers:\n", + " krw_vlak = gpd.read_file(str(old_krw_gpkg), layer=\"krw_vlak\").explode(index_parts=True)\n", + " krw = pd.concat([krw, krw_vlak])\n", + " krw = krw[[\"owmident\", \"owmnaam\", \"owmtype\", \"geometry\"]].reset_index(drop=True)\n", + " krw.columns = [\"meta_krw_id\", \"meta_krw_name\", \"meta_krw_type\", \"geometry\"]\n", + "\n", + " node_df = (\n", + " node_df.sjoin(krw, how=\"left\").drop(columns=[\"index_right\"]).drop_duplicates(subset=\"node_id\", keep=\"first\")\n", + " )\n", + " node_df[\"meta_categorie\"] = \"doorgaand\"\n", + " node_df.loc[~node_df.meta_krw_id.isna(), \"meta_categorie\"] = \"hoofdwater\"\n", + "\n", + " # create default model\n", + " model = default_model(node_df, edge_df, basin_areas, **DEFAULTS)\n", + "\n", + " # write model to disk\n", + " ribasim_toml = Path(new_model_dir, \"model.toml\")\n", + " model.write(str(ribasim_toml))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ribasim_lumping_venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/vrij_afwaterend/hydamo_preprocessing/__init__.py b/notebooks/vrij_afwaterend/hydamo_preprocessing/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/notebooks/vrij_afwaterend/hydamo_preprocessing/general.py b/notebooks/vrij_afwaterend/hydamo_preprocessing/general.py new file mode 100644 index 0000000..530afd2 --- /dev/null +++ b/notebooks/vrij_afwaterend/hydamo_preprocessing/general.py @@ -0,0 +1,527 @@ +import datetime +import itertools +import logging +import time +import warnings + +import geopandas as gpd +import pandas as pd +from shapely.geometry import LineString, Point + +# %% Monitoring + + +def 
report_time_interval(start: float, end: float) -> str:
+    """
+    Report the time interval between a given start and end time
+
+    Args:
+        start (float): start time in seconds, e.g. from time.time()
+        end (float): end time in seconds, e.g. from time.time()
+
+    Returns
+    -------
+    str: Report of time interval in hours, minutes and seconds
+    """
+    temp = end - start
+    hours = int(temp // 3600)
+    temp = temp - 3600 * hours
+    minutes = int(temp // 60)
+    seconds = round(temp - 60 * minutes, 2)
+    passed_time = ""
+    if hours != 0:
+        passed_time += f"{hours} hour(s), "
+    if minutes != 0:
+        passed_time += f"{minutes} minute(s) and "
+
+    passed_time += f"{seconds} seconds"
+    return passed_time
+
+
+# %% Extended geospatial vector operations
+
+
+def get_endpoints_from_lines(lines: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
+    """
+    Extract all unique endpoints of line features from vector data
+
+    Args:
+        lines (gpd.GeoDataFrame): GeoDataFrame containing line features
+            (a unique "code" column is required)
+
+    Returns
+    -------
+    gpd.GeoDataFrame: GeoDataFrame containing all unique endpoints from
+        line features
+    """
+    lines[["startpoint", "endpoint"]] = lines["geometry"].apply(lambda x: pd.Series([x.coords[0], x.coords[-1]]))
+    endpoints = pd.unique(lines[["startpoint", "endpoint"]].values.ravel("K"))
+    endpoints = gpd.GeoDataFrame({"coordinates": endpoints})
+    endpoints["starting_lines"] = endpoints["coordinates"].apply(
+        lambda x: lines["code"][lines["startpoint"] == x].values
+    )
+    endpoints["ending_lines"] = endpoints["coordinates"].apply(lambda x: lines["code"][lines["endpoint"] == x].values)
+    endpoints["starting_line_count"] = endpoints.apply(lambda x: len(list(x["starting_lines"])), axis=1)
+    endpoints["ending_line_count"] = endpoints.apply(lambda x: len(list(x["ending_lines"])), axis=1)
+    endpoints["connected_line_count"] = endpoints.apply(
+        lambda x: x["starting_line_count"] + x["ending_line_count"], axis=1
+    )
+    endpoints_geometry = endpoints.coordinates.apply(lambda x: Point(x))
+    endpoints = endpoints.set_geometry(endpoints_geometry)
+    return endpoints
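Note: a minimal sketch of what get_endpoints_from_lines expects and returns, on toy data (import path as added in this PR):

    import geopandas as gpd
    from shapely.geometry import LineString

    from hydamo_preprocessing.general import get_endpoints_from_lines

    lines = gpd.GeoDataFrame(
        {"code": ["a", "b"]},
        geometry=[LineString([(0, 0), (1, 0)]), LineString([(1, 0), (2, 0)])],
    )
    endpoints = get_endpoints_from_lines(lines)
    # the shared vertex (1, 0) has one ending line ("a") and one starting line ("b")
    print(endpoints[["coordinates", "starting_line_count", "ending_line_count"]])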
+
+
+def add_point_to_linestring(point: Point, linestring: LineString) -> tuple:
+    """
+    Insert a point into a linestring, placing the point next to its nearest
+    neighbouring point in a way that minimizes the total length of the linestring.
+
+    Args:
+        point (Point): point
+        linestring (LineString): linestring
+
+    Returns
+    -------
+    tuple: the modified linestring and the index of the nearest neighbouring point
+    """
+    distances = [point.distance(Point(line_point)) for line_point in linestring.coords]
+    index_nearest_neighbour = distances.index(min(distances))
+    modified_linestring1 = LineString(
+        list(linestring.coords)[: index_nearest_neighbour + 1]
+        + [point.coords[0]]
+        + list(linestring.coords)[index_nearest_neighbour + 1 :]
+    )
+    modified_linestring2 = LineString(
+        list(linestring.coords)[:index_nearest_neighbour]
+        + [point.coords[0]]
+        + list(linestring.coords)[index_nearest_neighbour:]
+    )
+    modified_linestring = (
+        modified_linestring1 if modified_linestring1.length < modified_linestring2.length else modified_linestring2
+    )
+    return (modified_linestring, index_nearest_neighbour)
+
+
+def split_linestring_by_indices(linestring: LineString, split_indices: list) -> list:
+    """
+    Divide a linestring into multiple linestrings based on a list that contains the indices of the split points within the linestring
+
+    Args:
+        linestring (LineString): Linestring
+        split_indices (list): List of indices of split nodes within the linestring
+
+    Returns
+    -------
+    list: list of resulting linestrings
+    """
+    split_linestrings = []
+    split_indices = sorted(set([0] + split_indices + [len(linestring.coords) - 1]))
+    for i in range(len(split_indices) - 1):
+        split_linestrings.append(LineString(linestring.coords[split_indices[i] : split_indices[i + 1] + 1]))
+
+    return split_linestrings
+
+
+def remove_duplicate_split_lines(lines: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
+    """
+    Remove duplicates of split lines from a line feature vector dataset. Duplicates are removed from a subselection of the line feature dataset that contains all line features that have been split.
+
+    Args:
+        lines (gpd.GeoDataFrame): Vector data containing line features
+
+    Returns
+    -------
+    gpd.GeoDataFrame: Vector data containing line features without duplicates
+    """
+    lines["distance"] = lines.length
+    updated_endpoints = get_endpoints_from_lines(lines)
+    ending_lines_clean_start = updated_endpoints[(updated_endpoints["starting_lines"].str.len() > 1)]["starting_lines"]
+    ending_lines_clean_start = list(itertools.chain.from_iterable(ending_lines_clean_start))
+    ending_lines_clean_end = updated_endpoints[(updated_endpoints["ending_lines"].str.len() > 1)]["ending_lines"]
+    ending_lines_clean_end = list(itertools.chain.from_iterable(ending_lines_clean_end))
+    lines_to_remove = list(pd.unique(pd.Series(ending_lines_clean_start + ending_lines_clean_end)))
+    lines = lines[
+        ~(
+            (lines["distance"] <= 0.5)
+            & (lines["preprocessing_split"] == "Opgeknipt")
+            & (lines["code"].isin(lines_to_remove))
+        )
+    ].reset_index(drop=True)
+
+    return lines
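Note: for intuition, splitting a four-point linestring at its second vertex with split_linestring_by_indices (toy values):

    from shapely.geometry import LineString

    from hydamo_preprocessing.general import split_linestring_by_indices

    ls = LineString([(0, 0), (1, 0), (2, 0), (3, 0)])
    parts = split_linestring_by_indices(ls, [1])
    # two parts: (0,0)-(1,0) and (1,0)-(2,0)-(3,0)
    print([list(p.coords) for p in parts])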
+
+
+def connect_lines_by_endpoints(split_endpoints: gpd.GeoDataFrame, lines: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
+    """
+    Connect boundary lines to other lines, based on instructions for each line endpoint. Connections are
+    created by inserting the endpoints into their target lines. The target line features are
+    split afterwards in order to create nodes at the intersection of the connected linestrings.
+
+    Args:
+        split_endpoints (gpd.GeoDataFrame): Dataframe containing line endpoints and instructions
+        lines (gpd.GeoDataFrame): Vector dataset containing line features
+
+    Returns
+    -------
+    gpd.GeoDataFrame: line feature dataframe
+    """
+    listed_lines = list(itertools.chain.from_iterable(split_endpoints["target_lines"]))
+    listed_points = list(itertools.chain.from_iterable(split_endpoints["points_to_target_lines"]))
+    connections_to_create = pd.DataFrame({"lines": listed_lines, "point": listed_points})
+    connections_to_create["inserted"] = False
+    grouped_split_points_by_line = connections_to_create.groupby("lines")["point"]
+    splits = [{"split_line": line, "split_points": list(points)} for line, points in grouped_split_points_by_line]
+
+    # A dataframe is created to store the resulting linestrings from the splits
+    split_lines = gpd.GeoDataFrame(columns=lines.columns)
+    split_lines["preprocessing_split"] = None
+
+    for split_action in splits:
+        split_edge = split_action["split_line"]
+        line = lines[lines["code"] == split_edge]
+        linestring = line["geometry"].values[0]
+        nodes_to_add = []
+        for node in split_action["split_points"]:
+            (modified_linestring, index_nearest_neighbour) = add_point_to_linestring(Point(node), linestring)
+
+            if index_nearest_neighbour == 0 and linestring.coords[0] in list(
+                connections_to_create.loc[connections_to_create["inserted"], "point"].values
+            ):
+                continue
+
+            elif index_nearest_neighbour == len(linestring.coords) - 1 and linestring.coords[-1] in list(
+                connections_to_create.loc[connections_to_create["inserted"], "point"].values
+            ):
+                continue
+
+            linestring = modified_linestring
+
+            connections_to_create.loc[connections_to_create["point"] == node, "inserted"] = True
+
+            nodes_to_add += [node]
+
+        # The modified line is divided into separate linestrings
+        split_indices = [list(linestring.coords).index(node) for node in nodes_to_add]
+        split_linestrings = split_linestring_by_indices(linestring, split_indices)
+
+        # Append split linestrings to collection of new lines
+        for k, split_linestring in enumerate(split_linestrings):
+            snip_line = line.copy()
+            snip_line["geometry"] = split_linestring
+            snip_line["preprocessing_split"] = "Opgeknipt"
+            snip_line["code"] = f'{snip_line["code"].values[0]}-{k}'
+            split_lines = pd.concat([split_lines, snip_line], axis=0, join="inner")
+
+    # Remove lines that have been divided from the original geodataframe, and append the resulting lines
+    unedited_lines = lines[~lines["code"].isin(connections_to_create["lines"])]
+    connected_lines = pd.concat([unedited_lines, split_lines], axis=0, join="outer").reset_index(drop=True)
+
+    # Remove duplicate split lines
+    lines = remove_duplicate_split_lines(connected_lines)
+    return lines
+
+
+def connect_endpoints_by_buffer(lines: gpd.GeoDataFrame, buffer_distance: float = 0.5) -> gpd.GeoDataFrame:
+    """
+    Connect boundary line endpoints within a vector dataset to neighbouring lines that pass
+    within a specified buffer distance of the boundary endpoints. The boundary
+    endpoints are inserted into the passing linestrings.
+
+    Args:
+        lines (gpd.GeoDataFrame): line vector data
+        buffer_distance (float): buffer distance for connecting line boundary endpoints, expressed
+            in the distance unit of the vector line dataset
+
+    Returns
+    -------
+    gpd.GeoDataFrame: line vector data with connected endpoints
+    """
+    warnings.filterwarnings("ignore")
+    start_time = time.time()
+    iterations = 0
+    unconnected_endpoints_count = 0
+    finished = False
+
+    logging.info(f"Detect unconnected endpoints nearby linestrings, buffer distance: {buffer_distance}m")
+
+    while not finished:
+        endpoints = get_endpoints_from_lines(lines)
+
+        boundary_endpoints = gpd.GeoDataFrame(
+            endpoints[(endpoints["starting_line_count"] == 0) | (endpoints["ending_line_count"] == 0)]
+        )
+        lines["buffer_geometry"] = lines.geometry.buffer(buffer_distance, join_style="round")
+
+        boundary_endpoints["overlaying_line_buffers"] = list(  # noqa: C417: a list comprehension would do
+            map(lambda x: lines[lines.buffer_geometry.contains(x)].code.tolist(), boundary_endpoints.geometry)
+        )
+        boundary_endpoints["startpoint_overlaying_line_buffers"] = boundary_endpoints.apply(
+            lambda x: list(  # noqa: C417: a list comprehension would do
+                map(
+                    lambda y: x["coordinates"] in list(lines[lines.code == y].endpoint.values),
+                    x["overlaying_line_buffers"],
+                )
+            ),
+            axis=1,
+        )
+        boundary_endpoints["endpoint_overlaying_line_buffers"] = boundary_endpoints.apply(
+            lambda x: list(  # noqa: C417: a list comprehension would do
+                map(
+                    lambda y: x["coordinates"] in list(lines[lines.code == y].startpoint.values),
+                    x["overlaying_line_buffers"],
+                )
+            ),
+            axis=1,
+        )
+        boundary_endpoints["start_or_endpoint_overlaying_line_buffers"] = boundary_endpoints.apply(
+            lambda x: list(zip(x["startpoint_overlaying_line_buffers"], x["endpoint_overlaying_line_buffers"])), axis=1
+        )
+        boundary_endpoints["crossed_by_unconnected_lines"] = boundary_endpoints.apply(
+            lambda x: True in [True not in y for y in x["start_or_endpoint_overlaying_line_buffers"]], axis=1
+        )
+        unconnected_endpoints = boundary_endpoints[
+            boundary_endpoints["crossed_by_unconnected_lines"]
+        ].reset_index(drop=True)
+        unconnected_endpoints["target_lines"] = unconnected_endpoints.apply(
+            lambda x: [
+                x["overlaying_line_buffers"][i]
+                for i in range(len(x["overlaying_line_buffers"]))
+                if x["start_or_endpoint_overlaying_line_buffers"][i] == (False, False)
+            ],
+            axis=1,
+        )
+        unconnected_endpoints["points_to_target_lines"] = unconnected_endpoints.apply(
+            lambda x: [x["coordinates"]] * len(x["target_lines"]), axis=1
+        )
+        previous_unconnected_endpoints_count = unconnected_endpoints_count
+        unconnected_endpoints_count = len(unconnected_endpoints)
+        if iterations == 0:
+            unconnected_endpoints_count_total = unconnected_endpoints_count
+            logging.info(f"{unconnected_endpoints_count} unconnected endpoints detected nearby intersecting lines")
+        if unconnected_endpoints_count != 0 and unconnected_endpoints_count != previous_unconnected_endpoints_count:
+            logging.info("Connecting linestrings...")
+            lines = connect_lines_by_endpoints(unconnected_endpoints, lines)
+            iterations += 1
+            logging.info("Linestrings connected, starting new iteration...")
+        else:
+            lines = lines.drop(["startpoint", "endpoint", "buffer_geometry"], axis=1)
+            finished = True
+
+    end_time = time.time()
+    passed_time = report_time_interval(start_time, end_time)
+    logging.info(
+        f"Summary:\n\n\
+        Detected unconnected endpoints nearby intersecting lines: {unconnected_endpoints_count_total} \n\
+        Connected endpoints: {unconnected_endpoints_count_total - unconnected_endpoints_count} \n\
+        Remaining unconnected endpoints: {unconnected_endpoints_count}\n\
+        Iterations: {iterations}"
+    )
+    logging.info(f"Finished within {passed_time}")
+    return lines
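Note: a toy run of the routine above — a dangling endpoint that passes within the buffer of another line gets inserted into it, and the target line is split. A sketch; exact output depends on the snapping iterations:

    import geopandas as gpd
    from shapely.geometry import LineString

    from hydamo_preprocessing.general import connect_endpoints_by_buffer

    lines = gpd.GeoDataFrame(
        {"code": ["main", "side"]},
        geometry=[
            LineString([(0, 0), (10, 0)]),
            LineString([(5, 0.3), (5, 5)]),  # endpoint 0.3 units from "main"
        ],
    )
    connected = connect_endpoints_by_buffer(lines, buffer_distance=0.5)
    # "main" should now be split at the inserted point (codes like main-0, main-1)
    print(connected["code"].tolist())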
+
+
+def add_overlapping_polygons(
+    left_geodataframe: gpd.GeoDataFrame,
+    right_geodataframe: gpd.GeoDataFrame,
+    left_id_column: str,
+    right_id_column: str,
+):
+    """
+    Create a column in a left geodataframe that lists the overlapping
+    polygons from the right geodataframe for each polygon in the left geodataframe.
+    The id columns of the right and left dataframe have to be defined.
+
+    Parameters
+    ----------
+    left_geodataframe : gpd.GeoDataFrame
+        the left geodataframe
+    right_geodataframe : gpd.GeoDataFrame
+        the right geodataframe
+    left_id_column : str
+        the name of the ID column in the left geodataframe
+    right_id_column : str
+        the name of the ID column in the right geodataframe,
+        from which the values will be added to the left geodataframe
+
+    Returns
+    -------
+    left_geodataframe : gpd.GeoDataFrame
+        the updated left geodataframe with an added column which contains
+        insights in the overlapping polygons from the right dataframe
+
+    """  # noqa: D205
+    # Calculate total areas of left and right polygons
+    left_geodataframe["left_area"] = left_geodataframe.area
+    right_geodataframe["surface_area"] = right_geodataframe.area
+    right_geodataframe["right_geometry"] = right_geodataframe["geometry"]
+
+    # Join left and right polygons
+    joined = gpd.sjoin(left_geodataframe, right_geodataframe, how="left", predicate="intersects")
+    joined = joined.loc[:, ~joined.columns.duplicated()].copy()
+
+    # Get overlapping right polygon ids, polygons & areas for left polygons
+    grouped = pd.DataFrame(joined.groupby(left_id_column)[right_id_column].unique().reset_index(name=right_id_column))
+    grouped["right_geometry"] = (
+        joined.groupby(left_id_column)["right_geometry"].unique().reset_index(name="right_geometry")["right_geometry"]
+    )
+    grouped["right_area"] = (
+        joined.groupby(left_id_column)["surface_area"].unique().reset_index(name="right_area")["right_area"]
+    )
+
+    # Drop NA values from overlapping polygon info columns
+    grouped[right_id_column] = grouped[right_id_column].apply(lambda x: pd.Series(x).dropna().tolist())
+    grouped["right_area"] = grouped["right_area"].apply(lambda x: pd.Series(x).dropna().tolist())
+    grouped["right_geometry"] = grouped["right_geometry"].apply(lambda x: pd.Series(x).dropna().tolist())
+
+    # Merge
+    left_geodataframe = left_geodataframe.merge(
+        grouped[[left_id_column, right_id_column, "right_geometry", "right_area"]], on=left_id_column, how="left"
+    )
+
+    # Postprocessing
+    left_geodataframe["overlapping_areas"] = left_geodataframe.apply(
+        lambda x: [y.intersection(x["geometry"]).area for y in x["right_geometry"]], axis=1
+    )
+    left_geodataframe["overlapping_areas"] = left_geodataframe.apply(
+        lambda x: [
+            {
+                "id": x[right_id_column][i],
+                "right_area": x["right_area"][i],
+                "overlapping_area": x["overlapping_areas"][i],
+                "intersection_length": x["right_geometry"][i].intersection(x["geometry"]).length,
+            }
+            for i in range(len(x["right_area"]))
+        ],
+        axis=1,
+    )
+    left_geodataframe = left_geodataframe.drop(columns=["right_area", "right_geometry"])
+
+    return left_geodataframe
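Note: each row's overlapping_areas entry is a list of dicts with keys "id", "right_area", "overlapping_area" and "intersection_length". A sketch on toy polygons (column names from the function above, ids hypothetical):

    import geopandas as gpd
    from shapely.geometry import box

    from hydamo_preprocessing.general import add_overlapping_polygons

    left = gpd.GeoDataFrame({"basin_id": [1]}, geometry=[box(0, 0, 2, 2)])
    right = gpd.GeoDataFrame(
        {"owmident": ["NL99_A", "NL99_B"]},
        geometry=[box(1, 0, 3, 2), box(-1, 0, 0.5, 2)],
    )
    out = add_overlapping_polygons(left, right, "basin_id", "owmident")
    print(out.loc[0, "overlapping_areas"])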
+
+
+def get_most_overlapping_polygon(
+    left_geodataframe: gpd.GeoDataFrame,
+    right_geodataframe: gpd.GeoDataFrame,
+    left_id_column: str,
+    right_id_column: str,
+):
+    """
+    Create a column in a left geodataframe that contains the ID of the most overlapping
+    polygon from the right geodataframe based on their geometries.
+    The id columns of the left and right dataframe have to be defined.
+
+    Parameters
+    ----------
+    left_geodataframe : gpd.GeoDataFrame
+        the left geodataframe
+    right_geodataframe : gpd.GeoDataFrame
+        the right geodataframe
+    left_id_column : str
+        the name of the ID column in the left geodataframe
+    right_id_column : str
+        the name of the ID column in the right geodataframe,
+        from which the values will be added to the left geodataframe
+
+    Returns
+    -------
+    left_geodataframe : gpd.GeoDataFrame
+        the updated left geodataframe
+
+    """  # noqa: D205
+    left_geodataframe = add_overlapping_polygons(left_geodataframe, right_geodataframe, left_id_column, right_id_column)
+
+    left_geodataframe["overlapping_areas"] = left_geodataframe["overlapping_areas"].apply(lambda x: pd.DataFrame(x))
+
+    left_geodataframe["most_overlapping_polygon_id"] = left_geodataframe.apply(
+        lambda x: x["overlapping_areas"][
+            x["overlapping_areas"]["overlapping_area"] == x["overlapping_areas"]["overlapping_area"].max()
+        ]["id"].values[0]
+        if len(x["overlapping_areas"]) != 0
+        else None,
+        axis=1,
+    )
+
+    left_geodataframe["most_overlapping_polygon_area"] = left_geodataframe.apply(
+        lambda x: x["overlapping_areas"][
+            x["overlapping_areas"]["overlapping_area"] == x["overlapping_areas"]["overlapping_area"].max()
+        ]["overlapping_area"].values[0]
+        if len(x["overlapping_areas"]) != 0
+        else None,
+        axis=1,
+    )
+
+    return left_geodataframe
+
+
+def get_polygon_with_largest_area(polygons, id_col, area_col):
+    """Return the id of the polygon with the largest area, or 0 when there are none."""
+    if len(polygons) == 0:
+        return 0
+    else:
+        polygons[area_col] = polygons.area
+        polygons = polygons[polygons[area_col] == max(polygons[area_col])]
+        return polygons[id_col].values[0]
+
+
+def get_most_overlapping_polygon_from_other_gdf(left_gdf, right_gdf, left_id, right_id):
+    if right_id in left_gdf.columns:
+        future_right_id = f"{right_id}_2"
+        future_left_id = f"{left_id}_1"
+    else:
+        future_right_id = right_id
+        future_left_id = left_id
+    combined_gdf = left_gdf.overlay(right_gdf, how="intersection")
+    combined_gdf["area_geometry"] = combined_gdf.area
+    left_gdf["overlapping_polygons"] = left_gdf[left_id].apply(
+        lambda x: combined_gdf[combined_gdf[future_left_id] == x]
+    )
+    left_gdf["right_id"] = left_gdf["overlapping_polygons"].apply(
+        lambda x: get_polygon_with_largest_area(x, future_right_id, "area_geometry")
+    )
+    left_gdf = left_gdf.drop(columns=["overlapping_polygons"])
+    return left_gdf
+
+
+def get_touching_polygons_from_within_gdf(gdf, id_col):
+    id_col_right = f"{id_col}_2"
+    right_gdf, left_gdf = gdf.copy(), gdf.copy()
+    right_gdf[id_col_right] = right_gdf[id_col]
+    right_gdf = right_gdf.drop(columns=[id_col])
+    joined_gdf = left_gdf.sjoin(right_gdf, predicate="touches")
+    gdf["touching_polygons"] = gdf[id_col].apply(
+        lambda x: list(joined_gdf[id_col_right][(joined_gdf[id_col] == x) & (joined_gdf[id_col_right] != x)].unique())
+    )
+    return gdf
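Note: a toy check of the adjacency helper above (ids hypothetical):

    import geopandas as gpd
    from shapely.geometry import box

    from hydamo_preprocessing.general import get_touching_polygons_from_within_gdf

    gdf = gpd.GeoDataFrame(
        {"id": ["a", "b", "c"]},
        geometry=[box(0, 0, 1, 1), box(1, 0, 2, 1), box(5, 0, 6, 1)],
    )
    out = get_touching_polygons_from_within_gdf(gdf, "id")
    # "a" and "b" share an edge; "c" touches nothing
    print(out[["id", "touching_polygons"]])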
+
+
+def get_most_adjacent_polygon_within_gdf(left_gdf, left_id, right_gdf=None, right_id=None):
+    def get_most_intersecting(gdf, polygon, left_id):
+        try:
+            gdf = gpd.GeoDataFrame(gdf.drop(columns="geometry"), geometry=gdf["geometry"])
+            gdf["geometry"] = gdf.buffer(0.5)
+            gdf["intersection_length"] = gdf["geometry"].apply(lambda x: x.intersection(polygon).boundary.length)
+            most_overlapping_polygon = gdf[left_id][
+                gdf["intersection_length"] == gdf["intersection_length"].max()
+            ].values[0]
+            return most_overlapping_polygon
+        except:  # noqa: E722 FIXME: catch the specific exception explicitly
+            return None
+
+    left_gdf = get_touching_polygons_from_within_gdf(left_gdf, left_id)
+    if isinstance(right_gdf, gpd.GeoDataFrame):
+        left_gdf = get_most_overlapping_polygon_from_other_gdf(left_gdf, right_gdf, left_id, right_id)
+        left_gdf.loc[left_gdf[left_id] == left_gdf["right_id"], "right_id"] = None
+    left_gdf["touching_polygons"] = left_gdf["touching_polygons"].apply(
+        lambda x: pd.DataFrame(left_gdf[left_gdf[left_id].isin(x)])
+    )
+    left_gdf["touching_polygons"] = left_gdf["touching_polygons"].apply(lambda x: x[x["basin"].notna()])
+    if isinstance(right_gdf, gpd.GeoDataFrame):  # FIXME: probably right_gdf should be left_gdf here
+        left_gdf["touching_polygons"] = left_gdf.apply(
+            lambda x: x["touching_polygons"][x["touching_polygons"]["right_id"] == x["right_id"]]
+            if x["right_id"] is not None
+            else x["touching_polygons"],
+            axis=1,
+        )
+    left_gdf["most_adjacent_polygon"] = left_gdf.apply(
+        lambda x: get_most_intersecting(x["touching_polygons"], x["geometry"], left_id), axis=1
+    )
+    left_gdf = left_gdf.drop(columns=["touching_polygons"])
+    return left_gdf
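Note: preprocessing.py (the next file) wires these helpers into the wfd (water framework directive) assignment. Typical use, with the layer and column names used elsewhere in this PR (paths hypothetical):

    import geopandas as gpd

    from hydamo_preprocessing.preprocessing import add_wfd_id_to_hydroobjects

    hydroobjects = gpd.read_file("hydamo.gpkg", layer="hydroobject")
    wfd_lines = gpd.read_file("krw.gpkg", layer="krw_line")
    # assigns an "owmident" column where a hydroobject sufficiently overlaps a wfd body
    hydroobjects = add_wfd_id_to_hydroobjects(hydroobjects, wfd_lines=wfd_lines, wfd_id_column="owmident")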
diff --git a/notebooks/vrij_afwaterend/hydamo_preprocessing/preprocessing.py b/notebooks/vrij_afwaterend/hydamo_preprocessing/preprocessing.py new file mode 100644 index 0000000..2a8ddb4 --- /dev/null +++ b/notebooks/vrij_afwaterend/hydamo_preprocessing/preprocessing.py @@ -0,0 +1,167 @@
+import logging
+import time
+import warnings
+
+import geopandas as gpd
+import pandas as pd
+
+from .general import (
+    connect_endpoints_by_buffer,
+    get_most_overlapping_polygon,
+    report_time_interval,
+)
+
+# %% wfd
+
+
+def add_wfd_id_to_hydroobjects(
+    hydroobjects: gpd.GeoDataFrame,
+    wfd_lines: gpd.GeoDataFrame = None,
+    wfd_polygon: gpd.GeoDataFrame = None,
+    wfd_id_column: str = "owmident",
+    buffer_distance: float = 10,
+    overlap_ratio: float = 0.9,
+) -> gpd.GeoDataFrame:
+    """Assign the id of the most overlapping wfd water body to each hydroobject.
+
+    Parameters
+    ----------
+    hydroobjects : gpd.GeoDataFrame
+        GeoDataFrame that contains (preprocessed) hydroobjects
+    wfd_lines : gpd.GeoDataFrame
+        GeoDataFrame that contains wfd lines.
+    wfd_polygon : gpd.GeoDataFrame
+        GeoDataFrame that contains wfd polygons.
+    wfd_id_column : str, optional
+        Name of the wfd id column. The default is "owmident".
+    buffer_distance : float, optional
+        Buffer distance for line features. The default is 10.
+    overlap_ratio : float, optional
+        Minimum ratio for surface area overlap between
+        buffered hydroobjects and buffered wfd lines / wfd polygons.
+        The default is 0.9.
+
+    Returns
+    -------
+    GeoDataFrame that contains hydroobjects with their assigned wfd body id
+    """
+    warnings.filterwarnings("ignore")
+    start_time = time.time()
+
+    logging.info("Assigning wfd ids to corresponding hydroobjects...")
+
+    # 1. Buffer lines and merge with polygons
+    hydroobjects["left_polygon_geometry"] = hydroobjects.geometry.buffer(buffer_distance, join_style="round")
+
+    wfd_total = None
+    if wfd_lines is not None:
+        wfd_lines["geometry"] = wfd_lines.geometry.buffer(buffer_distance * 1.5, join_style="round")
+        wfd_lines = wfd_lines.explode().reset_index(drop=True)
+        wfd_total = wfd_lines[[wfd_id_column, "geometry"]]
+
+    if wfd_polygon is not None:
+        if wfd_total is None:
+            wfd_total = wfd_polygon[[wfd_id_column, "geometry"]]
+        else:
+            wfd_total = pd.concat([wfd_total, wfd_polygon[[wfd_id_column, "geometry"]]], axis=0)
+
+    wfd_total = wfd_total.dissolve(by=wfd_id_column, aggfunc="sum", as_index=False)
+
+    # 2. Join wfd & hydroobjects
+    hydroobjects["line_geometry"] = hydroobjects.geometry
+    hydroobjects.geometry = hydroobjects.left_polygon_geometry
+
+    # 3. Get most overlapping wfd polygons for each hydroobject
+    hydroobjects = get_most_overlapping_polygon(hydroobjects, wfd_total, "code", wfd_id_column)
+
+    # 4. Keep the wfd id only where the overlap exceeds the required ratio
+    hydroobjects[wfd_id_column] = hydroobjects.apply(
+        lambda x: x["most_overlapping_polygon_id"]
+        if isinstance(x["most_overlapping_polygon_area"], float)
+        and x["most_overlapping_polygon_area"] > overlap_ratio * x["left_area"]
+        else None,
+        axis=1,
+    )
+
+    # 5. Postprocessing: restore the original line geometry
+    hydroobjects["geometry"] = hydroobjects["line_geometry"]
+
+    end_time = time.time()
+    passed_time = report_time_interval(start_time, end_time)
+
+    nr_hydroobjects_wfd = len(hydroobjects[hydroobjects[wfd_id_column].notna()][wfd_id_column])
+    nr_unique_wfd_ids = len(hydroobjects[hydroobjects[wfd_id_column].notna()][wfd_id_column].unique())
+
+    hydroobjects = hydroobjects.drop(
+        columns=[
+            "left_area",
+            "left_polygon_geometry",
+            "overlapping_areas",
+            "line_geometry",
+            "most_overlapping_polygon_id",
+            "most_overlapping_polygon_area",
+        ]
+    )
+
+    logging.info(
+        f"Summary:\n\n\
+        Number of hydroobjects that received a wfd id: {nr_hydroobjects_wfd} \n\
+        Number of unique wfd ids within hydroobjects: {nr_unique_wfd_ids}\n"
+    )
+
+    logging.info(f"Finished within {passed_time}")
+
+    return hydroobjects
+
+
+# %% Preprocess Hydamo Hydroobjects
+
+
+def preprocess_hydamo_hydroobjects(
+    hydroobjects: gpd.GeoDataFrame,
+    wfd_lines: gpd.GeoDataFrame = None,
+    wfd_polygons: gpd.GeoDataFrame = None,
+    buffer_distance_endpoints: float = 0.5,
+    wfd_id_column: str = "owmident",
+    buffer_distance_wfd: float = 10,
+    overlap_ratio_wfd: float = 0.9,
+) -> gpd.GeoDataFrame:
+    """Preprocess HyDAMO hydroobjects: connect loose endpoints and assign wfd ids.
+
+    Parameters
+    ----------
+    hydroobjects : gpd.GeoDataFrame
+        GeoDataFrame that contains (raw) hydroobjects
+    wfd_lines : gpd.GeoDataFrame
+        GeoDataFrame that contains wfd lines.
+    wfd_polygons : gpd.GeoDataFrame
+        GeoDataFrame that contains wfd polygons.
+    buffer_distance_endpoints : float, optional
+        Buffer distance for connecting line boundary endpoints, expressed
+        in the distance unit of the vector line dataset. The default is 0.5.
+    wfd_id_column : str, optional
+        Name of the wfd id column. The default is "owmident".
+    buffer_distance_wfd : float, optional
+        Buffer distance for line features. The default is 10.
+    overlap_ratio_wfd : float, optional
+        Minimum ratio for surface area overlap between
+        buffered hydroobjects and buffered wfd lines / wfd polygons for
+        assigning a wfd body to a hydroobject. The default is 0.9.
+
+    Returns
+    -------
+    GeoDataFrame that contains preprocessed hydroobjects
+    """
+    # Connect unconnected endpoints within buffer
+    preprocessed_hydroobjects = connect_endpoints_by_buffer(hydroobjects, buffer_distance_endpoints)
+
+    # Assign wfd water body to hydroobjects
+    if wfd_lines is not None or wfd_polygons is not None:
+        preprocessed_hydroobjects = add_wfd_id_to_hydroobjects(
+            preprocessed_hydroobjects, wfd_lines, wfd_polygons, wfd_id_column, buffer_distance_wfd, overlap_ratio_wfd
+        )
+
+    return preprocessed_hydroobjects
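A sketch of how this preprocessing entry point might be driven end to end. The GeoPackage and layer names are placeholders, assuming data in EPSG:28992:

import geopandas as gpd

hydroobjects = gpd.read_file("hydamo.gpkg", layer="hydroobject")
wfd_lines = gpd.read_file("wfd.gpkg", layer="wfd_lines")
wfd_polygons = gpd.read_file("wfd.gpkg", layer="wfd_polygons")

preprocessed = preprocess_hydamo_hydroobjects(
    hydroobjects,
    wfd_lines=wfd_lines,
    wfd_polygons=wfd_polygons,
    buffer_distance_endpoints=0.5,  # metres, assuming EPSG:28992
    wfd_id_column="owmident",
    buffer_distance_wfd=10,
    overlap_ratio_wfd=0.9,
)
# hydroobjects with at least 90% buffered overlap now carry a wfd id in "owmident"
preprocessed.to_file("hydamo_preprocessed.gpkg", layer="hydroobject", driver="GPKG")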
diff --git a/notebooks/vrij_afwaterend/ribasim_lumping_tools/LHM_data_bewerking_analyse_utils.py b/notebooks/vrij_afwaterend/ribasim_lumping_tools/LHM_data_bewerking_analyse_utils.py new file mode 100644 index 0000000..7e42023 --- /dev/null +++ b/notebooks/vrij_afwaterend/ribasim_lumping_tools/LHM_data_bewerking_analyse_utils.py @@ -0,0 +1,78 @@
+from pathlib import Path
+
+import geopandas as gpd
+import pandas as pd
+
+
+def read_original_data(waterboard_dir, hydamo_format, object_naam, waterboard):
+    print(f"- read data for {waterboard} - {object_naam}")
+    table_hydamo = hydamo_format[f"{object_naam}_hydamo"].loc[waterboard]
+    path_data_original = Path(waterboard_dir, "1_ontvangen_data", table_hydamo["1_ontvangen_data"])
+    layer_data_original = table_hydamo["layer_ontvangen_data"]
+    if table_hydamo["1_ontvangen_data"] == "Niet aanwezig":
+        return table_hydamo, None
+    if layer_data_original == "":
+        data_original = gpd.read_file(path_data_original)
+    else:
+        data_original = gpd.read_file(path_data_original, layer=layer_data_original)
+    if data_original.geometry.isnull().all():
+        data_original = data_original.drop("geometry", axis=1)
+    return table_hydamo, data_original
+
+
+def translate_data_to_hydamo_format(hydamo_columns, data_original_gdf):
+    df = pd.DataFrame()
+    for col in range(5, len(hydamo_columns)):
+        if hydamo_columns[col] == "":
+            df[hydamo_columns.index[col]] = ""
+        else:
+            df[hydamo_columns.index[col]] = data_original_gdf[hydamo_columns[col]]
+
+    if "geometry" not in data_original_gdf.columns:
+        df["geometry"] = None
+    else:
+        df["geometry"] = data_original_gdf.geometry
+    gdf = gpd.GeoDataFrame(df, geometry="geometry", crs=28992)
+    return gdf
+
+
+def check_if_object_on_hydroobject(data_hydamo, hydroobject_buffered, groupby="globalid"):
+    # spatially join the structures with the buffered hydroobjects
+    data_hydamo = (
+        gpd.sjoin(
+            data_hydamo,
+            hydroobject_buffered[["code", "buffer"]].rename(columns={"code": "code_hydroobject"}),
+            how="left",
+        )
+        .groupby(groupby)
+        .first()
+    )
+    data_hydamo["code_hydroobject"] = data_hydamo["code_hydroobject"].fillna("niet op hydroobject")
+    data_hydamo = data_hydamo.drop(columns=["index_right"])
+    # # set the geometry back to the point geometry after the buffered join
+    # data_hydamo = data_hydamo.set_geometry("geometry")
+    return data_hydamo
+
+
+def export_to_geopackage(waterboard_dir, data_hydamo, hydamo_format, waterboard, hydamo_object):
+    if data_hydamo is None:
+        return
+    print(f"- export {waterboard} - {hydamo_object}")
+    table_hydamo = hydamo_format[f"{hydamo_object}_hydamo"].loc[waterboard]
+    path_data_hydamo = Path(waterboard_dir, "3_verwerkte_data", table_hydamo["3_verwerkte_data"])
+    layer_data_hydamo = table_hydamo["layer_verwerkte_data"]
+    layer_options = "ASPATIAL_VARIANT=GPKG_ATTRIBUTES"
+    if data_hydamo.geometry.isnull().all():
+        data_hydamo.to_file(path_data_hydamo, layer=layer_data_hydamo, driver="GPKG", layer_options=layer_options)
+    else:
+        data_hydamo.to_file(path_data_hydamo, layer=layer_data_hydamo, driver="GPKG")
+
+
+def check_ids_hydamo_data(data_hydamo, waterboard_code, hydamo_object):
+    if "objectid" not in data_hydamo.columns:
+        data_hydamo["objectid"] = 99999
+    if "nen3610id" not in data_hydamo.columns:
+        # build NEN3610 ids from the waterboard code, object type and object id
+        data_hydamo["nen3610id"] = f"NL.WBHCODE.{waterboard_code}.{hydamo_object}" + data_hydamo["objectid"].astype(str)
+    if "globalid" not in data_hydamo.columns:
+        data_hydamo["globalid"] = ""
+    return data_hydamo
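A condensed sketch of how these utilities chain together for one waterboard and one object type. The directory layout, the format workbook name and the waterboard values are assumptions; `xlsx_tables_to_dfs` comes from pandas_xlsx_tables:

from pathlib import Path

from pandas_xlsx_tables import xlsx_tables_to_dfs

waterboard = "AAenMaas"  # assumed waterboard key
waterboard_dir = Path("..", waterboard, "verwerkt")
hydamo_format = xlsx_tables_to_dfs(Path(waterboard_dir, "HyDAMO_format_AAenMaas.xlsx"))

# build buffered hydroobjects once, to test whether structures sit on a hydroobject
table_ho, ho_original = read_original_data(waterboard_dir, hydamo_format, "hydroobject", waterboard)
hydroobject = translate_data_to_hydamo_format(table_ho, ho_original)
hydroobject["buffer"] = hydroobject.buffer(5)
hydroobject_buffered = hydroobject.set_geometry("buffer").set_crs(28992)

# read delivered data for one object type, translate, check and export
table_hydamo, data_original = read_original_data(waterboard_dir, hydamo_format, "stuw", waterboard)
if data_original is not None:
    data_hydamo = translate_data_to_hydamo_format(table_hydamo, data_original)
    data_hydamo = check_ids_hydamo_data(data_hydamo, waterboard_code=1, hydamo_object="stuw")
    data_hydamo = check_if_object_on_hydroobject(data_hydamo, hydroobject_buffered)
    export_to_geopackage(waterboard_dir, data_hydamo, hydamo_format, waterboard, "stuw")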
diff --git a/notebooks/vrij_afwaterend/ribasim_lumping_tools/default_model.py b/notebooks/vrij_afwaterend/ribasim_lumping_tools/default_model.py new file mode 100644 index 0000000..fd39ea2 --- /dev/null +++ b/notebooks/vrij_afwaterend/ribasim_lumping_tools/default_model.py @@ -0,0 +1,307 @@
+import logging
+
+import geopandas as gpd
+import pandas as pd
+import ribasim
+
+DEFAULT_PROFILE = {
+    "area": [0.01, 1000.0],
+    "level": [0.0, 1.0],
+}
+DEFAULT_PRECIPITATION = 0.005 / 86400  # m/s
+DEFAULT_EVAPORATION = 0.001 / 86400  # m/s
+DEFAULT_FLOW_RATE = 0.1  # m3/s
+DEFAULT_MANNING = {
+    "length": 100,
+    "manning_n": 0.04,
+    "profile_width": 10,
+    "profile_slope": 1,
+}
+DEFAULT_RESISTANCE = 0.005
+DEFAULT_LEVEL = 0
+DEFAULT_RATING_CURVE = {
+    "level": [0, 5],
+    "flow_rate": [0.0, DEFAULT_FLOW_RATE],
+}
+DEFAULT_START_TIME = "2020-01-01 00:00:00"
+DEFAULT_END_TIME = "2021-01-01 00:00:00"
+DEFAULTS = {
+    "profile": DEFAULT_PROFILE,
+    "precipitation": DEFAULT_PRECIPITATION,
+    "evaporation": DEFAULT_EVAPORATION,
+    "flow_rate": DEFAULT_FLOW_RATE,
+    "manning": DEFAULT_MANNING,
+    "resistance": DEFAULT_RESISTANCE,
+    "level": DEFAULT_LEVEL,
+    "rating_curve": DEFAULT_RATING_CURVE,
+    "start_time": DEFAULT_START_TIME,
+    "end_time": DEFAULT_END_TIME,
+}
+
+logger = logging.getLogger(__name__)
+
+
+def default_model(
+    node_df: gpd.GeoDataFrame,
+    edge_df: gpd.GeoDataFrame,
+    basin_areas_df: gpd.GeoDataFrame,
+    profile: dict,
+    precipitation: float,
+    evaporation: float,
+    flow_rate: float,
+    manning: dict,
+    resistance: float,
+    level: float,
+    rating_curve: dict,
+    start_time: str,
+    end_time: str,
+    crs: str = "28992",
+) -> ribasim.Model:
+    """Build a Ribasim model with default settings.
+
+    Parameters
+    ----------
+    node_df : gpd.GeoDataFrame
+        GeoDataFrame with nodes. Should contain a `node_id` and `geometry` column
+    edge_df : gpd.GeoDataFrame
+        GeoDataFrame with edges. Should contain LineStrings connecting nodes
+    basin_areas_df : gpd.GeoDataFrame
+        GeoDataFrame with basin areas
+    profile : dict
+        default profile to use for all basins, e.g.:
+        {
+            "area": [0.01, 1000.0],
+            "level": [0.0, 1.0],
+        }
+    precipitation : float
+        default precipitation value
+    evaporation : float
+        default evaporation value
+    flow_rate : float
+        default flow_rate
+    manning : dict
+        default manning values, e.g.:
+        {
+            "length": 100,
+            "manning_n": 0.04,
+            "profile_width": 10,
+            "profile_slope": 1,
+        }
+    resistance : float
+        default resistance to apply on LinearResistance nodes
+    level : float
+        default level value
+    rating_curve : dict
+        default rating curve, e.g.:
+        {
+            "level": [0, 5],
+            "flow_rate": [0.0, 0.1],
+        }
+    start_time : str
+        default start-time, e.g. `2020-01-01 00:00:00`
+    end_time : str
+        default end-time, e.g. `2021-01-01 00:00:00`
+    crs : str, optional
+        coordinate reference system of the model. The default is "28992"
+
+    Returns
+    -------
+    ribasim.Model
+        Ribasim model with default tables
+    """
+    model = ribasim.Model(starttime=start_time, endtime=end_time, crs=crs)
+
+    # check and drop duplicated node ids
+    if node_df.node_id.duplicated().any():
+        logger.warning("node_df contains duplicated node_ids that get dropped")
+        node_df.drop_duplicates("node_id", inplace=True)
+
+    # correct column names for ribasim nodes
+    if "split_type" in node_df.columns:
+        rename_columns = {
+            "object_type": "meta_object_type",
+            "split_node_id": "meta_split_node_id",
+            "split_type": "meta_split_type",
+            "boundary": "meta_boundary",
+            "split_node": "meta_split_node",
+            "basin": "meta_basin",
+            "connection": "meta_connection",
+        }
+        node_df = node_df.rename(columns=rename_columns)
+
+    # define all node types
+    model.basin.node.df = node_df[node_df.node_type == "Basin"]
+    model.basin.area.df = basin_areas_df
+    model.pump.node.df = node_df[node_df.node_type == "Pump"]
+    model.outlet.node.df = node_df[node_df.node_type == "Outlet"]
+    model.tabulated_rating_curve.node.df = node_df[node_df.node_type == "TabulatedRatingCurve"]
+    model.manning_resistance.node.df = node_df[node_df.node_type == "ManningResistance"]
+    model.linear_resistance.node.df = node_df[node_df.node_type == "LinearResistance"]
+    model.level_boundary.node.df = node_df[node_df.node_type == "LevelBoundary"]
+    model.flow_boundary.node.df = node_df[node_df.node_type == "FlowBoundary"]
+
+    # correct column names for ribasim edges
+    if "split_type" in edge_df.columns:
+        rename_columns = {
+            "object_type": "meta_object_type",
+            "split_node_id": "meta_split_node_id",
+            "split_type": "meta_split_type",
+            "boundary": "meta_boundary",
+            "split_node": "meta_split_node",
+            "basin": "meta_basin",
+            "connection": "meta_connection",
+        }
+        edge_df = edge_df.rename(columns=rename_columns)
+        edge_df["meta_boundary"] = edge_df["meta_boundary"].fillna(-1).astype(int)
+    if "from_node_id" not in edge_df.columns:
+        nodes = node_df[["node_id", "node_type", "meta_basin", "meta_boundary", "meta_split_node", "meta_categorie"]]
+        node_basin = nodes[nodes.meta_basin != -1]
+        node_boundary = nodes[nodes.meta_boundary != -1]
+        node_split_node = nodes[nodes.meta_split_node != -1]
+        rename_from_nodes = {
+            "node_id": "from_node_id",
+            "node_type": "from_node_type",
+            "meta_categorie": "meta_from_categorie",
+        }
+        rename_to_nodes = {"node_id": "to_node_id", "node_type": "to_node_type", "meta_categorie": "meta_to_categorie"}
+        from_node_basin = node_basin.rename(columns=rename_from_nodes)[
+            ["from_node_id", "from_node_type", "meta_from_categorie", "meta_basin"]
+        ]
+        to_node_basin = node_basin.rename(columns=rename_to_nodes)[
+            ["to_node_id", "to_node_type", "meta_to_categorie", "meta_basin"]
+        ]
+        from_node_boundary = node_boundary.rename(columns=rename_from_nodes)[
+            ["from_node_id", "from_node_type", "meta_from_categorie", "meta_boundary"]
+        ]
+        to_node_boundary = node_boundary.rename(columns=rename_to_nodes)[
+            ["to_node_id", "to_node_type", "meta_to_categorie", "meta_boundary"]
+        ]
+        from_node_split_node = node_split_node.rename(columns=rename_from_nodes)[
+            ["from_node_id", "from_node_type", "meta_from_categorie", "meta_split_node"]
+        ]
+        to_node_split_node = node_split_node.rename(columns=rename_to_nodes)[
+            ["to_node_id", "to_node_type", "meta_to_categorie", "meta_split_node"]
+        ]
+
+        edge_split_node_to_basin = edge_df[edge_df.meta_connection == "split_node_to_basin"]
+        edge_basin_to_split_node = edge_df[edge_df.meta_connection == "basin_to_split_node"]
+        edge_split_node_to_boundary = edge_df[edge_df.meta_connection == "split_node_to_boundary"]
+        edge_boundary_to_split_node = edge_df[edge_df.meta_connection == "boundary_to_split_node"]
+
+        edge_split_node_to_basin = to_node_basin.merge(edge_split_node_to_basin, how="inner", on="meta_basin")
+        edge_split_node_to_basin = from_node_split_node.merge(
+            edge_split_node_to_basin, how="inner", on="meta_split_node"
+        )
+        edge_basin_to_split_node = to_node_split_node.merge(edge_basin_to_split_node, how="inner", on="meta_split_node")
+        edge_basin_to_split_node = from_node_basin.merge(edge_basin_to_split_node, how="inner", on="meta_basin")
+        edge_split_node_to_boundary = to_node_boundary.merge(
+            edge_split_node_to_boundary, how="inner", on="meta_boundary"
+        )
+        edge_split_node_to_boundary = from_node_split_node.merge(
+            edge_split_node_to_boundary, how="inner", on="meta_split_node"
+        )
+        edge_boundary_to_split_node = to_node_split_node.merge(
+            edge_boundary_to_split_node, how="inner", on="meta_split_node"
+        )
+        edge_boundary_to_split_node = from_node_boundary.merge(
+            edge_boundary_to_split_node, how="inner", on="meta_boundary"
+        )
+
+        edge_df = pd.concat(
+            [
+                edge_split_node_to_basin,
+                edge_basin_to_split_node,
+                edge_split_node_to_boundary,
+                edge_boundary_to_split_node,
+            ]
+        )
+        edge_df = edge_df.reset_index(drop=True)
+        edge_df.index.name = "fid"
+        edge_df = gpd.GeoDataFrame(edge_df, geometry="geometry", crs=crs)
+        edge_df["meta_categorie"] = "doorgaand"
+        edge_df.loc[
+            (edge_df["meta_from_categorie"] == "hoofdwater") & (edge_df["meta_to_categorie"] == "hoofdwater"),
+            "meta_categorie",
+        ] = "hoofdwater"
+
+    # check and drop duplicated edges
+    if edge_df.duplicated(subset=["from_node_id", "from_node_type", "to_node_id", "to_node_type"]).any():
+        logger.warning("edge_df contains duplicated edges that get dropped")
+        edge_df.drop_duplicates(["from_node_id", "from_node_type", "to_node_id", "to_node_type"], inplace=True)
+
+    # convert to edge-table
+    model.edge.df = edge_df
+
+    # define ribasim basin-table
+    profile_df = pd.concat(
+        [
+            pd.DataFrame({"node_id": [i] * len(profile["area"]), **profile})
+            for i in node_df[node_df.node_type == "Basin"].node_id
+        ],
+        ignore_index=True,
+    )
+
+    static_df = node_df[node_df.node_type == "Basin"][["node_id"]]
+    static_df.loc[:, ["precipitation"]] = precipitation
+    static_df.loc[:, ["potential_evaporation"]] = evaporation
+    static_df.loc[:, ["drainage"]] = 0
+    static_df.loc[:, ["infiltration"]] = 0
+    static_df.loc[:, ["urban_runoff"]] = 0
+
+    state_df = profile_df.groupby("node_id").min()["level"].reset_index()
+
+    model.basin.profile.df = profile_df
+    model.basin.static.df = static_df
+    model.basin.state.df = state_df
+
+    # define ribasim pump-table
+    static_df = node_df[node_df.node_type == "Pump"][["node_id"]]
+    static_df.loc[:, ["flow_rate"]] = flow_rate
+
+    model.pump.static.df = static_df
+
+    # define ribasim outlet-table
+    static_df = node_df[node_df.node_type == "Outlet"][["node_id"]]
+    static_df.loc[:, ["flow_rate"]] = flow_rate
+
+    model.outlet.static.df = static_df
+
+    # define ribasim manning resistance
+    static_df = node_df[node_df.node_type == "ManningResistance"][["node_id"]]
+    static_df.loc[:, ["length"]] = manning["length"]
+    static_df.loc[:, ["manning_n"]] = manning["manning_n"]
+    static_df.loc[:, ["profile_width"]] = manning["profile_width"]
+    static_df.loc[:, ["profile_slope"]] = manning["profile_slope"]
+
+    model.manning_resistance.static.df = static_df
+
+    # define ribasim linear resistance
+    static_df = node_df[node_df.node_type == "LinearResistance"][["node_id"]]
"LinearResistance"][["node_id"]] + static_df.loc[:, ["resistance"]] = resistance + + model.linear_resistance.static.df = static_df + + # define ribasim tabulated reatingcurve + static_df = pd.concat( + [ + pd.DataFrame( + { + "node_id": [i] * len(rating_curve["level"]), + **rating_curve, + } + ) + for i in node_df[node_df.node_type == "TabulatedRatingCurve"].node_id + ], + ignore_index=True, + ) + + model.tabulated_rating_curve.static.df = static_df + + # define ribasim flow boundary + static_df = node_df[node_df.node_type == "FlowBoundary"][["node_id"]] + static_df.loc[:, ["flow_rate"]] = flow_rate + + model.flow_boundary.static.df = static_df + + # define ribasim level boundary + static_df = node_df[node_df.node_type == "LevelBoundary"][["node_id"]] + static_df.loc[:, ["level"]] = level + + model.level_boundary.static.df = static_df + + return model diff --git a/notebooks/vrij_afwaterend/ribasim_lumping_tools/run_ribasim_lumping_waterboard.py b/notebooks/vrij_afwaterend/ribasim_lumping_tools/run_ribasim_lumping_waterboard.py new file mode 100644 index 0000000..cfcc927 --- /dev/null +++ b/notebooks/vrij_afwaterend/ribasim_lumping_tools/run_ribasim_lumping_waterboard.py @@ -0,0 +1,132 @@ +""" +Generate Ribasim network and model with ribasim_lumping package for waterboard Rijn en IJssel based on HyDAMO data + +Contactperson: Harm Nomden (Sweco) + +Last update: 11-07-2024 +""" + +import os +import warnings +from pathlib import Path + +import pandas as pd +from ribasim_lumping import create_ribasim_lumping_network + +warnings.simplefilter("ignore") +pd.options.mode.chained_assignment = None +# change workdir +os.chdir(os.path.dirname(os.path.abspath(__file__))) +warnings.simplefilter(action="ignore", category=UserWarning) +warnings.simplefilter(action="ignore", category=FutureWarning) + + +# Settings # +def run_ribasim_lumping_for_waterboard( + base_dir: Path, + waterschap: str, + dx: float = 100.0, + buffer_distance: float = 1.0, + assign_unassigned_areas_to_basins: bool = True, + remove_isolated_basins: bool = True, + include_flow_boundary_basins: bool = True, + include_level_boundary_basins: bool = False, +): + ts_start = pd.Timestamp.now() + print(f"\n\nRun RIBASIM-lumping for waterboard {waterschap}") + base_dir = Path(base_dir, waterschap, "verwerkt") + + # define network name, base dir + source_type = "hydamo" + + # directory results + results_dir = Path(base_dir, "4_ribasim") + simulation_code = "." + + # Create networkanalysis + network = create_ribasim_lumping_network(base_dir=base_dir, name=waterschap, results_dir=results_dir, crs=28992) + + ## -------- AREAS -------- + # areas (discharge units: afwaterende eenheden) + areas_file = Path(base_dir, "3_input", "areas.gpkg") + areas_gpkg_layer = "areas" + areas_code_column = "CODE" + + # drainage areas (afvoergebieden) + drainage_areas_file = Path(base_dir, "3_input", "areas.gpkg") + drainage_areas_gpkg_layer = "drainage_areas" + drainage_areas_code_column = "CODE" + + # Load areas + network.read_areas( + areas_file_path=areas_file, + layer_name=areas_gpkg_layer, + areas_code_column=areas_code_column, + ) + network.read_drainage_areas( + drainage_areas_file_path=drainage_areas_file, + layer_name=drainage_areas_gpkg_layer, + drainage_areas_code_column=drainage_areas_code_column, + ) + + ## -------- HYDAMO DATA -------- + # HyDAMO data + hydamo_network_file = Path(base_dir, "4_ribasim", "hydamo.gpkg") + hydamo_split_network_dx = ( + dx # split up hydamo hydroobjects in sections of approximate 25 m. 
diff --git a/notebooks/vrij_afwaterend/ribasim_lumping_tools/run_ribasim_lumping_waterboard.py b/notebooks/vrij_afwaterend/ribasim_lumping_tools/run_ribasim_lumping_waterboard.py new file mode 100644 index 0000000..cfcc927 --- /dev/null +++ b/notebooks/vrij_afwaterend/ribasim_lumping_tools/run_ribasim_lumping_waterboard.py @@ -0,0 +1,132 @@
+"""
+Generate a Ribasim network and model with the ribasim_lumping package for a waterboard, based on HyDAMO data.
+
+Contact person: Harm Nomden (Sweco)
+
+Last update: 11-07-2024
+"""
+
+import os
+import warnings
+from pathlib import Path
+
+import pandas as pd
+from ribasim_lumping import create_ribasim_lumping_network
+
+warnings.simplefilter("ignore")
+pd.options.mode.chained_assignment = None
+# change workdir to this file's directory
+os.chdir(os.path.dirname(os.path.abspath(__file__)))
+warnings.simplefilter(action="ignore", category=UserWarning)
+warnings.simplefilter(action="ignore", category=FutureWarning)
+
+
+# Settings #
+def run_ribasim_lumping_for_waterboard(
+    base_dir: Path,
+    waterschap: str,
+    dx: float = 100.0,
+    buffer_distance: float = 1.0,
+    assign_unassigned_areas_to_basins: bool = True,
+    remove_isolated_basins: bool = True,
+    include_flow_boundary_basins: bool = True,
+    include_level_boundary_basins: bool = False,
+):
+    ts_start = pd.Timestamp.now()
+    print(f"\n\nRun RIBASIM-lumping for waterboard {waterschap}")
+    base_dir = Path(base_dir, waterschap, "verwerkt")
+
+    # define network name, base dir
+    source_type = "hydamo"
+
+    # directory for results
+    results_dir = Path(base_dir, "4_ribasim")
+    simulation_code = "."
+
+    # create network analysis
+    network = create_ribasim_lumping_network(base_dir=base_dir, name=waterschap, results_dir=results_dir, crs=28992)
+
+    ## -------- AREAS --------
+    # areas (discharge units: afwaterende eenheden)
+    areas_file = Path(base_dir, "3_input", "areas.gpkg")
+    areas_gpkg_layer = "areas"
+    areas_code_column = "CODE"
+
+    # drainage areas (afvoergebieden)
+    drainage_areas_file = Path(base_dir, "3_input", "areas.gpkg")
+    drainage_areas_gpkg_layer = "drainage_areas"
+    drainage_areas_code_column = "CODE"
+
+    # load areas
+    network.read_areas(
+        areas_file_path=areas_file,
+        layer_name=areas_gpkg_layer,
+        areas_code_column=areas_code_column,
+    )
+    network.read_drainage_areas(
+        drainage_areas_file_path=drainage_areas_file,
+        layer_name=drainage_areas_gpkg_layer,
+        drainage_areas_code_column=drainage_areas_code_column,
+    )
+
+    ## -------- HYDAMO DATA --------
+    # HyDAMO data
+    hydamo_network_file = Path(base_dir, "4_ribasim", "hydamo.gpkg")
+    # split hydamo hydroobjects into sections of approximately `dx` metres; use None to skip splitting
+    hydamo_split_network_dx = dx
+
+    # read HyDAMO network data
+    network.add_basis_network(
+        source_type=source_type,
+        hydamo_network_file=hydamo_network_file,
+        hydamo_split_network_dx=hydamo_split_network_dx,
+    )
+
+    ## -------- RIBASIM INPUT --------
+    # input files
+    ribasim_input_boundary_file = Path(base_dir, "3_input", "ribasim_input.gpkg")
+    ribasim_input_boundary_gpkg_layer = "boundaries"
+    ribasim_input_split_nodes_file = Path(base_dir, "3_input", "ribasim_input.gpkg")
+    ribasim_input_split_nodes_gpkg_layer = "split_nodes"
+
+    network.read_split_nodes(
+        split_nodes_file_path=ribasim_input_split_nodes_file,
+        layer_name=ribasim_input_split_nodes_gpkg_layer,
+        buffer_distance=buffer_distance,
+        crs=28992,
+    )
+
+    network.read_boundaries(
+        boundary_file_path=ribasim_input_boundary_file,
+        layer_name=ribasim_input_boundary_gpkg_layer,
+        buffer_distance=buffer_distance,
+        crs=28992,
+    )
+
+    # generate Ribasim network
+    network.generate_ribasim_lumping_network(
+        simulation_code=simulation_code,
+        remove_isolated_basins=remove_isolated_basins,
+        include_flow_boundary_basins=include_flow_boundary_basins,
+        include_level_boundary_basins=include_level_boundary_basins,
+        remove_holes_min_area=5000.0,
+        assign_unassigned_areas_to_basins=assign_unassigned_areas_to_basins,
+    )
+    ts_end = pd.Timestamp.now()
+    print(f"RIBASIM-lumping for waterboard {waterschap} ready: {ts_end - ts_start}")
+
+
+if __name__ == "__main__":
+    base_dir = Path("..\\..\\Ribasim modeldata")
+    waterschap = "AaenMaas"
+    dx = 250.0
+
+    run_ribasim_lumping_for_waterboard(
+        base_dir=base_dir,
+        waterschap=waterschap,
+        dx=dx,
+        buffer_distance=1.0,
+        assign_unassigned_areas_to_basins=waterschap != "ValleienVeluwe",
+        remove_isolated_basins=False,
+    )
diff --git a/notebooks/vrij_afwaterend/xxxx_combine_waterschap_layers.ipynb b/notebooks/vrij_afwaterend/xxxx_combine_waterschap_layers.ipynb new file mode 100644 index 0000000..7158693 --- /dev/null +++ b/notebooks/vrij_afwaterend/xxxx_combine_waterschap_layers.ipynb @@ -0,0 +1,476 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "import fiona\n",
+    "import geopandas as gpd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from ribasim_lumping.utils.general_functions import remove_holes_from_polygons"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# location of the waterboard folders\n",
+    "base_dir = \"..\\\\Ribasim modeldata\\\\\"\n",
+    "\n",
+    "ribasim_folder = \"\"\n",
+    "\n",
+    "# mapping of waterboard directory names to display names\n",
+    "waterschappen = {\n",
+    "    \"Noorderzijlvest\": \"Noorderzijlvest\",\n",
+    "    \"HunzeenAas\": \"Hunze en Aa's\",\n",
+    "    \"DrentsOverijsselseDelta\": \"Drents Overijsselse Delta\",\n",
+    "    \"Vechtstromen\": \"Vechtstromen\",\n",
+    "    \"RijnenIJssel\": \"Rijn en IJssel\",\n",
+    "    \"ValleienVeluwe\": \"Vallei en Veluwe\",\n",
+    "    \"StichtseRijnlanden\": \"De Stichtse Rijnlanden\",\n",
+    "    \"BrabantseDelta\": \"Brabantse Delta\",\n",
+    "    \"DeDommel\": \"De Dommel\",\n",
+    "    \"AaenMaas\": \"Aa en Maas\",\n",
+    "    \"Limburg\": \"Limburg\",\n",
+    "}\n",
+    "\n",
+    "# the layers needed per geopackage\n",
+    "layers = {\n",
+    "    \"basins\": \"ribasim_network.gpkg\",\n",
+    "    \"basin_areas\": \"ribasim_network.gpkg\",\n",
+    "    \"split_nodes\": \"ribasim_network.gpkg\",\n",
\"ribasim_network.gpkg\",\n", + " \"boundary_connections\": \"ribasim_network.gpkg\",\n", + " \"basin_connections\": \"ribasim_network.gpkg\",\n", + " \"areas\": \"areas.gpkg\",\n", + " \"drainage_areas\": \"areas.gpkg\",\n", + " \"foreign_drainage_areas\": \"foreign_input.gpkg\",\n", + " # \"gemaal\": \"hydamo.gpkg\",\n", + " # \"stuw\": \"hydamo.gpkg\",\n", + " # \"onderdoorlaat\": \"hydamo.gpkg\",\n", + " # \"afsluitmiddel\": \"hydamo.gpkg\",\n", + " # \"duikersifonhevel\": \"hydamo.gpkg\",\n", + " # \"hydroobject\": \"hydamo.gpkg\",\n", + "}\n", + "\n", + "output_gpkg = \"data//alle_waterschappen.gpkg\"\n", + "# output_gpkg = \"data//foreign_input.gpkg\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# waterschappen_geoms = gpd.read_file(\"data_oud//waterschappen.gpkg\").to_crs(28992)\n", + "waterschappen_lijst = []\n", + "waterschappen_labels = []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "split_nodes = gpd.read_file(\n", + " Path(base_dir, list(waterschappen.keys())[1], \"verwerkt\", \"4_ribasim\", layers[\"split_nodes\"])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# loop door de verschillende shapefiles die je wilt hebben per waterschap\n", + "for layer, gpkg_file in layers.items():\n", + " print(layer)\n", + " layer_totaal = None\n", + " # loop door de directories van de waterschappen\n", + " print(\" - \", end=\"\")\n", + " for i, waterschap in enumerate(waterschappen):\n", + " print(waterschap[:3], end=\" \")\n", + " # maak de directory\n", + " locatie_gpkg = Path(base_dir, waterschap, \"verwerkt\", \"4_ribasim\", gpkg_file)\n", + " if not locatie_gpkg.exists():\n", + " continue\n", + " if layer not in fiona.listlayers(locatie_gpkg):\n", + " continue\n", + "\n", + " # read the shapefile layers\n", + " layer_data = gpd.read_file(locatie_gpkg, layer=layer, engine=\"pyogrio\")\n", + " if layer == \"areas\":\n", + " layer_data = layer_data[[\"code\", \"geometry\"]]\n", + " if layer == \"foreign_drainage_areas\":\n", + " layer_data = layer_data[[\"name\", \"boundary_name\", \"geometry\"]]\n", + " if layer in [\n", + " \"drainage_areas\",\n", + " \"gemaal\",\n", + " \"stuw\",\n", + " \"afsluitmiddel\",\n", + " \"onderdoorlaat\",\n", + " \"duikersifonhevel\",\n", + " \"hydroobject\",\n", + " ]:\n", + " if \"code\" not in layer_data.columns:\n", + " layer_data[\"code\"] = None\n", + " layer_data = layer_data[[\"code\", \"geometry\"]]\n", + "\n", + " # add waterschap name\n", + " layer_data[\"waterschap\"] = waterschap\n", + "\n", + " layer_data = layer_data.set_crs(28992, allow_override=True)\n", + "\n", + " if layer_totaal is None:\n", + " layer_totaal = layer_data.copy()\n", + " else:\n", + " layer_totaal = pd.concat([layer_totaal, layer_data])\n", + "\n", + " if layer_totaal is not None:\n", + " layer_totaal.to_file(output_gpkg, layer=layer, driver=\"GPKG\")\n", + " print(\" -> saved\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load the data\n", + "areas = gpd.read_file(output_gpkg, layer=\"areas\")\n", + "basins = gpd.read_file(output_gpkg, layer=\"basins\")\n", + "basin_areas = gpd.read_file(output_gpkg, layer=\"basin_areas\")\n", + "split_nodes = gpd.read_file(output_gpkg, 
layer=\"split_nodes\")\n", + "boundaries = gpd.read_file(output_gpkg, layer=\"boundaries\")\n", + "\n", + "drainage_areas = gpd.read_file(output_gpkg, layer=\"drainage_areas\")\n", + "foreign_drainage_areas = gpd.read_file(output_gpkg, layer=\"foreign_drainage_areas\")\n", + "gemaal = gpd.read_file(output_gpkg, layer=\"gemaal\")\n", + "stuw = gpd.read_file(output_gpkg, layer=\"stuw\")\n", + "onderdoorlaat = gpd.read_file(output_gpkg, layer=\"onderdoorlaat\")\n", + "afsluitmiddel = gpd.read_file(output_gpkg, layer=\"afsluitmiddel\")\n", + "duikersifonhevel = gpd.read_file(output_gpkg, layer=\"duikersifonhevel\")\n", + "hydroobject = gpd.read_file(output_gpkg, layer=\"hydroobject\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# BOUNDARIES: FILL TYPE\n", + "boundaries[\"Type\"] = (\n", + " boundaries[\"Type\"]\n", + " .fillna(boundaries[\"quantity\"])\n", + " .replace({\"dischargebnd\": \"FlowBoundary\", \"waterlevelbnd\": \"LevelBoundary\"})\n", + ")\n", + "# CHECK BOUNDARIES\n", + "boundaries[[\"Type\", \"quantity\", \"waterschap\"]].fillna(\"\").groupby(\n", + " [\"Type\", \"quantity\", \"waterschap\"]\n", + ").size().reset_index() # .rename(columns={0:'count'})\n", + "boundaries.to_file(output_gpkg, layer=\"boundaries\")\n", + "# SEPARATE FLOW AND LEVEL BOUNDARIES\n", + "flow_boundaries = boundaries[boundaries[\"Type\"] == \"FlowBoundary\"]\n", + "level_boundaries = boundaries[boundaries[\"Type\"] == \"LevelBoundary\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# BASIN AREAS\n", + "basin_areas_waterschap = areas.dissolve(by=[\"waterschap\", \"basin_node_id\"])\n", + "basin_areas_waterschap.area = basin_areas_waterschap.geometry.area\n", + "basin_areas_waterschap[\"color_no\"] = np.random.choice(np.arange(50), size=len(basin_areas_waterschap)) # noqa: NPY002: FIXME: np.random.Generator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basin_areas_waterschap = remove_holes_from_polygons(basin_areas_waterschap.explode(), 100_000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basin_areas_waterschap.to_file(output_gpkg, layer=\"basin_areas\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basin_areas_waterschap.reset_index().to_file(output_gpkg, layer=\"basin_areas\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basin_areas_waterschap[\"color_no\"] = np.random.choice(np.arange(50), size=len(basin_areas_waterschap)) # noqa: NPY002: FIXME: np.random.Generator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# BASIN AREAS\n", + "fig, ax = plt.subplots()\n", + "basin_areas_waterschap.reset_index(drop=True).plot(ax=ax, column=\"color_no\")\n", + "waterschappen.plot(ax=ax, facecolor=\"None\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# CALCULATE SURFACE AREA OF WATER BOARDS\n", + "areas[\"area\"] = areas.geometry.area / 1_000_000\n", + "areas[[\"area\", \"waterschap\"]].groupby(\"waterschap\").sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# PLOT FOR SURFACE AREA, 
+    "# PLOT FOR SURFACE AREA, BOUNDARIES, SPLIT NODES, BASINS, BASIN AREAS\n",
+    "\n",
+    "\n",
+    "def addlabels(ax, x, y):\n",
+    "    for _x, _y in zip(x, y):\n",
+    "        ax.text(_x, _y, _y, ha=\"center\", va=\"bottom\", fontsize=7)\n",
+    "\n",
+    "\n",
+    "# make the plots\n",
+    "fig, axs = plt.subplots(4, 1, figsize=(5, 7), sharex=True, gridspec_kw={\"hspace\": 0.25, \"wspace\": 0.3})\n",
+    "# fig.tight_layout()\n",
+    "\n",
+    "data_sets = [boundaries, split_nodes, basins, basin_areas]\n",
+    "columns = [\"Boundaries\", \"Split nodes\", \"Basins\", \"Basin areas\"]\n",
+    "data_labels = [\"Boundaries\", \"Split nodes\", \"Basins\", \"Basin areas\"]\n",
+    "\n",
+    "for data_set, data_label, ax in zip(data_sets, data_labels, axs.flatten()):\n",
+    "    labels, counts = np.unique(data_set.waterschap, return_counts=True)\n",
+    "    counts_def = []\n",
+    "    for w_lab in waterschappen.keys():\n",
+    "        counts_new = 0\n",
+    "        for label, count in zip(labels, counts):\n",
+    "            if label == w_lab:\n",
+    "                counts_new = count\n",
+    "        counts_def += [counts_new]\n",
+    "    ax.bar(waterschappen_labels, counts_def, align=\"center\")\n",
+    "    addlabels(ax, waterschappen_labels, counts_def)\n",
+    "    ax.set_ylim([0, max(counts_def) * 1.2])\n",
+    "    ax.set_title(data_label, fontsize=10, ha=\"left\", x=-0.1, fontweight=\"bold\")\n",
+    "    ax.tick_params(axis=\"x\", which=\"major\", labelsize=10)\n",
+    "    ax.tick_params(axis=\"y\", which=\"major\", labelsize=9)\n",
+    "\n",
+    "basin_areas[\"area\"] = basin_areas.geometry.area\n",
+    "basin_areas[\"area_km2\"] = basin_areas.geometry.area / 1000000\n",
+    "# basin_areas[basin_areas.waterschap==\"Noorderzijlvest\", \"color_no\"] =\n",
+    "\n",
+    "ax = axs[-1]  # [-1]\n",
+    "# basin_areas_km2 = basin_areas[[\"waterschap\", \"area_km2\"]].groupby(\"waterschap\").sum().area_km2\n",
+    "# ax.bar(basin_areas_km2.index, basin_areas_km2.values, align='center')\n",
+    "# addlabels(ax, basin_areas_km2.index, basin_areas_km2.round(0).values)#((basin_areas_km2/1000).round(0)*1000.0).values)\n",
+    "# ax.set_ylim([0, basin_areas_km2.max()*1.2])\n",
+    "# ax.set_ylabel(\"area [km2]\")\n",
+    "ax.tick_params(axis=\"x\", labelrotation=90)\n",
+    "ax.set_xticklabels(waterschappen_labels);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# PLOT FOR PUMPS, WEIRS, CULVERTS, HYDROOBJECTS\n",
+    "\n",
+    "# make the plots\n",
+    "fig, axs = plt.subplots(4, 1, figsize=(5, 7), sharex=True, gridspec_kw={\"hspace\": 0.25, \"wspace\": 0.3})\n",
+    "fig.tight_layout()\n",
+    "\n",
+    "waterschap_areas = areas[[\"area\", \"waterschap\"]].groupby(\"waterschap\").sum()\n",
+    "counts_def = []\n",
+    "for w_lab in waterschappen_lijst:\n",
+    "    counts_new = 0\n",
+    "    for label, count in zip(waterschap_areas.index, waterschap_areas.area.round(0).values):\n",
+    "        if label == w_lab:\n",
+    "            counts_new = count\n",
+    "    counts_def += [int(counts_new)]\n",
+    "axs[0].bar(waterschappen_labels, counts_def, align=\"center\")\n",
+    "addlabels(axs[0], waterschappen_labels, counts_def)\n",
+    "axs[0].set_ylim([0, max(counts_def) * 1.2])\n",
+    "axs[0].set_title(\"Surface area [km2]\", fontsize=10, ha=\"left\", x=-0.1, fontweight=\"bold\")\n",
+    "axs[0].tick_params(axis=\"x\", which=\"major\", labelsize=10)\n",
+    "axs[0].tick_params(axis=\"y\", which=\"major\", labelsize=9)\n",
+    "\n",
+    "hydroobject[\"length\"] = hydroobject.geometry.length / 1000\n",
+    "hydroobject_length = hydroobject[[\"length\", \"waterschap\"]].groupby(\"waterschap\").sum()\n",
+    "counts_def = []\n",
+    "for w_lab in waterschappen_lijst:\n",
+    "    counts_new = 0\n",
0\n", + " for label, count in zip(hydroobject_length.index, hydroobject_length.length.round(0).values):\n", + " if label == w_lab:\n", + " counts_new = count\n", + " counts_def += [int(counts_new)]\n", + "axs[1].bar(waterschappen_labels, counts_def, align=\"center\")\n", + "addlabels(axs[1], waterschappen_labels, counts_def)\n", + "axs[1].set_ylim([0, max(counts_def) * 1.2])\n", + "axs[1].set_title(\"Hydro-objects [km]\", fontsize=10, ha=\"left\", x=-0.1, fontweight=\"bold\")\n", + "axs[1].tick_params(axis=\"x\", which=\"major\", labelsize=10)\n", + "axs[1].tick_params(axis=\"y\", which=\"major\", labelsize=9)\n", + "\n", + "afsluitmiddel = pd.concat([afsluitmiddel, onderdoorlaat])\n", + "\n", + "data_sets = [gemaal, stuw]\n", + "columns = [\"Gemaal\", \"Stuw\"]\n", + "data_labels = [\"Pumping stations\", \"Weirs\"]\n", + "\n", + "\n", + "def addlabels(ax, x, y):\n", + " for _x, _y in zip(x, y):\n", + " ax.text(_x, _y, _y, ha=\"center\", va=\"bottom\", fontsize=7)\n", + "\n", + "\n", + "for data_set, data_label, ax in zip(data_sets, data_labels, axs.flatten()[2:]):\n", + " labels, counts = np.unique(data_set.waterschap, return_counts=True)\n", + " counts_def = []\n", + " for w_lab in waterschappen_lijst:\n", + " counts_new = 0\n", + " for label, count in zip(labels, counts):\n", + " if label == w_lab:\n", + " counts_new = count\n", + " counts_def += [int(counts_new)]\n", + " ax.bar(waterschappen_labels, counts_def, align=\"center\")\n", + " addlabels(ax, waterschappen_labels, counts_def)\n", + " ax.set_ylim([0, max(counts_def) * 1.2])\n", + " ax.set_title(data_label, fontsize=10, ha=\"left\", x=-0.1, fontweight=\"bold\")\n", + " ax.tick_params(axis=\"x\", which=\"major\", labelsize=10)\n", + " ax.tick_params(axis=\"y\", which=\"major\", labelsize=9)\n", + "\n", + "basin_areas.area = basin_areas.geometry.area\n", + "basin_areas[\"area_km2\"] = basin_areas.geometry.area / 1000000\n", + "\n", + "ax = axs[-1] # [-1]\n", + "ax.tick_params(axis=\"x\", labelrotation=90)\n", + "ax.set_xticklabels(waterschappen_labels);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from shapely.geometry import Polygon\n", + "\n", + "\n", + "def remove_small_holes_from_areas(gdf, min_area):\n", + " list_geometry = []\n", + " for polygon in gdf.geometry:\n", + " list_interiors = []\n", + " for interior in polygon.interiors:\n", + " p = Polygon(interior)\n", + " if p.area > min_area:\n", + " list_interiors.append(interior)\n", + " temp_pol = Polygon(polygon.exterior.coords, holes=list_interiors)\n", + " list_geometry.append(temp_pol)\n", + " gdf.geometry = list_geometry\n", + " return gdf\n", + "\n", + "\n", + "drainage_areas = remove_small_holes_from_areas(drainage_areas, 1000.0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "drainage_areas.to_file(Path(ribasim_folder, \"areas.gpkg\"), layer=\"drainage_areas\", driver=\"GPKG\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ribasim_lumping_venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.11.9" + }, + "vscode": { + "interpreter": { + "hash": "5bd738815a0acbb3ad0a69908638385386c988630a46c6a41055953a8964d49b" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}