diff --git a/src/peilbeheerst_model/peilbeheerst_model/crossings_to_ribasim.py b/src/peilbeheerst_model/peilbeheerst_model/crossings_to_ribasim.py index 2753b81..fe4d598 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/crossings_to_ribasim.py +++ b/src/peilbeheerst_model/peilbeheerst_model/crossings_to_ribasim.py @@ -45,7 +45,6 @@ def __init__(self, model_characteristics): def read_gpkg_layers( self, - variables=["hydroobject", "gemaal", "stuw", "peilgebied", "streefpeil", "duikersifonhevel"], print_var=False, data=None, ): @@ -53,9 +52,6 @@ def read_gpkg_layers( Parameters ---------- - variables : list, optional - List of layer names to be read from the GeoPackage, by default - ["hydroobject", "gemaal", "stuw", "peilgebied", "streefpeil", "aggregation_area", 'duikersifonhevel'] print_var : bool, optional Flag to print each layer name when reading, by default False data : _type_, optional @@ -69,11 +65,13 @@ def read_gpkg_layers( """ if data is None: data = {} - for variable in variables: + gpkg_path = self.model_characteristics["path_postprocessed_data"] + layers = gpd.list_layers(gpkg_path) + for layer in layers.name: if print_var: - print(variable) - data_temp = gpd.read_file(self.model_characteristics["path_postprocessed_data"], layer=variable) - data[variable] = data_temp + print(layer) + data_temp = gpd.read_file(gpkg_path, layer=layer) + data[layer] = data_temp return data diff --git a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/general_functions.py b/src/peilbeheerst_model/peilbeheerst_model/general_functions.py similarity index 97% rename from src/peilbeheerst_model/peilbeheerst_model/postprocess_data/general_functions.py rename to src/peilbeheerst_model/peilbeheerst_model/general_functions.py index d12a988..ca7fff6 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/general_functions.py +++ b/src/peilbeheerst_model/peilbeheerst_model/general_functions.py @@ -4,14 +4,13 @@ import pandas as pd -def read_gpkg_layers(gpkg_path, variables, engine="fiona", print_var=False): +def read_gpkg_layers(gpkg_path, engine="fiona", print_var=False): """ Read specified layers from a GeoPackage (GPKG) file and return them as a dictionary. Parameters ---------- gpkg_path (str): The file path to the GeoPackage (GPKG) file to read from. - variables (list): A list of layer names to read from the GeoPackage. print_var (bool, optional): If True, print the name of each variable as it is read. Default is False. Returns @@ -22,11 +21,12 @@ def read_gpkg_layers(gpkg_path, variables, engine="fiona", print_var=False): choose to print the names of variables as they are read by setting `print_var` to True. """ data = {} - for variable in variables: + layers = gpd.list_layers(gpkg_path) + for layer in layers.name: if print_var: - print(variable) - data_temp = gpd.read_file(gpkg_path, layer=variable, engine=engine) - data[variable] = data_temp + print(layer) + data_temp = gpd.read_file(gpkg_path, layer=layer, engine=engine) + data[layer] = data_temp return data diff --git a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_WSRL.py b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_WSRL.py index 5474c02..6031b7d 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_WSRL.py +++ b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_WSRL.py @@ -8,7 +8,8 @@ import geopandas as gpd import numpy as np -from general_functions import * + +from peilbeheerst_model.general_functions import * remove_cat_2 = True @@ -35,18 +36,7 @@ # Load HHNK files -WSRL = read_gpkg_layers( - gpkg_path=data_path, - variables=[ - "stuw", - "gemaal", - "hydroobject", - "duikersifonhevel", - "peilgebied", - "streefpeil", - "aggregation_area", - ], -) +WSRL = read_gpkg_layers(gpkg_path=data_path) WSRL["peilgebied"] = WSRL["peilgebied"].to_crs("EPSG:28992") # Load waterschap boundaries diff --git a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_agv.py b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_agv.py index ea285ed..d2fea1a 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_agv.py +++ b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_agv.py @@ -9,8 +9,8 @@ import geopandas as gpd import numpy as np import pandas as pd -from general_functions import read_gpkg_layers +from peilbeheerst_model.general_functions import read_gpkg_layers from ribasim_nl import CloudStorage # %% @@ -23,9 +23,8 @@ # %% cloud = CloudStorage() cloud.download_verwerkt(waterschap) - -# cloud.download_basisgegevens() -# cloud.download_aangeleverd("Rijkswaterstaat") +cloud.download_verwerkt("Rijkswaterstaat") +cloud.download_basisgegevens() # %% verwerkt_dir = cloud.joinpath(waterschap, "verwerkt") @@ -37,27 +36,9 @@ # Hoofdwatersysteem boundaries hws_path = cloud.joinpath("Rijkswaterstaat/verwerkt/krw_basins_vlakken.gpkg") -# Buffer boundaries -buffer_path = cloud.joinpath("Rijkswaterstaat/verwerkt/hws_buffer_agv.gpkg") - -# Buffer RWHS -rhws_path = cloud.joinpath("Rijkswaterstaat/verwerkt/agv_rhws_buffer.gpkg") - - # %% Load Files # Load HHNK files -AVG = read_gpkg_layers( - gpkg_path=data_path, - variables=[ - "stuw", - "gemaal", - "hydroobject", - "duikersifonhevel", - "peilgebied", - "streefpeil", - "aggregation_area", - ], -) +AVG = read_gpkg_layers(gpkg_path=data_path) AVG["peilgebied"] = AVG["peilgebied"].to_crs("EPSG:28992") # Load waterschap boundaries @@ -66,18 +47,8 @@ # Load hws gdf_hws = gpd.read_file(hws_path) -# Load buffer -gdf_buffer = gpd.read_file(buffer_path) -gdf_buffer = gdf_buffer.to_crs("EPSG:28992") -gdf_buffer = gdf_buffer.dissolve() - -# Load rhws -# gdf_rhws = gpd.read_file(rhws_path) -# gdf_rhws = gdf_rhws.to_crs('EPSG:28992') -# gdf_rhws = gdf_rhws.dissolve() - # %% -AVG["peilgebied"].globalid.is_unique +assert AVG["peilgebied"].globalid.is_unique # Select waterschap boundaries and clip hws layer @@ -89,9 +60,6 @@ # Use waterschap boundaries to clip HWS layer gdf_hws = gpd.overlay(gdf_grens, gdf_hws, how="intersection") -# Use waterschap boundaries to clip HWS layer -# gdf_rhws = gpd.overlay(gdf_grens, gdf_rhws, how='intersection') - # Peilgebied and HWS layer overlap: # 1. Identify the overlapping areas # 2. Clip @@ -122,10 +90,8 @@ peilgebieden_cat = [] for index, row in AVG["peilgebied"].iterrows(): - # if row.code == "Oosterpark" or row.code == "Vechtboezem": if "Oosterpark" in row.code or "Vechtboezem" in row.code or "Stadsboezem Amsterdam" in row.code: - print("true") - + print(f"true {row.code}") peilgebieden_cat.append(1) else: peilgebieden_cat.append(0) @@ -133,31 +99,7 @@ # Add new column and drop old HWS_BZM column AVG["peilgebied"]["peilgebied_cat"] = peilgebieden_cat -# %% Add rhws to ['peilgebied','streefpeil'] -# update peilgebied dict key -# gdf_rhws['globalid'] = 'dummy_globalid_rhws_' + gdf_rhws.index.astype(str) -# gdf_rhws['code'] = 'dummy_code_nhws_' + gdf_rhws.index.astype(str) -# gdf_rhws['nen3610id'] = 'dummy_nen3610id_rhws_' + gdf_rhws.index.astype(str) -# gdf_rhws['peilgebied_cat'] = 1 - -# gdf_rhws = gdf_rhws[['globalid', 'code', 'nen3610id', 'peilgebied_cat', 'geometry']] - -# AVG['peilgebied'] = pd.concat([gdf_rhws, AVG['peilgebied']]) - -# %% -# # Create boezem streefpeil layer -# streefpeil_hws = pd.DataFrame() -# streefpeil_hws['waterhoogte'] = [np.nan] * len(gdf_rhws) -# streefpeil_hws['globalid'] = 'dummy_globalid_rhws_' + gdf_rhws.index.astype(str) -# streefpeil_hws['geometry'] = [None]* len(gdf_rhws) - -# AVG['streefpeil'] = pd.concat([streefpeil_hws, AVG['streefpeil']]) -# AVG['streefpeil'] = gpd.GeoDataFrame(AVG['streefpeil']) - -# Add nhws to ['peilgebied','streefpeil'] - -# %% -# update peilgebied dict key +# %% update peilgebied dict key gdf_hws["globalid"] = "dummy_globalid_nhws_" + gdf_hws.index.astype(str) gdf_hws["code"] = "dummy_code_nhws_" + gdf_hws.index.astype(str) gdf_hws["nen3610id"] = "dummy_nen3610id_nhws_" + gdf_hws.index.astype(str) @@ -167,8 +109,7 @@ AVG["peilgebied"] = pd.concat([gdf_hws, AVG["peilgebied"]]) -# %% -# Create boezem streefpeil layer +# %% Create boezem streefpeil layer streefpeil_hws = pd.DataFrame() streefpeil_hws["waterhoogte"] = [np.nan] * len(gdf_hws) streefpeil_hws["globalid"] = "dummy_globalid_nhws_" + gdf_hws.index.astype(str) @@ -177,44 +118,6 @@ AVG["streefpeil"] = pd.concat([streefpeil_hws, AVG["streefpeil"]]) AVG["streefpeil"] = gpd.GeoDataFrame(AVG["streefpeil"]) -# %% Create buffer polygon between NHWS and peilgebied/RHWS -# buffer_polygon = gdf_buffer.geometry.iat[0].intersection(gdf_grens.geometry.iat[0]) -# buffer_polygon = buffer_polygon.difference(shapely.geometry.MultiPolygon(gdf_hws.geometry.tolist())) -# buffer_polygon = buffer_polygon.difference(shapely.ops.unary_union(AVG['peilgebied'].geometry.tolist())) - -# buffer_polygon = gpd.GeoDataFrame(buffer_polygon) -# buffer_polygon = buffer_polygon.set_geometry(0) -# buffer_polygon = buffer_polygon.dissolve() -# buffer_polygon = buffer_polygon.rename(columns={0:'geometry'}) -# buffer_polygon = buffer_polygon.set_geometry('geometry') -# buffer_polygon = buffer_polygon.set_crs('EPSG:28992') - - -# %% Add buffer to ['peilgebied','streefpeil'] - -# update peilgebied dict key -# buffer_polygon = gpd.GeoDataFrame(buffer_polygon) -# buffer_polygon['globalid'] = 'dummy_globalid_nhws_buffer_' + buffer_polygon.index.astype(str) -# buffer_polygon['code'] = 'dummy_code_nhws_buffer_' + buffer_polygon.index.astype(str) -# buffer_polygon['nen3610id'] = 'dummy_nen3610id_nhws_buffer_' + buffer_polygon.index.astype(str) -# buffer_polygon['peilgebied_cat'] = 2 -# buffer_polygon = buffer_polygon.rename(columns={0:'geometry'}) -# buffer_polygon = buffer_polygon[['globalid', 'code', 'nen3610id', 'peilgebied_cat', 'geometry']] - -# AVG['peilgebied'] = pd.concat([buffer_polygon, AVG['peilgebied']]) -# AVG['peilgebied'] = gpd.GeoDataFrame(AVG['peilgebied']) - -# %% -# # Create boezem streefpeil layer -# streefpeil_buffer = pd.DataFrame() -# streefpeil_buffer['waterhoogte'] = [np.nan] -# streefpeil_buffer['globalid'] = 'dummy_globalid_nhws_buffer_' + buffer_polygon.index.astype(str) -# streefpeil_buffer['geometry'] = [None] - - -# AVG['streefpeil'] = pd.concat([streefpeil_buffer, AVG['streefpeil']]) -# AVG['streefpeil'] = gpd.GeoDataFrame(AVG['streefpeil']) - # %% if remove_cat_2: AVG["peilgebied"] = AVG["peilgebied"].loc[AVG["peilgebied"].peilgebied_cat != 2] @@ -227,6 +130,6 @@ print(key) AVG[str(key)].to_file(output_gpkg_path, layer=str(key), driver="GPKG") -cloud.upload_verwerkt(output_gpkg_path) +cloud.upload_verwerkt(waterschap) # %% AVG["peilgebied"]["peilgebied_cat"].unique() diff --git a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_delfland.py b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_delfland.py index fe82a3f..51a949d 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_delfland.py +++ b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_delfland.py @@ -9,7 +9,8 @@ import geopandas as gpd import numpy as np -from general_functions import * + +from peilbeheerst_model.general_functions import * remove_cat_2 = True @@ -36,18 +37,7 @@ # Load HHNK files -delfland = read_gpkg_layers( - gpkg_path=data_path, - variables=[ - "stuw", - "gemaal", - "hydroobject", - "duikersifonhevel", - "peilgebied", - "streefpeil", - "aggregation_area", - ], -) +delfland = read_gpkg_layers(gpkg_path=data_path) delfland["peilgebied"] = delfland["peilgebied"].to_crs("EPSG:28992") diff --git a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_rijnland.py b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_rijnland.py index 789cd48..d11963e 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_rijnland.py +++ b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_rijnland.py @@ -10,7 +10,8 @@ import geopandas as gpd import numpy as np -from general_functions import * + +from peilbeheerst_model.general_functions import * remove_cat_2 = True @@ -37,18 +38,7 @@ # Load HHNK files -Rijnland = read_gpkg_layers( - gpkg_path=data_path, - variables=[ - "stuw", - "gemaal", - "hydroobject", - "duikersifonhevel", - "peilgebied", - "streefpeil", - "aggregation_area", - ], -) +Rijnland = read_gpkg_layers(gpkg_path=data_path) Rijnland["peilgebied"] = Rijnland["peilgebied"].to_crs("EPSG:28992") # Load waterschap boundaries diff --git a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_wetterskip.py b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_wetterskip.py index 4eab36f..1e66262 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_wetterskip.py +++ b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_wetterskip.py @@ -8,7 +8,8 @@ import geopandas as gpd import numpy as np -from general_functions import * + +from peilbeheerst_model.general_functions import * remove_cat_2 = True @@ -36,18 +37,7 @@ # Load HHNK files -Wetterskip = read_gpkg_layers( - gpkg_path=data_path, - variables=[ - "stuw", - "gemaal", - "hydroobject", - "duikersifonhevel", - "peilgebied", - "streefpeil", - "aggregation_area", - ], -) +Wetterskip = read_gpkg_layers(gpkg_path=data_path) Wetterskip["peilgebied"] = Wetterskip["peilgebied"].to_crs("EPSG:28992") # Load waterschap boundaries diff --git a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_zuiderzeeland.py b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_zuiderzeeland.py index cc3577d..ad9ed77 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_zuiderzeeland.py +++ b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-process_zuiderzeeland.py @@ -8,7 +8,8 @@ import geopandas as gpd import numpy as np -from general_functions import * + +from peilbeheerst_model.general_functions import * remove_cat_2 = True @@ -35,18 +36,7 @@ # Load HHNK files -Zuiderzeeland = read_gpkg_layers( - gpkg_path=data_path, - variables=[ - "stuw", - "gemaal", - "hydroobject", - "duikersifonhevel", - "peilgebied", - "streefpeil", - "aggregation_area", - ], -) +Zuiderzeeland = read_gpkg_layers(gpkg_path=data_path) Zuiderzeeland["peilgebied"] = Zuiderzeeland["peilgebied"].to_crs("EPSG:28992") # Load waterschap boundaries diff --git a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_HD.py b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_HD.py index ec46b06..fe3c2c4 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_HD.py +++ b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_HD.py @@ -8,7 +8,8 @@ import geopandas as gpd import numpy as np -from general_functions import * + +from peilbeheerst_model.general_functions import * remove_cat_2 = True @@ -36,18 +37,7 @@ # Load HHNK files -HD = read_gpkg_layers( - gpkg_path=data_path, - variables=[ - "stuw", - "gemaal", - "hydroobject", - "duikersifonhevel", - "peilgebied", - "streefpeil", - "aggregation_area", - ], -) +HD = read_gpkg_layers(gpkg_path=data_path) HD["peilgebied"] = HD["peilgebied"].to_crs("EPSG:28992") # Load waterschap boundaries diff --git a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_HHNK.py b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_HHNK.py index 29eecc7..90395ae 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_HHNK.py +++ b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_HHNK.py @@ -8,7 +8,8 @@ import geopandas as gpd import numpy as np -from general_functions import * + +from peilbeheerst_model.general_functions import * remove_cat_2 = True @@ -36,18 +37,7 @@ # Load HHNK files -HHNK = read_gpkg_layers( - gpkg_path=data_path, - variables=[ - "stuw", - "gemaal", - "hydroobject", - "duikersifonhevel", - "peilgebied", - "streefpeil", - "aggregation_area", - ], -) +HHNK = read_gpkg_layers(gpkg_path=data_path) HHNK["peilgebied"] = HHNK["peilgebied"].to_crs("EPSG:28992") # Load waterschap boundaries diff --git a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_HHSK.py b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_HHSK.py index 23bbcdd..1089af1 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_HHSK.py +++ b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_HHSK.py @@ -9,7 +9,8 @@ import geopandas as gpd import shapely -from general_functions import * + +from peilbeheerst_model.general_functions import * remove_cat_2 = True @@ -35,18 +36,7 @@ # Load HHNK files -HHSK = read_gpkg_layers( - gpkg_path=data_path, - variables=[ - "stuw", - "gemaal", - "hydroobject", - "duikersifonhevel", - "peilgebied", - "streefpeil", - "aggregation_area", - ], -) +HHSK = read_gpkg_layers(gpkg_path=data_path) HHSK["peilgebied"] = HHSK["peilgebied"].to_crs("EPSG:28992") # Load waterschap boundaries diff --git a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_scheldestromen.py b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_scheldestromen.py index 55f65b7..b45b4dd 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_scheldestromen.py +++ b/src/peilbeheerst_model/peilbeheerst_model/postprocess_data/post-processing_scheldestromen.py @@ -8,7 +8,8 @@ import geopandas as gpd import numpy as np -from general_functions import * + +from peilbeheerst_model.general_functions import * remove_cat_2 = True @@ -36,18 +37,7 @@ # Load HHNK files -Scheldestromen = read_gpkg_layers( - gpkg_path=data_path, - variables=[ - "stuw", - "gemaal", - "hydroobject", - "duikersifonhevel", - "peilgebied", - "streefpeil", - "aggregation_area", - ], -) +Scheldestromen = read_gpkg_layers(gpkg_path=data_path) Scheldestromen["peilgebied"] = Scheldestromen["peilgebied"].to_crs("EPSG:28992") # Load waterschap boundaries diff --git a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/AmstelGooienVecht.py b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/AmstelGooienVecht.py index 482d593..db8ad3d 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/AmstelGooienVecht.py +++ b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/AmstelGooienVecht.py @@ -1,9 +1,9 @@ import geopandas as gpd import numpy as np import pandas as pd -from general_functions import show_layers_and_columns, store_data from shapely import wkt +from peilbeheerst_model.general_functions import show_layers_and_columns, store_data from ribasim_nl import CloudStorage pd.set_option("display.max_columns", None) @@ -177,6 +177,12 @@ AVG["hydroobject"] = AVG["hydroobject"].drop_duplicates(subset="globalid") # in case it is run multiple times AVG["hydroobject"] = gpd.GeoDataFrame(AVG["hydroobject"]).set_crs("epsg:28992") +# aggregation_area +AVG["aggregation_area"] = AVG["peilgebied"].copy() +AVG["aggregation_area"]["globalid"] = "dummy_globalid_agg_area_" + AVG["aggregation_area"].index.astype(str) +AVG["aggregation_area"]["code"] = ( + AVG["aggregation_area"]["code"] + "_dummy_id_" + AVG["aggregation_area"].index.astype(str) +) # # Control, store diff --git a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Delfland.py b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Delfland.py index 653f493..7f7ba50 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Delfland.py +++ b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Delfland.py @@ -3,7 +3,8 @@ import geopandas as gpd import pandas as pd -from general_functions import * + +from peilbeheerst_model.general_functions import * # # Delfland @@ -14,9 +15,7 @@ output_gpkg_path = "../../Data_postprocessed/Waterschappen/Delfland" -Delfland = read_gpkg_layers( - gpkg_path=gdb_path, variables=["stuw", "gemaal", "watergang", "duikersifonhevel", "peilgebiedpraktijk", "keerschot"] -) +Delfland = read_gpkg_layers(gpkg_path=gdb_path) # 'peilafwijkinggebied', # 'pomp']) # 'streefpeil']) diff --git a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/HHNK.py b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/HHNK.py index afe2333..0ddb13d 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/HHNK.py +++ b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/HHNK.py @@ -2,7 +2,8 @@ import geopandas as gpd import numpy as np import pandas as pd -from general_functions import * + +from peilbeheerst_model.general_functions import * # # Hollands Noorderkwartier @@ -16,24 +17,11 @@ # retrieve the data -HHNK = read_gpkg_layers( - gpkg_path=gpkg_path_HHNK, - variables=[ - # 'stuw', #nalevering - # 'gemaal', #nalevering - "afsluitmiddel", - "hydroobject", - "duikersifonhevel", - ], -) -# 'peilafwijkinggebied', -# 'peilgebiedpraktijk', -# 'pomp']) -# 'streefpeil']) +HHNK = read_gpkg_layers(gpkg_path=gpkg_path_HHNK) # retrieve data from a gdb, as the gpkg of HHNK does not contain all relevant data data_gdb = gpd.read_file(gdb_path_HHNK, layer="BWN_ruimtekaart") -HHNK_nalevering = read_gpkg_layers(gpkg_path=gdb_path_HHNK_nalevering, variables=["stuw", "gemaal"]) # nalevering +HHNK_nalevering = read_gpkg_layers(gpkg_path=gdb_path_HHNK_nalevering) # nalevering HHNK["stuw"] = HHNK_nalevering["stuw"] HHNK["gemaal"] = HHNK_nalevering["gemaal"] diff --git a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/HHSK.py b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/HHSK.py index 55bec86..6b8a91a 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/HHSK.py +++ b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/HHSK.py @@ -4,7 +4,8 @@ import geopandas as gpd import numpy as np import pandas as pd -from general_functions import * + +from peilbeheerst_model.general_functions import * pd.set_option("display.max_columns", None) @@ -15,32 +16,16 @@ output_gpkg_path = "../../Data_postprocessed/Waterschappen/HHSK" -HHSK = read_gpkg_layers( - gpkg_path=path_HHSK, - variables=[ - "stuw", - "gemaal", - "afsluitmiddel", - "duikersifonhevel", - "hydroobject", - # 'peilgebiedvigerend', - # 'peilafwijkinggebied', - # 'peilbesluitgebied', - "streefpeil", - ], - engine="pyogrio", -) +HHSK = read_gpkg_layers(gpkg_path=path_HHSK, engine="pyogrio") HHSK_nalevering = read_gpkg_layers( - gpkg_path=r"D:\Users\Bruijns\Documents\PR4750_20\Data_preprocessed\Waterschappen\HHSK\LHM_hhsk_nalevering.gpkg", - variables=["Persleiding"], + gpkg_path=r"D:\Users\Bruijns\Documents\PR4750_20\Data_preprocessed\Waterschappen\HHSK\LHM_hhsk_nalevering.gpkg" ) HHSK_2nalevering = read_gpkg_layers( - gpkg_path=r"D:\Users\Bruijns\Documents\PR4750_20\Data_preprocessed\Waterschappen\HHSK\hhsklevering_07032024.gpkg", - variables=["PeilgebiedPraktijk", "VigerendPeilgebiedZPP"], + gpkg_path=r"D:\Users\Bruijns\Documents\PR4750_20\Data_preprocessed\Waterschappen\HHSK\hhsklevering_07032024.gpkg" ) diff --git a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Hollandse_Delta.py b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Hollandse_Delta.py index 567ed70..55f7664 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Hollandse_Delta.py +++ b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Hollandse_Delta.py @@ -4,7 +4,8 @@ import geopandas as gpd import numpy as np import pandas as pd -from general_functions import * + +from peilbeheerst_model.general_functions import * # # HD @@ -15,17 +16,7 @@ output_gpkg_path = "../../Data_postprocessed/Waterschappen/Hollandse_Delta" -HD = read_gpkg_layers( - gpkg_path=data_path, - variables=[ - "stuwen", - "gemalen", - "afsluitmiddelen", - "sluizen", - "HydroObjectWatergangtype", # = hydroobject - "HydroObjectKunstwerkvakken", - ], -) # = duikersifonhevel +HD = read_gpkg_layers(gpkg_path=data_path) # change names HD["stuw"] = HD.pop("stuwen") diff --git a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Rijnland.py b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Rijnland.py index ef8b895..89fc438 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Rijnland.py +++ b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Rijnland.py @@ -5,7 +5,8 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from general_functions import * + +from peilbeheerst_model.general_functions import * pd.set_option("display.max_columns", None) @@ -16,20 +17,7 @@ output_gpkg_path = "../../Data_postprocessed/Waterschappen/Rijnland" -Rijnland = read_gpkg_layers( - gpkg_path=path_Rijnland, - variables=[ - "stuw", - "gemaal", - "afsluitmiddel", - "duikersifonhevel", - "hydroobject", - "peilgebiedvigerend", - "peilgebiedpraktijk", - "peilafwijkinggebied", - "streefpeil", - ], -) +Rijnland = read_gpkg_layers(gpkg_path=path_Rijnland) # # Rijnland diff --git a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Rivierenland.py b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Rivierenland.py index b7eb5a2..1a91055 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Rivierenland.py +++ b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Rivierenland.py @@ -5,7 +5,8 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from general_functions import * + +from peilbeheerst_model.general_functions import * pd.set_option("display.max_columns", None) @@ -28,13 +29,9 @@ gpkg_path = os.path.join(root, file) if WSRL == {}: - WSRL = read_gpkg_layers( - gpkg_path=gpkg_path, variables=["Stuw", "Gemaal", "Hydroobject", "Duikersifonhevel"] - ) + WSRL = read_gpkg_layers(gpkg_path=gpkg_path) else: - temp_WSRL = read_gpkg_layers( - gpkg_path=gpkg_path, variables=["Stuw", "Gemaal", "Hydroobject", "Duikersifonhevel"] - ) + temp_WSRL = read_gpkg_layers(gpkg_path=gpkg_path) for variable in WSRL.keys(): WSRL[variable] = pd.concat([WSRL[variable], temp_WSRL[variable]]).reset_index(drop=True) @@ -43,7 +40,7 @@ gdb_path = r"..\..\Data_preprocessed\Waterschappen\WSRL\OverigeGegevens.gdb" -WSRL_gdb = read_gpkg_layers(gpkg_path=gdb_path, variables=["PeilgebiedenPraktijk"]) +WSRL_gdb = read_gpkg_layers(gpkg_path=gdb_path) # add the gdb to the dict # WSRL['peilgebiedafwijking'] = WSRL_gdb['Peilafwijkingen'] diff --git a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Scheldestromen.py b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Scheldestromen.py index 6ccca61..528db26 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Scheldestromen.py +++ b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Scheldestromen.py @@ -4,7 +4,8 @@ import geopandas as gpd import numpy as np import pandas as pd -from general_functions import * + +from peilbeheerst_model.general_functions import * pd.set_option("display.max_columns", None) @@ -17,17 +18,6 @@ Scheldestromen = read_gpkg_layers( gpkg_path=path_Scheldestromen, - variables=[ - "stuw", - "gemaal", - # 'afsluitmiddel', - "duikersifonhevel", - "hydroobject", - # 'peilgebiedvigerend', - # 'peilgebiedpraktijk', - # 'peilafwijkinggebied', - # 'streefpeil', - ], engine="pyogrio", ) diff --git a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Wetterskip.py b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Wetterskip.py index ab07e73..8f155ee 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Wetterskip.py +++ b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Wetterskip.py @@ -1,7 +1,8 @@ # import packages and functions import geopandas as gpd import pandas as pd -from general_functions import * + +from peilbeheerst_model.general_functions import * pd.set_option("display.max_columns", None) @@ -23,12 +24,6 @@ # retrieve the data Wetterskip = read_gpkg_layers( gpkg_path=gpkg_path_Wetterskip, - variables=["stuw", "gemaal", "afsluitmiddel", "hydroobject", "duikersifonhevel"], - # 'peilmerk', - # 'streefpeil', - # 'peilgebiedpraktijk', - # 'peilgebiedvigerend', - # 'peilbesluitgebied'], print_var=False, ) diff --git a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Zuiderzeeland.py b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Zuiderzeeland.py index a41fc62..54a8d3e 100644 --- a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Zuiderzeeland.py +++ b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/Zuiderzeeland.py @@ -5,7 +5,8 @@ import geopandas as gpd import numpy as np import pandas as pd -from general_functions import * + +from peilbeheerst_model.general_functions import * pd.set_option("display.max_columns", None) diff --git a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/general_functions.py b/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/general_functions.py deleted file mode 100644 index 0e40153..0000000 --- a/src/peilbeheerst_model/peilbeheerst_model/preprocess_data/general_functions.py +++ /dev/null @@ -1,319 +0,0 @@ -# import packages and functions - -import geopandas as gpd -import matplotlib.pyplot as plt -import pandas as pd - - -def read_gpkg_layers(gpkg_path, variables, engine="fiona", print_var=False): - """ - Read specified layers from a GeoPackage (GPKG) file and return them as a dictionary. - - Parameters - ---------- - gpkg_path (str): The file path to the GeoPackage (GPKG) file to read from. - variables (list): A list of layer names to read from the GeoPackage. - print_var (bool, optional): If True, print the name of each variable as it is read. Default is False. - - Returns - ------- - dict: A dictionary containing the GeoDataFrames, with layer names as keys. - - This function reads specified layers from a GeoPackage (GPKG) file and returns them as a dictionary. You can - choose to print the names of variables as they are read by setting `print_var` to True. - """ - data = {} - for variable in variables: - if print_var: - print(variable) - data_temp = gpd.read_file(gpkg_path, layer=variable, engine=engine) - data[variable] = data_temp - - return data - - -def show_layers_and_columns(waterschap): - """ - Display Information About Layers and Columns in a Geospatial Dataset. - - Parameters - ---------- - waterschap (dict): A dictionary containing geospatial datasets as GeoDataFrames. - - Returns - ------- - None - - This function prints the names of all layers and the columns within each layer of a geospatial dataset stored - in a dictionary. - - """ - for key in waterschap.keys(): - print(key) - print(waterschap[str(key)].columns.values) - print("type = ", type(waterschap[str(key)])) - print("crs = ", waterschap[str(key)].crs) - print() - - -def store_data(waterschap, output_gpkg_path): - """ - Store Geospatial Data to a GeoPackage (GPKG) File. - - Parameters - ---------- - waterschap (dict): A dictionary containing GeoDataFrames to be stored in the GPKG file. - output_gpkg_path (str): The file path (including the file name without extension) to save the GPKG file. - - Returns - ------- - None - - This function stores geospatial data from a dictionary of GeoDataFrames into a GeoPackage (GPKG) file. - - Parameters - ---------- - - waterschap: A dictionary where the keys represent layer names, and the values are GeoDataFrames. - - output_gpkg_path: The file path for the output GPKG file. The '.gpkg' extension is added automatically. - """ - for key in waterschap.keys(): - waterschap[str(key)].to_file(output_gpkg_path + ".gpkg", layer=str(key), driver="GPKG") - - -def overlapping_peilgebieden(waterschap_peilgebieden): - """ - Identify and calculate the percentage of overlapping peilgebieden. - - Parameters - ---------- - waterschap_peilgebieden (geopandas.GeoDataFrame): A GeoDataFrame containing polygons (the peilgebieden). - - Returns - ------- - geopandas.GeoDataFrame: A GeoDataFrame with overlapping polygons and their overlap percentages. - - This function analyzes a GeoDataFrame of peilgebied polygons to find overlapping polygons and calculate - the percentage of overlap between them. It returns a GeoDataFrame with information about the overlapping - polygons, including their overlap percentages. - - Parameters - ---------- - - waterschap_peilgebieden: A GeoDataFrame containing the peilgebieden polygons. - """ - peilgebied = waterschap_peilgebieden - peilgebied.geometry = peilgebied.buffer(distance=0) # make invalid geometries valid - peilgebied.set_crs(crs="EPSG:28992", inplace=True) - - # Create an empty GeoDataFrame to store the overlapping polygons and additional information - overlapping_polygons = gpd.GeoDataFrame(columns=peilgebied.columns) - - # Iterate through each polygon in peilgebied - for index, row in peilgebied.iterrows(): - current_polygon = peilgebied.iloc[[index]] # select the current polygon - other_polygons = peilgebied.drop(index) # create a GeoDataFrame without the current polygon - overlaps = other_polygons[ - other_polygons.geometry.overlaps(current_polygon.geometry.iloc[0]) - ] # check for overlaps with other polygons - - if not overlaps.empty: - # calculate the percentage of overlap, and add this to the gdf including the overlapping indexes - current_overlap_percentage = ( - overlaps.geometry.intersection(current_polygon.geometry.iloc[0]).area - / current_polygon.geometry.iloc[0].area - * 100 - ) - - overlaps["overlap_percentage"], overlaps["source_globalid"] = pd.NA, pd.NA # create columns - - # fill columns - overlaps["overlap_percentage"] = ( - current_overlap_percentage # multiple peilgebieden will be added to the temporal gdf if there are multiple overlapping polygons - ) - overlaps["source_globalid"] = current_polygon["globalid"].values[ - 0 - ] # add the global id of the current polygon. - - # add to the results - overlapping_polygons = pd.concat([overlapping_polygons, overlaps]) - - return overlapping_polygons - - -def plot_histogram_overlap(overlapping_polygons): - """ - Plots a histogram of the overlapping polygons in a DataFrame. - - Parameters - ---------- - overlapping_polygons (pd.DataFrame): A DataFrame containing information about overlapping polygons. - It should have a 'overlap_percentage' column to represent the percentage of overlap between polygons. - - Returns - ------- - None - - The function calculates a histogram of overlapping percentages, providing insights into the distribution of overlaps - between polygons. It handles potential NaN values in the 'overlap_percentage' column and creates bins ranging - from 0% to 100% in 10% increments for the histogram. The number of overlapping polygons is displayed in the title. - - """ - overlapping_polygons["overlap_percentage"] = overlapping_polygons["overlap_percentage"].fillna( - 0 - ) # Handle potential NaN values - bins = range(0, 101, 10) # Create bins from 0% to 100% in 10% increments - - # Create the histogram - plt.hist(overlapping_polygons["overlap_percentage"], bins=bins, color="cornflowerblue", edgecolor="k") - - # Set labels and title - plt.xlabel("Overlap [%]") - plt.ylabel("Frequency [#]") # Update the y-axis label - # plt.yscale('log') # Set the y-axis scale to 'log' - plt.ylim(0, 15) - plt.suptitle("Histogram of overlapping percentages") - plt.title(f"Number of overlapping polygons = {len(overlapping_polygons)}", fontsize=8) - plt.show() - - -def plot_overlapping_peilgebieden(peilgebied, overlapping_polygons, minimum_percentage): - """ - Plot Overlapping Peilgebieden on a map, including a Minimum Percentage of Overlap to show. - - Parameters - ---------- - peilgebied (geopandas.GeoDataFrame): A GeoDataFrame representing the peilgebied polygons. - overlapping_polygons (geopandas.GeoDataFrame): A GeoDataFrame containing information about overlapping polygons/peilgebieden. - minimum_percentage (float or int): The minimum overlap percentage required for polygons to be displayed. - - Returns - ------- - None - - This function creates a plot to visualize overlapping peilgebieden based on a specified minimum overlap percentage. - It displays a subset of overlapping polygons with a percentage greater than the specified minimum. - - Parameters - ---------- - - peilgebied: The entire peilgebieden GeoDataFrame serving as the background. - - overlapping_polygons: GeoDataFrame containing information about overlapping polygons. - - minimum_percentage: The minimum overlap percentage required for polygons to be displayed. - - """ - # make a subsect of overlapping polygons, based on a percentage - overlap_subset = overlapping_polygons.loc[overlapping_polygons["overlap_percentage"] > minimum_percentage] - - # plot - fig, ax = plt.subplots() - peilgebied.plot(ax=ax, color="lightgray") # background - overlap_subset.plot( - ax=ax, cmap="coolwarm", column=overlap_subset.overlap_percentage, label="Percentage of overlap", legend=True - ) - - plt.show() - - -# def intersect_using_spatial_index(peilgebied_praktijk, peilgebied_afwijking, check): -# """ -# Conduct spatial intersection using spatial index for candidates GeoDataFrame to make queries faster. -# Note, with this function, you can have multiple Polygons in the 'intersecting_gdf' and it will return all the points -# intersect with ANY of those geometries. -# """ -# peilgebied_praktijk_sindex = peilgebied_praktijk.sindex -# possible_matches_index = [] - -# # 'itertuples()' function is a faster version of 'iterrows()' -# for other in peilgebied_afwijking.itertuples(): -# bounds = other.geometry.bounds -# c = list(peilgebied_praktijk_sindex.intersection(bounds)) -# possible_matches_index += c - -# # Get unique candidates -# unique_candidate_matches = list(set(possible_matches_index)) -# possible_matches = peilgebied_praktijk.iloc[unique_candidate_matches] - -# possible_matches.to_file('possible_matches_Rijnland.shp') -# un_un = possible_matches.intersects(peilgebied_afwijking.unary_union) -# # print('un_un =') -# # display(un_un) -# # print() - -# # print('possible_matches =') -# # display(possible_matches) -# # print() - -# # print('overlapping_pg_praktijk =') -# # display(possible_matches[un_un]) - -# # possible_matches[un_un].to_file('peilgebied_afwijking_unary_union_Rijnland.shp') - - -# # Conduct the actual intersect -# overlapping_pg_praktijk = possible_matches.loc[un_un] #the entire peilgebied praktijk polygons - - -# #remove the peilgebied afwijking from the peilgebied praktijk -# intersection = gpd.overlay(overlapping_pg_praktijk, peilgebied_afwijking, how='intersection') - -# #fix possible invalid geometries -# overlapping_pg_praktijk['geometry'] = overlapping_pg_praktijk.buffer(distance = 0) -# peilgebied_afwijking['geometry'] = peilgebied_afwijking.buffer(distance = 0) - -# overlapping_updated = gpd.overlay(peilgebied_praktijk, intersection, how='symmetric_difference') ##remove the difference between pg_praktijk and pg_afwijking -# peilgebied = overlapping_updated.append(intersection, ignore_index=True) #add the removed difference, but now only the intersected part of pg_afwijking - - -# if check: -# peilgebied_praktijk.to_file('Checks/Rivierenland/peilgebied_praktijk.gpkg', driver='GPKG') -# peilgebied_afwijking.to_file('Checks/Rivierenland/peilgebied_afwijking.gpkg', driver='GPKG') - -# intersection.to_file('Checks/Rivierenland/intersection.gpkg', driver='GPKG') -# overlapping_updated.to_file('Checks/Rivierenland/overlapping_updated.gpkg', driver='GPKG') -# peilgebied.to_file('Checks/Rivierenland/peilgebied.gpkg', driver='GPKG') - -# return peilgebied - - -def burn_in_peilgebieden(base_layer, overlay_layer, plot=True): - # remove the overlapping parts from the base_layer - base_layer_without_overlapping = gpd.overlay( - base_layer, overlay_layer, how="symmetric_difference", keep_geom_type=False - ) ##remove the difference between pg_praktijk and pg_afwijking - - # fill each column - base_layer_without_overlapping.code_1.fillna(value=base_layer_without_overlapping.code_2, inplace=True) - base_layer_without_overlapping.nen3610id_1.fillna(value=base_layer_without_overlapping.nen3610id_2, inplace=True) - base_layer_without_overlapping.globalid_1.fillna(value=base_layer_without_overlapping.globalid_2, inplace=True) - # base_layer_without_overlapping.waterhoogte_1.fillna(value = base_layer_without_overlapping.waterhoogte, inplace=True) - - if ( - "waterhoogte_1" in base_layer_without_overlapping.keys() - ): # sometimes a waterhoogte is present in the peilgebieden. Manage this. - base_layer_without_overlapping.rename( - columns={ - "code_1": "code", - "nen3610id_1": "nen3610id", - "globalid_1": "globalid", - "waterhoogte_1": "waterhoogte", - }, - inplace=True, - ) - base_layer_without_overlapping.drop( - columns=["code_2", "nen3610id_2", "globalid_2", "waterhoogte_2"], inplace=True - ) - - else: - base_layer_without_overlapping.rename( - columns={"code_1": "code", "nen3610id_1": "nen3610id", "globalid_1": "globalid"}, inplace=True - ) - base_layer_without_overlapping.drop(columns=["code_2", "nen3610id_2", "globalid_2"], inplace=True) - - burned_base_layer = pd.concat([pd.DataFrame(base_layer_without_overlapping), pd.DataFrame(overlay_layer)], axis=0) - - burned_base_layer = burned_base_layer.drop_duplicates(subset="globalid", keep="last") - - if plot: - fig, ax = plt.subplots() - base_layer.plot(ax=ax, color="cornflowerblue") - overlay_layer.plot(ax=ax, color="blue") - - return burned_base_layer diff --git a/src/peilbeheerst_model/pyproject.toml b/src/peilbeheerst_model/pyproject.toml index a2770e4..856c664 100644 --- a/src/peilbeheerst_model/pyproject.toml +++ b/src/peilbeheerst_model/pyproject.toml @@ -14,7 +14,7 @@ authors = [ license = { text = "MIT" } requires-python = ">=3.10" -dependencies = ["geopandas", "numpy", "pandas", "pydantic", "shapely", "tqdm"] +dependencies = ["geopandas", "numpy", "pandas", "pydantic", "shapely", "tqdm", "matplotlib"] dynamic = ["version"] [project.optional-dependencies]