diff --git a/docs/api.rst b/docs/api.rst index 7529a64d..5416d263 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -36,6 +36,7 @@ Setup components WflowModel.setup_glaciers WflowModel.setup_lulcmaps WflowModel.setup_laimaps + WflowModel.setup_laimaps_from_lulc_mapping WflowModel.setup_ksathorfrac WflowModel.setup_rootzoneclim WflowModel.setup_soilmaps @@ -155,6 +156,7 @@ Setup components WflowSedimentModel.setup_reservoirs WflowSedimentModel.setup_lulcmaps WflowSedimentModel.setup_laimaps + WflowSedimentModel.setup_laimaps_from_lulc_mapping WflowSedimentModel.setup_canopymaps WflowSedimentModel.setup_soilmaps WflowSedimentModel.setup_riverwidth @@ -247,6 +249,9 @@ Wflow workflows workflows.river_bathymetry workflows.pet workflows.landuse + workflows.lai + workflows.create_lulc_lai_mapping_table + workflows.lai_from_lulc_mapping workflows.ksathorfrac workflows.soilgrids workflows.soilgrids_sediment diff --git a/docs/changelog.rst b/docs/changelog.rst index 35e0a014..1304ed3c 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -22,6 +22,7 @@ Added - new utils method **get_grid_from_config** to get the right wflow staticmaps variable based on the TOML configuration (e.g. detects name in netcdf, value, scale and offset). Only applied now to prepare cold states (e.g. not yet in read_grid). PR #252 - Added support for the "GLCNMO" land-use dataset, with a default parameter mapping table (similar to the existing tables). PR #272 - Added the `alpha_h1` parameter (based on land use maps). This parameter represents whether root water uptake reduction at soil water pressure head h1 occurs or not. By default, it is set to 0.0 for all "non-natural" vegetation (crops) and to 1.0 for all "natural vegetation" PR #272 +- New function **setup_laimaps_from_lulc_mapping** to set leaf area index (LAI) climatology maps per month based on landuse mapping. PR #273 Changed ------- @@ -29,6 +30,7 @@ Changed - **setup_soilmaps**: the user can now supply variable sbm soil layer thicknesses. The Brooks Corey coefficient is then computed as weighted average over the sbm layers. PR #242 - **setup_outlets**: the IDs of wflow_subcatch are used to define the outlets IDs rather than [1:n]. PR #247 - wflow forcing data type should always be float32. PR #268 +- **setup_laimaps**: if a landuse map if provided, setup_laimaps can also prepare landuse mapping tables for LAI. PR #273 Fixed ----- diff --git a/docs/user_guide/sediment_model_setup.rst b/docs/user_guide/sediment_model_setup.rst index 4f8ecfb8..a0fbbb7e 100644 --- a/docs/user_guide/sediment_model_setup.rst +++ b/docs/user_guide/sediment_model_setup.rst @@ -44,6 +44,8 @@ a specific method see its documentation. - This component derives several wflow maps are derived based on landuse- landcover (LULC) data. * - :py:func:`~WflowSedimentModel.setup_laimaps` - This component sets leaf area index (LAI) climatology maps per month. + * - :py:func:`~WflowSedimentModel.setup_laimaps_from_lulc_mapping` + - This component sets leaf area index (LAI) climatology maps per month based on landuse mapping. * - :py:func:`~WflowSedimentModel.setup_canopymaps` - Setup sediments based canopy height maps. * - :py:func:`~WflowSedimentModel.setup_soilmaps` @@ -52,17 +54,17 @@ a specific method see its documentation. - This component sets the river width parameter based on a power-lay relationship with a predictor. * - :py:func:`~WflowSedimentModel.setup_riverbedsed` - Setup sediments based river bed characteristics maps. - * - :py:func:`~WflowModel.setup_outlets` + * - :py:func:`~WflowSedimentModel.setup_outlets` - This method sets the default gauge map based on basin outlets. - * - :py:func:`~WflowModel.setup_gauges` + * - :py:func:`~WflowSedimentModel.setup_gauges` - This method sets the default gauge map based on a gauges_fn data. - * - :py:func:`~WflowModel.setup_areamap` + * - :py:func:`~WflowSedimentModel.setup_areamap` - Setup area map from vector data to save wflow outputs for specific area. - * - :py:func:`~WflowModel.setup_config_output_timeseries` + * - :py:func:`~WflowSedimentModel.setup_config_output_timeseries` - This method add a new variable/column to the netcf/csv output section of the toml based on a selected gauge/area map. * - :py:func:`~WflowSedimentModel.setup_constant_pars` - Setup constant parameter maps. - * - :py:func:`~WflowModel.setup_grid_from_raster` + * - :py:func:`~WflowSedimentModel.setup_grid_from_raster` - Setup staticmaps from raster to add parameters from direct data. diff --git a/docs/user_guide/wflow_model_setup.rst b/docs/user_guide/wflow_model_setup.rst index 4fc9ebf9..b57812fd 100644 --- a/docs/user_guide/wflow_model_setup.rst +++ b/docs/user_guide/wflow_model_setup.rst @@ -50,6 +50,8 @@ a specific method see its documentation. - This component derives several wflow maps are derived based on landuse- landcover (LULC) data. * - :py:func:`~WflowModel.setup_laimaps` - This component sets leaf area index (LAI) climatology maps per month. + * - :py:func:`~WflowModel.setup_laimaps_from_lulc_mapping` + - This component sets leaf area index (LAI) climatology maps per month based on landuse mapping. * - :py:func:`~WflowModel.setup_soilmaps` - This component derives several (layered) soil parameters based on a database with physical soil properties using available point-scale (pedo)transfer functions (PTFs) from literature with upscaling rules to ensure flux matching across scales. * - :py:func:`~WflowModel.setup_ksathorfrac` diff --git a/hydromt_wflow/wflow.py b/hydromt_wflow/wflow.py index 1ba13e18..926f72c5 100644 --- a/hydromt_wflow/wflow.py +++ b/hydromt_wflow/wflow.py @@ -953,13 +953,27 @@ def setup_lulcmaps( if wflow_param is not None: self.set_config(wflow_param, name) - def setup_laimaps(self, lai_fn: Union[str, xr.DataArray]): + def setup_laimaps( + self, + lai_fn: Union[str, xr.DataArray], + lulc_fn: Optional[Union[str, xr.DataArray]] = None, + lulc_sampling_method: str = "any", + lulc_zero_classes: List[int] = [], + buffer: int = 2, + ): """ Set leaf area index (LAI) climatology maps per month [1,2,3,...,12]. The values are resampled to the model resolution using the average value. Currently only directly cyclic LAI data is supported. + If `lulc_fn` is provided, mapping tables from landuse classes to LAI values + will be derived from the LULC data. These tables can then be re-used later if + you would like to add new LAI maps derived from this mapping table and new + landuse scenarios. We advise to use a larger `buffer` to ensure that LAI values + can be assigned for all landuse classes and based on a lage enough sample of the + LULC data. + Adds model layers: * **LAI** map: Leaf Area Index climatology [-] @@ -974,10 +988,62 @@ def setup_laimaps(self, lai_fn: Union[str, xr.DataArray]): * Required variables: 'LAI' [-] * Required dimensions: 'time' = [1,2,3,...,12] (months) + lulc_fn : str, xarray.DataArray, optional + Name of RasterDataset source for landuse-landcover data. + If provided, the LAI values are mapped to landuse classes and will be saved + to a csv file. + lulc_sampling_method : str, optional + Resampling method for the LULC data to the LAI resolution. Two methods are + supported: + + * 'any' (default): if any cell of the desired landuse class is present in + the resampling window (even just one), it will be used to derive LAI + values. This method is less exact but will provide LAI values for all + landuse classes for the high resolution landuse map. + * 'mode': the most frequent value in the resampling window is + used. This method is less precise as for cells with a lot of different + landuse classes, the most frequent value might still be only a small + fraction of the cell. More landuse classes should however be covered and + it can always be used with the landuse map of the wflow model instead of + the original high resolution one. + * 'q3': only cells with the most frequent value (mode) and that cover 75% + (q3) of the resampling window will be used. This method is more exact but + for small basins, you may have less or no samples to derive LAI values + for some classes. + lulc_zero_classes : list, optional + List of landuse classes that should have zero for leaf area index values + for example waterbodies, open ocean etc. For very high resolution landuse + maps, urban surfaces and bare areas can be included here as well. + By default empty. + buffer : int, optional + Buffer around the region to read the data, by default 2. """ # retrieve data for region self.logger.info("Preparing LAI maps.") - da = self.data_catalog.get_rasterdataset(lai_fn, geom=self.region, buffer=2) + da = self.data_catalog.get_rasterdataset( + lai_fn, geom=self.region, buffer=buffer + ) + if lulc_fn is not None: + self.logger.info("Preparing LULC-LAI mapping table.") + da_lulc = self.data_catalog.get_rasterdataset( + lulc_fn, geom=self.region, buffer=buffer + ) + # derive mapping + df_lai_mapping = workflows.create_lulc_lai_mapping_table( + da_lulc=da_lulc, + da_lai=da.copy(), + sampling_method=lulc_sampling_method, + lulc_zero_classes=lulc_zero_classes, + logger=self.logger, + ) + # Save to csv + if isinstance(lulc_fn, str): + df_fn = f"lai_per_lulc_{lulc_fn}.csv" + else: + df_fn = "lai_per_lulc.csv" + df_lai_mapping.to_csv(join(self.root, df_fn)) + + # Resample LAI data to wflow model resolution da_lai = workflows.lai( da=da, ds_like=self.grid, @@ -987,6 +1053,53 @@ def setup_laimaps(self, lai_fn: Union[str, xr.DataArray]): rmdict = {da_lai.dims[0]: "time"} self.set_grid(da_lai.rename(rmdict), name="LAI") + def setup_laimaps_from_lulc_mapping( + self, + lulc_fn: Optional[Union[str, xr.DataArray]], + lai_mapping_fn: Union[str, pd.DataFrame], + ): + """ + Derive cyclic LAI maps from a LULC data source and a LULC-LAI mapping table. + + Adds model layers: + + * **LAI** map: Leaf Area Index climatology [-] + Resampled from source data using average. Assuming that missing values + correspond to bare soil, these are set to zero before resampling. + + Parameters + ---------- + lulc_fn : str, xarray.DataArray + Name of RasterDataset source for landuse-landcover data. + lai_mapping_fn : str, pd.DataFrame + Path to a mapping csv file from landuse in source name to + LAI values. The csv file should contain rows with landuse classes + and LAI values for each month. The columns should be named as the + months (1,2,3,...,12). + This table can be created using the :py:meth:`setup_laimaps` method. + """ + self.logger.info( + "Preparing LAI maps from LULC data using LULC-LAI mapping table." + ) + + # read landuse map to DataArray + da = self.data_catalog.get_rasterdataset( + lulc_fn, geom=self.region, buffer=2, variables=["landuse"] + ) + df_lai_mapping = self.data_catalog.get_dataframe( + lai_mapping_fn, + driver_kwargs={"index_col": 0}, # only used if fn_map is a file path + ) + # process landuse with LULC-LAI mapping table + da_lai = workflows.lai_from_lulc_mapping( + da=da, + ds_like=self.grid, + df=df_lai_mapping, + logger=self.logger, + ) + # Add to grid + self.set_grid(da_lai, name="LAI") + def setup_config_output_timeseries( self, mapname: str, diff --git a/hydromt_wflow/workflows/landuse.py b/hydromt_wflow/workflows/landuse.py index 3cbaf905..aded9279 100644 --- a/hydromt_wflow/workflows/landuse.py +++ b/hydromt_wflow/workflows/landuse.py @@ -1,22 +1,29 @@ """Landuse workflows for Wflow plugin.""" import logging +from typing import List, Optional import numpy as np +import pandas as pd import xarray as xr logger = logging.getLogger(__name__) -__all__ = ["landuse", "lai"] +__all__ = ["landuse", "lai", "create_lulc_lai_mapping_table", "lai_from_lulc_mapping"] RESAMPLING = {"landuse": "nearest", "lai": "average", "alpha_h1": "mode"} DTYPES = {"landuse": np.int16, "alpha_h1": np.int16} -# def landuse(da, ds_like, data_catalog, fn_map, logger=logger, params=None): -def landuse(da, ds_like, df, logger=logger, params=None): +def landuse( + da: xr.DataArray, + ds_like: xr.Dataset, + df: pd.DataFrame, + params: Optional[List] = None, + logger=logger, +): """Return landuse map and related parameter maps. The parameter maps are prepared based on landuse map and @@ -70,7 +77,7 @@ def reclass(x): return ds_out -def lai(da, ds_like, logger=logger): +def lai(da: xr.DataArray, ds_like: xr.Dataset, logger=logger): """Return climatology of Leaf Area Index (LAI). The following topography maps are calculated: @@ -102,3 +109,189 @@ def lai(da, ds_like, logger=logger): da_out = da.raster.reproject_like(ds_like, method=method) da_out.attrs.update(_FillValue=nodata) return da_out + + +def create_lulc_lai_mapping_table( + da_lulc: xr.DataArray, + da_lai: xr.DataArray, + sampling_method: str = "any", + lulc_zero_classes: List[int] = [], + logger=logger, +) -> pd.DataFrame: + """ + Derive LAI values per landuse class. + + Parameters + ---------- + da_lulc : xr.DataArray + Landuse map. + da_lai : xr.DataArray + Cyclic LAI map. + sampling_method : str, optional + Resampling method for the LULC data to the LAI resolution. Two methods are + supported: + + * 'any' (default): if any cell of the desired landuse class is present in the + resampling window (even just one), it will be used to derive LAI values. + This method is less exact but will provide LAI values for all landuse + classes for the high resolution landuse map. + * 'mode': the most frequent value in the resampling window is + used. This method is less precise as for cells with a lot of different + landuse classes, the most frequent value might still be only a small + fraction of the cell. More landuse classes should however be covered and + it can always be used with the landuse map of the wflow model instead of + the original high resolution one. + * 'q3': only cells with the most frequent value (mode) and that cover 75% + (q3) of the resampling window will be used. This method is more exact but + for small basins, you may have less or no samples to derive LAI values + for some classes. + lulc_zero_classes : list of int, optional + List of landuse classes that should have zero for leaf area index values + for example waterbodies, open ocean etc. For very high resolution landuse + maps, urban surfaces and bare areas can be included here as well. + By default empty. + + Returns + ------- + df_lai_mapping : pd.DataFrame + Mapping table with LAI values per landuse class. One column for each month and + one line per landuse class. The number of samples used to derive the mapping + values is also added to a `samples` column in the dataframe. + """ + # check the method values + if sampling_method not in ["any", "mode", "q3"]: + raise ValueError(f"Unsupported resampling method: {sampling_method}") + + # process the lai da + if "dim0" in da_lai.dims: + da_lai = da_lai.rename({"dim0": "time"}) + da_lai = da_lai.raster.mask_nodata() + da_lai = da_lai.fillna( + 0 + ) # use zeros to better represent city and open water surfaces + + # landuse + da_lulc.name = "landuse" + lulc_classes = np.unique(da_lulc.values) + + # Initialise the outputs + df_lai_mapping = None + + if sampling_method != "any": + # The data can already be resampled to the LAI resolution + da_lulc_mode = da_lulc.raster.reproject_like(da_lai, method="mode") + if sampling_method == "q3": + # Filter mode cells that cover less than 75% of the resampling window + da_lulc_q3 = da_lulc.raster.reproject_like(da_lai, method="q3") + da_lulc = da_lulc_mode.where( + da_lulc_q3 == da_lulc_mode, da_lulc_mode.raster.nodata + ) + else: + da_lulc = da_lulc_mode + + # Loop over the landuse classes + for lulc_id in lulc_classes: + logger.info(f"Processing landuse class {lulc_id}") + + if lulc_id in lulc_zero_classes: + logger.info(f"Using zeros for landuse class {lulc_id}") + df_lai = pd.DataFrame( + columns=da_lai.time.values, + data=[[0] * 12], + index=[lulc_id], + ) + df_lai.index.name = "landuse" + n_samples = 0 + + else: + # Select for a specific landuse class + lu = da_lulc.where(da_lulc == lulc_id, da_lulc.raster.nodata) + lu = lu.raster.mask_nodata() + + if sampling_method == "any": + # Resample only now the landuse data to the LAI resolution + lu = lu.raster.reproject_like(da_lai, method="mode") + + # Add lai + lu = lu.to_dataset() + lu["lai"] = da_lai + + # Stack and remove the nodata values + lu = lu.stack(z=("y", "x")).dropna(dim="z", how="all", subset=["landuse"]) + + # Count the number of samples + n_samples = len(lu["z"]) + if n_samples == 0: + logger.info( + f"No samples found for landuse class {lulc_id}. " + "Try using a different resampling method." + ) + df_lai = pd.DataFrame( + columns=da_lai.time.values, + data=[[0] * 12], + index=[lulc_id], + ) + df_lai.index.name = "landuse" + else: + # Compute the mean + lai_mean_per_lu = np.round(lu["lai"].load().mean(dim="z") / 10, 3) + # Add the landuse id as an extra dimension + lai_mean_per_lu = lai_mean_per_lu.expand_dims("landuse") + lai_mean_per_lu["landuse"] = [lulc_id] + + # Convert to dataframe + df_lai = lai_mean_per_lu.drop_vars( + "spatial_ref", errors="ignore" + ).to_pandas() + + # Add number of samples in the first column + df_lai.insert(0, "samples", n_samples) + + # Append to the output + if df_lai_mapping is None: + df_lai_mapping = df_lai + else: + df_lai_mapping = pd.concat([df_lai_mapping, df_lai]) + + return df_lai_mapping + + +def lai_from_lulc_mapping( + da: xr.DataArray, + ds_like: xr.Dataset, + df: pd.DataFrame, + logger=logger, +) -> xr.Dataset: + """ + Derive LAI values from a landuse map and a mapping table. + + Parameters + ---------- + da : xr.DataArray + Landuse map. + ds_like : xr.Dataset + Dataset at model resolution. + df : pd.DataFrame + Mapping table with LAI values per landuse class. One column for each month and + one line per landuse class. + logger : logging.Logger, optional + Logger object. + + Returns + ------- + ds_lai : xr.Dataset + Dataset with LAI values for each month. + """ + months = np.arange(1, 13) + # Map the monthly LAI values to the landuse map + ds_lai = landuse( + da=da, + ds_like=ds_like, + df=df, + params=months, + logger=logger, + ) + # Re-organise the dataset to have a time dimension + da_lai = ds_lai.to_array(dim="time", name="LAI") + + return da_lai diff --git a/tests/test_model_methods.py b/tests/test_model_methods.py index 10fb03db..9cd765fe 100644 --- a/tests/test_model_methods.py +++ b/tests/test_model_methods.py @@ -13,6 +13,7 @@ import xarray as xr from hydromt.raster import full_like +from hydromt_wflow import workflows from hydromt_wflow.wflow import WflowModel TESTDATADIR = join(dirname(abspath(__file__)), "data") @@ -310,6 +311,58 @@ def test_setup_ksathorfrac(tmpdir, example_wflow_model): assert int(mean_val * 100) == 22020 +def test_setup_lai(tmpdir, example_wflow_model): + # Use vito and MODIS lai data for testing + # Read LAI data + da_lai = example_wflow_model.data_catalog.get_rasterdataset( + "modis_lai", geom=example_wflow_model.region, buffer=2 + ) + # Read landuse data + da_landuse = example_wflow_model.data_catalog.get_rasterdataset( + "vito", geom=example_wflow_model.region, buffer=2 + ) + + # Derive mapping for using the method any + df_lai_any = workflows.create_lulc_lai_mapping_table( + da_lulc=da_landuse, + da_lai=da_lai.copy(), + sampling_method="any", + lulc_zero_classes=[80, 200, 0], + ) + # Check that all landuse classes are present in the mapping + assert np.all(df_lai_any.index.values == np.unique(da_landuse)) + + # Try with the other two methods + df_lai_mode = workflows.create_lulc_lai_mapping_table( + da_lulc=da_landuse, + da_lai=da_lai.copy(), + sampling_method="mode", + lulc_zero_classes=[80, 200, 0], + ) + df_lai_q3 = workflows.create_lulc_lai_mapping_table( + da_lulc=da_landuse, + da_lai=da_lai.copy(), + sampling_method="q3", + lulc_zero_classes=[80, 200, 0], + ) + # Check the number of landuse classes in the mapping tables + assert len(df_lai_any[df_lai_any.samples == 0]) == 1 + assert len(df_lai_mode[df_lai_mode.samples == 0]) == 3 + assert len(df_lai_q3[df_lai_q3.samples == 0]) == 3 + # Check number of samples for landuse class 20 with the different methods + assert int(df_lai_any.loc[20].samples) == 2481 + assert int(df_lai_mode.loc[20].samples) == 59 + assert int(df_lai_q3.loc[20].samples) == 4 + + # Try to use the mapping tables to setup the LAI + example_wflow_model.setup_laimaps_from_lulc_mapping( + lulc_fn="vito", + lai_mapping_fn=df_lai_any, + ) + + assert "LAI" in example_wflow_model.grid + + def test_setup_rootzoneclim(example_wflow_model): # load csv with dummy data for long timeseries of precip, pet and dummy Q data. test_data = pd.read_csv(