diff --git a/dfm_tools/interpolate_grid2bnd.py b/dfm_tools/interpolate_grid2bnd.py index cd7977adf..1b99cea82 100644 --- a/dfm_tools/interpolate_grid2bnd.py +++ b/dfm_tools/interpolate_grid2bnd.py @@ -478,7 +478,7 @@ def interp_uds_to_plipoints(uds:xu.UgridDataset, gdf:geopandas.GeoDataFrame) -> # TODO: revert after fixing https://github.com/Deltares/xugrid/issues/274 vars_without_facedim = [] for varn in uds.variables: - if facedim not in uds[varn].dims: + if facedim not in uds.variables[varn].dims: vars_without_facedim.append(varn) uds_face = uds.drop(vars_without_facedim) @@ -488,8 +488,9 @@ def interp_uds_to_plipoints(uds:xu.UgridDataset, gdf:geopandas.GeoDataFrame) -> # re-add removed variables again, sometimes important for e.g. depth # TODO: remove after fixing https://github.com/Deltares/xugrid/issues/274 for varn in vars_without_facedim: - if edgedim not in uds[varn].dims and nodedim not in uds[varn].dims: - ds[varn] = uds[varn] + vardims = uds.variables[varn].dims + if edgedim not in vardims and nodedim not in vardims: + ds[varn] = uds.variables[varn] # rename station dimname and varname (is index, are both mesh2d_nFaces to start with) ds = ds.rename({facedim:dimn_point}) # rename mesh2d_nFaces to plipoints diff --git a/dfm_tools/xarray_helpers.py b/dfm_tools/xarray_helpers.py index 0c8d9db70..057906705 100644 --- a/dfm_tools/xarray_helpers.py +++ b/dfm_tools/xarray_helpers.py @@ -111,8 +111,8 @@ def preprocess_ERA5(ds): # Prevent writing to (incorrectly scaled) int, since it might mess up mfdataset (https://github.com/Deltares/dfm_tools/issues/239) # By dropping scaling/offset encoding and converting to float32 (will result in a larger dataset) # ERA5 datasets retrieved with the new CDS-beta are zipped float32 instead of scaled int, so this is only needed for backwards compatibility with old files. - for var in ds.data_vars: - if not set(['dtype','scale_factor','add_offset']).issubset(ds[var].encoding.keys()): + for var in ds.data_vars.keys(): + if not set(['dtype','scale_factor','add_offset']).issubset(ds.variables[var].encoding.keys()): continue # the _FillValue will still be -32767 (int default), but this is no issue for float32 ds[var].encoding.pop('scale_factor') @@ -315,7 +315,7 @@ def Dataset_varswithdim(ds,dimname): #TODO: dit zit ook in xugrid, wordt nu gebr varlist_keep = [] for varname in ds.variables.keys(): - if dimname in ds[varname].dims: + if dimname in ds.variables[varname].dims: varlist_keep.append(varname) ds = ds[varlist_keep] diff --git a/dfm_tools/xugrid_helpers.py b/dfm_tools/xugrid_helpers.py index 20c9a028f..dbb133237 100644 --- a/dfm_tools/xugrid_helpers.py +++ b/dfm_tools/xugrid_helpers.py @@ -71,7 +71,7 @@ def remove_ghostcells(uds, fname): #TODO: remove ghostcells from output or align #drop ghostcells part_domainno_fromfname = int(part_domainno_fromfname) - da_domainno = uds[varn_domain] + da_domainno = uds.variables[varn_domain] idx = np.flatnonzero(da_domainno == part_domainno_fromfname) uds = uds.isel({uds.grid.face_dimension:idx}) return uds @@ -118,11 +118,11 @@ def decode_default_fillvals(ds): # TODO: this function can be removed when xarray does it automatically: https://github.com/Deltares/dfm_tools/issues/490 nfillattrs_added = 0 - for varn in ds.variables: + for varn in ds.variables.keys(): # TODO: possible to get always_mask boolean with `netCDF4.Dataset(file_nc).variables[varn].always_mask`, but this seems to be always True for FM mapfiles - if '_FillValue' in ds[varn].encoding: + if '_FillValue' in ds.variables[varn].encoding: continue - dtype_str = ds[varn].dtype.str[1:] + dtype_str = ds.variables[varn].dtype.str[1:] if dtype_str not in default_fillvals.keys(): continue varn_fillval = default_fillvals[dtype_str] @@ -144,9 +144,9 @@ def remove_nan_fillvalue_attrs(ds : (xr.Dataset, xu.UgridDataset)): ds = ds.obj count = 0 - for varn in ds.variables: - if '_FillValue' in ds[varn].encoding: - if np.isnan(ds[varn].encoding['_FillValue']): + for varn in ds.variables.keys(): + if '_FillValue' in ds.variables[varn].encoding: + if np.isnan(ds.variables[varn].encoding['_FillValue']): ds[varn].encoding.pop('_FillValue') count += 1 if count > 0: @@ -299,9 +299,9 @@ def open_dataset_curvilinear(file_nc, print('>> getting vertices from ds: ',end='') dtstart = dt.datetime.now() - vertices_longitude = ds[varn_vert_lon].to_numpy() + vertices_longitude = ds.variables[varn_vert_lon].to_numpy() vertices_longitude = vertices_longitude.reshape(-1,vertices_longitude.shape[-1]) - vertices_latitude = ds[varn_vert_lat].to_numpy() + vertices_latitude = ds.variables[varn_vert_lat].to_numpy() vertices_latitude = vertices_latitude.reshape(-1,vertices_latitude.shape[-1]) print(f'{(dt.datetime.now()-dtstart).total_seconds():.2f} sec') diff --git a/docs/whats-new.md b/docs/whats-new.md index 5c0283b30..d1cfb682d 100644 --- a/docs/whats-new.md +++ b/docs/whats-new.md @@ -8,6 +8,7 @@ - update to cdsapi 0.7.2 and properly catching error for dummy dataset in [#972](https://github.com/Deltares/dfm_tools/pull/972) - deprecated `dfmt.open_dataset_extra()` (partly replaced by `dfmt.open_prepare_dataset()`) in [#974](https://github.com/Deltares/dfm_tools/pull/974) - improved nan-conversion in `dfmt.forcinglike_to_Dataset()` in [#982](https://github.com/Deltares/dfm_tools/pull/982) +- improved performance of `dfmt.open_partitioned_dataset()` for datasets with many variables in [#984](https://github.com/Deltares/dfm_tools/pull/984) ## 0.25.0 (2024-08-16) diff --git a/pyproject.toml b/pyproject.toml index 266a67d08..4f88a0a48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,8 +36,8 @@ dependencies = [ "netcdf4>=1.5.4", #bottleneck<1.3.3 pip install fails in py39 "bottleneck>=1.3.3", - #xugrid<0.11.2 sometimes fails on merged chunks that are inconsistent - "xugrid>=0.11.2", + #xugrid<0.12.0 has sub-optimal performance because of accessing dataarrays of variables + "xugrid>=0.12.0", #cdsapi<0.7.2 has different error upon dummy dataset "cdsapi>=0.7.2", #pydap<3.4.0 is from May 2017 and does not support newer python versions