diff --git a/cfgrib/dataset.py b/cfgrib/dataset.py
index 82fb373c..900c2840 100644
--- a/cfgrib/dataset.py
+++ b/cfgrib/dataset.py
@@ -751,6 +751,7 @@ def open_fieldset(
     indexpath: T.Optional[str] = None,
     filter_by_keys: T.Dict[str, T.Any] = {},
     read_keys: T.Sequence[str] = (),
+    ignore_keys: T.Sequence[str] = (),
     time_dims: T.Sequence[str] = ("time", "step"),
     extra_coords: T.Dict[str, str] = {},
     computed_keys: messages.ComputedKeysType = cfmessage.COMPUTED_KEYS,
@@ -762,6 +763,8 @@ def open_fieldset(
         log.warning(f"indexpath value {indexpath} is ignored")
 
     index_keys = compute_index_keys(time_dims, extra_coords, filter_by_keys)
+    # Drop the keys the caller asked to ignore before the index is built.
+    index_keys = [key for key in index_keys if key not in ignore_keys]
     index = messages.FieldsetIndex.from_fieldset(fieldset, index_keys, computed_keys)
     filtered_index = index.subindex(filter_by_keys)
     return open_from_index(filtered_index, read_keys, time_dims, extra_coords, **kwargs)
@@ -771,10 +774,13 @@ def open_fileindex(
     stream: messages.FileStream,
     indexpath: str = messages.DEFAULT_INDEXPATH,
     index_keys: T.Sequence[str] = INDEX_KEYS + ["time", "step"],
     filter_by_keys: T.Dict[str, T.Any] = {},
     computed_keys: messages.ComputedKeysType = cfmessage.COMPUTED_KEYS,
+    ignore_keys: T.Sequence[str] = (),
 ) -> messages.FileIndex:
     index_keys = sorted(set(index_keys) | set(filter_by_keys))
+    # Drop the keys the caller asked to ignore before the index is built.
+    index_keys = [key for key in index_keys if key not in ignore_keys]
     index = messages.FileIndex.from_indexpath_or_filestream(
         stream, index_keys, indexpath=indexpath, computed_keys=computed_keys
     )
@@ -789,12 +795,13 @@ def open_file(
     read_keys: T.Sequence[str] = (),
     time_dims: T.Sequence[str] = ("time", "step"),
     extra_coords: T.Dict[str, str] = {},
+    ignore_keys: T.Sequence[str] = (),
     **kwargs: T.Any,
 ) -> Dataset:
     """Open a GRIB file as a ``cfgrib.Dataset``."""
     path = os.fspath(path)
     stream = messages.FileStream(path, errors=errors)
     index_keys = compute_index_keys(time_dims, extra_coords)
-    index = open_fileindex(stream, indexpath, index_keys, filter_by_keys=filter_by_keys)
+    index = open_fileindex(stream, indexpath, index_keys, ignore_keys=ignore_keys, filter_by_keys=filter_by_keys)
 
     return open_from_index(index, read_keys, time_dims, extra_coords, errors=errors, **kwargs)
diff --git a/cfgrib/xarray_plugin.py b/cfgrib/xarray_plugin.py
index a9268208..4a972512 100644
--- a/cfgrib/xarray_plugin.py
+++ b/cfgrib/xarray_plugin.py
@@ -99,6 +99,7 @@ def open_dataset(
         indexpath: str = messages.DEFAULT_INDEXPATH,
         filter_by_keys: T.Dict[str, T.Any] = {},
         read_keys: T.Iterable[str] = (),
+        ignore_keys: T.Iterable[str] = (),
         encode_cf: T.Sequence[str] = ("parameter", "time", "geography", "vertical"),
         squeeze: bool = True,
         time_dims: T.Iterable[str] = ("time", "step"),
@@ -111,6 +112,7 @@ def open_dataset(
             indexpath=indexpath,
             filter_by_keys=filter_by_keys,
             read_keys=read_keys,
+            ignore_keys=ignore_keys,
             encode_cf=encode_cf,
             squeeze=squeeze,
             time_dims=time_dims,
diff --git a/tests/test_30_dataset.py b/tests/test_30_dataset.py
index afbd28ad..9914e482 100644
--- a/tests/test_30_dataset.py
+++ b/tests/test_30_dataset.py
@@ -132,6 +132,14 @@ def test_build_dataset_components_time_dims() -> None:
     assert dims == {"number": 28, "indexing_time": 2, "step": 20, "latitude": 6, "longitude": 11}
 
 
+def test_build_dataset_components_ignore_keys() -> None:
+    stream = messages.FileStream(TEST_DATA_UKMO, "warn")
+    index = dataset.open_fileindex(stream, messages.DEFAULT_INDEXPATH, dataset.INDEX_KEYS)
+    assert "subCentre" in index.index_keys
+    index = dataset.open_fileindex(stream, messages.DEFAULT_INDEXPATH, dataset.INDEX_KEYS, ignore_keys=["subCentre"])
+    assert "subCentre" not in index.index_keys
+
+
 def test_Dataset() -> None:
     res = dataset.open_file(TEST_DATA)
     assert "Conventions" in res.attributes
@@ -173,6 +181,14 @@ def test_Dataset_encode_cf_time() -> None:
     assert res.variables["t"].data[:, :, :, :].mean() > 0.0
 
 
+def test_Dataset_encode_ignore_keys() -> None:
+    res = dataset.open_file(TEST_DATA)
+    assert res.attributes["GRIB_edition"] == 1
+
+    res = dataset.open_file(TEST_DATA, ignore_keys=["edition"])
+    assert "GRIB_edition" not in res.attributes
+
+
 def test_Dataset_encode_cf_geography() -> None:
     res = dataset.open_file(TEST_DATA, encode_cf=("geography",))
     assert "history" in res.attributes
@@ -304,6 +320,28 @@ def test_open_fieldset_computed_keys() -> None:
     assert np.array_equal(res.variables["2t"].data[()], np.array(fieldset[0]["values"]))
 
 
+def test_open_fieldset_ignore_keys() -> None:
+    fieldset = {
+        -10: {
+            "gridType": "regular_ll",
+            "Nx": 2,
+            "Ny": 3,
+            "distinctLatitudes": [-10.0, 0.0, 10.0],
+            "distinctLongitudes": [0.0, 10.0],
+            "paramId": 167,
+            "shortName": "2t",
+            "subCentre": "test",
+            "values": [[1, 2], [3, 4], [5, 6]],
+        }
+    }
+
+    res = dataset.open_fieldset(fieldset)
+    assert "GRIB_subCentre" in res.attributes
+
+    res = dataset.open_fieldset(fieldset, ignore_keys=["subCentre"])
+    assert "GRIB_subCentre" not in res.attributes
+
+
 def test_open_file() -> None:
     res = dataset.open_file(TEST_DATA)
 
diff --git a/tests/test_50_xarray_plugin.py b/tests/test_50_xarray_plugin.py
index bf3cecc8..16d63dd8 100644
--- a/tests/test_50_xarray_plugin.py
+++ b/tests/test_50_xarray_plugin.py
@@ -54,6 +54,13 @@ def test_xr_open_dataset_file_filter_by_keys() -> None:
     assert "u" not in ds.data_vars
 
 
+def test_xr_open_dataset_file_ignore_keys() -> None:
+    ds = xr.open_dataset(TEST_DATA, engine="cfgrib")
+    assert "GRIB_typeOfLevel" in ds["skt"].attrs
+    ds = xr.open_dataset(TEST_DATA, engine="cfgrib", ignore_keys=["typeOfLevel"])
+    assert "GRIB_typeOfLevel" not in ds["skt"].attrs
+
+
 def test_xr_open_dataset_dict() -> None:
     fieldset = {
         -10: {
@@ -74,6 +81,26 @@ def test_xr_open_dataset_dict() -> None:
     assert list(ds.data_vars) == ["2t"]
 
 
+def test_xr_open_dataset_dict_ignore_keys() -> None:
+    fieldset = {
+        -10: {
+            "gridType": "regular_ll",
+            "Nx": 2,
+            "Ny": 3,
+            "distinctLatitudes": [-10.0, 0.0, 10.0],
+            "distinctLongitudes": [0.0, 10.0],
+            "paramId": 167,
+            "shortName": "2t",
+            "typeOfLevel": "surface",
+            "values": [[1, 2], [3, 4], [5, 6]],
+        }
+    }
+    ds = xr.open_dataset(fieldset, engine="cfgrib")
+    assert "GRIB_typeOfLevel" in ds["2t"].attrs
+    ds = xr.open_dataset(fieldset, engine="cfgrib", ignore_keys=["typeOfLevel"])
+    assert "GRIB_typeOfLevel" not in ds["2t"].attrs
+
+
 def test_xr_open_dataset_list() -> None:
     fieldset = [
         {
@@ -98,6 +125,27 @@ def test_xr_open_dataset_list() -> None:
     assert ds_empty.equals(xr.Dataset())
 
 
+def test_xr_open_dataset_list_ignore_keys() -> None:
+    fieldset = [
+        {
+            "gridType": "regular_ll",
+            "Nx": 2,
+            "Ny": 3,
+            "distinctLatitudes": [-10.0, 0.0, 10.0],
+            "distinctLongitudes": [0.0, 10.0],
+            "paramId": 167,
+            "shortName": "2t",
+            "typeOfLevel": "surface",
+            "values": [[1, 2], [3, 4], [5, 6]],
+        }
+    ]
+
+    ds = xr.open_dataset(fieldset, engine="cfgrib")
+    assert "GRIB_typeOfLevel" in ds["2t"].attrs
+    ds = xr.open_dataset(fieldset, engine="cfgrib", ignore_keys=["typeOfLevel"])
+    assert "GRIB_typeOfLevel" not in ds["2t"].attrs
+
+
 def test_read() -> None:
     expected = {
         "latitude": 37,