From afc884a86ba0c0bb9f429306e89f0076685e29ad Mon Sep 17 00:00:00 2001 From: EddyCMWF Date: Wed, 19 Jun 2024 16:57:46 +0100 Subject: [PATCH 1/8] ignore_keys basic --- cfgrib/dataset.py | 5 ++++- cfgrib/xarray_plugin.py | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cfgrib/dataset.py b/cfgrib/dataset.py index f37266f3..0967f6df 100644 --- a/cfgrib/dataset.py +++ b/cfgrib/dataset.py @@ -771,10 +771,12 @@ def open_fileindex( stream: messages.FileStream, indexpath: str = messages.DEFAULT_INDEXPATH, index_keys: T.Sequence[str] = INDEX_KEYS + ["time", "step"], + ignore_keys: T.Sequence[str] = [], filter_by_keys: T.Dict[str, T.Any] = {}, computed_keys: messages.ComputedKeysType = cfmessage.COMPUTED_KEYS, ) -> messages.FileIndex: index_keys = sorted(set(index_keys) | set(filter_by_keys)) + index_keys = [key for key in index_keys if key not in ignore_keys] index = messages.FileIndex.from_indexpath_or_filestream( stream, index_keys, indexpath=indexpath, computed_keys=computed_keys ) @@ -789,12 +791,13 @@ def open_file( read_keys: T.Sequence[str] = (), time_dims: T.Sequence[str] = ("time", "step"), extra_coords: T.Dict[str, str] = {}, + ignore_keys: T.Sequence[str] = [], **kwargs: T.Any, ) -> Dataset: """Open a GRIB file as a ``cfgrib.Dataset``.""" path = os.fspath(path) stream = messages.FileStream(path, errors=errors) index_keys = compute_index_keys(time_dims, extra_coords) - index = open_fileindex(stream, indexpath, index_keys, filter_by_keys=filter_by_keys) + index = open_fileindex(stream, indexpath, index_keys, ignore_keys=ignore_keys, filter_by_keys=filter_by_keys) return open_from_index(index, read_keys, time_dims, extra_coords, errors=errors, **kwargs) diff --git a/cfgrib/xarray_plugin.py b/cfgrib/xarray_plugin.py index a9268208..4a972512 100644 --- a/cfgrib/xarray_plugin.py +++ b/cfgrib/xarray_plugin.py @@ -99,6 +99,7 @@ def open_dataset( indexpath: str = messages.DEFAULT_INDEXPATH, filter_by_keys: T.Dict[str, T.Any] = {}, read_keys: 
T.Iterable[str] = (), + ignore_keys: T.Iterable[str] = (), encode_cf: T.Sequence[str] = ("parameter", "time", "geography", "vertical"), squeeze: bool = True, time_dims: T.Iterable[str] = ("time", "step"), @@ -111,6 +112,7 @@ def open_dataset( indexpath=indexpath, filter_by_keys=filter_by_keys, read_keys=read_keys, + ignore_keys=ignore_keys, encode_cf=encode_cf, squeeze=squeeze, time_dims=time_dims, From c3a668130bddafed4b683dffa9ee419d024e5db0 Mon Sep 17 00:00:00 2001 From: EddyCMWF Date: Thu, 20 Jun 2024 12:02:43 +0100 Subject: [PATCH 2/8] functionality added to fieldset --- cfgrib/dataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cfgrib/dataset.py b/cfgrib/dataset.py index 0967f6df..f4054d1c 100644 --- a/cfgrib/dataset.py +++ b/cfgrib/dataset.py @@ -751,6 +751,7 @@ def open_fieldset( indexpath: T.Optional[str] = None, filter_by_keys: T.Dict[str, T.Any] = {}, read_keys: T.Sequence[str] = (), + ignore_keys: T.Sequence[str] = [], time_dims: T.Sequence[str] = ("time", "step"), extra_coords: T.Dict[str, str] = {}, computed_keys: messages.ComputedKeysType = cfmessage.COMPUTED_KEYS, @@ -762,6 +763,7 @@ def open_fieldset( log.warning(f"indexpath value {indexpath} is ignored") index_keys = compute_index_keys(time_dims, extra_coords, filter_by_keys) + index_keys = [key for key in index_keys if key not in ignore_keys] index = messages.FieldsetIndex.from_fieldset(fieldset, index_keys, computed_keys) filtered_index = index.subindex(filter_by_keys) return open_from_index(filtered_index, read_keys, time_dims, extra_coords, **kwargs) From 4baeb37f155ce097faef267b4573a61a141ff26e Mon Sep 17 00:00:00 2001 From: EddyCMWF Date: Fri, 21 Jun 2024 08:33:10 +0100 Subject: [PATCH 3/8] optional level encoding --- cfgrib/dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cfgrib/dataset.py b/cfgrib/dataset.py index f4054d1c..d87f1b53 100644 --- a/cfgrib/dataset.py +++ b/cfgrib/dataset.py @@ -463,7 +463,8 @@ def encode_cf_first(data_var_attrs, 
encode_cf=("parameter", "time"), time_dims=( raise ValueError("time_dims %r not a subset of %r" % (time_dims, ALL_REF_TIME_KEYS)) else: coords_map.extend(DATA_TIME_KEYS) - coords_map.extend(VERTICAL_KEYS) + if "level" in encode_cf: + coords_map.extend(VERTICAL_KEYS) coords_map.extend(SPECTRA_KEYS) return coords_map From 4da53e10c10596c0c95ba9dc9f56954ec50cb791 Mon Sep 17 00:00:00 2001 From: EddyCMWF Date: Fri, 21 Jun 2024 13:53:34 +0100 Subject: [PATCH 4/8] ignore_keys test added --- tests/test_30_dataset.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/test_30_dataset.py b/tests/test_30_dataset.py index 5523d3ee..2fdb9105 100644 --- a/tests/test_30_dataset.py +++ b/tests/test_30_dataset.py @@ -303,6 +303,27 @@ def test_open_fieldset_computed_keys() -> None: assert np.array_equal(res.variables["2t"].data[()], np.array(fieldset[0]["values"])) +def test_open_fieldset_ignore_keys() -> None: + fieldset = { + -10: { + "gridType": "regular_ll", + "Nx": 2, + "Ny": 3, + "distinctLatitudes": [-10.0, 0.0, 10.0], + "distinctLongitudes": [0.0, 10.0], + "paramId": 167, + "shortName": "2t", + "subCentre": "test", + "values": [[1, 2], [3, 4], [5, 6]], + } + } + + res = dataset.open_fieldset(fieldset) + assert "GRIB_subCentre" in res.attributes + + res = dataset.open_fieldset(fieldset, ignore_keys="subCentre") + assert "GRIB_subCentre" not in res.attributes + def test_open_file() -> None: res = dataset.open_file(TEST_DATA, filter_by_keys={"shortName": "t"}) From 8d4f14eae4ad3b2cd50932745b986b11c65cb884 Mon Sep 17 00:00:00 2001 From: EddyCMWF Date: Fri, 21 Jun 2024 14:33:25 +0100 Subject: [PATCH 5/8] ignore_keys test --- tests/test_30_dataset.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_30_dataset.py b/tests/test_30_dataset.py index 2fdb9105..83f0392f 100644 --- a/tests/test_30_dataset.py +++ b/tests/test_30_dataset.py @@ -131,6 +131,13 @@ def test_build_dataset_components_time_dims() -> None: assert dims == {"number": 28, 
"indexing_time": 2, "step": 20, "latitude": 6, "longitude": 11} +def test_build_dataset_components_ignore_keys() -> None: + stream = messages.FileStream(TEST_DATA_UKMO, "warn") + index = dataset.open_fileindex(stream, messages.DEFAULT_INDEXPATH, dataset.INDEX_KEYS) + assert "subCentre" in index.index_keys + index = dataset.open_fileindex(stream, messages.DEFAULT_INDEXPATH, index_keys, ignore_keys=["subCentre"]) + assert "subCentre" not in index.index_keys + def test_Dataset() -> None: res = dataset.open_file(TEST_DATA) assert "Conventions" in res.attributes From d91d4d6fe67b4a767358d01f13c4217023835c87 Mon Sep 17 00:00:00 2001 From: EddyCMWF Date: Fri, 21 Jun 2024 14:42:33 +0100 Subject: [PATCH 6/8] remove vertical encode_cf change as too hard coded into system --- cfgrib/dataset.py | 3 +-- tests/test_30_dataset.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cfgrib/dataset.py b/cfgrib/dataset.py index d87f1b53..f4054d1c 100644 --- a/cfgrib/dataset.py +++ b/cfgrib/dataset.py @@ -463,8 +463,7 @@ def encode_cf_first(data_var_attrs, encode_cf=("parameter", "time"), time_dims=( raise ValueError("time_dims %r not a subset of %r" % (time_dims, ALL_REF_TIME_KEYS)) else: coords_map.extend(DATA_TIME_KEYS) - if "level" in encode_cf: - coords_map.extend(VERTICAL_KEYS) + coords_map.extend(VERTICAL_KEYS) coords_map.extend(SPECTRA_KEYS) return coords_map diff --git a/tests/test_30_dataset.py b/tests/test_30_dataset.py index 83f0392f..08329b5a 100644 --- a/tests/test_30_dataset.py +++ b/tests/test_30_dataset.py @@ -135,7 +135,7 @@ def test_build_dataset_components_ignore_keys() -> None: stream = messages.FileStream(TEST_DATA_UKMO, "warn") index = dataset.open_fileindex(stream, messages.DEFAULT_INDEXPATH, dataset.INDEX_KEYS) assert "subCentre" in index.index_keys - index = dataset.open_fileindex(stream, messages.DEFAULT_INDEXPATH, index_keys, ignore_keys=["subCentre"]) + index = dataset.open_fileindex(stream, messages.DEFAULT_INDEXPATH, dataset.INDEX_KEYS, 
ignore_keys=["subCentre"]) assert "subCentre" not in index.index_keys def test_Dataset() -> None: From 44c7a86960c48f461dadd834665670e0f7967eb0 Mon Sep 17 00:00:00 2001 From: EddyCMWF Date: Fri, 21 Jun 2024 15:23:35 +0100 Subject: [PATCH 7/8] open_file test --- tests/test_30_dataset.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_30_dataset.py b/tests/test_30_dataset.py index 08329b5a..a61cde53 100644 --- a/tests/test_30_dataset.py +++ b/tests/test_30_dataset.py @@ -179,6 +179,14 @@ def test_Dataset_encode_cf_time() -> None: assert res.variables["t"].data[:, :, :, :].mean() > 0.0 +def test_Dataset_encode_ignore_keys() -> None: + res = dataset.open_file(TEST_DATA) + assert res.attributes["GRIB_edition"] == 1 + + res = dataset.open_file(TEST_DATA, ignore_keys=["edition"]) + assert "GRIB_edition" not in res.attributes + + def test_Dataset_encode_cf_geography() -> None: res = dataset.open_file(TEST_DATA, encode_cf=("geography",)) assert "history" in res.attributes From e325b4fd318a0dfcb5a642a8684d9de49531e920 Mon Sep 17 00:00:00 2001 From: EddyCMWF Date: Fri, 21 Jun 2024 15:39:11 +0100 Subject: [PATCH 8/8] QA --- tests/test_50_xarray_plugin.py | 48 ++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/test_50_xarray_plugin.py b/tests/test_50_xarray_plugin.py index d638af62..388e68cb 100644 --- a/tests/test_50_xarray_plugin.py +++ b/tests/test_50_xarray_plugin.py @@ -29,6 +29,13 @@ def test_xr_open_dataset_file() -> None: assert list(ds.data_vars) == ["skt"] +def test_xr_open_dataset_file_ignore_keys() -> None: + ds = xr.open_dataset(TEST_DATA, engine="cfgrib") + assert "GRIB_typeOfLevel" in ds["skt"].attrs + ds = xr.open_dataset(TEST_DATA, engine="cfgrib", ignore_keys=["typeOfLevel"]) + assert "GRIB_typeOfLevel" not in ds["skt"].attrs + + def test_xr_open_dataset_dict() -> None: fieldset = { -10: { @@ -49,6 +56,26 @@ def test_xr_open_dataset_dict() -> None: assert list(ds.data_vars) == ["2t"] +def 
test_xr_open_dataset_dict_ignore_keys() -> None: + fieldset = { + -10: { + "gridType": "regular_ll", + "Nx": 2, + "Ny": 3, + "distinctLatitudes": [-10.0, 0.0, 10.0], + "distinctLongitudes": [0.0, 10.0], + "paramId": 167, + "shortName": "2t", + "typeOfLevel": "surface", + "values": [[1, 2], [3, 4], [5, 6]], + } + } + ds = xr.open_dataset(fieldset, engine="cfgrib") + assert "GRIB_typeOfLevel" in ds["2t"].attrs + ds = xr.open_dataset(fieldset, engine="cfgrib", ignore_keys=["typeOfLevel"]) + assert "GRIB_typeOfLevel" not in ds["2t"].attrs + + def test_xr_open_dataset_list() -> None: fieldset = [ { @@ -73,6 +100,27 @@ def test_xr_open_dataset_list() -> None: assert ds_empty.equals(xr.Dataset()) +def test_xr_open_dataset_list_ignore_keys() -> None: + fieldset = [ + { + "gridType": "regular_ll", + "Nx": 2, + "Ny": 3, + "distinctLatitudes": [-10.0, 0.0, 10.0], + "distinctLongitudes": [0.0, 10.0], + "paramId": 167, + "shortName": "2t", + "typeOfLevel": "surface", + "values": [[1, 2], [3, 4], [5, 6]], + } + ] + + ds = xr.open_dataset(fieldset, engine="cfgrib") + assert "GRIB_typeOfLevel" in ds["2t"].attrs + ds = xr.open_dataset(fieldset, engine="cfgrib", ignore_keys=["typeOfLevel"]) + assert "GRIB_typeOfLevel" not in ds["2t"].attrs + + def test_read() -> None: expected = { "latitude": 37,