Merge pull request #382 from ecmwf/ignore_keys

Ignore keys
ecmwf · Jun 24, 2024 · b098bc5 · b098bc5
2 parents b06ab31 + 0fd4cd9
commit b098bc5
Show file tree

Hide file tree

Showing 4 changed files with 92 additions and 1 deletion.
diff --git a/cfgrib/dataset.py b/cfgrib/dataset.py
@@ -751,6 +751,7 @@ def open_fieldset(
     indexpath: T.Optional[str] = None,
     filter_by_keys: T.Dict[str, T.Any] = {},
     read_keys: T.Sequence[str] = (),
+    ignore_keys: T.Sequence[str] = (),
     time_dims: T.Sequence[str] = ("time", "step"),
     extra_coords: T.Dict[str, str] = {},
     computed_keys: messages.ComputedKeysType = cfmessage.COMPUTED_KEYS,
@@ -762,6 +763,7 @@ def open_fieldset(
         log.warning(f"indexpath value {indexpath} is ignored")
 
     index_keys = compute_index_keys(time_dims, extra_coords, filter_by_keys)
+    index_keys = [key for key in index_keys if key not in ignore_keys]
     index = messages.FieldsetIndex.from_fieldset(fieldset, index_keys, computed_keys)
     filtered_index = index.subindex(filter_by_keys)
     return open_from_index(filtered_index, read_keys, time_dims, extra_coords, **kwargs)
@@ -771,10 +773,12 @@ def open_fileindex(
     stream: messages.FileStream,
     indexpath: str = messages.DEFAULT_INDEXPATH,
     index_keys: T.Sequence[str] = INDEX_KEYS + ["time", "step"],
+    ignore_keys: T.Sequence[str] = (),
     filter_by_keys: T.Dict[str, T.Any] = {},
     computed_keys: messages.ComputedKeysType = cfmessage.COMPUTED_KEYS,
 ) -> messages.FileIndex:
     index_keys = sorted(set(index_keys) | set(filter_by_keys))
+    index_keys = [key for key in index_keys if key not in ignore_keys]
     index = messages.FileIndex.from_indexpath_or_filestream(
         stream, index_keys, indexpath=indexpath, computed_keys=computed_keys
     )
@@ -789,12 +793,13 @@ def open_file(
     read_keys: T.Sequence[str] = (),
     time_dims: T.Sequence[str] = ("time", "step"),
     extra_coords: T.Dict[str, str] = {},
+    ignore_keys: T.Sequence[str] = (),
     **kwargs: T.Any,
 ) -> Dataset:
     """Open a GRIB file as a ``cfgrib.Dataset``."""
     path = os.fspath(path)
     stream = messages.FileStream(path, errors=errors)
     index_keys = compute_index_keys(time_dims, extra_coords)
-    index = open_fileindex(stream, indexpath, index_keys, filter_by_keys=filter_by_keys)
+    index = open_fileindex(stream, indexpath, index_keys, ignore_keys=ignore_keys, filter_by_keys=filter_by_keys)
 
     return open_from_index(index, read_keys, time_dims, extra_coords, errors=errors, **kwargs)
diff --git a/cfgrib/xarray_plugin.py b/cfgrib/xarray_plugin.py
@@ -99,6 +99,7 @@ def open_dataset(
         indexpath: str = messages.DEFAULT_INDEXPATH,
         filter_by_keys: T.Dict[str, T.Any] = {},
         read_keys: T.Iterable[str] = (),
+        ignore_keys: T.Iterable[str] = (),
         encode_cf: T.Sequence[str] = ("parameter", "time", "geography", "vertical"),
         squeeze: bool = True,
         time_dims: T.Iterable[str] = ("time", "step"),
@@ -111,6 +112,7 @@ def open_dataset(
             indexpath=indexpath,
             filter_by_keys=filter_by_keys,
             read_keys=read_keys,
+            ignore_keys=ignore_keys,
             encode_cf=encode_cf,
             squeeze=squeeze,
             time_dims=time_dims,

diff --git a/tests/test_30_dataset.py b/tests/test_30_dataset.py
@@ -131,6 +131,13 @@ def test_build_dataset_components_time_dims() -> None:
     assert dims == {"number": 28, "indexing_time": 2, "step": 20, "latitude": 6, "longitude": 11}
 
 
+def test_build_dataset_components_ignore_keys() -> None:
+    stream = messages.FileStream(TEST_DATA_UKMO, "warn")
+    index = dataset.open_fileindex(stream, messages.DEFAULT_INDEXPATH, dataset.INDEX_KEYS)
+    assert "subCentre" in index.index_keys
+    index = dataset.open_fileindex(stream, messages.DEFAULT_INDEXPATH, dataset.INDEX_KEYS, ignore_keys=["subCentre"])
+    assert "subCentre" not in index.index_keys
+
 def test_Dataset() -> None:
     res = dataset.open_file(TEST_DATA)
     assert "Conventions" in res.attributes
@@ -172,6 +179,14 @@ def test_Dataset_encode_cf_time() -> None:
     assert res.variables["t"].data[:, :, :, :].mean() > 0.0
 
 
+def test_Dataset_encode_ignore_keys() -> None:
+    res = dataset.open_file(TEST_DATA)
+    assert res.attributes["GRIB_edition"] == 1
+
+    res = dataset.open_file(TEST_DATA, ignore_keys=["edition"])
+    assert "GRIB_edition" not in res.attributes
+
+
 def test_Dataset_encode_cf_geography() -> None:
     res = dataset.open_file(TEST_DATA, encode_cf=("geography",))
     assert "history" in res.attributes
@@ -303,6 +318,27 @@ def test_open_fieldset_computed_keys() -> None:
     assert np.array_equal(res.variables["2t"].data[()], np.array(fieldset[0]["values"]))
 
 
+def test_open_fieldset_ignore_keys() -> None:
+    fieldset = {
+        -10: {
+            "gridType": "regular_ll",
+            "Nx": 2,
+            "Ny": 3,
+            "distinctLatitudes": [-10.0, 0.0, 10.0],
+            "distinctLongitudes": [0.0, 10.0],
+            "paramId": 167,
+            "shortName": "2t",
+            "subCentre": "test",
+            "values": [[1, 2], [3, 4], [5, 6]],
+        }
+    }
+
+    res = dataset.open_fieldset(fieldset)
+    assert "GRIB_subCentre" in res.attributes
+
+    res = dataset.open_fieldset(fieldset, ignore_keys=["subCentre"])
+    assert "GRIB_subCentre" not in res.attributes
+
 def test_open_file() -> None:
     res = dataset.open_file(TEST_DATA, filter_by_keys={"shortName": "t"})
 

diff --git a/tests/test_50_xarray_plugin.py b/tests/test_50_xarray_plugin.py
@@ -29,6 +29,13 @@ def test_xr_open_dataset_file() -> None:
     assert list(ds.data_vars) == ["skt"]
 
 
+def test_xr_open_dataset_file_ignore_keys() -> None:
+    ds = xr.open_dataset(TEST_DATA, engine="cfgrib")
+    assert "GRIB_typeOfLevel" in ds["skt"].attrs
+    ds = xr.open_dataset(TEST_DATA, engine="cfgrib", ignore_keys=["typeOfLevel"])
+    assert "GRIB_typeOfLevel" not in ds["skt"].attrs
+
+
 def test_xr_open_dataset_dict() -> None:
     fieldset = {
         -10: {
@@ -49,6 +56,26 @@ def test_xr_open_dataset_dict() -> None:
     assert list(ds.data_vars) == ["2t"]
 
 
+def test_xr_open_dataset_dict_ignore_keys() -> None:
+    fieldset = {
+        -10: {
+            "gridType": "regular_ll",
+            "Nx": 2,
+            "Ny": 3,
+            "distinctLatitudes": [-10.0, 0.0, 10.0],
+            "distinctLongitudes": [0.0, 10.0],
+            "paramId": 167,
+            "shortName": "2t",
+            "typeOfLevel": "surface",
+            "values": [[1, 2], [3, 4], [5, 6]],
+        }
+    }
+    ds = xr.open_dataset(fieldset, engine="cfgrib")
+    assert "GRIB_typeOfLevel" in ds["2t"].attrs
+    ds = xr.open_dataset(fieldset, engine="cfgrib", ignore_keys=["typeOfLevel"])
+    assert "GRIB_typeOfLevel" not in ds["2t"].attrs
+
+
 def test_xr_open_dataset_list() -> None:
     fieldset = [
         {
@@ -73,6 +100,27 @@ def test_xr_open_dataset_list() -> None:
     assert ds_empty.equals(xr.Dataset())
 
 
+def test_xr_open_dataset_list_ignore_keys() -> None:
+    fieldset = [
+        {
+            "gridType": "regular_ll",
+            "Nx": 2,
+            "Ny": 3,
+            "distinctLatitudes": [-10.0, 0.0, 10.0],
+            "distinctLongitudes": [0.0, 10.0],
+            "paramId": 167,
+            "shortName": "2t",
+            "typeOfLevel": "surface",
+            "values": [[1, 2], [3, 4], [5, 6]],
+        }
+    ]
+
+    ds = xr.open_dataset(fieldset, engine="cfgrib")
+    assert "GRIB_typeOfLevel" in ds["2t"].attrs
+    ds = xr.open_dataset(fieldset, engine="cfgrib", ignore_keys=["typeOfLevel"])
+    assert "GRIB_typeOfLevel" not in ds["2t"].attrs
+
+
 def test_read() -> None:
     expected = {
         "latitude": 37,