Skip to content

Commit

Permalink
Merge pull request #382 from ecmwf/ignore_keys
Browse files Browse the repository at this point in the history
Ignore keys
  • Loading branch information
iainrussell authored Jun 24, 2024
2 parents b06ab31 + 0fd4cd9 commit b098bc5
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 1 deletion.
7 changes: 6 additions & 1 deletion cfgrib/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,7 @@ def open_fieldset(
indexpath: T.Optional[str] = None,
filter_by_keys: T.Dict[str, T.Any] = {},
read_keys: T.Sequence[str] = (),
ignore_keys: T.Sequence[str] = [],
time_dims: T.Sequence[str] = ("time", "step"),
extra_coords: T.Dict[str, str] = {},
computed_keys: messages.ComputedKeysType = cfmessage.COMPUTED_KEYS,
Expand All @@ -762,6 +763,7 @@ def open_fieldset(
log.warning(f"indexpath value {indexpath} is ignored")

index_keys = compute_index_keys(time_dims, extra_coords, filter_by_keys)
index_keys = [key for key in index_keys if key not in ignore_keys]
index = messages.FieldsetIndex.from_fieldset(fieldset, index_keys, computed_keys)
filtered_index = index.subindex(filter_by_keys)
return open_from_index(filtered_index, read_keys, time_dims, extra_coords, **kwargs)
Expand All @@ -771,10 +773,12 @@ def open_fileindex(
stream: messages.FileStream,
indexpath: str = messages.DEFAULT_INDEXPATH,
index_keys: T.Sequence[str] = INDEX_KEYS + ["time", "step"],
ignore_keys: T.Sequence[str] = [],
filter_by_keys: T.Dict[str, T.Any] = {},
computed_keys: messages.ComputedKeysType = cfmessage.COMPUTED_KEYS,
) -> messages.FileIndex:
index_keys = sorted(set(index_keys) | set(filter_by_keys))
index_keys = [key for key in index_keys if key not in ignore_keys]
index = messages.FileIndex.from_indexpath_or_filestream(
stream, index_keys, indexpath=indexpath, computed_keys=computed_keys
)
Expand All @@ -789,12 +793,13 @@ def open_file(
read_keys: T.Sequence[str] = (),
time_dims: T.Sequence[str] = ("time", "step"),
extra_coords: T.Dict[str, str] = {},
ignore_keys: T.Sequence[str] = [],
**kwargs: T.Any,
) -> Dataset:
"""Open a GRIB file as a ``cfgrib.Dataset``."""
path = os.fspath(path)
stream = messages.FileStream(path, errors=errors)
index_keys = compute_index_keys(time_dims, extra_coords)
index = open_fileindex(stream, indexpath, index_keys, filter_by_keys=filter_by_keys)
index = open_fileindex(stream, indexpath, index_keys, ignore_keys=ignore_keys, filter_by_keys=filter_by_keys)

return open_from_index(index, read_keys, time_dims, extra_coords, errors=errors, **kwargs)
2 changes: 2 additions & 0 deletions cfgrib/xarray_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def open_dataset(
indexpath: str = messages.DEFAULT_INDEXPATH,
filter_by_keys: T.Dict[str, T.Any] = {},
read_keys: T.Iterable[str] = (),
ignore_keys: T.Iterable[str] = (),
encode_cf: T.Sequence[str] = ("parameter", "time", "geography", "vertical"),
squeeze: bool = True,
time_dims: T.Iterable[str] = ("time", "step"),
Expand All @@ -111,6 +112,7 @@ def open_dataset(
indexpath=indexpath,
filter_by_keys=filter_by_keys,
read_keys=read_keys,
ignore_keys=ignore_keys,
encode_cf=encode_cf,
squeeze=squeeze,
time_dims=time_dims,
Expand Down
36 changes: 36 additions & 0 deletions tests/test_30_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,13 @@ def test_build_dataset_components_time_dims() -> None:
assert dims == {"number": 28, "indexing_time": 2, "step": 20, "latitude": 6, "longitude": 11}


def test_build_dataset_components_ignore_keys() -> None:
    """Check that ``open_fileindex`` leaves out index keys named in ``ignore_keys``."""
    stream = messages.FileStream(TEST_DATA_UKMO, "warn")
    open_args = (stream, messages.DEFAULT_INDEXPATH, dataset.INDEX_KEYS)

    # Without ``ignore_keys`` the key is part of the index.
    default_index = dataset.open_fileindex(*open_args)
    assert "subCentre" in default_index.index_keys

    # Naming the key in ``ignore_keys`` removes it from the index keys.
    pruned_index = dataset.open_fileindex(*open_args, ignore_keys=["subCentre"])
    assert "subCentre" not in pruned_index.index_keys

def test_Dataset() -> None:
res = dataset.open_file(TEST_DATA)
assert "Conventions" in res.attributes
Expand Down Expand Up @@ -172,6 +179,14 @@ def test_Dataset_encode_cf_time() -> None:
assert res.variables["t"].data[:, :, :, :].mean() > 0.0


def test_Dataset_encode_ignore_keys() -> None:
    """Check that ``open_file`` omits the ``GRIB_edition`` attribute when ``edition`` is ignored."""
    baseline = dataset.open_file(TEST_DATA)
    assert baseline.attributes["GRIB_edition"] == 1

    without_edition = dataset.open_file(TEST_DATA, ignore_keys=["edition"])
    assert "GRIB_edition" not in without_edition.attributes


def test_Dataset_encode_cf_geography() -> None:
res = dataset.open_file(TEST_DATA, encode_cf=("geography",))
assert "history" in res.attributes
Expand Down Expand Up @@ -303,6 +318,27 @@ def test_open_fieldset_computed_keys() -> None:
assert np.array_equal(res.variables["2t"].data[()], np.array(fieldset[0]["values"]))


def test_open_fieldset_ignore_keys() -> None:
    """Check that ``open_fieldset`` omits ``GRIB_subCentre`` when ``subCentre`` is ignored."""
    fieldset = {
        -10: {
            "gridType": "regular_ll",
            "Nx": 2,
            "Ny": 3,
            "distinctLatitudes": [-10.0, 0.0, 10.0],
            "distinctLongitudes": [0.0, 10.0],
            "paramId": 167,
            "shortName": "2t",
            "subCentre": "test",
            "values": [[1, 2], [3, 4], [5, 6]],
        }
    }

    res = dataset.open_fieldset(fieldset)
    assert "GRIB_subCentre" in res.attributes

    # ``ignore_keys`` is a sequence of key names. Passing a bare string would
    # turn the ``key not in ignore_keys`` filter into a substring test and only
    # work by accident, so pass a list like the other ignore_keys tests do.
    res = dataset.open_fieldset(fieldset, ignore_keys=["subCentre"])
    assert "GRIB_subCentre" not in res.attributes

def test_open_file() -> None:
res = dataset.open_file(TEST_DATA, filter_by_keys={"shortName": "t"})

Expand Down
48 changes: 48 additions & 0 deletions tests/test_50_xarray_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ def test_xr_open_dataset_file() -> None:
assert list(ds.data_vars) == ["skt"]


def test_xr_open_dataset_file_ignore_keys() -> None:
    """Check that the xarray engine forwards ``ignore_keys`` when opening a file."""
    attr_name = "GRIB_typeOfLevel"

    default_ds = xr.open_dataset(TEST_DATA, engine="cfgrib")
    assert attr_name in default_ds["skt"].attrs

    trimmed_ds = xr.open_dataset(TEST_DATA, engine="cfgrib", ignore_keys=["typeOfLevel"])
    assert attr_name not in trimmed_ds["skt"].attrs


def test_xr_open_dataset_dict() -> None:
fieldset = {
-10: {
Expand All @@ -49,6 +56,26 @@ def test_xr_open_dataset_dict() -> None:
assert list(ds.data_vars) == ["2t"]


def test_xr_open_dataset_dict_ignore_keys() -> None:
    """Check that ``ignore_keys`` is honoured for a dict-based fieldset."""
    message = {
        "gridType": "regular_ll",
        "Nx": 2,
        "Ny": 3,
        "distinctLatitudes": [-10.0, 0.0, 10.0],
        "distinctLongitudes": [0.0, 10.0],
        "paramId": 167,
        "shortName": "2t",
        "typeOfLevel": "surface",
        "values": [[1, 2], [3, 4], [5, 6]],
    }
    fieldset = {-10: message}
    attr_name = "GRIB_typeOfLevel"

    default_ds = xr.open_dataset(fieldset, engine="cfgrib")
    assert attr_name in default_ds["2t"].attrs

    trimmed_ds = xr.open_dataset(fieldset, engine="cfgrib", ignore_keys=["typeOfLevel"])
    assert attr_name not in trimmed_ds["2t"].attrs


def test_xr_open_dataset_list() -> None:
fieldset = [
{
Expand All @@ -73,6 +100,27 @@ def test_xr_open_dataset_list() -> None:
assert ds_empty.equals(xr.Dataset())


def test_xr_open_dataset_list_ignore_keys() -> None:
    """Check that ``ignore_keys`` is honoured for a list-based fieldset."""
    message = {
        "gridType": "regular_ll",
        "Nx": 2,
        "Ny": 3,
        "distinctLatitudes": [-10.0, 0.0, 10.0],
        "distinctLongitudes": [0.0, 10.0],
        "paramId": 167,
        "shortName": "2t",
        "typeOfLevel": "surface",
        "values": [[1, 2], [3, 4], [5, 6]],
    }
    fieldset = [message]
    attr_name = "GRIB_typeOfLevel"

    default_ds = xr.open_dataset(fieldset, engine="cfgrib")
    assert attr_name in default_ds["2t"].attrs

    trimmed_ds = xr.open_dataset(fieldset, engine="cfgrib", ignore_keys=["typeOfLevel"])
    assert attr_name not in trimmed_ds["2t"].attrs


def test_read() -> None:
expected = {
"latitude": 37,
Expand Down

0 comments on commit b098bc5

Please sign in to comment.