-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
PiperOrigin-RevId: 617337572
- Loading branch information
Showing
2 changed files
with
201 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
# Copyright 2024 The swirl_dynamics Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""Utility functions for Zarr file reading and writing.""" | ||
|
||
from collections.abc import Mapping | ||
import os | ||
from typing import Any | ||
|
||
from etils import epath | ||
import xarray as xr | ||
|
||
# Filesystem backend from etils (`epath.backend.tf_backend`); `write_to_file`
# calls its `exists` method to decide between appending and overwriting.
filesys = epath.backend.tf_backend
|
||
|
||
def _infer_dim_names(
    fixed_shape: tuple[int, ...],
    dim_sizes: Mapping[Any, int],
    append_dim: str,
) -> list[Any]:
  """Assigns each trailing axis a distinct coordinate dimension of equal size."""
  # The append dimension is already used for axis 0, so never hand it out again.
  taken = {append_dim}
  names = []
  for axis, size in enumerate(fixed_shape, start=1):
    for name, length in dim_sizes.items():
      if length == size and name not in taken:
        break
    else:
      raise ValueError(
          f'No unused coordinate dimension of size {size} available for axis'
          f' {axis}; coordinate dimension sizes are {dict(dim_sizes)}.'
      )
    taken.add(name)
    names.append(name)
  return names


def collected_metrics_to_ds(
    data: Mapping[str, Any],
    append_dim: str,
    append_slice: slice,
    coords: xr.core.coordinates.DatasetCoordinates | None = None,
) -> xr.Dataset:
  """Packages collected metrics as an xarray.Dataset.

  The leading axis of every metric array is labeled `append_dim`. When `coords`
  is given, the remaining axes are labeled by matching each axis length against
  the sizes of the dimensions in `coords`, in order; each dimension is used at
  most once, so equally sized dimensions are assigned to successive axes rather
  than repeated. When `coords` is None, the remaining axes of each metric are
  named `dim_0`, `dim_1`, ... and no coordinates are attached.

  Args:
    data: A mapping of metric names to their collected values. Values must
      expose `shape` and `ndim` (e.g. numpy arrays). With `coords`, the shape of
      the first value determines the dimension names used for all metrics.
    append_dim: The name of the axis dimension of metric collection, enforced to
      allow downstream dataset appending.
    append_slice: Current index slice in the `append_dim` axis.
    coords: xarray coordinates of the label dataset used to compute the metrics.

  Returns:
    A dataset containing all collected metrics as variables, with coordinate
    metadata.

  Raises:
    ValueError: If `data` is empty, or if a trailing axis of the first metric
      cannot be matched to an unused dimension of `coords` of the same size.
  """
  if not data:
    raise ValueError('`data` must contain at least one metric to package.')

  if coords is None:
    coord_dict = None
    # Build a fresh dimension list per metric. Extending one shared list inside
    # the loop would give later metrics more dimension names than axes.
    data_vars = {
        key: (
            [append_dim, *(f'dim_{i}' for i in range(value.ndim - 1))],
            value,
        )
        for key, value in data.items()
    }
  else:
    fixed_shape = next(iter(data.values())).shape[1:]
    dims = [
        append_dim,
        *_infer_dim_names(fixed_shape, coords.dims, append_dim),
    ]
    coord_dict = {
        elem: coords[elem].data for elem in dims if elem != append_dim
    }
    # Only the rows covered by this batch are kept along the append dimension.
    coord_dict[append_dim] = coords[append_dim].data[append_slice]
    data_vars = {key: (dims, value) for key, value in data.items()}

  return xr.Dataset(
      data_vars=data_vars,
      coords=coord_dict,
      attrs=dict(description='Collected local metrics.'),
  )
|
||
|
||
def collected_metrics_to_zarr(
    data: Mapping[str, Any],
    *,
    out_dir: epath.PathLike,
    basename: str,
    append_dim: str,
    coords: xr.core.coordinates.DatasetCoordinates | None = None,
    append_slice: slice,
) -> None:
  """Packages collected metrics into a dataset and writes it to zarr.

  Args:
    data: A mapping of metric names to their collected values.
    out_dir: Directory in which the zarr store is written.
    basename: Name of the store, without the `.zarr` suffix.
    append_dim: Name of the leading (collection) dimension; also forwarded to
      `write_to_file` as the dimension to append along.
    coords: Optional xarray coordinates forwarded to `collected_metrics_to_ds`.
    append_slice: Current index slice in the `append_dim` axis.
  """
  metrics_ds = collected_metrics_to_ds(
      data=data,
      append_dim=append_dim,
      append_slice=append_slice,
      coords=coords,
  )
  write_to_file(
      metrics_ds,
      out_dir=out_dir,
      basename=basename,
      append_dim=append_dim,
  )
|
||
|
||
def write_to_file(
    ds, out_dir: epath.PathLike, basename: str, append_dim: str | None = None
) -> None:
  """Writes a dataset to `<out_dir>/<basename>.zarr`, appending when possible.

  Args:
    ds: The dataset to write; its `to_zarr` method is called.
    out_dir: Directory in which the zarr store lives.
    basename: Name of the store, without the `.zarr` suffix.
    append_dim: Dimension to append along. The write appends only if this is
      not None and the store already exists; otherwise mode `'w'` is used.
  """
  target = os.path.join(out_dir, basename + '.zarr')
  appending = filesys.exists(target) and append_dim is not None
  if not appending:
    ds.to_zarr(target, mode='w')
    return
  ds.to_zarr(target, mode='a', append_dim=append_dim)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
# Copyright 2024 The swirl_dynamics Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import os | ||
|
||
from absl import flags | ||
from absl.testing import absltest | ||
from absl.testing import parameterized | ||
import numpy as np | ||
import pandas as pd | ||
from swirl_dynamics.data import zarr_utils | ||
import xarray as xr | ||
|
||
# Alias for absl's `flags.FLAGS` object; not referenced by the tests visible
# in this file.
FLAGS = flags.FLAGS
|
||
|
||
class ZarrUtilsTest(parameterized.TestCase):
  """Tests for dataset packaging and zarr writing in `zarr_utils`."""

  def _make_metric_inputs(self):
    """Builds the metric arrays and label coordinates shared by the tests."""
    # The trailing axis lengths (10, 5, 3) are distinct from one another and
    # from the 8-entry time axis, so each axis has one coordinate of its size.
    shape = (2, 10, 5, 3)
    data = {"foo": np.ones(shape), "bar": np.ones(shape)}
    append_dim = "time"
    # Entries 2 and 3 of the time axis, i.e. 2012-01-03 and 2012-01-04.
    append_slice = slice(2, 4)
    coord_dict = {
        append_dim: pd.date_range("2012-01-01", "2012-01-08"),
        "lon": range(shape[1]),
        "lat": range(shape[2]),
        "field": ["var1", "var2", "var3"],
    }
    coords = xr.Dataset(coords=coord_dict).coords
    return data, append_dim, append_slice, coords

  def test_collected_metrics_to_ds(self):
    data, append_dim, append_slice, coords = self._make_metric_inputs()
    ds = zarr_utils.collected_metrics_to_ds(
        data,
        append_dim,
        append_slice,
        coords,
    )
    with self.subTest("Correct output format"):
      self.assertIsInstance(ds, xr.Dataset)
      self.assertIn(append_dim, ds.dims)
      self.assertIn("field", ds.dims)
    with self.subTest("Correct coordinates"):
      # `Dataset.sizes` is the name -> length mapping; indexing `Dataset.dims`
      # as a mapping is deprecated in xarray.
      self.assertEqual(ds.sizes[append_dim], 2)
      # Timestamps are compared exactly rather than "almost equal".
      np.testing.assert_array_equal(
          ds.coords[append_dim].values,
          pd.date_range("2012-01-03", "2012-01-04").values,
      )

  def test_collected_metrics_to_zarr(self):
    data, append_dim, append_slice, coords = self._make_metric_inputs()
    outdir = self.create_tempdir()
    zarr_utils.collected_metrics_to_zarr(
        data,
        out_dir=outdir,
        basename="test_metrics",
        append_dim=append_dim,
        append_slice=append_slice,
        coords=coords,
    )
    out_path = os.path.join(outdir, "test_metrics.zarr")

    with self.subTest("Correct file creation"):
      self.assertTrue(os.path.exists(out_path))

    with self.subTest("Correct stored contents"):
      ds_written = xr.open_zarr(out_path)
      self.assertEqual(ds_written.sizes[append_dim], 2)
      self.assertIn("foo", ds_written.data_vars)
      self.assertIn("bar", ds_written.data_vars)

  def test_write_to_file(self):
    foo = np.ones((3,))
    outdir = self.create_tempdir()
    ds = xr.Dataset(data_vars=dict(foo=(["x"], foo)))
    zarr_utils.write_to_file(ds, outdir, "written_file")
    out_path = os.path.join(outdir, "written_file.zarr")

    with self.subTest("Correct file creation"):
      self.assertTrue(os.path.exists(out_path))

    with self.subTest("Correct appending"):
      # A second write along "x" should double the stored length from 3 to 6.
      zarr_utils.write_to_file(ds, outdir, "written_file", "x")
      ds_appended = xr.open_zarr(out_path)
      self.assertEqual(ds_appended.sizes["x"], 6)
|
||
|
||
# Hand control to absltest's `main` when this file is executed as a script.
if __name__ == "__main__":
  absltest.main()