Skip to content

Commit

Permalink
Code update
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 617337572
  • Loading branch information
ilopezgp authored and The swirl_dynamics Authors committed Mar 20, 2024
1 parent 1f77160 commit 9e8d199
Show file tree
Hide file tree
Showing 2 changed files with 201 additions and 0 deletions.
101 changes: 101 additions & 0 deletions swirl_dynamics/data/zarr_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Copyright 2024 The swirl_dynamics Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Utility functions for Zarr file reading and writing."""

from collections.abc import Mapping
import os
from typing import Any

from etils import epath
import xarray as xr

filesys = epath.backend.tf_backend


def collected_metrics_to_ds(
    data: Mapping[str, Any],
    append_dim: str,
    append_slice: slice,
    coords: xr.core.coordinates.DatasetCoordinates | None = None,
) -> xr.Dataset:
  """Packages collected metrics as an xarray.Dataset.

  The leading axis of every metric is labelled `append_dim`. When `coords` is
  given, each remaining axis is matched to a coordinate dimension by its
  size; dimensions already assigned (including `append_dim`) are skipped so
  that two axes of equal size are mapped to distinct dimensions, taken in the
  order they appear in `coords`. When `coords` is omitted, the remaining axes
  of each metric are named `dim_0`, `dim_1`, ...

  Args:
    data: A mapping of metric names to their collected values.
    append_dim: The name of the axis dimension of metric collection, enforced to
      allow downstream dataset appending.
    append_slice: Current index slice in the `append_dim` axis.
    coords: xarray coordinates of the label dataset used to compute the metrics.

  Returns:
    A dataset containing all collected metrics as variables, with coordinate
    metadata.

  Raises:
    ValueError: If `data` is empty, or if an axis size of the metrics cannot be
      matched to an unused dimension of `coords`.
  """
  if not data:
    raise ValueError('`data` must contain at least one metric.')

  # The first metric defines the non-appended shape used for dimension lookup.
  fixed_shape = next(iter(data.values())).shape[1:]

  coord_dict = None
  labelled_dims = [append_dim]
  if coords is not None:
    for cur_size in fixed_shape:
      # Pick the first dimension of matching size that is not yet in use, so
      # equal-sized axes never collapse onto the same dimension name.
      match = next(
          (
              name
              for name, size in coords.dims.items()
              if size == cur_size and name not in labelled_dims
          ),
          None,
      )
      if match is None:
        raise ValueError(
            f'No unused dimension of size {cur_size} found in `coords`.'
        )
      labelled_dims.append(match)

    coord_dict = {
        elem: coords[elem].data for elem in labelled_dims if elem != append_dim
    }
    coord_dict[append_dim] = coords[append_dim].data[append_slice]

  data_vars = {}
  for key, value in data.items():
    if coords is None:
      # Build a fresh list per variable; sharing one growing list across
      # variables would hand every metric the wrong number of dimensions.
      var_dims = [append_dim] + [f'dim_{i}' for i in range(value.ndim - 1)]
    else:
      var_dims = list(labelled_dims)
    data_vars[key] = (var_dims, value)

  return xr.Dataset(
      data_vars=data_vars,
      coords=coord_dict,
      attrs=dict(description='Collected local metrics.'),
  )


def collected_metrics_to_zarr(
    data: Mapping[str, Any],
    *,
    out_dir: epath.PathLike,
    basename: str,
    append_dim: str,
    coords: xr.core.coordinates.DatasetCoordinates | None = None,
    append_slice: slice,
) -> None:
  """Packages collected metrics as a dataset and writes it to zarr.

  Args:
    data: A mapping of metric names to their collected values.
    out_dir: Directory in which the zarr store is written.
    basename: Name of the zarr store, without the `.zarr` extension.
    append_dim: Name of the dimension along which metrics are collected and
      appended.
    coords: Optional xarray coordinates used to label the metrics.
    append_slice: Current index slice in the `append_dim` axis.
  """
  # Build the dataset first, then delegate creation/appending to the writer.
  write_to_file(
      collected_metrics_to_ds(
          data=data,
          append_dim=append_dim,
          append_slice=append_slice,
          coords=coords,
      ),
      out_dir,
      basename,
      append_dim,
  )


def write_to_file(
    ds, out_dir: epath.PathLike, basename: str, append_dim: str | None = None
) -> None:
  """Writes an xarray.Dataset to zarr or appends to an existing zarr file.

  Args:
    ds: The dataset to write.
    out_dir: Directory in which the zarr store lives.
    basename: Name of the zarr store, without the `.zarr` extension.
    append_dim: Dimension along which to append. Appending only happens when
      this is given and the store already exists; otherwise the store is
      written in `'w'` mode.
  """
  target = os.path.join(out_dir, basename + '.zarr')
  should_append = filesys.exists(target) and append_dim is not None
  if should_append:
    ds.to_zarr(target, mode='a', append_dim=append_dim)
    return
  ds.to_zarr(target, mode='w')
100 changes: 100 additions & 0 deletions swirl_dynamics/data/zarr_utils_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright 2024 The swirl_dynamics Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from absl import flags
from absl.testing import absltest
from absl.testing import parameterized
import numpy as np
import pandas as pd
from swirl_dynamics.data import zarr_utils
import xarray as xr

FLAGS = flags.FLAGS


class ZarrUtilsTest(parameterized.TestCase):
  """Tests for the Zarr reading and writing utilities."""

  def _make_metric_inputs(self):
    """Returns `(data, append_dim, append_slice, coords)` shared by the tests."""
    shape = (2, 10, 5, 3)
    data = {"foo": np.ones(shape), "bar": np.ones(shape)}
    append_dim = "time"
    append_slice = slice(2, 4)
    coord_dict = {
        append_dim: pd.date_range("2012-01-01", "2012-01-08"),
        "lon": range(shape[1]),
        "lat": range(shape[2]),
        "field": ["var1", "var2", "var3"],
    }
    coords = xr.Dataset(coords=coord_dict).coords
    return data, append_dim, append_slice, coords

  def test_collected_metrics_to_ds(self):
    data, append_dim, append_slice, coords = self._make_metric_inputs()
    ds = zarr_utils.collected_metrics_to_ds(
        data,
        append_dim,
        append_slice,
        coords,
    )
    with self.subTest("Correct output format"):
      self.assertIsInstance(ds, xr.Dataset)
      self.assertIn(append_dim, ds.dims)
      self.assertIn("field", ds.dims)
    with self.subTest("Correct coordinates"):
      # `sizes` is the supported name -> length mapping; reading lengths
      # through `dims` is deprecated in recent xarray releases.
      self.assertEqual(ds.sizes[append_dim], 2)
      self.assertSequenceAlmostEqual(
          ds.coords[append_dim], pd.date_range("2012-01-03", "2012-01-04")
      )

  def test_collected_metrics_to_zarr(self):
    data, append_dim, append_slice, coords = self._make_metric_inputs()
    outdir = self.create_tempdir()
    zarr_utils.collected_metrics_to_zarr(
        data,
        out_dir=outdir,
        basename="test_metrics",
        append_dim=append_dim,
        append_slice=append_slice,
        coords=coords,
    )

    self.assertTrue(os.path.exists(os.path.join(outdir, "test_metrics.zarr")))

  def test_write_to_file(self):
    foo = np.ones((3,))
    outdir = self.create_tempdir()
    ds = xr.Dataset(data_vars=dict(foo=(["x"], foo)))
    zarr_utils.write_to_file(ds, outdir, "written_file")

    with self.subTest("Correct file creation"):
      self.assertTrue(os.path.exists(os.path.join(outdir, "written_file.zarr")))

    with self.subTest("Correct appending"):
      # Writing the same 3-element dataset again along "x" doubles its length.
      zarr_utils.write_to_file(ds, outdir, "written_file", "x")
      ds_appended = xr.open_zarr(os.path.join(outdir, "written_file.zarr"))
      self.assertEqual(ds_appended.sizes["x"], 6)


if __name__ == "__main__":
absltest.main()

0 comments on commit 9e8d199

Please sign in to comment.