Skip to content

Commit

Permalink
Merge pull request #58 from bcdev/forman-56-no_gobal_attrs
Browse files Browse the repository at this point in the history
Attributes of target dataset no longer empty
  • Loading branch information
forman authored Feb 12, 2024
2 parents b654f6f + 4c50302 commit c4f97d1
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 14 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

### Fixes

* Global metadata attributes of target dataset are no longer empty. [#56]

* If the target _parent_ directory did not exist, an exception was raised
reporting that the lock file to be written does not exist. Changed this to
report that the target parent directory does not exist. [#55]
Expand Down
4 changes: 4 additions & 0 deletions tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ def make_test_dataset(
np.linspace(0, 1, shape[2], dtype="float64"), dims=dims[2]
),
},
attrs={
"Conventions": "CF-1.8",
"title": f"Test {index + 1}-{index + shape[0]}",
},
)

if crs:
Expand Down
78 changes: 65 additions & 13 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,37 @@ def test_no_slices(self):
zappend([], target_dir=target_dir)
self.assertFalse(FileObj(target_dir).exists())

def test_one_slices_memory(self):
target_dir = "memory://target.zarr"
slices = [make_test_dataset()]
zappend(slices, target_dir=target_dir)
ds = xr.open_zarr(target_dir)
self.assertEqual({"time": 3, "y": 50, "x": 100}, ds.sizes)
self.assertEqual({"chl", "tsm"}, set(ds.data_vars))
self.assertEqual({"time", "y", "x"}, set(ds.coords))
self.assertEqual(
{
"Conventions": "CF-1.8",
"title": "Test 1-3",
},
ds.attrs,
)

def test_some_slices_memory(self):
target_dir = "memory://target.zarr"
slices = [make_test_dataset(), make_test_dataset(), make_test_dataset()]
slices = [make_test_dataset(index=3 * i) for i in range(3)]
zappend(slices, target_dir=target_dir)
ds = xr.open_zarr(target_dir)
self.assertEqual({"time": 9, "y": 50, "x": 100}, ds.sizes)
self.assertEqual({"chl", "tsm"}, set(ds.data_vars))
self.assertEqual({"time", "y", "x"}, set(ds.coords))
self.assertEqual(
{
"Conventions": "CF-1.8",
"title": "Test 1-3",
},
ds.attrs,
)

def test_some_slices_local(self):
target_dir = "target.zarr"
Expand All @@ -42,14 +65,21 @@ def test_some_slices_local(self):
"slice-2.zarr",
"slice-3.zarr",
]
for uri in slices:
make_test_dataset(uri=uri)
for index, uri in enumerate(slices):
make_test_dataset(uri=uri, index=3 * index)
try:
zappend(slices, target_dir=target_dir)
ds = xr.open_zarr(target_dir)
self.assertEqual({"time": 9, "y": 50, "x": 100}, ds.sizes)
self.assertEqual({"chl", "tsm"}, set(ds.data_vars))
self.assertEqual({"time", "y", "x"}, set(ds.coords))
self.assertEqual(
{
"Conventions": "CF-1.8",
"title": "Test 1-3",
},
ds.attrs,
)
finally:
shutil.rmtree(target_dir, ignore_errors=True)
for slice_dir in slices:
Expand All @@ -72,52 +102,74 @@ def test_some_slices_local_output_to_non_existing_dir(self):

def test_some_slices_with_class_slice_source(self):
target_dir = "memory://target.zarr"
slices = [make_test_dataset(), make_test_dataset(), make_test_dataset()]
slices = [make_test_dataset(index=3 * i) for i in range(3)]
zappend(slices, target_dir=target_dir, slice_source=MySliceSource)
ds = xr.open_zarr(target_dir)
self.assertEqual({"time": 9, "y": 50, "x": 100}, ds.sizes)
self.assertEqual({"chl"}, set(ds.data_vars))
self.assertEqual({"time", "y", "x"}, set(ds.coords))
self.assertEqual(
{
"Conventions": "CF-1.8",
"title": "Test 1-3",
},
ds.attrs,
)

def test_some_slices_with_func_slice_source(self):
def process_slice(ctx, slice_ds: xr.Dataset) -> SliceSource:
return MySliceSource(ctx, slice_ds)

target_dir = "memory://target.zarr"
slices = [make_test_dataset(), make_test_dataset(), make_test_dataset()]
slices = [make_test_dataset(index=3 * i) for i in range(3)]
zappend(slices, target_dir=target_dir, slice_source=process_slice)
ds = xr.open_zarr(target_dir)
self.assertEqual({"time": 9, "y": 50, "x": 100}, ds.sizes)
self.assertEqual({"chl"}, set(ds.data_vars))
self.assertEqual({"time", "y", "x"}, set(ds.coords))
self.assertEqual(
{
"Conventions": "CF-1.8",
"title": "Test 1-3",
},
ds.attrs,
)

def test_some_slices_with_inc_append_step(self):
target_dir = "memory://target.zarr"
slices = [
make_test_dataset(index=0, shape=(1, 50, 100)),
make_test_dataset(index=1, shape=(1, 50, 100)),
make_test_dataset(index=2, shape=(1, 50, 100)),
]
slices = [make_test_dataset(index=i, shape=(1, 50, 100)) for i in range(3)]
zappend(slices, target_dir=target_dir, append_step="1D")
ds = xr.open_zarr(target_dir)
np.testing.assert_array_equal(
ds.time.values,
np.array(["2024-01-01", "2024-01-02", "2024-01-03"], dtype=np.datetime64),
)
self.assertEqual(
{
"Conventions": "CF-1.8",
"title": "Test 1-1",
},
ds.attrs,
)

def test_some_slices_with_dec_append_step(self):
target_dir = "memory://target.zarr"
slices = [
make_test_dataset(index=2, shape=(1, 50, 100)),
make_test_dataset(index=1, shape=(1, 50, 100)),
make_test_dataset(index=0, shape=(1, 50, 100)),
make_test_dataset(index=i, shape=(1, 50, 100)) for i in reversed(range(3))
]
zappend(slices, target_dir=target_dir, append_step="-1D")
ds = xr.open_zarr(target_dir)
np.testing.assert_array_equal(
ds.time.values,
np.array(["2024-01-03", "2024-01-02", "2024-01-01"], dtype=np.datetime64),
)
self.assertEqual(
{
"Conventions": "CF-1.8",
"title": "Test 3-3",
},
ds.attrs,
)

# # See https://github.com/bcdev/zappend/issues/21
#
Expand Down
11 changes: 10 additions & 1 deletion zappend/tailoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,18 @@ def tailor_slice_dataset(
# as dimension, e.g., "x", "y", "crs", ...
dataset = dataset.drop_vars(const_variables)

# https://github.com/bcdev/zappend/issues/56
# slice_dataset.to_zarr(store, mode="a", ...) will replace
# global attributes.
# Therefore, we must replace slice dataset attributes by
# existing target dataset attributes.
# However, users should be able to select the appropriate
# operation, e.g., a new config setting target_attrs_op with
# values "first" (default), "last", "update".
dataset.attrs = target_metadata.attrs

# Remove any encoding and attributes from slice,
# since both are prescribed by target
dataset.attrs.clear()
for variable in dataset.variables.values():
variable.encoding = {}
variable.attrs = {}
Expand Down

0 comments on commit c4f97d1

Please sign in to comment.