diff --git a/CHANGES.md b/CHANGES.md index eeaea62..e3c24bd 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,8 @@ ### Fixes +* Global metadata attributes of the target dataset are no longer empty. [#56] + * If the target _parent_ directory did not exist, an exception was raised reporting that the lock file to be written does not exist. Changed this to report that the target parent directory does not exist. [#55] diff --git a/tests/helpers.py b/tests/helpers.py index 3d802f5..62059aa 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -108,6 +108,10 @@ def make_test_dataset( np.linspace(0, 1, shape[2], dtype="float64"), dims=dims[2] ), }, + attrs={ + "Conventions": "CF-1.8", + "title": f"Test {index + 1}-{index + shape[0]}", + }, ) if crs: diff --git a/tests/test_api.py b/tests/test_api.py index 0e6c2bf..993d6d7 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -26,14 +26,37 @@ def test_no_slices(self): zappend([], target_dir=target_dir) self.assertFalse(FileObj(target_dir).exists()) + def test_one_slices_memory(self): + target_dir = "memory://target.zarr" + slices = [make_test_dataset()] + zappend(slices, target_dir=target_dir) + ds = xr.open_zarr(target_dir) + self.assertEqual({"time": 3, "y": 50, "x": 100}, ds.sizes) + self.assertEqual({"chl", "tsm"}, set(ds.data_vars)) + self.assertEqual({"time", "y", "x"}, set(ds.coords)) + self.assertEqual( + { + "Conventions": "CF-1.8", + "title": "Test 1-3", + }, + ds.attrs, + ) + def test_some_slices_memory(self): target_dir = "memory://target.zarr" - slices = [make_test_dataset(), make_test_dataset(), make_test_dataset()] + slices = [make_test_dataset(index=3 * i) for i in range(3)] zappend(slices, target_dir=target_dir) ds = xr.open_zarr(target_dir) self.assertEqual({"time": 9, "y": 50, "x": 100}, ds.sizes) self.assertEqual({"chl", "tsm"}, set(ds.data_vars)) self.assertEqual({"time", "y", "x"}, set(ds.coords)) + self.assertEqual( + { + "Conventions": "CF-1.8", + "title": "Test 1-3", + }, + ds.attrs, + ) def 
test_some_slices_local(self): target_dir = "target.zarr" @@ -42,14 +65,21 @@ def test_some_slices_local(self): "slice-2.zarr", "slice-3.zarr", ] - for uri in slices: - make_test_dataset(uri=uri) + for index, uri in enumerate(slices): + make_test_dataset(uri=uri, index=3 * index) try: zappend(slices, target_dir=target_dir) ds = xr.open_zarr(target_dir) self.assertEqual({"time": 9, "y": 50, "x": 100}, ds.sizes) self.assertEqual({"chl", "tsm"}, set(ds.data_vars)) self.assertEqual({"time", "y", "x"}, set(ds.coords)) + self.assertEqual( + { + "Conventions": "CF-1.8", + "title": "Test 1-3", + }, + ds.attrs, + ) finally: shutil.rmtree(target_dir, ignore_errors=True) for slice_dir in slices: @@ -72,45 +102,60 @@ def test_some_slices_local_output_to_non_existing_dir(self): def test_some_slices_with_class_slice_source(self): target_dir = "memory://target.zarr" - slices = [make_test_dataset(), make_test_dataset(), make_test_dataset()] + slices = [make_test_dataset(index=3 * i) for i in range(3)] zappend(slices, target_dir=target_dir, slice_source=MySliceSource) ds = xr.open_zarr(target_dir) self.assertEqual({"time": 9, "y": 50, "x": 100}, ds.sizes) self.assertEqual({"chl"}, set(ds.data_vars)) self.assertEqual({"time", "y", "x"}, set(ds.coords)) + self.assertEqual( + { + "Conventions": "CF-1.8", + "title": "Test 1-3", + }, + ds.attrs, + ) def test_some_slices_with_func_slice_source(self): def process_slice(ctx, slice_ds: xr.Dataset) -> SliceSource: return MySliceSource(ctx, slice_ds) target_dir = "memory://target.zarr" - slices = [make_test_dataset(), make_test_dataset(), make_test_dataset()] + slices = [make_test_dataset(index=3 * i) for i in range(3)] zappend(slices, target_dir=target_dir, slice_source=process_slice) ds = xr.open_zarr(target_dir) self.assertEqual({"time": 9, "y": 50, "x": 100}, ds.sizes) self.assertEqual({"chl"}, set(ds.data_vars)) self.assertEqual({"time", "y", "x"}, set(ds.coords)) + self.assertEqual( + { + "Conventions": "CF-1.8", + "title": "Test 1-3", + 
}, + ds.attrs, + ) def test_some_slices_with_inc_append_step(self): target_dir = "memory://target.zarr" - slices = [ - make_test_dataset(index=0, shape=(1, 50, 100)), - make_test_dataset(index=1, shape=(1, 50, 100)), - make_test_dataset(index=2, shape=(1, 50, 100)), - ] + slices = [make_test_dataset(index=i, shape=(1, 50, 100)) for i in range(3)] zappend(slices, target_dir=target_dir, append_step="1D") ds = xr.open_zarr(target_dir) np.testing.assert_array_equal( ds.time.values, np.array(["2024-01-01", "2024-01-02", "2024-01-03"], dtype=np.datetime64), ) + self.assertEqual( + { + "Conventions": "CF-1.8", + "title": "Test 1-1", + }, + ds.attrs, + ) def test_some_slices_with_dec_append_step(self): target_dir = "memory://target.zarr" slices = [ - make_test_dataset(index=2, shape=(1, 50, 100)), - make_test_dataset(index=1, shape=(1, 50, 100)), - make_test_dataset(index=0, shape=(1, 50, 100)), + make_test_dataset(index=i, shape=(1, 50, 100)) for i in reversed(range(3)) ] zappend(slices, target_dir=target_dir, append_step="-1D") ds = xr.open_zarr(target_dir) @@ -118,6 +163,13 @@ def test_some_slices_with_dec_append_step(self): ds.time.values, np.array(["2024-01-03", "2024-01-02", "2024-01-01"], dtype=np.datetime64), ) + self.assertEqual( + { + "Conventions": "CF-1.8", + "title": "Test 3-3", + }, + ds.attrs, + ) # # See https://github.com/bcdev/zappend/issues/21 # diff --git a/zappend/tailoring.py b/zappend/tailoring.py index 68ca7dc..d4c5dad 100644 --- a/zappend/tailoring.py +++ b/zappend/tailoring.py @@ -40,9 +40,18 @@ def tailor_slice_dataset( # as dimension, e.g., "x", "y", "crs", ... dataset = dataset.drop_vars(const_variables) + # https://github.com/bcdev/zappend/issues/56 + # slice_dataset.to_zarr(store, mode="a", ...) will replace + # global attributes. + # Therefore, we must replace slice dataset attributes by + # existing target dataset attributes. 
+ # However, users should be able to select the appropriate + # operation, e.g., a new config setting target_attrs_op with + # values "first" (default), "last", "update". + dataset.attrs = target_metadata.attrs + # Remove any encoding and attributes from slice, # since both are prescribed by target - dataset.attrs.clear() for variable in dataset.variables.values(): variable.encoding = {} variable.attrs = {}