diff --git a/CHANGELOG.md b/CHANGELOG.md
index c9c6e89f..63f8e5ae 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,17 @@
 # HDMF-ZARR Changelog
 
+## 0.5.0 (Future)
+
+### New Features
+* Added support for using the ``SQLiteStore`` Zarr storage backend with ``ZarrIO``
+  @oruebel [#66](https://github.com/hdmf-dev/hdmf-zarr/pull/66)
+
+### Internal changes
+* Added ``ZarrIO.__opened_stores_references`` to track Zarr stores opened to resolve
+  references so that those stores can be closed as part of ``ZarrIO.close()``
+  @oruebel [#66](https://github.com/hdmf-dev/hdmf-zarr/pull/66)
+
+
 ## 0.4.0 (Upcoming)
 
 ### Enhancements
@@ -24,6 +36,10 @@
   ``NestedDirectoryStore`` Zarr storage backends with ``ZarrIO`` and ``NWBZarrIO``.
   @oruebel [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62)
 
+### API Changes
+* Removed unused ``filepath`` argument from ``ZarrIO.get_builder_exists_on_disk``
+  [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62)
+
 ### Minor enhancements
 * Updated handling of references on read to simplify future integration of file-based
   Zarr stores (e.g., ZipStore or database stores). @oruebel [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62)
@@ -37,14 +53,10 @@
 * Updated tests to handle upcoming changes to ``HDMFIO``. @rly
   [#102](https://github.com/hdmf-dev/hdmf-zarr/pull/102)
 
-
 ### Docs
 * Added developer documentation on how to integrate new storage backends with ZarrIO.
   @oruebel [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62)
 
-### API Changes
-* Removed unused ``filepath`` argument from ``ZarrIO.get_builder_exists_on_disk`` [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62)
-
 ### Bug fixes
 * Fixed error in nightly CI. @rly [#93](https://github.com/hdmf-dev/hdmf-zarr/pull/93)
 
diff --git a/docs/gallery/resources/README.rst b/docs/gallery/resources/README
similarity index 100%
rename from docs/gallery/resources/README.rst
rename to docs/gallery/resources/README
diff --git a/docs/source/integrating_data_stores.rst b/docs/source/integrating_data_stores.rst
index d7573e73..08c0c145 100644
--- a/docs/source/integrating_data_stores.rst
+++ b/docs/source/integrating_data_stores.rst
@@ -40,6 +40,25 @@ Updating ZarrIO
   particular in case the links to your store also modify the storage schema for links
   (e.g., if you need to store additional metadata in order to resolve links to your
   store).
+  * Note, if your store has to be closed explicitly (e.g., a ``SQLiteStore``), then any
+    stores that are opened to resolve references should be added to the
+    ``ZarrIO.__opened_stores_references`` list so that those stores are closed when
+    :py:meth:`~hdmf_zarr.backend.ZarrIO.close` is called, as shown in the sketch below.
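+
+    A minimal sketch of this pattern (mirroring how ``ZarrIO.resolve_ref`` handles a
+    ``SQLiteStore`` in this change; ``source_file`` is the path of the file that
+    contains the link target):
+
+    .. code-block:: python
+
+        from zarr.storage import SQLiteStore
+
+        # open the store that holds the link target
+        fstore = SQLiteStore(source_file)
+        target_zarr_file = zarr.open(fstore, mode='r')
+        # register the store so that ZarrIO.close() can close it later
+        self.__opened_stores_references.append(fstore)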
+
 
 Updating NWBZarrIO
 ==================
diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py
index dd8b93ad..e314c57e 100644
--- a/src/hdmf_zarr/backend.py
+++ b/src/hdmf_zarr/backend.py
@@ -5,6 +5,7 @@
 import numpy as np
 import tempfile
 import logging
+from sqlite3 import ProgrammingError as SQLiteProgrammingError
 
 # Zarr imports
 import zarr
@@ -12,7 +13,9 @@
 from zarr.core import Array
 from zarr.storage import (DirectoryStore,
                           TempStore,
-                          NestedDirectoryStore)
+                          NestedDirectoryStore,
+                          SQLiteStore)
+
 import numcodecs
 
 # HDMF-ZARR imports
@@ -66,7 +69,8 @@
 SUPPORTED_ZARR_STORES = (DirectoryStore,
                          TempStore,
-                         NestedDirectoryStore)
+                         NestedDirectoryStore,
+                         SQLiteStore)
 """
 Tuple listing all Zarr storage backends supported by ZarrIO
 """
@@ -113,6 +117,7 @@ def __init__(self, **kwargs):
         self.__path = path
         self.__file = None
         self.__built = dict()
+        self.__opened_stores_references = []  # Zarr stores opened to resolve links; these need to be closed in close()
         self._written_builders = WriteStatusTracker()  # track which builders were written (or read) by this IO object
         self.__dci_queue = None  # Will be initialized on call to io.write
         # Codec class to be used. Alternates, e.g., =numcodecs.JSON
@@ -152,12 +157,29 @@ def object_codec_class(self):
 
     def open(self):
         """Open the Zarr file"""
         if self.__file is None:
-            self.__file = zarr.open(store=self.path,
-                                    mode=self.__mode,
-                                    synchronizer=self.__synchronizer)
+            # Open Zarr file
+            if isinstance(self.path, SQLiteStore):
+                overwrite = 'w' in self.__mode
+                self.__file = zarr.group(store=self.path,
+                                         overwrite=overwrite)
+            else:
+                self.__file = zarr.open(store=self.path,
+                                        mode=self.__mode,
+                                        synchronizer=self.__synchronizer)
 
     def close(self):
         """Close the Zarr file"""
+        if isinstance(self.path, SQLiteStore):
+            try:
+                self.path.close()
+            except SQLiteProgrammingError:  # raised if close has been called previously
+                pass
+        for store in self.__opened_stores_references:
+            try:
+                store.close()
+            except Exception:  # may be raised if close has been called previously
+                pass
+
         self.__file = None
         return
 
@@ -170,9 +192,25 @@ def close(self):
                      'doc': 'the path to the Zarr file or a supported Zarr store'},
             {'name': 'namespaces', 'type': list, 'doc': 'the namespaces to load', 'default': None})
     def load_namespaces(cls, namespace_catalog, path, namespaces=None):
-        '''
+        """
         Load cached namespaces from a file.
-        '''
+
+        .. note::
+
+           This function does NOT close the path. E.g., when using a
+           ``SQLiteStore``, remember to close the store.
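+
+        A minimal usage sketch (the store path is illustrative):
+
+        .. code-block:: python
+
+            from hdmf.spec import NamespaceCatalog
+            from zarr.storage import SQLiteStore
+
+            store = SQLiteStore('example.zarr.sqlite')
+            ZarrIO.load_namespaces(namespace_catalog=NamespaceCatalog(), path=store)
+            store.close()  # load_namespaces does not close the store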
+        """
         f = zarr.open(path, 'r')
         if SPEC_LOC_ATTR not in f.attrs:
             msg = "No cached namespaces found in %s" % path
@@ -349,6 +376,12 @@ def get_builder_exists_on_disk(self, **kwargs):
         builder = getargs('builder', kwargs)
         builder_path = self.get_builder_disk_path(builder=builder, filepath=None)
         exists_on_disk = os.path.exists(builder_path)
+        if isinstance(self.path, SQLiteStore):
+            try:
+                self.file[self.__get_path(builder)]
+                exists_on_disk = True
+            except Exception:
+                exists_on_disk = False
         return exists_on_disk
 
     @docval({'name': 'builder', 'type': Builder, 'doc': 'The builder of interest'},
@@ -557,6 +590,14 @@ def get_zarr_paths(zarr_object):
         :type zarr_object: Zarr Group or Array
         :return: Tuple of two string with: 1) path of the Zarr file and 2) full path within the zarr file to the object
         """
+        filepath = zarr_object.store.path.replace("\\", "/")
+        objectpath = ("/" + zarr_object.path).replace("\\", "/")
+        return filepath, objectpath
+        # NOTE: Leaving this code here for now as there is (or at least used to be) a reason why we could
+        #       not use the paths from Zarr directly. However, the code below would need to be fixed, as for
+        #       file-based stores (e.g., SQLiteStore) we can't check for objects with os.path.exists since
+        #       there are no directories for groups (they are in the file).
+        """
         # In Zarr the path is a combination of the path of the store and the path of the object. So we first need to
         # merge those two paths, then remove the path of the file, add the missing leading "/" and then compute the
         # directory name to get the path of the parent
@@ -572,6 +613,7 @@ def get_zarr_paths(zarr_object):
         objectpath = "/" + os.path.relpath(fullpath, filepath)
         # return the result
         return filepath, objectpath
+        """
 
     @staticmethod
     def get_zarr_parent_path(zarr_object):
@@ -636,10 +678,21 @@ def resolve_ref(self, zarr_ref):
             target_name = os.path.basename(object_path)
         else:
             target_name = ROOT_NAME
-        target_zarr_obj = zarr.open(source_file, mode='r')
+        # Open the source_file containing the link. Here we need to determine the correct zarr.storage store to use
+        try:
+            target_zarr_file = zarr.open(source_file, mode='r')
+        except zarr.errors.FSPathExistNotDir:
+            try:
+                fstore = SQLiteStore(source_file)
+                target_zarr_file = zarr.open(fstore, mode='r')
+                self.__opened_stores_references.append(fstore)
+            except Exception:
+                raise ValueError("Found bad link to object %s in file %s" % (object_path, source_file))
+        # Get the linked object from the file
+        target_zarr_obj = target_zarr_file
         if object_path is not None:
             try:
-                target_zarr_obj = target_zarr_obj[object_path]
+                target_zarr_obj = target_zarr_file[object_path]
             except Exception:
                 raise ValueError("Found bad link to object %s in file %s" % (object_path, source_file))
         # Return the create path
diff --git a/tests/unit/base_tests_zarrio.py b/tests/unit/base_tests_zarrio.py
index b0853b06..9fa7e496 100644
--- a/tests/unit/base_tests_zarrio.py
+++ b/tests/unit/base_tests_zarrio.py
@@ -2,16 +2,21 @@
 Module defining the base unit test cases for ZarrIO.
 
 The actual tests are then instantiated with various different backends in the
-test_zarrio.py module."""
+test_zarrio.py module.
+""" + import unittest import os import numpy as np import shutil import warnings +from sqlite3 import ProgrammingError as SQLiteProgrammingError # Try to import Zarr and disable tests if Zarr is not available import zarr -from hdmf_zarr.backend import ZarrIO +from zarr.storage import SQLiteStore +from hdmf_zarr.backend import (ZarrIO, + SUPPORTED_ZARR_STORES) from hdmf_zarr.utils import ZarrDataIO # Try to import numcodecs and disable compression tests if it is not available @@ -42,6 +47,19 @@ from abc import ABCMeta, abstractmethod +def reopen_store(in_store): + """ + Helper function to reopen a store or create a new instance, needed by + some test cases for reading data after it has been written and closed. + + :param in_store: zarr.storage store that needs to be reopened + """ + re_store = in_store # most stores we don't need to reopen + if isinstance(in_store, SQLiteStore): + re_store = SQLiteStore(in_store.path) + return re_store + + def total_size(source): """Helper function used to compute the size of a directory or file""" dsize = os.path.getsize(source) @@ -75,6 +93,15 @@ def tearDown(self): """ Remove all files and folders defined by self.store_path """ + # close the stores. Needed on Windows to avoid access conflict when deleting files + stores = self.store if isinstance(self.store, list) else [self.store] + for sto in stores: + if sto is not None and isinstance(sto, SUPPORTED_ZARR_STORES): + try: + sto.close() + except SQLiteProgrammingError: + pass + # clean up files created as part of the tests paths = self.store_path if isinstance(self.store_path, list) else [self.store_path, ] for path in paths: if os.path.exists(path): @@ -84,6 +111,8 @@ def tearDown(self): os.remove(path) else: warnings.warn("Could not remove: %s" % path) + # except PermissionError: # This can happen on Windows + # warnings.warn("Could not remove: %s" % path) class BaseTestZarrWriter(BaseZarrWriterTestCase): @@ -110,6 +139,16 @@ def setUp(self): self.manager = get_foo_buildmanager() self.store = "test_io.zarr" self.store_path = self.store + self.io = None # may need to keep an ZarrIO object open, e.g., in read() + + def tearDown(self): + if self.io is not None: + try: + self.io.close() + except Exception: + pass + del self.io + super().tearDown() def createGroupBuilder(self): self.foo_builder = GroupBuilder('foo1', @@ -174,36 +213,39 @@ def test_cannot_read(self): assert not ZarrIO.can_read("incorrect_path") def read_test_dataset(self): - reader = ZarrIO(self.store, manager=self.manager, mode='r') + reader = ZarrIO(reopen_store(self.store), manager=self.manager, mode='r') self.root = reader.read_builder() dataset = self.root['test_bucket/foo_holder/foo1/my_data'] - return dataset + data = dataset['data'][:] + del reader # delete and close the store + return data def read(self): - reader = ZarrIO(self.store, manager=self.manager, mode='r') - self.root = reader.read_builder() + self.io = ZarrIO(reopen_store(self.store), manager=self.manager, mode='r') + self.root = self.io.read_builder() def test_cache_spec(self): - tempIO = ZarrIO(self.store, manager=self.manager, mode='w') + with ZarrIO(reopen_store(self.store), manager=self.manager, mode='w') as tempIO: + # Setup all the data we need + foo1 = Foo('foo1', [0, 1, 2, 3, 4], "I am foo1", 17, 3.14) + foo2 = Foo('foo2', [5, 6, 7, 8, 9], "I am foo2", 34, 6.28) + foobucket = FooBucket('test_bucket', [foo1, foo2]) + foofile = FooFile(buckets=[foobucket]) - # Setup all the data we need - foo1 = Foo('foo1', [0, 1, 2, 3, 4], "I am foo1", 17, 3.14) - foo2 = 
Foo('foo2', [5, 6, 7, 8, 9], "I am foo2", 34, 6.28) - foobucket = FooBucket('test_bucket', [foo1, foo2]) - foofile = FooFile(buckets=[foobucket]) - - # Write the first file - tempIO.write(foofile, cache_spec=True) - tempIO.close() + # Write the first file + tempIO.write(foofile, cache_spec=True) # Load the spec and assert that it is valid ns_catalog = NamespaceCatalog() - ZarrIO.load_namespaces(ns_catalog, self.store) + temp_store = reopen_store(self.store) + ZarrIO.load_namespaces(namespace_catalog=ns_catalog, path=temp_store) self.assertEqual(ns_catalog.namespaces, ('test_core',)) source_types = CacheSpecTestHelper.get_types(self.manager.namespace_catalog) read_types = CacheSpecTestHelper.get_types(ns_catalog) self.assertSetEqual(source_types, read_types) + if isinstance(temp_store, SUPPORTED_ZARR_STORES): + temp_store.close() def test_write_int(self, test_data=None): data = np.arange(100, 200, 10).reshape(2, 5) if test_data is None else test_data @@ -265,17 +307,16 @@ def test_write_link_array(self): data = np.arange(100, 200, 10).reshape(2, 5) self.__dataset_builder = DatasetBuilder('my_data', data, attributes={'attr2': 17}) self.createGroupBuilder() - writer = ZarrIO(self.store, manager=self.manager, mode='a') - writer.write_builder(self.builder) - zarr_file = zarr.open(self.store, mode='r') - zarr_array = zarr_file["/test_bucket/foo_holder/foo1/my_data"] - link_io = ZarrDataIO(data=zarr_array, link_data=True) - link_dataset = DatasetBuilder('dataset_link', link_io) - self.builder['test_bucket'].set_dataset(link_dataset) - writer.write_builder(self.builder) - writer.close() - - reader = ZarrIO(self.store, manager=self.manager, mode='r') + with ZarrIO(self.store, manager=self.manager, mode='a') as writer: + writer.write_builder(self.builder) + zarr_file = writer.file + zarr_array = zarr_file["/test_bucket/foo_holder/foo1/my_data"] + link_io = ZarrDataIO(data=zarr_array, link_data=True) + link_dataset = DatasetBuilder('dataset_link', link_io) + self.builder['test_bucket'].set_dataset(link_dataset) + writer.write_builder(self.builder) + + reader = ZarrIO(reopen_store(self.store), manager=self.manager, mode='r') self.root = reader.read_builder() read_link = self.root['test_bucket/dataset_link'] read_link_data = read_link['builder']['data'][:] @@ -299,31 +340,31 @@ def test_write_reference_compound(self): def test_read_int(self): test_data = np.arange(100, 200, 10).reshape(5, 2) self.test_write_int(test_data=test_data) - dataset = self.read_test_dataset()['data'][:] - self.assertTrue(np.all(test_data == dataset)) + data = self.read_test_dataset() + self.assertTrue(np.all(test_data == data)) def test_read_chunk(self): test_data = np.arange(100, 200, 10).reshape(5, 2) self.test_write_chunk(test_data=test_data) - dataset = self.read_test_dataset()['data'][:] - self.assertTrue(np.all(test_data == dataset)) + data = self.read_test_dataset() + self.assertTrue(np.all(test_data == data)) def test_read_strings(self): test_data = [['a1', 'aa2', 'aaa3', 'aaaa4', 'aaaaa5'], ['b1', 'bb2', 'bbb3', 'bbbb4', 'bbbbb5']] self.test_write_strings(test_data=test_data) - dataset = self.read_test_dataset()['data'][:] - self.assertTrue(np.all(np.asarray(test_data) == dataset)) + data = self.read_test_dataset() + self.assertTrue(np.all(np.asarray(test_data) == data)) def test_read_compound(self): test_data = [(1, 'Allen1'), (2, 'Bob1'), (3, 'Mike1')] self.test_write_compound(test_data=test_data) - dataset = self.read_test_dataset()['data'] - self.assertTupleEqual(test_data[0], tuple(dataset[0])) - 
self.assertTupleEqual(test_data[1], tuple(dataset[1])) - self.assertTupleEqual(test_data[2], tuple(dataset[2])) + data = self.read_test_dataset() + self.assertTupleEqual(test_data[0], tuple(data[0])) + self.assertTupleEqual(test_data[1], tuple(data[1])) + self.assertTupleEqual(test_data[2], tuple(data[2])) def test_read_link(self): test_data = np.arange(100, 200, 10).reshape(5, 2) @@ -418,7 +459,7 @@ def test_set_object_codec(self): tempIO = ZarrIO(self.store, mode='w') self.assertEqual(tempIO.object_codec_class.__qualname__, 'Pickle') del tempIO # also calls tempIO.close() - tempIO = ZarrIO(self.store, mode='w', object_codec_class=JSON) + tempIO = ZarrIO(reopen_store(self.store), mode='w', object_codec_class=JSON) self.assertEqual(tempIO.object_codec_class.__qualname__, 'JSON') tempIO.close() @@ -427,7 +468,7 @@ def test_synchronizer_constructor_arg_bool(self): tempIO = ZarrIO(self.store, mode='w', synchronizer=False) self.assertIsNone(tempIO.synchronizer) del tempIO # also calls tempIO.close() - tempIO = ZarrIO(self.store, mode='w', synchronizer=True) + tempIO = ZarrIO(reopen_store(self.store), mode='w', synchronizer=True) self.assertTrue(isinstance(tempIO.synchronizer, zarr.ProcessSynchronizer)) tempIO.close() @@ -466,7 +507,7 @@ def test_get_builder_exists_on_disk(self): dset_builder = DatasetBuilder('test_dataset', 10, attributes={}) tempIO = ZarrIO(self.store, mode='w') self.assertFalse(tempIO.get_builder_exists_on_disk(builder=dset_builder)) # Make sure is False is before write - tempIO .write_dataset(tempIO.file, dset_builder) + tempIO.write_dataset(tempIO.file, dset_builder) self.assertTrue(tempIO.get_builder_exists_on_disk(builder=dset_builder)) # Make sure is True after write tempIO.close() @@ -501,7 +542,7 @@ def __write_attribute_test_helper(self, name, value, assert_value=True): :returns: the value read from disk so we can do our own tests if needed """ # write the attribute - tempIO = ZarrIO(self.store, mode='w') + tempIO = ZarrIO(reopen_store(self.store), mode='w') tempIO.open() testgroup = tempIO.file # For testing we just use our file and create some attributes attr = {name: value} @@ -516,8 +557,7 @@ def __write_attribute_test_helper(self, name, value, assert_value=True): self.assertListEqual(list(read_val), value.tolist()) else: self.assertEqual(testgroup.attrs[name], value) - tempIO.close() - return read_val + return read_val, tempIO def test_write_attributes_write_scalar_int(self): self.__write_attribute_test_helper('intattr', np.int32(5)) @@ -561,9 +601,11 @@ def test_write_attributes_write_list_of_bytes(self): Test writing of lists of bytes. Bytes are not JSON serializable and therefore cover a differnt code path. Note, bytes are here encoded as strings to the return value does not match exactly but the data type changes. 
""" - val = self.__write_attribute_test_helper('attr', [b'a', b'b', b'c', b'd'], assert_value=False) + val, zio = self.__write_attribute_test_helper('attr', [b'a', b'b', b'c', b'd'], assert_value=False) self.assertTupleEqual(val, tuple(['a', 'b', 'c', 'd'])) - val = self.__write_attribute_test_helper('attr', [b'e', b'f', b'g'], assert_value=False) + del zio + del val + val, zio = self.__write_attribute_test_helper('attr', [b'e', b'f', b'g'], assert_value=False) self.assertTupleEqual(val, tuple(['e', 'f', 'g'])) def test_write_attributes_write_1Darray_of_floats(self): @@ -811,7 +853,7 @@ def test_link_zarr_dataset_input(self): tempIO.write_dataset(tempIO.file, builder=dset) softlink = DatasetBuilder('test_softlink', tempIO.file['test_dataset'], attributes={}) tempIO.write_dataset(tempIO.file, builder=softlink) - tempf = zarr.open(store=self.store, mode='r') + tempf = tempIO.file expected_link = {'name': 'test_softlink', 'path': '/test_dataset', 'source': os.path.abspath(self.store_path)} @@ -841,7 +883,7 @@ def test_link_dataset_zarrdataio_input(self): ZarrDataIO(data=tempIO.file['test_dataset'], link_data=True), attributes={}) ) - tempf = zarr.open(self.store, mode='r') + tempf = tempIO.file expected_link = {'name': 'test_softlink', 'path': '/test_dataset', 'source': os.path.abspath(self.store_path)} @@ -910,14 +952,14 @@ def test_basic(self): with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(reopen_store(self.store[0]), manager=get_foo_buildmanager(), mode='r') as read_io: with ZarrIO(self.store[1], mode='w') as export_io: export_io.export(src_io=read_io) self.assertTrue(os.path.exists(self.store_path[1])) self.assertEqual(foofile.container_source, self.store_path[0]) - with ZarrIO(self.store[1], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(reopen_store(self.store[1]), manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = read_io.read() self.assertEqual(read_foofile.container_source, self.store_path[1]) self.assertContainerEqual(foofile, read_foofile, ignore_hdmf_attrs=True) @@ -931,7 +973,7 @@ def test_basic_container(self): with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(reopen_store(self.store[0]), manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = read_io.read() with ZarrIO(self.store[1], mode='w') as export_io: export_io.export(src_io=read_io, container=read_foofile) @@ -939,7 +981,7 @@ def test_basic_container(self): self.assertTrue(os.path.exists(self.store_path[1])) self.assertEqual(foofile.container_source, self.store_path[0]) - with ZarrIO(self.store[1], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(reopen_store(self.store[1]), manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = read_io.read() self.assertEqual(read_foofile.container_source, self.store_path[1]) self.assertContainerEqual(foofile, read_foofile, ignore_hdmf_attrs=True) @@ -953,7 +995,7 @@ def test_container_part(self): with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(reopen_store(self.store[0]), manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = 
read_io.read()
 
             with ZarrIO(self.store[1], mode='w') as export_io:
                 msg = ("The provided container must be the root of the hierarchy of the source used to read the "
@@ -970,7 +1012,7 @@ def test_container_unknown(self):
         with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io:
             write_io.write(foofile)
 
-        with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io:
+        with ZarrIO(reopen_store(self.store[0]), manager=get_foo_buildmanager(), mode='r') as read_io:
             with ZarrIO(self.store[1], mode='w') as export_io:
                 dummy_file = FooFile(buckets=[])
                 msg = "The provided container must have been read by the provided src_io."
@@ -986,7 +1028,7 @@ def test_cache_spec_disabled(self):
         with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io:
             write_io.write(foofile, cache_spec=False)
 
-        with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io:
+        with ZarrIO(reopen_store(self.store[0]), manager=get_foo_buildmanager(), mode='r') as read_io:
             read_foofile = read_io.read()
 
             with ZarrIO(self.store[1], mode='w') as export_io:
@@ -1005,7 +1047,7 @@ def test_cache_spec_enabled(self):
         with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io:
             write_io.write(foofile)
 
-        with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io:
+        with ZarrIO(reopen_store(self.store[0]), manager=get_foo_buildmanager(), mode='r') as read_io:
             read_foofile = read_io.read()
 
             with ZarrIO(self.store[1], mode='w') as export_io:
@@ -1014,8 +1056,8 @@ def test_cache_spec_enabled(self):
                              container=read_foofile,
                              cache_spec=True)
 
-        with zarr.open(self.store[1], mode='r') as zarr_io:
-            self.assertTrue('specifications' in zarr_io.keys())
+        with ZarrIO(reopen_store(self.store[1]), mode='r') as zarr_io:
+            self.assertTrue('specifications' in zarr_io.file.keys())
 
     def test_soft_link_group(self):
         """
@@ -1027,19 +1069,26 @@ def test_soft_link_group(self):
         foofile = FooFile(buckets=[foobucket], foo_link=foo1)
         with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io:
             write_io.write(foofile)
-        with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io:
+        with ZarrIO(reopen_store(self.store[0]), manager=get_foo_buildmanager(), mode='r') as read_io:
             with ZarrIO(self.store[1], mode='w') as export_io:
                 export_io.export(src_io=read_io, write_args=dict(link_data=False))
-        with ZarrIO(self.store[1], manager=get_foo_buildmanager(), mode='r') as read_io:
+        with ZarrIO(reopen_store(self.store[1]), manager=get_foo_buildmanager(), mode='r') as read_io:
             read_foofile2 = read_io.read()
 
             # make sure the linked group is within the same file
             self.assertEqual(read_foofile2.foo_link.container_source, self.store_path[1])
 
-            zarr_linkspec1 = zarr.open(self.store_path[0])['links'].attrs.asdict()['zarr_link'][0]
-            zarr_linkspec2 = zarr.open(self.store_path[1])['links'].attrs.asdict()['zarr_link'][0]
+            # normally we only need the Zarr file object here, but by using ZarrIO
+            # we can reuse the logic defined in ZarrIO.open to ensure the file gets
+            # opened correctly for the different backends, whereas zarr.open only
+            # supports DirectoryStores and ZipStore (i.e., it fails, e.g., 
for SQLiteStore) + with ZarrIO(reopen_store(self.store[0]), manager=get_foo_buildmanager(), mode='r') as temp_io: + zarr_linkspec1 = temp_io.file['links'].attrs.asdict()['zarr_link'][0] + zarr_linkspec2 = read_io.file['links'].attrs.asdict()['zarr_link'][0] + # Compare the links specifications self.assertEqual(zarr_linkspec1.pop('source'), ".") self.assertEqual(zarr_linkspec2.pop('source'), ".") self.assertDictEqual(zarr_linkspec1, zarr_linkspec2) + @unittest.skip("Test case ported from HDMF but not finalized yet") def test_soft_link_dataset(self): """Test that exporting a written file with soft linked datasets keeps links within the file.""" """Link to a dataset in the same file should have a link to the same new dataset in the new file """ @@ -1063,6 +1112,7 @@ def test_soft_link_dataset(self): self.assertEqual(read_foofile2.foofile_data.path, self.source_paths[1]) """ + @unittest.skip("Test case ported from HDMF but not finalized yet") def test_external_link_group(self): """Test that exporting a written file with external linked groups maintains the links.""" """External links remain""" @@ -1101,6 +1151,7 @@ def test_external_link_group(self): self.assertEqual(read_foofile2.foo_link.container_source, self.source_paths[0]) """ + @unittest.skip("Test case ported from HDMF but not finalized yet") def test_external_link_dataset(self): """Test that exporting a written file with external linked datasets maintains the links.""" pass # TODO this test currently fails @@ -1126,6 +1177,7 @@ def test_external_link_dataset(self): self.assertEqual(read_foofile2.foofile_data.file.filename, self.source_paths[0]) """ + @unittest.skip("Test case ported from HDMF but not finalized yet") def test_external_link_link(self): """Test that exporting a written file with external links to external links maintains the links.""" pass # TODO this test currently fails @@ -1158,6 +1210,7 @@ def test_external_link_link(self): self.assertEqual(read_foofile3.foo_link.container_source, self.source_paths[0]) """ + @unittest.skip("Test case ported from HDMF but not finalized yet") def test_attr_reference(self): """Test that exporting a written file with attribute references maintains the references.""" """Attribute with object reference needs to point to the new object in the new file""" @@ -1192,14 +1245,14 @@ def test_pop_data(self): with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(reopen_store(self.store[0]), manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = read_io.read() read_foofile.remove_bucket('bucket1') # remove child group with ZarrIO(self.store[1], mode='w') as export_io: export_io.export(src_io=read_io, container=read_foofile) - with ZarrIO(self.store[1], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(reopen_store(self.store[1]), manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile2 = read_io.read() # make sure the read foofile has no buckets @@ -1219,7 +1272,7 @@ def test_pop_linked_group(self): with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(reopen_store(self.store[0]), manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = read_io.read() read_foofile.buckets['bucket1'].remove_foo('foo1') # remove child group @@ -1229,6 +1282,7 @@ def 
test_pop_linked_group(self): with self.assertRaisesWith(OrphanContainerBuildError, msg): export_io.export(src_io=read_io, container=read_foofile) + @unittest.skip("Test case ported from HDMF but not finalized yet") def test_append_data(self): """Test that exporting a written container after adding groups, links, and references to it works.""" # TODO: This test currently fails because I do not understand how the link to my_data is expected to be @@ -1274,6 +1328,7 @@ def test_append_data(self): # self.assertIsInstance(f.attrs['foo_ref_attr'], h5py.Reference) """ + @unittest.skip("Test case ported from HDMF but not finalized yet") def test_append_external_link_data(self): """Test that exporting a written container after adding a link with link_data=True creates external links.""" pass # TODO: This test currently fails @@ -1316,6 +1371,7 @@ def test_append_external_link_data(self): # self.assertIsInstance(f.get('foofile_data', getlink=True), h5py.ExternalLink) """ + @unittest.skip("Test case ported from HDMF but not finalized yet") def test_append_external_link_copy_data(self): """Test that exporting a written container after adding a link with link_data=False copies the data.""" pass # TODO: This test currently fails @@ -1357,6 +1413,7 @@ def test_append_external_link_copy_data(self): # self.assertEqual(f['foofile_data'].file.filename, self.source_paths[2]) """ + @unittest.skip("Test case ported from HDMF but not finalized yet") def test_export_dset_refs(self): """Test that exporting a written container with a dataset of references works.""" pass # TODO: This test currently fails @@ -1388,6 +1445,7 @@ def test_export_dset_refs(self): self.assertIs(read_bucket2.baz_data.data[i], read_bucket2.bazs[baz_name]) """ + @unittest.skip("Test case ported from HDMF but not finalized yet") def test_export_cpd_dset_refs(self): """Test that exporting a written container with a compound dataset with references works.""" pass # TODO: This test currently fails @@ -1501,7 +1559,7 @@ def test_wrong_mode(self): with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.store[0], mode='r') as read_io: + with ZarrIO(reopen_store(self.store[0]), mode='r') as read_io: with ZarrIO(self.store[1], mode='a') as export_io: msg = "Cannot export to file %s in mode 'a'. Please use mode 'w'." 
% self.store_path[1]
                 with self.assertRaisesWith(UnsupportedOperation, msg):
diff --git a/tests/unit/test_io_convert.py b/tests/unit/test_io_convert.py
index fe75ab76..37a1e743 100644
--- a/tests/unit/test_io_convert.py
+++ b/tests/unit/test_io_convert.py
@@ -34,6 +34,7 @@
 """
 import os
 import shutil
+from sqlite3 import ProgrammingError as SQLiteProgrammingError
 import numpy as np
 
 from abc import ABCMeta, abstractmethod
@@ -55,7 +56,10 @@
 from zarr.storage import (DirectoryStore,
                           TempStore,
-                          NestedDirectoryStore)
+                          NestedDirectoryStore,
+                          SQLiteStore)
+
+from tests.unit.base_tests_zarrio import reopen_store
 
 
 class MixinTestCaseConvert(metaclass=ABCMeta):
@@ -124,15 +128,24 @@ def tearDown(self):
         self.close_files_and_ios()
 
     def close_files_and_ios(self):
-        for io in self.ios:
+        paths = (self.filenames +
+                 [p if isinstance(p, str) else p.path
+                  for p in (self.EXPORT_PATHS + self.WRITE_PATHS)
+                  if p is not None])
+        for io in (self.ios + self.EXPORT_PATHS + self.WRITE_PATHS):
             if io is not None:
-                io.close()
-        for fn in self.filenames:
+                try:
+                    io.close()
+                except SQLiteProgrammingError:
+                    pass
+        for fn in paths:
             if fn is not None and os.path.exists(fn):
                 if os.path.isdir(fn):
                     shutil.rmtree(fn)
                 else:
                     os.remove(fn)
+            # except PermissionError:  # This can happen on Windows
+            #     warnings.warn("Could not remove: %s" % fn)
         self.filenames = []
         self.ios = []
@@ -199,7 +212,11 @@ def test_export_roundtrip(self):
                                 ignore_hdmf_attrs=self.IGNORE_HDMF_ATTRS,
                                 ignore_string_to_byte=self.IGNORE_STRING_TO_BYTE,
                                 message=message)
-        self.close_files_and_ios()
+
+        # TODO: May need to add further asserts here
+        # we are essentially running a new test in each iteration, so call tearDown and setUp around each one
+        self.tearDown()  # calls self.close_files_and_ios()
+        self.setUp()
 
 
 ##########################################################
@@ -216,7 +233,8 @@ class MixinTestHDF5ToZarr():
     EXPORT_PATHS = [None,
                     DirectoryStore('test_export_DirectoryStore.zarr'),
                     TempStore(),
-                    NestedDirectoryStore('test_export_NestedDirectoryStore.zarr')]
+                    NestedDirectoryStore('test_export_NestedDirectoryStore.zarr'),
+                    SQLiteStore('test_export_SQLiteStore.zarr.sqlite')]
     TARGET_FORMAT = "ZARR"
 
     def get_manager(self):
@@ -227,10 +245,10 @@ def roundtripExportContainer(self, container, write_path, export_path):
             write_io.write(container, cache_spec=True)
 
         with HDF5IO(write_path, manager=self.get_manager(), mode='r') as read_io:
-            with ZarrIO(export_path, mode='w') as export_io:
+            with ZarrIO(reopen_store(export_path), mode='w') as export_io:
                 export_io.export(src_io=read_io, write_args={'link_data': False})
 
-        read_io = ZarrIO(export_path, manager=self.get_manager(), mode='r')
+        read_io = ZarrIO(reopen_store(export_path), manager=self.get_manager(), mode='r')
         self.ios.append(read_io)
         exportContainer = read_io.read()
         return exportContainer
@@ -246,7 +264,8 @@ class MixinTestZarrToHDF5():
     WRITE_PATHS = [None,
                    DirectoryStore('test_export_DirectoryStore.zarr'),
                    TempStore(),
-                   NestedDirectoryStore('test_export_NestedDirectoryStore.zarr')]
+                   NestedDirectoryStore('test_export_NestedDirectoryStore.zarr'),
+                   SQLiteStore('test_export_SQLiteStore.zarr.sqlite')]
     EXPORT_PATHS = [None, ]
     TARGET_FORMAT = "H5"
 
@@ -254,10 +273,10 @@ def get_manager(self):
         return get_hdmfcommon_manager()
 
     def roundtripExportContainer(self, container, write_path, export_path):
-        with ZarrIO(write_path, manager=self.get_manager(), mode='w') as write_io:
-            write_io.write(container)
+        with ZarrIO(reopen_store(write_path), manager=self.get_manager(), mode='w') as write_io:
+            write_io.write(container, 
cache_spec=True)
 
-        with ZarrIO(write_path, manager=self.get_manager(), mode='r') as read_io:
+        with ZarrIO(reopen_store(write_path), manager=self.get_manager(), mode='r') as read_io:
             with HDF5IO(export_path, mode='w') as export_io:
                 export_io.export(src_io=read_io, write_args={'link_data': False})
 
@@ -277,25 +296,27 @@ class MixinTestZarrToZarr():
     WRITE_PATHS = [None,
                    DirectoryStore('test_export_DirectoryStore_Source.zarr'),
                    TempStore(dir=os.path.dirname(__file__)),  # set dir to avoid switching drives on Windows
-                   NestedDirectoryStore('test_export_NestedDirectoryStore_Source.zarr')]
+                   NestedDirectoryStore('test_export_NestedDirectoryStore_Source.zarr'),
+                   SQLiteStore('test_export_SQLiteStore_Source.zarr.sqlite')]
     EXPORT_PATHS = [None,
                     DirectoryStore('test_export_DirectoryStore_Export.zarr'),
                     TempStore(dir=os.path.dirname(__file__)),  # set dir to avoid switching drives on Windows
-                    NestedDirectoryStore('test_export_NestedDirectoryStore_Export.zarr')]
+                    NestedDirectoryStore('test_export_NestedDirectoryStore_Export.zarr'),
+                    SQLiteStore('test_export_SQLiteStore_Export.zarr.sqlite')]
     TARGET_FORMAT = "ZARR"
 
     def get_manager(self):
         return get_hdmfcommon_manager()
 
     def roundtripExportContainer(self, container, write_path, export_path):
-        with ZarrIO(write_path, manager=self.get_manager(), mode='w') as write_io:
+        with ZarrIO(reopen_store(write_path), manager=self.get_manager(), mode='w') as write_io:
             write_io.write(container, cache_spec=True)
 
-        with ZarrIO(write_path, manager=self.get_manager(), mode='r') as read_io:
-            with ZarrIO(export_path, mode='w') as export_io:
+        with ZarrIO(reopen_store(write_path), manager=self.get_manager(), mode='r') as read_io:
+            with ZarrIO(reopen_store(export_path), mode='w') as export_io:
                 export_io.export(src_io=read_io, write_args={'link_data': False})
 
-        read_io = ZarrIO(export_path, manager=self.get_manager(), mode='r')
+        read_io = ZarrIO(reopen_store(export_path), manager=self.get_manager(), mode='r')
         self.ios.append(read_io)
         exportContainer = read_io.read()
         return exportContainer
@@ -447,9 +468,6 @@ class TestHDF5ToZarrDynamicTableC0(MixinTestDynamicTableContainer,
     IGNORE_STRING_TO_BYTE = False
     TABLE_TYPE = 0
 
-    def test_simple(self, write_path=None, export_path=None):
-        print(write_path, export_path)
-
 
 class TestZarrToHDF5DynamicTableC0(MixinTestDynamicTableContainer,
                                    MixinTestZarrToHDF5,
diff --git a/tests/unit/test_zarrio.py b/tests/unit/test_zarrio.py
index e1526282..9211a440 100644
--- a/tests/unit/test_zarrio.py
+++ b/tests/unit/test_zarrio.py
@@ -14,7 +14,8 @@
                              BaseTestExportZarrToZarr)
 from zarr.storage import (DirectoryStore,
                           TempStore,
-                          NestedDirectoryStore)
+                          NestedDirectoryStore,
+                          SQLiteStore)
 
 
 ######################################################
@@ -122,3 +123,29 @@ class TestExportZarrToZarrNestedDirectoryStore(BaseTestExportZarrToZarr):
     def setUp(self):
         super().setUp()
         self.store = [NestedDirectoryStore(p) for p in self.store_path]
+
+
+#########################################
+# SQLiteStore tests
+#########################################
+class TestZarrWriterSQLiteStore(BaseTestZarrWriter):
+    """Test writing of builders with Zarr using a custom SQLiteStore"""
+    def setUp(self):
+        super().setUp()
+        self.store_path += ".sqldb"
+        self.store = SQLiteStore(self.store_path)
+
+
+class TestZarrWriteUnitSQLiteStore(BaseTestZarrWriteUnit):
+    """Unit tests for individual write functions using a custom SQLiteStore"""
+    def setUp(self):
+        super().setUp()
+        self.store_path += ".sqldb"
+        self.store = SQLiteStore(self.store_path)
+
+
+class 
TestExportZarrToZarrSQLiteStore(BaseTestExportZarrToZarr): + """Test exporting Zarr to Zarr using SQLiteStore""" + def setUp(self): + super().setUp() + self.store = [SQLiteStore(p) for p in self.store_path]