Skip to content

Commit

Permalink
Fix #117 Fix #137 Support overwriting an existing file at the same lo…
Browse files Browse the repository at this point in the history
…cation
  • Loading branch information
oruebel committed Nov 8, 2024
1 parent 3a8e9c2 commit 0f01281
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 11 deletions.
39 changes: 31 additions & 8 deletions src/hdmf_zarr/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,17 @@ def can_read(path):
'default': None},
{'name': 'storage_options', 'type': dict,
'doc': 'Zarr storage options to read remote folders',
'default': None})
'default': None},
{'name': 'force_overwrite',
'type': bool,
'doc': "force overwriting existing object when in 'w' mode even"
" if the existing object is a file (e.g., and HDF5 file)",
'default': False}
)
def __init__(self, **kwargs):
self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__))
path, manager, mode, synchronizer, object_codec_class, storage_options = popargs(
'path', 'manager', 'mode', 'synchronizer', 'object_codec_class', 'storage_options', kwargs)
path, manager, mode, synchronizer, object_codec_class, storage_options, force_overwrite = popargs(
'path', 'manager', 'mode', 'synchronizer', 'object_codec_class', 'storage_options', 'force_overwrite', kwargs)
if manager is None:
manager = BuildManager(TypeMap(NamespaceCatalog()))
if isinstance(synchronizer, bool):
Expand All @@ -118,6 +124,7 @@ def __init__(self, **kwargs):
else:
self.__synchronizer = synchronizer
self.__mode = mode
self.__force_overwrite = force_overwrite
if isinstance(path, Path):
path = str(path)
self.__path = path
Expand Down Expand Up @@ -160,25 +167,41 @@ def synchronizer(self):
def object_codec_class(self):
return self.__codec_cls

@property
def mode(self):
"""
The mode specified by the user when creating the ZarrIO instance.
NOTE: The Zarr library may not honor the mode. E.g., DirectoryStore in Zarr uses
append mode and does not allow setting a file to read-only mode.
"""
return self.__mode

def open(self):
"""Open the Zarr file"""
if self.__file is None:
# Allow overwriting an exist file (e.g., and HDF5 file). Zarr will normally fail if the
# existing object at the path is a file. So if we are in `w` mode we need to delete the file first
if self.mode == 'w' and self.__force_overwrite:
if isinstance(self.path, (str, Path)) and os.path.exists(self.path) and os.path.isfile(self.path):
os.remove(self.path)

# Within zarr, open_consolidated only allows the mode to be 'r' or 'r+'.
# As a result, when in other modes, the file will not use consolidated metadata.
if self.__mode != 'r':
if self.mode != 'r':
# When we consolidate metadata, we use ConsolidatedMetadataStore.
# This interface does not allow for setting items.
# In the doc string, it says it is "read only". As a result, we cannot use r+ with consolidate_metadata.
# r- is only an internal mode in ZarrIO to force the use of regular open. For Zarr we need to
# use the regular mode r when r- is specified
mode_to_use = self.__mode if self.__mode != 'r-' else 'r'
mode_to_use = self.mode if self.mode != 'r-' else 'r'
self.__file = zarr.open(store=self.path,
mode=mode_to_use,
synchronizer=self.__synchronizer,
storage_options=self.__storage_options)
else:
self.__file = self.__open_file_consolidated(store=self.path,
mode=self.__mode,
mode=self.mode,
synchronizer=self.__synchronizer,
storage_options=self.__storage_options)

Expand Down Expand Up @@ -343,9 +366,9 @@ def export(self, **kwargs):
"""Export data read from a file from any backend to Zarr.
See :py:meth:`hdmf.backends.io.HDMFIO.export` for more details.
"""
if self.__mode != 'w':
if self.mode != 'w':
raise UnsupportedOperation("Cannot export to file %s in mode '%s'. Please use mode 'w'."
% (self.source, self.__mode))
% (self.source, self.mode))

src_io = getargs('src_io', kwargs)
write_args, cache_spec = popargs('write_args', 'cache_spec', kwargs)
Expand Down
9 changes: 6 additions & 3 deletions tests/unit/base_tests_zarrio.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@ def setUp(self):

def tearDown(self):
if os.path.exists(self.store):
shutil.rmtree(self.store)
if os.path.isdir(self.store):
shutil.rmtree(self.store)
else: # in case a test created a file instead of a directory
os.remove(self.store)

def createReferenceBuilder(self):
data_1 = np.arange(100, 200, 10).reshape(2, 5)
Expand All @@ -117,9 +120,9 @@ def createReferenceBuilder(self):
'ref_dataset': dataset_ref})
return builder

def create_zarr(self, consolidate_metadata=True):
def create_zarr(self, consolidate_metadata=True, force_overwrite=False, mode='a'):
builder = self.createReferenceBuilder()
writer = ZarrIO(self.store, mode='a')
writer = ZarrIO(self.store, mode=mode, force_overwrite=force_overwrite)
writer.write_builder(builder, consolidate_metadata)
writer.close()

Expand Down
13 changes: 13 additions & 0 deletions tests/unit/test_zarrio.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,19 @@ def test_force_open_without_consolidated_fails(self):
except ValueError as e:
self.fail("ZarrIO.__open_file_consolidated raised an unexpected ValueError: {}".format(e))

class TestOverwriteExistingFile(ZarrStoreTestCase):
def test_force_overwrite_when_file_exists(self):
"""
Test that we can overwrite a file when opening with `w` mode even if there is
an existing file. Zarr can write into a directory but not a file.
"""
# create a dummy text file
with open(self.store, "w") as file:
file.write("Just a test file used in TestOverwriteExistingFile")
# try to create a Zarr file at the same location (i.e., self.store) as the
# test text file to force overwriting the exsiting file.

Check failure on line 211 in tests/unit/test_zarrio.py

View workflow job for this annotation

GitHub Actions / Check for spelling errors

exsiting ==> existing
self.create_zarr(force_overwrite=True, mode='w')


class TestDimensionLabels(BuildDatasetShapeMixin):
"""
Expand Down

0 comments on commit 0f01281

Please sign in to comment.