Skip to content

Commit

Permalink
Fix extra files handling for cached object stores when reset_cache ca…
Browse files Browse the repository at this point in the history
…lled.
  • Loading branch information
jmchilton committed May 10, 2024
1 parent 31d2c9d commit 18453e4
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 13 deletions.
5 changes: 4 additions & 1 deletion lib/galaxy/objectstore/_caching_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,10 @@ def _get_filename(self, obj, **kwargs):
return cache_path

# Check if the file exists in the cache first, always pull if file size in cache is zero
if self._in_cache(rel_path) and (dir_only or os.path.getsize(self._get_cache_path(rel_path)) > 0):
# For dir_only - the cache cleaning may have left empty directories so I think we need to
# always resync the cache. Gotta make sure we're being judicious in out data.extra_files_path
# calls I think.
if not dir_only and self._in_cache(rel_path) and os.path.getsize(self._get_cache_path(rel_path)) > 0:
return cache_path

# Check if the file exists in persistent storage and, if it does, pull it into cache
Expand Down
29 changes: 17 additions & 12 deletions test/unit/objectstore/test_objectstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -1346,22 +1346,29 @@ def verify_caching_object_store_functionality(tmp_path, object_store, check_get_
extra_files_dataset._extra_files_rel_path,
)

# kind of a huge problem, the object stores that pass the following
# tests, do not do so if we use Galaxy's cache cleaning which does
# not remove directories. The @mvdbeek integration test cache clearing
# here (remove and restore the whole caching directory) does work and
# definitely should work. We just need to also figure out how to make
# reset_cache work here.

# reset_cache(object_store.cache_target)
# The following checks used to exhibit different behavior depending
# on how the cache was cleaned - removing the whole directory vs
# just cleaning up files the way Galaxy's internal caching works with
# reset_cache. So we test both here.

# hard reset
shutil.rmtree(object_store.cache_target.path)
os.makedirs(object_store.cache_target.path)

extra_path = _extra_file_path(object_store, extra_files_dataset)
assert os.path.exists(extra_path)
expected_extra_file = os.path.join(extra_path, "new_value.txt")
assert os.path.exists(expected_extra_file)
assert open(expected_extra_file, "r").read() == "My new value"
assert open(expected_extra_file).read() == "My new value"

# Redo the above test with Galaxy's reset_cache which leaves empty directories
# around.
reset_cache(object_store.cache_target)
extra_path = _extra_file_path(object_store, extra_files_dataset)
assert os.path.exists(extra_path)
expected_extra_file = os.path.join(extra_path, "new_value.txt")
assert os.path.exists(expected_extra_file)
assert open(expected_extra_file).read() == "My new value"

# Test get_object_url returns a read-only URL
url = object_store.get_object_url(hello_world_dataset)
Expand All @@ -1375,9 +1382,7 @@ def _extra_file_path(object_store, dataset):
# invoke the magic calls the model layer would invoke here...
if object_store.exists(dataset, dir_only=True, extra_dir=dataset._extra_files_rel_path):
return object_store.get_filename(dataset, dir_only=True, extra_dir=dataset._extra_files_rel_path)
return object_store.construct_path(
dataset, dir_only=True, extra_dir=dataset._extra_files_rel_path, in_cache=True
)
return object_store.construct_path(dataset, dir_only=True, extra_dir=dataset._extra_files_rel_path, in_cache=True)


def verify_object_store_functionality(tmp_path, object_store, check_get_url=True):
Expand Down

0 comments on commit 18453e4

Please sign in to comment.