Remove duplication around _delete in object stores.
jmchilton committed May 8, 2024
1 parent f068601 commit c4c6001
Showing 5 changed files with 30 additions and 115 deletions.
30 changes: 0 additions & 30 deletions lib/galaxy/objectstore/azure_blob.py
@@ -283,36 +283,6 @@ def _push_to_os(self, rel_path, source_file=None, from_string=None):
# Public Methods #
##################

def _delete(self, obj, entire_dir=False, **kwargs):
rel_path = self._construct_path(obj, **kwargs)
extra_dir = kwargs.get("extra_dir", None)
base_dir = kwargs.get("base_dir", None)
dir_only = kwargs.get("dir_only", False)
obj_dir = kwargs.get("obj_dir", False)
try:
if base_dir and dir_only and obj_dir:
# Remove temporary data in JOB_WORK directory
shutil.rmtree(os.path.abspath(rel_path))
return True

# For the case of extra_files, because we don't have a reference to
# individual files/blobs we need to remove the entire directory structure
# with all the files in it. This is easy for the local file system,
# but requires iterating through each individual blob in Azure and deleting it.
if entire_dir and extra_dir:
shutil.rmtree(self._get_cache_path(rel_path), ignore_errors=True)
return self._delete_remote_all(rel_path)
else:
# Delete from cache first
unlink(self._get_cache_path(rel_path), ignore_errors=True)
# Delete from S3 as well
if self._exists_remotely(rel_path):
log.debug("Deleting from Azure: %s", rel_path)
return self._delete_existing_remote(rel_path)
except OSError:
log.exception("%s delete error", self._get_filename(obj, **kwargs))
return False

def _delete_remote_all(self, rel_path: str) -> bool:
try:
blobs = self.service.get_container_client(self.container_name).list_blobs(name_starts_with=rel_path)
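
Two Azure-specific details disappear along with this hunk: the copy-pasted "# Delete from S3 as well" comment (stale for Azure in any case) and the log.debug("Deleting from Azure: %s", rel_path) call, which the shared _delete added to caching.py below does not emit. If that per-backend logging is still wanted, the Azure store's _delete_existing_remote hook (called by the shared method but not shown in this diff) would be the natural home for it. A rough, hypothetical sketch only; the method body below is illustrative, not code from this commit:

    # Hypothetical: keep the Azure-specific debug line inside the backend hook
    # that the consolidated _delete dispatches to. Error handling omitted.
    def _delete_existing_remote(self, rel_path: str) -> bool:
        log.debug("Deleting from Azure: %s", rel_path)
        self.service.get_container_client(self.container_name).delete_blob(rel_path)
        return True
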
30 changes: 30 additions & 0 deletions lib/galaxy/objectstore/caching.py
@@ -26,6 +26,7 @@
ExecutionTimer,
nice_size,
string_as_bool,
unlink,
)
from galaxy.util.path import safe_relpath
from galaxy.util.sleeper import Sleeper
@@ -432,6 +433,35 @@ def _download_directory_into_cache(self, rel_path, cache_path):
# maybe implement this for those object stores.
pass

def _delete(self, obj, entire_dir=False, **kwargs):
rel_path = self._construct_path(obj, **kwargs)
extra_dir = kwargs.get("extra_dir", None)
base_dir = kwargs.get("base_dir", None)
dir_only = kwargs.get("dir_only", False)
obj_dir = kwargs.get("obj_dir", False)
try:
# Remove temporary data in JOB_WORK directory
if base_dir and dir_only and obj_dir:
shutil.rmtree(os.path.abspath(rel_path))
return True

# For the case of extra_files, because we don't have a reference to
# individual files/keys we need to remove the entire directory structure
# with all the files in it. This is easy for the local file system,
# but requires iterating through each individual remote key and deleting it.
if entire_dir and extra_dir:
shutil.rmtree(self._get_cache_path(rel_path), ignore_errors=True)
return self._delete_remote_all(rel_path)
else:
# Delete from cache first
unlink(self._get_cache_path(rel_path), ignore_errors=True)
# Delete from the remote store as well
if self._exists_remotely(rel_path):
return self._delete_existing_remote(rel_path)
except OSError:
log.exception("%s delete error", self._get_filename(obj, **kwargs))
return False

def _update_from_file(self, obj, file_name=None, create=False, **kwargs):
if create:
self._create(obj, **kwargs)
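
With _delete now defined once in the caching layer, a backend only has to supply the remote-specific operations that the shared method dispatches to. A minimal sketch of those two hooks for a hypothetical backend (the hook names come from this diff; the class name and placeholder bodies are illustrative, not Galaxy code):

    class ExampleRemoteObjectStore:  # illustrative stand-in for a concrete backend
        def _delete_existing_remote(self, rel_path: str) -> bool:
            # Remove the single remote object stored at rel_path; return True on success.
            raise NotImplementedError

        def _delete_remote_all(self, rel_path: str) -> bool:
            # Remove every remote object under the rel_path prefix (the extra_files case).
            raise NotImplementedError

The shared _delete also relies on _construct_path, _get_cache_path, and _exists_remotely, which already appear in both the removed and the added code in this diff.
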
29 changes: 0 additions & 29 deletions lib/galaxy/objectstore/cloud.py
@@ -380,35 +380,6 @@ def _push_to_os(self, rel_path, source_file=None, from_string=None):
log.exception("Trouble pushing S3 key '%s' from file '%s'", rel_path, source_file)
return False

def _delete(self, obj, entire_dir=False, **kwargs):
rel_path = self._construct_path(obj, **kwargs)
extra_dir = kwargs.get("extra_dir", None)
base_dir = kwargs.get("base_dir", None)
dir_only = kwargs.get("dir_only", False)
obj_dir = kwargs.get("obj_dir", False)
try:
# Remove temporary data in JOB_WORK directory
if base_dir and dir_only and obj_dir:
shutil.rmtree(os.path.abspath(rel_path))
return True

# For the case of extra_files, because we don't have a reference to
# individual files/keys we need to remove the entire directory structure
# with all the files in it. This is easy for the local file system,
# but requires iterating through each individual key in S3 and deleting it.
if entire_dir and extra_dir:
shutil.rmtree(self._get_cache_path(rel_path), ignore_errors=True)
return self._delete_remote_all(rel_path)
else:
# Delete from cache first
unlink(self._get_cache_path(rel_path), ignore_errors=True)
# Delete from S3 as well
if self._exists_remotely(rel_path):
return self._delete_existing_remote(rel_path)
except OSError:
log.exception("%s delete error", self._get_filename(obj, **kwargs))
return False

def _delete_remote_all(self, rel_path: str) -> bool:
try:
results = self.bucket.objects.list(prefix=rel_path)
26 changes: 0 additions & 26 deletions lib/galaxy/objectstore/pithos.py
@@ -234,32 +234,6 @@ def _get_remote_size(self, path):
return 0
return int(file["content-length"])

def _delete(self, obj, **kwargs):
"""Delete the object
:returns: whether the object was deleted
"""
path = self._construct_path(obj, **kwargs)
base_dir = kwargs.get("base_dir", None)
dir_only = kwargs.get("dir_only", False)
obj_dir = kwargs.get("obj_dir", False)
try:
if all((base_dir, dir_only, obj_dir)):
shutil.rmtree(os.path.abspath(path))
return True
cache_path = self._get_cache_path(path)

entire_dir = kwargs.get("entire_dir", False)
extra_dir = kwargs.get("extra_dir", False)
if entire_dir and extra_dir:
shutil.rmtree(cache_path)
return self._delete_remote_all(path)
else:
os.unlink(cache_path)
return self._delete_existing_remote(path)
except OSError:
log.exception(f"{self._get_filename(obj, **kwargs)} delete error")
return False

def _delete_remote_all(self, path: str) -> bool:
try:
log.debug(f"On Pithos: delete -r {path}/")
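
The Pithos consolidation is not purely mechanical. The removed version deleted the cached copy with plain shutil.rmtree / os.unlink, so a missing cache entry raised OSError and the method logged an error and returned False, and it always attempted the remote delete. The shared implementation in caching.py ignores missing cache files and only calls _delete_existing_remote after _exists_remotely confirms the object is present. Side by side (both fragments taken from this diff):

    # Removed Pithos behaviour: a missing cache file raises OSError here, which
    # the except clause turns into a logged error and a False return.
    os.unlink(cache_path)
    return self._delete_existing_remote(path)

    # Shared behaviour in caching.py: missing cache entries are ignored and the
    # remote delete runs only when the object actually exists remotely.
    unlink(self._get_cache_path(rel_path), ignore_errors=True)
    if self._exists_remotely(rel_path):
        return self._delete_existing_remote(rel_path)
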
30 changes: 0 additions & 30 deletions lib/galaxy/objectstore/s3.py
@@ -418,36 +418,6 @@ def _push_to_os(self, rel_path, source_file=None, from_string=None):
raise
return False

def _delete(self, obj, entire_dir=False, **kwargs):
rel_path = self._construct_path(obj, **kwargs)
extra_dir = kwargs.get("extra_dir", None)
base_dir = kwargs.get("base_dir", None)
dir_only = kwargs.get("dir_only", False)
obj_dir = kwargs.get("obj_dir", False)
try:
# Remove temporary data in JOB_WORK directory
if base_dir and dir_only and obj_dir:
shutil.rmtree(os.path.abspath(rel_path))
return True

# For the case of extra_files, because we don't have a reference to
# individual files/keys we need to remove the entire directory structure
# with all the files in it. This is easy for the local file system,
# but requires iterating through each individual key in S3 and deleting it.
if entire_dir and extra_dir:
shutil.rmtree(self._get_cache_path(rel_path), ignore_errors=True)
return self._delete_remote_all(rel_path)
else:
# Delete from cache first
unlink(self._get_cache_path(rel_path), ignore_errors=True)
# Delete from S3 as well
if self._exists_remotely(rel_path):
self._delete_existing_remote(rel_path)
except OSError:
log.exception("%s delete error", self._get_filename(obj, **kwargs))
return False
# return cache_path # Until the upload tool does not explicitly create the dataset, return expected path

def _delete_remote_all(self, rel_path: str) -> bool:
try:
results = self._bucket.get_all_keys(prefix=rel_path)
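
One behavioural wrinkle in the removed S3 version: it never returned the result of _delete_existing_remote, so, assuming the trailing return False sits after the try/except as it does in the other stores, a successful single-object delete still reported False. The consolidated _delete in caching.py propagates the hook's return value instead:

    # Removed S3 version: the remote delete's result is discarded and control
    # falls through to the trailing "return False" even when the delete succeeds.
    if self._exists_remotely(rel_path):
        self._delete_existing_remote(rel_path)

    # Consolidated version in caching.py: the hook's result is returned directly.
    if self._exists_remotely(rel_path):
        return self._delete_existing_remote(rel_path)
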
