Refactor object stores so we can remove _delete...
jmchilton committed May 8, 2024
1 parent 5d057a4 commit f068601
Showing 4 changed files with 86 additions and 37 deletions.
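The four diffs below apply the same extraction to each backend: the provider-specific deletion calls move out of `_delete` into two narrow hooks, `_delete_remote_all(rel_path)` (delete every remote object under a prefix) and `_delete_existing_remote(rel_path)` (delete a single remote object). Judging from the commit title, this is groundwork for hoisting the duplicated `_delete` control flow into a shared base class. Below is a minimal sketch of what that shared method could look like — the class name is hypothetical, and the helper calls (`_construct_path`, `_get_cache_path`, `_exists_remotely`, `_get_filename`) simply mirror the per-store code shown in the diffs:

# Sketch only, with assumed names; Galaxy's actual base class may differ.
import logging
import os
import shutil
from abc import ABC, abstractmethod

log = logging.getLogger(__name__)


class CachingObjectStoreSketch(ABC):
    @abstractmethod
    def _delete_remote_all(self, rel_path: str) -> bool:
        """Delete every remote object whose key starts with rel_path."""

    @abstractmethod
    def _delete_existing_remote(self, rel_path: str) -> bool:
        """Delete the single remote object stored at rel_path."""

    def _delete(self, obj, entire_dir=False, **kwargs):
        rel_path = self._construct_path(obj, **kwargs)
        extra_dir = kwargs.get("extra_dir", None)
        try:
            if entire_dir and extra_dir:
                # Drop the whole cached directory, then let the backend
                # remove everything under the prefix remotely.
                shutil.rmtree(self._get_cache_path(rel_path), ignore_errors=True)
                return self._delete_remote_all(rel_path)
            # Delete from cache first (tolerating a missing cache file) ...
            try:
                os.unlink(self._get_cache_path(rel_path))
            except FileNotFoundError:
                pass
            # ... then from the remote store.
            if self._exists_remotely(rel_path):
                return self._delete_existing_remote(rel_path)
        except OSError:
            log.exception("%s delete error", self._get_filename(obj, **kwargs))
        return False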
30 changes: 21 additions & 9 deletions lib/galaxy/objectstore/azure_blob.py
@@ -301,25 +301,37 @@ def _delete(self, obj, entire_dir=False, **kwargs):
             # but requires iterating through each individual blob in Azure and deleting it.
             if entire_dir and extra_dir:
                 shutil.rmtree(self._get_cache_path(rel_path), ignore_errors=True)
-                blobs = self.service.get_container_client(self.container_name).list_blobs(name_starts_with=rel_path)
-                for blob in blobs:
-                    log.debug("Deleting from Azure: %s", blob)
-                    self._blob_client(blob.name).delete_blob()
-                return True
+                return self._delete_remote_all(rel_path)
             else:
                 # Delete from cache first
                 unlink(self._get_cache_path(rel_path), ignore_errors=True)
                 # Delete from Azure as well
                 if self._exists_remotely(rel_path):
-                    log.debug("Deleting from Azure: %s", rel_path)
-                    self._blob_client(rel_path).delete_blob()
-                    return True
-        except AzureHttpError:
-            log.exception("Could not delete blob '%s' from Azure", rel_path)
+                    return self._delete_existing_remote(rel_path)
         except OSError:
             log.exception("%s delete error", self._get_filename(obj, **kwargs))
         return False
 
+    def _delete_remote_all(self, rel_path: str) -> bool:
+        try:
+            blobs = self.service.get_container_client(self.container_name).list_blobs(name_starts_with=rel_path)
+            for blob in blobs:
+                log.debug("Deleting from Azure: %s", blob)
+                self._blob_client(blob.name).delete_blob()
+            return True
+        except AzureHttpError:
+            log.exception("Could not delete blob '%s' from Azure", rel_path)
+            return False
+
+    def _delete_existing_remote(self, rel_path: str) -> bool:
+        try:
+            self._blob_client(rel_path).delete_blob()
+            return True
+        except AzureHttpError:
+            log.exception("Could not delete blob '%s' from Azure", rel_path)
+            return False
+
     def _get_object_url(self, obj, **kwargs):
         if self._exists(obj, **kwargs):
             rel_path = self._construct_path(obj, **kwargs)
34 changes: 23 additions & 11 deletions lib/galaxy/objectstore/cloud.py
@@ -398,26 +398,38 @@ def _delete(self, obj, entire_dir=False, **kwargs):
             # but requires iterating through each individual key in the bucket and deleting it.
             if entire_dir and extra_dir:
                 shutil.rmtree(self._get_cache_path(rel_path), ignore_errors=True)
-                results = self.bucket.objects.list(prefix=rel_path)
-                for key in results:
-                    log.debug("Deleting key %s", key.name)
-                    key.delete()
-                return True
+                return self._delete_remote_all(rel_path)
             else:
                 # Delete from cache first
                 unlink(self._get_cache_path(rel_path), ignore_errors=True)
                 # Delete from the cloud bucket as well
                 if self._exists_remotely(rel_path):
-                    key = self.bucket.objects.get(rel_path)
-                    log.debug("Deleting key %s", key.name)
-                    key.delete()
-                    return True
-        except Exception:
-            log.exception("Could not delete key '%s' from cloud", rel_path)
+                    return self._delete_existing_remote(rel_path)
         except OSError:
             log.exception("%s delete error", self._get_filename(obj, **kwargs))
         return False
 
+    def _delete_remote_all(self, rel_path: str) -> bool:
+        try:
+            results = self.bucket.objects.list(prefix=rel_path)
+            for key in results:
+                log.debug("Deleting key %s", key.name)
+                key.delete()
+            return True
+        except Exception:
+            log.exception("Could not delete key '%s' from cloud", rel_path)
+            return False
+
+    def _delete_existing_remote(self, rel_path: str) -> bool:
+        try:
+            key = self.bucket.objects.get(rel_path)
+            log.debug("Deleting key %s", key.name)
+            key.delete()
+            return True
+        except Exception:
+            log.exception("Could not delete key '%s' from cloud", rel_path)
+            return False
+
     def _get_object_url(self, obj, **kwargs):
         if self._exists(obj, **kwargs):
             rel_path = self._construct_path(obj, **kwargs)
25 changes: 19 additions & 6 deletions lib/galaxy/objectstore/pithos.py
@@ -252,18 +252,31 @@ def _delete(self, obj, **kwargs):
             extra_dir = kwargs.get("extra_dir", False)
             if entire_dir and extra_dir:
                 shutil.rmtree(cache_path)
-                log.debug(f"On Pithos: delete -r {path}/")
-                self.pithos.del_object(path, delimiter="/")
-                return True
+                return self._delete_remote_all(path)
             else:
                 os.unlink(cache_path)
-                self.pithos.del_object(path)
+                return self._delete_existing_remote(path)
         except OSError:
             log.exception(f"{self._get_filename(obj, **kwargs)} delete error")
-        except ClientError as ce:
-            log.exception(f"Could not delete {path} from Pithos, {ce}")
         return False
 
+    def _delete_remote_all(self, path: str) -> bool:
+        try:
+            log.debug(f"On Pithos: delete -r {path}/")
+            self.pithos.del_object(path, delimiter="/")
+            return True
+        except ClientError:
+            log.exception(f"Could not delete path '{path}' from Pithos")
+            return False
+
+    def _delete_existing_remote(self, path: str) -> bool:
+        try:
+            self.pithos.del_object(path)
+            return True
+        except ClientError:
+            log.exception(f"Could not delete path '{path}' from Pithos")
+            return False
+
     def _get_object_url(self, obj, **kwargs):
         """
         :returns: URL for direct access, None if no object
34 changes: 23 additions & 11 deletions lib/galaxy/objectstore/s3.py
@@ -436,27 +436,39 @@ def _delete(self, obj, entire_dir=False, **kwargs):
             # but requires iterating through each individual key in S3 and deleting it.
             if entire_dir and extra_dir:
                 shutil.rmtree(self._get_cache_path(rel_path), ignore_errors=True)
-                results = self._bucket.get_all_keys(prefix=rel_path)
-                for key in results:
-                    log.debug("Deleting key %s", key.name)
-                    key.delete()
-                return True
+                return self._delete_remote_all(rel_path)
             else:
                 # Delete from cache first
                 unlink(self._get_cache_path(rel_path), ignore_errors=True)
                 # Delete from S3 as well
                 if self._exists_remotely(rel_path):
-                    key = Key(self._bucket, rel_path)
-                    log.debug("Deleting key %s", key.name)
-                    key.delete()
-                    return True
-        except S3ResponseError:
-            log.exception("Could not delete key '%s' from S3", rel_path)
+                    return self._delete_existing_remote(rel_path)
         except OSError:
             log.exception("%s delete error", self._get_filename(obj, **kwargs))
         return False
         # return cache_path # Until the upload tool does not explicitly create the dataset, return expected path
 
+    def _delete_remote_all(self, rel_path: str) -> bool:
+        try:
+            results = self._bucket.get_all_keys(prefix=rel_path)
+            for key in results:
+                log.debug("Deleting key %s", key.name)
+                key.delete()
+            return True
+        except S3ResponseError:
+            log.exception("Could not delete key '%s' from S3", rel_path)
+            return False
+
+    def _delete_existing_remote(self, rel_path: str) -> bool:
+        try:
+            key = Key(self._bucket, rel_path)
+            log.debug("Deleting key %s", key.name)
+            key.delete()
+            return True
+        except S3ResponseError:
+            log.exception("Could not delete key '%s' from S3", rel_path)
+            return False
+
     def _download_directory_into_cache(self, rel_path, cache_path):
         download_directory(self._bucket, rel_path, cache_path)
 
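A side benefit of the extraction is that the remote-deletion contract — return True on success, log and return False when the provider errors out, never let the exception escape — can now be exercised in isolation, without going through `_delete` or a live backend. A rough, self-contained illustration of that contract with stand-in classes (everything here, including ProviderError, FakeBlobClient, and StubStore, is invented for the example; only the helper's name and its bool contract come from the commit):

import logging

log = logging.getLogger(__name__)


class ProviderError(Exception):
    """Stands in for AzureHttpError / S3ResponseError / ClientError."""


class FakeBlobClient:
    def __init__(self, fail: bool):
        self.fail = fail
        self.deleted = False

    def delete_blob(self):
        if self.fail:
            raise ProviderError("simulated outage")
        self.deleted = True


class StubStore:
    def __init__(self, client: FakeBlobClient):
        self._client = client

    def _blob_client(self, rel_path: str) -> FakeBlobClient:
        return self._client

    def _delete_existing_remote(self, rel_path: str) -> bool:
        # Same shape as the extracted helpers above: succeed quietly,
        # or log the provider failure and report False.
        try:
            self._blob_client(rel_path).delete_blob()
            return True
        except ProviderError:
            log.exception("Could not delete blob '%s'", rel_path)
            return False


assert StubStore(FakeBlobClient(fail=False))._delete_existing_remote("a/b") is True
assert StubStore(FakeBlobClient(fail=True))._delete_existing_remote("a/b") is False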
