From c8ee71a53134914eac898e6ebdb79e368493a9fe Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Tue, 10 Jan 2023 17:09:23 +0100 Subject: [PATCH 01/42] Add storage cleaner manager for histories --- lib/galaxy/managers/base.py | 26 +++++++++++ lib/galaxy/managers/histories.py | 66 +++++++++++++++++++++++++++- lib/galaxy/schema/storage_cleaner.py | 57 ++++++++++++++++++++++++ 3 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 lib/galaxy/schema/storage_cleaner.py diff --git a/lib/galaxy/managers/base.py b/lib/galaxy/managers/base.py index dc50acf9f5fd..5600f42803e9 100644 --- a/lib/galaxy/managers/base.py +++ b/lib/galaxy/managers/base.py @@ -56,6 +56,11 @@ from galaxy.model import tool_shed_install from galaxy.schema import ValueFilterQueryParams from galaxy.schema.fields import DecodedDatabaseIdField +from galaxy.schema.storage_cleaner import ( + DiscardedItemsSummary, + StorageItemsCleanupResult, + StoredItem, +) from galaxy.security.idencoding import IdEncodingHelper from galaxy.structured_app import ( BasicSharedApp, @@ -1298,3 +1303,24 @@ def parse_order_by(self, order_by_string, default=None): """Return an ORM compatible order_by clause using the given string (i.e.: 'name-dsc,create_time'). This must be implemented by the manager.""" raise NotImplementedError + + +class StorageCleanerManager(Protocol): + """ + Interface for monitoring storage usage and managing deletion/purging of objects that consume user's storage space. + """ + + def get_discarded_summary(self, user: model.User) -> DiscardedItemsSummary: + """Returns information with the total storage space taken by discarded items for the given user. + + Discarded items are those that are deleted but not purged yet. + """ + raise NotImplementedError + + def get_discarded(self, user: model.User, offset: Optional[int], limit: Optional[int]) -> List[StoredItem]: + """Returns a paginated list of items deleted by the given user that are not yet purged.""" + raise NotImplementedError + + def cleanup_items(self, user: model.User, item_ids: Set[int]) -> StorageItemsCleanupResult: + """Purges the given list of items by ID. 
The items must be owned by the user.""" + raise NotImplementedError diff --git a/lib/galaxy/managers/histories.py b/lib/galaxy/managers/histories.py index 1c976773f66a..c3819ec05ee6 100644 --- a/lib/galaxy/managers/histories.py +++ b/lib/galaxy/managers/histories.py @@ -19,6 +19,10 @@ and_, asc, desc, + false, + func, + select, + true, ) from typing_extensions import Literal @@ -36,6 +40,7 @@ ModelDeserializingError, Serializer, SortableManager, + StorageCleanerManager, ) from galaxy.managers.export_tracker import StoreExportTracker from galaxy.schema.fields import DecodedDatabaseIdField @@ -45,13 +50,21 @@ HDABasicInfo, ShareHistoryExtra, ) +from galaxy.schema.storage_cleaner import ( + DiscardedItemsSummary, + StorageItemCleanupError, + StorageItemsCleanupResult, + StoredItem, +) from galaxy.security.validate_user_input import validate_preferred_object_store_id from galaxy.structured_app import MinimalManagerApp log = logging.getLogger(__name__) -class HistoryManager(sharable.SharableModelManager, deletable.PurgableManagerMixin, SortableManager): +class HistoryManager( + sharable.SharableModelManager, deletable.PurgableManagerMixin, SortableManager, StorageCleanerManager +): model_class = model.History foreign_key_name = "history" user_share_model = model.HistoryUserShareAssociation @@ -354,6 +367,57 @@ def make_members_public(self, trans, item): else: log.warning(f"User without permissions tried to make dataset with id: {dataset.id} public") + def get_discarded_summary(self, user: model.User) -> DiscardedItemsSummary: + stmt = select([func.sum(model.History.disk_size), func.count(model.History.id)]).where( + model.History.user_id == user.id, + model.History.deleted == true(), + model.History.purged == false(), + ) + result = self.session().execute(stmt).fetchone() + return DiscardedItemsSummary(total_size=result[0], total_items=result[1]) + + def get_discarded(self, user: model.User, offset: Optional[int], limit: Optional[int]) -> List[StoredItem]: + stmt = select(model.History).where( + model.History.user_id == user.id, + model.History.deleted == true(), + model.History.purged == false(), + ) + if offset: + stmt = stmt.offset(offset) + if limit: + stmt = stmt.limit(limit) + result = self.session().execute(stmt).scalars() + discarded = [self._history_to_stored_item(item) for item in result] + return discarded + + def cleanup_items(self, user: model.User, item_ids: Set[int]) -> StorageItemsCleanupResult: + success_item_count = 0 + total_free_bytes = 0 + errors: List[StorageItemCleanupError] = [] + + for history_id in item_ids: + try: + history = self.get_owned(history_id, user) + self.purge(history, flush=False) + success_item_count += 1 + total_free_bytes += int(history.disk_size) + except BaseException as e: + errors.append(StorageItemCleanupError(item_id=history_id, error=str(e))) + + if success_item_count: + self.session().flush() + return StorageItemsCleanupResult( + total_item_count=len(item_ids), + success_item_count=success_item_count, + total_free_bytes=total_free_bytes, + errors=errors, + ) + + def _history_to_stored_item(self, history: model.History) -> StoredItem: + return StoredItem( + id=history.id, name=history.name, type="history", size=history.disk_size, update_time=history.update_time + ) + class HistoryExportManager: export_object_type = ExportObjectType.HISTORY diff --git a/lib/galaxy/schema/storage_cleaner.py b/lib/galaxy/schema/storage_cleaner.py new file mode 100644 index 000000000000..7bfac5f55081 --- /dev/null +++ b/lib/galaxy/schema/storage_cleaner.py @@ -0,0 
+1,57 @@ +from datetime import datetime +from typing import ( + List, + Union, +) + +from pydantic import Field +from typing_extensions import Literal + +from galaxy.schema.fields import ( + DecodedDatabaseIdField, + EncodedDatabaseIdField, +) +from galaxy.schema.schema import ( + Model, + UpdateTimeField, +) + + +class DiscardedItemsSummary(Model): + total_size: int = Field( + ..., + title="Total Size", + description="The total size in bytes that can be recovered by purging all the items.", + ) + total_items: int = Field( + ..., + title="Total Items", + description="The total number of items that could be purged.", + ) + + +StoredItemType = Union[Literal["history"], Literal["hda"]] + + +class StoredItem(Model): + id: EncodedDatabaseIdField + name: str + type: StoredItemType + size: int + update_time: datetime = UpdateTimeField + + +class StorageItemCleanupError(Model): + item_id: EncodedDatabaseIdField + error: str + + +class CleanupStorageItemsRequest(Model): + item_ids: List[DecodedDatabaseIdField] + + +class StorageItemsCleanupResult(Model): + total_item_count: int + success_item_count: int + total_free_bytes: int + errors: List[StorageItemCleanupError] From f6ea6a872e3b66a092422bc3c945286f3d1ab87a Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Tue, 10 Jan 2023 17:43:19 +0100 Subject: [PATCH 02/42] Make IdEncodingHelper optional in services No point in keeping it required, since the encoding/decoding should be done directly in the model parsing going forward. --- lib/galaxy/webapps/galaxy/services/base.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/webapps/galaxy/services/base.py b/lib/galaxy/webapps/galaxy/services/base.py index 2b302c89a910..752458090639 100644 --- a/lib/galaxy/webapps/galaxy/services/base.py +++ b/lib/galaxy/webapps/galaxy/services/base.py @@ -44,6 +44,10 @@ def ensure_celery_tasks_enabled(config): ) +class SecurityNotProvidedError(Exception): + pass + + class ServiceBase: """Base class with common logic and utils reused by other services. @@ -56,8 +60,16 @@ class ServiceBase: the required parameters and outputs of each operation. """ - def __init__(self, security: IdEncodingHelper): - self.security = security + def __init__(self, security: Optional[IdEncodingHelper] = None): + self._security = security + + @property + def security(self) -> IdEncodingHelper: + if self._security is None: + raise SecurityNotProvidedError( + "Security encoding helper must be set in the service constructor to encode/decode ids." + ) + return self._security def decode_id(self, id: EncodedDatabaseIdField, kind: Optional[str] = None) -> int: """Decodes a previously encoded database ID.""" From 8bf6b9b929bb9701ea9b37d570284cb9e065a717 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Tue, 10 Jan 2023 17:46:11 +0100 Subject: [PATCH 03/42] Mark manual service encoding/decoding as deprecated This should be done directly in the model using the appropriate DecodedDatabaseIdField or EncodedDatabaseIdField. 
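For illustration, the model-parsing approach looks roughly like this
(hypothetical ExampleRequest/ExampleResponse names; CleanupStorageItemsRequest
added earlier in this series is a real instance of the same pattern):

    from typing import List

    from galaxy.schema.fields import (
        DecodedDatabaseIdField,
        EncodedDatabaseIdField,
    )
    from galaxy.schema.schema import Model


    class ExampleRequest(Model):
        # Encoded IDs coming from the client are decoded into raw database
        # integers while pydantic parses the request body.
        item_ids: List[DecodedDatabaseIdField]


    class ExampleResponse(Model):
        # Raw database IDs are encoded back into their public form when the
        # response model is serialized.
        id: EncodedDatabaseIdField

With models like these, a service method receives already-decoded integers and
returns models that encode on the way out, so it never needs to call
security.decode_id()/encode_id() manually.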
--- lib/galaxy/webapps/galaxy/services/base.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/galaxy/webapps/galaxy/services/base.py b/lib/galaxy/webapps/galaxy/services/base.py index 752458090639..164e4a8fd7c7 100644 --- a/lib/galaxy/webapps/galaxy/services/base.py +++ b/lib/galaxy/webapps/galaxy/services/base.py @@ -8,6 +8,7 @@ ) from celery.result import AsyncResult +from deprecated import deprecated from galaxy.exceptions import ( AuthenticationRequired, @@ -48,6 +49,11 @@ class SecurityNotProvidedError(Exception): pass +MANUAL_ENCODING_DEPRECATION_MESSAGE = ( + "Use model decoding/encoding instead with DecodedDatabaseIdField or EncodedDatabaseIdField." +) + + class ServiceBase: """Base class with common logic and utils reused by other services. @@ -64,6 +70,7 @@ def __init__(self, security: Optional[IdEncodingHelper] = None): self._security = security @property + @deprecated(reason=MANUAL_ENCODING_DEPRECATION_MESSAGE) def security(self) -> IdEncodingHelper: if self._security is None: raise SecurityNotProvidedError( @@ -71,20 +78,24 @@ def security(self) -> IdEncodingHelper: ) return self._security + @deprecated(reason=MANUAL_ENCODING_DEPRECATION_MESSAGE) def decode_id(self, id: EncodedDatabaseIdField, kind: Optional[str] = None) -> int: """Decodes a previously encoded database ID.""" return decode_with_security(self.security, id, kind=kind) + @deprecated(reason=MANUAL_ENCODING_DEPRECATION_MESSAGE) def encode_id(self, id: int, kind: Optional[str] = None) -> EncodedDatabaseIdField: """Encodes a raw database ID.""" return encode_with_security(self.security, id, kind=kind) + @deprecated(reason=MANUAL_ENCODING_DEPRECATION_MESSAGE) def decode_ids(self, ids: List[EncodedDatabaseIdField]) -> List[int]: """ Decodes all encoded IDs in the given list. """ return [self.decode_id(id) for id in ids] + @deprecated(reason=MANUAL_ENCODING_DEPRECATION_MESSAGE) def encode_all_ids(self, rval, recursive: bool = False): """ Encodes all integer values in the dict rval whose keys are 'id' or end with '_id' From cd60784b994425f23431dcb5ee3f3c9f4afd768a Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Tue, 10 Jan 2023 17:49:23 +0100 Subject: [PATCH 04/42] Add StorageCleanerService Initial support only for histories. 
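The service is a thin layer: it resolves the authenticated user from the
transaction context (rejecting anonymous requests) and delegates the actual
work to the history manager. A minimal sketch of the intended consumer
(hypothetical router class; the real FastAPI routes are added in the next
commit):

    from galaxy.managers.context import ProvidesHistoryContext
    from galaxy.schema.storage_cleaner import DiscardedItemsSummary
    from galaxy.webapps.galaxy.api import (
        depends,
        DependsOnTrans,
        Router,
    )
    from galaxy.webapps.galaxy.services.storage_cleaner import StorageCleanerService

    router = Router(tags=["storage management"])


    @router.cbv
    class ExampleStorageApi:
        service: StorageCleanerService = depends(StorageCleanerService)

        @router.get("/api/storage/summary/discarded/histories")
        def discarded_histories_summary(
            self,
            trans: ProvidesHistoryContext = DependsOnTrans,
        ) -> DiscardedItemsSummary:
            # The service authenticates the user before any manager code
            # runs, so routes stay free of authentication boilerplate.
            return self.service.get_discarded_histories_summary(trans)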
--- .../galaxy/services/storage_cleaner.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 lib/galaxy/webapps/galaxy/services/storage_cleaner.py diff --git a/lib/galaxy/webapps/galaxy/services/storage_cleaner.py b/lib/galaxy/webapps/galaxy/services/storage_cleaner.py new file mode 100644 index 000000000000..f6bf65d06f68 --- /dev/null +++ b/lib/galaxy/webapps/galaxy/services/storage_cleaner.py @@ -0,0 +1,38 @@ +import logging +from typing import ( + Optional, + Set, +) + +from galaxy.managers.context import ProvidesHistoryContext +from galaxy.managers.histories import HistoryManager +from galaxy.managers.users import UserManager +from galaxy.webapps.galaxy.services.base import ServiceBase + +log = logging.getLogger(__name__) + + +class StorageCleanerService(ServiceBase): + """Service providing actions to monitor and recover storage space used by the user.""" + + def __init__( + self, + user_manager: UserManager, + history_manager: HistoryManager, + ): + self.user_manager = user_manager + self.history_manager = history_manager + + def get_discarded_histories_summary(self, trans: ProvidesHistoryContext): + user = self.get_authenticated_user(trans) + return self.history_manager.get_discarded_summary(user) + + def get_discarded_histories( + self, trans: ProvidesHistoryContext, offset: Optional[int] = None, limit: Optional[int] = None + ): + user = self.get_authenticated_user(trans) + return self.history_manager.get_discarded(user, offset, limit) + + def cleanup_histories(self, trans: ProvidesHistoryContext, item_ids: Set[int]): + user = self.get_authenticated_user(trans) + return self.history_manager.cleanup_items(user, item_ids) From 3b2871700d5a4726d3c24afc1d6bd99dabe8bda8 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Tue, 10 Jan 2023 17:52:59 +0100 Subject: [PATCH 05/42] Add fast API routes for history storage management --- .../webapps/galaxy/api/storage_cleaner.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 lib/galaxy/webapps/galaxy/api/storage_cleaner.py diff --git a/lib/galaxy/webapps/galaxy/api/storage_cleaner.py b/lib/galaxy/webapps/galaxy/api/storage_cleaner.py new file mode 100644 index 000000000000..c32c4d1b1f70 --- /dev/null +++ b/lib/galaxy/webapps/galaxy/api/storage_cleaner.py @@ -0,0 +1,69 @@ +""" +API operations on User storage management. 
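+
+This module exposes endpoints to summarize, list, and permanently purge
+discarded items that still consume the user's storage space.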
+""" +import logging +from typing import ( + List, + Optional, +) + +from fastapi import Body + +from galaxy.managers.context import ProvidesHistoryContext +from galaxy.schema.storage_cleaner import ( + CleanupStorageItemsRequest, + DiscardedItemsSummary, + StorageItemsCleanupResult, + StoredItem, +) +from galaxy.webapps.galaxy.api import ( + depends, + DependsOnTrans, + Router, +) +from galaxy.webapps.galaxy.api.common import ( + LimitQueryParam, + OffsetQueryParam, +) +from galaxy.webapps.galaxy.services.storage_cleaner import StorageCleanerService + +log = logging.getLogger(__name__) + + +router = Router(tags=["storage management"]) + + +@router.cbv +class FastAPIStorageCleaner: + service: StorageCleanerService = depends(StorageCleanerService) + + @router.get( + "/api/storage/summary/discarded/histories", + summary="Returns information with the total storage space taken by discarded histories associated with the given user.", + ) + def discarded_histories_summary( + self, + trans: ProvidesHistoryContext = DependsOnTrans, + ) -> DiscardedItemsSummary: + return self.service.get_discarded_histories_summary(trans) + + @router.get( + "/api/storage/discarded/histories", + summary="Returns all discarded histories associated with the given user.", + ) + def discarded_histories( + self, + trans: ProvidesHistoryContext = DependsOnTrans, + offset: Optional[int] = OffsetQueryParam, + limit: Optional[int] = LimitQueryParam, + ) -> List[StoredItem]: + return self.service.get_discarded_histories(trans, offset, limit) + + @router.delete( + "/api/storage/discarded/histories", + summary="Purges histories that has been previously deleted by the user.", + ) + def cleanup_histories( + self, trans: ProvidesHistoryContext = DependsOnTrans, payload: CleanupStorageItemsRequest = Body(...) 
+ ) -> StorageItemsCleanupResult: + return self.service.cleanup_histories(trans, set(payload.item_ids)) From f44598aab4d8ef6762406254e56950d160f590d1 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Wed, 11 Jan 2023 13:28:29 +0100 Subject: [PATCH 06/42] Change summary route for consistency --- lib/galaxy/webapps/galaxy/api/storage_cleaner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/webapps/galaxy/api/storage_cleaner.py b/lib/galaxy/webapps/galaxy/api/storage_cleaner.py index c32c4d1b1f70..1eb85cddaa54 100644 --- a/lib/galaxy/webapps/galaxy/api/storage_cleaner.py +++ b/lib/galaxy/webapps/galaxy/api/storage_cleaner.py @@ -38,7 +38,7 @@ class FastAPIStorageCleaner: service: StorageCleanerService = depends(StorageCleanerService) @router.get( - "/api/storage/summary/discarded/histories", + "/api/storage/discarded/histories/summary", summary="Returns information with the total storage space taken by discarded histories associated with the given user.", ) def discarded_histories_summary( From 532aaa5a9996f5c7af83efcda270d3051b532fe6 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Wed, 11 Jan 2023 13:31:05 +0100 Subject: [PATCH 07/42] Add API test for discarded histories --- lib/galaxy_test/api/test_storage_cleaner.py | 81 +++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 lib/galaxy_test/api/test_storage_cleaner.py diff --git a/lib/galaxy_test/api/test_storage_cleaner.py b/lib/galaxy_test/api/test_storage_cleaner.py new file mode 100644 index 000000000000..cf6e7cf8d223 --- /dev/null +++ b/lib/galaxy_test/api/test_storage_cleaner.py @@ -0,0 +1,81 @@ +from typing import ( + Dict, + List, + NamedTuple, +) +from uuid import uuid4 + +from galaxy_test.base.populators import DatasetPopulator +from ._framework import ApiTestCase + + +class TestHistoryData(NamedTuple): + name: str + size: int + + +class TestStorageCleanerApi(ApiTestCase): + def setUp(self): + super().setUp() + self.dataset_populator = DatasetPopulator(self.galaxy_interactor) + + def test_discarded_histories_monitoring_and_cleanup(self): + # Create some histories for testing + history_data_01 = TestHistoryData(name=f"TestHistory01_{uuid4()}", size=10) + history_data_02 = TestHistoryData(name=f"TestHistory02_{uuid4()}", size=25) + history_data_03 = TestHistoryData(name=f"TestHistory03_{uuid4()}", size=50) + test_histories = [history_data_01, history_data_02, history_data_03] + history_name_id_map = self._create_histories(test_histories) + + # Initially, there shouldn't be any deleted and not purged (discarded) histories + summary_response = self._get("storage/discarded/histories/summary") + self._assert_status_code_is_ok(summary_response) + summary = summary_response.json() + assert summary["total_items"] == 0 + assert summary["total_size"] == 0 + + # Delete the histories + for history_id in history_name_id_map.values(): + self._delete(f"histories/{history_id}", json=True) + expected_discarded_histories_count = len(test_histories) + expected_total_size = sum([test_history.size for test_history in test_histories]) + + # All the `test_histories` should be in the summary + summary_response = self._get("storage/discarded/histories/summary") + self._assert_status_code_is_ok(summary_response) + summary = summary_response.json() + assert summary["total_items"] == expected_discarded_histories_count + assert summary["total_size"] == expected_total_size + + # Check listing all the discarded histories + 
discarded_histories_response = self._get("storage/discarded/histories") + self._assert_status_code_is_ok(discarded_histories_response) + discarded_histories = discarded_histories_response.json() + assert len(discarded_histories) == expected_discarded_histories_count + assert sum([history["size"] for history in discarded_histories]) == expected_total_size + + # Cleanup all the histories + payload = {"item_ids": list(history_name_id_map.values())} + cleanup_response = self._delete("storage/discarded/histories", data=payload, json=True) + self._assert_status_code_is_ok(cleanup_response) + cleanup_result = cleanup_response.json() + assert cleanup_result["total_item_count"] == expected_discarded_histories_count + assert cleanup_result["success_item_count"] == expected_discarded_histories_count + assert cleanup_result["total_free_bytes"] == expected_total_size + assert not cleanup_result["errors"] + + def _create_histories(self, test_histories: List[TestHistoryData], wait_for_histories=True) -> Dict[str, str]: + history_name_id_map = {} + for history_data in test_histories: + post_data = dict(name=history_data.name) + create_response = self._post("histories", data=post_data).json() + self._assert_has_keys(create_response, "name", "id") + history_id = create_response["id"] + history_name_id_map[history_data.name] = history_id + # Create a dataset with content equal to the expected size of the history + if history_data.size: + self.dataset_populator.new_dataset(history_id, content=f"{'0'*(history_data.size-1)}\n") + if wait_for_histories: + for history_id in history_name_id_map.values(): + self.dataset_populator.wait_for_history(history_id) + return history_name_id_map From 9e29a35361b34d1e43f5164024dd725d3e22b339 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Wed, 11 Jan 2023 13:32:37 +0100 Subject: [PATCH 08/42] Fix possible None when calculating total size --- lib/galaxy/managers/histories.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/managers/histories.py b/lib/galaxy/managers/histories.py index c3819ec05ee6..6d5ae9d9e9da 100644 --- a/lib/galaxy/managers/histories.py +++ b/lib/galaxy/managers/histories.py @@ -374,7 +374,8 @@ def get_discarded_summary(self, user: model.User) -> DiscardedItemsSummary: model.History.purged == false(), ) result = self.session().execute(stmt).fetchone() - return DiscardedItemsSummary(total_size=result[0], total_items=result[1]) + total_size = 0 if result[0] is None else result[0] + return DiscardedItemsSummary(total_size=total_size, total_items=result[1]) def get_discarded(self, user: model.User, offset: Optional[int], limit: Optional[int]) -> List[StoredItem]: stmt = select(model.History).where( From 49c3e23b2e89070b2ce9eee949d41c289bca7e93 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Wed, 11 Jan 2023 14:48:30 +0100 Subject: [PATCH 09/42] Change StoredItemType from "hda" to "dataset" So it shall not be confused with a real HDA which has much more information. 
--- lib/galaxy/schema/storage_cleaner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/schema/storage_cleaner.py b/lib/galaxy/schema/storage_cleaner.py index 7bfac5f55081..fc1d2bdfb736 100644 --- a/lib/galaxy/schema/storage_cleaner.py +++ b/lib/galaxy/schema/storage_cleaner.py @@ -30,7 +30,7 @@ class DiscardedItemsSummary(Model): ) -StoredItemType = Union[Literal["history"], Literal["hda"]] +StoredItemType = Union[Literal["history"], Literal["dataset"]] class StoredItem(Model): From bf24f17ed79badc1a2fcdae16bf537515191828b Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Wed, 11 Jan 2023 15:27:00 +0100 Subject: [PATCH 10/42] Make delete route more generic It just purges the given IDs, it doesn't care about discarded or any other condition. --- lib/galaxy/webapps/galaxy/api/storage_cleaner.py | 7 +++++-- lib/galaxy_test/api/test_storage_cleaner.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/webapps/galaxy/api/storage_cleaner.py b/lib/galaxy/webapps/galaxy/api/storage_cleaner.py index 1eb85cddaa54..83dfe37e50fb 100644 --- a/lib/galaxy/webapps/galaxy/api/storage_cleaner.py +++ b/lib/galaxy/webapps/galaxy/api/storage_cleaner.py @@ -60,10 +60,13 @@ def discarded_histories( return self.service.get_discarded_histories(trans, offset, limit) @router.delete( - "/api/storage/discarded/histories", - summary="Purges histories that has been previously deleted by the user.", + "/api/storage/histories", + summary="Purges a set of histories by ID. The histories must be owned by the user.", ) def cleanup_histories( self, trans: ProvidesHistoryContext = DependsOnTrans, payload: CleanupStorageItemsRequest = Body(...) ) -> StorageItemsCleanupResult: + """ + **Warning**: This operation cannot be undone. All objects will be deleted permanently from the disk. 
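+        Histories that fail to be purged (for example, a history not owned by
+        the requesting user) are reported in the `errors` field of the result
+        instead of failing the whole request.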
+ """ return self.service.cleanup_histories(trans, set(payload.item_ids)) diff --git a/lib/galaxy_test/api/test_storage_cleaner.py b/lib/galaxy_test/api/test_storage_cleaner.py index cf6e7cf8d223..4d688c2c33f1 100644 --- a/lib/galaxy_test/api/test_storage_cleaner.py +++ b/lib/galaxy_test/api/test_storage_cleaner.py @@ -56,7 +56,7 @@ def test_discarded_histories_monitoring_and_cleanup(self): # Cleanup all the histories payload = {"item_ids": list(history_name_id_map.values())} - cleanup_response = self._delete("storage/discarded/histories", data=payload, json=True) + cleanup_response = self._delete("storage/histories", data=payload, json=True) self._assert_status_code_is_ok(cleanup_response) cleanup_result = cleanup_response.json() assert cleanup_result["total_item_count"] == expected_discarded_histories_count From 14e620b1b06b8a6a024507456a95f77aac383978 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 16 Jan 2023 15:36:21 +0100 Subject: [PATCH 11/42] Change routes as suggested in code review --- lib/galaxy/webapps/galaxy/api/storage_cleaner.py | 4 ++-- lib/galaxy_test/api/test_storage_cleaner.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/galaxy/webapps/galaxy/api/storage_cleaner.py b/lib/galaxy/webapps/galaxy/api/storage_cleaner.py index 83dfe37e50fb..3ff5a0d38a29 100644 --- a/lib/galaxy/webapps/galaxy/api/storage_cleaner.py +++ b/lib/galaxy/webapps/galaxy/api/storage_cleaner.py @@ -38,7 +38,7 @@ class FastAPIStorageCleaner: service: StorageCleanerService = depends(StorageCleanerService) @router.get( - "/api/storage/discarded/histories/summary", + "/api/storage/histories/discarded/summary", summary="Returns information with the total storage space taken by discarded histories associated with the given user.", ) def discarded_histories_summary( @@ -48,7 +48,7 @@ def discarded_histories_summary( return self.service.get_discarded_histories_summary(trans) @router.get( - "/api/storage/discarded/histories", + "/api/storage/histories/discarded", summary="Returns all discarded histories associated with the given user.", ) def discarded_histories( diff --git a/lib/galaxy_test/api/test_storage_cleaner.py b/lib/galaxy_test/api/test_storage_cleaner.py index 4d688c2c33f1..8d62da7f1cd9 100644 --- a/lib/galaxy_test/api/test_storage_cleaner.py +++ b/lib/galaxy_test/api/test_storage_cleaner.py @@ -28,7 +28,7 @@ def test_discarded_histories_monitoring_and_cleanup(self): history_name_id_map = self._create_histories(test_histories) # Initially, there shouldn't be any deleted and not purged (discarded) histories - summary_response = self._get("storage/discarded/histories/summary") + summary_response = self._get("storage/histories/discarded/summary") self._assert_status_code_is_ok(summary_response) summary = summary_response.json() assert summary["total_items"] == 0 @@ -41,14 +41,14 @@ def test_discarded_histories_monitoring_and_cleanup(self): expected_total_size = sum([test_history.size for test_history in test_histories]) # All the `test_histories` should be in the summary - summary_response = self._get("storage/discarded/histories/summary") + summary_response = self._get("storage/histories/discarded/summary") self._assert_status_code_is_ok(summary_response) summary = summary_response.json() assert summary["total_items"] == expected_discarded_histories_count assert summary["total_size"] == expected_total_size # Check listing all the discarded histories - discarded_histories_response = self._get("storage/discarded/histories") + 
discarded_histories_response = self._get("storage/histories/discarded") self._assert_status_code_is_ok(discarded_histories_response) discarded_histories = discarded_histories_response.json() assert len(discarded_histories) == expected_discarded_histories_count From 62694269ec449d000e50948027862fc788c5810a Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 23 Jan 2023 16:02:11 +0100 Subject: [PATCH 12/42] Add HDAStorageCleanerManager --- lib/galaxy/managers/hdas.py | 92 +++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index 476d9aa16a11..96e4b532b9fc 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -12,8 +12,16 @@ Dict, List, Optional, + Set, ) +from sqlalchemy import ( + and_, + false, + func, + select, + true, +) from sqlalchemy.orm.session import object_session from galaxy import ( @@ -33,6 +41,12 @@ from galaxy.model.deferred import materializer_factory from galaxy.model.tags import GalaxyTagHandler from galaxy.schema.schema import DatasetSourceType +from galaxy.schema.storage_cleaner import ( + DiscardedItemsSummary, + StorageItemCleanupError, + StorageItemsCleanupResult, + StoredItem, +) from galaxy.schema.tasks import ( MaterializeDatasetInstanceTaskRequest, RequestUser, @@ -315,6 +329,84 @@ def _set_permissions(self, trans, hda, role_ids_dict): raise exceptions.RequestParameterInvalidException(error) +class HDAStorageCleanerManager(base.StorageCleanerManager): + def __init__(self, hda_manager: HDAManager): + self.hda_manager = hda_manager + + def get_discarded_summary(self, user: model.User) -> DiscardedItemsSummary: + stmt = ( + select([func.sum(model.Dataset.total_size), func.count(model.HistoryDatasetAssociation.id)]) + .select_from(model.HistoryDatasetAssociation) + .join(model.Dataset, model.HistoryDatasetAssociation.table.c.dataset_id == model.Dataset.id) + .join(model.History, model.HistoryDatasetAssociation.table.c.history_id == model.History.id) + .where( + and_( + model.HistoryDatasetAssociation.deleted == true(), + model.HistoryDatasetAssociation.purged == false(), + model.History.user_id == user.id, + ) + ) + ) + result = self.hda_manager.session().execute(stmt).fetchone() + total_size = 0 if result[0] is None else result[0] + return DiscardedItemsSummary(total_size=total_size, total_items=result[1]) + + def get_discarded(self, user: model.User, offset: Optional[int], limit: Optional[int]) -> List[StoredItem]: + stmt = ( + select( + [ + model.HistoryDatasetAssociation.id, + model.HistoryDatasetAssociation.name, + model.HistoryDatasetAssociation.update_time, + model.Dataset.total_size, + ] + ) + .select_from(model.HistoryDatasetAssociation) + .join(model.Dataset, model.HistoryDatasetAssociation.table.c.dataset_id == model.Dataset.id) + .join(model.History, model.HistoryDatasetAssociation.table.c.history_id == model.History.id) + .where( + and_( + model.HistoryDatasetAssociation.deleted == true(), + model.HistoryDatasetAssociation.purged == false(), + model.History.user_id == user.id, + ) + ) + ) + if offset: + stmt = stmt.offset(offset) + if limit: + stmt = stmt.limit(limit) + result = self.hda_manager.session().execute(stmt) + discarded = [ + StoredItem(id=row.id, name=row.name, type="dataset", size=row.total_size, update_time=row.update_time) + for row in result + ] + return discarded + + def cleanup_items(self, user: model.User, item_ids: Set[int]) -> StorageItemsCleanupResult: + success_item_count = 0 
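+        # Accumulate results per item: a failure to purge one HDA is recorded
+        # as an error entry instead of aborting the rest of the batch.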
+ total_free_bytes = 0 + errors: List[StorageItemCleanupError] = [] + + for hda_id in item_ids: + try: + hda = self.hda_manager.get_owned(hda_id, user) + self.hda_manager.purge(hda) + success_item_count += 1 + total_free_bytes += int(hda.get_size()) + except BaseException as e: + errors.append(StorageItemCleanupError(item_id=hda_id, error=str(e))) + + if success_item_count: + self.hda_manager.session().flush() + return StorageItemsCleanupResult( + total_item_count=len(item_ids), + success_item_count=success_item_count, + total_free_bytes=total_free_bytes, + errors=errors, + ) + + class HDASerializer( # datasets._UnflattenedMetadataDatasetAssociationSerializer, datasets.DatasetAssociationSerializer[HDAManager], taggable.TaggableSerializerMixin, From 3110b2dc5267be8e8ae19d75031cc7c2f45ef31e Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 23 Jan 2023 16:27:37 +0100 Subject: [PATCH 13/42] Add FastAPI routes for dataset storage management --- .../webapps/galaxy/api/storage_cleaner.py | 32 +++++++++++++++++++ .../galaxy/services/storage_cleaner.py | 17 ++++++++++ 2 files changed, 49 insertions(+) diff --git a/lib/galaxy/webapps/galaxy/api/storage_cleaner.py b/lib/galaxy/webapps/galaxy/api/storage_cleaner.py index 3ff5a0d38a29..5a4db7a20961 100644 --- a/lib/galaxy/webapps/galaxy/api/storage_cleaner.py +++ b/lib/galaxy/webapps/galaxy/api/storage_cleaner.py @@ -70,3 +70,35 @@ def cleanup_histories( **Warning**: This operation cannot be undone. All objects will be deleted permanently from the disk. """ return self.service.cleanup_histories(trans, set(payload.item_ids)) + + @router.get( + "/api/storage/datasets/discarded/summary", + summary="Returns information with the total storage space taken by discarded datasets owned by the given user.", + ) + def discarded_datasets_summary( + self, + trans: ProvidesHistoryContext = DependsOnTrans, + ) -> DiscardedItemsSummary: + return self.service.get_discarded_datasets_summary(trans) + + @router.get( + "/api/storage/datasets/discarded", + summary="Returns discarded datasets owned by the given user. The results can be paginated.", + ) + def discarded_datasets( + self, + trans: ProvidesHistoryContext = DependsOnTrans, + ) -> List[StoredItem]: + return self.service.get_discarded_datasets(trans) + + @router.delete( + "/api/storage/datasets", + summary="Purges a set of datasets by ID from disk. The datasets must be owned by the user.", + ) + def cleanup_datasets( + self, trans: ProvidesHistoryContext = DependsOnTrans, payload: CleanupStorageItemsRequest = Body(...) + ) -> StorageItemsCleanupResult: + """ + **Warning**: This operation cannot be undone. All objects will be deleted permanently from the disk. 
+ """ + return self.service.cleanup_datasets(trans, set(payload.item_ids)) diff --git a/lib/galaxy/webapps/galaxy/services/storage_cleaner.py b/lib/galaxy/webapps/galaxy/services/storage_cleaner.py index f6bf65d06f68..f16d8cf1ec64 100644 --- a/lib/galaxy/webapps/galaxy/services/storage_cleaner.py +++ b/lib/galaxy/webapps/galaxy/services/storage_cleaner.py @@ -5,6 +5,7 @@ ) from galaxy.managers.context import ProvidesHistoryContext +from galaxy.managers.hdas import HDAStorageCleanerManager from galaxy.managers.histories import HistoryManager from galaxy.managers.users import UserManager from galaxy.webapps.galaxy.services.base import ServiceBase @@ -19,9 +20,11 @@ def __init__( self, user_manager: UserManager, history_manager: HistoryManager, + hda_cleaner: HDAStorageCleanerManager, ): self.user_manager = user_manager self.history_manager = history_manager + self.hda_cleaner = hda_cleaner def get_discarded_histories_summary(self, trans: ProvidesHistoryContext): user = self.get_authenticated_user(trans) @@ -36,3 +39,17 @@ def get_discarded_histories( def cleanup_histories(self, trans: ProvidesHistoryContext, item_ids: Set[int]): user = self.get_authenticated_user(trans) return self.history_manager.cleanup_items(user, item_ids) + + def get_discarded_datasets_summary(self, trans: ProvidesHistoryContext): + user = self.get_authenticated_user(trans) + return self.hda_cleaner.get_discarded_summary(user) + + def get_discarded_datasets( + self, trans: ProvidesHistoryContext, offset: Optional[int] = None, limit: Optional[int] = None + ): + user = self.get_authenticated_user(trans) + return self.hda_cleaner.get_discarded(user, offset, limit) + + def cleanup_datasets(self, trans: ProvidesHistoryContext, item_ids: Set[int]): + user = self.get_authenticated_user(trans) + return self.hda_cleaner.cleanup_items(user, item_ids) From f7244926a0b6891ecd9d30d8ebf276253b8f6f79 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 23 Jan 2023 18:10:16 +0100 Subject: [PATCH 14/42] Refactor extract HistoryStorageCleanerManager --- lib/galaxy/managers/histories.py | 20 +++++++++++-------- .../galaxy/services/storage_cleaner.py | 12 +++++------ 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/lib/galaxy/managers/histories.py b/lib/galaxy/managers/histories.py index 6d5ae9d9e9da..ec8f9c0b8004 100644 --- a/lib/galaxy/managers/histories.py +++ b/lib/galaxy/managers/histories.py @@ -62,9 +62,8 @@ log = logging.getLogger(__name__) -class HistoryManager( - sharable.SharableModelManager, deletable.PurgableManagerMixin, SortableManager, StorageCleanerManager -): +class HistoryManager(sharable.SharableModelManager, deletable.PurgableManagerMixin, SortableManager): + model_class = model.History foreign_key_name = "history" user_share_model = model.HistoryUserShareAssociation @@ -367,13 +366,18 @@ def make_members_public(self, trans, item): else: log.warning(f"User without permissions tried to make dataset with id: {dataset.id} public") + +class HistoryStorageCleanerManager(StorageCleanerManager): + def __init__(self, history_manager: HistoryManager): + self.history_manager = history_manager + def get_discarded_summary(self, user: model.User) -> DiscardedItemsSummary: stmt = select([func.sum(model.History.disk_size), func.count(model.History.id)]).where( model.History.user_id == user.id, model.History.deleted == true(), model.History.purged == false(), ) - result = self.session().execute(stmt).fetchone() + result = 
self.history_manager.session().execute(stmt).fetchone() total_size = 0 if result[0] is None else result[0] return DiscardedItemsSummary(total_size=total_size, total_items=result[1]) @@ -387,7 +391,7 @@ def get_discarded(self, user: model.User, offset: Optional[int], limit: Optional stmt = stmt.offset(offset) if limit: stmt = stmt.limit(limit) - result = self.session().execute(stmt).scalars() + result = self.history_manager.session().execute(stmt).scalars() discarded = [self._history_to_stored_item(item) for item in result] return discarded @@ -398,15 +402,15 @@ def cleanup_items(self, user: model.User, item_ids: Set[int]) -> StorageItemsCle for history_id in item_ids: try: - history = self.get_owned(history_id, user) - self.purge(history, flush=False) + history = self.history_manager.get_owned(history_id, user) + self.history_manager.purge(history, flush=False) success_item_count += 1 total_free_bytes += int(history.disk_size) except BaseException as e: errors.append(StorageItemCleanupError(item_id=history_id, error=str(e))) if success_item_count: - self.session().flush() + self.history_manager.session().flush() return StorageItemsCleanupResult( total_item_count=len(item_ids), success_item_count=success_item_count, diff --git a/lib/galaxy/webapps/galaxy/services/storage_cleaner.py b/lib/galaxy/webapps/galaxy/services/storage_cleaner.py index f16d8cf1ec64..37f4ec6ca5a7 100644 --- a/lib/galaxy/webapps/galaxy/services/storage_cleaner.py +++ b/lib/galaxy/webapps/galaxy/services/storage_cleaner.py @@ -6,7 +6,7 @@ from galaxy.managers.context import ProvidesHistoryContext from galaxy.managers.hdas import HDAStorageCleanerManager -from galaxy.managers.histories import HistoryManager +from galaxy.managers.histories import HistoryStorageCleanerManager from galaxy.managers.users import UserManager from galaxy.webapps.galaxy.services.base import ServiceBase @@ -19,26 +19,26 @@ class StorageCleanerService(ServiceBase): def __init__( self, user_manager: UserManager, - history_manager: HistoryManager, + history_cleaner: HistoryStorageCleanerManager, hda_cleaner: HDAStorageCleanerManager, ): self.user_manager = user_manager - self.history_manager = history_manager + self.history_cleaner = history_cleaner self.hda_cleaner = hda_cleaner def get_discarded_histories_summary(self, trans: ProvidesHistoryContext): user = self.get_authenticated_user(trans) - return self.history_manager.get_discarded_summary(user) + return self.history_cleaner.get_discarded_summary(user) def get_discarded_histories( self, trans: ProvidesHistoryContext, offset: Optional[int] = None, limit: Optional[int] = None ): user = self.get_authenticated_user(trans) - return self.history_manager.get_discarded(user, offset, limit) + return self.history_cleaner.get_discarded(user, offset, limit) def cleanup_histories(self, trans: ProvidesHistoryContext, item_ids: Set[int]): user = self.get_authenticated_user(trans) - return self.history_manager.cleanup_items(user, item_ids) + return self.history_cleaner.cleanup_items(user, item_ids) def get_discarded_datasets_summary(self, trans: ProvidesHistoryContext): user = self.get_authenticated_user(trans) From 8e1076cf2fec1031c674c922a3a27a005c978856 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 23 Jan 2023 18:17:45 +0100 Subject: [PATCH 15/42] Fix helper name in API test and add decorator --- lib/galaxy_test/api/test_storage_cleaner.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git 
a/lib/galaxy_test/api/test_storage_cleaner.py b/lib/galaxy_test/api/test_storage_cleaner.py index 8d62da7f1cd9..cf76f929ab24 100644 --- a/lib/galaxy_test/api/test_storage_cleaner.py +++ b/lib/galaxy_test/api/test_storage_cleaner.py @@ -5,25 +5,29 @@ ) from uuid import uuid4 +from galaxy_test.base.decorators import requires_new_history from galaxy_test.base.populators import DatasetPopulator from ._framework import ApiTestCase -class TestHistoryData(NamedTuple): +class HistoryDataForTests(NamedTuple): name: str size: int class TestStorageCleanerApi(ApiTestCase): + dataset_populator: DatasetPopulator + def setUp(self): super().setUp() self.dataset_populator = DatasetPopulator(self.galaxy_interactor) + @requires_new_history def test_discarded_histories_monitoring_and_cleanup(self): # Create some histories for testing - history_data_01 = TestHistoryData(name=f"TestHistory01_{uuid4()}", size=10) - history_data_02 = TestHistoryData(name=f"TestHistory02_{uuid4()}", size=25) - history_data_03 = TestHistoryData(name=f"TestHistory03_{uuid4()}", size=50) + history_data_01 = HistoryDataForTests(name=f"TestHistory01_{uuid4()}", size=10) + history_data_02 = HistoryDataForTests(name=f"TestHistory02_{uuid4()}", size=25) + history_data_03 = HistoryDataForTests(name=f"TestHistory03_{uuid4()}", size=50) test_histories = [history_data_01, history_data_02, history_data_03] history_name_id_map = self._create_histories(test_histories) @@ -64,7 +68,7 @@ def test_discarded_histories_monitoring_and_cleanup(self): assert cleanup_result["total_free_bytes"] == expected_total_size assert not cleanup_result["errors"] - def _create_histories(self, test_histories: List[TestHistoryData], wait_for_histories=True) -> Dict[str, str]: + def _create_histories(self, test_histories: List[HistoryDataForTests], wait_for_histories=True) -> Dict[str, str]: history_name_id_map = {} for history_data in test_histories: post_data = dict(name=history_data.name) From 4b3a80d5571dba39176e5d0035f22f953ce9efc0 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 23 Jan 2023 18:32:16 +0100 Subject: [PATCH 16/42] Add API test for discarded datasets --- lib/galaxy_test/api/test_storage_cleaner.py | 50 +++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/lib/galaxy_test/api/test_storage_cleaner.py b/lib/galaxy_test/api/test_storage_cleaner.py index cf76f929ab24..8bcb2aa66970 100644 --- a/lib/galaxy_test/api/test_storage_cleaner.py +++ b/lib/galaxy_test/api/test_storage_cleaner.py @@ -83,3 +83,53 @@ def _create_histories(self, test_histories: List[HistoryDataForTests], wait_for_ for history_id in history_name_id_map.values(): self.dataset_populator.wait_for_history(history_id) return history_name_id_map + + @requires_new_history + def test_discarded_datasets_monitoring_and_cleanup(self): + # Prepare history with some datasets + history_id = self.dataset_populator.new_history(f"History for discarded datasets {uuid4()}") + num_datasets = 3 + dataset_size = 30 + dataset_ids = [] + for _ in range(num_datasets): + dataset = self.dataset_populator.new_dataset(history_id, content=f"{'0'*(dataset_size-1)}\n") + dataset_ids.append(dataset["id"]) + self.dataset_populator.wait_for_history(history_id) + + # Initially, there shouldn't be any deleted and not purged (discarded) datasets + summary_response = self._get("storage/datasets/discarded/summary") + self._assert_status_code_is_ok(summary_response) + summary = summary_response.json() + assert summary["total_items"] == 0 + assert 
summary["total_size"] == 0 + + # Delete the datasets + for dataset_id in dataset_ids: + self.dataset_populator.delete_dataset(history_id, dataset_id) + + # All datasets should be in the summary + expected_num_discarded_datasets = len(dataset_ids) + expected_total_size = expected_num_discarded_datasets * dataset_size + summary_response = self._get("storage/datasets/discarded/summary") + self._assert_status_code_is_ok(summary_response) + summary = summary_response.json() + assert summary["total_items"] == expected_num_discarded_datasets + assert summary["total_size"] == expected_total_size + + # Check listing all the discarded datasets + discarded_datasets_response = self._get("storage/datasets/discarded") + self._assert_status_code_is_ok(discarded_datasets_response) + discarded_datasets = discarded_datasets_response.json() + assert len(discarded_datasets) == expected_num_discarded_datasets + for dataset in discarded_datasets: + assert dataset["size"] == dataset_size + + # Cleanup all the datasets + payload = {"item_ids": dataset_ids} + cleanup_response = self._delete("storage/datasets", data=payload, json=True) + self._assert_status_code_is_ok(cleanup_response) + cleanup_result = cleanup_response.json() + assert cleanup_result["total_item_count"] == expected_num_discarded_datasets + assert cleanup_result["success_item_count"] == expected_num_discarded_datasets + assert cleanup_result["total_free_bytes"] == expected_total_size + assert not cleanup_result["errors"] From fb421e9588bd13365a30671765219a8b719c68d4 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 26 Jan 2023 11:08:13 +0100 Subject: [PATCH 17/42] Update client API schema --- client/src/schema/schema.ts | 268 ++++++++++++++++++++++++++++++++++++ 1 file changed, 268 insertions(+) diff --git a/client/src/schema/schema.ts b/client/src/schema/schema.ts index 955f7fb91184..bf495f013177 100644 --- a/client/src/schema/schema.ts +++ b/client/src/schema/schema.ts @@ -1070,6 +1070,36 @@ export interface paths { /** Determine if specified storage request ID is ready for download. */ get: operations["is_ready_api_short_term_storage__storage_request_id__ready_get"]; }; + "/api/storage/datasets": { + /** + * Purges a set of datasets by ID from disk. The datasets must be owned by the user. + * @description **Warning**: This operation cannot be undone. All objects will be deleted permanently from the disk. + */ + delete: operations["cleanup_datasets_api_storage_datasets_delete"]; + }; + "/api/storage/datasets/discarded": { + /** Returns discarded datasets owned by the given user. The results can be paginated. */ + get: operations["discarded_datasets_api_storage_datasets_discarded_get"]; + }; + "/api/storage/datasets/discarded/summary": { + /** Returns information with the total storage space taken by discarded datasets owned by the given user. */ + get: operations["discarded_datasets_summary_api_storage_datasets_discarded_summary_get"]; + }; + "/api/storage/histories": { + /** + * Purges a set of histories by ID. The histories must be owned by the user. + * @description **Warning**: This operation cannot be undone. All objects will be deleted permanently from the disk. + */ + delete: operations["cleanup_histories_api_storage_histories_delete"]; + }; + "/api/storage/histories/discarded": { + /** Returns all discarded histories associated with the given user. 
*/ + get: operations["discarded_histories_api_storage_histories_discarded_get"]; + }; + "/api/storage/histories/discarded/summary": { + /** Returns information with the total storage space taken by discarded histories associated with the given user. */ + get: operations["discarded_histories_summary_api_storage_histories_discarded_summary_get"]; + }; "/api/tags": { /** * Apply a new set of tags to an item. @@ -1588,6 +1618,14 @@ export interface components { */ type: string; }; + /** + * CleanupStorageItemsRequest + * @description Base model definition with common configuration used by all derived models. + */ + CleanupStorageItemsRequest: { + /** Item Ids */ + item_ids: string[]; + }; /** * CollectionElementIdentifier * @description Base model definition with common configuration used by all derived models. @@ -2824,6 +2862,22 @@ export interface components { */ purge?: boolean; }; + /** + * DiscardedItemsSummary + * @description Base model definition with common configuration used by all derived models. + */ + DiscardedItemsSummary: { + /** + * Total Items + * @description The total number of items that could be purged. + */ + total_items: number; + /** + * Total Size + * @description The total size in bytes that can be recovered by purging all the items. + */ + total_size: number; + }; /** * DisplayApp * @description Basic linked information about an application that can display certain datatypes. @@ -6659,6 +6713,35 @@ export interface components { * @enum {string} */ Src: "url" | "pasted" | "files" | "path" | "composite" | "ftp_import" | "server_dir"; + /** + * StorageItemCleanupError + * @description Base model definition with common configuration used by all derived models. + */ + StorageItemCleanupError: { + /** Error */ + error: string; + /** + * Item Id + * @example [ + * "0123456789ABCDEF" + * ] + */ + item_id: string; + }; + /** + * StorageItemsCleanupResult + * @description Base model definition with common configuration used by all derived models. + */ + StorageItemsCleanupResult: { + /** Errors */ + errors: components["schemas"]["StorageItemCleanupError"][]; + /** Success Item Count */ + success_item_count: number; + /** Total Free Bytes */ + total_free_bytes: number; + /** Total Item Count */ + total_item_count: number; + }; /** * StoreExportPayload * @description Base model definition with common configuration used by all derived models. @@ -6688,6 +6771,31 @@ export interface components { */ model_store_format?: components["schemas"]["ModelStoreFormat"]; }; + /** + * StoredItem + * @description Base model definition with common configuration used by all derived models. + */ + StoredItem: { + /** + * Id + * @example [ + * "0123456789ABCDEF" + * ] + */ + id: string; + /** Name */ + name: string; + /** Size */ + size: number; + /** Type */ + type: "history" | "dataset"; + /** + * Update Time + * Format: date-time + * @description The last time and date this item was updated. + */ + update_time: string; + }; /** * SuitableConverter * @description Base model definition with common configuration used by all derived models. @@ -13375,6 +13483,166 @@ export interface operations { }; }; }; + cleanup_datasets_api_storage_datasets_delete: { + /** + * Purges a set of datasets by ID from disk. The datasets must be owned by the user. + * @description **Warning**: This operation cannot be undone. All objects will be deleted permanently from the disk. + */ + parameters?: { + /** @description The user ID that will be used to effectively make this API call. 
Only admins and designated users can make API calls on behalf of other users. */ + header?: { + "run-as"?: string; + }; + }; + requestBody: { + content: { + "application/json": components["schemas"]["CleanupStorageItemsRequest"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + content: { + "application/json": components["schemas"]["StorageItemsCleanupResult"]; + }; + }; + /** @description Validation Error */ + 422: { + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + discarded_datasets_api_storage_datasets_discarded_get: { + /** Returns discarded datasets owned by the given user. The results can be paginated. */ + parameters?: { + /** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */ + header?: { + "run-as"?: string; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + content: { + "application/json": components["schemas"]["StoredItem"][]; + }; + }; + /** @description Validation Error */ + 422: { + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + discarded_datasets_summary_api_storage_datasets_discarded_summary_get: { + /** Returns information with the total storage space taken by discarded datasets owned by the given user. */ + parameters?: { + /** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */ + header?: { + "run-as"?: string; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + content: { + "application/json": components["schemas"]["DiscardedItemsSummary"]; + }; + }; + /** @description Validation Error */ + 422: { + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + cleanup_histories_api_storage_histories_delete: { + /** + * Purges a set of histories by ID. The histories must be owned by the user. + * @description **Warning**: This operation cannot be undone. All objects will be deleted permanently from the disk. + */ + parameters?: { + /** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */ + header?: { + "run-as"?: string; + }; + }; + requestBody: { + content: { + "application/json": components["schemas"]["CleanupStorageItemsRequest"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + content: { + "application/json": components["schemas"]["StorageItemsCleanupResult"]; + }; + }; + /** @description Validation Error */ + 422: { + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + discarded_histories_api_storage_histories_discarded_get: { + /** Returns all discarded histories associated with the given user. */ + parameters?: { + /** @description Starts at the beginning skip the first ( offset - 1 ) items and begin returning at the Nth item */ + /** @description The maximum number of items to return. */ + query?: { + offset?: number; + limit?: number; + }; + /** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. 
*/ + header?: { + "run-as"?: string; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + content: { + "application/json": components["schemas"]["StoredItem"][]; + }; + }; + /** @description Validation Error */ + 422: { + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + discarded_histories_summary_api_storage_histories_discarded_summary_get: { + /** Returns information with the total storage space taken by discarded histories associated with the given user. */ + parameters?: { + /** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */ + header?: { + "run-as"?: string; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + content: { + "application/json": components["schemas"]["DiscardedItemsSummary"]; + }; + }; + /** @description Validation Error */ + 422: { + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; update_api_tags_put: { /** * Apply a new set of tags to an item. From 34d6d0b91413fe2f7b3fe08425fa4601ba2a727c Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 26 Jan 2023 15:29:04 +0100 Subject: [PATCH 18/42] Rename DiscardedItemsSummary to CleanableItemsSummary --- client/src/schema/schema.ts | 36 +++++++++---------- lib/galaxy/managers/base.py | 4 +-- lib/galaxy/managers/hdas.py | 6 ++-- lib/galaxy/managers/histories.py | 6 ++-- lib/galaxy/schema/storage_cleaner.py | 2 +- .../webapps/galaxy/api/storage_cleaner.py | 6 ++-- 6 files changed, 30 insertions(+), 30 deletions(-) diff --git a/client/src/schema/schema.ts b/client/src/schema/schema.ts index bf495f013177..4d7d2d984642 100644 --- a/client/src/schema/schema.ts +++ b/client/src/schema/schema.ts @@ -1618,6 +1618,22 @@ export interface components { */ type: string; }; + /** + * CleanableItemsSummary + * @description Base model definition with common configuration used by all derived models. + */ + CleanableItemsSummary: { + /** + * Total Items + * @description The total number of items that could be purged. + */ + total_items: number; + /** + * Total Size + * @description The total size in bytes that can be recovered by purging all the items. + */ + total_size: number; + }; /** * CleanupStorageItemsRequest * @description Base model definition with common configuration used by all derived models. @@ -2862,22 +2878,6 @@ export interface components { */ purge?: boolean; }; - /** - * DiscardedItemsSummary - * @description Base model definition with common configuration used by all derived models. - */ - DiscardedItemsSummary: { - /** - * Total Items - * @description The total number of items that could be purged. - */ - total_items: number; - /** - * Total Size - * @description The total size in bytes that can be recovered by purging all the items. - */ - total_size: number; - }; /** * DisplayApp * @description Basic linked information about an application that can display certain datatypes. 
@@ -13549,7 +13549,7 @@ export interface operations { /** @description Successful Response */ 200: { content: { - "application/json": components["schemas"]["DiscardedItemsSummary"]; + "application/json": components["schemas"]["CleanableItemsSummary"]; }; }; /** @description Validation Error */ @@ -13632,7 +13632,7 @@ export interface operations { /** @description Successful Response */ 200: { content: { - "application/json": components["schemas"]["DiscardedItemsSummary"]; + "application/json": components["schemas"]["CleanableItemsSummary"]; }; }; /** @description Validation Error */ diff --git a/lib/galaxy/managers/base.py b/lib/galaxy/managers/base.py index 5600f42803e9..9f5ee7e9f236 100644 --- a/lib/galaxy/managers/base.py +++ b/lib/galaxy/managers/base.py @@ -57,7 +57,7 @@ from galaxy.schema import ValueFilterQueryParams from galaxy.schema.fields import DecodedDatabaseIdField from galaxy.schema.storage_cleaner import ( - DiscardedItemsSummary, + CleanableItemsSummary, StorageItemsCleanupResult, StoredItem, ) @@ -1310,7 +1310,7 @@ class StorageCleanerManager(Protocol): Interface for monitoring storage usage and managing deletion/purging of objects that consume user's storage space. """ - def get_discarded_summary(self, user: model.User) -> DiscardedItemsSummary: + def get_discarded_summary(self, user: model.User) -> CleanableItemsSummary: """Returns information with the total storage space taken by discarded items for the given user. Discarded items are those that are deleted but not purged yet. diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index 96e4b532b9fc..523619d5a759 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -42,7 +42,7 @@ from galaxy.model.tags import GalaxyTagHandler from galaxy.schema.schema import DatasetSourceType from galaxy.schema.storage_cleaner import ( - DiscardedItemsSummary, + CleanableItemsSummary, StorageItemCleanupError, StorageItemsCleanupResult, StoredItem, @@ -333,7 +333,7 @@ class HDAStorageCleanerManager(base.StorageCleanerManager): def __init__(self, hda_manager: HDAManager): self.hda_manager = hda_manager - def get_discarded_summary(self, user: model.User) -> DiscardedItemsSummary: + def get_discarded_summary(self, user: model.User) -> CleanableItemsSummary: stmt = ( select([func.sum(model.Dataset.total_size), func.count(model.HistoryDatasetAssociation.id)]) .select_from(model.HistoryDatasetAssociation) @@ -349,7 +349,7 @@ def get_discarded_summary(self, user: model.User) -> DiscardedItemsSummary: ) result = self.hda_manager.session().execute(stmt).fetchone() total_size = 0 if result[0] is None else result[0] - return DiscardedItemsSummary(total_size=total_size, total_items=result[1]) + return CleanableItemsSummary(total_size=total_size, total_items=result[1]) def get_discarded(self, user: model.User, offset: Optional[int], limit: Optional[int]) -> List[StoredItem]: stmt = ( diff --git a/lib/galaxy/managers/histories.py b/lib/galaxy/managers/histories.py index ec8f9c0b8004..185212aa424a 100644 --- a/lib/galaxy/managers/histories.py +++ b/lib/galaxy/managers/histories.py @@ -51,7 +51,7 @@ ShareHistoryExtra, ) from galaxy.schema.storage_cleaner import ( - DiscardedItemsSummary, + CleanableItemsSummary, StorageItemCleanupError, StorageItemsCleanupResult, StoredItem, @@ -371,7 +371,7 @@ class HistoryStorageCleanerManager(StorageCleanerManager): def __init__(self, history_manager: HistoryManager): self.history_manager = history_manager - def get_discarded_summary(self, user: model.User) -> 
DiscardedItemsSummary: + def get_discarded_summary(self, user: model.User) -> CleanableItemsSummary: stmt = select([func.sum(model.History.disk_size), func.count(model.History.id)]).where( model.History.user_id == user.id, model.History.deleted == true(), @@ -379,7 +379,7 @@ def get_discarded_summary(self, user: model.User) -> DiscardedItemsSummary: ) result = self.history_manager.session().execute(stmt).fetchone() total_size = 0 if result[0] is None else result[0] - return DiscardedItemsSummary(total_size=total_size, total_items=result[1]) + return CleanableItemsSummary(total_size=total_size, total_items=result[1]) def get_discarded(self, user: model.User, offset: Optional[int], limit: Optional[int]) -> List[StoredItem]: stmt = select(model.History).where( diff --git a/lib/galaxy/schema/storage_cleaner.py b/lib/galaxy/schema/storage_cleaner.py index fc1d2bdfb736..f6f228890518 100644 --- a/lib/galaxy/schema/storage_cleaner.py +++ b/lib/galaxy/schema/storage_cleaner.py @@ -17,7 +17,7 @@ ) -class DiscardedItemsSummary(Model): +class CleanableItemsSummary(Model): total_size: int = Field( ..., title="Total Size", diff --git a/lib/galaxy/webapps/galaxy/api/storage_cleaner.py b/lib/galaxy/webapps/galaxy/api/storage_cleaner.py index 5a4db7a20961..d9d7ad9d9e8e 100644 --- a/lib/galaxy/webapps/galaxy/api/storage_cleaner.py +++ b/lib/galaxy/webapps/galaxy/api/storage_cleaner.py @@ -11,8 +11,8 @@ from galaxy.managers.context import ProvidesHistoryContext from galaxy.schema.storage_cleaner import ( + CleanableItemsSummary, CleanupStorageItemsRequest, - DiscardedItemsSummary, StorageItemsCleanupResult, StoredItem, ) @@ -44,7 +44,7 @@ class FastAPIStorageCleaner: def discarded_histories_summary( self, trans: ProvidesHistoryContext = DependsOnTrans, - ) -> DiscardedItemsSummary: + ) -> CleanableItemsSummary: return self.service.get_discarded_histories_summary(trans) @router.get( @@ -78,7 +78,7 @@ def cleanup_histories( def discarded_datasets_summary( self, trans: ProvidesHistoryContext = DependsOnTrans, - ) -> DiscardedItemsSummary: + ) -> CleanableItemsSummary: return self.service.get_discarded_datasets_summary(trans) @router.get( From 6eb3ec9130d4ef05d6f8c7b693c80811ad3e0be4 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 27 Jan 2023 11:40:24 +0100 Subject: [PATCH 19/42] Allow sorting storage items results --- lib/galaxy/managers/base.py | 11 ++++++++- lib/galaxy/managers/hdas.py | 21 ++++++++++++++++- lib/galaxy/managers/histories.py | 19 ++++++++++++++- lib/galaxy/schema/storage_cleaner.py | 12 ++++++++++ .../webapps/galaxy/api/storage_cleaner.py | 23 ++++++++++++++++--- .../galaxy/services/storage_cleaner.py | 17 ++++++++++---- 6 files changed, 93 insertions(+), 10 deletions(-) diff --git a/lib/galaxy/managers/base.py b/lib/galaxy/managers/base.py index 9f5ee7e9f236..464df2c23a4e 100644 --- a/lib/galaxy/managers/base.py +++ b/lib/galaxy/managers/base.py @@ -60,6 +60,7 @@ CleanableItemsSummary, StorageItemsCleanupResult, StoredItem, + StoredItemOrderBy, ) from galaxy.security.idencoding import IdEncodingHelper from galaxy.structured_app import ( @@ -1310,6 +1311,8 @@ class StorageCleanerManager(Protocol): Interface for monitoring storage usage and managing deletion/purging of objects that consume user's storage space. """ + sort_map: Dict[StoredItemOrderBy, Any] + def get_discarded_summary(self, user: model.User) -> CleanableItemsSummary: """Returns information with the total storage space taken by discarded items for the given user. 
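# A minimal sketch (not part of the patch) of why StoredItemOrderBy, shown
# below, subclasses both str and Enum: FastAPI can validate the raw `order`
# query parameter (e.g. ?order=size-dsc) directly into an enum member, and the
# member still compares equal to its string value. Trimmed to two members for
# illustration.
from enum import Enum

class StoredItemOrderBy(str, Enum):
    SIZE_ASC = "size-asc"
    SIZE_DSC = "size-dsc"

# Lookup by value yields the singleton member, so identity checks work.
assert StoredItemOrderBy("size-dsc") is StoredItemOrderBy.SIZE_DSC
# As a str subclass, the member compares equal to its plain string value.
assert StoredItemOrderBy.SIZE_DSC == "size-dsc"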
@@ -1317,7 +1320,13 @@ def get_discarded_summary(self, user: model.User) -> CleanableItemsSummary: """ raise NotImplementedError - def get_discarded(self, user: model.User, offset: Optional[int], limit: Optional[int]) -> List[StoredItem]: + def get_discarded( + self, + user: model.User, + offset: Optional[int], + limit: Optional[int], + order: Optional[StoredItemOrderBy], + ) -> List[StoredItem]: """Returns a paginated list of items deleted by the given user that are not yet purged.""" raise NotImplementedError diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index 523619d5a759..3eb44650e60a 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -17,6 +17,8 @@ from sqlalchemy import ( and_, + asc, + desc, false, func, select, @@ -46,6 +48,7 @@ StorageItemCleanupError, StorageItemsCleanupResult, StoredItem, + StoredItemOrderBy, ) from galaxy.schema.tasks import ( MaterializeDatasetInstanceTaskRequest, @@ -332,6 +335,14 @@ def _set_permissions(self, trans, hda, role_ids_dict): class HDAStorageCleanerManager(base.StorageCleanerManager): def __init__(self, hda_manager: HDAManager): self.hda_manager = hda_manager + self.sort_map = { + StoredItemOrderBy.NAME_ASC: asc(model.HistoryDatasetAssociation.name), + StoredItemOrderBy.NAME_DSC: desc(model.HistoryDatasetAssociation.name), + StoredItemOrderBy.SIZE_ASC: asc(model.Dataset.total_size), + StoredItemOrderBy.SIZE_DSC: desc(model.Dataset.total_size), + StoredItemOrderBy.UPDATE_TIME_ASC: asc(model.HistoryDatasetAssociation.update_time), + StoredItemOrderBy.UPDATE_TIME_DSC: desc(model.HistoryDatasetAssociation.update_time), + } def get_discarded_summary(self, user: model.User) -> CleanableItemsSummary: stmt = ( @@ -351,7 +362,13 @@ def get_discarded_summary(self, user: model.User) -> CleanableItemsSummary: total_size = 0 if result[0] is None else result[0] return CleanableItemsSummary(total_size=total_size, total_items=result[1]) - def get_discarded(self, user: model.User, offset: Optional[int], limit: Optional[int]) -> List[StoredItem]: + def get_discarded( + self, + user: model.User, + offset: Optional[int], + limit: Optional[int], + order: Optional[StoredItemOrderBy], + ) -> List[StoredItem]: stmt = ( select( [ @@ -376,6 +393,8 @@ def get_discarded(self, user: model.User, offset: Optional[int], limit: Optional stmt = stmt.offset(offset) if limit: stmt = stmt.limit(limit) + if order: + stmt = stmt.order_by(self.sort_map[order]) result = self.hda_manager.session().execute(stmt) discarded = [ StoredItem(id=row.id, name=row.name, type="dataset", size=row.total_size, update_time=row.update_time) diff --git a/lib/galaxy/managers/histories.py b/lib/galaxy/managers/histories.py index 185212aa424a..4a930287e99f 100644 --- a/lib/galaxy/managers/histories.py +++ b/lib/galaxy/managers/histories.py @@ -55,6 +55,7 @@ StorageItemCleanupError, StorageItemsCleanupResult, StoredItem, + StoredItemOrderBy, ) from galaxy.security.validate_user_input import validate_preferred_object_store_id from galaxy.structured_app import MinimalManagerApp @@ -370,6 +371,14 @@ def make_members_public(self, trans, item): class HistoryStorageCleanerManager(StorageCleanerManager): def __init__(self, history_manager: HistoryManager): self.history_manager = history_manager + self.sort_map = { + StoredItemOrderBy.NAME_ASC: asc(model.History.name), + StoredItemOrderBy.NAME_DSC: desc(model.History.name), + StoredItemOrderBy.SIZE_ASC: asc(model.History.disk_size), + StoredItemOrderBy.SIZE_DSC: desc(model.History.disk_size), + 
StoredItemOrderBy.UPDATE_TIME_ASC: asc(model.History.update_time), + StoredItemOrderBy.UPDATE_TIME_DSC: desc(model.History.update_time), + } def get_discarded_summary(self, user: model.User) -> CleanableItemsSummary: stmt = select([func.sum(model.History.disk_size), func.count(model.History.id)]).where( @@ -381,7 +390,13 @@ def get_discarded_summary(self, user: model.User) -> CleanableItemsSummary: total_size = 0 if result[0] is None else result[0] return CleanableItemsSummary(total_size=total_size, total_items=result[1]) - def get_discarded(self, user: model.User, offset: Optional[int], limit: Optional[int]) -> List[StoredItem]: + def get_discarded( + self, + user: model.User, + offset: Optional[int], + limit: Optional[int], + order: Optional[StoredItemOrderBy], + ) -> List[StoredItem]: stmt = select(model.History).where( model.History.user_id == user.id, model.History.deleted == true(), @@ -391,6 +406,8 @@ def get_discarded(self, user: model.User, offset: Optional[int], limit: Optional stmt = stmt.offset(offset) if limit: stmt = stmt.limit(limit) + if order: + stmt = stmt.order_by(self.sort_map[order]) result = self.history_manager.session().execute(stmt).scalars() discarded = [self._history_to_stored_item(item) for item in result] return discarded diff --git a/lib/galaxy/schema/storage_cleaner.py b/lib/galaxy/schema/storage_cleaner.py index f6f228890518..69fb82771cb8 100644 --- a/lib/galaxy/schema/storage_cleaner.py +++ b/lib/galaxy/schema/storage_cleaner.py @@ -1,4 +1,5 @@ from datetime import datetime +from enum import Enum from typing import ( List, Union, @@ -41,6 +42,17 @@ class StoredItem(Model): update_time: datetime = UpdateTimeField +class StoredItemOrderBy(str, Enum): + """Available options for sorting Stored Items results.""" + + NAME_ASC = "name-asc" + NAME_DSC = "name-dsc" + SIZE_ASC = "size-asc" + SIZE_DSC = "size-dsc" + UPDATE_TIME_ASC = "update_time-asc" + UPDATE_TIME_DSC = "update_time-dsc" + + class StorageItemCleanupError(Model): item_id: EncodedDatabaseIdField error: str diff --git a/lib/galaxy/webapps/galaxy/api/storage_cleaner.py b/lib/galaxy/webapps/galaxy/api/storage_cleaner.py index d9d7ad9d9e8e..a37cf55b8bf8 100644 --- a/lib/galaxy/webapps/galaxy/api/storage_cleaner.py +++ b/lib/galaxy/webapps/galaxy/api/storage_cleaner.py @@ -7,7 +7,10 @@ Optional, ) -from fastapi import Body +from fastapi import ( + Body, + Query, +) from galaxy.managers.context import ProvidesHistoryContext from galaxy.schema.storage_cleaner import ( @@ -15,6 +18,7 @@ CleanupStorageItemsRequest, StorageItemsCleanupResult, StoredItem, + StoredItemOrderBy, ) from galaxy.webapps.galaxy.api import ( depends, @@ -32,6 +36,15 @@ router = Router(tags=["storage management"]) +OrderQueryParam: Optional[StoredItemOrderBy] = Query( + default=None, + title="Order", + description=( + "String containing one of the valid ordering attributes followed " + "by '-asc' or '-dsc' for ascending and descending order respectively." 
+ ), +) + @router.cbv class FastAPIStorageCleaner: @@ -56,8 +69,9 @@ def discarded_histories( trans: ProvidesHistoryContext = DependsOnTrans, offset: Optional[int] = OffsetQueryParam, limit: Optional[int] = LimitQueryParam, + order: Optional[StoredItemOrderBy] = OrderQueryParam, ) -> List[StoredItem]: - return self.service.get_discarded_histories(trans, offset, limit) + return self.service.get_discarded_histories(trans, offset, limit, order) @router.delete( "/api/storage/histories", @@ -88,8 +102,11 @@ def discarded_datasets_summary( def discarded_datasets( self, trans: ProvidesHistoryContext = DependsOnTrans, + offset: Optional[int] = OffsetQueryParam, + limit: Optional[int] = LimitQueryParam, + order: Optional[StoredItemOrderBy] = OrderQueryParam, ) -> List[StoredItem]: - return self.service.get_discarded_datasets(trans) + return self.service.get_discarded_datasets(trans, offset, limit, order) @router.delete( "/api/storage/datasets", diff --git a/lib/galaxy/webapps/galaxy/services/storage_cleaner.py b/lib/galaxy/webapps/galaxy/services/storage_cleaner.py index 37f4ec6ca5a7..32912fc24ca9 100644 --- a/lib/galaxy/webapps/galaxy/services/storage_cleaner.py +++ b/lib/galaxy/webapps/galaxy/services/storage_cleaner.py @@ -8,6 +8,7 @@ from galaxy.managers.hdas import HDAStorageCleanerManager from galaxy.managers.histories import HistoryStorageCleanerManager from galaxy.managers.users import UserManager +from galaxy.schema.storage_cleaner import StoredItemOrderBy from galaxy.webapps.galaxy.services.base import ServiceBase log = logging.getLogger(__name__) @@ -31,10 +32,14 @@ def get_discarded_histories_summary(self, trans: ProvidesHistoryContext): return self.history_cleaner.get_discarded_summary(user) def get_discarded_histories( - self, trans: ProvidesHistoryContext, offset: Optional[int] = None, limit: Optional[int] = None + self, + trans: ProvidesHistoryContext, + offset: Optional[int] = None, + limit: Optional[int] = None, + order: Optional[StoredItemOrderBy] = None, ): user = self.get_authenticated_user(trans) - return self.history_cleaner.get_discarded(user, offset, limit) + return self.history_cleaner.get_discarded(user, offset, limit, order) def cleanup_histories(self, trans: ProvidesHistoryContext, item_ids: Set[int]): user = self.get_authenticated_user(trans) @@ -45,10 +50,14 @@ def get_discarded_datasets_summary(self, trans: ProvidesHistoryContext): return self.hda_cleaner.get_discarded_summary(user) def get_discarded_datasets( - self, trans: ProvidesHistoryContext, offset: Optional[int] = None, limit: Optional[int] = None + self, + trans: ProvidesHistoryContext, + offset: Optional[int] = None, + limit: Optional[int] = None, + order: Optional[StoredItemOrderBy] = None, ): user = self.get_authenticated_user(trans) - return self.hda_cleaner.get_discarded(user, offset, limit) + return self.hda_cleaner.get_discarded(user, offset, limit, order) def cleanup_datasets(self, trans: ProvidesHistoryContext, item_ids: Set[int]): user = self.get_authenticated_user(trans) From e6387d8f56e16ac105766405b18f32a14d40bdc4 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 27 Jan 2023 12:35:48 +0100 Subject: [PATCH 20/42] Add test coverage for order_by --- lib/galaxy_test/api/test_storage_cleaner.py | 132 ++++++++++++++------ 1 file changed, 96 insertions(+), 36 deletions(-) diff --git a/lib/galaxy_test/api/test_storage_cleaner.py b/lib/galaxy_test/api/test_storage_cleaner.py index 8bcb2aa66970..8adfa3f1d63f 100644 --- 
a/lib/galaxy_test/api/test_storage_cleaner.py +++ b/lib/galaxy_test/api/test_storage_cleaner.py @@ -1,5 +1,4 @@ from typing import ( - Dict, List, NamedTuple, ) @@ -10,7 +9,7 @@ from ._framework import ApiTestCase -class HistoryDataForTests(NamedTuple): +class StoredItemDataForTests(NamedTuple): name: str size: int @@ -25,11 +24,11 @@ def setUp(self): @requires_new_history def test_discarded_histories_monitoring_and_cleanup(self): # Create some histories for testing - history_data_01 = HistoryDataForTests(name=f"TestHistory01_{uuid4()}", size=10) - history_data_02 = HistoryDataForTests(name=f"TestHistory02_{uuid4()}", size=25) - history_data_03 = HistoryDataForTests(name=f"TestHistory03_{uuid4()}", size=50) + history_data_01 = StoredItemDataForTests(name=f"TestHistory01_{uuid4()}", size=10) + history_data_02 = StoredItemDataForTests(name=f"TestHistory02_{uuid4()}", size=25) + history_data_03 = StoredItemDataForTests(name=f"TestHistory03_{uuid4()}", size=50) test_histories = [history_data_01, history_data_02, history_data_03] - history_name_id_map = self._create_histories(test_histories) + history_ids = self._create_histories_with(test_histories) # Initially, there shouldn't be any deleted and not purged (discarded) histories summary_response = self._get("storage/histories/discarded/summary") @@ -39,7 +38,7 @@ def test_discarded_histories_monitoring_and_cleanup(self): assert summary["total_size"] == 0 # Delete the histories - for history_id in history_name_id_map.values(): + for history_id in history_ids: self._delete(f"histories/{history_id}", json=True) expected_discarded_histories_count = len(test_histories) expected_total_size = sum([test_history.size for test_history in test_histories]) @@ -52,14 +51,35 @@ def test_discarded_histories_monitoring_and_cleanup(self): assert summary["total_size"] == expected_total_size # Check listing all the discarded histories - discarded_histories_response = self._get("storage/histories/discarded") + storage_items_url = "storage/histories/discarded" + discarded_histories_response = self._get(storage_items_url) self._assert_status_code_is_ok(discarded_histories_response) discarded_histories = discarded_histories_response.json() assert len(discarded_histories) == expected_discarded_histories_count assert sum([history["size"] for history in discarded_histories]) == expected_total_size + # Check listing order by + order_by = "name-asc" + expected_ordered_names = [history_data_01.name, history_data_02.name, history_data_03.name] + self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) + order_by = "name-dsc" + expected_ordered_names = [history_data_03.name, history_data_02.name, history_data_01.name] + self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) + order_by = "size-asc" + expected_ordered_names = [history_data_01.name, history_data_02.name, history_data_03.name] + self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) + order_by = "size-dsc" + expected_ordered_names = [history_data_03.name, history_data_02.name, history_data_01.name] + self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) + order_by = "update_time-asc" + expected_ordered_names = [history_data_01.name, history_data_02.name, history_data_03.name] + self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) + order_by = "update_time-dsc" + expected_ordered_names = [history_data_03.name, history_data_02.name, history_data_01.name] + 
self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) + # Cleanup all the histories - payload = {"item_ids": list(history_name_id_map.values())} + payload = {"item_ids": history_ids} cleanup_response = self._delete("storage/histories", data=payload, json=True) self._assert_status_code_is_ok(cleanup_response) cleanup_result = cleanup_response.json() @@ -68,33 +88,15 @@ def test_discarded_histories_monitoring_and_cleanup(self): assert cleanup_result["total_free_bytes"] == expected_total_size assert not cleanup_result["errors"] - def _create_histories(self, test_histories: List[HistoryDataForTests], wait_for_histories=True) -> Dict[str, str]: - history_name_id_map = {} - for history_data in test_histories: - post_data = dict(name=history_data.name) - create_response = self._post("histories", data=post_data).json() - self._assert_has_keys(create_response, "name", "id") - history_id = create_response["id"] - history_name_id_map[history_data.name] = history_id - # Create a dataset with content equal to the expected size of the history - if history_data.size: - self.dataset_populator.new_dataset(history_id, content=f"{'0'*(history_data.size-1)}\n") - if wait_for_histories: - for history_id in history_name_id_map.values(): - self.dataset_populator.wait_for_history(history_id) - return history_name_id_map - @requires_new_history def test_discarded_datasets_monitoring_and_cleanup(self): # Prepare history with some datasets history_id = self.dataset_populator.new_history(f"History for discarded datasets {uuid4()}") - num_datasets = 3 - dataset_size = 30 - dataset_ids = [] - for _ in range(num_datasets): - dataset = self.dataset_populator.new_dataset(history_id, content=f"{'0'*(dataset_size-1)}\n") - dataset_ids.append(dataset["id"]) - self.dataset_populator.wait_for_history(history_id) + dataset_data_01 = StoredItemDataForTests(name=f"Dataset01_{uuid4()}", size=10) + dataset_data_02 = StoredItemDataForTests(name=f"Dataset02_{uuid4()}", size=25) + dataset_data_03 = StoredItemDataForTests(name=f"Dataset03_{uuid4()}", size=50) + test_datasets = [dataset_data_01, dataset_data_02, dataset_data_03] + dataset_ids = self._create_datasets_in_history_with(history_id, test_datasets) # Initially, there shouldn't be any deleted and not purged (discarded) datasets summary_response = self._get("storage/datasets/discarded/summary") @@ -109,7 +111,7 @@ def test_discarded_datasets_monitoring_and_cleanup(self): # All datasets should be in the summary expected_num_discarded_datasets = len(dataset_ids) - expected_total_size = expected_num_discarded_datasets * dataset_size + expected_total_size = sum([test_dataset.size for test_dataset in test_datasets]) summary_response = self._get("storage/datasets/discarded/summary") self._assert_status_code_is_ok(summary_response) summary = summary_response.json() @@ -117,12 +119,32 @@ def test_discarded_datasets_monitoring_and_cleanup(self): assert summary["total_size"] == expected_total_size # Check listing all the discarded datasets - discarded_datasets_response = self._get("storage/datasets/discarded") + storage_items_url = "storage/datasets/discarded" + discarded_datasets_response = self._get(storage_items_url) self._assert_status_code_is_ok(discarded_datasets_response) discarded_datasets = discarded_datasets_response.json() assert len(discarded_datasets) == expected_num_discarded_datasets - for dataset in discarded_datasets: - assert dataset["size"] == dataset_size + assert sum([item["size"] for item in discarded_datasets]) == expected_total_size + 
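# A small aside on how these tests control byte sizes (a sketch, not part of
# the patch): each test dataset is created with `size - 1` zero characters
# plus a trailing newline, so the stored content is exactly `size` bytes.
size = 25
content = f"{'0' * (size - 1)}\n"
assert len(content.encode()) == size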
+ # Check listing order by + order_by = "name-asc" + expected_ordered_names = [dataset_data_01.name, dataset_data_02.name, dataset_data_03.name] + self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) + order_by = "name-dsc" + expected_ordered_names = [dataset_data_03.name, dataset_data_02.name, dataset_data_01.name] + self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) + order_by = "size-asc" + expected_ordered_names = [dataset_data_01.name, dataset_data_02.name, dataset_data_03.name] + self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) + order_by = "size-dsc" + expected_ordered_names = [dataset_data_03.name, dataset_data_02.name, dataset_data_01.name] + self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) + order_by = "update_time-asc" + expected_ordered_names = [dataset_data_01.name, dataset_data_02.name, dataset_data_03.name] + self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) + order_by = "update_time-dsc" + expected_ordered_names = [dataset_data_03.name, dataset_data_02.name, dataset_data_01.name] + self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) # Cleanup all the datasets payload = {"item_ids": dataset_ids} @@ -133,3 +155,41 @@ def test_discarded_datasets_monitoring_and_cleanup(self): assert cleanup_result["success_item_count"] == expected_num_discarded_datasets assert cleanup_result["total_free_bytes"] == expected_total_size assert not cleanup_result["errors"] + + def _create_histories_with( + self, test_histories: List[StoredItemDataForTests], wait_for_histories=True + ) -> List[str]: + history_ids = [] + for history_data in test_histories: + post_data = dict(name=history_data.name) + create_response = self._post("histories", data=post_data).json() + self._assert_has_keys(create_response, "name", "id") + history_id = create_response["id"] + history_ids.append(history_id) + # Create a dataset with content equal to the expected size of the history + if history_data.size: + self.dataset_populator.new_dataset(history_id, content=f"{'0'*(history_data.size-1)}\n") + if wait_for_histories: + for history_id in history_ids: + self.dataset_populator.wait_for_history(history_id) + return history_ids + + def _create_datasets_in_history_with( + self, history_id: str, test_datasets: List[StoredItemDataForTests], wait_for_history=True + ) -> List[str]: + dataset_ids = [] + for dataset_data in test_datasets: + dataset = self.dataset_populator.new_dataset( + history_id, name=dataset_data.name, content=f"{'0'*(dataset_data.size-1)}\n" + ) + dataset_ids.append(dataset["id"]) + if wait_for_history: + self.dataset_populator.wait_for_history(history_id) + return dataset_ids + + def _assert_order_is_respected(self, storage_items_url: str, order_by: str, expected_ordered_names: List[str]): + items_response = self._get(f"{storage_items_url}?order={order_by}") + self._assert_status_code_is_ok(items_response) + items = items_response.json() + for index, item in enumerate(items): + assert item["name"] == expected_ordered_names[index] From 5f871dbebbc887ef05acbd70c4ca03c11200dc54 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 27 Jan 2023 14:19:27 +0100 Subject: [PATCH 21/42] Refactor API tests to share testing logic --- lib/galaxy_test/api/test_storage_cleaner.py | 169 +++++++------------- 1 file changed, 60 insertions(+), 109 deletions(-) diff --git 
a/lib/galaxy_test/api/test_storage_cleaner.py b/lib/galaxy_test/api/test_storage_cleaner.py index 8adfa3f1d63f..cbee3d84f62a 100644 --- a/lib/galaxy_test/api/test_storage_cleaner.py +++ b/lib/galaxy_test/api/test_storage_cleaner.py @@ -1,6 +1,7 @@ from typing import ( List, NamedTuple, + Optional, ) from uuid import uuid4 @@ -23,136 +24,86 @@ def setUp(self): @requires_new_history def test_discarded_histories_monitoring_and_cleanup(self): - # Create some histories for testing - history_data_01 = StoredItemDataForTests(name=f"TestHistory01_{uuid4()}", size=10) - history_data_02 = StoredItemDataForTests(name=f"TestHistory02_{uuid4()}", size=25) - history_data_03 = StoredItemDataForTests(name=f"TestHistory03_{uuid4()}", size=50) - test_histories = [history_data_01, history_data_02, history_data_03] + test_histories = self._build_test_items(resource_name="History") history_ids = self._create_histories_with(test_histories) - # Initially, there shouldn't be any deleted and not purged (discarded) histories - summary_response = self._get("storage/histories/discarded/summary") - self._assert_status_code_is_ok(summary_response) - summary = summary_response.json() - assert summary["total_items"] == 0 - assert summary["total_size"] == 0 - - # Delete the histories - for history_id in history_ids: - self._delete(f"histories/{history_id}", json=True) - expected_discarded_histories_count = len(test_histories) - expected_total_size = sum([test_history.size for test_history in test_histories]) - - # All the `test_histories` should be in the summary - summary_response = self._get("storage/histories/discarded/summary") - self._assert_status_code_is_ok(summary_response) - summary = summary_response.json() - assert summary["total_items"] == expected_discarded_histories_count - assert summary["total_size"] == expected_total_size - - # Check listing all the discarded histories - storage_items_url = "storage/histories/discarded" - discarded_histories_response = self._get(storage_items_url) - self._assert_status_code_is_ok(discarded_histories_response) - discarded_histories = discarded_histories_response.json() - assert len(discarded_histories) == expected_discarded_histories_count - assert sum([history["size"] for history in discarded_histories]) == expected_total_size - - # Check listing order by - order_by = "name-asc" - expected_ordered_names = [history_data_01.name, history_data_02.name, history_data_03.name] - self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) - order_by = "name-dsc" - expected_ordered_names = [history_data_03.name, history_data_02.name, history_data_01.name] - self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) - order_by = "size-asc" - expected_ordered_names = [history_data_01.name, history_data_02.name, history_data_03.name] - self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) - order_by = "size-dsc" - expected_ordered_names = [history_data_03.name, history_data_02.name, history_data_01.name] - self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) - order_by = "update_time-asc" - expected_ordered_names = [history_data_01.name, history_data_02.name, history_data_03.name] - self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) - order_by = "update_time-dsc" - expected_ordered_names = [history_data_03.name, history_data_02.name, history_data_01.name] - self._assert_order_is_respected(storage_items_url, order_by, 
expected_ordered_names) - - # Cleanup all the histories - payload = {"item_ids": history_ids} - cleanup_response = self._delete("storage/histories", data=payload, json=True) - self._assert_status_code_is_ok(cleanup_response) - cleanup_result = cleanup_response.json() - assert cleanup_result["total_item_count"] == expected_discarded_histories_count - assert cleanup_result["success_item_count"] == expected_discarded_histories_count - assert cleanup_result["total_free_bytes"] == expected_total_size - assert not cleanup_result["errors"] + self._assert_monitoring_and_cleanup_for_discarded_resource("histories", test_histories, history_ids) @requires_new_history def test_discarded_datasets_monitoring_and_cleanup(self): - # Prepare history with some datasets history_id = self.dataset_populator.new_history(f"History for discarded datasets {uuid4()}") - dataset_data_01 = StoredItemDataForTests(name=f"Dataset01_{uuid4()}", size=10) - dataset_data_02 = StoredItemDataForTests(name=f"Dataset02_{uuid4()}", size=25) - dataset_data_03 = StoredItemDataForTests(name=f"Dataset03_{uuid4()}", size=50) - test_datasets = [dataset_data_01, dataset_data_02, dataset_data_03] + test_datasets = self._build_test_items(resource_name="Dataset") dataset_ids = self._create_datasets_in_history_with(history_id, test_datasets) - # Initially, there shouldn't be any deleted and not purged (discarded) datasets - summary_response = self._get("storage/datasets/discarded/summary") + self._assert_monitoring_and_cleanup_for_discarded_resource( + "datasets", test_datasets, dataset_ids, delete_resource_uri=f"histories/{history_id}/contents" + ) + + def _build_test_items(self, resource_name: str): + return [ + StoredItemDataForTests(name=f"Test{resource_name}01_{uuid4()}", size=10), + StoredItemDataForTests(name=f"Test{resource_name}02_{uuid4()}", size=25), + StoredItemDataForTests(name=f"Test{resource_name}03_{uuid4()}", size=50), + ] + + def _assert_monitoring_and_cleanup_for_discarded_resource( + self, + resource: str, + test_items: List[StoredItemDataForTests], + item_ids: List[str], + delete_resource_uri: Optional[str] = None, + ): + """Tests the storage cleaner API for a particular resource (histories or datasets)""" + delete_resource_uri = delete_resource_uri if delete_resource_uri else resource + discarded_storage_items_uri = f"storage/{resource}/discarded" + # Initially, there shouldn't be any deleted and not purged (discarded) items + summary_response = self._get(f"{discarded_storage_items_uri}/summary") self._assert_status_code_is_ok(summary_response) summary = summary_response.json() assert summary["total_items"] == 0 assert summary["total_size"] == 0 - # Delete the datasets - for dataset_id in dataset_ids: - self.dataset_populator.delete_dataset(history_id, dataset_id) + # Delete the items + for item_id in item_ids: + self._delete(f"{delete_resource_uri}/{item_id}", json=True) + expected_discarded_item_count = len(test_items) + expected_total_size = sum([item.size for item in test_items]) - # All datasets should be in the summary - expected_num_discarded_datasets = len(dataset_ids) - expected_total_size = sum([test_dataset.size for test_dataset in test_datasets]) - summary_response = self._get("storage/datasets/discarded/summary") + # All the `test_items` should be in the summary + summary_response = self._get(f"{discarded_storage_items_uri}/summary") self._assert_status_code_is_ok(summary_response) summary = summary_response.json() - assert summary["total_items"] == expected_num_discarded_datasets + assert 
summary["total_items"] == expected_discarded_item_count assert summary["total_size"] == expected_total_size - # Check listing all the discarded datasets - storage_items_url = "storage/datasets/discarded" - discarded_datasets_response = self._get(storage_items_url) - self._assert_status_code_is_ok(discarded_datasets_response) - discarded_datasets = discarded_datasets_response.json() - assert len(discarded_datasets) == expected_num_discarded_datasets - assert sum([item["size"] for item in discarded_datasets]) == expected_total_size + # Check listing all the discarded items + paginated_items_response = self._get(discarded_storage_items_uri) + self._assert_status_code_is_ok(paginated_items_response) + paginated_items = paginated_items_response.json() + assert len(paginated_items) == expected_discarded_item_count + assert sum([item["size"] for item in paginated_items]) == expected_total_size # Check listing order by - order_by = "name-asc" - expected_ordered_names = [dataset_data_01.name, dataset_data_02.name, dataset_data_03.name] - self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) - order_by = "name-dsc" - expected_ordered_names = [dataset_data_03.name, dataset_data_02.name, dataset_data_01.name] - self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) - order_by = "size-asc" - expected_ordered_names = [dataset_data_01.name, dataset_data_02.name, dataset_data_03.name] - self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) - order_by = "size-dsc" - expected_ordered_names = [dataset_data_03.name, dataset_data_02.name, dataset_data_01.name] - self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) - order_by = "update_time-asc" - expected_ordered_names = [dataset_data_01.name, dataset_data_02.name, dataset_data_03.name] - self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) - order_by = "update_time-dsc" - expected_ordered_names = [dataset_data_03.name, dataset_data_02.name, dataset_data_01.name] - self._assert_order_is_respected(storage_items_url, order_by, expected_ordered_names) - - # Cleanup all the datasets - payload = {"item_ids": dataset_ids} - cleanup_response = self._delete("storage/datasets", data=payload, json=True) + item_names_forward_order = [test_items[0].name, test_items[1].name, test_items[2].name] + item_names_reverse_order = list(reversed(item_names_forward_order)) + expected_order_by_map = { + "name-asc": item_names_forward_order, + "name-dsc": item_names_reverse_order, + "size-asc": item_names_forward_order, + "size-dsc": item_names_reverse_order, + "update_time-asc": item_names_forward_order, + "update_time-dsc": item_names_reverse_order, + } + for order_by, expected_ordered_names in expected_order_by_map.items(): + self._assert_order_is_expected(discarded_storage_items_uri, order_by, expected_ordered_names) + + # Cleanup all the items + payload = {"item_ids": item_ids} + cleanup_response = self._delete(f"storage/{resource}", data=payload, json=True) self._assert_status_code_is_ok(cleanup_response) cleanup_result = cleanup_response.json() - assert cleanup_result["total_item_count"] == expected_num_discarded_datasets - assert cleanup_result["success_item_count"] == expected_num_discarded_datasets + assert cleanup_result["total_item_count"] == expected_discarded_item_count + assert cleanup_result["success_item_count"] == expected_discarded_item_count assert cleanup_result["total_free_bytes"] == expected_total_size assert 
not cleanup_result["errors"] @@ -187,7 +138,7 @@ def _create_datasets_in_history_with( self.dataset_populator.wait_for_history(history_id) return dataset_ids - def _assert_order_is_respected(self, storage_items_url: str, order_by: str, expected_ordered_names: List[str]): + def _assert_order_is_expected(self, storage_items_url: str, order_by: str, expected_ordered_names: List[str]): items_response = self._get(f"{storage_items_url}?order={order_by}") self._assert_status_code_is_ok(items_response) items = items_response.json() From 8db5d565b4e7664859623b2338806331b317f5fd Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 27 Jan 2023 14:20:43 +0100 Subject: [PATCH 22/42] Add test coverage for pagination --- lib/galaxy_test/api/test_storage_cleaner.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/galaxy_test/api/test_storage_cleaner.py b/lib/galaxy_test/api/test_storage_cleaner.py index cbee3d84f62a..5a71bae2ba8d 100644 --- a/lib/galaxy_test/api/test_storage_cleaner.py +++ b/lib/galaxy_test/api/test_storage_cleaner.py @@ -83,6 +83,15 @@ def _assert_monitoring_and_cleanup_for_discarded_resource( assert len(paginated_items) == expected_discarded_item_count assert sum([item["size"] for item in paginated_items]) == expected_total_size + # Check pagination + offset = 1 + limit = 1 + paginated_items_response = self._get(f"{discarded_storage_items_uri}?offset={offset}&limit={limit}") + self._assert_status_code_is_ok(paginated_items_response) + paginated_items = paginated_items_response.json() + assert len(paginated_items) == 1 + assert paginated_items[0]["name"] == test_items[1].name + # Check listing order by item_names_forward_order = [test_items[0].name, test_items[1].name, test_items[2].name] item_names_reverse_order = list(reversed(item_names_forward_order)) From 8ac3b38c6e6338300323efd86ea3ade73dc224cc Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 27 Jan 2023 14:24:58 +0100 Subject: [PATCH 23/42] Update client API schema --- client/src/schema/schema.ts | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/client/src/schema/schema.ts b/client/src/schema/schema.ts index 4d7d2d984642..80f554347a24 100644 --- a/client/src/schema/schema.ts +++ b/client/src/schema/schema.ts @@ -6796,6 +6796,12 @@ export interface components { */ update_time: string; }; + /** + * StoredItemOrderBy + * @description Available options for sorting Stored Items results. + * @enum {string} + */ + StoredItemOrderBy: "name-asc" | "name-dsc" | "size-asc" | "size-dsc" | "update_time-asc" | "update_time-dsc"; /** * SuitableConverter * @description Base model definition with common configuration used by all derived models. @@ -13517,6 +13523,14 @@ export interface operations { discarded_datasets_api_storage_datasets_discarded_get: { /** Returns discarded datasets owned by the given user. The results can be paginated. */ parameters?: { + /** @description Starts at the beginning skip the first ( offset - 1 ) items and begin returning at the Nth item */ + /** @description The maximum number of items to return. */ + /** @description String containing one of the valid ordering attributes followed by '-asc' or '-dsc' for ascending and descending order respectively. */ + query?: { + offset?: number; + limit?: number; + order?: components["schemas"]["StoredItemOrderBy"]; + }; /** @description The user ID that will be used to effectively make this API call. 
Only admins and designated users can make API calls on behalf of other users. */ header?: { "run-as"?: string; @@ -13596,9 +13610,11 @@ export interface operations { parameters?: { /** @description Starts at the beginning skip the first ( offset - 1 ) items and begin returning at the Nth item */ /** @description The maximum number of items to return. */ + /** @description String containing one of the valid ordering attributes followed by '-asc' or '-dsc' for ascending and descending order respectively. */ query?: { offset?: number; limit?: number; + order?: components["schemas"]["StoredItemOrderBy"]; }; /** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */ header?: { From ddbd42108dd436479d8906e3bf719ffd67fb5723 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 30 Jan 2023 13:26:43 +0100 Subject: [PATCH 24/42] Adapt client service to use new API --- .../Cleanup/CleanupOperationSummary.test.ts | 29 ++- .../Cleanup/CleanupResultDialog.test.ts | 48 ++-- .../Cleanup/ReviewCleanupDialog.test.ts | 31 ++- .../Cleanup/ReviewCleanupDialog.vue | 24 +- .../Cleanup/model/CleanableSummary.ts | 16 +- .../Cleanup/model/CleanupOperation.ts | 26 +- .../Management/Cleanup/model/CleanupResult.ts | 84 ++++--- .../Management/Cleanup/model/index.ts | 4 +- .../User/DiskUsage/Management/services.ts | 237 +++--------------- 9 files changed, 203 insertions(+), 296 deletions(-) diff --git a/client/src/components/User/DiskUsage/Management/Cleanup/CleanupOperationSummary.test.ts b/client/src/components/User/DiskUsage/Management/Cleanup/CleanupOperationSummary.test.ts index ea56e1bb0731..dd6ef1ec9e0e 100644 --- a/client/src/components/User/DiskUsage/Management/Cleanup/CleanupOperationSummary.test.ts +++ b/client/src/components/User/DiskUsage/Management/Cleanup/CleanupOperationSummary.test.ts @@ -2,13 +2,18 @@ import { mount } from "@vue/test-utils"; import flushPromises from "flush-promises"; import { getLocalVue } from "@tests/jest/helpers"; import CleanupOperationSummary from "./CleanupOperationSummary.vue"; -import { CleanableSummary, type CleanupOperation, CleanupResult } from "./model"; +import { CleanableSummary, type CleanupOperation, CleanupResult, type CleanableItem } from "./model"; const localVue = getLocalVue(); const REVIEW_ITEMS_LINK = '[data-test-id="review-link"]'; const NO_ITEMS_INDICATOR = '[data-test-id="no-items-indicator"]'; +const EXPECTED_ITEMS: CleanableItem[] = [ + { id: "1", name: "Item 1", size: 512, type: "dataset", update_time: new Date().toISOString() }, + { id: "2", name: "Item 2", size: 512, type: "dataset", update_time: new Date().toISOString() }, +]; + /** Operation that can clean some items */ const CLEANUP_OPERATION: CleanupOperation = { id: "operation-id", @@ -16,16 +21,20 @@ const CLEANUP_OPERATION: CleanupOperation = { description: "operation description", fetchSummary: async () => new CleanableSummary({ - totalSize: 1024, - totalItems: 2, + total_size: 1024, + total_items: 2, }), fetchItems: async () => [], cleanupItems: async () => - new CleanupResult({ - totalItemCount: 2, - totalFreeBytes: 1024, - errors: [], - }), + new CleanupResult( + { + total_item_count: 2, + success_item_count: 2, + total_free_bytes: 1024, + errors: [], + }, + EXPECTED_ITEMS + ), }; /** Operation without items to clean*/ const EMPTY_CLEANUP_OPERATION: CleanupOperation = { @@ -34,8 +43,8 @@ const EMPTY_CLEANUP_OPERATION: CleanupOperation = { 
description: "operation that has no items to clean", fetchSummary: async () => new CleanableSummary({ - totalSize: 0, - totalItems: 0, + total_size: 0, + total_items: 0, }), fetchItems: async () => [], cleanupItems: async () => new CleanupResult(), diff --git a/client/src/components/User/DiskUsage/Management/Cleanup/CleanupResultDialog.test.ts b/client/src/components/User/DiskUsage/Management/Cleanup/CleanupResultDialog.test.ts index c556f9e15826..e36462251f83 100644 --- a/client/src/components/User/DiskUsage/Management/Cleanup/CleanupResultDialog.test.ts +++ b/client/src/components/User/DiskUsage/Management/Cleanup/CleanupResultDialog.test.ts @@ -2,7 +2,7 @@ import { mount } from "@vue/test-utils"; import flushPromises from "flush-promises"; import { getLocalVue } from "tests/jest/helpers"; import CleanupResultDialog from "./CleanupResultDialog.vue"; -import { CleanupResult } from "./model"; +import { CleanupResult, type CleanableItem } from "./model"; const localVue = getLocalVue(); @@ -14,25 +14,41 @@ const ERRORS_TABLE = '[data-test-id="errors-table"]'; const NO_RESULT_YET = undefined; const FAILED_RESULT = () => { - return new CleanupResult({ - errorMessage: "The operation failed", - totalFreeBytes: 0, - totalItemCount: 0, - errors: [], - }); + return new CleanupResult( + { + total_item_count: 0, + errors: [], + success_item_count: 0, + total_free_bytes: 0, + }, + [], + "The operation failed" + ); }; +const TEST_ITEMS: CleanableItem[] = [ + { id: "1", name: "Dataset X", size: 512, type: "dataset", update_time: new Date().toISOString() }, + { id: "2", name: "Dataset Y", size: 512, type: "dataset", update_time: new Date().toISOString() }, + { id: "3", name: "Dataset Z", size: 512, type: "dataset", update_time: new Date().toISOString() }, +]; const PARTIAL_SUCCESS_RESULT = () => { - return new CleanupResult({ - totalItemCount: 3, - totalFreeBytes: 1, - errors: [ - { name: "Dataset X", reason: "Failed because of X" }, - { name: "Dataset Y", reason: "Failed because of Y" }, - ], - }); + return new CleanupResult( + { + total_item_count: 3, + success_item_count: 1, + total_free_bytes: 512, + errors: [ + { item_id: "1", error: "Failed because of X" }, + { item_id: "2", error: "Failed because of Y" }, + ], + }, + TEST_ITEMS + ); }; const SUCCESS_RESULT = () => { - return new CleanupResult({ totalItemCount: 2, totalFreeBytes: 2, errors: [] }); + return new CleanupResult( + { total_item_count: 3, success_item_count: 3, total_free_bytes: 512 * 3, errors: [] }, + TEST_ITEMS + ); }; async function mountCleanupResultDialogWith(result?: CleanupResult) { const wrapper = mount(CleanupResultDialog, { propsData: { result, show: true }, localVue }); diff --git a/client/src/components/User/DiskUsage/Management/Cleanup/ReviewCleanupDialog.test.ts b/client/src/components/User/DiskUsage/Management/Cleanup/ReviewCleanupDialog.test.ts index 8bcd23f75f24..51ded03fba80 100644 --- a/client/src/components/User/DiskUsage/Management/Cleanup/ReviewCleanupDialog.test.ts +++ b/client/src/components/User/DiskUsage/Management/Cleanup/ReviewCleanupDialog.test.ts @@ -1,7 +1,7 @@ import { mount, type WrapperArray } from "@vue/test-utils"; import flushPromises from "flush-promises"; import { getLocalVue } from "tests/jest/helpers"; -import { CleanableSummary, type CleanupOperation, CleanupResult } from "./model"; +import { CleanableSummary, type CleanupOperation, CleanupResult, type CleanableItem } from "./model"; import ReviewCleanupDialog from "./ReviewCleanupDialog.vue"; const localVue = getLocalVue(); @@ -12,26 +12,31 @@ 
 const SELECT_ALL_CHECKBOX = '[data-test-id="select-all-checkbox"]';
 const AGREEMENT_CHECKBOX = '[data-test-id="agreement-checkbox"]';
 const CONFIRMATION_MODAL = "#confirmation-modal";
-const EXPECTED_TOTAL_ITEMS = 2;
+const EXPECTED_ITEMS: CleanableItem[] = [
+    { id: "1", name: "Item 1", size: 512, type: "dataset", update_time: new Date().toISOString() },
+    { id: "2", name: "Item 2", size: 512, type: "dataset", update_time: new Date().toISOString() },
+];
+const EXPECTED_TOTAL_ITEMS = EXPECTED_ITEMS.length;
 const FAKE_OPERATION: CleanupOperation = {
     id: "operation-id",
     name: "operation name",
     description: "operation description",
     fetchSummary: async () =>
         new CleanableSummary({
-            totalSize: 1024,
-            totalItems: EXPECTED_TOTAL_ITEMS,
+            total_size: 1024,
+            total_items: EXPECTED_TOTAL_ITEMS,
         }),
-    fetchItems: async () => [
-        { id: "1", name: "Item 1", size: 512, update_time: new Date().toISOString(), hda_ldda: "hda" },
-        { id: "2", name: "Item 2", size: 512, update_time: new Date().toISOString(), hda_ldda: "hda" },
-    ],
+    fetchItems: async () => EXPECTED_ITEMS,
     cleanupItems: async () =>
-        new CleanupResult({
-            totalItemCount: EXPECTED_TOTAL_ITEMS,
-            totalFreeBytes: 1024,
-            errors: [],
-        }),
+        new CleanupResult(
+            {
+                total_item_count: EXPECTED_TOTAL_ITEMS,
+                success_item_count: EXPECTED_TOTAL_ITEMS,
+                total_free_bytes: 1024,
+                errors: [],
+            },
+            EXPECTED_ITEMS
+        ),
 };

 async function mountReviewCleanupDialogWith(operation: CleanupOperation, totalItems = EXPECTED_TOTAL_ITEMS) {
diff --git a/client/src/components/User/DiskUsage/Management/Cleanup/ReviewCleanupDialog.vue b/client/src/components/User/DiskUsage/Management/Cleanup/ReviewCleanupDialog.vue
index 02d3c9c6ac5e..c1737108a5c9 100644
--- a/client/src/components/User/DiskUsage/Management/Cleanup/ReviewCleanupDialog.vue
+++ b/client/src/components/User/DiskUsage/Management/Cleanup/ReviewCleanupDialog.vue
@@ -3,7 +3,7 @@ import localize from "@/utils/localization";
 import { bytesToString } from "@/utils/utils";
 import { BModal, BTable, BFormCheckbox, BLink, BPagination, BButton } from "bootstrap-vue";
 import UtcDate from "@/components/UtcDate.vue";
-import type { CleanableItem, CleanupOperation } from "./model";
+import { type CleanableItem, type CleanupOperation, type SortableKey, PaginationOptions } from "./model";
 import { computed, ref, watch } from "vue";

 interface ReviewCleanupDialogProps {
@@ -46,9 +46,7 @@ const fields = [
     },
 ];

-type SortableKey = "name" | "size" | "update_time";
-
-const sortBy = ref("size");
+const sortBy = ref<SortableKey>("size");
 const sortDesc = ref(true);
 const perPage = ref(50);
 const currentPage = ref(1);
@@ -164,12 +162,12 @@ async function itemsProvider(ctx: { currentPage: number; perPage: number }) {
     try {
         const page = ctx.currentPage > 0 ?
ctx.currentPage - 1 : 0; const offset = page * ctx.perPage; - const options = { + const options = new PaginationOptions({ offset: offset, limit: ctx.perPage, sortBy: sortBy.value, sortDesc: sortDesc.value, - }; + }); const result = await props.operation.fetchItems(options); return result; } catch (error) { @@ -179,12 +177,14 @@ async function itemsProvider(ctx: { currentPage: number; perPage: number }) { async function onSelectAllItems() { isBusy.value = true; - const allItems = await props.operation.fetchItems({ - offset: 0, - limit: totalRows.value, - sortBy: sortBy.value, - sortDesc: sortDesc.value, - }); + const allItems = await props.operation.fetchItems( + new PaginationOptions({ + offset: 0, + limit: totalRows.value, + sortBy: sortBy.value, + sortDesc: sortDesc.value, + }) + ); selectedItems.value = allItems; isBusy.value = false; } diff --git a/client/src/components/User/DiskUsage/Management/Cleanup/model/CleanableSummary.ts b/client/src/components/User/DiskUsage/Management/Cleanup/model/CleanableSummary.ts index d61d37084605..c16de15cd1e0 100644 --- a/client/src/components/User/DiskUsage/Management/Cleanup/model/CleanableSummary.ts +++ b/client/src/components/User/DiskUsage/Management/Cleanup/model/CleanableSummary.ts @@ -1,32 +1,28 @@ import { bytesToString } from "@/utils/utils"; +import type { components } from "@/schema"; -interface SummaryDataResponse { - totalSize: number; - totalItems: number; -} +type CleanableItemsSummaryResponse = components["schemas"]["CleanableItemsSummary"]; /** * Contains summary information about how much storage space can be recovered by removing * a collection of items from it. */ export class CleanableSummary { - private _data: SummaryDataResponse; + private _data: CleanableItemsSummaryResponse; - constructor(data: SummaryDataResponse) { + constructor(data: CleanableItemsSummaryResponse) { this._data = data; } /** * The total size in bytes that can be recovered by removing all the items. - * @returns {Number} */ get totalSize(): number { - return this._data.totalSize; + return this._data.total_size; } /** * The human readable total amount of disk space that can be recovered. - * @returns {String} */ get niceTotalSize(): string { return bytesToString(this.totalSize, true, undefined); @@ -36,6 +32,6 @@ export class CleanableSummary { * The total number of items that could be removed. 
     */
    get totalItems() {
-        return this._data.totalItems;
+        return this._data.total_items;
     }
 }
diff --git a/client/src/components/User/DiskUsage/Management/Cleanup/model/CleanupOperation.ts b/client/src/components/User/DiskUsage/Management/Cleanup/model/CleanupOperation.ts
index e4b9055a76f2..83aa44b864f4 100644
--- a/client/src/components/User/DiskUsage/Management/Cleanup/model/CleanupOperation.ts
+++ b/client/src/components/User/DiskUsage/Management/Cleanup/model/CleanupOperation.ts
@@ -1,17 +1,27 @@
 import type { CleanableSummary } from "./CleanableSummary";
 import type { CleanupResult } from "./CleanupResult";
+import type { components } from "@/schema";

-export interface PaginationOptions {
+export type CleanableItem = components["schemas"]["StoredItem"];
+export type SortableKey = "name" | "size" | "update_time";
+type StoredItemOrderBy = components["schemas"]["StoredItemOrderBy"];
+
+export class PaginationOptions {
     limit?: number;
     offset?: number;
-    sortBy?: string;
+    sortBy?: SortableKey;
     sortDesc?: boolean;
-}

-export interface CleanableItem {
-    id: string;
-    name: string;
-    size: number;
+    constructor(props?: { limit?: number; offset?: number; sortBy?: SortableKey; sortDesc?: boolean }) {
+        this.limit = props?.limit;
+        this.offset = props?.offset;
+        this.sortBy = props?.sortBy;
+        this.sortDesc = props?.sortDesc;
+    }
+
+    get order(): StoredItemOrderBy | undefined {
+        return this.sortBy ? `${this.sortBy}${this.sortDesc ? "-dsc" : "-asc"}` : undefined;
+    }
 }

 /**
@@ -46,7 +56,7 @@ export interface CleanupOperation {
      * @param options The filter options for sorting and pagination of the items.
      * @returns An array of items that can be potentially `cleaned` and match the filtering params.
      */
-    fetchItems: (options: PaginationOptions) => Promise<CleanableItem[]>;
+    fetchItems: (options?: PaginationOptions) => Promise<CleanableItem[]>;

     /**
      * Processes the given items to free up some user storage space and provides a result
diff --git a/client/src/components/User/DiskUsage/Management/Cleanup/model/CleanupResult.ts b/client/src/components/User/DiskUsage/Management/Cleanup/model/CleanupResult.ts
index 7d7be5dfa3b7..c9b51fa3f34c 100644
--- a/client/src/components/User/DiskUsage/Management/Cleanup/model/CleanupResult.ts
+++ b/client/src/components/User/DiskUsage/Management/Cleanup/model/CleanupResult.ts
@@ -1,47 +1,79 @@
 import { bytesToString } from "@/utils/utils";
+import type { components } from "@/schema";
+import type { CleanableItem } from "./CleanupOperation";

+export type StorageItemsCleanupResult = components["schemas"]["StorageItemsCleanupResult"];
+
+/**
+ * Associates the name of an item with the error message when failed.
+ * Simplifies displaying information to the final user.
+ */
 export interface ItemError {
+    /** Name of the item. */
     name: string;
+    /** The reason why it couldn't be cleaned. */
     reason: string;
 }

-export interface CleanupResultResponse {
-    totalItemCount: number;
-    totalFreeBytes: number;
-    errors: ItemError[];
-    errorMessage?: string;
-}
-
 /**
- * Contains information about the result of the cleaning operation.
+ * Provides additional information about the result of the cleaning operation.
 */
 export class CleanupResult {
-    private _data: CleanupResultResponse;
+    private _data: StorageItemsCleanupResult;
+    private _item_id_map: Map<string, CleanableItem>;
+    private _errorMessage?: string;

-    constructor(data: CleanupResultResponse = { totalFreeBytes: 0, totalItemCount: 0, errors: [] }) {
-        this._data = data;
+    /**
+     * Creates a new CleanupResult.
+     * @param data The server response data for a cleanup operation.
+     * @param items The list of requested items to be cleaned.
+     * @param errorMessage A possible error message if the request completely failed.
+     */
+    constructor(data?: StorageItemsCleanupResult, items?: CleanableItem[], errorMessage?: string) {
+        this._data = data ?? { total_item_count: 0, success_item_count: 0, total_free_bytes: 0, errors: [] };
+        items = items ?? [];
+        this._item_id_map = new Map<string, CleanableItem>();
+        items.forEach((item) => {
+            this._item_id_map.set(item.id, item);
+        });
+        this._errorMessage = errorMessage;
     }
 
+    /**
+     * The total number of items requested to be cleaned.
+     */
     get totalItemCount(): number {
-        return this._data.totalItemCount;
+        return this._data.total_item_count;
     }
 
+    /**
+     * The total amount of space in bytes recovered after trying to clean up all the requested items.
+     */
     get totalFreeBytes(): number {
-        return this._data.totalFreeBytes;
+        return this._data.total_free_bytes;
    }
 
+    /**
+     * List of error messages associated with a particular item ID.
+     * When an item cannot be cleaned it will be registered in this list.
+     */
     get errors(): ItemError[] {
-        return this._data.errors;
+        return this._data.errors.map((e) => {
+            return { name: this._item_id_map.get(e.item_id)?.name ?? "Unknown", reason: e.error };
+        });
     }
 
+    /**
+     * A general error message, usually indicating that the whole cleanup operation failed with
+     * no partial success.
+     */
     get errorMessage(): string | undefined {
-        return this._data.errorMessage;
+        return this._errorMessage;
    }
 
     /**
-     * Whether the cleanup operation yielded some errors.
-     * It doesn't mean the operation completely failed.
-     * @returns {boolean}
+     * Whether the cleanup operation could not succeed for every requested item.
+     * It doesn't mean the operation completely failed; some items could have been successfully cleaned.
      */
     get hasSomeErrors(): boolean {
         return this._data.errors.length > 0;
@@ -50,32 +82,27 @@ export class CleanupResult {
     /**
      * Whether the cleanup operation completely failed.
      * This means not even partial cleaning was made.
-     * @returns {boolean}
      */
     get hasFailed(): boolean {
-        return Boolean(this._data.errorMessage);
+        return Boolean(this._errorMessage);
     }
 
     /**
      * Whether the cleanup operation was executed without errors.
-     * @returns {boolean}
      */
     get success(): boolean {
-        return !this.hasSomeErrors && !this._data.errorMessage;
+        return !this.hasSomeErrors && !this.hasFailed;
     }
 
     /**
      * The number of items successfully cleaned.
-     * @returns {number}
     */
     get totalCleaned(): number {
-        return this._data.totalItemCount - this._data.errors.length;
+        return this._data.total_item_count - this._data.errors.length;
    }
 
     /**
-     * Whether the cleanup operation managed to
-     * free some items but not all of them.
-     * @returns {boolean}
+     * Whether the cleanup operation managed to free some items but not all of them.
     */
     get isPartialSuccess(): boolean {
         return this._data.errors.length > 0 && this.totalCleaned > 0;
@@ -83,9 +110,8 @@ export class CleanupResult {
 
     /**
      * The total amount of disk space freed by the cleanup operation.
- * @returns {String} */ get niceTotalFreeBytes(): string { - return bytesToString(this._data.totalFreeBytes, true, undefined); + return bytesToString(this._data.total_free_bytes, true, undefined); } } diff --git a/client/src/components/User/DiskUsage/Management/Cleanup/model/index.ts b/client/src/components/User/DiskUsage/Management/Cleanup/model/index.ts index 74850c16e6ff..3f5e6b29c099 100644 --- a/client/src/components/User/DiskUsage/Management/Cleanup/model/index.ts +++ b/client/src/components/User/DiskUsage/Management/Cleanup/model/index.ts @@ -1,4 +1,4 @@ export { CleanableSummary } from "./CleanableSummary"; export type { CleanupCategory } from "./CleanupCategory"; -export type { CleanableItem, CleanupOperation, PaginationOptions } from "./CleanupOperation"; -export { CleanupResult, type CleanupResultResponse, type ItemError } from "./CleanupResult"; +export { type CleanableItem, type CleanupOperation, type SortableKey, PaginationOptions } from "./CleanupOperation"; +export { CleanupResult } from "./CleanupResult"; diff --git a/client/src/components/User/DiskUsage/Management/services.ts b/client/src/components/User/DiskUsage/Management/services.ts index 94d601487518..698548c546e8 100644 --- a/client/src/components/User/DiskUsage/Management/services.ts +++ b/client/src/components/User/DiskUsage/Management/services.ts @@ -1,240 +1,85 @@ -import { getAppRoot } from "@/onload/loadConfig"; import { rethrowSimple } from "@/utils/simple-error"; -import axios from "axios"; -import { - CleanableSummary, - CleanupResult, - type CleanableItem, - type CleanupResultResponse, - type ItemError, - type PaginationOptions, -} from "./Cleanup/model"; +import { CleanableSummary, CleanupResult, PaginationOptions, type CleanableItem } from "./Cleanup/model"; +import { fetcher } from "@/schema"; -const datasetKeys = "id,name,size,update_time,hda_ldda"; -const isDataset = "q=history_content_type-eq&qv=dataset"; -const isDeleted = "q=deleted-eq&qv=True"; -const isNotPurged = "q=purged-eq&qv=False"; -const maxItemsToFetch = 500; -const discardedDatasetsQueryParams = `${isDataset}&${isDeleted}&${isNotPurged}&limit=${maxItemsToFetch}`; +const _fetchDiscardedDatasetsSummary = fetcher.path("/api/storage/datasets/discarded/summary").method("get").create(); -const historyKeys = "id,name,size,update_time"; -const discardedHistoriesQueryParams = `&${isDeleted}&${isNotPurged}&limit=${maxItemsToFetch}`; - -interface DiscardedDataset extends CleanableItem { - hda_ldda: string; -} - -interface DatasetSourceId { - id: string; - src: string; -} - -interface DatasetErrorMessage { - dataset: DatasetSourceId; - error_message: string; -} - -interface DeleteDatasetBatchResult { - success_count: number; - errors?: DatasetErrorMessage[]; -} - -type DiscardedHistory = CleanableItem; - -/** - * Calculates the total amount of bytes that can be cleaned by permanently removing - * deleted datasets. - * @returns Object containing information about how much can be cleaned. 
- */
 export async function fetchDiscardedDatasetsSummary(): Promise<CleanableSummary> {
-    //TODO: possible optimization -> moving this to specific API endpoint so we don't have to parse
-    // potentially a huge number of items
-    const summaryKeys = "size";
-    const url = `${getAppRoot()}api/datasets?keys=${summaryKeys}&${discardedDatasetsQueryParams}`;
     try {
-        const { data } = await axios.get(url);
-        const totalSizeInBytes = data.reduce(
-            (partial_sum: number, item: DiscardedDataset) => partial_sum + item.size,
-            0
-        );
-        return new CleanableSummary({
-            totalSize: totalSizeInBytes,
-            totalItems: data.length,
-        });
+        const { data } = await _fetchDiscardedDatasetsSummary({});
+        return new CleanableSummary(data);
     } catch (e) {
         rethrowSimple(e);
     }
 }
 
-/**
- * Retrieves all deleted datasets of the current user that haven't been purged yet using pagination.
- * @param options Filtering options for pagination and sorting.
- * @returns Array of dataset objects with the fields defined in `datasetKeys` constant.
- */
-export async function fetchDiscardedDatasets(options: PaginationOptions = {}): Promise<DiscardedDataset[]> {
-    let params = "";
-    if (options.sortBy) {
-        const sortPostfix = options.sortDesc ? "-dsc" : "-asc";
-        params += `order=${options.sortBy}${sortPostfix}&`;
-    }
-    if (options.limit) {
-        params += `limit=${options.limit}&`;
-    }
-    if (options.offset) {
-        params += `offset=${options.offset}&`;
-    }
-    const url = `${getAppRoot()}api/datasets?keys=${datasetKeys}&${discardedDatasetsQueryParams}&${params}`;
-    try {
-        const { data } = await axios.get(url);
-        return data as DiscardedDataset[];
-    } catch (e) {
-        rethrowSimple(e);
-    }
-}
+const _fetchDiscardedDatasets = fetcher.path("/api/storage/datasets/discarded").method("get").create();
 
-/**
- * Purges a collection of datasets.
- * @param datasetSourceIds Array of objects with datasets {id, src} to be purged.
- * @returns Result object with `success_count` and `errors`.
- */
-export async function purgeDatasets(datasetSourceIds: DatasetSourceId[]): Promise<DeleteDatasetBatchResult> {
-    const payload = {
-        purge: true,
-        datasets: datasetSourceIds,
-    };
-    const url = `${getAppRoot()}api/datasets`;
+export async function fetchDiscardedDatasets(options?: PaginationOptions): Promise<CleanableItem[]> {
     try {
-        const { data } = await axios.delete(url, { data: payload });
+        options = options ?? new PaginationOptions();
+        const { data } = await _fetchDiscardedDatasets({
+            offset: options.offset,
+            limit: options.limit,
+            order: options.order,
+        });
         return data;
     } catch (e) {
         rethrowSimple(e);
     }
 }
 
-/**
- * Purges a set of datasets instances (HDA, LDDA, ...) from disk and returns the total space freed in bytes
- * taking into account possible datasets that couldn't be deleted.
- * @param items Array of datasets to be removed from disk.
- * Each dataset must contain `id` and `size`.
- * @returns Information about the result of the cleanup operation.
- */
+const _cleanupDiscardedDatasets = fetcher.path("/api/storage/datasets").method("delete").create();
+
 export async function cleanupDiscardedDatasets(items: CleanableItem[]): Promise<CleanupResult> {
-    const resultResponse: CleanupResultResponse = { errors: [], totalFreeBytes: 0, totalItemCount: 0 };
     try {
-        const datasetsTable = items.reduce(
-            (acc: { [key: string]: CleanableItem }, item: CleanableItem) => ((acc[item.id] = item), acc),
-            {}
-        );
-
-        const datasetSourceIds: DatasetSourceId[] = items.map((item: CleanableItem) => {
-            const dataset = item as DiscardedDataset;
-            return { id: dataset.id, src: dataset.hda_ldda };
+        const item_ids = items.map((item) => item.id);
+        const { data } = await _cleanupDiscardedDatasets({
+            item_ids,
         });
-
-        const requestResult = await purgeDatasets(datasetSourceIds);
-
-        resultResponse.totalItemCount = items.length;
-
-        if (requestResult.errors) {
-            resultResponse.errors = mapErrors(datasetsTable, requestResult.errors);
-
-            const erroredIds = requestResult.errors?.reduce((acc: string[], error) => [...acc, error.dataset.id], []);
-
-            resultResponse.totalFreeBytes = datasetSourceIds.reduce((partial_sum, item) => {
-                if (erroredIds?.includes(item.id)) {
-                    return partial_sum;
-                } else {
-                    return partial_sum + (datasetsTable[item.id]?.size ?? 0);
-                }
-            }, 0);
-        }
+        return new CleanupResult(data, items);
     } catch (error) {
-        resultResponse.errorMessage = error as string;
+        return new CleanupResult(undefined, items, error as string);
     }
-    return new CleanupResult(resultResponse);
 }
 
+const _fetchDiscardedHistoriesSummary = fetcher.path("/api/storage/histories/discarded/summary").method("get").create();
+
 export async function fetchDiscardedHistoriesSummary(): Promise<CleanableSummary> {
-    const summaryKeys = "size";
-    const url = `${getAppRoot()}api/histories?keys=${summaryKeys}&${discardedHistoriesQueryParams}`;
     try {
-        const { data } = await axios.get(url);
-        const totalSizeInBytes = data.reduce(
-            (partial_sum: number, item: DiscardedHistory) => partial_sum + item.size,
-            0
-        );
-        return new CleanableSummary({
-            totalSize: totalSizeInBytes,
-            totalItems: data.length,
-        });
+        const { data } = await _fetchDiscardedHistoriesSummary({});
+        return new CleanableSummary(data);
     } catch (e) {
         rethrowSimple(e);
     }
 }
 
-export async function fetchDiscardedHistories(options: PaginationOptions = {}): Promise<DiscardedHistory[]> {
-    let params = "";
-    if (options.sortBy) {
-        const sortPostfix = options.sortDesc ? "-dsc" : "-asc";
-        params += `order=${options.sortBy}${sortPostfix}&`;
-    }
-    if (options.limit) {
-        params += `limit=${options.limit}&`;
-    }
-    if (options.offset) {
-        params += `offset=${options.offset}&`;
-    }
-    const url = `${getAppRoot()}api/histories?keys=${historyKeys}&${discardedHistoriesQueryParams}&${params}`;
-    try {
-        const { data } = await axios.get(url);
-        return data as DiscardedHistory[];
-    } catch (e) {
-        rethrowSimple(e);
-    }
-}
+const _fetchDiscardedHistories = fetcher.path("/api/storage/histories/discarded").method("get").create();
 
-async function purgeHistory(historyId: string) {
-    const payload = {
-        purge: true,
-    };
-    const url = `${getAppRoot()}api/histories/${historyId}`;
+export async function fetchDiscardedHistories(options?: PaginationOptions): Promise<CleanableItem[]> {
     try {
-        const { data } = await axios.delete(url, { data: payload });
+        options = options ??
new PaginationOptions(); + const { data } = await _fetchDiscardedHistories({ + offset: options.offset, + limit: options.limit, + order: options.order, + }); return data; } catch (e) { rethrowSimple(e); } } -export async function cleanupDiscardedHistories(histories: DiscardedHistory[]) { - const resultResponse: CleanupResultResponse = { errors: [], totalFreeBytes: 0, totalItemCount: 0 }; - const historiesTable = histories.reduce( - (acc: { [key: string]: DiscardedHistory }, item: DiscardedHistory) => ((acc[item.id] = item), acc), - {} - ); - // TODO: Promise.all() and do this in parallel? Or add a bulk delete endpoint? +const _cleanupDiscardedHistories = fetcher.path("/api/storage/histories").method("delete").create(); + +export async function cleanupDiscardedHistories(items: CleanableItem[]) { try { - for (const history of histories) { - await purgeHistory(history.id); - resultResponse.totalFreeBytes += historiesTable[history.id]?.size ?? 0; - resultResponse.totalItemCount += 1; - } + const item_ids = items.map((item) => item.id); + const { data } = await _cleanupDiscardedHistories({ + item_ids, + }); + return new CleanupResult(data, items); } catch (error) { - resultResponse.errorMessage = error as string; + return new CleanupResult(undefined, items, error as string); } - - return new CleanupResult(resultResponse); -} - -/** - * Maps the error messages with the dataset name for user display. - * @param datasetsTable Datasets dictionary indexed by ID - * @param errors List of errors associated with each dataset ID - * @returns A list with the name of the dataset and the associated error message. - */ -function mapErrors(datasetsTable: { [key: string]: CleanableItem }, errors: DatasetErrorMessage[]): ItemError[] { - return errors.map((error) => { - const name = datasetsTable[error.dataset.id]?.name ?? "Unknown Dataset"; - return { name: name, reason: error.error_message }; - }); } From 053bd7424cd35b0bb03541b233a49091eb37a6c9 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 30 Jan 2023 13:35:55 +0100 Subject: [PATCH 25/42] Fix possible None on total_size When listing datasets, the `total_size` can be None in some error statuses, so make it 0 by default. 
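
For reference, a minimal sketch of why the guard is needed (illustrative only, not
part of the patch; the model below is a simplified, assumed version of StoredItem
from lib/galaxy/schema/storage_cleaner.py):

    from pydantic import BaseModel

    class StoredItem(BaseModel):
        id: int
        name: str
        type: str
        size: int  # required integer: a None value fails validation

    total_size = None  # e.g., a dataset in an error state never had its size set
    item = StoredItem(id=1, name="broken.fastq", type="dataset", size=total_size or 0)
    assert item.size == 0  # without `or 0`, building the response model would raise

Without the fallback, a single dataset in an error state would make the whole
discarded-items listing fail with a validation error.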
--- lib/galaxy/managers/hdas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index 3eb44650e60a..e9d91ff66269 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -397,7 +397,7 @@ def get_discarded( stmt = stmt.order_by(self.sort_map[order]) result = self.hda_manager.session().execute(stmt) discarded = [ - StoredItem(id=row.id, name=row.name, type="dataset", size=row.total_size, update_time=row.update_time) + StoredItem(id=row.id, name=row.name, type="dataset", size=row.total_size or 0, update_time=row.update_time) for row in result ] return discarded From 912fbb848290d2ceeaae75bd16e49f0c107d8898 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 30 Jan 2023 15:26:43 +0100 Subject: [PATCH 26/42] Trigger summary reload only when the amount changes --- .../src/components/User/DiskUsage/Management/StorageManager.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/src/components/User/DiskUsage/Management/StorageManager.vue b/client/src/components/User/DiskUsage/Management/StorageManager.vue index 927363f22236..0e722ea13945 100644 --- a/client/src/components/User/DiskUsage/Management/StorageManager.vue +++ b/client/src/components/User/DiskUsage/Management/StorageManager.vue @@ -39,7 +39,7 @@ async function onConfirmCleanupSelected(selectedItems: CleanableItem[]) { await delay(1000); if (currentOperation.value) { cleanupResult.value = await currentOperation.value.cleanupItems(selectedItems); - if (cleanupResult.value.success) { + if (cleanupResult.value.totalFreeBytes) { refreshOperationId.value = currentOperation.value.id.toString(); } } From 7e05c0f5d31962ac3990e0f991ecd00b6816fc3e Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 30 Jan 2023 15:31:23 +0100 Subject: [PATCH 27/42] Move API test to integration test To avoid interactions with other tests that may alter the state of datasets or histories --- .../api => test/integration}/test_storage_cleaner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) rename {lib/galaxy_test/api => test/integration}/test_storage_cleaner.py (98%) diff --git a/lib/galaxy_test/api/test_storage_cleaner.py b/test/integration/test_storage_cleaner.py similarity index 98% rename from lib/galaxy_test/api/test_storage_cleaner.py rename to test/integration/test_storage_cleaner.py index 5a71bae2ba8d..37f59f671354 100644 --- a/lib/galaxy_test/api/test_storage_cleaner.py +++ b/test/integration/test_storage_cleaner.py @@ -7,7 +7,7 @@ from galaxy_test.base.decorators import requires_new_history from galaxy_test.base.populators import DatasetPopulator -from ._framework import ApiTestCase +from galaxy_test.driver import integration_util class StoredItemDataForTests(NamedTuple): @@ -15,7 +15,7 @@ class StoredItemDataForTests(NamedTuple): size: int -class TestStorageCleanerApi(ApiTestCase): +class TestStorageCleaner(integration_util.IntegrationTestCase): dataset_populator: DatasetPopulator def setUp(self): @@ -56,6 +56,7 @@ def _assert_monitoring_and_cleanup_for_discarded_resource( """Tests the storage cleaner API for a particular resource (histories or datasets)""" delete_resource_uri = delete_resource_uri if delete_resource_uri else resource discarded_storage_items_uri = f"storage/{resource}/discarded" + # Initially, there shouldn't be any deleted and not purged (discarded) items summary_response = 
self._get(f"{discarded_storage_items_uri}/summary") self._assert_status_code_is_ok(summary_response) From cfca84af6ee0724aa5292723394430ac5a441f59 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 30 Jan 2023 16:20:22 +0100 Subject: [PATCH 28/42] Revert manual service encoding/decoding deprecation This reverts commit edfa62994ea55c49001d3dc4d0ea0c356120271f. Maybe worth adding it back later. --- lib/galaxy/webapps/galaxy/services/base.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/lib/galaxy/webapps/galaxy/services/base.py b/lib/galaxy/webapps/galaxy/services/base.py index 164e4a8fd7c7..752458090639 100644 --- a/lib/galaxy/webapps/galaxy/services/base.py +++ b/lib/galaxy/webapps/galaxy/services/base.py @@ -8,7 +8,6 @@ ) from celery.result import AsyncResult -from deprecated import deprecated from galaxy.exceptions import ( AuthenticationRequired, @@ -49,11 +48,6 @@ class SecurityNotProvidedError(Exception): pass -MANUAL_ENCODING_DEPRECATION_MESSAGE = ( - "Use model decoding/encoding instead with DecodedDatabaseIdField or EncodedDatabaseIdField." -) - - class ServiceBase: """Base class with common logic and utils reused by other services. @@ -70,7 +64,6 @@ def __init__(self, security: Optional[IdEncodingHelper] = None): self._security = security @property - @deprecated(reason=MANUAL_ENCODING_DEPRECATION_MESSAGE) def security(self) -> IdEncodingHelper: if self._security is None: raise SecurityNotProvidedError( @@ -78,24 +71,20 @@ def security(self) -> IdEncodingHelper: ) return self._security - @deprecated(reason=MANUAL_ENCODING_DEPRECATION_MESSAGE) def decode_id(self, id: EncodedDatabaseIdField, kind: Optional[str] = None) -> int: """Decodes a previously encoded database ID.""" return decode_with_security(self.security, id, kind=kind) - @deprecated(reason=MANUAL_ENCODING_DEPRECATION_MESSAGE) def encode_id(self, id: int, kind: Optional[str] = None) -> EncodedDatabaseIdField: """Encodes a raw database ID.""" return encode_with_security(self.security, id, kind=kind) - @deprecated(reason=MANUAL_ENCODING_DEPRECATION_MESSAGE) def decode_ids(self, ids: List[EncodedDatabaseIdField]) -> List[int]: """ Decodes all encoded IDs in the given list. 
""" return [self.decode_id(id) for id in ids] - @deprecated(reason=MANUAL_ENCODING_DEPRECATION_MESSAGE) def encode_all_ids(self, rval, recursive: bool = False): """ Encodes all integer values in the dict rval whose keys are 'id' or end with '_id' From ec9b65d2239fa4a37c6ce828c3ebc49754b3d1c6 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Tue, 31 Jan 2023 13:08:08 +0100 Subject: [PATCH 29/42] Use transaction context instead of flush --- lib/galaxy/managers/hdas.py | 21 ++++++++++----------- lib/galaxy/managers/histories.py | 22 ++++++++++------------ 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index e9d91ff66269..244d8e9d0e06 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -407,17 +407,16 @@ def cleanup_items(self, user: model.User, item_ids: Set[int]) -> StorageItemsCle total_free_bytes = 0 errors: List[StorageItemCleanupError] = [] - for hda_id in item_ids: - try: - hda = self.hda_manager.get_owned(hda_id, user) - self.hda_manager.purge(hda) - success_item_count += 1 - total_free_bytes += int(hda.get_size()) - except BaseException as e: - errors.append(StorageItemCleanupError(item_id=hda_id, error=str(e))) - - if success_item_count: - self.hda_manager.session().flush() + with self.hda_manager.session().begin(): + for hda_id in item_ids: + try: + hda = self.hda_manager.get_owned(hda_id, user) + self.hda_manager.purge(hda) + success_item_count += 1 + total_free_bytes += int(hda.get_size()) + except BaseException as e: + errors.append(StorageItemCleanupError(item_id=hda_id, error=str(e))) + return StorageItemsCleanupResult( total_item_count=len(item_ids), success_item_count=success_item_count, diff --git a/lib/galaxy/managers/histories.py b/lib/galaxy/managers/histories.py index 4a930287e99f..bd66f59367ad 100644 --- a/lib/galaxy/managers/histories.py +++ b/lib/galaxy/managers/histories.py @@ -64,7 +64,6 @@ class HistoryManager(sharable.SharableModelManager, deletable.PurgableManagerMixin, SortableManager): - model_class = model.History foreign_key_name = "history" user_share_model = model.HistoryUserShareAssociation @@ -417,17 +416,16 @@ def cleanup_items(self, user: model.User, item_ids: Set[int]) -> StorageItemsCle total_free_bytes = 0 errors: List[StorageItemCleanupError] = [] - for history_id in item_ids: - try: - history = self.history_manager.get_owned(history_id, user) - self.history_manager.purge(history, flush=False) - success_item_count += 1 - total_free_bytes += int(history.disk_size) - except BaseException as e: - errors.append(StorageItemCleanupError(item_id=history_id, error=str(e))) - - if success_item_count: - self.history_manager.session().flush() + with self.history_manager.session().begin(): + for history_id in item_ids: + try: + history = self.history_manager.get_owned(history_id, user) + self.history_manager.purge(history, flush=False) + success_item_count += 1 + total_free_bytes += int(history.disk_size) + except BaseException as e: + errors.append(StorageItemCleanupError(item_id=history_id, error=str(e))) + return StorageItemsCleanupResult( total_item_count=len(item_ids), success_item_count=success_item_count, From 4cfdc41065dc0a937758e6db969f16f2f10bfaf4 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Tue, 7 Feb 2023 15:18:13 +0100 Subject: [PATCH 30/42] Add method to purge a list of datasets from store --- lib/galaxy/managers/datasets.py | 22 +++++++++++++++++++++- 
lib/galaxy/schema/tasks.py | 9 ++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/managers/datasets.py b/lib/galaxy/managers/datasets.py index 210862879098..0d361a781ae8 100644 --- a/lib/galaxy/managers/datasets.py +++ b/lib/galaxy/managers/datasets.py @@ -25,7 +25,10 @@ secured, users, ) -from galaxy.schema.tasks import ComputeDatasetHashTaskRequest +from galaxy.schema.tasks import ( + ComputeDatasetHashTaskRequest, + PurgeDatasetsTaskRequest, +) from galaxy.structured_app import MinimalManagerApp from galaxy.util.hash_util import memory_bound_hexdigest @@ -85,6 +88,23 @@ def purge(self, dataset, flush=True): self.session().flush() return dataset + def purge_datasets(self, request: PurgeDatasetsTaskRequest): + """ + Caution: any additional security checks must be done before executing this action. + + Completely removes a set of object_store/files associated with the datasets from storage and marks them as purged. + They might not be removed if there are still un-purged associations to the dataset. + """ + self.error_unless_dataset_purge_allowed() + with self.session().begin(): + for dataset_id in request.dataset_ids: + dataset: model.Dataset = self.session().query(model.Dataset).get(dataset_id) + if dataset.user_can_purge: + try: + dataset.full_delete() + except Exception: + log.exception(f"Unable to purge dataset ({dataset.id})") + # TODO: this may be more conv. somewhere else # TODO: how to allow admin bypass? def error_unless_dataset_purge_allowed(self, msg=None): diff --git a/lib/galaxy/schema/tasks.py b/lib/galaxy/schema/tasks.py index be120cf74b5b..3f8a96aca218 100644 --- a/lib/galaxy/schema/tasks.py +++ b/lib/galaxy/schema/tasks.py @@ -1,4 +1,7 @@ -from typing import Optional +from typing import ( + List, + Optional, +) from uuid import UUID from pydantic import ( @@ -116,3 +119,7 @@ class ComputeDatasetHashTaskRequest(BaseModel): extra_files_path: Optional[str] hash_function: HashFunctionNameEnum user: Optional[RequestUser] # access checks should be done pre-celery so this is optional + + +class PurgeDatasetsTaskRequest(BaseModel): + dataset_ids: List[int] From a3ca3ff06af01a0830f2335cbf15c78a8e64398b Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Tue, 7 Feb 2023 15:19:02 +0100 Subject: [PATCH 31/42] Add celery task to purge a list of datasets --- lib/galaxy/celery/tasks.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/galaxy/celery/tasks.py b/lib/galaxy/celery/tasks.py index d452a29dfd80..35b52f308c07 100644 --- a/lib/galaxy/celery/tasks.py +++ b/lib/galaxy/celery/tasks.py @@ -43,6 +43,7 @@ ImportModelStoreTaskRequest, MaterializeDatasetInstanceTaskRequest, PrepareDatasetCollectionDownload, + PurgeDatasetsTaskRequest, SetupHistoryExportJob, WriteHistoryContentTo, WriteHistoryTo, @@ -84,6 +85,11 @@ def purge_hda(hda_manager: HDAManager, hda_id: int): hda_manager._purge(hda) +@galaxy_task(ignore_result=True, action="completely removes a set of datasets from the object_store") +def purge_datasets(dataset_manager: DatasetManager, request: PurgeDatasetsTaskRequest): + dataset_manager.purge_datasets(request) + + @galaxy_task(ignore_result=True, action="materializing dataset instance") def materialize( hda_manager: HDAManager, From 0b03edf0badaa1ef1832d2fafb749c266fef43a5 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Tue, 7 Feb 2023 15:23:09 +0100 Subject: [PATCH 32/42] Rewrite datasets cleanup API action Purge associations and datasets in 2 steps: 
First synchronously mark all associations as purged in one transaction. Then purge internal datasets in a celery task or synchronously if not available. This will ensure that the associations are marked as purged when the request finishes keeping the results consistent between subsequent requests. --- lib/galaxy/managers/hdas.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index 244d8e9d0e06..75e689ba19e5 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -52,6 +52,7 @@ ) from galaxy.schema.tasks import ( MaterializeDatasetInstanceTaskRequest, + PurgeDatasetsTaskRequest, RequestUser, ) from galaxy.structured_app import ( @@ -333,8 +334,9 @@ def _set_permissions(self, trans, hda, role_ids_dict): class HDAStorageCleanerManager(base.StorageCleanerManager): - def __init__(self, hda_manager: HDAManager): + def __init__(self, hda_manager: HDAManager, dataset_manager: datasets.DatasetManager): self.hda_manager = hda_manager + self.dataset_manager = dataset_manager self.sort_map = { StoredItemOrderBy.NAME_ASC: asc(model.HistoryDatasetAssociation.name), StoredItemOrderBy.NAME_DSC: desc(model.HistoryDatasetAssociation.name), @@ -406,17 +408,24 @@ def cleanup_items(self, user: model.User, item_ids: Set[int]) -> StorageItemsCle success_item_count = 0 total_free_bytes = 0 errors: List[StorageItemCleanupError] = [] + dataset_ids_to_remove: Set[int] = set() with self.hda_manager.session().begin(): for hda_id in item_ids: try: - hda = self.hda_manager.get_owned(hda_id, user) - self.hda_manager.purge(hda) + hda: model.HistoryDatasetAssociation = self.hda_manager.get_owned(hda_id, user) + hda.deleted = True + quota_amount = int(hda.quota_amount(user)) + hda.purge_usage_from_quota(user) + hda.purged = True + dataset_ids_to_remove.add(hda.dataset.id) success_item_count += 1 - total_free_bytes += int(hda.get_size()) + total_free_bytes += quota_amount except BaseException as e: errors.append(StorageItemCleanupError(item_id=hda_id, error=str(e))) + self._request_full_delete_all(dataset_ids_to_remove) + return StorageItemsCleanupResult( total_item_count=len(item_ids), success_item_count=success_item_count, @@ -424,6 +433,16 @@ def cleanup_items(self, user: model.User, item_ids: Set[int]) -> StorageItemsCle errors=errors, ) + def _request_full_delete_all(self, dataset_ids_to_remove: Set[int]): + use_tasks = self.dataset_manager.app.config.enable_celery_tasks + request = PurgeDatasetsTaskRequest(dataset_ids=list(dataset_ids_to_remove)) + if use_tasks: + from galaxy.celery.tasks import purge_datasets + + purge_datasets.delay(request=request) + else: + self.dataset_manager.purge_datasets(request) + class HDASerializer( # datasets._UnflattenedMetadataDatasetAssociationSerializer, datasets.DatasetAssociationSerializer[HDAManager], From a21ce9a07369d3430035812093afc800a5d76cc4 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 10 Feb 2023 11:57:34 +0100 Subject: [PATCH 33/42] Make checkbox force select all items in review --- .../Cleanup/ReviewCleanupDialog.vue | 43 +++++++++++++------ 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/client/src/components/User/DiskUsage/Management/Cleanup/ReviewCleanupDialog.vue b/client/src/components/User/DiskUsage/Management/Cleanup/ReviewCleanupDialog.vue index c1737108a5c9..4ab8733455d5 100644 --- a/client/src/components/User/DiskUsage/Management/Cleanup/ReviewCleanupDialog.vue +++ 
b/client/src/components/User/DiskUsage/Management/Cleanup/ReviewCleanupDialog.vue
@@ -46,9 +46,11 @@ const fields = [
     },
 ];
 
+const MAXIMUM_REVIEWABLE_ITEM_LIMIT = 200;
+const MAXIMUM_ITEMS_PER_PAGE = 25;
+
 const sortBy = ref("size");
 const sortDesc = ref(true);
-const perPage = ref(50);
 const currentPage = ref(1);
 const totalRows = ref(1);
 const allSelected = ref(false);
@@ -56,7 +58,6 @@ const indeterminate = ref(false);
 const showDialog = ref(false);
 const items = ref<CleanableItem[]>([]);
 const selectedItems = ref<CleanableItem[]>([]);
-const itemLimit = ref(500);
 const confirmChecked = ref(false);
 const isBusy = ref(false);
 
@@ -69,7 +70,7 @@ const hasItemsSelected = computed(() => {
 });
 
 const hasPages = computed(() => {
-    return totalRows.value > perPage.value;
+    return totalRows.value > MAXIMUM_ITEMS_PER_PAGE;
 });
 
 const title = computed(() => {
@@ -93,25 +94,26 @@ const confirmButtonVariant = computed(() => {
 });
 
 const rowLimitReached = computed(() => {
-    return totalRows.value >= itemLimit.value;
+    return totalRows.value >= MAXIMUM_REVIEWABLE_ITEM_LIMIT;
 });
 
 const rowLimitReachedText = computed(() => {
     return localize(
-        `Displaying a maximum of ${itemLimit.value} items here. If there are more, you can rerun this operation after deleting some.`
+        `Displaying a maximum of ${MAXIMUM_REVIEWABLE_ITEM_LIMIT} items here. If there are more, you can rerun this operation after deleting some.`
     );
 });
 
 watch(props, (newVal) => {
     currentPage.value = 1;
-    totalRows.value = newVal.totalItems;
+    totalRows.value =
+        newVal.totalItems > MAXIMUM_REVIEWABLE_ITEM_LIMIT ? MAXIMUM_REVIEWABLE_ITEM_LIMIT : newVal.totalItems;
 });
 
 watch(selectedItems, (newVal) => {
     if (newVal.length === 0) {
         indeterminate.value = false;
         allSelected.value = false;
-    } else if (newVal.length === items.value.length) {
+    } else if (newVal.length === totalRows.value) {
         indeterminate.value = false;
         allSelected.value = true;
     } else {
@@ -124,8 +126,12 @@ function toNiceSize(sizeInBytes: number) {
     return bytesToString(sizeInBytes, true, undefined);
 }
 
-function toggleSelectAll(checked: boolean) {
-    selectedItems.value = checked ? items.value : [];
+async function toggleSelectAll(checked: boolean) {
+    if (checked) {
+        await onSelectAllItems();
+    } else {
+        unselectAll();
+    }
 }
 
 function openModal() {
@@ -141,7 +147,7 @@ function onShowModal() {
 }
 
 function resetModal() {
-    selectedItems.value = [];
+    unselectAll();
 }
 
 function resetConfirmationModal() {
@@ -185,10 +191,15 @@ async function onSelectAllItems() {
             sortDesc: sortDesc.value,
         })
     );
+    items.value = allItems;
     selectedItems.value = allItems;
     isBusy.value = false;
 }
 
+function unselectAll() {
+    selectedItems.value = [];
+}
+
 defineExpose({
     openModal,
     selectedItems,
})
{{ captionText }} - select all {{ totalItems }} items + select all {{ totalRows }} items