From 289b9c5f737f1c2c52ef7589e99313614d870c3f Mon Sep 17 00:00:00 2001 From: Maurane GLAUDE Date: Wed, 2 Oct 2024 13:31:14 +0200 Subject: [PATCH 1/6] feat(auto_archive_service): add snapshot clearing task and option in `config` --- antarest/core/config.py | 5 +++++ .../study/storage/auto_archive_service.py | 20 +++++++++++++++++-- .../business/test_autoarchive_service.py | 2 ++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/antarest/core/config.py b/antarest/core/config.py index 4b85331d6a..e7e39d71b7 100644 --- a/antarest/core/config.py +++ b/antarest/core/config.py @@ -155,6 +155,7 @@ class StorageConfig: auto_archive_dry_run: bool = False auto_archive_sleeping_time: int = 3600 auto_archive_max_parallel: int = 5 + variant_snapshot_lifespan_days: int = 1 @classmethod def from_dict(cls, data: JSON) -> "StorageConfig": @@ -184,6 +185,10 @@ def from_dict(cls, data: JSON) -> "StorageConfig": auto_archive_dry_run=data.get("auto_archive_dry_run", defaults.auto_archive_dry_run), auto_archive_sleeping_time=data.get("auto_archive_sleeping_time", defaults.auto_archive_sleeping_time), auto_archive_max_parallel=data.get("auto_archive_max_parallel", defaults.auto_archive_max_parallel), + variant_snapshot_lifespan_days=data.get( + "snapshot_variant_lifespan_days", + defaults.variant_snapshot_lifespan_days, + ), ) diff --git a/antarest/study/storage/auto_archive_service.py b/antarest/study/storage/auto_archive_service.py index 85cb2237f9..6c4ff9ad12 100644 --- a/antarest/study/storage/auto_archive_service.py +++ b/antarest/study/storage/auto_archive_service.py @@ -48,8 +48,9 @@ def _try_archive_studies(self) -> None: study_ids_to_archive = [ (study.id, isinstance(study, RawStudy)) for study in studies - if (study.last_access or study.updated_at) < old_date - and (isinstance(study, VariantStudy) or not study.archived) + if ((study.last_access or study.updated_at) < old_date) + and isinstance(study, VariantStudy) + or not study.archived ] for study_id, is_raw_study in study_ids_to_archive[0 : self.max_parallel]: try: @@ -84,10 +85,25 @@ def _try_archive_studies(self) -> None: exc_info=e, ) + def _try_clear_snapshot(self) -> None: + snapshot_old_time = datetime.datetime.utcnow() - datetime.timedelta( + days=self.config.storage.variant_snapshot_lifespan_days + ) + # get variants + variants = self.study_service.repository.get_all( + study_filter=StudyFilter(variant=True, access_permissions=AccessPermissions(is_admin=True)) + ) + # check their date + for variant in variants: + if (variant.last_access or variant.updated_at) < snapshot_old_time: + # clear ones that are too old + self.study_service.storage_service.variant_study_service.clear_snapshot(variant.id) + def _loop(self) -> None: while True: try: self._try_archive_studies() + self._try_clear_snapshot() except Exception as e: logger.error( "Unexpected error happened when processing auto archive service loop", diff --git a/tests/storage/business/test_autoarchive_service.py b/tests/storage/business/test_autoarchive_service.py index 559126f00d..e0f0bac2cc 100644 --- a/tests/storage/business/test_autoarchive_service.py +++ b/tests/storage/business/test_autoarchive_service.py @@ -96,3 +96,5 @@ def test_auto_archival(tmp_path: Path): # Check that the variant outputs are deleted for the variant study "e" study_service.archive_outputs.assert_called_once_with("e", params=RequestParameters(DEFAULT_ADMIN_USER)) + + # Check if variant snapshots with date older than variant_snapshot_lifespan_days argument are cleared From 627c295bda0f5c69c5074e1aba80e4b5a76cf7aa Mon Sep 17 00:00:00 2001 From: Maurane GLAUDE Date: Mon, 7 Oct 2024 13:49:44 +0200 Subject: [PATCH 2/6] refactor(variant_study_service): re-use snapshot clearing task feature --- antarest/study/storage/auto_archive_service.py | 16 +++++----------- .../storage/business/test_autoarchive_service.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/antarest/study/storage/auto_archive_service.py b/antarest/study/storage/auto_archive_service.py index 6c4ff9ad12..dc2795bfa4 100644 --- a/antarest/study/storage/auto_archive_service.py +++ b/antarest/study/storage/auto_archive_service.py @@ -86,18 +86,12 @@ def _try_archive_studies(self) -> None: ) def _try_clear_snapshot(self) -> None: - snapshot_old_time = datetime.datetime.utcnow() - datetime.timedelta( - days=self.config.storage.variant_snapshot_lifespan_days - ) - # get variants - variants = self.study_service.repository.get_all( - study_filter=StudyFilter(variant=True, access_permissions=AccessPermissions(is_admin=True)) + # convert days in hours + hours_of_lifespan = hours=self.config.storage.variant_snapshot_lifespan_days * 24 + + self.study_service.storage_service.variant_study_service.clear_all_snapshots( + hours_of_lifespan ) - # check their date - for variant in variants: - if (variant.last_access or variant.updated_at) < snapshot_old_time: - # clear ones that are too old - self.study_service.storage_service.variant_study_service.clear_snapshot(variant.id) def _loop(self) -> None: while True: diff --git a/tests/storage/business/test_autoarchive_service.py b/tests/storage/business/test_autoarchive_service.py index e0f0bac2cc..52fb4d436d 100644 --- a/tests/storage/business/test_autoarchive_service.py +++ b/tests/storage/business/test_autoarchive_service.py @@ -69,6 +69,18 @@ def test_auto_archival(tmp_path: Path): id="e", updated_at=now - datetime.timedelta(days=61), ), + VariantStudy( + id="f", + updated_at=now - datetime.timedelta(days=0), + ), + VariantStudy( + id="g", + updated_at=now - datetime.timedelta(days=1), + ), + VariantStudy( + id="h", + last_access=now - datetime.timedelta(days=2) + ) ] ) db_session.commit() @@ -97,4 +109,7 @@ def test_auto_archival(tmp_path: Path): # Check that the variant outputs are deleted for the variant study "e" study_service.archive_outputs.assert_called_once_with("e", params=RequestParameters(DEFAULT_ADMIN_USER)) + # Test auto snapshot clearing + auto_archive_service._try_clear_snapshot() + # Check if variant snapshots with date older than variant_snapshot_lifespan_days argument are cleared From fb71df13ab820be0e6602349ab2309879681f328 Mon Sep 17 00:00:00 2001 From: Maurane GLAUDE Date: Mon, 7 Oct 2024 14:59:35 +0200 Subject: [PATCH 3/6] test(test_autoarchive_service): add test --- .../study/storage/auto_archive_service.py | 9 ++++----- .../business/test_autoarchive_service.py | 19 ++++--------------- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/antarest/study/storage/auto_archive_service.py b/antarest/study/storage/auto_archive_service.py index dc2795bfa4..23e5914d8c 100644 --- a/antarest/study/storage/auto_archive_service.py +++ b/antarest/study/storage/auto_archive_service.py @@ -48,9 +48,8 @@ def _try_archive_studies(self) -> None: study_ids_to_archive = [ (study.id, isinstance(study, RawStudy)) for study in studies - if ((study.last_access or study.updated_at) < old_date) - and isinstance(study, VariantStudy) - or not study.archived + if (study.last_access or study.updated_at) < old_date + and (isinstance(study, VariantStudy) or not study.archived) ] for study_id, is_raw_study in study_ids_to_archive[0 : self.max_parallel]: try: @@ -87,10 +86,10 @@ def _try_archive_studies(self) -> None: def _try_clear_snapshot(self) -> None: # convert days in hours - hours_of_lifespan = hours=self.config.storage.variant_snapshot_lifespan_days * 24 + hours_of_lifespan = self.config.storage.variant_snapshot_lifespan_days * 24 self.study_service.storage_service.variant_study_service.clear_all_snapshots( - hours_of_lifespan + datetime.timedelta(hours=hours_of_lifespan) ) def _loop(self) -> None: diff --git a/tests/storage/business/test_autoarchive_service.py b/tests/storage/business/test_autoarchive_service.py index 52fb4d436d..7d1b0c94a7 100644 --- a/tests/storage/business/test_autoarchive_service.py +++ b/tests/storage/business/test_autoarchive_service.py @@ -69,18 +69,6 @@ def test_auto_archival(tmp_path: Path): id="e", updated_at=now - datetime.timedelta(days=61), ), - VariantStudy( - id="f", - updated_at=now - datetime.timedelta(days=0), - ), - VariantStudy( - id="g", - updated_at=now - datetime.timedelta(days=1), - ), - VariantStudy( - id="h", - last_access=now - datetime.timedelta(days=2) - ) ] ) db_session.commit() @@ -109,7 +97,8 @@ def test_auto_archival(tmp_path: Path): # Check that the variant outputs are deleted for the variant study "e" study_service.archive_outputs.assert_called_once_with("e", params=RequestParameters(DEFAULT_ADMIN_USER)) - # Test auto snapshot clearing + # Check if the `clear_all_snapshots` method was called with default values auto_archive_service._try_clear_snapshot() - - # Check if variant snapshots with date older than variant_snapshot_lifespan_days argument are cleared + study_service.storage_service.variant_study_service.clear_all_snapshots.assert_called_once_with( + datetime.timedelta(hours=24), + ) From 9407129a376dbfcec2e08c237892cf5512045520 Mon Sep 17 00:00:00 2001 From: Maurane GLAUDE Date: Mon, 7 Oct 2024 15:28:50 +0200 Subject: [PATCH 4/6] docs: add new configuration field --- antarest/core/config.py | 6 +++--- antarest/study/storage/auto_archive_service.py | 4 ++-- docs/install/1-CONFIG.md | 6 ++++++ 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/antarest/core/config.py b/antarest/core/config.py index e7e39d71b7..2ef63515a5 100644 --- a/antarest/core/config.py +++ b/antarest/core/config.py @@ -155,7 +155,7 @@ class StorageConfig: auto_archive_dry_run: bool = False auto_archive_sleeping_time: int = 3600 auto_archive_max_parallel: int = 5 - variant_snapshot_lifespan_days: int = 1 + snapshot_lifespan: int = 1 @classmethod def from_dict(cls, data: JSON) -> "StorageConfig": @@ -185,9 +185,9 @@ def from_dict(cls, data: JSON) -> "StorageConfig": auto_archive_dry_run=data.get("auto_archive_dry_run", defaults.auto_archive_dry_run), auto_archive_sleeping_time=data.get("auto_archive_sleeping_time", defaults.auto_archive_sleeping_time), auto_archive_max_parallel=data.get("auto_archive_max_parallel", defaults.auto_archive_max_parallel), - variant_snapshot_lifespan_days=data.get( + snapshot_lifespan=data.get( "snapshot_variant_lifespan_days", - defaults.variant_snapshot_lifespan_days, + defaults.snapshot_lifespan, ), ) diff --git a/antarest/study/storage/auto_archive_service.py b/antarest/study/storage/auto_archive_service.py index 23e5914d8c..9e714306b8 100644 --- a/antarest/study/storage/auto_archive_service.py +++ b/antarest/study/storage/auto_archive_service.py @@ -85,8 +85,8 @@ def _try_archive_studies(self) -> None: ) def _try_clear_snapshot(self) -> None: - # convert days in hours - hours_of_lifespan = self.config.storage.variant_snapshot_lifespan_days * 24 + # get retention hours from number of days + hours_of_lifespan = self.config.storage.snapshot_lifespan * 24 self.study_service.storage_service.variant_study_service.clear_all_snapshots( datetime.timedelta(hours=hours_of_lifespan) diff --git a/docs/install/1-CONFIG.md b/docs/install/1-CONFIG.md index 9ad48dad03..554f1a9728 100644 --- a/docs/install/1-CONFIG.md +++ b/docs/install/1-CONFIG.md @@ -304,6 +304,12 @@ default: - **Default value:** 5 - **Description:** Max auto archival tasks in parallel. +## **snapshot_lifespan** + +- **Type:** Integer +- **Default value:** 1 +- **Description:** Snapshots of variant not updated or accessed for **snapshot_lifespan** days will be cleared. + ## **watcher_lock** - **Type:** Boolean From 277216e0d7afb896ae6b87f0566071597d6bfd19 Mon Sep 17 00:00:00 2001 From: Maurane GLAUDE Date: Mon, 7 Oct 2024 17:46:32 +0200 Subject: [PATCH 5/6] style: rename variable 'snapshot_lifespan' to 'snapshot_retention_days' --- antarest/core/config.py | 6 +++--- antarest/study/storage/auto_archive_service.py | 5 +---- docs/install/1-CONFIG.md | 4 ++-- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/antarest/core/config.py b/antarest/core/config.py index 2ef63515a5..56022904c8 100644 --- a/antarest/core/config.py +++ b/antarest/core/config.py @@ -155,7 +155,7 @@ class StorageConfig: auto_archive_dry_run: bool = False auto_archive_sleeping_time: int = 3600 auto_archive_max_parallel: int = 5 - snapshot_lifespan: int = 1 + snapshot_retention_days: int = 1 @classmethod def from_dict(cls, data: JSON) -> "StorageConfig": @@ -185,9 +185,9 @@ def from_dict(cls, data: JSON) -> "StorageConfig": auto_archive_dry_run=data.get("auto_archive_dry_run", defaults.auto_archive_dry_run), auto_archive_sleeping_time=data.get("auto_archive_sleeping_time", defaults.auto_archive_sleeping_time), auto_archive_max_parallel=data.get("auto_archive_max_parallel", defaults.auto_archive_max_parallel), - snapshot_lifespan=data.get( + snapshot_retention_days=data.get( "snapshot_variant_lifespan_days", - defaults.snapshot_lifespan, + defaults.snapshot_retention_days, ), ) diff --git a/antarest/study/storage/auto_archive_service.py b/antarest/study/storage/auto_archive_service.py index 9e714306b8..a3a1525d69 100644 --- a/antarest/study/storage/auto_archive_service.py +++ b/antarest/study/storage/auto_archive_service.py @@ -85,11 +85,8 @@ def _try_archive_studies(self) -> None: ) def _try_clear_snapshot(self) -> None: - # get retention hours from number of days - hours_of_lifespan = self.config.storage.snapshot_lifespan * 24 - self.study_service.storage_service.variant_study_service.clear_all_snapshots( - datetime.timedelta(hours=hours_of_lifespan) + datetime.timedelta(days=self.config.storage.snapshot_retention_days) ) def _loop(self) -> None: diff --git a/docs/install/1-CONFIG.md b/docs/install/1-CONFIG.md index 554f1a9728..b9e04f644f 100644 --- a/docs/install/1-CONFIG.md +++ b/docs/install/1-CONFIG.md @@ -304,11 +304,11 @@ default: - **Default value:** 5 - **Description:** Max auto archival tasks in parallel. -## **snapshot_lifespan** +## **snapshot_retention_days** - **Type:** Integer - **Default value:** 1 -- **Description:** Snapshots of variant not updated or accessed for **snapshot_lifespan** days will be cleared. +- **Description:** Snapshots of variant not updated or accessed for **snapshot_retention_days** days will be cleared. ## **watcher_lock** From e9af95d4650dde45f05b5568c389255e0b80ec71 Mon Sep 17 00:00:00 2001 From: Maurane GLAUDE Date: Tue, 8 Oct 2024 09:48:47 +0200 Subject: [PATCH 6/6] refactor(storage): refactor `_try_archive_studies` and fix minor issues --- antarest/core/config.py | 2 +- antarest/study/storage/auto_archive_service.py | 13 +++++++------ .../storage/business/test_autoarchive_service.py | 15 +++++---------- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/antarest/core/config.py b/antarest/core/config.py index 56022904c8..b88e4f44bb 100644 --- a/antarest/core/config.py +++ b/antarest/core/config.py @@ -186,7 +186,7 @@ def from_dict(cls, data: JSON) -> "StorageConfig": auto_archive_sleeping_time=data.get("auto_archive_sleeping_time", defaults.auto_archive_sleeping_time), auto_archive_max_parallel=data.get("auto_archive_max_parallel", defaults.auto_archive_max_parallel), snapshot_retention_days=data.get( - "snapshot_variant_lifespan_days", + "snapshot_retention_days", defaults.snapshot_retention_days, ), ) diff --git a/antarest/study/storage/auto_archive_service.py b/antarest/study/storage/auto_archive_service.py index a3a1525d69..b293fc1207 100644 --- a/antarest/study/storage/auto_archive_service.py +++ b/antarest/study/storage/auto_archive_service.py @@ -38,11 +38,17 @@ def __init__(self, study_service: StudyService, config: Config): self.max_parallel = self.config.storage.auto_archive_max_parallel def _try_archive_studies(self) -> None: + """ + Archive old studies + Clear old variant snapshots + """ old_date = datetime.datetime.utcnow() - datetime.timedelta(days=self.config.storage.auto_archive_threshold_days) with db(): # in this part full `Read` rights over studies are granted to this function studies: t.Sequence[Study] = self.study_service.repository.get_all( - study_filter=StudyFilter(managed=True, access_permissions=AccessPermissions(is_admin=True)) + study_filter=StudyFilter( + managed=True, + access_permissions=AccessPermissions(is_admin=True)) ) # list of study IDs and boolean indicating if it's a raw study (True) or a variant (False) study_ids_to_archive = [ @@ -73,9 +79,6 @@ def _try_archive_studies(self) -> None: study_id, params=RequestParameters(DEFAULT_ADMIN_USER), ) - self.study_service.storage_service.variant_study_service.clear_snapshot( - self.study_service.get_study(study_id) - ) except TaskAlreadyRunning: pass except Exception as e: @@ -84,7 +87,6 @@ def _try_archive_studies(self) -> None: exc_info=e, ) - def _try_clear_snapshot(self) -> None: self.study_service.storage_service.variant_study_service.clear_all_snapshots( datetime.timedelta(days=self.config.storage.snapshot_retention_days) ) @@ -93,7 +95,6 @@ def _loop(self) -> None: while True: try: self._try_archive_studies() - self._try_clear_snapshot() except Exception as e: logger.error( "Unexpected error happened when processing auto archive service loop", diff --git a/tests/storage/business/test_autoarchive_service.py b/tests/storage/business/test_autoarchive_service.py index 7d1b0c94a7..eb04adf2b2 100644 --- a/tests/storage/business/test_autoarchive_service.py +++ b/tests/storage/business/test_autoarchive_service.py @@ -69,6 +69,10 @@ def test_auto_archival(tmp_path: Path): id="e", updated_at=now - datetime.timedelta(days=61), ), + VariantStudy( + id="f", + updated_at=now - datetime.timedelta(days=1), + ), ] ) db_session.commit() @@ -86,19 +90,10 @@ def test_auto_archival(tmp_path: Path): # Check that the raw study "d" was about to be archived but failed because the task was already running study_service.archive.assert_called_once_with("d", params=RequestParameters(DEFAULT_ADMIN_USER)) - # Check that the snapshot of the variant study "e" is cleared - study_service.storage_service.variant_study_service.clear_snapshot.assert_called_once() - calls = study_service.storage_service.variant_study_service.clear_snapshot.call_args_list - assert len(calls) == 1 - clear_snapshot_call = calls[0] - actual_study = clear_snapshot_call[0][0] - assert actual_study.id == "e" - # Check that the variant outputs are deleted for the variant study "e" study_service.archive_outputs.assert_called_once_with("e", params=RequestParameters(DEFAULT_ADMIN_USER)) # Check if the `clear_all_snapshots` method was called with default values - auto_archive_service._try_clear_snapshot() study_service.storage_service.variant_study_service.clear_all_snapshots.assert_called_once_with( - datetime.timedelta(hours=24), + datetime.timedelta(days=1), )