From 9a896703f2f504e2276564198edc0a6c36ca04e2 Mon Sep 17 00:00:00 2001 From: Wen Guan Date: Sun, 5 Nov 2023 15:35:02 +0100 Subject: [PATCH] increase memory --- .github/workflows/python-publish.yml | 1 + doma/lib/idds/doma/workflowv2/domapandawork.py | 3 ++- main/lib/idds/tests/panda_client_submit_test.py | 2 +- main/tools/panda/increase_memory | 4 ++++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 356774e0..76319057 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -49,3 +49,4 @@ jobs: python main/tools/pypi/update_version.py ${version_tag} python setup.py sdist bdist_wheel twine upload */dist/idds*-${version_tag}.tar.gz + twine upload */dist/idds*-${version_tag}*.whl diff --git a/doma/lib/idds/doma/workflowv2/domapandawork.py b/doma/lib/idds/doma/workflowv2/domapandawork.py index ae803cd0..b0aa5d78 100644 --- a/doma/lib/idds/doma/workflowv2/domapandawork.py +++ b/doma/lib/idds/doma/workflowv2/domapandawork.py @@ -553,8 +553,9 @@ def create_processing(self, input_output_maps=[]): task_param_map['ramCount'] = self.task_rss / self.core_count if self.core_count else self.task_rss # task_param_map['ramUnit'] = 'MB' task_param_map['ramUnit'] = 'MBPerCoreFixed' - if self.task_rss_retry_offset and self.task_rss_retry_step: + if self.task_rss_retry_offset: task_param_map['retryRamOffset'] = self.task_rss_retry_offset + if self.task_rss_retry_step: task_param_map['retryRamStep'] = self.task_rss_retry_step if self.task_rss_max: # todo: until PanDA supports it diff --git a/main/lib/idds/tests/panda_client_submit_test.py b/main/lib/idds/tests/panda_client_submit_test.py index 537b2fef..0d476fad 100644 --- a/main/lib/idds/tests/panda_client_submit_test.py +++ b/main/lib/idds/tests/panda_client_submit_test.py @@ -43,7 +43,7 @@ taskParamMap['ramUnit'] = 'MBPerCoreFixed' taskParamMap['retryRamOffset'] = 2000 -taskParamMap['retryRamStep'] = 400 +taskParamMap['retryRamStep'] = 0.3 """ taskParamMap['log'] = {'dataset': logDatasetName, diff --git a/main/tools/panda/increase_memory b/main/tools/panda/increase_memory index fd09c225..970bea93 100644 --- a/main/tools/panda/increase_memory +++ b/main/tools/panda/increase_memory @@ -6,3 +6,7 @@ insert into retryerrors(retryerror_id, errorsource, errorcode, active, retryacti insert into retryerrors(retryerror_id, errorsource, errorcode, errordiag, active, retryaction, description) values(3, 'pilotErrorCode', 1305, '.*Unable to allocate.*', 'Y', 2, 'increase memory'); +# add rule to increase memory x times +insert into retryactions(retryaction_id, retry_action, active, retry_description) values (1, 'increase_memory_xtimes', 'Y', 'Job ran out of memory. Increase memory setting for next retry.'); +insert into retryerrors(retryerror_id, errorsource, errorcode, errordiag, active, retryaction, description) values(1, 'taskBufferErrorCode', 300, '.*The worker was finished while the job was starting.*', 'Y', 1, 'increase memory'); +