From 658b83deda7ccdf379f7de386dc8b8a53fe030a4 Mon Sep 17 00:00:00 2001 From: shayanaijaz Date: Tue, 22 Aug 2023 16:28:32 -0500 Subject: [PATCH 1/5] added success message and state for interactive job on timeout --- server/portal/apps/webhooks/views.py | 9 +++++++++ server/portal/apps/workspace/api/views.py | 19 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/server/portal/apps/webhooks/views.py b/server/portal/apps/webhooks/views.py index 702045b27..c3cdac2cf 100644 --- a/server/portal/apps/webhooks/views.py +++ b/server/portal/apps/webhooks/views.py @@ -54,6 +54,14 @@ def validate_tapis_job(job_uuid, job_owner, disallowed_states=[]): if job_data.status in disallowed_states: return None + if hasattr(job_data, 'notes') and job_data.status == 'FAILED': + notes = json.loads(job_data.notes) + + # checks to see if an interactive job ended with tapis timeout code of 0:0 + if notes.get('isInteractive', False) and job_data.remoteResultInfo == '0:0': + job_data.status = 'FINISHED' + job_data.remoteOutcome = 'FINISHED' + return job_data @@ -122,6 +130,7 @@ def post(self, request, *args, **kwargs): job_details = validate_tapis_job(job_uuid, username, disallowed_states=non_terminal_states) if job_details: event_data[Notification.EXTRA]['remoteOutcome'] = job_details.remoteOutcome + event_data[Notification.EXTRA]['status'] = job_details.status try: logger.info('Indexing job output for job={}'.format(job_uuid)) diff --git a/server/portal/apps/workspace/api/views.py b/server/portal/apps/workspace/api/views.py index b79716428..74fa93aac 100644 --- a/server/portal/apps/workspace/api/views.py +++ b/server/portal/apps/workspace/api/views.py @@ -138,6 +138,19 @@ def get(self, request, *args, **kwargs): @method_decorator(login_required, name='dispatch') class JobsView(BaseApiView): + + @staticmethod + def check_job_for_timeout(job): + if hasattr(job, 'notes') and job.status == 'FAILED': + notes = json.loads(job.notes) + + # checks to see if an interactive job ended with tapis timeout code of 0:0 + if notes.get('isInteractive', False) and job.remoteResultInfo == '0:0': + job.status = 'FINISHED' + job.remoteOutcome = 'FINISHED' + + return job + def get(self, request, operation=None): allowed_actions = ['listing', 'search', 'select'] @@ -150,6 +163,12 @@ def get(self, request, operation=None): op = getattr(self, operation) data = op(tapis, request) + if (isinstance(data, list)): + for index, job in enumerate(data): + data[index] = self.check_job_for_timeout(job) + else: + data = self.check_job_for_timeout(data) + return JsonResponse( { 'status': 200, From 8a39f95424bad462d87f79657be952010ac78724 Mon Sep 17 00:00:00 2001 From: shayanaijaz Date: Wed, 23 Aug 2023 15:35:28 -0500 Subject: [PATCH 2/5] linting --- server/portal/apps/workspace/api/views.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/portal/apps/workspace/api/views.py b/server/portal/apps/workspace/api/views.py index 74fa93aac..6e4f5d3d8 100644 --- a/server/portal/apps/workspace/api/views.py +++ b/server/portal/apps/workspace/api/views.py @@ -138,7 +138,7 @@ def get(self, request, *args, **kwargs): @method_decorator(login_required, name='dispatch') class JobsView(BaseApiView): - + @staticmethod def check_job_for_timeout(job): if hasattr(job, 'notes') and job.status == 'FAILED': @@ -148,9 +148,9 @@ def check_job_for_timeout(job): if notes.get('isInteractive', False) and job.remoteResultInfo == '0:0': job.status = 'FINISHED' job.remoteOutcome = 'FINISHED' - + return job - + def get(self, request, operation=None): allowed_actions = ['listing', 'search', 'select'] @@ -164,7 +164,7 @@ def get(self, request, operation=None): data = op(tapis, request) if (isinstance(data, list)): - for index, job in enumerate(data): + for index, job in enumerate(data): data[index] = self.check_job_for_timeout(job) else: data = self.check_job_for_timeout(data) From d9f665e39a74ccf338e2cdbca2ccdeb5215a25ca Mon Sep 17 00:00:00 2001 From: shayanaijaz Date: Wed, 30 Aug 2023 11:28:01 -0500 Subject: [PATCH 3/5] created a central utility function, added logic to determine timeout --- server/portal/apps/webhooks/views.py | 9 ++----- server/portal/apps/workspace/api/utils.py | 23 ++++++++++++++++ server/portal/apps/workspace/api/views.py | 33 +++++++++++++++-------- 3 files changed, 47 insertions(+), 18 deletions(-) create mode 100644 server/portal/apps/workspace/api/utils.py diff --git a/server/portal/apps/webhooks/views.py b/server/portal/apps/webhooks/views.py index c3cdac2cf..54544b23d 100644 --- a/server/portal/apps/webhooks/views.py +++ b/server/portal/apps/webhooks/views.py @@ -20,6 +20,7 @@ validate_webhook, execute_callback ) +from portal.apps.workspace.api.utils import check_job_for_timeout from django.conf import settings @@ -54,13 +55,7 @@ def validate_tapis_job(job_uuid, job_owner, disallowed_states=[]): if job_data.status in disallowed_states: return None - if hasattr(job_data, 'notes') and job_data.status == 'FAILED': - notes = json.loads(job_data.notes) - - # checks to see if an interactive job ended with tapis timeout code of 0:0 - if notes.get('isInteractive', False) and job_data.remoteResultInfo == '0:0': - job_data.status = 'FINISHED' - job_data.remoteOutcome = 'FINISHED' + job_data = check_job_for_timeout(job_data) return job_data diff --git a/server/portal/apps/workspace/api/utils.py b/server/portal/apps/workspace/api/utils.py new file mode 100644 index 000000000..bf0ca0a1a --- /dev/null +++ b/server/portal/apps/workspace/api/utils.py @@ -0,0 +1,23 @@ +import json + + +def get_tapis_timeout_error_messages(job_id): + return [ + 'JOBS_EARLY_TERMINATION Job terminated by Tapis because: TIME_EXPIRED', + f'JOBS_USER_APP_FAILURE The user application ({job_id}) ended with remote status "TIMEOUT" and returned exit code: 0:0.' + ] + + +def check_job_for_timeout(job): + if (hasattr(job, 'notes')): + notes = json.loads(job.notes) + + is_failed = job.status == 'FAILED' + is_interactive = notes.get('isInteractive', False) + has_timeout_message = job.lastMessage in get_tapis_timeout_error_messages(job.remoteJobId) + + if is_failed and is_interactive and has_timeout_message: + job.status = 'FINISHED' + job.remoteOutcome = 'FINISHED' + + return job diff --git a/server/portal/apps/workspace/api/views.py b/server/portal/apps/workspace/api/views.py index 6e4f5d3d8..6a611ba7f 100644 --- a/server/portal/apps/workspace/api/views.py +++ b/server/portal/apps/workspace/api/views.py @@ -23,6 +23,7 @@ from portal.apps.onboarding.steps.system_access_v3 import create_system_credentials from portal.apps.users.utils import get_user_data from .handlers.tapis_handlers import tapis_get_handler +from portal.apps.workspace.api.utils import check_job_for_timeout logger = logging.getLogger(__name__) METRICS = logging.getLogger('metrics.{}'.format(__name__)) @@ -139,17 +140,27 @@ def get(self, request, *args, **kwargs): @method_decorator(login_required, name='dispatch') class JobsView(BaseApiView): - @staticmethod - def check_job_for_timeout(job): - if hasattr(job, 'notes') and job.status == 'FAILED': - notes = json.loads(job.notes) + # @staticmethod + # def get_tapis_timeout_error_messages(job_id): + # return [ + # 'JOBS_EARLY_TERMINATION Job terminated by Tapis because: TIME_EXPIRED', + # f'JOBS_USER_APP_FAILURE The user application ({job_id}) ended with remote status "TIMEOUT" and returned exit code: 0:0.' + # ] - # checks to see if an interactive job ended with tapis timeout code of 0:0 - if notes.get('isInteractive', False) and job.remoteResultInfo == '0:0': - job.status = 'FINISHED' - job.remoteOutcome = 'FINISHED' + # @staticmethod + # def check_job_for_timeout(job, timeout_messages): + # if (hasattr(job, 'notes')): + # notes = json.loads(job.notes) - return job + # is_failed = job.status == 'FAILED' + # is_interactive = notes.get('isInteractive', False) + # has_timeout_message = job.lastMessage in timeout_messages + + # if is_failed and is_interactive and has_timeout_message: + # job.status = 'FINISHED' + # job.remoteOutcome = 'FINISHED' + + # return job def get(self, request, operation=None): @@ -165,9 +176,9 @@ def get(self, request, operation=None): if (isinstance(data, list)): for index, job in enumerate(data): - data[index] = self.check_job_for_timeout(job) + data[index] = check_job_for_timeout(job) else: - data = self.check_job_for_timeout(data) + data = check_job_for_timeout(data) return JsonResponse( { From 9a0066887dd45ad2df2dd226fd6e7a255bb73811 Mon Sep 17 00:00:00 2001 From: shayanaijaz Date: Wed, 30 Aug 2023 11:40:40 -0500 Subject: [PATCH 4/5] remove commented code --- server/portal/apps/workspace/api/views.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/server/portal/apps/workspace/api/views.py b/server/portal/apps/workspace/api/views.py index 6a611ba7f..9a94db829 100644 --- a/server/portal/apps/workspace/api/views.py +++ b/server/portal/apps/workspace/api/views.py @@ -140,28 +140,6 @@ def get(self, request, *args, **kwargs): @method_decorator(login_required, name='dispatch') class JobsView(BaseApiView): - # @staticmethod - # def get_tapis_timeout_error_messages(job_id): - # return [ - # 'JOBS_EARLY_TERMINATION Job terminated by Tapis because: TIME_EXPIRED', - # f'JOBS_USER_APP_FAILURE The user application ({job_id}) ended with remote status "TIMEOUT" and returned exit code: 0:0.' - # ] - - # @staticmethod - # def check_job_for_timeout(job, timeout_messages): - # if (hasattr(job, 'notes')): - # notes = json.loads(job.notes) - - # is_failed = job.status == 'FAILED' - # is_interactive = notes.get('isInteractive', False) - # has_timeout_message = job.lastMessage in timeout_messages - - # if is_failed and is_interactive and has_timeout_message: - # job.status = 'FINISHED' - # job.remoteOutcome = 'FINISHED' - - # return job - def get(self, request, operation=None): allowed_actions = ['listing', 'search', 'select'] From d87a5eb468de1202c08566c0d10b002f1c7c0538 Mon Sep 17 00:00:00 2001 From: shayanaijaz Date: Wed, 30 Aug 2023 12:28:20 -0500 Subject: [PATCH 5/5] Added a comment --- server/portal/apps/workspace/api/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/portal/apps/workspace/api/utils.py b/server/portal/apps/workspace/api/utils.py index bf0ca0a1a..9d9923dfa 100644 --- a/server/portal/apps/workspace/api/utils.py +++ b/server/portal/apps/workspace/api/utils.py @@ -9,6 +9,11 @@ def get_tapis_timeout_error_messages(job_id): def check_job_for_timeout(job): + """ + Check an interactive job for timeout status and mark it as finished + since Tapis does not have native support for interactive jobs yet + """ + if (hasattr(job, 'notes')): notes = json.loads(job.notes)