Skip to content

Commit

Permalink
task/WP-189 Handle timeout exit code for interactive app jobs (#851)
Browse files Browse the repository at this point in the history
* added success message and state for interactive job on timeout

* linting

* created a central utility function, added logic to determine timeout

* remove commented code

* Added a comment
  • Loading branch information
shayanaijaz authored and chandra-tacc committed Sep 13, 2023
1 parent ab6d9bc commit d36ba9f
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 0 deletions.
4 changes: 4 additions & 0 deletions server/portal/apps/webhooks/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
validate_webhook,
execute_callback
)
from portal.apps.workspace.api.utils import check_job_for_timeout

from django.conf import settings

Expand Down Expand Up @@ -54,6 +55,8 @@ def validate_tapis_job(job_uuid, job_owner, disallowed_states=[]):
if job_data.status in disallowed_states:
return None

job_data = check_job_for_timeout(job_data)

return job_data


Expand Down Expand Up @@ -122,6 +125,7 @@ def post(self, request, *args, **kwargs):
job_details = validate_tapis_job(job_uuid, username, disallowed_states=non_terminal_states)
if job_details:
event_data[Notification.EXTRA]['remoteOutcome'] = job_details.remoteOutcome
event_data[Notification.EXTRA]['status'] = job_details.status

try:
logger.info('Indexing job output for job={}'.format(job_uuid))
Expand Down
28 changes: 28 additions & 0 deletions server/portal/apps/workspace/api/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import json


def get_tapis_timeout_error_messages(job_id):
    """
    Build the Tapis messages that are treated as a job timeout.

    :param job_id: identifier interpolated into the user-application
        failure message
    :return: list of two message strings, the early-termination message
        followed by the user-application "TIMEOUT" message
    """
    early_termination_message = 'JOBS_EARLY_TERMINATION Job terminated by Tapis because: TIME_EXPIRED'
    app_timeout_message = f'JOBS_USER_APP_FAILURE The user application ({job_id}) ended with remote status "TIMEOUT" and returned exit code: 0:0.'

    return [early_termination_message, app_timeout_message]


def check_job_for_timeout(job):
    """
    Check an interactive job for timeout status and mark it as finished
    since Tapis does not have native support for interactive jobs yet.

    A job is rewritten only when all of the following hold: its status is
    'FAILED', its notes flag it as interactive ('isInteractive'), and its
    lastMessage is one of the messages returned by
    get_tapis_timeout_error_messages(). In that case both ``status`` and
    ``remoteOutcome`` are set to 'FINISHED' on the job, in place.

    Jobs with missing, empty, or undecodable notes are returned untouched
    rather than raising, so one odd job cannot break a whole listing.

    :param job: job object exposing ``status`` and, optionally, ``notes``
        (a JSON string or an already-decoded dict), ``lastMessage`` and
        ``remoteJobId`` attributes
    :return: the same job object, possibly with updated fields
    """
    raw_notes = getattr(job, 'notes', None)
    if not raw_notes:
        return job

    if isinstance(raw_notes, (str, bytes, bytearray)):
        try:
            notes = json.loads(raw_notes)
        except ValueError:
            # Malformed notes cannot mark a job as interactive.
            return job
    else:
        notes = raw_notes

    if not isinstance(notes, dict):
        return job

    # Check the cheap conditions first so lastMessage/remoteJobId are only
    # consulted for failed interactive jobs.
    if getattr(job, 'status', None) != 'FAILED' or not notes.get('isInteractive', False):
        return job

    timeout_messages = get_tapis_timeout_error_messages(getattr(job, 'remoteJobId', None))
    if getattr(job, 'lastMessage', None) in timeout_messages:
        job.status = 'FINISHED'
        job.remoteOutcome = 'FINISHED'

    return job
8 changes: 8 additions & 0 deletions server/portal/apps/workspace/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from portal.apps.onboarding.steps.system_access_v3 import create_system_credentials
from portal.apps.users.utils import get_user_data
from .handlers.tapis_handlers import tapis_get_handler
from portal.apps.workspace.api.utils import check_job_for_timeout

logger = logging.getLogger(__name__)
METRICS = logging.getLogger('metrics.{}'.format(__name__))
Expand Down Expand Up @@ -138,6 +139,7 @@ def get(self, request, *args, **kwargs):

@method_decorator(login_required, name='dispatch')
class JobsView(BaseApiView):

def get(self, request, operation=None):

allowed_actions = ['listing', 'search', 'select']
Expand All @@ -150,6 +152,12 @@ def get(self, request, operation=None):
op = getattr(self, operation)
data = op(tapis, request)

if (isinstance(data, list)):
for index, job in enumerate(data):
data[index] = check_job_for_timeout(job)
else:
data = check_job_for_timeout(data)

return JsonResponse(
{
'status': 200,
Expand Down

0 comments on commit d36ba9f

Please sign in to comment.