Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

task/WP-189 Handle timeout exit code for interactive app jobs #851

Merged
merged 5 commits into from
Aug 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions server/portal/apps/webhooks/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
validate_webhook,
execute_callback
)
from portal.apps.workspace.api.utils import check_job_for_timeout

from django.conf import settings

Expand Down Expand Up @@ -54,6 +55,8 @@ def validate_tapis_job(job_uuid, job_owner, disallowed_states=[]):
if job_data.status in disallowed_states:
return None

job_data = check_job_for_timeout(job_data)

return job_data


Expand Down Expand Up @@ -122,6 +125,7 @@ def post(self, request, *args, **kwargs):
job_details = validate_tapis_job(job_uuid, username, disallowed_states=non_terminal_states)
if job_details:
event_data[Notification.EXTRA]['remoteOutcome'] = job_details.remoteOutcome
event_data[Notification.EXTRA]['status'] = job_details.status

try:
logger.info('Indexing job output for job={}'.format(job_uuid))
Expand Down
28 changes: 28 additions & 0 deletions server/portal/apps/workspace/api/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import json


def get_tapis_timeout_error_messages(job_id):
    """
    Build the list of Tapis `lastMessage` strings that indicate a job
    stopped because it ran out of time.

    :param job_id: identifier interpolated into the "user application"
        failure message (callers in this module pass the job's remoteJobId)
    :return: list of two message strings, in a fixed order
    """
    # Message with no job-specific content.
    early_termination = 'JOBS_EARLY_TERMINATION Job terminated by Tapis because: TIME_EXPIRED'

    # Message that embeds the given job id.
    user_app_failure = f'JOBS_USER_APP_FAILURE The user application ({job_id}) ended with remote status "TIMEOUT" and returned exit code: 0:0.'

    messages = []
    messages.append(early_termination)
    messages.append(user_app_failure)
    return messages


def check_job_for_timeout(job):
    """
    Check an interactive job for timeout status and mark it as finished
    since Tapis does not have native support for interactive jobs yet.

    A job is rewritten (in place) only when all of the following hold:
      * its ``status`` is ``'FAILED'``
      * its ``notes`` (a JSON string, or an already-decoded dict) has a
        truthy ``isInteractive`` entry
      * its ``lastMessage`` is one of the timeout messages produced by
        ``get_tapis_timeout_error_messages`` for its ``remoteJobId``

    Anything else (missing attributes, ``notes`` that is ``None``,
    malformed JSON, or JSON that is not an object) leaves the job untouched
    instead of raising, so one odd record cannot break a whole job listing
    or webhook.

    :param job: job object exposing attributes such as ``status``,
        ``notes``, ``lastMessage`` and ``remoteJobId``; any of them may be
        absent
    :return: the same ``job`` object, possibly with ``status`` and
        ``remoteOutcome`` set to ``'FINISHED'``
    """
    # Only FAILED jobs are ever reclassified, so bail out before doing any
    # JSON parsing for the common case.
    if getattr(job, 'status', None) != 'FAILED':
        return job

    raw_notes = getattr(job, 'notes', None)
    if isinstance(raw_notes, (str, bytes, bytearray)):
        try:
            notes = json.loads(raw_notes)
        except ValueError:
            # Covers json.JSONDecodeError and undecodable bytes.
            return job
    else:
        notes = raw_notes

    # `notes` may be None, or valid JSON that is not an object (e.g. a list).
    if not isinstance(notes, dict) or not notes.get('isInteractive', False):
        return job

    timeout_messages = get_tapis_timeout_error_messages(
        getattr(job, 'remoteJobId', None)
    )
    if getattr(job, 'lastMessage', None) in timeout_messages:
        job.status = 'FINISHED'
        job.remoteOutcome = 'FINISHED'

    return job
8 changes: 8 additions & 0 deletions server/portal/apps/workspace/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from portal.apps.onboarding.steps.system_access_v3 import create_system_credentials
from portal.apps.users.utils import get_user_data
from .handlers.tapis_handlers import tapis_get_handler
from portal.apps.workspace.api.utils import check_job_for_timeout

logger = logging.getLogger(__name__)
METRICS = logging.getLogger('metrics.{}'.format(__name__))
Expand Down Expand Up @@ -138,6 +139,7 @@ def get(self, request, *args, **kwargs):

@method_decorator(login_required, name='dispatch')
class JobsView(BaseApiView):

def get(self, request, operation=None):

allowed_actions = ['listing', 'search', 'select']
Expand All @@ -150,6 +152,12 @@ def get(self, request, operation=None):
op = getattr(self, operation)
data = op(tapis, request)

if (isinstance(data, list)):
for index, job in enumerate(data):
data[index] = check_job_for_timeout(job)
else:
data = check_job_for_timeout(data)

return JsonResponse(
{
'status': 200,
Expand Down