Skip to content

Commit

Permalink
feat: easy fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
qcampagn committed Oct 30, 2023
1 parent c7a61d0 commit cd8fc37
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 30 deletions.
41 changes: 20 additions & 21 deletions src/DIRAC/WorkloadManagementSystem/Agent/ScoutingJobStatusAgent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
from DIRAC import S_OK, S_ERROR
from DIRAC.Core.Base.AgentModule import AgentModule
from DIRAC.WorkloadManagementSystem.Client import JobStatus
from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
from DIRAC.WorkloadManagementSystem.Client.JobStateUpdateClient import JobStateUpdateClient
from DIRAC.WorkloadManagementSystem.DB.JobLoggingDB import JobLoggingDB
Expand Down Expand Up @@ -59,7 +60,7 @@ def beginExecution(self):
return S_OK()

def execute(self):
"""The PilotAgent execution method.
"""The ScoutingJobStatus execution method.
"""
result = self.jobDB.selectJobs({'Status': 'Scouting'})
if not result['OK']:
Expand Down Expand Up @@ -93,7 +94,7 @@ def execute(self):

scoutIDdict[scoutID] = scoutStatus
if scoutStatus['Status'] == 'NotComplete':
self.log.verbose("%s: skipping since corresponding scout does not complete yet." % jobID)
self.log.verbose(f"{jobID}: skipping since corresponding scout does not complete yet.")
continue
else:
result = self.__updateJobStatus(jobID, status=scoutStatus['Status'],
Expand Down Expand Up @@ -123,61 +124,60 @@ def __getScoutStatus(self, scoutid):
status = result['Value'][scoutjob]['Status']
site = result['Value'][scoutjob]['Site']
jobid = scoutjob
if status == 'Done':
if status == JobStatus.DONE:
donejoblist.append(jobid)
donesitelist.append(site)
elif status == 'Failed':
elif status == JobStatus.FAILED:
failedjoblist.append(jobid)
failedsitelist.append(site)
elif status == 'Stalled':
elif status == JobStatus.STALLED:
stalledjoblist.append(jobid)

if self.criteriaSucceeded > len(scoutjobs):
criteriaSucceeded = max(int(len(scoutjobs) * self.criteriaSucceededRate), 1)
self.log.verbose('criteriaSucceeded = %s' % self.criteriaSucceeded)
self.log.verbose(f'criteriaSucceeded = {self.criteriaSucceeded}')
else:
criteriaSucceeded = self.criteriaSucceeded
self.log.debug('criteriaSucceeded = %s' % self.criteriaSucceeded)
self.log.debug(f'criteriaSucceeded = {self.criteriaSucceeded}')

if self.criteriaFailed > len(scoutjobs):
criteriaFailed = max(int(len(scoutjobs) * self.criteriaFailedRate), 1)
self.log.verbose('criteriaFailed = %s' % self.criteriaFailed)
self.log.verbose(f'criteriaFailed = {self.criteriaFailed}')
else:
criteriaFailed = self.criteriaFailed
self.log.debug('criteriaFailed = %s' % self.criteriaFailed)
self.log.debug(f'criteriaFailed = {self.criteriaFailed}')

if self.criteriaStalled > len(scoutjobs):
criteriaStalled = max(int(len(scoutjobs) * self.criteriaStalledRate), 1)
self.log.verbose('criteriaStalled = %s' % self.criteriaStalled)
self.log.verbose(f'criteriaStalled = {self.criteriaStalled}')
else:
criteriaStalled = self.criteriaStalled
self.log.debug('criteriaStalled = %s' % self.criteriaStalled)
self.log.debug(f'criteriaStalled = {self.criteriaStalled}')

if len(donejoblist) >= criteriaSucceeded:
self.log.verbose('Scout (ID = %s) are done.' % scoutid)
self.log.verbose(f'Scout (ID = {scoutid}) are done.')
scoutStatus = {'Status': 'Checking', 'MinorStatus': 'Scouting', 'appstatus': 'Scout Complete'}

elif len(failedjoblist) >= criteriaFailed:
self.log.verbose('Scout (ID = %s) are failed.' % scoutid)
self.log.verbose(f'Scout (ID = {scoutid}) are failed.')
msg = 'Failed scout job ' + str(failedjoblist)
scoutStatus = {'Status': 'Failed', 'MinorStatus': 'Failed in scouting', 'appstatus': msg}

elif len(stalledjoblist) >= criteriaStalled:
self.log.verbose('Scout (ID = %s) are stalled.' % scoutid)
self.log.verbose(f'Scout (ID = {scoutid}) are stalled.')
msg = 'Stalled scout job ' + str(stalledjoblist)
scoutStatus = {'Status': 'Stalled', 'MinorStatus': 'Stalled in scouting', 'appstatus': msg}

else:
self.log.verbose('Scout (ID = %s) did not completed.' % scoutid)
self.log.verbose(f'Scout (ID = {scoutid}) did not completed.')
scoutStatus = {'Status': 'NotComplete'}

return S_OK(scoutStatus)

def __updateJobStatus(self, job, status=None, minorstatus=None, appstatus=None):
""" This method updates the job status in the JobDB.
"""
self.log.info('Job %s set Status="%s", MinorStatus="%s", ApplicationStatus="%s".'
% (job, status, minorstatus, appstatus))
self.log.info(f'Job {job} set Status="{status}", MinorStatus="{minorstatus}", ApplicationStatus="{appstatus}".')
if not self.am_getOption('Enable', True):
result = S_OK('DisabledMode')

Expand All @@ -187,8 +187,7 @@ def __updateJobStatus(self, job, status=None, minorstatus=None, appstatus=None):
if result['OK']:
minorstatus = result['Value']['ApplicationStatus']

self.log.verbose("self.jobDB.setJobAttribute(%s,'ApplicationStatus','%s',update=True)"
% (job, appstatus))
self.log.verbose(f"self.jobDB.setJobAttribute({job},'ApplicationStatus','{appstatus}',update=True)")
result = self.jobDB.setJobAttribute(job, 'ApplicationStatus', appstatus, update=True)
if not result['OK']:
return result
Expand All @@ -199,7 +198,7 @@ def __updateJobStatus(self, job, status=None, minorstatus=None, appstatus=None):
if result['OK']:
minorstatus = result['Value']['MinorStatus']

self.log.verbose("self.jobDB.setJobAttribute(%s,'MinorStatus','%s',update=True)" % (job, minorstatus))
self.log.verbose(f"self.jobDB.setJobAttribute({job},'MinorStatus','{minorstatus}',update=True)")
result = self.jobDB.setJobAttribute(job, 'MinorStatus', minorstatus, update=True)
if not result['OK']:
return result
Expand All @@ -215,7 +214,7 @@ def __updateJobStatus(self, job, status=None, minorstatus=None, appstatus=None):
if result['OK']:
status = result['Value']['Status']

self.log.verbose("self.jobDB.setJobAttribute(%s,'Status','%s',update=True)" % (job, status))
self.log.verbose(f"self.jobDB.setJobAttribute({job},'Status','{status}',update=True)")
result = self.jobDB.setJobAttribute(job, 'Status', status, update=True)
if not result['OK']:
return result
Expand Down
17 changes: 8 additions & 9 deletions src/DIRAC/WorkloadManagementSystem/Executor/Scouting.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from DIRAC import S_OK, S_ERROR

from DIRAC.WorkloadManagementSystem.Executor.Base.OptimizerExecutor import OptimizerExecutor
from DIRAC.WorkloadManagementSystem.Client import JobStatus
from DIRAC.ResourceStatusSystem.Client.SiteStatus import SiteStatus
from DIRAC.WorkloadManagementSystem.Client.JobStateUpdateClient import JobStateUpdateClient

Expand All @@ -28,7 +29,7 @@ def optimizeJob(self, jid, jobState):

result = self.__jobDB.getJobParameters(jid, ['ScoutFlag', 'ScoutID'])
if not result['OK']:
return S_ERROR('Could not retrieve scoutparams')
return result

rCounter = 0
if result['Value']:
Expand All @@ -46,7 +47,7 @@ def optimizeJob(self, jid, jobState):
else:
result = jobState.getManifest()
if not result['OK']:
return S_ERROR('Could not retrieve job manifest: %s' % result['Message'])
return result
jobManifest = result['Value']
scoutID = jobManifest.getOption('ScoutID', None)
if not scoutID:
Expand Down Expand Up @@ -87,8 +88,7 @@ def optimizeJob(self, jid, jobState):
return self.setNextOptimizer(jobState)

self.jobLog.info('Job %s set scouting status' % jid)
result = self.__setScoutingStatus(jobState)
return result
return self.__setScoutingStatus(jobState)

def __getIDandFlag(self, scoutparams):

Expand All @@ -107,21 +107,20 @@ def __setScoutparamsInJobParameters(self, jid, scoutID, scoutFlag, jobState=None
result = self.__jobDB.setJobParameters(jid, paramList)
if not result['OK']:
self.jobLog.info('Skipping, since failed in recovering scoutparams of JobParameters.')
return result

return S_OK()

return result

def __setScoutingStatus(self, jobState=None):

if not jobState:
jobState = self.__jobData.jobState

result = jobState.getStatus()
if not result['OK']:
if not (result := jobState.getStatus())['OK']:
return result

opName = self.ex_optimizerName()
result = jobState.setStatus(self.ex_getOption('WaitingStatus', 'Scouting'),
result = jobState.setStatus(self.ex_getOption('WaitingStatus', JobStatus.SCOUTING),
minorStatus=self.ex_getOption('WaitingMinorStatus',
'Waiting for Scout Job Completion'),
appStatus="Unknown",
Expand Down

0 comments on commit cd8fc37

Please sign in to comment.