Skip to content

Commit

Permalink
Merge pull request #255 from laraPPr/jm_slurm_connect_fix
Browse files: browse the repository at this point in the history
fix for job manager crash: Unable to contact slurm controller
  • Loading branch information
trz42 authored Feb 22, 2024
2 parents 0e145d6 + e73bfe6 commit b03baa1
Showing 1 changed file with 14 additions and 8 deletions.
22 changes: 14 additions & 8 deletions eessi_bot_job_manager.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -667,14 +667,26 @@ def main():
if max_iter != 0:
known_jobs = job_manager.get_known_jobs()
while max_iter < 0 or i < max_iter:
# sleep poll_interval seconds (not for the first iteration)
if i != 0:
log(
"job manager main loop: sleep %d seconds" % poll_interval,
job_manager.logfile,
)
time.sleep(poll_interval)
log("job manager main loop: iteration %d" % i, job_manager.logfile)
log(
"job manager main loop: known_jobs='%s'" % ",".join(
known_jobs.keys()),
job_manager.logfile,
)

current_jobs = job_manager.get_current_jobs()
try:
current_jobs = job_manager.get_current_jobs()
except RuntimeError:
i = i + 1
continue

log(
"job manager main loop: current_jobs='%s'" % ",".join(
current_jobs.keys()),
Expand Down Expand Up @@ -729,13 +741,7 @@ def main():

known_jobs = current_jobs

# sleep poll_interval seconds (only if at least one more iteration)
if max_iter < 0 or i + 1 < max_iter:
log(
"job manager main loop: sleep %d seconds" % poll_interval,
job_manager.logfile,
)
time.sleep(poll_interval)
# add one iteration to the loop
i = i + 1


Expand Down

0 comments on commit b03baa1

Please sign in to comment.