Skip to content

Commit

Permalink
add setting to give all jobs a unique name
Browse files Browse the repository at this point in the history
  • Loading branch information
truib committed Jun 21, 2024
1 parent 548a3e2 commit 1dcf081
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 1 deletion.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,13 @@ package repositories. Typically these settings are set in the prologue of a
Slurm job. However, when entering the [EESSI compatibility layer](https://www.eessi.io/docs/compatibility_layer),
most environment settings are cleared. Hence, they need to be set again at a later stage.

```
job_name = JOB_NAME
```
Replace `JOB_NAME` with a string of at least 3 characters. It is used as the
job name when a job is submitted and to filter jobs when the bot queries Slurm.
For example, give each bot instance a different job name so that multiple bot
instances can run in the same Slurm environment.

```
jobs_base_dir = PATH_TO_JOBS_BASE_DIR
```
Expand Down
4 changes: 4 additions & 0 deletions app.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ container_cachedir = PATH_TO_SHARED_DIRECTORY
# http_proxy = http://PROXY_DNS:3128/
# https_proxy = http://PROXY_DNS:3128/

# Name given to all jobs submitted by this bot instance (at least 3 characters).
# Use a different name per bot instance to allow multiple bot instances to run
# on the same Slurm cluster.
job_name = prod

# directory under which the bot prepares directories per job
# structure created is as follows: YYYY.MM/pr_PR_NUMBER/event_EVENT_ID/run_RUN_NUMBER/OS+SUBDIR
jobs_base_dir = $HOME/jobs
Expand Down
1 change: 1 addition & 0 deletions eessi_bot_event_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
# config.BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS, # optional
# config.BUILDENV_SETTING_HTTPS_PROXY, # optional
# config.BUILDENV_SETTING_HTTP_PROXY, # optional
config.BUILDENV_SETTING_JOB_NAME, # required
config.BUILDENV_SETTING_JOBS_BASE_DIR, # required
# config.BUILDENV_SETTING_LOAD_MODULES, # optional
config.BUILDENV_SETTING_LOCAL_TMP, # required
Expand Down
8 changes: 7 additions & 1 deletion eessi_bot_job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@

# settings that are required in 'app.cfg'
REQUIRED_CONFIG = {
config.SECTION_BUILDENV: [
config.BUILDENV_SETTING_JOB_NAME], # required
config.SECTION_FINISHED_JOB_COMMENTS: [
config.FINISHED_JOB_COMMENTS_SETTING_JOB_RESULT_UNKNOWN_FMT, # required
config.FINISHED_JOB_COMMENTS_SETTING_JOB_TEST_UNKNOWN_FMT], # required
Expand Down Expand Up @@ -85,6 +87,10 @@ def __init__(self):
cfg = config.read_config()
job_manager_cfg = cfg[config.SECTION_JOB_MANAGER]
self.logfile = job_manager_cfg.get(config.JOB_MANAGER_SETTING_LOG_PATH)
buildenv_cfg = cfg[config.SECTION_BUILDENV]
self.job_name = buildenv_cfg.get(config.BUILDENV_SETTING_JOB_NAME)
if len(self.job_name) < 3:
raise Exception(f"job name ({self.job_name}) is shorter than 3 characters")

def get_current_jobs(self):
"""
Expand All @@ -105,7 +111,7 @@ def get_current_jobs(self):
if username is None:
raise Exception("Unable to find username")

squeue_cmd = "%s --long --noheader --user=%s" % (self.poll_command, username)
squeue_cmd = "%s --long --noheader --user=%s --name='%s'" % (self.poll_command, username, self.job_name)
squeue_output, squeue_err, squeue_exitcode = run_cmd(
squeue_cmd,
"get_current_jobs(): squeue command",
Expand Down
9 changes: 9 additions & 0 deletions tasks/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ def get_build_env_cfg(cfg):

buildenv = cfg[config.SECTION_BUILDENV]

job_name = buildenv.get(config.BUILDENV_SETTING_JOB_NAME)
log(f"{fn}(): job_name '{job_name}'")
config_data = {config.BUILDENV_SETTING_JOB_NAME: job_name}

jobs_base_dir = buildenv.get(config.BUILDENV_SETTING_JOBS_BASE_DIR)
log(f"{fn}(): jobs_base_dir '{jobs_base_dir}'")
config_data = {config.BUILDENV_SETTING_JOBS_BASE_DIR: jobs_base_dir}
Expand Down Expand Up @@ -640,6 +644,10 @@ def submit_job(job, cfg):

build_env_cfg = get_build_env_cfg(cfg)

# the job_name is used to filter jobs in case multiple bot
# instances run on the same system
job_name = cfg[config.SECTION_BUILDENV].get(config.BUILDENV_SETTING_JOB_NAME)

# add a default time limit of 24h to the job submit command if no other time
# limit is specified already
all_opts_str = " ".join([build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS], job.slurm_opts])
Expand All @@ -654,6 +662,7 @@ def submit_job(job, cfg):
build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS],
time_limit,
job.slurm_opts,
f"--job-name='{job_name}'",
build_env_cfg[config.BUILDENV_SETTING_BUILD_JOB_SCRIPT],
])

Expand Down
1 change: 1 addition & 0 deletions tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS = 'cvmfs_customizations'
BUILDENV_SETTING_HTTPS_PROXY = 'https_proxy'
BUILDENV_SETTING_HTTP_PROXY = 'http_proxy'
BUILDENV_SETTING_JOB_NAME = 'job_name'
BUILDENV_SETTING_JOBS_BASE_DIR = 'jobs_base_dir'
BUILDENV_SETTING_LOAD_MODULES = 'load_modules'
BUILDENV_SETTING_LOCAL_TMP = 'local_tmp'
Expand Down

0 comments on commit 1dcf081

Please sign in to comment.