diff --git a/README.md b/README.md index e268fc41..9e556311 100644 --- a/README.md +++ b/README.md @@ -375,6 +375,13 @@ package repositories. Typically these settings are set in the prologue of a Slurm job. However, when entering the [EESSI compatibility layer](https://www.eessi.io/docs/compatibility_layer), most environment settings are cleared. Hence, they need to be set again at a later stage. +``` +job_name = JOB_NAME +``` +Replace `JOB_NAME` with a string of at least 3 characters that is used as the job +name when a job is submitted. The name is used to filter jobs, for example to make +sure that multiple bot instances can run in the same Slurm environment. + ``` jobs_base_dir = PATH_TO_JOBS_BASE_DIR ``` diff --git a/app.cfg.example b/app.cfg.example index ae51ade6..7cbde15d 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -87,6 +87,10 @@ container_cachedir = PATH_TO_SHARED_DIRECTORY # http_proxy = http://PROXY_DNS:3128/ # https_proxy = http://PROXY_DNS:3128/ +# Used to give all jobs of a bot instance the same name. Can be used to allow +# multiple bot instances to run on the same Slurm cluster.
+job_name = prod + # directory under which the bot prepares directories per job # structure created is as follows: YYYY.MM/pr_PR_NUMBER/event_EVENT_ID/run_RUN_NUMBER/OS+SUBDIR jobs_base_dir = $HOME/jobs diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 5677ed2c..d414f947 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -51,6 +51,7 @@ # config.BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS, # optional # config.BUILDENV_SETTING_HTTPS_PROXY, # optional # config.BUILDENV_SETTING_HTTP_PROXY, # optional + config.BUILDENV_SETTING_JOB_NAME, # required config.BUILDENV_SETTING_JOBS_BASE_DIR, # required # config.BUILDENV_SETTING_LOAD_MODULES, # optional config.BUILDENV_SETTING_LOCAL_TMP, # required diff --git a/eessi_bot_job_manager.py b/eessi_bot_job_manager.py index e7473f00..aba40081 100644 --- a/eessi_bot_job_manager.py +++ b/eessi_bot_job_manager.py @@ -50,6 +50,8 @@ # settings that are required in 'app.cfg' REQUIRED_CONFIG = { + config.SECTION_BUILDENV: [ + config.BUILDENV_SETTING_JOB_NAME], # required config.SECTION_FINISHED_JOB_COMMENTS: [ config.FINISHED_JOB_COMMENTS_SETTING_JOB_RESULT_UNKNOWN_FMT, # required config.FINISHED_JOB_COMMENTS_SETTING_JOB_TEST_UNKNOWN_FMT], # required @@ -85,6 +87,10 @@ def __init__(self): cfg = config.read_config() job_manager_cfg = cfg[config.SECTION_JOB_MANAGER] self.logfile = job_manager_cfg.get(config.JOB_MANAGER_SETTING_LOG_PATH) + buildenv_cfg = cfg[config.SECTION_BUILDENV] + self.job_name = buildenv_cfg.get(config.BUILDENV_SETTING_JOB_NAME) + if len(self.job_name) < 3: + raise Exception(f"job name ({self.job_name}) is shorter than 3 characters") def get_current_jobs(self): """ @@ -105,7 +111,7 @@ def get_current_jobs(self): if username is None: raise Exception("Unable to find username") - squeue_cmd = "%s --long --noheader --user=%s" % (self.poll_command, username) + squeue_cmd = "%s --long --noheader --user=%s --name='%s'" % (self.poll_command, username, self.job_name) squeue_output, 
squeue_err, squeue_exitcode = run_cmd( squeue_cmd, "get_current_jobs(): squeue command", diff --git a/tasks/build.py b/tasks/build.py index 82a0911e..5fa36076 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -67,6 +67,10 @@ def get_build_env_cfg(cfg): buildenv = cfg[config.SECTION_BUILDENV] + job_name = buildenv.get(config.BUILDENV_SETTING_JOB_NAME) + log(f"{fn}(): job_name '{job_name}'") + config_data = {config.BUILDENV_SETTING_JOB_NAME: job_name} + jobs_base_dir = buildenv.get(config.BUILDENV_SETTING_JOBS_BASE_DIR) log(f"{fn}(): jobs_base_dir '{jobs_base_dir}'") config_data = {config.BUILDENV_SETTING_JOBS_BASE_DIR: jobs_base_dir} @@ -640,6 +644,10 @@ def submit_job(job, cfg): build_env_cfg = get_build_env_cfg(cfg) + # the job_name is used to filter jobs in case multiple bot + # instances run on the same system + job_name = cfg[config.SECTION_BUILDENV].get(config.BUILDENV_SETTING_JOB_NAME) + # add a default time limit of 24h to the job submit command if no other time # limit is specified already all_opts_str = " ".join([build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS], job.slurm_opts]) @@ -654,6 +662,7 @@ def submit_job(job, cfg): build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS], time_limit, job.slurm_opts, + f"--job-name='{job_name}'", build_env_cfg[config.BUILDENV_SETTING_BUILD_JOB_SCRIPT], ]) diff --git a/tools/config.py b/tools/config.py index dcffe03d..11527702 100644 --- a/tools/config.py +++ b/tools/config.py @@ -43,6 +43,7 @@ BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS = 'cvmfs_customizations' BUILDENV_SETTING_HTTPS_PROXY = 'https_proxy' BUILDENV_SETTING_HTTP_PROXY = 'http_proxy' +BUILDENV_SETTING_JOB_NAME = 'job_name' BUILDENV_SETTING_JOBS_BASE_DIR = 'jobs_base_dir' BUILDENV_SETTING_LOAD_MODULES = 'load_modules' BUILDENV_SETTING_LOCAL_TMP = 'local_tmp'