diff --git a/README.md b/README.md index e268fc41..9e556311 100644 --- a/README.md +++ b/README.md @@ -375,6 +375,13 @@ package repositories. Typically these settings are set in the prologue of a Slurm job. However, when entering the [EESSI compatibility layer](https://www.eessi.io/docs/compatibility_layer), most environment settings are cleared. Hence, they need to be set again at a later stage. +``` +job_name = JOB_NAME +``` +Replace `JOB_NAME` with a string of at least 3 characters that is used as the job +name when a job is submitted. The job name is used to filter jobs, e.g., to make +sure that multiple bot instances can run in the same Slurm environment. + ``` jobs_base_dir = PATH_TO_JOBS_BASE_DIR ``` diff --git a/app.cfg.example b/app.cfg.example index ae51ade6..7cbde15d 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -87,6 +87,10 @@ container_cachedir = PATH_TO_SHARED_DIRECTORY # http_proxy = http://PROXY_DNS:3128/ # https_proxy = http://PROXY_DNS:3128/ +# Used to give all jobs of a bot instance the same name. Can be used to allow +# multiple bot instances to run on the same Slurm cluster. 
+job_name = prod + # directory under which the bot prepares directories per job # structure created is as follows: YYYY.MM/pr_PR_NUMBER/event_EVENT_ID/run_RUN_NUMBER/OS+SUBDIR jobs_base_dir = $HOME/jobs diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 5677ed2c..d414f947 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -51,6 +51,7 @@ # config.BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS, # optional # config.BUILDENV_SETTING_HTTPS_PROXY, # optional # config.BUILDENV_SETTING_HTTP_PROXY, # optional + config.BUILDENV_SETTING_JOB_NAME, # required config.BUILDENV_SETTING_JOBS_BASE_DIR, # required # config.BUILDENV_SETTING_LOAD_MODULES, # optional config.BUILDENV_SETTING_LOCAL_TMP, # required diff --git a/eessi_bot_job_manager.py b/eessi_bot_job_manager.py index e7473f00..bb0c6dd8 100644 --- a/eessi_bot_job_manager.py +++ b/eessi_bot_job_manager.py @@ -50,6 +50,8 @@ # settings that are required in 'app.cfg' REQUIRED_CONFIG = { + config.SECTION_BUILDENV: [ + config.BUILDENV_SETTING_JOB_NAME], # required config.SECTION_FINISHED_JOB_COMMENTS: [ config.FINISHED_JOB_COMMENTS_SETTING_JOB_RESULT_UNKNOWN_FMT, # required config.FINISHED_JOB_COMMENTS_SETTING_JOB_TEST_UNKNOWN_FMT], # required @@ -85,6 +87,10 @@ def __init__(self): cfg = config.read_config() job_manager_cfg = cfg[config.SECTION_JOB_MANAGER] self.logfile = job_manager_cfg.get(config.JOB_MANAGER_SETTING_LOG_PATH) + buildenv_cfg = cfg[config.SECTION_BUILDENV] + self.job_name = buildenv_cfg.get(config.BUILDENV_SETTING_JOB_NAME) + if self.job_name and len(self.job_name) < 3: + raise Exception(f"job name ({self.job_name}) is shorter than 3 characters") def get_current_jobs(self): """ @@ -106,6 +112,8 @@ def get_current_jobs(self): raise Exception("Unable to find username") squeue_cmd = "%s --long --noheader --user=%s" % (self.poll_command, username) + if self.job_name: + squeue_cmd += " --name='%s'" % self.job_name squeue_output, squeue_err, squeue_exitcode = run_cmd( 
squeue_cmd, "get_current_jobs(): squeue command", diff --git a/tasks/build.py b/tasks/build.py index 82a0911e..46b9543a 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -65,11 +65,16 @@ def get_build_env_cfg(cfg): """ fn = sys._getframe().f_code.co_name + config_data = {} buildenv = cfg[config.SECTION_BUILDENV] + job_name = buildenv.get(config.BUILDENV_SETTING_JOB_NAME) + log(f"{fn}(): job_name '{job_name}'") + config_data[config.BUILDENV_SETTING_JOB_NAME] = job_name + jobs_base_dir = buildenv.get(config.BUILDENV_SETTING_JOBS_BASE_DIR) log(f"{fn}(): jobs_base_dir '{jobs_base_dir}'") - config_data = {config.BUILDENV_SETTING_JOBS_BASE_DIR: jobs_base_dir} + config_data[config.BUILDENV_SETTING_JOBS_BASE_DIR] = jobs_base_dir local_tmp = buildenv.get(config.BUILDENV_SETTING_LOCAL_TMP) log(f"{fn}(): local_tmp '{local_tmp}'") @@ -640,6 +645,10 @@ def submit_job(job, cfg): build_env_cfg = get_build_env_cfg(cfg) + # the job_name is used to filter jobs in case multiple bot + # instances run on the same system + job_name = cfg[config.SECTION_BUILDENV].get(config.BUILDENV_SETTING_JOB_NAME) + # add a default time limit of 24h to the job submit command if no other time # limit is specified already all_opts_str = " ".join([build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS], job.slurm_opts]) @@ -653,9 +662,9 @@ def submit_job(job, cfg): build_env_cfg[config.BUILDENV_SETTING_SUBMIT_COMMAND], build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS], time_limit, - job.slurm_opts, - build_env_cfg[config.BUILDENV_SETTING_BUILD_JOB_SCRIPT], - ]) + job.slurm_opts] + + ([f"--job-name='{job_name}'"] if job_name else []) + + [build_env_cfg[config.BUILDENV_SETTING_BUILD_JOB_SCRIPT]]) cmdline_output, cmdline_error, cmdline_exit_code = run_cmd(command_line, "submit job for target '%s'" % job.arch_target, diff --git a/tests/test_app.cfg b/tests/test_app.cfg index f940c1df..fd91ed8b 100644 --- a/tests/test_app.cfg +++ b/tests/test_app.cfg @@ -11,6 +11,8 @@ # sample config file for tests (some 
functions run config.read_config() # which reads app.cfg by default) +[buildenv] + [job_manager] # variable 'comment' under 'submitted_job_comments' should not be changed as there are regular expression patterns matching it diff --git a/tools/config.py b/tools/config.py index dcffe03d..11527702 100644 --- a/tools/config.py +++ b/tools/config.py @@ -43,6 +43,7 @@ BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS = 'cvmfs_customizations' BUILDENV_SETTING_HTTPS_PROXY = 'https_proxy' BUILDENV_SETTING_HTTP_PROXY = 'http_proxy' +BUILDENV_SETTING_JOB_NAME = 'job_name' BUILDENV_SETTING_JOBS_BASE_DIR = 'jobs_base_dir' BUILDENV_SETTING_LOAD_MODULES = 'load_modules' BUILDENV_SETTING_LOCAL_TMP = 'local_tmp'