Skip to content

Commit

Permalink
Merge pull request #273 from trz42/add_job_name
Browse files Browse the repository at this point in the history
add setting to give all jobs a unique name
  • Loading branch information
boegel authored Aug 9, 2024
2 parents 45755b7 + 78e625e commit b7ffe3b
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 4 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,13 @@ package repositories. Typically these settings are set in the prologue of a
Slurm job. However, when entering the [EESSI compatibility layer](https://www.eessi.io/docs/compatibility_layer),
most environment settings are cleared. Hence, they need to be set again at a later stage.

```
job_name = JOB_NAME
```
Replace `JOB_NAME` with a string of at least 3 characters that is used as the
job name when a job is submitted. The job name is used to filter jobs, e.g., to
make sure that multiple bot instances can run in the same Slurm environment.

```
jobs_base_dir = PATH_TO_JOBS_BASE_DIR
```
Expand Down
4 changes: 4 additions & 0 deletions app.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ container_cachedir = PATH_TO_SHARED_DIRECTORY
# http_proxy = http://PROXY_DNS:3128/
# https_proxy = http://PROXY_DNS:3128/

# Name given to all jobs submitted by this bot instance (at least 3
# characters). It is used to filter jobs, which allows multiple bot instances
# to run on the same Slurm cluster.
job_name = prod

# directory under which the bot prepares directories per job
# structure created is as follows: YYYY.MM/pr_PR_NUMBER/event_EVENT_ID/run_RUN_NUMBER/OS+SUBDIR
jobs_base_dir = $HOME/jobs
Expand Down
1 change: 1 addition & 0 deletions eessi_bot_event_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
# config.BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS, # optional
# config.BUILDENV_SETTING_HTTPS_PROXY, # optional
# config.BUILDENV_SETTING_HTTP_PROXY, # optional
config.BUILDENV_SETTING_JOB_NAME, # required
config.BUILDENV_SETTING_JOBS_BASE_DIR, # required
# config.BUILDENV_SETTING_LOAD_MODULES, # optional
config.BUILDENV_SETTING_LOCAL_TMP, # required
Expand Down
8 changes: 8 additions & 0 deletions eessi_bot_job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@

# settings that are required in 'app.cfg'
REQUIRED_CONFIG = {
config.SECTION_BUILDENV: [
config.BUILDENV_SETTING_JOB_NAME], # required
config.SECTION_FINISHED_JOB_COMMENTS: [
config.FINISHED_JOB_COMMENTS_SETTING_JOB_RESULT_UNKNOWN_FMT, # required
config.FINISHED_JOB_COMMENTS_SETTING_JOB_TEST_UNKNOWN_FMT], # required
Expand Down Expand Up @@ -85,6 +87,10 @@ def __init__(self):
cfg = config.read_config()
job_manager_cfg = cfg[config.SECTION_JOB_MANAGER]
self.logfile = job_manager_cfg.get(config.JOB_MANAGER_SETTING_LOG_PATH)
buildenv_cfg = cfg[config.SECTION_BUILDENV]
self.job_name = buildenv_cfg.get(config.BUILDENV_SETTING_JOB_NAME)
if self.job_name and len(self.job_name) < 3:
raise Exception(f"job name ({self.job_name}) is shorter than 3 characters")

def get_current_jobs(self):
"""
Expand All @@ -106,6 +112,8 @@ def get_current_jobs(self):
raise Exception("Unable to find username")

squeue_cmd = "%s --long --noheader --user=%s" % (self.poll_command, username)
if self.job_name:
squeue_cmd += " --name='%s'" % self.job_name
squeue_output, squeue_err, squeue_exitcode = run_cmd(
squeue_cmd,
"get_current_jobs(): squeue command",
Expand Down
17 changes: 13 additions & 4 deletions tasks/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,16 @@ def get_build_env_cfg(cfg):
"""
fn = sys._getframe().f_code.co_name

config_data = {}
buildenv = cfg[config.SECTION_BUILDENV]

job_name = buildenv.get(config.BUILDENV_SETTING_JOB_NAME)
log(f"{fn}(): job_name '{job_name}'")
config_data[config.BUILDENV_SETTING_JOB_NAME] = job_name

jobs_base_dir = buildenv.get(config.BUILDENV_SETTING_JOBS_BASE_DIR)
log(f"{fn}(): jobs_base_dir '{jobs_base_dir}'")
config_data = {config.BUILDENV_SETTING_JOBS_BASE_DIR: jobs_base_dir}
config_data[config.BUILDENV_SETTING_JOBS_BASE_DIR] = jobs_base_dir

local_tmp = buildenv.get(config.BUILDENV_SETTING_LOCAL_TMP)
log(f"{fn}(): local_tmp '{local_tmp}'")
Expand Down Expand Up @@ -640,6 +645,10 @@ def submit_job(job, cfg):

build_env_cfg = get_build_env_cfg(cfg)

# the job_name is used to filter jobs in case multiple bot
# instances run on the same system
job_name = cfg[config.SECTION_BUILDENV].get(config.BUILDENV_SETTING_JOB_NAME)

# add a default time limit of 24h to the job submit command if no other time
# limit is specified already
all_opts_str = " ".join([build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS], job.slurm_opts])
Expand All @@ -653,9 +662,9 @@ def submit_job(job, cfg):
build_env_cfg[config.BUILDENV_SETTING_SUBMIT_COMMAND],
build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS],
time_limit,
job.slurm_opts,
build_env_cfg[config.BUILDENV_SETTING_BUILD_JOB_SCRIPT],
])
job.slurm_opts] +
([f"--job-name='{job_name}'"] if job_name else []) +
[build_env_cfg[config.BUILDENV_SETTING_BUILD_JOB_SCRIPT]])

cmdline_output, cmdline_error, cmdline_exit_code = run_cmd(command_line,
"submit job for target '%s'" % job.arch_target,
Expand Down
2 changes: 2 additions & 0 deletions tests/test_app.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

# sample config file for tests (some functions run config.read_config()
# which reads app.cfg by default)
[buildenv]

[job_manager]

# variable 'comment' under 'submitted_job_comments' should not be changed as there are regular expression patterns matching it
Expand Down
1 change: 1 addition & 0 deletions tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS = 'cvmfs_customizations'
BUILDENV_SETTING_HTTPS_PROXY = 'https_proxy'
BUILDENV_SETTING_HTTP_PROXY = 'http_proxy'
BUILDENV_SETTING_JOB_NAME = 'job_name'
BUILDENV_SETTING_JOBS_BASE_DIR = 'jobs_base_dir'
BUILDENV_SETTING_LOAD_MODULES = 'load_modules'
BUILDENV_SETTING_LOCAL_TMP = 'local_tmp'
Expand Down

0 comments on commit b7ffe3b

Please sign in to comment.