Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add setting to give all jobs a unique name #273

Merged
merged 6 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,13 @@ package repositories. Typically these settings are set in the prologue of a
Slurm job. However, when entering the [EESSI compatibility layer](https://www.eessi.io/docs/compatibility_layer),
most environment settings are cleared. Hence, they need to be set again at a later stage.

```
job_name = JOB_NAME
```
Replace `JOB_NAME` with a string of at least 3 characters; it is used as the job
name when a job is submitted. The job manager uses this name to filter jobs, which
makes it possible to run multiple bot instances in the same Slurm environment.

```
jobs_base_dir = PATH_TO_JOBS_BASE_DIR
```
Expand Down
4 changes: 4 additions & 0 deletions app.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ container_cachedir = PATH_TO_SHARED_DIRECTORY
# http_proxy = http://PROXY_DNS:3128/
# https_proxy = http://PROXY_DNS:3128/

# Used to give all jobs of a bot instance the same name (at least 3 characters).
# Can be used to allow multiple bot instances to run on the same Slurm cluster.
job_name = prod

# directory under which the bot prepares directories per job
# structure created is as follows: YYYY.MM/pr_PR_NUMBER/event_EVENT_ID/run_RUN_NUMBER/OS+SUBDIR
jobs_base_dir = $HOME/jobs
Expand Down
1 change: 1 addition & 0 deletions eessi_bot_event_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
# config.BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS, # optional
# config.BUILDENV_SETTING_HTTPS_PROXY, # optional
# config.BUILDENV_SETTING_HTTP_PROXY, # optional
config.BUILDENV_SETTING_JOB_NAME, # required
config.BUILDENV_SETTING_JOBS_BASE_DIR, # required
# config.BUILDENV_SETTING_LOAD_MODULES, # optional
config.BUILDENV_SETTING_LOCAL_TMP, # required
Expand Down
8 changes: 8 additions & 0 deletions eessi_bot_job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@

# settings that are required in 'app.cfg'
REQUIRED_CONFIG = {
config.SECTION_BUILDENV: [
config.BUILDENV_SETTING_JOB_NAME], # required
config.SECTION_FINISHED_JOB_COMMENTS: [
config.FINISHED_JOB_COMMENTS_SETTING_JOB_RESULT_UNKNOWN_FMT, # required
config.FINISHED_JOB_COMMENTS_SETTING_JOB_TEST_UNKNOWN_FMT], # required
Expand Down Expand Up @@ -85,6 +87,10 @@ def __init__(self):
cfg = config.read_config()
job_manager_cfg = cfg[config.SECTION_JOB_MANAGER]
self.logfile = job_manager_cfg.get(config.JOB_MANAGER_SETTING_LOG_PATH)
buildenv_cfg = cfg[config.SECTION_BUILDENV]
self.job_name = buildenv_cfg.get(config.BUILDENV_SETTING_JOB_NAME)
if self.job_name and len(self.job_name) < 3:
raise Exception(f"job name ({self.job_name}) is shorter than 3 characters")

def get_current_jobs(self):
"""
Expand All @@ -106,6 +112,8 @@ def get_current_jobs(self):
raise Exception("Unable to find username")

squeue_cmd = "%s --long --noheader --user=%s" % (self.poll_command, username)
if self.job_name:
squeue_cmd += " --name='%s'" % self.job_name
squeue_output, squeue_err, squeue_exitcode = run_cmd(
squeue_cmd,
"get_current_jobs(): squeue command",
Expand Down
14 changes: 11 additions & 3 deletions tasks/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ def get_build_env_cfg(cfg):

buildenv = cfg[config.SECTION_BUILDENV]

job_name = buildenv.get(config.BUILDENV_SETTING_JOB_NAME)
log(f"{fn}(): job_name '{job_name}'")
config_data = {config.BUILDENV_SETTING_JOB_NAME: job_name}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This won't work, since config_data is overwritten below?

You should change the line 76 below to:

config_data[config.BUILDENV_SETTING_JOBS_BASE_DIR] = jobs_base_dir

This also shows this wasn't (re)tested?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep. Silly me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. Tested. Should work. 😬


jobs_base_dir = buildenv.get(config.BUILDENV_SETTING_JOBS_BASE_DIR)
log(f"{fn}(): jobs_base_dir '{jobs_base_dir}'")
config_data = {config.BUILDENV_SETTING_JOBS_BASE_DIR: jobs_base_dir}
Expand Down Expand Up @@ -640,6 +644,10 @@ def submit_job(job, cfg):

build_env_cfg = get_build_env_cfg(cfg)

# the job_name is used to filter jobs in case multiple bot
# instances run on the same system
job_name = cfg[config.SECTION_BUILDENV].get(config.BUILDENV_SETTING_JOB_NAME)

# add a default time limit of 24h to the job submit command if no other time
# limit is specified already
all_opts_str = " ".join([build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS], job.slurm_opts])
Expand All @@ -653,9 +661,9 @@ def submit_job(job, cfg):
build_env_cfg[config.BUILDENV_SETTING_SUBMIT_COMMAND],
build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS],
time_limit,
job.slurm_opts,
build_env_cfg[config.BUILDENV_SETTING_BUILD_JOB_SCRIPT],
])
job.slurm_opts] +
([f"--job-name='{job_name}'"] if job_name else []) +
[build_env_cfg[config.BUILDENV_SETTING_BUILD_JOB_SCRIPT]])

cmdline_output, cmdline_error, cmdline_exit_code = run_cmd(command_line,
"submit job for target '%s'" % job.arch_target,
Expand Down
2 changes: 2 additions & 0 deletions tests/test_app.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

# sample config file for tests (some functions run config.read_config()
# which reads app.cfg by default)
[buildenv]

[job_manager]

# variable 'comment' under 'submitted_job_comments' should not be changed as there are regular expression patterns matching it
Expand Down
1 change: 1 addition & 0 deletions tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS = 'cvmfs_customizations'
BUILDENV_SETTING_HTTPS_PROXY = 'https_proxy'
BUILDENV_SETTING_HTTP_PROXY = 'http_proxy'
BUILDENV_SETTING_JOB_NAME = 'job_name'
BUILDENV_SETTING_JOBS_BASE_DIR = 'jobs_base_dir'
BUILDENV_SETTING_LOAD_MODULES = 'load_modules'
BUILDENV_SETTING_LOCAL_TMP = 'local_tmp'
Expand Down
Loading