Skip to content

Commit

Permalink
Merge pull request #170 from jonas-lq/issue#109
Browse files Browse the repository at this point in the history
Add bot comments in app.cfg.example

Looks good. Tested with trz42/software-layer#61

Minor polishing which can be done in a follow-up PR.
  • Loading branch information
trz42 authored Apr 6, 2023
2 parents feb7493 + 3d39ee0 commit 6241d41
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 49 deletions.
21 changes: 21 additions & 0 deletions app.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,24 @@ poll_interval = 60

# full path to the command for manipulating existing jobs
scontrol_command = /usr/bin/scontrol

# the comment templates in section 'submitted_job_comments' should not be changed because there are regular expression patterns matching them
[submitted_job_comments]
initial_comment = New job on instance `{app_name}` for architecture `{arch_name}` for repository `{repo_id}` in job dir `{symlink}`
awaits_release = job id `{job_id}` awaits release by job manager

[new_job_comments]
awaits_lauch = job awaits launch by Slurm scheduler

[running_job_comments]
running_job = job `{job_id}` is running

[finished_job_comments]
success = :grin: SUCCESS tarball `{tarball_name}` ({tarball_size} GiB) in job dir
failure = :cry: FAILURE
no_slurm_out = No slurm output `{slurm_out}` in job dir
slurm_out = Found slurm output `{slurm_out}` in job dir
missing_modules = Slurm output lacks message "No missing modules!".
no_tarball_message = Slurm output lacks message about created tarball.
no_matching_tarball = No tarball matching `{tarball_pattern}` found in job dir.
multiple_tarballs = Found {num_tarballs} tarballs in job dir - only 1 matching `{tarball_pattern}` expected.
8 changes: 6 additions & 2 deletions eessi_bot_event_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#
import waitress
import sys
import tasks.build as build

from connections import github
from tools import config
Expand Down Expand Up @@ -162,8 +163,11 @@ def main():
"""Main function."""
opts = event_handler_parse()

# config is read to raise an exception early when the event_handler starts.
config.read_config()
required_config = {
build.SUBMITTED_JOB_COMMENTS: [build.INITIAL_COMMENT, build.AWAITS_RELEASE]
}
# config is read and checked for settings to raise an exception early when the event_handler starts.
config.check_required_cfg_settings(required_config)
github.connect()

if opts.file:
Expand Down
90 changes: 50 additions & 40 deletions eessi_bot_job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,27 @@

from pyghee.utils import log, error

# Names of the configuration sections that hold the comment templates used
# by the job manager, each followed by the names of the items it must define.

# section for jobs that were just released
NEW_JOB_COMMENTS = "new_job_comments"
# NOTE the spelling "lauch" is the literal key name used in the configuration
# file, so it has to stay as it is here
AWAITS_LAUCH = "awaits_lauch"

# section for jobs that are running
RUNNING_JOB_COMMENTS = "running_job_comments"
RUNNING_JOB = "running_job"

# section for jobs that have finished
FINISHED_JOB_COMMENTS = "finished_job_comments"
SUCCESS = "success"
FAILURE = "failure"
NO_SLURM_OUT = "no_slurm_out"
SLURM_OUT = "slurm_out"
MISSING_MODULES = "missing_modules"
NO_TARBALL_MESSAGE = "no_tarball_message"
NO_MATCHING_TARBALL = "no_matching_tarball"
MULTIPLE_TARBALLS = "multiple_tarballs"

# settings (section -> list of items) that must be present in the
# configuration file; checked once when the job manager starts
REQUIRED_CONFIG = {
    NEW_JOB_COMMENTS: [
        AWAITS_LAUCH,
    ],
    RUNNING_JOB_COMMENTS: [
        RUNNING_JOB,
    ],
    FINISHED_JOB_COMMENTS: [
        SUCCESS,
        FAILURE,
        NO_SLURM_OUT,
        SLURM_OUT,
        MISSING_MODULES,
        NO_TARBALL_MESSAGE,
        NO_MATCHING_TARBALL,
        MULTIPLE_TARBALLS,
    ],
}


class EESSIBotSoftwareLayerJobManager:
"main class for (Slurm) job manager of EESSI bot (separate process)"
Expand Down Expand Up @@ -291,9 +312,10 @@ def process_new_job(self, new_job):
# (c) add a row to the table
# add row to status table if we found a comment
if "comment_id" in new_job:
new_job_comments_cfg = config.read_config()[NEW_JOB_COMMENTS]
dt = datetime.now(timezone.utc)
update = "\n|%s|" % dt.strftime("%b %d %X %Z %Y")
update += "released|job awaits launch by Slurm scheduler|"
update = "\n|%s|released|" % dt.strftime("%b %d %X %Z %Y")
update += f"{new_job_comments_cfg[AWAITS_LAUCH]}|"
update_comment(new_job["comment_id"], pr, update)
else:
log(
Expand Down Expand Up @@ -363,12 +385,13 @@ def process_running_jobs(self, running_job):

if "comment_id" in running_job:
dt = datetime.now(timezone.utc)
running_msg = "job %s is running" % running_job['jobid']
running_job_comments_cfg = config.read_config()[RUNNING_JOB_COMMENTS]
running_msg = running_job_comments_cfg[RUNNING_JOB].format(job_id=running_job['jobid'])
if "comment_body" in running_job and running_msg in running_job["comment_body"]:
log("Not updating comment, '%s' already found" % running_msg)
else:
update = "\n|%s|" % dt.strftime("%b %d %X %Z %Y")
update += "running|" + running_msg
update = f"\n|{dt.strftime('%b %d %X %Z %Y')}|running|"
update += f"{running_msg}|"
update_comment(running_job["comment_id"], pullrequest, update)
else:
log(
Expand Down Expand Up @@ -463,20 +486,20 @@ def process_finished_job(self, finished_job):

dt = datetime.now(timezone.utc)

finished_job_comments_cfg = config.read_config()[FINISHED_JOB_COMMENTS]
comment_update = f"\n|{dt.strftime('%b %d %X %Z %Y')}|finished|"
if (no_missing_modules and targz_created and
len(eessi_tarballs) == 1):
# We've got one tarball and slurm out messages are ok
# Prepare a message with information such as
# (installation status, tarball name, tarball size)
comment_update = "\n|%s|finished|:grin: SUCCESS " % dt.strftime(
"%b %d %X %Z %Y")

tarball_name = os.path.basename(eessi_tarballs[0])
tarball_size = os.path.getsize(eessi_tarballs[0]) / 2**30
comment_update += "tarball <code>%s</code> (%.3f GiB) " % (
os.path.basename(eessi_tarballs[0]),
tarball_size,
success_comment = finished_job_comments_cfg[SUCCESS].format(
tarball_name=tarball_name,
tarball_size=tarball_size
)
comment_update += "in job dir|"
comment_update += f"{success_comment}|"
# NOTE explicitly name repo in build job comment?
# comment_update += '\nAwaiting approval to
# comment_update += ingest tarball into the repository.'
Expand All @@ -489,52 +512,39 @@ def process_finished_job(self, finished_job):
# prepare a message with details about the above conditions and
# update PR with a comment

comment_update = "\n|%s|finished|:cry: FAILURE <ul>" % dt.strftime(
"%b %d %X %Z %Y"
)
comment_update += f"{finished_job_comments_cfg[FAILURE]} <ul>"
found_slurm_out = os.path.exists(slurm_out)

if not found_slurm_out:
# no slurm out ... something went wrong with the job
comment_update += (
"<li>No slurm output <code>%s</code> in job dir</li>"
% os.path.basename(slurm_out)
# no slurm out ... something went wrong with the job
comment_update += f"<li> {finished_job_comments_cfg[NO_SLURM_OUT]} </li>".format(
slurm_out=os.path.basename(slurm_out)
)
else:
comment_update += (
"<li>Found slurm output <code>%s</code> in job dir</li>"
% os.path.basename(slurm_out)
comment_update += f"<li> {finished_job_comments_cfg[SLURM_OUT]} </li>".format(
slurm_out=os.path.basename(slurm_out)
)

if found_slurm_out and not no_missing_modules:
# Found slurm out, but doesn't contain message 'No missing modules!'
comment_update += (
'<li>Slurm output lacks message "No missing modules!".</li>'
)
comment_update += f"<li> {finished_job_comments_cfg[MISSING_MODULES]} </li>"

if found_slurm_out and not targz_created:
# Found slurm out, but doesn't contain message
# 'eessi-.*-software-.*.tar.gz created!'
comment_update += (
"<li>Slurm output lacks message about created tarball.</li>"
)
comment_update += f"<li> {finished_job_comments_cfg[NO_TARBALL_MESSAGE]} </li>"

if len(eessi_tarballs) == 0:
# no luck, job just seemed to have failed ...
comment_update += (
"<li>No tarball matching <code>%s</code> found in job dir.</li>"
% tarball_pattern.replace(r"*", r"\*")
comment_update += f"<li> {finished_job_comments_cfg[NO_MATCHING_TARBALL]} </li>".format(
tarball_pattern=tarball_pattern.replace(r"*", r"\*")
)

if len(eessi_tarballs) > 1:
# something's fishy, we only expected a single tar.gz file
comment_update += (
"<li>Found %d tarballs in job dir - only 1 "
"matching <code>%s</code> expected.</li>"
% (
len(eessi_tarballs),
tarball_pattern.replace(r"*", r"\*"),
)
comment_update += f"<li> {finished_job_comments_cfg[MULTIPLE_TARBALLS]} </li>".format(
num_tarballs=len(eessi_tarballs),
tarball_pattern=tarball_pattern.replace(r"*", r"\*")
)
comment_update += "</ul>|"
# comment_update += '\nAn admin may investigate what went wrong.
Expand Down Expand Up @@ -572,8 +582,8 @@ def main():

opts = job_manager_parse()

# config is read to raise an exception early when the job_manager runs.
config.read_config()
# config is read and checked for settings to raise an exception early when the job_manager runs.
config.check_required_cfg_settings(REQUIRED_CONFIG)
github.connect()

job_manager = EESSIBotSoftwareLayerJobManager()
Expand Down
20 changes: 13 additions & 7 deletions tasks/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,20 @@
from pyghee.utils import log, error
from tools import config, run_cmd

# item in section SUBMITTED_JOB_COMMENTS: template for the 'submitted' row of the job comment
AWAITS_RELEASE = "awaits_release"
BUILDENV = "buildenv"
BUILD_JOB_SCRIPT = "build_job_script"
CONTAINER_CACHEDIR = "container_cachedir"
DEFAULT_JOB_TIME_LIMIT = "24:00:00"
CVMFS_CUSTOMIZATIONS = "cvmfs_customizations"
HTTP_PROXY = "http_proxy"
HTTPS_PROXY = "https_proxy"
# item in section SUBMITTED_JOB_COMMENTS: template for the first line of the job comment
INITIAL_COMMENT = "initial_comment"
JOBS_BASE_DIR = "jobs_base_dir"
LOAD_MODULES = "load_modules"
LOCAL_TMP = "local_tmp"
SLURM_PARAMS = "slurm_params"
# name of the configuration section holding the templates for the initial job comment
SUBMITTED_JOB_COMMENTS = "submitted_job_comments"
SUBMIT_COMMAND = "submit_command"
BUILD_PERMISSION = "build_permission"
ARCHITECTURE_TARGETS = "architecturetargets"
Expand Down Expand Up @@ -552,14 +555,17 @@ def create_pr_comments(job, job_id, app_name, job_comment, pr, repo_name, gh, sy
dt = datetime.now(timezone.utc)

# construct initial job comment
job_comment = (f"New job on instance `{app_name}`"
f" for architecture `{arch_name}`"
f" for repository `{job.repo_id}`"
f" in job dir `{symlink}`\n"
f"|date|job status|comment|\n"
submitted_job_comments_cfg = config.read_config()[SUBMITTED_JOB_COMMENTS]
job_comment = (f"{submitted_job_comments_cfg[INITIAL_COMMENT]}"
f"\n|date|job status|comment|\n"
f"|----------|----------|------------------------|\n"
f"|{dt.strftime('%b %d %X %Z %Y')}|submitted|"
f"job id `{job_id}` awaits release by job manager|")
f"|{dt.strftime('%b %d %X %Z %Y')}|"
f"submitted|"
f"{submitted_job_comments_cfg[AWAITS_RELEASE]}|").format(app_name=app_name,
arch_name=arch_name,
symlink=symlink,
repo_id=job.repo_id,
job_id=job_id)

# create comment to pull request
repo = gh.get_repo(repo_name)
Expand Down
18 changes: 18 additions & 0 deletions tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,21 @@ def read_config(path='app.cfg'):
error(f"{fn}(): Unable to read configuration file {path}!\n{err}")

return config


def check_required_cfg_settings(req_settings, path="app.cfg"):
    """
    Reads the config file and checks if it contains the required settings, signaling an error if not

    Args:
        req_settings (dict (str, list)): required settings, mapping the name of
            each required section to the list of item names it must contain
        path (string): path to the configuration file

    Returns:
        None
    """
    # read the file given by 'path' so that the file being checked is the
    # same one that is named in the error messages below
    cfg = read_config(path)
    for section, items in req_settings.items():
        if section not in cfg:
            error(f'Missing section "{section}" in configuration file {path}.')
            # guard in case error() returns instead of aborting: the items of
            # a missing section cannot be looked up
            continue
        for item in items:
            if item not in cfg[section]:
                error(f'Missing configuration item "{item}" in section "{section}" of configuration file {path}.')

0 comments on commit 6241d41

Please sign in to comment.