From afc2f67956e64ff0b2fe9ef4149ef546ed041f5c Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 16 Apr 2024 10:43:44 +0200 Subject: [PATCH 01/64] Preliminary work on PR cleanup task --- eessi_bot_event_handler.py | 42 ++++++++++++++++++++++++++++++++++++++ tasks/clean_up.py | 42 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 tasks/clean_up.py diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 49f10238..642e8bff 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -555,6 +555,48 @@ def start(self, app, port=3000): waitress.serve(app, listen='*:%s' % port) + def handle_pull_request_merged_event(self, event_info, pr): + """ + Handle events of type pull_request with the action merged. Main action + is to scan directories used and move them to the trash_bin. + + Args: + event_info (dict): event received by event_handler + pr (github.PullRequest.PullRequest): instance representing the pull request + + Returns: + github.IssueComment.IssueComment instance or None (note, github refers to + PyGithub, not the github from the internal connections module) + """ + self.log("PR merged: scanning directories used by PR") + + + # 1) determine the jobs that have been run for the PR + job_dirs = determine_job_dirs(pr.number) + + # 2) read location of trash_bin from cfg + merge_cfg = self.cfg['merge_cfg'] + trash_bin_dir = merge_cfg.get('trash_bin_dir') + + # 3) move the directories to the trash_bin + self.log("Moving directories to trash_bin") + move_to_trash_bin(trash_bin_dir, job_dirs) + + # 4) report move to pull request? + repo_name = pr.base.repo.full_name + gh = github.get_instance() + repo = gh.get_repo(repo_name) + pull_request = repo.get_pull(pr.number) + issue_comment = pull_request.create_issue_comment(comment) + return issue_comment + +def move_to_trash_bin(trash_bin_dir, job_dirs): + move_cmd = ["mv -t", trash_bin_dir] + for job_dir in job_dirs: + move_cmd.append(job_dir) + ' '.join(move_cmd) + out, err, ec = run_cmd(move_cmd, 'Move job directories to trash_bin', raise_on_error=False) + def main(): """ Main function which parses command line arguments, verifies if required diff --git a/tasks/clean_up.py b/tasks/clean_up.py new file mode 100644 index 00000000..830d9d2d --- /dev/null +++ b/tasks/clean_up.py @@ -0,0 +1,42 @@ +# This file is part of the EESSI build-and-deploy bot, +# see https://github.com/EESSI/eessi-bot-software-layer +# +# The bot helps with requests to add software installations to the +# EESSI software layer, see https://github.com/EESSI/software-layer +# +# author: Bob Droege (@bedroge) +# author: Kenneth Hoste (@boegel) +# author: Hafsa Naeem (@hafsa-naeem) +# author: Jacob Ziemke (@jacobz137) +# author: Jonas Qvigstad (@jonas-lq) +# author: Lara Ramona Peeters (@laraPPr) +# author: Pedro Santos Neves (@Neves-P) +# author: Thomas Roeblitz (@trz42) +# +# license: GPLv2 +# + +# Standard library imports +from collections import namedtuple +import configparser +from datetime import datetime, timezone +import json +import os +import shutil +import sys + +# Third party imports (anything installed into the local Python environment) +from pyghee.utils import error, log +from retry.api import retry_call + +# Local application imports (anything from EESSI/eessi-bot-software-layer) +from connections import github +from tools import config, pr_comments, run_cmd +from tools.job_metadata import create_metadata_file + +def move_to_trash_bin(trash_bin_dir, job_dirs): + move_cmd = ["mv -t", trash_bin_dir] + for job_dir in job_dirs: + move_cmd.append(job_dir) + ' '.join(move_cmd) + out, err, ec = run_cmd(move_cmd, 'Move job directories to trash_bin', raise_on_error=False) From 5e17612329a5acab0b9e79426ac4c70b2ed109f5 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 7 May 2024 08:53:00 +0200 Subject: [PATCH 02/64] WIP disk cleanup --- eessi_bot_event_handler.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 642e8bff..ab3a3c71 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -554,7 +554,6 @@ def start(self, app, port=3000): self.log(log_file_info) waitress.serve(app, listen='*:%s' % port) - def handle_pull_request_merged_event(self, event_info, pr): """ Handle events of type pull_request with the action merged. Main action @@ -570,13 +569,14 @@ def handle_pull_request_merged_event(self, event_info, pr): """ self.log("PR merged: scanning directories used by PR") - # 1) determine the jobs that have been run for the PR job_dirs = determine_job_dirs(pr.number) # 2) read location of trash_bin from cfg merge_cfg = self.cfg['merge_cfg'] trash_bin_dir = merge_cfg.get('trash_bin_dir') + # Subdirectory with date of move. Also with repository name. Handle symbolic links (later?) + # cron job deletes symlinks? # 3) move the directories to the trash_bin self.log("Moving directories to trash_bin") @@ -590,13 +590,6 @@ def handle_pull_request_merged_event(self, event_info, pr): issue_comment = pull_request.create_issue_comment(comment) return issue_comment -def move_to_trash_bin(trash_bin_dir, job_dirs): - move_cmd = ["mv -t", trash_bin_dir] - for job_dir in job_dirs: - move_cmd.append(job_dir) - ' '.join(move_cmd) - out, err, ec = run_cmd(move_cmd, 'Move job directories to trash_bin', raise_on_error=False) - def main(): """ Main function which parses command line arguments, verifies if required From 5d8f0a126a5dc21e570f3617f932666da42bc10e Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 7 May 2024 09:58:36 +0200 Subject: [PATCH 03/64] cleanup script WIP --- scripts/cleanup_pr_trash_bin.sh | 159 ++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100755 scripts/cleanup_pr_trash_bin.sh diff --git a/scripts/cleanup_pr_trash_bin.sh b/scripts/cleanup_pr_trash_bin.sh new file mode 100755 index 00000000..44755648 --- /dev/null +++ b/scripts/cleanup_pr_trash_bin.sh @@ -0,0 +1,159 @@ +#!/bin/bash +# +# GitHub App for the EESSI project +# +# A bot to help with requests to add software installations to the EESSI software layer, +# see https://github.com/EESSI/software-layer + +# This script cleans up (deletes) all build artefacts and temporary storage tarballs of a given PR +# +# author: Thomas Roeblitz (@trz42) +# +# license: GPLv2 +# + +SCRIPT_DIR=$(dirname $(realpath $BASH_SOURCE)) + +function display_help +{ + echo "Usage: $0 [OPTIONS] " >&2 + echo " -b | --jobs-base-dir DIRECTORY - jobs base directory [default: reads" >&2 + echo " value from bot config file app.cfg or .]" >&2 + echo " -D | --dry-run - only show commands that would be run" >&2 + echo " [default: false]" >&2 + echo " -h | --help - display this usage information" >&2 +} + +function get_trash_bin_dir +{ + app_cfg_path=${1} + grep trash_bin_dir ${app_cfg_path} | grep -v '^[ ]*#' | sed -e 's/^[^=]*=[ ]*//' +} + +echo + +if [[ $# -lt 1 ]]; then + display_help + exit 1 +fi + +# process command line args +POSITIONAL_ARGS=() + +jobs_base_dir= +dry_run=false + +while [[ $# -gt 0 ]]; do + case $1 in + -b|--jobs-base-dir) + if [[ $# -gt 1 ]]; then + jobs_base_dir="$2" + shift 2 + else + echo "Error: missing argument (directory) for parameter '${1}'" + exit 2 + fi + ;; + -D|--dry-run) + dry_run=true + shift 1 + ;; + -h|--help) + display_help + exit 0 + ;; + -*|--*) + echo "Error: Unknown option: $1" >&2 + exit 1 + ;; + *) # No more options + POSITIONAL_ARGS+=("$1") # save positional arg + shift + ;; + esac +done + +# restore potentially parsed filename(s) into $* +set -- "${POSITIONAL_ARGS[@]}" + +if [[ $# -ne 1 ]]; then + echo "Error: exactly one PR number should be provided as argument" + display_help + exit 3 +fi + +pull_request=${1} + +if ${dry_run} = true ; then + echo "DRY_RUN: not removing any files" +fi + +# determine jobs base dir if not given explicitly +# 1. check for file app.cfg in SCRIPT_DIR +# 2. check for file app.cfg in current dir +# if found try to obtain value of jobs_base_dir setting +# if not file not found or jobs_base_dir setting not found (or empty) --> error & exit +# TODO: Change jobs_base_dir to trash_bin +if [[ -z ${jobs_base_dir} ]]; then + echo "jobs base directory not given explicitly, trying to determine it" + if [[ -e ${SCRIPT_DIR}/app.cfg ]]; then + echo "check for app.cfg in '${SCRIPT_DIR}'" + jobs_base_dir=$(get_jobs_base_dir ${SCRIPT_DIR}/app.cfg) + else + if [[ -e ./app.cfg ]]; then + echo "check for app.cfg in '${PWD}' (current directory)" + jobs_base_dir=$(get_jobs_base_dir ./app.cfg) + fi + fi +fi +if [[ -z ${jobs_base_dir} ]]; then + echo "Error: jobs base directory is empty, please specify it as argument" + display_help + exit 4 +fi + +echo "processing all directories for PR ${pull_request}:" +find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request} | sed -e 's/^/ /' + +echo +echo "disk usage of directories for PR ${pull_request} BEFORE removing build artefacts and tmp storage" +for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done + +echo +echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing tmp storage tarballs for PR ${pull_request}" +for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}) +do + for f in $(find $d -type f -wholename "*[0-9].tgz") + do + if ${dry_run} = true ; then + echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))" + else + echo "Removing file '$f'" + rm $f + fi + done +done + +echo +echo "disk usage of directories for PR ${pull_request} AFTER removing tmp storage tarballs" +for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done + +echo +echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing build artefacts for PR ${pull_request}" +for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}) +do + for f in $(find $d -type f -wholename "*tar.gz") + do + if ${dry_run} = true ; then + echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))" + else + echo "Removing file '$f'" + rm $f + fi + done +done + +echo +echo "disk usage of directories for PR ${pull_request} AFTER removing build artefacts and tmp storage" +for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done + From 568e4c51b1c44e03865353cf91219e511924e913 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 7 May 2024 09:59:24 +0200 Subject: [PATCH 04/64] Make date subdiretory --- tasks/clean_up.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tasks/clean_up.py b/tasks/clean_up.py index 830d9d2d..669f970c 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -19,7 +19,6 @@ # Standard library imports from collections import namedtuple import configparser -from datetime import datetime, timezone import json import os import shutil @@ -34,9 +33,10 @@ from tools import config, pr_comments, run_cmd from tools.job_metadata import create_metadata_file + def move_to_trash_bin(trash_bin_dir, job_dirs): - move_cmd = ["mv -t", trash_bin_dir] - for job_dir in job_dirs: - move_cmd.append(job_dir) + move_cmd = ["mkdir -p tras_bin_dir && mv -t", trash_bin_dir] + for job_dir in job_dirs: + move_cmd.append(job_dir) ' '.join(move_cmd) out, err, ec = run_cmd(move_cmd, 'Move job directories to trash_bin', raise_on_error=False) From 7e91ce69948666d65a02e693248472e07b54c013 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 7 May 2024 10:00:11 +0200 Subject: [PATCH 05/64] Have subdirectory with date and comment --- eessi_bot_event_handler.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index d74955d8..ba6787b7 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -18,6 +18,7 @@ # Standard library imports import sys +from datetime import datetime, timezone # Third party imports (anything installed into the local Python environment) from pyghee.lib import create_app, get_event_info, PyGHee, read_event_from_json @@ -28,8 +29,9 @@ from connections import github from tasks.build import check_build_permission, get_architecture_targets, get_repo_cfg, \ request_bot_build_issue_comments, submit_build_jobs -from tasks.deploy import deploy_built_artefacts -from tools import config +from tasks.deploy import deploy_built_artefacts, determine_job_dirs +from tasks.clean_up import move_to_trash_bin +from tools import config, cvmfs_repository from tools.args import event_handler_parse from tools.commands import EESSIBotCommand, EESSIBotCommandError, \ contains_any_bot_command, get_bot_command @@ -613,18 +615,24 @@ def handle_pull_request_merged_event(self, event_info, pr): PyGithub, not the github from the internal connections module) """ self.log("PR merged: scanning directories used by PR") - + repo_cfg = get_repo_cfg(self.cfg) + repo_name = repo_cfg[cvmfs_repository.REPOS_CFG_REPO_NAME] + clean_up_comments_cfg = self.cfg[config.SECTION_CLEAN_UP_COMMENTS] # 1) determine the jobs that have been run for the PR job_dirs = determine_job_dirs(pr.number) # 2) read location of trash_bin from cfg merge_cfg = self.cfg['merge_cfg'] - trash_bin_dir = merge_cfg.get('trash_bin_dir') + repo_cfg = get_repo_cfg(self.cfg) + repo_name = repo_cfg[cvmfs_repository.REPOS_CFG_REPO_NAME] + dt = datetime.now(timezone.utc) + trash_bin_dir = "/".join([merge_cfg.get('trash_bin_dir'), repo_name, dt.strftime('%Y%m%d')]) + # Subdirectory with date of move. Also with repository name. Handle symbolic links (later?) # cron job deletes symlinks? # 3) move the directories to the trash_bin - self.log("Moving directories to trash_bin") + log("Moving directories to trash_bin") move_to_trash_bin(trash_bin_dir, job_dirs) # 4) report move to pull request? @@ -632,7 +640,8 @@ def handle_pull_request_merged_event(self, event_info, pr): gh = github.get_instance() repo = gh.get_repo(repo_name) pull_request = repo.get_pull(pr.number) - issue_comment = pull_request.create_issue_comment(comment) + moved_comment = clean_up_comments_cfg[config.CLEANUP_COMMENTS_MOVED] + issue_comment = pull_request.create_issue_comment(moved_comment) return issue_comment def main(): From 648232d4e06836b580deb0741921c67a03377981 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 7 May 2024 10:08:03 +0200 Subject: [PATCH 06/64] Typo --- tasks/clean_up.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/clean_up.py b/tasks/clean_up.py index 669f970c..b1ff954a 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -35,7 +35,7 @@ def move_to_trash_bin(trash_bin_dir, job_dirs): - move_cmd = ["mkdir -p tras_bin_dir && mv -t", trash_bin_dir] + move_cmd = ["mkdir -p trash_bin_dir && mv -t", trash_bin_dir] for job_dir in job_dirs: move_cmd.append(job_dir) ' '.join(move_cmd) From 8e2082d6b009169dda0775e1fc85fcad493caf5a Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 4 Jun 2024 10:26:31 +0200 Subject: [PATCH 07/64] Better read from config and logging --- eessi_bot_event_handler.py | 29 +++++++++++++++++++++-------- tasks/clean_up.py | 17 +++++++---------- tools/config.py | 6 ++++++ 3 files changed, 34 insertions(+), 18 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index ba6787b7..955b3d7d 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -614,25 +614,37 @@ def handle_pull_request_merged_event(self, event_info, pr): github.IssueComment.IssueComment instance or None (note, github refers to PyGithub, not the github from the internal connections module) """ - self.log("PR merged: scanning directories used by PR") - repo_cfg = get_repo_cfg(self.cfg) - repo_name = repo_cfg[cvmfs_repository.REPOS_CFG_REPO_NAME] + + # Detect event and only act if PR is merged + request_body = event_info['raw_request_body'] + action = request_body['action'] + + if action == 'merged': + self.log("PR merged: scanning directories used by PR") + self.log(f"merge '{action}' action is handled") + else: + self.log(f"merge action '{action}' not handled") + return + # at this point we know that we are handling a new merge + # NOTE: Permissions to merge are already handled through GitHub, we + # don't need to check here + clean_up_comments_cfg = self.cfg[config.SECTION_CLEAN_UP_COMMENTS] # 1) determine the jobs that have been run for the PR job_dirs = determine_job_dirs(pr.number) - # 2) read location of trash_bin from cfg - merge_cfg = self.cfg['merge_cfg'] + # 2) Get trash_bin_dir from configs + trash_bin_root_dir = self.cfg[config.SECTION_MERGED_PR][config.MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR] repo_cfg = get_repo_cfg(self.cfg) repo_name = repo_cfg[cvmfs_repository.REPOS_CFG_REPO_NAME] dt = datetime.now(timezone.utc) - trash_bin_dir = "/".join([merge_cfg.get('trash_bin_dir'), repo_name, dt.strftime('%Y%m%d')]) + trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m%d')]) # Subdirectory with date of move. Also with repository name. Handle symbolic links (later?) # cron job deletes symlinks? # 3) move the directories to the trash_bin - log("Moving directories to trash_bin") + self.log("Moving directories to trash_bin") move_to_trash_bin(trash_bin_dir, job_dirs) # 4) report move to pull request? @@ -640,10 +652,11 @@ def handle_pull_request_merged_event(self, event_info, pr): gh = github.get_instance() repo = gh.get_repo(repo_name) pull_request = repo.get_pull(pr.number) - moved_comment = clean_up_comments_cfg[config.CLEANUP_COMMENTS_MOVED] + moved_comment = clean_up_comments_cfg[config.CLEAN_UP_COMMENTS_SETTING_MOVED_COMMENT] issue_comment = pull_request.create_issue_comment(moved_comment) return issue_comment + def main(): """ Main function which parses command line arguments, verifies if required diff --git a/tasks/clean_up.py b/tasks/clean_up.py index b1ff954a..5f91f932 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -17,26 +17,23 @@ # # Standard library imports -from collections import namedtuple -import configparser -import json -import os -import shutil import sys # Third party imports (anything installed into the local Python environment) -from pyghee.utils import error, log -from retry.api import retry_call +from pyghee.utils import log # Local application imports (anything from EESSI/eessi-bot-software-layer) -from connections import github -from tools import config, pr_comments, run_cmd -from tools.job_metadata import create_metadata_file +from tools import run_cmd def move_to_trash_bin(trash_bin_dir, job_dirs): + funcname = sys._getframe().f_code.co_name + log(f"{funcname}(): trash_bin_dir = {trash_bin_dir}") + move_cmd = ["mkdir -p trash_bin_dir && mv -t", trash_bin_dir] for job_dir in job_dirs: move_cmd.append(job_dir) ' '.join(move_cmd) out, err, ec = run_cmd(move_cmd, 'Move job directories to trash_bin', raise_on_error=False) + + return diff --git a/tools/config.py b/tools/config.py index dcffe03d..8c0e9c00 100644 --- a/tools/config.py +++ b/tools/config.py @@ -105,6 +105,12 @@ SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT = 'initial_comment' SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE = 'awaits_release' +SECTION_MERGED_PR = 'merged_pr' +MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR = 'trash_bin_dir' + +SECTION_CLEAN_UP_COMMENTS = 'clean_up_comments' +CLEAN_UP_COMMENTS_SETTING_MOVED_COMMENT = 'moved_comment' + def read_config(path='app.cfg'): """ From d5f1d39803ff692c3a79fd7f4ebd9269ee0abb73 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 4 Jun 2024 10:36:47 +0200 Subject: [PATCH 08/64] Scripting & cron job follow later. Closes #1 --- scripts/cleanup_pr.sh | 158 ------------------------------- scripts/cleanup_pr_trash_bin.sh | 159 -------------------------------- 2 files changed, 317 deletions(-) delete mode 100755 scripts/cleanup_pr.sh delete mode 100755 scripts/cleanup_pr_trash_bin.sh diff --git a/scripts/cleanup_pr.sh b/scripts/cleanup_pr.sh deleted file mode 100755 index 41527675..00000000 --- a/scripts/cleanup_pr.sh +++ /dev/null @@ -1,158 +0,0 @@ -#!/bin/bash -# -# GitHub App for the EESSI project -# -# A bot to help with requests to add software installations to the EESSI software layer, -# see https://github.com/EESSI/software-layer - -# This script cleans up (deletes) all build artefacts and temporary storage tarballs of a given PR -# -# author: Thomas Roeblitz (@trz42) -# -# license: GPLv2 -# - -SCRIPT_DIR=$(dirname $(realpath $BASH_SOURCE)) - -function display_help -{ - echo "Usage: $0 [OPTIONS] " >&2 - echo " -b | --jobs-base-dir DIRECTORY - jobs base directory [default: reads" >&2 - echo " value from bot config file app.cfg or .]" >&2 - echo " -D | --dry-run - only show commands that would be run" >&2 - echo " [default: false]" >&2 - echo " -h | --help - display this usage information" >&2 -} - -function get_jobs_base_dir -{ - app_cfg_path=${1} - grep jobs_base_dir ${app_cfg_path} | grep -v '^[ ]*#' | sed -e 's/^[^=]*=[ ]*//' -} - -echo - -if [[ $# -lt 1 ]]; then - display_help - exit 1 -fi - -# process command line args -POSITIONAL_ARGS=() - -jobs_base_dir= -dry_run=false - -while [[ $# -gt 0 ]]; do - case $1 in - -b|--jobs-base-dir) - if [[ $# -gt 1 ]]; then - jobs_base_dir="$2" - shift 2 - else - echo "Error: missing argument (directory) for parameter '${1}'" - exit 2 - fi - ;; - -D|--dry-run) - dry_run=true - shift 1 - ;; - -h|--help) - display_help - exit 0 - ;; - -*|--*) - echo "Error: Unknown option: $1" >&2 - exit 1 - ;; - *) # No more options - POSITIONAL_ARGS+=("$1") # save positional arg - shift - ;; - esac -done - -# restore potentially parsed filename(s) into $* -set -- "${POSITIONAL_ARGS[@]}" - -if [[ $# -ne 1 ]]; then - echo "Error: exactly one PR number should be provided as argument" - display_help - exit 3 -fi - -pull_request=${1} - -if ${dry_run} = true ; then - echo "DRY_RUN: not removing any files" -fi - -# determine jobs base dir if not given explicitly -# 1. check for file app.cfg in SCRIPT_DIR -# 2. check for file app.cfg in current dir -# if found try to obtain value of jobs_base_dir setting -# if not file not found or jobs_base_dir setting not found (or empty) --> error & exit -if [[ -z ${jobs_base_dir} ]]; then - echo "jobs base directory not given explicitly, trying to determine it" - if [[ -e ${SCRIPT_DIR}/app.cfg ]]; then - echo "check for app.cfg in '${SCRIPT_DIR}'" - jobs_base_dir=$(get_jobs_base_dir ${SCRIPT_DIR}/app.cfg) - else - if [[ -e ./app.cfg ]]; then - echo "check for app.cfg in '${PWD}' (current directory)" - jobs_base_dir=$(get_jobs_base_dir ./app.cfg) - fi - fi -fi -if [[ -z ${jobs_base_dir} ]]; then - echo "Error: jobs base directory is empty, please specify it as argument" - display_help - exit 4 -fi - -echo "processing all directories for PR ${pull_request}:" -find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request} | sed -e 's/^/ /' - -echo -echo "disk usage of directories for PR ${pull_request} BEFORE removing build artefacts and tmp storage" -for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done - -echo -echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing tmp storage tarballs for PR ${pull_request}" -for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}) -do - for f in $(find $d -type f -wholename "*[0-9].tgz") - do - if ${dry_run} = true ; then - echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))" - else - echo "Removing file '$f'" - rm $f - fi - done -done - -echo -echo "disk usage of directories for PR ${pull_request} AFTER removing tmp storage tarballs" -for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done - -echo -echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing build artefacts for PR ${pull_request}" -for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}) -do - for f in $(find $d -type f -wholename "*tar.gz") - do - if ${dry_run} = true ; then - echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))" - else - echo "Removing file '$f'" - rm $f - fi - done -done - -echo -echo "disk usage of directories for PR ${pull_request} AFTER removing build artefacts and tmp storage" -for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done - diff --git a/scripts/cleanup_pr_trash_bin.sh b/scripts/cleanup_pr_trash_bin.sh deleted file mode 100755 index 44755648..00000000 --- a/scripts/cleanup_pr_trash_bin.sh +++ /dev/null @@ -1,159 +0,0 @@ -#!/bin/bash -# -# GitHub App for the EESSI project -# -# A bot to help with requests to add software installations to the EESSI software layer, -# see https://github.com/EESSI/software-layer - -# This script cleans up (deletes) all build artefacts and temporary storage tarballs of a given PR -# -# author: Thomas Roeblitz (@trz42) -# -# license: GPLv2 -# - -SCRIPT_DIR=$(dirname $(realpath $BASH_SOURCE)) - -function display_help -{ - echo "Usage: $0 [OPTIONS] " >&2 - echo " -b | --jobs-base-dir DIRECTORY - jobs base directory [default: reads" >&2 - echo " value from bot config file app.cfg or .]" >&2 - echo " -D | --dry-run - only show commands that would be run" >&2 - echo " [default: false]" >&2 - echo " -h | --help - display this usage information" >&2 -} - -function get_trash_bin_dir -{ - app_cfg_path=${1} - grep trash_bin_dir ${app_cfg_path} | grep -v '^[ ]*#' | sed -e 's/^[^=]*=[ ]*//' -} - -echo - -if [[ $# -lt 1 ]]; then - display_help - exit 1 -fi - -# process command line args -POSITIONAL_ARGS=() - -jobs_base_dir= -dry_run=false - -while [[ $# -gt 0 ]]; do - case $1 in - -b|--jobs-base-dir) - if [[ $# -gt 1 ]]; then - jobs_base_dir="$2" - shift 2 - else - echo "Error: missing argument (directory) for parameter '${1}'" - exit 2 - fi - ;; - -D|--dry-run) - dry_run=true - shift 1 - ;; - -h|--help) - display_help - exit 0 - ;; - -*|--*) - echo "Error: Unknown option: $1" >&2 - exit 1 - ;; - *) # No more options - POSITIONAL_ARGS+=("$1") # save positional arg - shift - ;; - esac -done - -# restore potentially parsed filename(s) into $* -set -- "${POSITIONAL_ARGS[@]}" - -if [[ $# -ne 1 ]]; then - echo "Error: exactly one PR number should be provided as argument" - display_help - exit 3 -fi - -pull_request=${1} - -if ${dry_run} = true ; then - echo "DRY_RUN: not removing any files" -fi - -# determine jobs base dir if not given explicitly -# 1. check for file app.cfg in SCRIPT_DIR -# 2. check for file app.cfg in current dir -# if found try to obtain value of jobs_base_dir setting -# if not file not found or jobs_base_dir setting not found (or empty) --> error & exit -# TODO: Change jobs_base_dir to trash_bin -if [[ -z ${jobs_base_dir} ]]; then - echo "jobs base directory not given explicitly, trying to determine it" - if [[ -e ${SCRIPT_DIR}/app.cfg ]]; then - echo "check for app.cfg in '${SCRIPT_DIR}'" - jobs_base_dir=$(get_jobs_base_dir ${SCRIPT_DIR}/app.cfg) - else - if [[ -e ./app.cfg ]]; then - echo "check for app.cfg in '${PWD}' (current directory)" - jobs_base_dir=$(get_jobs_base_dir ./app.cfg) - fi - fi -fi -if [[ -z ${jobs_base_dir} ]]; then - echo "Error: jobs base directory is empty, please specify it as argument" - display_help - exit 4 -fi - -echo "processing all directories for PR ${pull_request}:" -find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request} | sed -e 's/^/ /' - -echo -echo "disk usage of directories for PR ${pull_request} BEFORE removing build artefacts and tmp storage" -for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done - -echo -echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing tmp storage tarballs for PR ${pull_request}" -for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}) -do - for f in $(find $d -type f -wholename "*[0-9].tgz") - do - if ${dry_run} = true ; then - echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))" - else - echo "Removing file '$f'" - rm $f - fi - done -done - -echo -echo "disk usage of directories for PR ${pull_request} AFTER removing tmp storage tarballs" -for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done - -echo -echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing build artefacts for PR ${pull_request}" -for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}) -do - for f in $(find $d -type f -wholename "*tar.gz") - do - if ${dry_run} = true ; then - echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))" - else - echo "Removing file '$f'" - rm $f - fi - done -done - -echo -echo "disk usage of directories for PR ${pull_request} AFTER removing build artefacts and tmp storage" -for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done - From e0910f60e64d8294092bfa857e44f42474bbd2eb Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 4 Jun 2024 13:26:45 +0200 Subject: [PATCH 09/64] Treat hound to some white spaces --- eessi_bot_event_handler.py | 2 +- tasks/clean_up.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 955b3d7d..7f7680bd 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -639,7 +639,7 @@ def handle_pull_request_merged_event(self, event_info, pr): repo_name = repo_cfg[cvmfs_repository.REPOS_CFG_REPO_NAME] dt = datetime.now(timezone.utc) trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m%d')]) - + # Subdirectory with date of move. Also with repository name. Handle symbolic links (later?) # cron job deletes symlinks? diff --git a/tasks/clean_up.py b/tasks/clean_up.py index 5f91f932..1916a1d7 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -29,7 +29,7 @@ def move_to_trash_bin(trash_bin_dir, job_dirs): funcname = sys._getframe().f_code.co_name log(f"{funcname}(): trash_bin_dir = {trash_bin_dir}") - + move_cmd = ["mkdir -p trash_bin_dir && mv -t", trash_bin_dir] for job_dir in job_dirs: move_cmd.append(job_dir) From d6452028f0b2d9bd9725376ce7ab1cd39e61fd83 Mon Sep 17 00:00:00 2001 From: Pedro Santos Neves <10762799+Neves-P@users.noreply.github.com> Date: Mon, 10 Jun 2024 13:16:18 +0200 Subject: [PATCH 10/64] Update eessi_bot_event_handler.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Thomas Röblitz --- eessi_bot_event_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 7f7680bd..82ccfa16 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -621,7 +621,7 @@ def handle_pull_request_merged_event(self, event_info, pr): if action == 'merged': self.log("PR merged: scanning directories used by PR") - self.log(f"merge '{action}' action is handled") + self.log(f"pull_request event with action '{action}' will be handled") else: self.log(f"merge action '{action}' not handled") return From d75bb11193696f24e82bf72b446d325864c92086 Mon Sep 17 00:00:00 2001 From: Pedro Santos Neves <10762799+Neves-P@users.noreply.github.com> Date: Mon, 10 Jun 2024 13:17:21 +0200 Subject: [PATCH 11/64] Update tasks/clean_up.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Thomas Röblitz --- tasks/clean_up.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tasks/clean_up.py b/tasks/clean_up.py index 1916a1d7..c36a78fc 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -4,14 +4,7 @@ # The bot helps with requests to add software installations to the # EESSI software layer, see https://github.com/EESSI/software-layer # -# author: Bob Droege (@bedroge) -# author: Kenneth Hoste (@boegel) -# author: Hafsa Naeem (@hafsa-naeem) -# author: Jacob Ziemke (@jacobz137) -# author: Jonas Qvigstad (@jonas-lq) -# author: Lara Ramona Peeters (@laraPPr) # author: Pedro Santos Neves (@Neves-P) -# author: Thomas Roeblitz (@trz42) # # license: GPLv2 # From 435bcae9e0f1027489d27470f0e4b6a01412055f Mon Sep 17 00:00:00 2001 From: Neves-P Date: Mon, 10 Jun 2024 14:23:38 +0200 Subject: [PATCH 12/64] Handle based on `merged` not `action` from `request_body` --- eessi_bot_event_handler.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 82ccfa16..a2125ea6 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -618,12 +618,13 @@ def handle_pull_request_merged_event(self, event_info, pr): # Detect event and only act if PR is merged request_body = event_info['raw_request_body'] action = request_body['action'] + merged = request_body['merged'] - if action == 'merged': + if merged == 'true': self.log("PR merged: scanning directories used by PR") - self.log(f"pull_request event with action '{action}' will be handled") + self.log(f"pull_request event with action '{action}' and merged '{merged}' will be handled") else: - self.log(f"merge action '{action}' not handled") + self.log(f"Action '{action}' not handled as merged is '{merged}'") return # at this point we know that we are handling a new merge # NOTE: Permissions to merge are already handled through GitHub, we From 6b38f373fb14dc31affc5c9a5a4ce0109bc53204 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Mon, 10 Jun 2024 14:24:09 +0200 Subject: [PATCH 13/64] Add docstring and use python routines (not shell commands) --- tasks/clean_up.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tasks/clean_up.py b/tasks/clean_up.py index c36a78fc..96b3b0c1 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -11,22 +11,31 @@ # Standard library imports import sys +import os +import shutil # Third party imports (anything installed into the local Python environment) from pyghee.utils import log # Local application imports (anything from EESSI/eessi-bot-software-layer) -from tools import run_cmd def move_to_trash_bin(trash_bin_dir, job_dirs): + """ + Move directory to trash_bin_dir + + Args: + trash_bin_dir (string): path to the trash_bin_dir. Defined in .cfg + job_dirs (list): list with job directory names + + Returns: + None (implicitly) + """ funcname = sys._getframe().f_code.co_name log(f"{funcname}(): trash_bin_dir = {trash_bin_dir}") - move_cmd = ["mkdir -p trash_bin_dir && mv -t", trash_bin_dir] + os.makedirs(trash_bin_dir, exist_ok=True) for job_dir in job_dirs: - move_cmd.append(job_dir) - ' '.join(move_cmd) - out, err, ec = run_cmd(move_cmd, 'Move job directories to trash_bin', raise_on_error=False) - - return + destination_dir = shutil.move(job_dir, trash_bin_dir) + log(f"{funcname}(): moved {job_dir} to {destination_dir}") + return True From 769daec68918d09ee586dfac4795c0cc7719bced Mon Sep 17 00:00:00 2001 From: Neves-P Date: Mon, 10 Jun 2024 16:42:02 +0200 Subject: [PATCH 14/64] Actions are 'closed', not 'merged' --- eessi_bot_event_handler.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index a2125ea6..a9ccca3f 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -31,7 +31,7 @@ request_bot_build_issue_comments, submit_build_jobs from tasks.deploy import deploy_built_artefacts, determine_job_dirs from tasks.clean_up import move_to_trash_bin -from tools import config, cvmfs_repository +from tools import config from tools.args import event_handler_parse from tools.commands import EESSIBotCommand, EESSIBotCommandError, \ contains_any_bot_command, get_bot_command @@ -601,10 +601,11 @@ def start(self, app, port=3000): self.log(log_file_info) waitress.serve(app, listen='*:%s' % port) - def handle_pull_request_merged_event(self, event_info, pr): + def handle_pull_request_closed_event(self, event_info, pr): """ - Handle events of type pull_request with the action merged. Main action - is to scan directories used and move them to the trash_bin. + Handle events of type pull_request with the action 'closed'. Main action + is to scan directories used and move them to the trash_bin when the PR + is merged. Args: event_info (dict): event received by event_handler @@ -636,8 +637,8 @@ def handle_pull_request_merged_event(self, event_info, pr): # 2) Get trash_bin_dir from configs trash_bin_root_dir = self.cfg[config.SECTION_MERGED_PR][config.MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR] - repo_cfg = get_repo_cfg(self.cfg) - repo_name = repo_cfg[cvmfs_repository.REPOS_CFG_REPO_NAME] + + repo_name = event_info['full_name'] dt = datetime.now(timezone.utc) trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m%d')]) From 1c4731588efead74f1fcc00cbccc33dcbc5c2ad7 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Mon, 10 Jun 2024 17:24:57 +0200 Subject: [PATCH 15/64] Read from config properly --- eessi_bot_event_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index a9ccca3f..fdc66a5a 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -619,7 +619,7 @@ def handle_pull_request_closed_event(self, event_info, pr): # Detect event and only act if PR is merged request_body = event_info['raw_request_body'] action = request_body['action'] - merged = request_body['merged'] + merged = request_body['pull_request']['merged'] if merged == 'true': self.log("PR merged: scanning directories used by PR") @@ -638,7 +638,7 @@ def handle_pull_request_closed_event(self, event_info, pr): # 2) Get trash_bin_dir from configs trash_bin_root_dir = self.cfg[config.SECTION_MERGED_PR][config.MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR] - repo_name = event_info['full_name'] + repo_name = event_info['repository']['full_name'] dt = datetime.now(timezone.utc) trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m%d')]) From 1a6eab3eac45646bd51ccbbfa62210e6b1a2c351 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Mon, 10 Jun 2024 17:42:44 +0200 Subject: [PATCH 16/64] Fix typo in `merged`. true -> True --- eessi_bot_event_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index fdc66a5a..f60f91b0 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -621,7 +621,7 @@ def handle_pull_request_closed_event(self, event_info, pr): action = request_body['action'] merged = request_body['pull_request']['merged'] - if merged == 'true': + if merged == 'True': self.log("PR merged: scanning directories used by PR") self.log(f"pull_request event with action '{action}' and merged '{merged}' will be handled") else: From 0b913432097d319207f2bf44757a8b8601dc6fe1 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 11 Jun 2024 16:41:46 +0200 Subject: [PATCH 17/64] merged is a Bool (?) --- eessi_bot_event_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index f60f91b0..3d6dc6d6 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -621,11 +621,11 @@ def handle_pull_request_closed_event(self, event_info, pr): action = request_body['action'] merged = request_body['pull_request']['merged'] - if merged == 'True': + if merged: self.log("PR merged: scanning directories used by PR") self.log(f"pull_request event with action '{action}' and merged '{merged}' will be handled") else: - self.log(f"Action '{action}' not handled as merged is '{merged}'") + self.log(f"Action '{action}' not handled as 'merged' is '{merged}'") return # at this point we know that we are handling a new merge # NOTE: Permissions to merge are already handled through GitHub, we From 784c9b9cb2bdeab6a796b4a3e55404c6426e5048 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 11 Jun 2024 16:56:25 +0200 Subject: [PATCH 18/64] Better formatting for merge conflict errors --- tasks/build.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 82a0911e..527758d7 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -381,21 +381,21 @@ def comment_download_pr(base_repo_name, pr, download_pr_exit_code, download_pr_e download_pr_comments_cfg = config.read_config()[config.SECTION_DOWNLOAD_PR_COMMENTS] if error_stage == _ERROR_GIT_CLONE: - download_comment = (f"`{download_pr_error}`" + download_comment = (f"`{download_pr_error}`\n" f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CLONE_FAILURE]}" - f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CLONE_TIP]}") + f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CLONE_TIP]}") elif error_stage == _ERROR_GIT_CHECKOUT: - download_comment = (f"`{download_pr_error}`" + download_comment = (f"`{download_pr_error}`\n" f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CHECKOUT_FAILURE]}" - f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CHECKOUT_TIP]}") + f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CHECKOUT_TIP]}") elif error_stage == _ERROR_CURL: - download_comment = (f"`{download_pr_error}`" + download_comment = (f"`{download_pr_error}`\n" f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_CURL_FAILURE]}" - f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_CURL_TIP]}") + f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_CURL_TIP]}") elif error_stage == _ERROR_GIT_APPLY: - download_comment = (f"`{download_pr_error}`" + download_comment = (f"`{download_pr_error}`\n" f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_APPLY_FAILURE]}" - f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_APPLY_TIP]}") + f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_APPLY_TIP]}") download_comment = pr_comments.create_comment( repo_name=base_repo_name, pr_number=pr.number, comment=download_comment From 4160bd1bbc563364df5a71478a0666a9f0150b94 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 11 Jun 2024 17:34:21 +0200 Subject: [PATCH 19/64] Fix bug in getting repository name --- eessi_bot_event_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 3d6dc6d6..3f9a8e1b 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -638,7 +638,7 @@ def handle_pull_request_closed_event(self, event_info, pr): # 2) Get trash_bin_dir from configs trash_bin_root_dir = self.cfg[config.SECTION_MERGED_PR][config.MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR] - repo_name = event_info['repository']['full_name'] + repo_name = request_body['repository']['full_name'] dt = datetime.now(timezone.utc) trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m%d')]) From 29ab450d4ed5b4c707a17beee472fc5c6c4948ca Mon Sep 17 00:00:00 2001 From: Neves-P Date: Wed, 12 Jun 2024 19:20:52 +0200 Subject: [PATCH 20/64] Same formatting for directory timestamp --- eessi_bot_event_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 3f9a8e1b..a832983a 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -640,7 +640,7 @@ def handle_pull_request_closed_event(self, event_info, pr): repo_name = request_body['repository']['full_name'] dt = datetime.now(timezone.utc) - trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m%d')]) + trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m.%d')]) # Subdirectory with date of move. Also with repository name. Handle symbolic links (later?) # cron job deletes symlinks? From 1dcf08174c6b5118661a6508781422fee6bd3f23 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Fri, 21 Jun 2024 21:53:59 +0200 Subject: [PATCH 21/64] add setting to give all jobs a unique name --- README.md | 7 +++++++ app.cfg.example | 4 ++++ eessi_bot_event_handler.py | 1 + eessi_bot_job_manager.py | 8 +++++++- tasks/build.py | 9 +++++++++ tools/config.py | 1 + 6 files changed, 29 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e268fc41..9e556311 100644 --- a/README.md +++ b/README.md @@ -375,6 +375,13 @@ package repositories. Typically these settings are set in the prologue of a Slurm job. However, when entering the [EESSI compatibility layer](https://www.eessi.io/docs/compatibility_layer), most environment settings are cleared. Hence, they need to be set again at a later stage. +``` +job_name = JOB_NAME +``` +Replace `JOB_NAME` with a string of at least 3 characters that is used as job +name when a job is submitted. This is used to filter jobs, e.g., should be used +to make sure that multiple bot instances can run in the same Slurm environment. + ``` jobs_base_dir = PATH_TO_JOBS_BASE_DIR ``` diff --git a/app.cfg.example b/app.cfg.example index ae51ade6..7cbde15d 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -87,6 +87,10 @@ container_cachedir = PATH_TO_SHARED_DIRECTORY # http_proxy = http://PROXY_DNS:3128/ # https_proxy = http://PROXY_DNS:3128/ +# Used to give all jobs of a bot instance the same name. Can be used to allow +# multiple bot instances running on the same Slurm cluster. +job_name = prod + # directory under which the bot prepares directories per job # structure created is as follows: YYYY.MM/pr_PR_NUMBER/event_EVENT_ID/run_RUN_NUMBER/OS+SUBDIR jobs_base_dir = $HOME/jobs diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 5677ed2c..d414f947 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -51,6 +51,7 @@ # config.BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS, # optional # config.BUILDENV_SETTING_HTTPS_PROXY, # optional # config.BUILDENV_SETTING_HTTP_PROXY, # optional + config.BUILDENV_SETTING_JOB_NAME, # required config.BUILDENV_SETTING_JOBS_BASE_DIR, # required # config.BUILDENV_SETTING_LOAD_MODULES, # optional config.BUILDENV_SETTING_LOCAL_TMP, # required diff --git a/eessi_bot_job_manager.py b/eessi_bot_job_manager.py index e7473f00..aba40081 100644 --- a/eessi_bot_job_manager.py +++ b/eessi_bot_job_manager.py @@ -50,6 +50,8 @@ # settings that are required in 'app.cfg' REQUIRED_CONFIG = { + config.SECTION_BUILDENV: [ + config.BUILDENV_SETTING_JOB_NAME], # required config.SECTION_FINISHED_JOB_COMMENTS: [ config.FINISHED_JOB_COMMENTS_SETTING_JOB_RESULT_UNKNOWN_FMT, # required config.FINISHED_JOB_COMMENTS_SETTING_JOB_TEST_UNKNOWN_FMT], # required @@ -85,6 +87,10 @@ def __init__(self): cfg = config.read_config() job_manager_cfg = cfg[config.SECTION_JOB_MANAGER] self.logfile = job_manager_cfg.get(config.JOB_MANAGER_SETTING_LOG_PATH) + buildenv_cfg = cfg[config.SECTION_BUILDENV] + self.job_name = buildenv_cfg.get(config.BUILDENV_SETTING_JOB_NAME) + if len(self.job_name) < 3: + raise Exception(f"job name ({self.job_name}) is shorter than 3 characters") def get_current_jobs(self): """ @@ -105,7 +111,7 @@ def get_current_jobs(self): if username is None: raise Exception("Unable to find username") - squeue_cmd = "%s --long --noheader --user=%s" % (self.poll_command, username) + squeue_cmd = "%s --long --noheader --user=%s --name='%s'" % (self.poll_command, username, self.job_name) squeue_output, squeue_err, squeue_exitcode = run_cmd( squeue_cmd, "get_current_jobs(): squeue command", diff --git a/tasks/build.py b/tasks/build.py index 82a0911e..5fa36076 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -67,6 +67,10 @@ def get_build_env_cfg(cfg): buildenv = cfg[config.SECTION_BUILDENV] + job_name = buildenv.get(config.BUILDENV_SETTING_JOB_NAME) + log(f"{fn}(): job_name '{job_name}'") + config_data = {config.BUILDENV_SETTING_JOB_NAME: job_name} + jobs_base_dir = buildenv.get(config.BUILDENV_SETTING_JOBS_BASE_DIR) log(f"{fn}(): jobs_base_dir '{jobs_base_dir}'") config_data = {config.BUILDENV_SETTING_JOBS_BASE_DIR: jobs_base_dir} @@ -640,6 +644,10 @@ def submit_job(job, cfg): build_env_cfg = get_build_env_cfg(cfg) + # the job_name is used to filter jobs in case multiple bot + # instances run on the same system + job_name = cfg[config.SECTION_BUILDENV].get(config.BUILDENV_SETTING_JOB_NAME) + # add a default time limit of 24h to the job submit command if no other time # limit is specified already all_opts_str = " ".join([build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS], job.slurm_opts]) @@ -654,6 +662,7 @@ def submit_job(job, cfg): build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS], time_limit, job.slurm_opts, + f"--job-name='{job_name}'", build_env_cfg[config.BUILDENV_SETTING_BUILD_JOB_SCRIPT], ]) diff --git a/tools/config.py b/tools/config.py index dcffe03d..11527702 100644 --- a/tools/config.py +++ b/tools/config.py @@ -43,6 +43,7 @@ BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS = 'cvmfs_customizations' BUILDENV_SETTING_HTTPS_PROXY = 'https_proxy' BUILDENV_SETTING_HTTP_PROXY = 'http_proxy' +BUILDENV_SETTING_JOB_NAME = 'job_name' BUILDENV_SETTING_JOBS_BASE_DIR = 'jobs_base_dir' BUILDENV_SETTING_LOAD_MODULES = 'load_modules' BUILDENV_SETTING_LOCAL_TMP = 'local_tmp' From 326dd38d55a7d937a7867767c184d4889f99aa93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20R=C3=B6blitz?= Date: Mon, 24 Jun 2024 21:20:04 +0200 Subject: [PATCH 22/64] Only use job_name if it is not None Co-authored-by: Kenneth Hoste --- eessi_bot_job_manager.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/eessi_bot_job_manager.py b/eessi_bot_job_manager.py index aba40081..f8d4368a 100644 --- a/eessi_bot_job_manager.py +++ b/eessi_bot_job_manager.py @@ -89,7 +89,7 @@ def __init__(self): self.logfile = job_manager_cfg.get(config.JOB_MANAGER_SETTING_LOG_PATH) buildenv_cfg = cfg[config.SECTION_BUILDENV] self.job_name = buildenv_cfg.get(config.BUILDENV_SETTING_JOB_NAME) - if len(self.job_name) < 3: + if self.job_name and len(self.job_name) < 3: raise Exception(f"job name ({self.job_name}) is shorter than 3 characters") def get_current_jobs(self): @@ -111,7 +111,9 @@ def get_current_jobs(self): if username is None: raise Exception("Unable to find username") - squeue_cmd = "%s --long --noheader --user=%s --name='%s'" % (self.poll_command, username, self.job_name) + squeue_cmd = "%s --long --noheader --user=%s" % (self.poll_command, username) + if self.job_name: + squeue_cmd += "--name='%s'" % self.job_name squeue_output, squeue_err, squeue_exitcode = run_cmd( squeue_cmd, "get_current_jobs(): squeue command", From 76a263d0a6843c9acc5bd7a3b8be48f47f5fcd72 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 24 Jun 2024 21:23:35 +0200 Subject: [PATCH 23/64] add space before name argument --- eessi_bot_job_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi_bot_job_manager.py b/eessi_bot_job_manager.py index f8d4368a..bb0c6dd8 100644 --- a/eessi_bot_job_manager.py +++ b/eessi_bot_job_manager.py @@ -113,7 +113,7 @@ def get_current_jobs(self): squeue_cmd = "%s --long --noheader --user=%s" % (self.poll_command, username) if self.job_name: - squeue_cmd += "--name='%s'" % self.job_name + squeue_cmd += " --name='%s'" % self.job_name squeue_output, squeue_err, squeue_exitcode = run_cmd( squeue_cmd, "get_current_jobs(): squeue command", From c614a96f801ae2c79f9f1fa9cd5d3cdecbc156db Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 24 Jun 2024 21:34:42 +0200 Subject: [PATCH 24/64] only add job-name submission arg if job_name is not None --- tasks/build.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 5fa36076..b2de809d 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -661,10 +661,9 @@ def submit_job(job, cfg): build_env_cfg[config.BUILDENV_SETTING_SUBMIT_COMMAND], build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS], time_limit, - job.slurm_opts, - f"--job-name='{job_name}'", - build_env_cfg[config.BUILDENV_SETTING_BUILD_JOB_SCRIPT], - ]) + job.slurm_opts] + + ([f"--job-name='{job_name}'"] if job_name else []) + + [build_env_cfg[config.BUILDENV_SETTING_BUILD_JOB_SCRIPT]]) cmdline_output, cmdline_error, cmdline_exit_code = run_cmd(command_line, "submit job for target '%s'" % job.arch_target, From b285f81c59426854cb65a77ece21cce686e92127 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 25 Jun 2024 08:43:52 +0200 Subject: [PATCH 25/64] add missing config section to app.cfg for tests --- tests/test_app.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_app.cfg b/tests/test_app.cfg index f940c1df..fd91ed8b 100644 --- a/tests/test_app.cfg +++ b/tests/test_app.cfg @@ -11,6 +11,8 @@ # sample config file for tests (some functions run config.read_config() # which reads app.cfg by default) +[buildenv] + [job_manager] # variable 'comment' under 'submitted_job_comments' should not be changed as there are regular expression patterns matching it From 087d6c553f628103c9a4a78ed1d79848ed5aeca1 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Mon, 1 Jul 2024 13:56:43 +0200 Subject: [PATCH 26/64] Best to not read comment from config --- eessi_bot_event_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index a832983a..01bb2a3f 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -654,7 +654,7 @@ def handle_pull_request_closed_event(self, event_info, pr): gh = github.get_instance() repo = gh.get_repo(repo_name) pull_request = repo.get_pull(pr.number) - moved_comment = clean_up_comments_cfg[config.CLEAN_UP_COMMENTS_SETTING_MOVED_COMMENT] + moved_comment = f"PR merged! Moved `{job_dirs}` to `{trash_bin_dir}`" issue_comment = pull_request.create_issue_comment(moved_comment) return issue_comment From 5dbd132fe2151f7d9884d7dabd632ba2cb96a45f Mon Sep 17 00:00:00 2001 From: Neves-P Date: Mon, 1 Jul 2024 14:00:18 +0200 Subject: [PATCH 27/64] Remove local variable --- eessi_bot_event_handler.py | 1 - tools/config.py | 1 - 2 files changed, 2 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 01bb2a3f..51134e39 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -631,7 +631,6 @@ def handle_pull_request_closed_event(self, event_info, pr): # NOTE: Permissions to merge are already handled through GitHub, we # don't need to check here - clean_up_comments_cfg = self.cfg[config.SECTION_CLEAN_UP_COMMENTS] # 1) determine the jobs that have been run for the PR job_dirs = determine_job_dirs(pr.number) diff --git a/tools/config.py b/tools/config.py index 8c0e9c00..d56eb123 100644 --- a/tools/config.py +++ b/tools/config.py @@ -109,7 +109,6 @@ MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR = 'trash_bin_dir' SECTION_CLEAN_UP_COMMENTS = 'clean_up_comments' -CLEAN_UP_COMMENTS_SETTING_MOVED_COMMENT = 'moved_comment' def read_config(path='app.cfg'): From b782889b1e758f8ae91c669985e62a6cc92eb439 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Mon, 1 Jul 2024 14:51:40 +0200 Subject: [PATCH 28/64] Restore cleanup_pr.sh --- scripts/cleanup_pr.sh | 158 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 scripts/cleanup_pr.sh diff --git a/scripts/cleanup_pr.sh b/scripts/cleanup_pr.sh new file mode 100644 index 00000000..41527675 --- /dev/null +++ b/scripts/cleanup_pr.sh @@ -0,0 +1,158 @@ +#!/bin/bash +# +# GitHub App for the EESSI project +# +# A bot to help with requests to add software installations to the EESSI software layer, +# see https://github.com/EESSI/software-layer + +# This script cleans up (deletes) all build artefacts and temporary storage tarballs of a given PR +# +# author: Thomas Roeblitz (@trz42) +# +# license: GPLv2 +# + +SCRIPT_DIR=$(dirname $(realpath $BASH_SOURCE)) + +function display_help +{ + echo "Usage: $0 [OPTIONS] " >&2 + echo " -b | --jobs-base-dir DIRECTORY - jobs base directory [default: reads" >&2 + echo " value from bot config file app.cfg or .]" >&2 + echo " -D | --dry-run - only show commands that would be run" >&2 + echo " [default: false]" >&2 + echo " -h | --help - display this usage information" >&2 +} + +function get_jobs_base_dir +{ + app_cfg_path=${1} + grep jobs_base_dir ${app_cfg_path} | grep -v '^[ ]*#' | sed -e 's/^[^=]*=[ ]*//' +} + +echo + +if [[ $# -lt 1 ]]; then + display_help + exit 1 +fi + +# process command line args +POSITIONAL_ARGS=() + +jobs_base_dir= +dry_run=false + +while [[ $# -gt 0 ]]; do + case $1 in + -b|--jobs-base-dir) + if [[ $# -gt 1 ]]; then + jobs_base_dir="$2" + shift 2 + else + echo "Error: missing argument (directory) for parameter '${1}'" + exit 2 + fi + ;; + -D|--dry-run) + dry_run=true + shift 1 + ;; + -h|--help) + display_help + exit 0 + ;; + -*|--*) + echo "Error: Unknown option: $1" >&2 + exit 1 + ;; + *) # No more options + POSITIONAL_ARGS+=("$1") # save positional arg + shift + ;; + esac +done + +# restore potentially parsed filename(s) into $* +set -- "${POSITIONAL_ARGS[@]}" + +if [[ $# -ne 1 ]]; then + echo "Error: exactly one PR number should be provided as argument" + display_help + exit 3 +fi + +pull_request=${1} + +if ${dry_run} = true ; then + echo "DRY_RUN: not removing any files" +fi + +# determine jobs base dir if not given explicitly +# 1. check for file app.cfg in SCRIPT_DIR +# 2. check for file app.cfg in current dir +# if found try to obtain value of jobs_base_dir setting +# if not file not found or jobs_base_dir setting not found (or empty) --> error & exit +if [[ -z ${jobs_base_dir} ]]; then + echo "jobs base directory not given explicitly, trying to determine it" + if [[ -e ${SCRIPT_DIR}/app.cfg ]]; then + echo "check for app.cfg in '${SCRIPT_DIR}'" + jobs_base_dir=$(get_jobs_base_dir ${SCRIPT_DIR}/app.cfg) + else + if [[ -e ./app.cfg ]]; then + echo "check for app.cfg in '${PWD}' (current directory)" + jobs_base_dir=$(get_jobs_base_dir ./app.cfg) + fi + fi +fi +if [[ -z ${jobs_base_dir} ]]; then + echo "Error: jobs base directory is empty, please specify it as argument" + display_help + exit 4 +fi + +echo "processing all directories for PR ${pull_request}:" +find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request} | sed -e 's/^/ /' + +echo +echo "disk usage of directories for PR ${pull_request} BEFORE removing build artefacts and tmp storage" +for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done + +echo +echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing tmp storage tarballs for PR ${pull_request}" +for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}) +do + for f in $(find $d -type f -wholename "*[0-9].tgz") + do + if ${dry_run} = true ; then + echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))" + else + echo "Removing file '$f'" + rm $f + fi + done +done + +echo +echo "disk usage of directories for PR ${pull_request} AFTER removing tmp storage tarballs" +for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done + +echo +echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing build artefacts for PR ${pull_request}" +for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}) +do + for f in $(find $d -type f -wholename "*tar.gz") + do + if ${dry_run} = true ; then + echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))" + else + echo "Removing file '$f'" + rm $f + fi + done +done + +echo +echo "disk usage of directories for PR ${pull_request} AFTER removing build artefacts and tmp storage" +for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done + From f4114b397704d9f5a1ff45977e66abcef286c0b0 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Mon, 1 Jul 2024 14:53:28 +0200 Subject: [PATCH 29/64] Restore cleanup_pr.sh --- scripts/cleanup_pr.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 scripts/cleanup_pr.sh diff --git a/scripts/cleanup_pr.sh b/scripts/cleanup_pr.sh old mode 100644 new mode 100755 From 283f602d4aefe64a562ee74dbdad150874ae2c37 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Mon, 1 Jul 2024 15:45:29 +0200 Subject: [PATCH 30/64] Also move pr event dirs --- eessi_bot_event_handler.py | 3 ++- tasks/clean_up.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 51134e39..85230894 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -17,6 +17,7 @@ # # Standard library imports +import os import sys from datetime import datetime, timezone @@ -639,7 +640,7 @@ def handle_pull_request_closed_event(self, event_info, pr): repo_name = request_body['repository']['full_name'] dt = datetime.now(timezone.utc) - trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m.%d')]) + trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y.%m.%d')]) # Subdirectory with date of move. Also with repository name. Handle symbolic links (later?) # cron job deletes symlinks? diff --git a/tasks/clean_up.py b/tasks/clean_up.py index 96b3b0c1..8a28a977 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -35,7 +35,17 @@ def move_to_trash_bin(trash_bin_dir, job_dirs): log(f"{funcname}(): trash_bin_dir = {trash_bin_dir}") os.makedirs(trash_bin_dir, exist_ok=True) + pr_dirs = [] for job_dir in job_dirs: destination_dir = shutil.move(job_dir, trash_bin_dir) log(f"{funcname}(): moved {job_dir} to {destination_dir}") + # Save upper directory above to remove later (pr_xx) + pr_dirs = os.path.dirname(job_dir) + + # Remove event_xxx-yyy/run_nnn/ directories + pr_dirs = list(set(pr_dirs)) + for pr_dir in pr_dirs: + destination_dir = shutil.move(pr_dir, trash_bin_dir) + log(f"{funcname}(): moved {pr_dir} to {destination_dir}") + return True From c025746a8768a06bbe7f15178f3c2cbbfab643df Mon Sep 17 00:00:00 2001 From: Neves-P Date: Mon, 1 Jul 2024 15:49:11 +0200 Subject: [PATCH 31/64] Unnecessary library --- eessi_bot_event_handler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 85230894..2f94acda 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -17,7 +17,6 @@ # # Standard library imports -import os import sys from datetime import datetime, timezone From e56e9edb6a4ffaf149742ffc6bbcefa9ee2e8f5f Mon Sep 17 00:00:00 2001 From: Neves-P Date: Mon, 1 Jul 2024 17:07:40 +0200 Subject: [PATCH 32/64] Use shutil.copy2 and os.remove instead --- eessi_bot_event_handler.py | 2 +- tasks/clean_up.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 2f94acda..d96ed3d9 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -648,7 +648,7 @@ def handle_pull_request_closed_event(self, event_info, pr): self.log("Moving directories to trash_bin") move_to_trash_bin(trash_bin_dir, job_dirs) - # 4) report move to pull request? + # 4) report move to pull request repo_name = pr.base.repo.full_name gh = github.get_instance() repo = gh.get_repo(repo_name) diff --git a/tasks/clean_up.py b/tasks/clean_up.py index 8a28a977..6dda6f20 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -45,7 +45,9 @@ def move_to_trash_bin(trash_bin_dir, job_dirs): # Remove event_xxx-yyy/run_nnn/ directories pr_dirs = list(set(pr_dirs)) for pr_dir in pr_dirs: - destination_dir = shutil.move(pr_dir, trash_bin_dir) - log(f"{funcname}(): moved {pr_dir} to {destination_dir}") + destination_dir = shutil.copy2(pr_dir, trash_bin_dir, follow_symlinks=True) + log(f"{funcname}(): copied {pr_dir} to {destination_dir}") + os.remove(pr_dir) + log(f"{funcname}(): removed {pr_dir}") return True From 00eeb89c5b3ca0e3b85ad7bc165894cf3d753200 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 2 Jul 2024 09:59:34 +0200 Subject: [PATCH 33/64] Move whole directory(ies) to trash --- tasks/clean_up.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tasks/clean_up.py b/tasks/clean_up.py index 6dda6f20..174b0a61 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -37,13 +37,11 @@ def move_to_trash_bin(trash_bin_dir, job_dirs): os.makedirs(trash_bin_dir, exist_ok=True) pr_dirs = [] for job_dir in job_dirs: - destination_dir = shutil.move(job_dir, trash_bin_dir) - log(f"{funcname}(): moved {job_dir} to {destination_dir}") - # Save upper directory above to remove later (pr_xx) - pr_dirs = os.path.dirname(job_dir) + # Save upper directory to remove later (pr_xx) + pr_dirs.append(os.path.dirname(job_dir)) # Remove event_xxx-yyy/run_nnn/ directories - pr_dirs = list(set(pr_dirs)) + pr_dirs = list(set(pr_dirs)) # get only unique dirs for pr_dir in pr_dirs: destination_dir = shutil.copy2(pr_dir, trash_bin_dir, follow_symlinks=True) log(f"{funcname}(): copied {pr_dir} to {destination_dir}") From 4b360b349ee0a7fcc55e7cce7b441931455bb657 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 2 Jul 2024 10:00:06 +0200 Subject: [PATCH 34/64] lint --- tasks/clean_up.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/clean_up.py b/tasks/clean_up.py index 174b0a61..5853d003 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -41,7 +41,7 @@ def move_to_trash_bin(trash_bin_dir, job_dirs): pr_dirs.append(os.path.dirname(job_dir)) # Remove event_xxx-yyy/run_nnn/ directories - pr_dirs = list(set(pr_dirs)) # get only unique dirs + pr_dirs = list(set(pr_dirs)) # get only unique dirs for pr_dir in pr_dirs: destination_dir = shutil.copy2(pr_dir, trash_bin_dir, follow_symlinks=True) log(f"{funcname}(): copied {pr_dir} to {destination_dir}") From b8f6d5714d3b258cb6f65ec1091b5c8e6baf37f5 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 2 Jul 2024 10:13:19 +0200 Subject: [PATCH 35/64] Better formatting error messages --- tasks/build.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 527758d7..35b21494 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -381,19 +381,19 @@ def comment_download_pr(base_repo_name, pr, download_pr_exit_code, download_pr_e download_pr_comments_cfg = config.read_config()[config.SECTION_DOWNLOAD_PR_COMMENTS] if error_stage == _ERROR_GIT_CLONE: - download_comment = (f"`{download_pr_error}`\n" + download_comment = (f"```{download_pr_error}```\n" f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CLONE_FAILURE]}" f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CLONE_TIP]}") elif error_stage == _ERROR_GIT_CHECKOUT: - download_comment = (f"`{download_pr_error}`\n" + download_comment = (f"```{download_pr_error}```\n" f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CHECKOUT_FAILURE]}" f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CHECKOUT_TIP]}") elif error_stage == _ERROR_CURL: - download_comment = (f"`{download_pr_error}`\n" + download_comment = (f"```{download_pr_error}```\n" f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_CURL_FAILURE]}" f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_CURL_TIP]}") elif error_stage == _ERROR_GIT_APPLY: - download_comment = (f"`{download_pr_error}`\n" + download_comment = (f"```{download_pr_error}```\n" f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_APPLY_FAILURE]}" f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_APPLY_TIP]}") From 59a337cf1ce2bcbf86adbfabc981434a02404c6e Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 2 Jul 2024 10:45:16 +0200 Subject: [PATCH 36/64] Need copytree instead --- tasks/clean_up.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/clean_up.py b/tasks/clean_up.py index 5853d003..8e2e5094 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -43,7 +43,7 @@ def move_to_trash_bin(trash_bin_dir, job_dirs): # Remove event_xxx-yyy/run_nnn/ directories pr_dirs = list(set(pr_dirs)) # get only unique dirs for pr_dir in pr_dirs: - destination_dir = shutil.copy2(pr_dir, trash_bin_dir, follow_symlinks=True) + destination_dir = shutil.copytree(pr_dir, trash_bin_dir) log(f"{funcname}(): copied {pr_dir} to {destination_dir}") os.remove(pr_dir) log(f"{funcname}(): removed {pr_dir}") From f80ab2ba3024e582201d67904fd0a2c9f431b386 Mon Sep 17 00:00:00 2001 From: Neves-P Date: Tue, 2 Jul 2024 13:30:33 +0200 Subject: [PATCH 37/64] Use rmtree --- tasks/clean_up.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/clean_up.py b/tasks/clean_up.py index 8e2e5094..bc78077f 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -45,7 +45,7 @@ def move_to_trash_bin(trash_bin_dir, job_dirs): for pr_dir in pr_dirs: destination_dir = shutil.copytree(pr_dir, trash_bin_dir) log(f"{funcname}(): copied {pr_dir} to {destination_dir}") - os.remove(pr_dir) + shutil.rmtree(pr_dir) # Use shutil.rmtree to remove directories recursively log(f"{funcname}(): removed {pr_dir}") return True From da970e7e8d81131070389037b88ce34847192ddc Mon Sep 17 00:00:00 2001 From: Neves-P Date: Fri, 5 Jul 2024 23:15:39 +0200 Subject: [PATCH 38/64] copytree with dirs_exist_ok True --- tasks/clean_up.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tasks/clean_up.py b/tasks/clean_up.py index bc78077f..0e385629 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -43,7 +43,8 @@ def move_to_trash_bin(trash_bin_dir, job_dirs): # Remove event_xxx-yyy/run_nnn/ directories pr_dirs = list(set(pr_dirs)) # get only unique dirs for pr_dir in pr_dirs: - destination_dir = shutil.copytree(pr_dir, trash_bin_dir) + log(f"{funcname}(): attempting to copy {pr_dir} to {trash_bin_dir}") + destination_dir = shutil.copytree(pr_dir, trash_bin_dir, dirs_exist_ok=True) log(f"{funcname}(): copied {pr_dir} to {destination_dir}") shutil.rmtree(pr_dir) # Use shutil.rmtree to remove directories recursively log(f"{funcname}(): removed {pr_dir}") From 71e7b31bfd5a6a07710d243ebd8a163ebb31b4be Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 5 Aug 2024 14:15:45 +0200 Subject: [PATCH 39/64] tweak clean up method by - putting configuration settings into a single section - using shutil.move instead of shutil.copy (move should fallback to copy if moving is not doable) --- eessi_bot_event_handler.py | 8 ++++++-- tasks/clean_up.py | 31 +++++++++++++++++++++++-------- tools/config.py | 8 +++----- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index d96ed3d9..d018bf19 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -60,6 +60,9 @@ config.BUILDENV_SETTING_SHARED_FS_PATH, # optional+recommended # config.BUILDENV_SETTING_SLURM_PARAMS, # optional config.BUILDENV_SETTING_SUBMIT_COMMAND], # required + config.SECTION_CLEAN_UP: [ + config.CLEAN_UP_SETTING_TRASH_BIN_ROOT_DIR, # required + config.CLEAN_UP_SETTING_MOVED_JOB_DIRS_COMMENT], # required config.SECTION_DEPLOYCFG: [ config.DEPLOYCFG_SETTING_ARTEFACT_PREFIX, # (required) config.DEPLOYCFG_SETTING_ARTEFACT_UPLOAD_SCRIPT, # required @@ -635,7 +638,7 @@ def handle_pull_request_closed_event(self, event_info, pr): job_dirs = determine_job_dirs(pr.number) # 2) Get trash_bin_dir from configs - trash_bin_root_dir = self.cfg[config.SECTION_MERGED_PR][config.MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR] + trash_bin_root_dir = self.cfg[config.SECTION_CLEAN_UP][config.CLEAN_UP_SETTING_TRASH_BIN_ROOT_DIR] repo_name = request_body['repository']['full_name'] dt = datetime.now(timezone.utc) @@ -653,7 +656,8 @@ def handle_pull_request_closed_event(self, event_info, pr): gh = github.get_instance() repo = gh.get_repo(repo_name) pull_request = repo.get_pull(pr.number) - moved_comment = f"PR merged! Moved `{job_dirs}` to `{trash_bin_dir}`" + clean_up_comment = self.cfg[config.SECTION_CLEAN_UP][config.CLEAN_UP_SETTING_MOVED_JOB_DIRS_COMMENT] + moved_comment = clean_up_comment.format(job_dirs=job_dirs, trash_bin_dir=trash_bin_dir) issue_comment = pull_request.create_issue_comment(moved_comment) return issue_comment diff --git a/tasks/clean_up.py b/tasks/clean_up.py index 0e385629..ed8ca5f7 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -31,22 +31,37 @@ def move_to_trash_bin(trash_bin_dir, job_dirs): Returns: None (implicitly) """ + # idea: + # - shutil.move YYYY.MM/pr_PR_NUM to trash_bin_dir + # - need to obtain list of YYYY.MM/pr_PR_NUM directories from job dirs + # - need to ensure that YYYY.MM under trash_bin_dir exists (or create it) + # - then we can just move YYYY.MM/pr_PR_NUM to trash_bin_dir/YYYY.MM + # - (LATER) we should also fix the symbolic links under job_ids_dir/finished + # (remove it for the job id and add a new one pointing to the new location) funcname = sys._getframe().f_code.co_name log(f"{funcname}(): trash_bin_dir = {trash_bin_dir}") + # ensure the 'trash_bin_dir' exists os.makedirs(trash_bin_dir, exist_ok=True) + pr_dirs = [] for job_dir in job_dirs: - # Save upper directory to remove later (pr_xx) - pr_dirs.append(os.path.dirname(job_dir)) + pr_dir = os.path.dirname(job_dir) + log(f"{funcname}(): adding PR dir '{pr_dir}' (from job dir '{job_dir}')") + pr_dirs.append(pr_dir) + - # Remove event_xxx-yyy/run_nnn/ directories + # Move (or copy as fallback) entire pr_PR_NUM directories to trash_bin_dir/YYYY.MM pr_dirs = list(set(pr_dirs)) # get only unique dirs for pr_dir in pr_dirs: - log(f"{funcname}(): attempting to copy {pr_dir} to {trash_bin_dir}") - destination_dir = shutil.copytree(pr_dir, trash_bin_dir, dirs_exist_ok=True) - log(f"{funcname}(): copied {pr_dir} to {destination_dir}") - shutil.rmtree(pr_dir) # Use shutil.rmtree to remove directories recursively - log(f"{funcname}(): removed {pr_dir}") + # determine YYYY.MM parent of pr_dir + year_month_dir = pr_dir.split('/')[-2] + # make sure that directory exists under trash_bin_dir + target_bin_dir = os.path.join(trash_bin_dir, year_month_dir) + os.makedirs(target_bin_dir, exist_ok=True) + + log(f"{funcname}(): attempting to move {pr_dir} to {target_bin_dir}") + destination_dir = shutil.move(pr_dir, target_bin_dir) + log(f"{funcname}(): moved {pr_dir} to {destination_dir}") return True diff --git a/tools/config.py b/tools/config.py index d56eb123..79662249 100644 --- a/tools/config.py +++ b/tools/config.py @@ -105,11 +105,9 @@ SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT = 'initial_comment' SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE = 'awaits_release' -SECTION_MERGED_PR = 'merged_pr' -MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR = 'trash_bin_dir' - -SECTION_CLEAN_UP_COMMENTS = 'clean_up_comments' - +SECTION_CLEAN_UP = 'clean_up' +CLEAN_UP_SETTING_TRASH_BIN_ROOT_DIR = 'trash_bin_dir' +CLEAN_UP_SETTING_MOVED_JOB_DIRS_COMMENT = 'moved_job_dirs_comment' def read_config(path='app.cfg'): """ From d1a02786e10e4a5efc381780203c1d73ef2f114d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 5 Aug 2024 14:25:01 +0200 Subject: [PATCH 40/64] add information to README.md and app.cfg.example --- README.md | 15 +++++++++++++++ app.cfg.example | 4 ++++ 2 files changed, 19 insertions(+) diff --git a/README.md b/README.md index e268fc41..cfe425e5 100644 --- a/README.md +++ b/README.md @@ -720,6 +720,21 @@ git_apply_tip = _Tip: This can usually be resolved by syncing your branch and re `git_apply_tip` should guide the contributor/maintainer about resolving the cause of `git apply` failing. +#### `[clean_up]` section + +The `[clean_up]` section includes settings related to cleaning up disk used by merged (and closed) PRs. +``` +trash_bin_dir = PATH/TO/TRASH_BIN_DIRECTORY +``` +Ideally this is on the same filesystem used by `jobs_base_dir` and `job_ids_dir` to efficiently move data +into the trash bin. If it resides on a different filesystem, the data will be copied. + +``` +moved_job_dirs_comment = PR merged! Moved `{job_dirs}` to `{trash_bin_dir}` +``` +Template that is used by the bot to add a comment to a PR noting down which directories have been +moved and where. + # Instructions to run the bot components The bot consists of three components: diff --git a/app.cfg.example b/app.cfg.example index ae51ade6..dcb40f41 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -259,3 +259,7 @@ curl_failure = Unable to download the `.diff` file. curl_tip = _Tip: This could be a connection failure. Try again and if the issue remains check if the address is correct_ git_apply_failure = Unable to download or merge changes between the source branch and the destination branch. git_apply_tip = _Tip: This can usually be resolved by syncing your branch and resolving any merge conflicts._ + +[clean_up] +trash_bin_dir = $HOME/trash_bin +moved_job_dirs_comment = PR merged! Moved `{job_dirs}` to `{trash_bin_dir}` From d8d79b691d68c08e016a6eb8670f90d28e7b20c4 Mon Sep 17 00:00:00 2001 From: Pedro Santos Neves <10762799+Neves-P@users.noreply.github.com> Date: Mon, 5 Aug 2024 15:31:45 +0200 Subject: [PATCH 41/64] Lint --- tasks/clean_up.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tasks/clean_up.py b/tasks/clean_up.py index ed8ca5f7..093117eb 100644 --- a/tasks/clean_up.py +++ b/tasks/clean_up.py @@ -50,7 +50,6 @@ def move_to_trash_bin(trash_bin_dir, job_dirs): log(f"{funcname}(): adding PR dir '{pr_dir}' (from job dir '{job_dir}')") pr_dirs.append(pr_dir) - # Move (or copy as fallback) entire pr_PR_NUM directories to trash_bin_dir/YYYY.MM pr_dirs = list(set(pr_dirs)) # get only unique dirs for pr_dir in pr_dirs: From f69c49c491978ee92c6e3f40d851d8ce4a27aaee Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 6 Aug 2024 13:53:23 +0200 Subject: [PATCH 42/64] add empty line --- tools/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/config.py b/tools/config.py index 79662249..4fd62c7c 100644 --- a/tools/config.py +++ b/tools/config.py @@ -109,6 +109,7 @@ CLEAN_UP_SETTING_TRASH_BIN_ROOT_DIR = 'trash_bin_dir' CLEAN_UP_SETTING_MOVED_JOB_DIRS_COMMENT = 'moved_job_dirs_comment' + def read_config(path='app.cfg'): """ Read the config file From 78e625e2e737192c4b6a45eeffbc237826a89170 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 7 Aug 2024 11:04:08 +0200 Subject: [PATCH 43/64] fix issue overwriting of config_data entry --- tasks/build.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index b2de809d..46b9543a 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -65,15 +65,16 @@ def get_build_env_cfg(cfg): """ fn = sys._getframe().f_code.co_name + config_data = {} buildenv = cfg[config.SECTION_BUILDENV] job_name = buildenv.get(config.BUILDENV_SETTING_JOB_NAME) log(f"{fn}(): job_name '{job_name}'") - config_data = {config.BUILDENV_SETTING_JOB_NAME: job_name} + config_data[config.BUILDENV_SETTING_JOB_NAME] = job_name jobs_base_dir = buildenv.get(config.BUILDENV_SETTING_JOBS_BASE_DIR) log(f"{fn}(): jobs_base_dir '{jobs_base_dir}'") - config_data = {config.BUILDENV_SETTING_JOBS_BASE_DIR: jobs_base_dir} + config_data[config.BUILDENV_SETTING_JOBS_BASE_DIR] = jobs_base_dir local_tmp = buildenv.get(config.BUILDENV_SETTING_LOCAL_TMP) log(f"{fn}(): local_tmp '{local_tmp}'") From 3add12a1dc4ecb9ac0473379a1dce2c1cae86c3c Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Mon, 19 Aug 2024 10:18:27 +0200 Subject: [PATCH 44/64] handle closed PRs too --- eessi_bot_event_handler.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index ab2ad0da..97cc369d 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -12,6 +12,7 @@ # author: Jonas Qvigstad (@jonas-lq) # author: Lara Ramona Peeters (@laraPPr) # author: Thomas Roeblitz (@trz42) +# author: Pedro Santos Neves (@Neves-P) # # license: GPLv2 # @@ -607,9 +608,9 @@ def start(self, app, port=3000): def handle_pull_request_closed_event(self, event_info, pr): """ - Handle events of type pull_request with the action 'closed'. Main action - is to scan directories used and move them to the trash_bin when the PR - is merged. + Handle events of type pull_request with the action 'closed'. It + determines used by the PR and moves them to the trash_bin. It also adds + information to the logs and a comment to the PR. Args: event_info (dict): event received by event_handler @@ -623,34 +624,33 @@ def handle_pull_request_closed_event(self, event_info, pr): # Detect event and only act if PR is merged request_body = event_info['raw_request_body'] action = request_body['action'] - merged = request_body['pull_request']['merged'] + # next value: True -> PR merged, False -> PR closed + mergedOrClosed = request_body['pull_request']['merged'] + status = "merged" if mergedOrClosed else "closed" - if merged: - self.log("PR merged: scanning directories used by PR") - self.log(f"pull_request event with action '{action}' and merged '{merged}' will be handled") - else: - self.log(f"Action '{action}' not handled as 'merged' is '{merged}'") - return - # at this point we know that we are handling a new merge - # NOTE: Permissions to merge are already handled through GitHub, we - # don't need to check here + self.log(f"PR {pr.number}: PR got {status} (json value: {mergedOrClosed})") # 1) determine the jobs that have been run for the PR + self.log(f"PR {pr.number}: determining directories to be moved to trash bin") job_dirs = determine_job_dirs(pr.number) # 2) Get trash_bin_dir from configs trash_bin_root_dir = self.cfg[config.SECTION_CLEAN_UP][config.CLEAN_UP_SETTING_TRASH_BIN_ROOT_DIR] repo_name = request_body['repository']['full_name'] - dt = datetime.now(timezone.utc) - trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y.%m.%d')]) + dt_start = datetime.now(timezone.utc) + trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt_start.strftime('%Y.%m.%d')]) # Subdirectory with date of move. Also with repository name. Handle symbolic links (later?) # cron job deletes symlinks? # 3) move the directories to the trash_bin - self.log("Moving directories to trash_bin") + self.log(f"PR {pr.number}: moving directories to trash bin {trash_bin_dir}") move_to_trash_bin(trash_bin_dir, job_dirs) + dt_end = datetime.now(timezone.utc) + dt_delta = dt_end - dt_start + seconds_elapsed = dt_delta.days * 24 * 3600 + dt_delta.seconds + self.log(f"PR {pr.number}: moved directories to trash bin {trash_bin_dir} (took {seconds_elapsed} seconds)") # 4) report move to pull request repo_name = pr.base.repo.full_name From e14fe10de429ca1cae3a11e14ad5393df44cc99e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20R=C3=B6blitz?= Date: Tue, 20 Aug 2024 10:56:54 +0200 Subject: [PATCH 45/64] improve comment and remove unused code Co-authored-by: Pedro Santos Neves <10762799+Neves-P@users.noreply.github.com> --- eessi_bot_event_handler.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 97cc369d..91274fbe 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -621,9 +621,8 @@ def handle_pull_request_closed_event(self, event_info, pr): PyGithub, not the github from the internal connections module) """ - # Detect event and only act if PR is merged + # Detect event and report if PR was merged or closed request_body = event_info['raw_request_body'] - action = request_body['action'] # next value: True -> PR merged, False -> PR closed mergedOrClosed = request_body['pull_request']['merged'] status = "merged" if mergedOrClosed else "closed" From 25410e76f53299881df03b53e2427d468f6ad24c Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 21 Aug 2024 14:52:40 +0200 Subject: [PATCH 46/64] add filter for accelerators and pytests --- tests/test_tools_filter.py | 216 +++++++++++++++++++++++++++++++++---- tools/filter.py | 105 +++++++++++++----- 2 files changed, 275 insertions(+), 46 deletions(-) diff --git a/tests/test_tools_filter.py b/tests/test_tools_filter.py index 3cf4c699..b689aa60 100644 --- a/tests/test_tools_filter.py +++ b/tests/test_tools_filter.py @@ -10,12 +10,18 @@ # # Standard library imports +import copy # Third party imports (anything installed into the local Python environment) import pytest # Local application imports (anything from EESSI/eessi-bot-software-layer) -from tools.filter import EESSIBotActionFilter, EESSIBotActionFilterError +from tools.filter import (COMPONENT_TOO_SHORT, + COMPONENT_UNKNOWN, + EESSIBotActionFilter, + EESSIBotActionFilterError, + FILTER_EMPTY_PATTERN, + FILTER_FORMAT_ERROR) def test_empty_action_filter(): @@ -25,6 +31,45 @@ def test_empty_action_filter(): assert expected == actual +def test_add_wellformed_filter_from_string(): + af = EESSIBotActionFilter("") + component = 'acc' + pattern = 'nvidia/cc80' + af.add_filter_from_string(f"{component}:{pattern}") + expected = f"accelerator:{pattern}" + actual = af.to_string() + assert expected == actual + + +def test_add_non_wellformed_filter_from_string(): + af = EESSIBotActionFilter("") + component1 = 'acc' + filter_string1 = f"{component1}" + with pytest.raises(Exception) as err1: + af.add_filter_from_string(filter_string1) + assert err1.type == EESSIBotActionFilterError + expected_msg1 = FILTER_FORMAT_ERROR.format(filter_string=filter_string1) + assert str(err1.value) == expected_msg1 + + component2 = 'a' + pattern2 = 'zen4' + filter_string2 = f"{component2}:{pattern2}" + with pytest.raises(Exception) as err2: + af.add_filter_from_string(filter_string2) + assert err2.type == EESSIBotActionFilterError + expected_msg2 = COMPONENT_TOO_SHORT.format(component=component2, pattern=pattern2) + assert str(err2.value) == expected_msg2 + + component3 = 'arc' + pattern3 = '' + filter_string3 = f"{component3}:{pattern3}" + with pytest.raises(Exception) as err3: + af.add_filter_from_string(filter_string3) + assert err3.type == EESSIBotActionFilterError + expected_msg3 = FILTER_EMPTY_PATTERN.format(filter_string=filter_string3) + assert str(err3.value) == expected_msg3 + + def test_add_single_action_filter(): af = EESSIBotActionFilter("") component = 'arch' @@ -42,6 +87,106 @@ def test_add_non_supported_component(): with pytest.raises(Exception) as err: af.add_filter(component, pattern) assert err.type == EESSIBotActionFilterError + expected_msg = COMPONENT_UNKNOWN.format(component=component, pattern=pattern) + assert str(err.value) == expected_msg + + +def test_add_too_short_supported_component(): + af = EESSIBotActionFilter("") + component = 'a' + pattern = '.*intel.*' + with pytest.raises(Exception) as err: + af.add_filter(component, pattern) + assert err.type == EESSIBotActionFilterError + expected_msg = COMPONENT_TOO_SHORT.format(component=component, pattern=pattern) + assert str(err.value) == expected_msg + + +# TODO tests for removing filters +@pytest.fixture +def complex_filter(): + af = EESSIBotActionFilter("") + component1 = 'arch' + pattern1 = '.*intel.*' + af.add_filter(component1, pattern1) + component2 = 'repo' + pattern2 = 'nessi.no-2022.*' + af.add_filter(component2, pattern2) + component3 = 'inst' + pattern3 = '[aA]' + af.add_filter(component3, pattern3) + yield af + + +def test_remove_existing_filter(complex_filter): + component1 = 'architecture' + pattern1 = '.*intel.*' + filter_string1 = f"{component1}:{pattern1}" + component2 = 'repository' + pattern2 = 'nessi.no-2022.*' + filter_string2 = f"{component2}:{pattern2}" + component3 = 'instance' + pattern3 = '[aA]' + filter_string3 = f"{component3}:{pattern3}" + + # remove last filter + org_filter = copy.deepcopy(complex_filter) + org_filter.remove_filter(component3, pattern3) + expected = filter_string1 + expected += f" {filter_string2}" + actual = org_filter.to_string() + assert expected == actual + + # remove second last filter + org_filter = copy.deepcopy(complex_filter) + org_filter.remove_filter(component2, pattern2) + expected = filter_string1 + expected += f" {filter_string3}" + actual = org_filter.to_string() + assert expected == actual + + # remove first filter + org_filter = copy.deepcopy(complex_filter) + org_filter.remove_filter(component1, pattern1) + expected = filter_string2 + expected += f" {filter_string3}" + actual = org_filter.to_string() + assert expected == actual + + +def test_remove_non_existing_filter(complex_filter): + component = 'accel' + pattern = 'amd/gfx90a' + + # remove non-existing filter + org_filter = copy.deepcopy(complex_filter) + org_filter.remove_filter(component, pattern) + org_filter_str = org_filter.to_string() + complex_filter_str = complex_filter.to_string() + assert org_filter_str == complex_filter_str + + +def test_remove_filter_errors(complex_filter): + component1 = 'ac' + pattern1 = 'amd/gfx90a' + component2 = 'operating_system' + pattern2 = 'linux' + + # remove filter using too short component name + org_filter = copy.deepcopy(complex_filter) + with pytest.raises(Exception) as err1: + org_filter.remove_filter(component1, pattern1) + assert err1.type == EESSIBotActionFilterError + expected_msg1 = COMPONENT_TOO_SHORT.format(component=component1, pattern=pattern1) + assert str(err1.value) == expected_msg1 + + # remove filter using unknown component name + org_filter = copy.deepcopy(complex_filter) + with pytest.raises(Exception) as err2: + org_filter.remove_filter(component2, pattern2) + assert err2.type == EESSIBotActionFilterError + expected_msg2 = COMPONENT_UNKNOWN.format(component=component2, pattern=pattern2) + assert str(err2.value) == expected_msg2 def test_check_matching_empty_filter(): @@ -71,21 +216,6 @@ def test_check_matching_simple_filter(): assert expected == actual -@pytest.fixture -def complex_filter(): - af = EESSIBotActionFilter("") - component1 = 'arch' - pattern1 = '.*intel.*' - af.add_filter(component1, pattern1) - component2 = 'repo' - pattern2 = 'nessi.no-2022.*' - af.add_filter(component2, pattern2) - component3 = 'i' - pattern3 = '[aA]' - af.add_filter(component3, pattern3) - yield af - - def test_create_complex_filter(complex_filter): expected = "architecture:.*intel.*" expected += " repository:nessi.no-2022.*" @@ -125,9 +255,9 @@ def test_non_match_architecture_repository_context(complex_filter): @pytest.fixture def arch_filter_slash_syntax(): af = EESSIBotActionFilter("") - component1 = 'arch' - pattern1 = '.*/intel/.*' - af.add_filter(component1, pattern1) + component = 'arch' + pattern = '.*/intel/.*' + af.add_filter(component, pattern) yield af @@ -146,9 +276,9 @@ def test_match_architecture_syntax_slash(arch_filter_slash_syntax): @pytest.fixture def arch_filter_dash_syntax(): af = EESSIBotActionFilter("") - component1 = 'arch' - pattern1 = '.*-intel-.*' - af.add_filter(component1, pattern1) + component = 'arch' + pattern = '.*-intel-.*' + af.add_filter(component, pattern) yield af @@ -162,3 +292,45 @@ def test_match_architecture_syntax_dash(arch_filter_dash_syntax): expected = True actual = arch_filter_dash_syntax.check_filters(context) assert expected == actual + + +@pytest.fixture +def accel_filter_slash_syntax(): + af = EESSIBotActionFilter("") + component = 'accel' + pattern = 'nvidia/.*' + af.add_filter(component, pattern) + yield af + + +def test_match_accelerator_syntax_slash(accel_filter_slash_syntax): + context = {"accelerator": "nvidia/cc70", "repository": "EESSI"} + expected = True + actual = accel_filter_slash_syntax.check_filters(context) + assert expected == actual + + context = {"accelerator": "nvidia=cc70"} + expected = True + actual = accel_filter_slash_syntax.check_filters(context) + assert expected == actual + + +@pytest.fixture +def accel_filter_equal_syntax(): + af = EESSIBotActionFilter("") + component = 'accel' + pattern = 'amd=gfx90a' + af.add_filter(component, pattern) + yield af + + +def test_match_accelerator_syntax_equal(accel_filter_equal_syntax): + context = {"accelerator": "amd=gfx90a", "repository": "EESSI"} + expected = True + actual = accel_filter_equal_syntax.check_filters(context) + assert expected == actual + + context = {"accelerator": "amd/gfx90a"} + expected = True + actual = accel_filter_equal_syntax.check_filters(context) + assert expected == actual diff --git a/tools/filter.py b/tools/filter.py index 8d791936..721cf3d7 100644 --- a/tools/filter.py +++ b/tools/filter.py @@ -23,11 +23,22 @@ # NOTE because one can use any prefix of one of the four components below to # define a filter, we need to make sure that no two filters share the same # prefix OR we have to change the handling of filters. +FILTER_COMPONENT_ACCEL = 'accelerator' FILTER_COMPONENT_ARCH = 'architecture' FILTER_COMPONENT_INST = 'instance' FILTER_COMPONENT_JOB = 'job' FILTER_COMPONENT_REPO = 'repository' -FILTER_COMPONENTS = [FILTER_COMPONENT_ARCH, FILTER_COMPONENT_INST, FILTER_COMPONENT_JOB, FILTER_COMPONENT_REPO] +FILTER_COMPONENTS = [FILTER_COMPONENT_ACCEL, + FILTER_COMPONENT_ARCH, + FILTER_COMPONENT_INST, + FILTER_COMPONENT_JOB, + FILTER_COMPONENT_REPO + ] + +COMPONENT_TOO_SHORT = "component in filter spec '{component}:{pattern}' is too short; must be 3 characters or longer" +COMPONENT_UNKNOWN = "unknown component={component} in {component}:{pattern}" +FILTER_EMPTY_PATTERN = "pattern in filter string '{filter_string}' is empty" +FILTER_FORMAT_ERROR = "filter string '{filter_string}' does not conform to format 'component:pattern'" Filter = namedtuple('Filter', ('component', 'pattern')) @@ -37,7 +48,12 @@ class EESSIBotActionFilterError(Exception): Exception to be raised when encountering an error in creating or adding a filter """ - pass + + def __init__(self, value): + self.value = value + + def __str__(self): + return self.value class EESSIBotActionFilter: @@ -87,8 +103,8 @@ def add_filter(self, component, pattern): Adds a filter given by a component and a pattern Args: - component (string): any prefix of known filter components (see - FILTER_COMPONENTS) + component (string): any prefix (min 3 characters long) of known filter + components (see FILTER_COMPONENTS) pattern (string): regular expression pattern that is applied to a string representing the component @@ -99,26 +115,36 @@ def add_filter(self, component, pattern): EESSIBotActionFilterError: raised if unknown component is provided as argument """ - # check if component is supported (i.e., it is a prefix of one of the - # elements in FILTER_COMPONENTS) + # check if component is supported + # - it is a 3+ character-long string, _and_ + # - it is a prefix of one of the elements in FILTER_COMPONENTS + if len(component) < 3: + msg = COMPONENT_TOO_SHORT.format(component=component, pattern=pattern) + log(msg) + raise EESSIBotActionFilterError(msg) full_component = None for cis in FILTER_COMPONENTS: # NOTE the below code assumes that no two filter share the same - # prefix (e.g., 'repository' and 'request' would have the same - # prefixes 'r' and 're') + # 3-character-long prefix (e.g., 'repository' and 'repeat' would + # have the same prefix 'rep') if cis.startswith(component): full_component = cis break if full_component: log(f"processing component {component}") # replace '-' with '/' in pattern when using 'architecture' filter - # component (done to make sure that values are comparable) + # component (done to make sure that values are comparable) if full_component == FILTER_COMPONENT_ARCH: pattern = pattern.replace('-', '/') + # replace '=' with '/' in pattern when using 'accelerator' filter + # component (done to make sure that values are comparable) + if full_component == FILTER_COMPONENT_ACCEL: + pattern = pattern.replace('=', '/') self.action_filters.append(Filter(full_component, pattern)) else: - log(f"component {component} is unknown") - raise EESSIBotActionFilterError(f"unknown component={component} in {component}:{pattern}") + msg = COMPONENT_UNKNOWN.format(component=component, pattern=pattern) + log(msg) + raise EESSIBotActionFilterError(msg) def add_filter_from_string(self, filter_string): """ @@ -136,11 +162,17 @@ def add_filter_from_string(self, filter_string): """ _filter_split = filter_string.split(':') if len(_filter_split) != 2: - log(f"filter string '{filter_string}' does not conform to format 'component:pattern'") - raise EESSIBotActionFilterError(f"filter '{filter_string}' does not conform to format 'component:pattern'") + msg = FILTER_FORMAT_ERROR.format(filter_string=filter_string) + log(msg) + raise EESSIBotActionFilterError(msg) + if len(_filter_split[0]) < 3: + msg = COMPONENT_TOO_SHORT.format(component=_filter_split[0], pattern=_filter_split[1]) + log(msg) + raise EESSIBotActionFilterError(msg) if len(_filter_split[1]) == 0: - log(f"pattern in filter string '{filter_string}' is empty") - raise EESSIBotActionFilterError(f"pattern in filter string '{filter_string}' is empty") + msg = FILTER_EMPTY_PATTERN.format(filter_string=filter_string) + log(msg) + raise EESSIBotActionFilterError(msg) self.add_filter(_filter_split[0], _filter_split[1]) def remove_filter(self, component, pattern): @@ -148,22 +180,44 @@ def remove_filter(self, component, pattern): Removes all elements matching the filter given by (component, pattern) Args: - component (string): any prefix of 'architecture', 'instance', 'job' or 'repository' + component (string): one of FILTER_COMPONENTS pattern (string): regex that is applied to a string representing the component Returns: None (implicitly) """ - index = 0 - for _filter in self.action_filters: + if len(component) < 3: + msg = COMPONENT_TOO_SHORT.format(component=component, pattern=pattern) + log(msg) + raise EESSIBotActionFilterError(msg) + full_component = None + for cis in FILTER_COMPONENTS: + # NOTE the below code assumes that no two filter share the same + # 3-character-long prefix (e.g., 'repository' and 'repeat' would + # have the same prefix 'rep') + if cis.startswith(component): + full_component = cis + break + if not full_component: + # the component provided as argument is not in the list of FILTER_COMPONENTS + msg = COMPONENT_UNKNOWN.format(component=component, pattern=pattern) + log(msg) + raise EESSIBotActionFilterError(msg) + + # need to traverse list from end or next elem after a removed item is + # skipped + num_filters = len(self.action_filters) + for idx in range(num_filters, 0, -1): + # idx runs from num_filters to 1; this needs to be corrected to + # num_filters-1 to 0 + index = idx - 1 # NOTE the below code assumes that no two filter share the same - # prefix (e.g., 'repository' and 'request' would have the same - # prefixes 'r' and 're') + # 3-character-long prefix (e.g., 'repository' and 'repeat' would + # have the same prefix 'rep') + _filter = self.action_filters[index] if _filter.component.startswith(component) and pattern == _filter.pattern: log(f"removing filter ({_filter.component}, {pattern})") self.action_filters.pop(index) - else: - index += 1 def to_string(self): """ @@ -184,8 +238,8 @@ def to_string(self): def check_filters(self, context): """ - Checks filters for a given context which is defined by one to four - components (architecture, instance, job, repository) + Checks filters for a given context which is defined by one to five + components (accelerator, architecture, instance, job, repository) Args: context (dict) : dictionary that maps components to their value @@ -217,6 +271,9 @@ def check_filters(self, context): # replace - with / in architecture component if af.component == FILTER_COMPONENT_ARCH: value = value.replace('-', '/') + # replace = with / in accelerator component + if af.component == FILTER_COMPONENT_ACCEL: + value = value.replace('=', '/') if re.search(af.pattern, value): # if the pattern of the filter matches check = True From 99f42b1daab8170e80c5f6e370f03ce7c8fcab4a Mon Sep 17 00:00:00 2001 From: sam Date: Sun, 1 Sep 2024 12:53:36 +0200 Subject: [PATCH 47/64] add support for updating submit options --- tasks/build.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tasks/build.py b/tasks/build.py index 059b30cc..39c4778e 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -658,6 +658,20 @@ def submit_job(job, cfg): else: time_limit = f"--time={DEFAULT_JOB_TIME_LIMIT}" + # update job.slurm_opts with det_submit_opts(job) in det_submit_opts.py if available + do_update_slurm_opts = False + sys.path.append(job.working_dir) + + try: + from det_submit_opts import det_submit_opts # pylint:disable=import-outside-toplevel + do_update_slurm_opts = True + except ImportError: + log(f"{fn}(): not updating job.slurm_opts: cannot import function det_submit_opts from module det_submit_opts") + + if do_update_slurm_opts: + job = job._replace(slurm_opts=det_submit_opts(job)) + log(f"{fn}(): updated job.slurm_opts: {job.slurm_opts}") + command_line = ' '.join([ build_env_cfg[config.BUILDENV_SETTING_SUBMIT_COMMAND], build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS], From 4517ab225231283653ffa97360bda0c4fba13719 Mon Sep 17 00:00:00 2001 From: sam Date: Tue, 3 Sep 2024 20:45:59 +0200 Subject: [PATCH 48/64] make it configurable --- app.cfg.example | 3 +++ eessi_bot_event_handler.py | 1 + tasks/build.py | 18 +++++++++++------- tools/config.py | 1 + 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/app.cfg.example b/app.cfg.example index a5a09649..b65ada67 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -128,6 +128,9 @@ build_permission = # template for comment when user who set a label has no permission to trigger build jobs no_build_permission_comment = Label `bot:build` has been set by user `{build_labeler}`, but this person does not have permission to trigger builds +# whether or not to allow updating the submit options via custom module det_submit_opts +allow_update_submit_opts = false + [deploycfg] # script for uploading built software packages diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index 91274fbe..a94e078e 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -47,6 +47,7 @@ config.BOT_CONTROL_SETTING_COMMAND_PERMISSION, # required config.BOT_CONTROL_SETTING_COMMAND_RESPONSE_FMT], # required config.SECTION_BUILDENV: [ + # config.BUILDENV_SETTING_ALLOW_UPDATE_SUBMIT_OPTS # optional config.BUILDENV_SETTING_BUILD_JOB_SCRIPT, # required config.BUILDENV_SETTING_BUILD_LOGS_DIR, # optional+recommended config.BUILDENV_SETTING_BUILD_PERMISSION, # optional+recommended diff --git a/tasks/build.py b/tasks/build.py index 39c4778e..903800b0 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -658,15 +658,19 @@ def submit_job(job, cfg): else: time_limit = f"--time={DEFAULT_JOB_TIME_LIMIT}" - # update job.slurm_opts with det_submit_opts(job) in det_submit_opts.py if available + # update job.slurm_opts with det_submit_opts(job) in det_submit_opts.py if allowed and available do_update_slurm_opts = False - sys.path.append(job.working_dir) + allow_update_slurm_opts = cfg[config.SECTION_BUILDENV].getboolean(config.BUILDENV_SETTING_ALLOW_UPDATE_SUBMIT_OPTS) - try: - from det_submit_opts import det_submit_opts # pylint:disable=import-outside-toplevel - do_update_slurm_opts = True - except ImportError: - log(f"{fn}(): not updating job.slurm_opts: cannot import function det_submit_opts from module det_submit_opts") + if allow_update_slurm_opts: + sys.path.append(job.working_dir) + + try: + from det_submit_opts import det_submit_opts # pylint:disable=import-outside-toplevel + do_update_slurm_opts = True + except ImportError: + log(f"{fn}(): not updating job.slurm_opts: " + "cannot import function det_submit_opts from module det_submit_opts") if do_update_slurm_opts: job = job._replace(slurm_opts=det_submit_opts(job)) diff --git a/tools/config.py b/tools/config.py index f1deb34a..6675fd87 100644 --- a/tools/config.py +++ b/tools/config.py @@ -36,6 +36,7 @@ BOT_CONTROL_SETTING_COMMAND_RESPONSE_FMT = 'command_response_fmt' SECTION_BUILDENV = 'buildenv' +BUILDENV_SETTING_ALLOW_UPDATE_SUBMIT_OPTS = 'allow_update_submit_opts' BUILDENV_SETTING_BUILD_JOB_SCRIPT = 'build_job_script' BUILDENV_SETTING_BUILD_LOGS_DIR = 'build_logs_dir' BUILDENV_SETTING_BUILD_PERMISSION = 'build_permission' From 62a493329e43d47d5b1a9078aa94fb9bd2628b2b Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Fri, 6 Sep 2024 11:51:17 +0200 Subject: [PATCH 49/64] updating authors in changed files and add setting to README.md --- README.md | 7 +++++++ app.cfg.example | 1 + eessi_bot_event_handler.py | 1 + tasks/build.py | 1 + tools/config.py | 1 + 5 files changed, 11 insertions(+) diff --git a/README.md b/README.md index 2eb0de7c..8af58a92 100644 --- a/README.md +++ b/README.md @@ -426,6 +426,13 @@ no_build_permission_comment = The `bot: build ...` command has been used by user `no_build_permission_comment` defines a comment (template) that is used when the account trying to trigger build jobs has no permission to do so. +``` +allow_update_submit_opts = false +``` +`allow_update_submit_opts` determines whether or not to allow updating the submit +options via custom module `det_submit_opts` provided by the pull request being +processed. + #### `[bot_control]` section diff --git a/app.cfg.example b/app.cfg.example index b65ada67..25abf911 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -10,6 +10,7 @@ # author: Jonas Qvigstad (@jonas-lq) # author: Pedro Santos Neves (@Neves-P) # author: Thomas Roeblitz (@trz42) +# author: Sam Moors (@smoors) # # license: GPLv2 # diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index a94e078e..ac943faf 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -13,6 +13,7 @@ # author: Lara Ramona Peeters (@laraPPr) # author: Thomas Roeblitz (@trz42) # author: Pedro Santos Neves (@Neves-P) +# author: Sam Moors (@smoors) # # license: GPLv2 # diff --git a/tasks/build.py b/tasks/build.py index 903800b0..a967ef73 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -12,6 +12,7 @@ # author: Lara Ramona Peeters (@laraPPr) # author: Pedro Santos Neves (@Neves-P) # author: Thomas Roeblitz (@trz42) +# author: Sam Moors (@smoors) # # license: GPLv2 # diff --git a/tools/config.py b/tools/config.py index 6675fd87..da8a2875 100644 --- a/tools/config.py +++ b/tools/config.py @@ -10,6 +10,7 @@ # author: Jacob Ziemke (@jacobz137) # author: Jonas Qvigstad (@jonas-lq) # author: Thomas Roeblitz (@trz42) +# author: Sam Moors (@smoors) # # license: GPLv2 # From b8c3d48b4e696cb8a5a7c7078bdb356af8940f6e Mon Sep 17 00:00:00 2001 From: sam Date: Sun, 8 Sep 2024 16:57:47 +0200 Subject: [PATCH 50/64] use github API for downloading the diff --- tasks/build.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tasks/build.py b/tasks/build.py index a967ef73..0bcd8535 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -343,7 +343,12 @@ def download_pr(repo_name, branch_name, pr, arch_job_dir): error_stage = _ERROR_GIT_CHECKOUT return checkout_output, checkout_err, checkout_exit_code, error_stage - curl_cmd = f'curl -L https://github.com/{repo_name}/pull/{pr.number}.diff > {pr.number}.diff' + curl_cmd = ' '.join([ + 'curl -L', + '-H "Accept: application/vnd.github.diff"', + '-H "X-GitHub-Api-Version: 2022-11-28"', + f'https://api.github.com/repos/{repo_name}/pulls/{pr.number} > {pr.number}.diff', + ]) log(f'curl with command {curl_cmd}') curl_output, curl_error, curl_exit_code = run_cmd( curl_cmd, "Obtain patch", arch_job_dir, raise_on_error=False From cef70a74db5f303c15fc6706e3d036bc0b0ae4ac Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 10 Sep 2024 10:34:01 +0200 Subject: [PATCH 51/64] add function to return filter pattern for a component --- tools/filter.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tools/filter.py b/tools/filter.py index 721cf3d7..8b14f5eb 100644 --- a/tools/filter.py +++ b/tools/filter.py @@ -39,6 +39,7 @@ COMPONENT_UNKNOWN = "unknown component={component} in {component}:{pattern}" FILTER_EMPTY_PATTERN = "pattern in filter string '{filter_string}' is empty" FILTER_FORMAT_ERROR = "filter string '{filter_string}' does not conform to format 'component:pattern'" +UNKNOWN_COMPONENT_CONST = "unknown component constant {component}" Filter = namedtuple('Filter', ('component', 'pattern')) @@ -175,6 +176,26 @@ def add_filter_from_string(self, filter_string): raise EESSIBotActionFilterError(msg) self.add_filter(_filter_split[0], _filter_split[1]) + def get_filter_by_component(self, component): + """ + Returns filter pattern for component. + + Args: + component (string): one of FILTER_COMPONENTS + + Returns: + (list): list of pattern for filters whose component matches argument + """ + if component not in FILTER_COMPONENTS: + msg = UNKNOWN_COMPONENT_CONST.format(component=component) + raise EESSIBotActionFilterError(msg) + + pattern = [] + for _filter in self.action_filters: + if component == _filter.component: + pattern.append(_filter.pattern) + return pattern + def remove_filter(self, component, pattern): """ Removes all elements matching the filter given by (component, pattern) From 4faaa014480c86e2de62ba46fe2d46c015c9590d Mon Sep 17 00:00:00 2001 From: sam Date: Tue, 10 Sep 2024 10:21:59 +0200 Subject: [PATCH 52/64] add docs on private repos --- README.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/README.md b/README.md index 8af58a92..081283d2 100644 --- a/README.md +++ b/README.md @@ -813,3 +813,26 @@ The job manager can run on a different machine than the event handler, as long a For information on how to make pull requests and let the bot build software, see [the bot section of the EESSI documentation](https://www.eessi.io/docs/bot/). + +# Private target repos + +Both Git and Curl need to have access to the target repo. A convenient way to +access a private repo via a Github token is by adding the following lines to +your `~/.netrc` and `~/.curlrc` files: + +``` +# ~/.netrc +machine github.com +login oauth +password + +machine api.github.com +login oauth +password +``` + +``` +# ~/.curlrc +--netrc +``` + From 96b067dace1aac1441ac6f2450f7668c5bf5c62d Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 10 Sep 2024 11:38:01 +0200 Subject: [PATCH 53/64] updated and new settings to support accelerator build arg --- README.md | 17 ++++++++++++----- app.cfg.example | 3 ++- tasks/build.py | 42 ++++++++++++++++++++++++++++++++++++------ tools/config.py | 3 ++- tools/job_metadata.py | 1 + 5 files changed, 53 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 8af58a92..ebf4e29b 100644 --- a/README.md +++ b/README.md @@ -645,17 +645,24 @@ scontrol_command = /usr/bin/scontrol #### `[submitted_job_comments]` section The `[submitted_job_comments]` section specifies templates for messages about newly submitted jobs. -``` -initial_comment = New job on instance `{app_name}` for architecture `{arch_name}` for repository `{repo_id}` in job dir `{symlink}` -``` -`initial_comment` is used to create a comment to a PR when a new job has been created. - ``` awaits_release = job id `{job_id}` awaits release by job manager ``` `awaits_release` is used to provide a status update of a job (shown as a row in the job's status table). +``` +initial_comment = New job on instance `{app_name}` for architecture `{arch_name}`{accelerator_spec} for repository `{repo_id}` in job dir `{symlink}` +``` +`initial_comment` is used to create a comment to a PR when a new job has been +created. Note, the part '{accelerator_spec}' is only filled-in by the bot if the +argument 'accelerator' to the `bot: build` command have been used. +``` +with_accelerator = " and accelerator {accelerator}" +``` +`with_accelerator` is used to provide information about the accelerator the job +should build for if and only if the argument `accelerator:X/Y` has been provided. + #### `[new_job_comments]` section The `[new_job_comments]` section sets templates for messages about jobs whose `hold` flag was released. diff --git a/app.cfg.example b/app.cfg.example index 25abf911..2e50834c 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -243,8 +243,9 @@ scontrol_command = /usr/bin/scontrol # are removed, the output (in PR comments) will lack important # information. [submitted_job_comments] -initial_comment = New job on instance `{app_name}` for architecture `{arch_name}` for repository `{repo_id}` in job dir `{symlink}` awaits_release = job id `{job_id}` awaits release by job manager +initial_comment = New job on instance `{app_name}` for CPU micro-architecture `{arch_name}`{accelerator_spec} for repository `{repo_id}` in job dir `{symlink}` +with_accelerator = " and accelerator {accelerator}" [new_job_comments] diff --git a/tasks/build.py b/tasks/build.py index a967ef73..96d04fe4 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -33,6 +33,7 @@ # Local application imports (anything from EESSI/eessi-bot-software-layer) from connections import github from tools import config, cvmfs_repository, job_metadata, pr_comments, run_cmd +import tools.filter as tools_filter # defaults (used if not specified via, eg, 'app.cfg') @@ -46,7 +47,7 @@ _ERROR_NONE = "none" -Job = namedtuple('Job', ('working_dir', 'arch_target', 'repo_id', 'slurm_opts', 'year_month', 'pr_id')) +Job = namedtuple('Job', ('working_dir', 'arch_target', 'repo_id', 'slurm_opts', 'year_month', 'pr_id', 'accelerator')) # global repo_cfg repo_cfg = {} @@ -474,6 +475,12 @@ def prepare_jobs(pr, cfg, event_info, action_filter): # call to just before download_pr year_month, pr_id, run_dir = create_pr_dir(pr, cfg, event_info) + accelerator = "none" + # determine accelerator from action_filter argument + accelerators = action_filter.get_filter_by_component(tools_filter.FILTER_COMPONENT_ACCEL) + if len(accelerators) > 0: + accelerator = accelerators[0] + jobs = [] for arch, slurm_opt in arch_map.items(): arch_dir = arch.replace('/', '_') @@ -501,6 +508,15 @@ def prepare_jobs(pr, cfg, event_info, action_filter): continue else: log(f"{fn}(): context DOES satisfy filter(s), going on with job") + # we reached this point when the filter matched (otherwise we + # 'continue' with the next repository) + # for each match of the filter we create a specific job directory + # however, matching CPU architectures works differently to handling + # accelerators; multiple CPU architectures defined in arch_target_map + # can match the (CPU) architecture component of a filter; in + # contrast, the value of the accelerator filter is just passed down + # to scripts in bot/ directory of the pull request (see function + # prepare_job_cfg and creation of Job tuple below) job_dir = os.path.join(run_dir, arch_dir, repo_id) os.makedirs(job_dir, exist_ok=True) log(f"{fn}(): job_dir '{job_dir}'") @@ -514,10 +530,12 @@ def prepare_jobs(pr, cfg, event_info, action_filter): cpu_target = '/'.join(arch.split('/')[1:]) os_type = arch.split('/')[0] log(f"{fn}(): arch = '{arch}' => cpu_target = '{cpu_target}' , os_type = '{os_type}'") - prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, cpu_target, os_type) + + log(f"{fn}(): accelerator = '{accelerator}'") + prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, cpu_target, os_type, accelerator) # enlist jobs to proceed - job = Job(job_dir, arch, repo_id, slurm_opt, year_month, pr_id) + job = Job(job_dir, arch, repo_id, slurm_opt, year_month, pr_id, accelerator) jobs.append(job) log(f"{fn}(): {len(jobs)} jobs to proceed after applying white list") @@ -527,7 +545,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter): return jobs -def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, os_type): +def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, os_type, accelerator): """ Set up job configuration file 'job.cfg' in directory /cfg @@ -538,6 +556,7 @@ def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, repo_id (string): identifier of the repository to build for software_subdir (string): software subdirectory to build for (e.g., 'x86_64/generic') os_type (string): type of the os (e.g., 'linux') + accelerator (string): defines accelerator to build for (e.g., 'nvidia/cc80') Returns: None (implicitly) @@ -563,6 +582,7 @@ def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, # [architecture] # software_subdir = software_subdir # os_type = os_type + # accelerator = accelerator job_cfg = configparser.ConfigParser() job_cfg[job_metadata.JOB_CFG_SITE_CONFIG_SECTION] = {} build_env_to_job_cfg_keys = { @@ -607,6 +627,7 @@ def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, job_cfg[job_cfg_arch_section] = {} job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_SOFTWARE_SUBDIR] = software_subdir job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_OS_TYPE] = os_type + job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_ACCELERATOR] = accelerator # copy contents of directory containing repository configuration to directory # containing job configuration/metadata @@ -726,11 +747,19 @@ def create_pr_comment(job, job_id, app_name, pr, gh, symlink): # obtain arch from job.arch_target which has the format OS/ARCH arch_name = '-'.join(job.arch_target.split('/')[1:]) + submitted_job_comments_cfg = config.read_config()[config.SECTION_SUBMITTED_JOB_COMMENTS] + + # obtain accelerator from job.accelerator + accelerator = job.accelerator + accelerator_spec_str = '' + if not accelerator is 'none': + accelerator_spec = f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR]}" + accelerator_spec_str = accelerator_spec.format(accelerator=accelerator) + # get current date and time dt = datetime.now(timezone.utc) # construct initial job comment - submitted_job_comments_cfg = config.read_config()[config.SECTION_SUBMITTED_JOB_COMMENTS] job_comment = (f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT]}" f"\n|date|job status|comment|\n" f"|----------|----------|------------------------|\n" @@ -741,7 +770,8 @@ def create_pr_comment(job, job_id, app_name, pr, gh, symlink): arch_name=arch_name, symlink=symlink, repo_id=job.repo_id, - job_id=job_id) + job_id=job_id, + accelerator_spec=accelerator_spec_str) # create comment to pull request repo_name = pr.base.repo.full_name diff --git a/tools/config.py b/tools/config.py index da8a2875..ff641ebb 100644 --- a/tools/config.py +++ b/tools/config.py @@ -105,8 +105,9 @@ RUNNING_JOB_COMMENTS_SETTING_RUNNING_JOB = 'running_job' SECTION_SUBMITTED_JOB_COMMENTS = 'submitted_job_comments' -SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT = 'initial_comment' SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE = 'awaits_release' +SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT = 'initial_comment' +SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR = 'with_accelerator' SECTION_CLEAN_UP = 'clean_up' CLEAN_UP_SETTING_TRASH_BIN_ROOT_DIR = 'trash_bin_dir' diff --git a/tools/job_metadata.py b/tools/job_metadata.py index 286f71cb..d4000199 100644 --- a/tools/job_metadata.py +++ b/tools/job_metadata.py @@ -33,6 +33,7 @@ JOB_CFG_ARCHITECTURE_SECTION = "architecture" JOB_CFG_ARCHITECTURE_OS_TYPE = "os_type" JOB_CFG_ARCHITECTURE_SOFTWARE_SUBDIR = "software_subdir" +JOB_CFG_ARCHITECTURE_ACCELERATOR = "accelerator" JOB_CFG_REPOSITORY_SECTION = "repository" JOB_CFG_REPOSITORY_CONTAINER = "container" From 815f8fe7a306ec57ccc98c26558c251ac512b51e Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 10 Sep 2024 11:56:06 +0200 Subject: [PATCH 54/64] fix comparison syntax --- tasks/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/build.py b/tasks/build.py index 96d04fe4..5b214582 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -752,7 +752,7 @@ def create_pr_comment(job, job_id, app_name, pr, gh, symlink): # obtain accelerator from job.accelerator accelerator = job.accelerator accelerator_spec_str = '' - if not accelerator is 'none': + if accelerator != 'none': accelerator_spec = f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR]}" accelerator_spec_str = accelerator_spec.format(accelerator=accelerator) From 9e4cbfdeb2ff3f76e61bf66abca0f42f4778cf4c Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 10 Sep 2024 12:19:24 +0200 Subject: [PATCH 55/64] improve settings' value for with_accelerator --- README.md | 2 +- app.cfg.example | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ebf4e29b..04da8630 100644 --- a/README.md +++ b/README.md @@ -658,7 +658,7 @@ initial_comment = New job on instance `{app_name}` for architecture `{arch_name} created. Note, the part '{accelerator_spec}' is only filled-in by the bot if the argument 'accelerator' to the `bot: build` command have been used. ``` -with_accelerator = " and accelerator {accelerator}" +with_accelerator =  and accelerator `{accelerator}` ``` `with_accelerator` is used to provide information about the accelerator the job should build for if and only if the argument `accelerator:X/Y` has been provided. diff --git a/app.cfg.example b/app.cfg.example index 2e50834c..152de2bc 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -245,7 +245,7 @@ scontrol_command = /usr/bin/scontrol [submitted_job_comments] awaits_release = job id `{job_id}` awaits release by job manager initial_comment = New job on instance `{app_name}` for CPU micro-architecture `{arch_name}`{accelerator_spec} for repository `{repo_id}` in job dir `{symlink}` -with_accelerator = " and accelerator {accelerator}" +with_accelerator =  and accelerator `{accelerator}` [new_job_comments] From 34534bb10edb8b1d3643e46bba61b5f6443bdd10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20R=C3=B6blitz?= Date: Tue, 10 Sep 2024 16:55:08 +0200 Subject: [PATCH 56/64] fix typo in README.md Co-authored-by: Kenneth Hoste --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 04da8630..feb9d38e 100644 --- a/README.md +++ b/README.md @@ -656,7 +656,7 @@ initial_comment = New job on instance `{app_name}` for architecture `{arch_name} ``` `initial_comment` is used to create a comment to a PR when a new job has been created. Note, the part '{accelerator_spec}' is only filled-in by the bot if the -argument 'accelerator' to the `bot: build` command have been used. +argument 'accelerator' to the `bot: build` command has been used. ``` with_accelerator =  and accelerator `{accelerator}` ``` From 8f44ba9846c7d9eb37fefd232e0884ab9651e627 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 10 Sep 2024 21:11:17 +0200 Subject: [PATCH 57/64] using None for undefined accelerator, improved logging --- tasks/build.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 5b214582..56068e04 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -475,11 +475,16 @@ def prepare_jobs(pr, cfg, event_info, action_filter): # call to just before download_pr year_month, pr_id, run_dir = create_pr_dir(pr, cfg, event_info) - accelerator = "none" # determine accelerator from action_filter argument accelerators = action_filter.get_filter_by_component(tools_filter.FILTER_COMPONENT_ACCEL) - if len(accelerators) > 0: + if len(accelerators) == 1: accelerator = accelerators[0] + elif len(accelerators) > 1: + log(f"{fn}(): found more than one ({len(accelerators)}) accelerator requirement") + accelerator = None + else: + log(f"{fn}(): found no accelerator requirement") + accelerator = None jobs = [] for arch, slurm_opt in arch_map.items(): @@ -529,9 +534,10 @@ def prepare_jobs(pr, cfg, event_info, action_filter): # prepare job configuration file 'job.cfg' in directory /cfg cpu_target = '/'.join(arch.split('/')[1:]) os_type = arch.split('/')[0] - log(f"{fn}(): arch = '{arch}' => cpu_target = '{cpu_target}' , os_type = '{os_type}'") - log(f"{fn}(): accelerator = '{accelerator}'") + log(f"{fn}(): arch = '{arch}' => cpu_target = '{cpu_target}' , os_type = '{os_type}'" + f", accelerator = '{accelerator}'") + prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, cpu_target, os_type, accelerator) # enlist jobs to proceed @@ -749,12 +755,11 @@ def create_pr_comment(job, job_id, app_name, pr, gh, symlink): submitted_job_comments_cfg = config.read_config()[config.SECTION_SUBMITTED_JOB_COMMENTS] - # obtain accelerator from job.accelerator - accelerator = job.accelerator + # set string for accelerator if job.accelerator is defined/set (e.g., not None) accelerator_spec_str = '' - if accelerator != 'none': + if job.accelerator: accelerator_spec = f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR]}" - accelerator_spec_str = accelerator_spec.format(accelerator=accelerator) + accelerator_spec_str = accelerator_spec.format(accelerator=job.accelerator) # get current date and time dt = datetime.now(timezone.utc) From 216e2dbc41a045ec92423b7b8509c7215650137e Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 10 Sep 2024 21:22:30 +0200 Subject: [PATCH 58/64] fix failing pytests for extended Job tuple --- tests/test_task_build.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_task_build.py b/tests/test_task_build.py index 6f79fc74..f432d768 100644 --- a/tests/test_task_build.py +++ b/tests/test_task_build.py @@ -282,7 +282,7 @@ def test_create_pr_comment_succeeds(mocked_github, tmpdir): print("CREATING PR COMMENT") ym = datetime.today().strftime('%Y.%m') pr_number = 1 - job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number) + job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") job_id = "123" app_name = "pytest" @@ -311,7 +311,7 @@ def test_create_pr_comment_succeeds_none(mocked_github, tmpdir): print("CREATING PR COMMENT") ym = datetime.today().strftime('%Y.%m') pr_number = 1 - job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number) + job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") job_id = "123" app_name = "pytest" @@ -336,7 +336,7 @@ def test_create_pr_comment_raises_once_then_succeeds(mocked_github, tmpdir): print("CREATING PR COMMENT") ym = datetime.today().strftime('%Y.%m') pr_number = 1 - job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number) + job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") job_id = "123" app_name = "pytest" @@ -361,7 +361,7 @@ def test_create_pr_comment_always_raises(mocked_github, tmpdir): print("CREATING PR COMMENT") ym = datetime.today().strftime('%Y.%m') pr_number = 1 - job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number) + job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") job_id = "123" app_name = "pytest" @@ -387,7 +387,7 @@ def test_create_pr_comment_three_raises(mocked_github, tmpdir): print("CREATING PR COMMENT") ym = datetime.today().strftime('%Y.%m') pr_number = 1 - job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number) + job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic") job_id = "123" app_name = "pytest" @@ -409,7 +409,7 @@ def test_create_read_metadata_file(mocked_github, tmpdir): # create some test data ym = datetime.today().strftime('%Y.%m') pr_number = 999 - job = Job(tmpdir, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number) + job = Job(tmpdir, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic") job_id = "123" @@ -440,14 +440,14 @@ def test_create_read_metadata_file(mocked_github, tmpdir): # use directory that does not exist dir_does_not_exist = os.path.join(tmpdir, "dir_does_not_exist") - job2 = Job(dir_does_not_exist, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number) + job2 = Job(dir_does_not_exist, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic") job_id2 = "222" with pytest.raises(FileNotFoundError): create_metadata_file(job2, job_id2, pr_comment) # use directory without write permission dir_without_write_perm = os.path.join("/") - job3 = Job(dir_without_write_perm, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number) + job3 = Job(dir_without_write_perm, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic") job_id3 = "333" with pytest.raises(OSError): create_metadata_file(job3, job_id3, pr_comment) @@ -457,7 +457,7 @@ def test_create_read_metadata_file(mocked_github, tmpdir): # use undefined values for parameters # job_id = None - job4 = Job(tmpdir, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number) + job4 = Job(tmpdir, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic") job_id4 = None create_metadata_file(job4, job_id4, pr_comment) @@ -472,7 +472,7 @@ def test_create_read_metadata_file(mocked_github, tmpdir): # use undefined values for parameters # job.working_dir = None - job5 = Job(None, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number) + job5 = Job(None, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic") job_id5 = "555" with pytest.raises(TypeError): create_metadata_file(job5, job_id5, pr_comment) From 35dac28497b9380d89035e88a0710143a59bef0a Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 10 Sep 2024 21:30:08 +0200 Subject: [PATCH 59/64] adjust test data (app.cfg) --- tests/test_app.cfg | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_app.cfg b/tests/test_app.cfg index fd91ed8b..43e11bf9 100644 --- a/tests/test_app.cfg +++ b/tests/test_app.cfg @@ -17,8 +17,9 @@ # variable 'comment' under 'submitted_job_comments' should not be changed as there are regular expression patterns matching it [submitted_job_comments] -initial_comment = New job on instance `{app_name}` for architecture `{arch_name}` for repository `{repo_id}` in job dir `{symlink}` awaits_release = job id `{job_id}` awaits release by job manager +initial_comment = New job on instance `{app_name}` for CPU micro-architecture `{arch_name}`{accelerator_spec} for repository `{repo_id}` in job dir `{symlink}` +with_accelerator =  and accelerator `{accelerator}` [new_job_comments] awaits_lauch = job awaits launch by Slurm scheduler From 687c256a5a6e5b4165ad09bc48dcab78d8fe425c Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 10 Sep 2024 21:57:35 +0200 Subject: [PATCH 60/64] cfg values cannot be `None` --- tasks/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/build.py b/tasks/build.py index 56068e04..d70103ee 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -633,7 +633,7 @@ def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, job_cfg[job_cfg_arch_section] = {} job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_SOFTWARE_SUBDIR] = software_subdir job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_OS_TYPE] = os_type - job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_ACCELERATOR] = accelerator + job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_ACCELERATOR] = accelerator if accelerator else '' # copy contents of directory containing repository configuration to directory # containing job configuration/metadata From 9af672cf89cd87d65d4fefb135eebbb98371e5af Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Tue, 17 Sep 2024 15:57:14 +0200 Subject: [PATCH 61/64] mark `with_accelerator` setting in `[submitted_job_comments]`' section as required --- eessi_bot_event_handler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py index ac943faf..00a1db81 100644 --- a/eessi_bot_event_handler.py +++ b/eessi_bot_event_handler.py @@ -97,7 +97,8 @@ config.REPO_TARGETS_SETTING_REPOS_CFG_DIR], # required config.SECTION_SUBMITTED_JOB_COMMENTS: [ config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT, # required - config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE] # required + config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE, # required + config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR], # required } From f172396dd2a2f76544c7e49a6744fd1d06c6fe9e Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 18 Sep 2024 20:02:15 +0200 Subject: [PATCH 62/64] release notes for v0.6.0 --- RELEASE_NOTES | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 20080acf..f203c1ed 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,6 +1,37 @@ This file contains a description of the major changes to the EESSI build-and-deploy bot. For more detailed information, please see the git log. +v0.6.0 (18 September 2024) +-------------------------- + +This is a minor release of the EESSI build-and-deploy bot. + +Bug fixes: +* mark 'with_accelerator' setting as required (#282) + +Improvements: +* move merged PR job directories to 'trash_bin_dir' (#271) + * the target directory can be defined with the 'app.cfg' setting 'trash_bin_dir' + * it uses 'shutil.move' which tries to use 'mv' if source and target are on the + same filesystem +* add setting to give all jobs a unique name (#273) +* move closed PR job directories to 'trash_bin_dir' (#275) +* add filter for accelerators (#276) +* add support for updating Slurm options through user-defined python module in + target PR (#277) +* use GitHub API for downloading the diff of a PR (#278) +* add documentation about private repos (#279) +* pass accelerator value to job scripts (via job.cfg) and extend PR comment if + the 'accelerator' argument is used (#280) + +New 'app.cfg' settings (see README.md and app.cfg.example for details): +* (optional) 'allow_update_submit_opts' +* (required) 'job_name' +* (required) 'moved_job_dirs_comment' +* (required) 'trash_bin_dir' +* (required) 'with_accelerator' + + v0.5.0 (16 May 2024) -------------------------- From 5cdc4baa9a21ac859dc9aac638a45b403a2bda7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20R=C3=B6blitz?= Date: Wed, 18 Sep 2024 20:16:13 +0200 Subject: [PATCH 63/64] merge info on 280 and 282 Co-authored-by: Kenneth Hoste --- RELEASE_NOTES | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE_NOTES b/RELEASE_NOTES index f203c1ed..84ca89e8 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -22,7 +22,7 @@ Improvements: * use GitHub API for downloading the diff of a PR (#278) * add documentation about private repos (#279) * pass accelerator value to job scripts (via job.cfg) and extend PR comment if - the 'accelerator' argument is used (#280) + the 'accelerator' argument is used (#280, #282) New 'app.cfg' settings (see README.md and app.cfg.example for details): * (optional) 'allow_update_submit_opts' From a22c4340bd5f6ee6e9984d6fa2bf785c385d546c Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Wed, 18 Sep 2024 20:26:03 +0200 Subject: [PATCH 64/64] update release notes --- RELEASE_NOTES | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/RELEASE_NOTES b/RELEASE_NOTES index f203c1ed..dd5378e1 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -6,9 +6,6 @@ v0.6.0 (18 September 2024) This is a minor release of the EESSI build-and-deploy bot. -Bug fixes: -* mark 'with_accelerator' setting as required (#282) - Improvements: * move merged PR job directories to 'trash_bin_dir' (#271) * the target directory can be defined with the 'app.cfg' setting 'trash_bin_dir' @@ -22,14 +19,14 @@ Improvements: * use GitHub API for downloading the diff of a PR (#278) * add documentation about private repos (#279) * pass accelerator value to job scripts (via job.cfg) and extend PR comment if - the 'accelerator' argument is used (#280) + the 'accelerator' argument is used (#280, #282) New 'app.cfg' settings (see README.md and app.cfg.example for details): -* (optional) 'allow_update_submit_opts' -* (required) 'job_name' -* (required) 'moved_job_dirs_comment' -* (required) 'trash_bin_dir' -* (required) 'with_accelerator' +* (optional) 'allow_update_submit_opts' in section '[buildenv]' +* (required) 'job_name' in section '[buildenv]' +* (required) 'moved_job_dirs_comment' in section '[clean_up]' +* (required) 'trash_bin_dir' in section '[clean_up]' +* (required) 'with_accelerator' in section '[submitted_job_comments]' v0.5.0 (16 May 2024)