From afc2f67956e64ff0b2fe9ef4149ef546ed041f5c Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 16 Apr 2024 10:43:44 +0200
Subject: [PATCH 01/64] Preliminary work on PR cleanup task
---
eessi_bot_event_handler.py | 42 ++++++++++++++++++++++++++++++++++++++
tasks/clean_up.py | 42 ++++++++++++++++++++++++++++++++++++++
2 files changed, 84 insertions(+)
create mode 100644 tasks/clean_up.py
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 49f10238..642e8bff 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -555,6 +555,48 @@ def start(self, app, port=3000):
waitress.serve(app, listen='*:%s' % port)
+ def handle_pull_request_merged_event(self, event_info, pr):
+ """
+ Handle events of type pull_request with the action merged. Main action
+ is to scan directories used and move them to the trash_bin.
+
+ Args:
+ event_info (dict): event received by event_handler
+ pr (github.PullRequest.PullRequest): instance representing the pull request
+
+ Returns:
+ github.IssueComment.IssueComment instance or None (note, github refers to
+ PyGithub, not the github from the internal connections module)
+ """
+ self.log("PR merged: scanning directories used by PR")
+
+
+ # 1) determine the jobs that have been run for the PR
+ job_dirs = determine_job_dirs(pr.number)
+
+ # 2) read location of trash_bin from cfg
+ merge_cfg = self.cfg['merge_cfg']
+ trash_bin_dir = merge_cfg.get('trash_bin_dir')
+
+ # 3) move the directories to the trash_bin
+ self.log("Moving directories to trash_bin")
+ move_to_trash_bin(trash_bin_dir, job_dirs)
+
+ # 4) report move to pull request?
+ repo_name = pr.base.repo.full_name
+ gh = github.get_instance()
+ repo = gh.get_repo(repo_name)
+ pull_request = repo.get_pull(pr.number)
+ issue_comment = pull_request.create_issue_comment(comment)
+ return issue_comment
+
+def move_to_trash_bin(trash_bin_dir, job_dirs):
+ move_cmd = ["mv -t", trash_bin_dir]
+ for job_dir in job_dirs:
+ move_cmd.append(job_dir)
+ ' '.join(move_cmd)
+ out, err, ec = run_cmd(move_cmd, 'Move job directories to trash_bin', raise_on_error=False)
+
def main():
"""
Main function which parses command line arguments, verifies if required
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
new file mode 100644
index 00000000..830d9d2d
--- /dev/null
+++ b/tasks/clean_up.py
@@ -0,0 +1,42 @@
+# This file is part of the EESSI build-and-deploy bot,
+# see https://github.com/EESSI/eessi-bot-software-layer
+#
+# The bot helps with requests to add software installations to the
+# EESSI software layer, see https://github.com/EESSI/software-layer
+#
+# author: Bob Droege (@bedroge)
+# author: Kenneth Hoste (@boegel)
+# author: Hafsa Naeem (@hafsa-naeem)
+# author: Jacob Ziemke (@jacobz137)
+# author: Jonas Qvigstad (@jonas-lq)
+# author: Lara Ramona Peeters (@laraPPr)
+# author: Pedro Santos Neves (@Neves-P)
+# author: Thomas Roeblitz (@trz42)
+#
+# license: GPLv2
+#
+
+# Standard library imports
+from collections import namedtuple
+import configparser
+from datetime import datetime, timezone
+import json
+import os
+import shutil
+import sys
+
+# Third party imports (anything installed into the local Python environment)
+from pyghee.utils import error, log
+from retry.api import retry_call
+
+# Local application imports (anything from EESSI/eessi-bot-software-layer)
+from connections import github
+from tools import config, pr_comments, run_cmd
+from tools.job_metadata import create_metadata_file
+
+def move_to_trash_bin(trash_bin_dir, job_dirs):
+ move_cmd = ["mv -t", trash_bin_dir]
+ for job_dir in job_dirs:
+ move_cmd.append(job_dir)
+ ' '.join(move_cmd)
+ out, err, ec = run_cmd(move_cmd, 'Move job directories to trash_bin', raise_on_error=False)
From 5e17612329a5acab0b9e79426ac4c70b2ed109f5 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 7 May 2024 08:53:00 +0200
Subject: [PATCH 02/64] WIP disk cleanup
---
eessi_bot_event_handler.py | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 642e8bff..ab3a3c71 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -554,7 +554,6 @@ def start(self, app, port=3000):
self.log(log_file_info)
waitress.serve(app, listen='*:%s' % port)
-
def handle_pull_request_merged_event(self, event_info, pr):
"""
Handle events of type pull_request with the action merged. Main action
@@ -570,13 +569,14 @@ def handle_pull_request_merged_event(self, event_info, pr):
"""
self.log("PR merged: scanning directories used by PR")
-
# 1) determine the jobs that have been run for the PR
job_dirs = determine_job_dirs(pr.number)
# 2) read location of trash_bin from cfg
merge_cfg = self.cfg['merge_cfg']
trash_bin_dir = merge_cfg.get('trash_bin_dir')
+ # Subdirectory with date of move. Also with repository name. Handle symbolic links (later?)
+ # cron job deletes symlinks?
# 3) move the directories to the trash_bin
self.log("Moving directories to trash_bin")
@@ -590,13 +590,6 @@ def handle_pull_request_merged_event(self, event_info, pr):
issue_comment = pull_request.create_issue_comment(comment)
return issue_comment
-def move_to_trash_bin(trash_bin_dir, job_dirs):
- move_cmd = ["mv -t", trash_bin_dir]
- for job_dir in job_dirs:
- move_cmd.append(job_dir)
- ' '.join(move_cmd)
- out, err, ec = run_cmd(move_cmd, 'Move job directories to trash_bin', raise_on_error=False)
-
def main():
"""
Main function which parses command line arguments, verifies if required
From 5d8f0a126a5dc21e570f3617f932666da42bc10e Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 7 May 2024 09:58:36 +0200
Subject: [PATCH 03/64] cleanup script WIP
---
scripts/cleanup_pr_trash_bin.sh | 159 ++++++++++++++++++++++++++++++++
1 file changed, 159 insertions(+)
create mode 100755 scripts/cleanup_pr_trash_bin.sh
diff --git a/scripts/cleanup_pr_trash_bin.sh b/scripts/cleanup_pr_trash_bin.sh
new file mode 100755
index 00000000..44755648
--- /dev/null
+++ b/scripts/cleanup_pr_trash_bin.sh
@@ -0,0 +1,159 @@
+#!/bin/bash
+#
+# GitHub App for the EESSI project
+#
+# A bot to help with requests to add software installations to the EESSI software layer,
+# see https://github.com/EESSI/software-layer
+
+# This script cleans up (deletes) all build artefacts and temporary storage tarballs of a given PR
+#
+# author: Thomas Roeblitz (@trz42)
+#
+# license: GPLv2
+#
+
+SCRIPT_DIR=$(dirname $(realpath $BASH_SOURCE))
+
+function display_help
+{
+ echo "Usage: $0 [OPTIONS] " >&2
+ echo " -b | --jobs-base-dir DIRECTORY - jobs base directory [default: reads" >&2
+ echo " value from bot config file app.cfg or .]" >&2
+ echo " -D | --dry-run - only show commands that would be run" >&2
+ echo " [default: false]" >&2
+ echo " -h | --help - display this usage information" >&2
+}
+
+function get_trash_bin_dir
+{
+ app_cfg_path=${1}
+ grep trash_bin_dir ${app_cfg_path} | grep -v '^[ ]*#' | sed -e 's/^[^=]*=[ ]*//'
+}
+
+echo
+
+if [[ $# -lt 1 ]]; then
+ display_help
+ exit 1
+fi
+
+# process command line args
+POSITIONAL_ARGS=()
+
+jobs_base_dir=
+dry_run=false
+
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ -b|--jobs-base-dir)
+ if [[ $# -gt 1 ]]; then
+ jobs_base_dir="$2"
+ shift 2
+ else
+ echo "Error: missing argument (directory) for parameter '${1}'"
+ exit 2
+ fi
+ ;;
+ -D|--dry-run)
+ dry_run=true
+ shift 1
+ ;;
+ -h|--help)
+ display_help
+ exit 0
+ ;;
+ -*|--*)
+ echo "Error: Unknown option: $1" >&2
+ exit 1
+ ;;
+ *) # No more options
+ POSITIONAL_ARGS+=("$1") # save positional arg
+ shift
+ ;;
+ esac
+done
+
+# restore potentially parsed filename(s) into $*
+set -- "${POSITIONAL_ARGS[@]}"
+
+if [[ $# -ne 1 ]]; then
+ echo "Error: exactly one PR number should be provided as argument"
+ display_help
+ exit 3
+fi
+
+pull_request=${1}
+
+if ${dry_run} = true ; then
+ echo "DRY_RUN: not removing any files"
+fi
+
+# determine jobs base dir if not given explicitly
+# 1. check for file app.cfg in SCRIPT_DIR
+# 2. check for file app.cfg in current dir
+# if found try to obtain value of jobs_base_dir setting
+# if not file not found or jobs_base_dir setting not found (or empty) --> error & exit
+# TODO: Change jobs_base_dir to trash_bin
+if [[ -z ${jobs_base_dir} ]]; then
+ echo "jobs base directory not given explicitly, trying to determine it"
+ if [[ -e ${SCRIPT_DIR}/app.cfg ]]; then
+ echo "check for app.cfg in '${SCRIPT_DIR}'"
+ jobs_base_dir=$(get_jobs_base_dir ${SCRIPT_DIR}/app.cfg)
+ else
+ if [[ -e ./app.cfg ]]; then
+ echo "check for app.cfg in '${PWD}' (current directory)"
+ jobs_base_dir=$(get_jobs_base_dir ./app.cfg)
+ fi
+ fi
+fi
+if [[ -z ${jobs_base_dir} ]]; then
+ echo "Error: jobs base directory is empty, please specify it as argument"
+ display_help
+ exit 4
+fi
+
+echo "processing all directories for PR ${pull_request}:"
+find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request} | sed -e 's/^/ /'
+
+echo
+echo "disk usage of directories for PR ${pull_request} BEFORE removing build artefacts and tmp storage"
+for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done
+
+echo
+echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing tmp storage tarballs for PR ${pull_request}"
+for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request})
+do
+ for f in $(find $d -type f -wholename "*[0-9].tgz")
+ do
+ if ${dry_run} = true ; then
+ echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))"
+ else
+ echo "Removing file '$f'"
+ rm $f
+ fi
+ done
+done
+
+echo
+echo "disk usage of directories for PR ${pull_request} AFTER removing tmp storage tarballs"
+for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done
+
+echo
+echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing build artefacts for PR ${pull_request}"
+for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request})
+do
+ for f in $(find $d -type f -wholename "*tar.gz")
+ do
+ if ${dry_run} = true ; then
+ echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))"
+ else
+ echo "Removing file '$f'"
+ rm $f
+ fi
+ done
+done
+
+echo
+echo "disk usage of directories for PR ${pull_request} AFTER removing build artefacts and tmp storage"
+for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done
+
From 568e4c51b1c44e03865353cf91219e511924e913 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 7 May 2024 09:59:24 +0200
Subject: [PATCH 04/64] Make date subdirectory
---
tasks/clean_up.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index 830d9d2d..669f970c 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -19,7 +19,6 @@
# Standard library imports
from collections import namedtuple
import configparser
-from datetime import datetime, timezone
import json
import os
import shutil
@@ -34,9 +33,10 @@
from tools import config, pr_comments, run_cmd
from tools.job_metadata import create_metadata_file
+
def move_to_trash_bin(trash_bin_dir, job_dirs):
- move_cmd = ["mv -t", trash_bin_dir]
- for job_dir in job_dirs:
- move_cmd.append(job_dir)
+ move_cmd = ["mkdir -p tras_bin_dir && mv -t", trash_bin_dir]
+ for job_dir in job_dirs:
+ move_cmd.append(job_dir)
' '.join(move_cmd)
out, err, ec = run_cmd(move_cmd, 'Move job directories to trash_bin', raise_on_error=False)
From 7e91ce69948666d65a02e693248472e07b54c013 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 7 May 2024 10:00:11 +0200
Subject: [PATCH 05/64] Have subdirectory with date and comment
---
eessi_bot_event_handler.py | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index d74955d8..ba6787b7 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -18,6 +18,7 @@
# Standard library imports
import sys
+from datetime import datetime, timezone
# Third party imports (anything installed into the local Python environment)
from pyghee.lib import create_app, get_event_info, PyGHee, read_event_from_json
@@ -28,8 +29,9 @@
from connections import github
from tasks.build import check_build_permission, get_architecture_targets, get_repo_cfg, \
request_bot_build_issue_comments, submit_build_jobs
-from tasks.deploy import deploy_built_artefacts
-from tools import config
+from tasks.deploy import deploy_built_artefacts, determine_job_dirs
+from tasks.clean_up import move_to_trash_bin
+from tools import config, cvmfs_repository
from tools.args import event_handler_parse
from tools.commands import EESSIBotCommand, EESSIBotCommandError, \
contains_any_bot_command, get_bot_command
@@ -613,18 +615,24 @@ def handle_pull_request_merged_event(self, event_info, pr):
PyGithub, not the github from the internal connections module)
"""
self.log("PR merged: scanning directories used by PR")
-
+ repo_cfg = get_repo_cfg(self.cfg)
+ repo_name = repo_cfg[cvmfs_repository.REPOS_CFG_REPO_NAME]
+ clean_up_comments_cfg = self.cfg[config.SECTION_CLEAN_UP_COMMENTS]
# 1) determine the jobs that have been run for the PR
job_dirs = determine_job_dirs(pr.number)
# 2) read location of trash_bin from cfg
merge_cfg = self.cfg['merge_cfg']
- trash_bin_dir = merge_cfg.get('trash_bin_dir')
+ repo_cfg = get_repo_cfg(self.cfg)
+ repo_name = repo_cfg[cvmfs_repository.REPOS_CFG_REPO_NAME]
+ dt = datetime.now(timezone.utc)
+ trash_bin_dir = "/".join([merge_cfg.get('trash_bin_dir'), repo_name, dt.strftime('%Y%m%d')])
+
# Subdirectory with date of move. Also with repository name. Handle symbolic links (later?)
# cron job deletes symlinks?
# 3) move the directories to the trash_bin
- self.log("Moving directories to trash_bin")
+ log("Moving directories to trash_bin")
move_to_trash_bin(trash_bin_dir, job_dirs)
# 4) report move to pull request?
@@ -632,7 +640,8 @@ def handle_pull_request_merged_event(self, event_info, pr):
gh = github.get_instance()
repo = gh.get_repo(repo_name)
pull_request = repo.get_pull(pr.number)
- issue_comment = pull_request.create_issue_comment(comment)
+ moved_comment = clean_up_comments_cfg[config.CLEANUP_COMMENTS_MOVED]
+ issue_comment = pull_request.create_issue_comment(moved_comment)
return issue_comment
def main():
From 648232d4e06836b580deb0741921c67a03377981 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 7 May 2024 10:08:03 +0200
Subject: [PATCH 06/64] Typo
---
tasks/clean_up.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index 669f970c..b1ff954a 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -35,7 +35,7 @@
def move_to_trash_bin(trash_bin_dir, job_dirs):
- move_cmd = ["mkdir -p tras_bin_dir && mv -t", trash_bin_dir]
+ move_cmd = ["mkdir -p trash_bin_dir && mv -t", trash_bin_dir]
for job_dir in job_dirs:
move_cmd.append(job_dir)
' '.join(move_cmd)
From 8e2082d6b009169dda0775e1fc85fcad493caf5a Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 4 Jun 2024 10:26:31 +0200
Subject: [PATCH 07/64] Better read from config and logging
---
eessi_bot_event_handler.py | 29 +++++++++++++++++++++--------
tasks/clean_up.py | 17 +++++++----------
tools/config.py | 6 ++++++
3 files changed, 34 insertions(+), 18 deletions(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index ba6787b7..955b3d7d 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -614,25 +614,37 @@ def handle_pull_request_merged_event(self, event_info, pr):
github.IssueComment.IssueComment instance or None (note, github refers to
PyGithub, not the github from the internal connections module)
"""
- self.log("PR merged: scanning directories used by PR")
- repo_cfg = get_repo_cfg(self.cfg)
- repo_name = repo_cfg[cvmfs_repository.REPOS_CFG_REPO_NAME]
+
+ # Detect event and only act if PR is merged
+ request_body = event_info['raw_request_body']
+ action = request_body['action']
+
+ if action == 'merged':
+ self.log("PR merged: scanning directories used by PR")
+ self.log(f"merge '{action}' action is handled")
+ else:
+ self.log(f"merge action '{action}' not handled")
+ return
+ # at this point we know that we are handling a new merge
+ # NOTE: Permissions to merge are already handled through GitHub, we
+ # don't need to check here
+
clean_up_comments_cfg = self.cfg[config.SECTION_CLEAN_UP_COMMENTS]
# 1) determine the jobs that have been run for the PR
job_dirs = determine_job_dirs(pr.number)
- # 2) read location of trash_bin from cfg
- merge_cfg = self.cfg['merge_cfg']
+ # 2) Get trash_bin_dir from configs
+ trash_bin_root_dir = self.cfg[config.SECTION_MERGED_PR][config.MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR]
repo_cfg = get_repo_cfg(self.cfg)
repo_name = repo_cfg[cvmfs_repository.REPOS_CFG_REPO_NAME]
dt = datetime.now(timezone.utc)
- trash_bin_dir = "/".join([merge_cfg.get('trash_bin_dir'), repo_name, dt.strftime('%Y%m%d')])
+ trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m%d')])
# Subdirectory with date of move. Also with repository name. Handle symbolic links (later?)
# cron job deletes symlinks?
# 3) move the directories to the trash_bin
- log("Moving directories to trash_bin")
+ self.log("Moving directories to trash_bin")
move_to_trash_bin(trash_bin_dir, job_dirs)
# 4) report move to pull request?
@@ -640,10 +652,11 @@ def handle_pull_request_merged_event(self, event_info, pr):
gh = github.get_instance()
repo = gh.get_repo(repo_name)
pull_request = repo.get_pull(pr.number)
- moved_comment = clean_up_comments_cfg[config.CLEANUP_COMMENTS_MOVED]
+ moved_comment = clean_up_comments_cfg[config.CLEAN_UP_COMMENTS_SETTING_MOVED_COMMENT]
issue_comment = pull_request.create_issue_comment(moved_comment)
return issue_comment
+
def main():
"""
Main function which parses command line arguments, verifies if required
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index b1ff954a..5f91f932 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -17,26 +17,23 @@
#
# Standard library imports
-from collections import namedtuple
-import configparser
-import json
-import os
-import shutil
import sys
# Third party imports (anything installed into the local Python environment)
-from pyghee.utils import error, log
-from retry.api import retry_call
+from pyghee.utils import log
# Local application imports (anything from EESSI/eessi-bot-software-layer)
-from connections import github
-from tools import config, pr_comments, run_cmd
-from tools.job_metadata import create_metadata_file
+from tools import run_cmd
def move_to_trash_bin(trash_bin_dir, job_dirs):
+ funcname = sys._getframe().f_code.co_name
+ log(f"{funcname}(): trash_bin_dir = {trash_bin_dir}")
+
move_cmd = ["mkdir -p trash_bin_dir && mv -t", trash_bin_dir]
for job_dir in job_dirs:
move_cmd.append(job_dir)
' '.join(move_cmd)
out, err, ec = run_cmd(move_cmd, 'Move job directories to trash_bin', raise_on_error=False)
+
+ return
diff --git a/tools/config.py b/tools/config.py
index dcffe03d..8c0e9c00 100644
--- a/tools/config.py
+++ b/tools/config.py
@@ -105,6 +105,12 @@
SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT = 'initial_comment'
SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE = 'awaits_release'
+SECTION_MERGED_PR = 'merged_pr'
+MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR = 'trash_bin_dir'
+
+SECTION_CLEAN_UP_COMMENTS = 'clean_up_comments'
+CLEAN_UP_COMMENTS_SETTING_MOVED_COMMENT = 'moved_comment'
+
def read_config(path='app.cfg'):
"""
From d5f1d39803ff692c3a79fd7f4ebd9269ee0abb73 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 4 Jun 2024 10:36:47 +0200
Subject: [PATCH 08/64] Scripting & cron job follow later. Closes #1
---
scripts/cleanup_pr.sh | 158 -------------------------------
scripts/cleanup_pr_trash_bin.sh | 159 --------------------------------
2 files changed, 317 deletions(-)
delete mode 100755 scripts/cleanup_pr.sh
delete mode 100755 scripts/cleanup_pr_trash_bin.sh
diff --git a/scripts/cleanup_pr.sh b/scripts/cleanup_pr.sh
deleted file mode 100755
index 41527675..00000000
--- a/scripts/cleanup_pr.sh
+++ /dev/null
@@ -1,158 +0,0 @@
-#!/bin/bash
-#
-# GitHub App for the EESSI project
-#
-# A bot to help with requests to add software installations to the EESSI software layer,
-# see https://github.com/EESSI/software-layer
-
-# This script cleans up (deletes) all build artefacts and temporary storage tarballs of a given PR
-#
-# author: Thomas Roeblitz (@trz42)
-#
-# license: GPLv2
-#
-
-SCRIPT_DIR=$(dirname $(realpath $BASH_SOURCE))
-
-function display_help
-{
- echo "Usage: $0 [OPTIONS] " >&2
- echo " -b | --jobs-base-dir DIRECTORY - jobs base directory [default: reads" >&2
- echo " value from bot config file app.cfg or .]" >&2
- echo " -D | --dry-run - only show commands that would be run" >&2
- echo " [default: false]" >&2
- echo " -h | --help - display this usage information" >&2
-}
-
-function get_jobs_base_dir
-{
- app_cfg_path=${1}
- grep jobs_base_dir ${app_cfg_path} | grep -v '^[ ]*#' | sed -e 's/^[^=]*=[ ]*//'
-}
-
-echo
-
-if [[ $# -lt 1 ]]; then
- display_help
- exit 1
-fi
-
-# process command line args
-POSITIONAL_ARGS=()
-
-jobs_base_dir=
-dry_run=false
-
-while [[ $# -gt 0 ]]; do
- case $1 in
- -b|--jobs-base-dir)
- if [[ $# -gt 1 ]]; then
- jobs_base_dir="$2"
- shift 2
- else
- echo "Error: missing argument (directory) for parameter '${1}'"
- exit 2
- fi
- ;;
- -D|--dry-run)
- dry_run=true
- shift 1
- ;;
- -h|--help)
- display_help
- exit 0
- ;;
- -*|--*)
- echo "Error: Unknown option: $1" >&2
- exit 1
- ;;
- *) # No more options
- POSITIONAL_ARGS+=("$1") # save positional arg
- shift
- ;;
- esac
-done
-
-# restore potentially parsed filename(s) into $*
-set -- "${POSITIONAL_ARGS[@]}"
-
-if [[ $# -ne 1 ]]; then
- echo "Error: exactly one PR number should be provided as argument"
- display_help
- exit 3
-fi
-
-pull_request=${1}
-
-if ${dry_run} = true ; then
- echo "DRY_RUN: not removing any files"
-fi
-
-# determine jobs base dir if not given explicitly
-# 1. check for file app.cfg in SCRIPT_DIR
-# 2. check for file app.cfg in current dir
-# if found try to obtain value of jobs_base_dir setting
-# if not file not found or jobs_base_dir setting not found (or empty) --> error & exit
-if [[ -z ${jobs_base_dir} ]]; then
- echo "jobs base directory not given explicitly, trying to determine it"
- if [[ -e ${SCRIPT_DIR}/app.cfg ]]; then
- echo "check for app.cfg in '${SCRIPT_DIR}'"
- jobs_base_dir=$(get_jobs_base_dir ${SCRIPT_DIR}/app.cfg)
- else
- if [[ -e ./app.cfg ]]; then
- echo "check for app.cfg in '${PWD}' (current directory)"
- jobs_base_dir=$(get_jobs_base_dir ./app.cfg)
- fi
- fi
-fi
-if [[ -z ${jobs_base_dir} ]]; then
- echo "Error: jobs base directory is empty, please specify it as argument"
- display_help
- exit 4
-fi
-
-echo "processing all directories for PR ${pull_request}:"
-find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request} | sed -e 's/^/ /'
-
-echo
-echo "disk usage of directories for PR ${pull_request} BEFORE removing build artefacts and tmp storage"
-for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done
-
-echo
-echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing tmp storage tarballs for PR ${pull_request}"
-for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request})
-do
- for f in $(find $d -type f -wholename "*[0-9].tgz")
- do
- if ${dry_run} = true ; then
- echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))"
- else
- echo "Removing file '$f'"
- rm $f
- fi
- done
-done
-
-echo
-echo "disk usage of directories for PR ${pull_request} AFTER removing tmp storage tarballs"
-for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done
-
-echo
-echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing build artefacts for PR ${pull_request}"
-for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request})
-do
- for f in $(find $d -type f -wholename "*tar.gz")
- do
- if ${dry_run} = true ; then
- echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))"
- else
- echo "Removing file '$f'"
- rm $f
- fi
- done
-done
-
-echo
-echo "disk usage of directories for PR ${pull_request} AFTER removing build artefacts and tmp storage"
-for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done
-
diff --git a/scripts/cleanup_pr_trash_bin.sh b/scripts/cleanup_pr_trash_bin.sh
deleted file mode 100755
index 44755648..00000000
--- a/scripts/cleanup_pr_trash_bin.sh
+++ /dev/null
@@ -1,159 +0,0 @@
-#!/bin/bash
-#
-# GitHub App for the EESSI project
-#
-# A bot to help with requests to add software installations to the EESSI software layer,
-# see https://github.com/EESSI/software-layer
-
-# This script cleans up (deletes) all build artefacts and temporary storage tarballs of a given PR
-#
-# author: Thomas Roeblitz (@trz42)
-#
-# license: GPLv2
-#
-
-SCRIPT_DIR=$(dirname $(realpath $BASH_SOURCE))
-
-function display_help
-{
- echo "Usage: $0 [OPTIONS] " >&2
- echo " -b | --jobs-base-dir DIRECTORY - jobs base directory [default: reads" >&2
- echo " value from bot config file app.cfg or .]" >&2
- echo " -D | --dry-run - only show commands that would be run" >&2
- echo " [default: false]" >&2
- echo " -h | --help - display this usage information" >&2
-}
-
-function get_trash_bin_dir
-{
- app_cfg_path=${1}
- grep trash_bin_dir ${app_cfg_path} | grep -v '^[ ]*#' | sed -e 's/^[^=]*=[ ]*//'
-}
-
-echo
-
-if [[ $# -lt 1 ]]; then
- display_help
- exit 1
-fi
-
-# process command line args
-POSITIONAL_ARGS=()
-
-jobs_base_dir=
-dry_run=false
-
-while [[ $# -gt 0 ]]; do
- case $1 in
- -b|--jobs-base-dir)
- if [[ $# -gt 1 ]]; then
- jobs_base_dir="$2"
- shift 2
- else
- echo "Error: missing argument (directory) for parameter '${1}'"
- exit 2
- fi
- ;;
- -D|--dry-run)
- dry_run=true
- shift 1
- ;;
- -h|--help)
- display_help
- exit 0
- ;;
- -*|--*)
- echo "Error: Unknown option: $1" >&2
- exit 1
- ;;
- *) # No more options
- POSITIONAL_ARGS+=("$1") # save positional arg
- shift
- ;;
- esac
-done
-
-# restore potentially parsed filename(s) into $*
-set -- "${POSITIONAL_ARGS[@]}"
-
-if [[ $# -ne 1 ]]; then
- echo "Error: exactly one PR number should be provided as argument"
- display_help
- exit 3
-fi
-
-pull_request=${1}
-
-if ${dry_run} = true ; then
- echo "DRY_RUN: not removing any files"
-fi
-
-# determine jobs base dir if not given explicitly
-# 1. check for file app.cfg in SCRIPT_DIR
-# 2. check for file app.cfg in current dir
-# if found try to obtain value of jobs_base_dir setting
-# if not file not found or jobs_base_dir setting not found (or empty) --> error & exit
-# TODO: Change jobs_base_dir to trash_bin
-if [[ -z ${jobs_base_dir} ]]; then
- echo "jobs base directory not given explicitly, trying to determine it"
- if [[ -e ${SCRIPT_DIR}/app.cfg ]]; then
- echo "check for app.cfg in '${SCRIPT_DIR}'"
- jobs_base_dir=$(get_jobs_base_dir ${SCRIPT_DIR}/app.cfg)
- else
- if [[ -e ./app.cfg ]]; then
- echo "check for app.cfg in '${PWD}' (current directory)"
- jobs_base_dir=$(get_jobs_base_dir ./app.cfg)
- fi
- fi
-fi
-if [[ -z ${jobs_base_dir} ]]; then
- echo "Error: jobs base directory is empty, please specify it as argument"
- display_help
- exit 4
-fi
-
-echo "processing all directories for PR ${pull_request}:"
-find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request} | sed -e 's/^/ /'
-
-echo
-echo "disk usage of directories for PR ${pull_request} BEFORE removing build artefacts and tmp storage"
-for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done
-
-echo
-echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing tmp storage tarballs for PR ${pull_request}"
-for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request})
-do
- for f in $(find $d -type f -wholename "*[0-9].tgz")
- do
- if ${dry_run} = true ; then
- echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))"
- else
- echo "Removing file '$f'"
- rm $f
- fi
- done
-done
-
-echo
-echo "disk usage of directories for PR ${pull_request} AFTER removing tmp storage tarballs"
-for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done
-
-echo
-echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing build artefacts for PR ${pull_request}"
-for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request})
-do
- for f in $(find $d -type f -wholename "*tar.gz")
- do
- if ${dry_run} = true ; then
- echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))"
- else
- echo "Removing file '$f'"
- rm $f
- fi
- done
-done
-
-echo
-echo "disk usage of directories for PR ${pull_request} AFTER removing build artefacts and tmp storage"
-for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done
-
From e0910f60e64d8294092bfa857e44f42474bbd2eb Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 4 Jun 2024 13:26:45 +0200
Subject: [PATCH 09/64] Treat hound to some white spaces
---
eessi_bot_event_handler.py | 2 +-
tasks/clean_up.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 955b3d7d..7f7680bd 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -639,7 +639,7 @@ def handle_pull_request_merged_event(self, event_info, pr):
repo_name = repo_cfg[cvmfs_repository.REPOS_CFG_REPO_NAME]
dt = datetime.now(timezone.utc)
trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m%d')])
-
+
# Subdirectory with date of move. Also with repository name. Handle symbolic links (later?)
# cron job deletes symlinks?
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index 5f91f932..1916a1d7 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -29,7 +29,7 @@
def move_to_trash_bin(trash_bin_dir, job_dirs):
funcname = sys._getframe().f_code.co_name
log(f"{funcname}(): trash_bin_dir = {trash_bin_dir}")
-
+
move_cmd = ["mkdir -p trash_bin_dir && mv -t", trash_bin_dir]
for job_dir in job_dirs:
move_cmd.append(job_dir)
From d6452028f0b2d9bd9725376ce7ab1cd39e61fd83 Mon Sep 17 00:00:00 2001
From: Pedro Santos Neves <10762799+Neves-P@users.noreply.github.com>
Date: Mon, 10 Jun 2024 13:16:18 +0200
Subject: [PATCH 10/64] Update eessi_bot_event_handler.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Thomas Röblitz
---
eessi_bot_event_handler.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 7f7680bd..82ccfa16 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -621,7 +621,7 @@ def handle_pull_request_merged_event(self, event_info, pr):
if action == 'merged':
self.log("PR merged: scanning directories used by PR")
- self.log(f"merge '{action}' action is handled")
+ self.log(f"pull_request event with action '{action}' will be handled")
else:
self.log(f"merge action '{action}' not handled")
return
From d75bb11193696f24e82bf72b446d325864c92086 Mon Sep 17 00:00:00 2001
From: Pedro Santos Neves <10762799+Neves-P@users.noreply.github.com>
Date: Mon, 10 Jun 2024 13:17:21 +0200
Subject: [PATCH 11/64] Update tasks/clean_up.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Thomas Röblitz
---
tasks/clean_up.py | 7 -------
1 file changed, 7 deletions(-)
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index 1916a1d7..c36a78fc 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -4,14 +4,7 @@
# The bot helps with requests to add software installations to the
# EESSI software layer, see https://github.com/EESSI/software-layer
#
-# author: Bob Droege (@bedroge)
-# author: Kenneth Hoste (@boegel)
-# author: Hafsa Naeem (@hafsa-naeem)
-# author: Jacob Ziemke (@jacobz137)
-# author: Jonas Qvigstad (@jonas-lq)
-# author: Lara Ramona Peeters (@laraPPr)
# author: Pedro Santos Neves (@Neves-P)
-# author: Thomas Roeblitz (@trz42)
#
# license: GPLv2
#
From 435bcae9e0f1027489d27470f0e4b6a01412055f Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Mon, 10 Jun 2024 14:23:38 +0200
Subject: [PATCH 12/64] Handle based on `merged` not `action` from
`request_body`
---
eessi_bot_event_handler.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 82ccfa16..a2125ea6 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -618,12 +618,13 @@ def handle_pull_request_merged_event(self, event_info, pr):
# Detect event and only act if PR is merged
request_body = event_info['raw_request_body']
action = request_body['action']
+ merged = request_body['merged']
- if action == 'merged':
+ if merged == 'true':
self.log("PR merged: scanning directories used by PR")
- self.log(f"pull_request event with action '{action}' will be handled")
+ self.log(f"pull_request event with action '{action}' and merged '{merged}' will be handled")
else:
- self.log(f"merge action '{action}' not handled")
+ self.log(f"Action '{action}' not handled as merged is '{merged}'")
return
# at this point we know that we are handling a new merge
# NOTE: Permissions to merge are already handled through GitHub, we
From 6b38f373fb14dc31affc5c9a5a4ce0109bc53204 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Mon, 10 Jun 2024 14:24:09 +0200
Subject: [PATCH 13/64] Add docstring and use python routines (not shell
commands)
---
tasks/clean_up.py | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index c36a78fc..96b3b0c1 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -11,22 +11,31 @@
# Standard library imports
import sys
+import os
+import shutil
# Third party imports (anything installed into the local Python environment)
from pyghee.utils import log
# Local application imports (anything from EESSI/eessi-bot-software-layer)
-from tools import run_cmd
def move_to_trash_bin(trash_bin_dir, job_dirs):
+ """
+ Move directory to trash_bin_dir
+
+ Args:
+ trash_bin_dir (string): path to the trash_bin_dir. Defined in .cfg
+ job_dirs (list): list with job directory names
+
+ Returns:
+ None (implicitly)
+ """
funcname = sys._getframe().f_code.co_name
log(f"{funcname}(): trash_bin_dir = {trash_bin_dir}")
- move_cmd = ["mkdir -p trash_bin_dir && mv -t", trash_bin_dir]
+ os.makedirs(trash_bin_dir, exist_ok=True)
for job_dir in job_dirs:
- move_cmd.append(job_dir)
- ' '.join(move_cmd)
- out, err, ec = run_cmd(move_cmd, 'Move job directories to trash_bin', raise_on_error=False)
-
- return
+ destination_dir = shutil.move(job_dir, trash_bin_dir)
+ log(f"{funcname}(): moved {job_dir} to {destination_dir}")
+ return True
From 769daec68918d09ee586dfac4795c0cc7719bced Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Mon, 10 Jun 2024 16:42:02 +0200
Subject: [PATCH 14/64] Actions are 'closed', not 'merged'
---
eessi_bot_event_handler.py | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index a2125ea6..a9ccca3f 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -31,7 +31,7 @@
request_bot_build_issue_comments, submit_build_jobs
from tasks.deploy import deploy_built_artefacts, determine_job_dirs
from tasks.clean_up import move_to_trash_bin
-from tools import config, cvmfs_repository
+from tools import config
from tools.args import event_handler_parse
from tools.commands import EESSIBotCommand, EESSIBotCommandError, \
contains_any_bot_command, get_bot_command
@@ -601,10 +601,11 @@ def start(self, app, port=3000):
self.log(log_file_info)
waitress.serve(app, listen='*:%s' % port)
- def handle_pull_request_merged_event(self, event_info, pr):
+ def handle_pull_request_closed_event(self, event_info, pr):
"""
- Handle events of type pull_request with the action merged. Main action
- is to scan directories used and move them to the trash_bin.
+ Handle events of type pull_request with the action 'closed'. Main action
+ is to scan directories used and move them to the trash_bin when the PR
+ is merged.
Args:
event_info (dict): event received by event_handler
@@ -636,8 +637,8 @@ def handle_pull_request_merged_event(self, event_info, pr):
# 2) Get trash_bin_dir from configs
trash_bin_root_dir = self.cfg[config.SECTION_MERGED_PR][config.MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR]
- repo_cfg = get_repo_cfg(self.cfg)
- repo_name = repo_cfg[cvmfs_repository.REPOS_CFG_REPO_NAME]
+
+ repo_name = event_info['full_name']
dt = datetime.now(timezone.utc)
trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m%d')])
From 1c4731588efead74f1fcc00cbccc33dcbc5c2ad7 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Mon, 10 Jun 2024 17:24:57 +0200
Subject: [PATCH 15/64] Read from config properly
---
eessi_bot_event_handler.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index a9ccca3f..fdc66a5a 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -619,7 +619,7 @@ def handle_pull_request_closed_event(self, event_info, pr):
# Detect event and only act if PR is merged
request_body = event_info['raw_request_body']
action = request_body['action']
- merged = request_body['merged']
+ merged = request_body['pull_request']['merged']
if merged == 'true':
self.log("PR merged: scanning directories used by PR")
@@ -638,7 +638,7 @@ def handle_pull_request_closed_event(self, event_info, pr):
# 2) Get trash_bin_dir from configs
trash_bin_root_dir = self.cfg[config.SECTION_MERGED_PR][config.MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR]
- repo_name = event_info['full_name']
+ repo_name = event_info['repository']['full_name']
dt = datetime.now(timezone.utc)
trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m%d')])
From 1a6eab3eac45646bd51ccbbfa62210e6b1a2c351 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Mon, 10 Jun 2024 17:42:44 +0200
Subject: [PATCH 16/64] Fix typo in `merged`.
true -> True
---
eessi_bot_event_handler.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index fdc66a5a..f60f91b0 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -621,7 +621,7 @@ def handle_pull_request_closed_event(self, event_info, pr):
action = request_body['action']
merged = request_body['pull_request']['merged']
- if merged == 'true':
+ if merged == 'True':
self.log("PR merged: scanning directories used by PR")
self.log(f"pull_request event with action '{action}' and merged '{merged}' will be handled")
else:
From 0b913432097d319207f2bf44757a8b8601dc6fe1 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 11 Jun 2024 16:41:46 +0200
Subject: [PATCH 17/64] merged is a bool
---
eessi_bot_event_handler.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index f60f91b0..3d6dc6d6 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -621,11 +621,11 @@ def handle_pull_request_closed_event(self, event_info, pr):
action = request_body['action']
merged = request_body['pull_request']['merged']
- if merged == 'True':
+ if merged:
self.log("PR merged: scanning directories used by PR")
self.log(f"pull_request event with action '{action}' and merged '{merged}' will be handled")
else:
- self.log(f"Action '{action}' not handled as merged is '{merged}'")
+ self.log(f"Action '{action}' not handled as 'merged' is '{merged}'")
return
# at this point we know that we are handling a new merge
# NOTE: Permissions to merge are already handled through GitHub, we
From 784c9b9cb2bdeab6a796b4a3e55404c6426e5048 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 11 Jun 2024 16:56:25 +0200
Subject: [PATCH 18/64] Better formatting for merge conflict errors
---
tasks/build.py | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/tasks/build.py b/tasks/build.py
index 82a0911e..527758d7 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -381,21 +381,21 @@ def comment_download_pr(base_repo_name, pr, download_pr_exit_code, download_pr_e
download_pr_comments_cfg = config.read_config()[config.SECTION_DOWNLOAD_PR_COMMENTS]
if error_stage == _ERROR_GIT_CLONE:
- download_comment = (f"`{download_pr_error}`"
+ download_comment = (f"`{download_pr_error}`\n"
f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CLONE_FAILURE]}"
- f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CLONE_TIP]}")
+ f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CLONE_TIP]}")
elif error_stage == _ERROR_GIT_CHECKOUT:
- download_comment = (f"`{download_pr_error}`"
+ download_comment = (f"`{download_pr_error}`\n"
f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CHECKOUT_FAILURE]}"
- f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CHECKOUT_TIP]}")
+ f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CHECKOUT_TIP]}")
elif error_stage == _ERROR_CURL:
- download_comment = (f"`{download_pr_error}`"
+ download_comment = (f"`{download_pr_error}`\n"
f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_CURL_FAILURE]}"
- f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_CURL_TIP]}")
+ f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_CURL_TIP]}")
elif error_stage == _ERROR_GIT_APPLY:
- download_comment = (f"`{download_pr_error}`"
+ download_comment = (f"`{download_pr_error}`\n"
f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_APPLY_FAILURE]}"
- f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_APPLY_TIP]}")
+ f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_APPLY_TIP]}")
download_comment = pr_comments.create_comment(
repo_name=base_repo_name, pr_number=pr.number, comment=download_comment
From 4160bd1bbc563364df5a71478a0666a9f0150b94 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 11 Jun 2024 17:34:21 +0200
Subject: [PATCH 19/64] Fix bug in getting repository name
---
eessi_bot_event_handler.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 3d6dc6d6..3f9a8e1b 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -638,7 +638,7 @@ def handle_pull_request_closed_event(self, event_info, pr):
# 2) Get trash_bin_dir from configs
trash_bin_root_dir = self.cfg[config.SECTION_MERGED_PR][config.MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR]
- repo_name = event_info['repository']['full_name']
+ repo_name = request_body['repository']['full_name']
dt = datetime.now(timezone.utc)
trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m%d')])
From 29ab450d4ed5b4c707a17beee472fc5c6c4948ca Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Wed, 12 Jun 2024 19:20:52 +0200
Subject: [PATCH 20/64] Same formatting for directory timestamp
---
eessi_bot_event_handler.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 3f9a8e1b..a832983a 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -640,7 +640,7 @@ def handle_pull_request_closed_event(self, event_info, pr):
repo_name = request_body['repository']['full_name']
dt = datetime.now(timezone.utc)
- trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m%d')])
+ trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m.%d')])
# Subdirectory with date of move. Also with repository name. Handle symbolic links (later?)
# cron job deletes symlinks?
From 1dcf08174c6b5118661a6508781422fee6bd3f23 Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Fri, 21 Jun 2024 21:53:59 +0200
Subject: [PATCH 21/64] add setting to give all jobs a unique name
---
README.md | 7 +++++++
app.cfg.example | 4 ++++
eessi_bot_event_handler.py | 1 +
eessi_bot_job_manager.py | 8 +++++++-
tasks/build.py | 9 +++++++++
tools/config.py | 1 +
6 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index e268fc41..9e556311 100644
--- a/README.md
+++ b/README.md
@@ -375,6 +375,13 @@ package repositories. Typically these settings are set in the prologue of a
Slurm job. However, when entering the [EESSI compatibility layer](https://www.eessi.io/docs/compatibility_layer),
most environment settings are cleared. Hence, they need to be set again at a later stage.
+```
+job_name = JOB_NAME
+```
+Replace `JOB_NAME` with a string of at least 3 characters that is used as job
+name when a job is submitted. This is used to filter jobs, e.g., should be used
+to make sure that multiple bot instances can run in the same Slurm environment.
+
```
jobs_base_dir = PATH_TO_JOBS_BASE_DIR
```
diff --git a/app.cfg.example b/app.cfg.example
index ae51ade6..7cbde15d 100644
--- a/app.cfg.example
+++ b/app.cfg.example
@@ -87,6 +87,10 @@ container_cachedir = PATH_TO_SHARED_DIRECTORY
# http_proxy = http://PROXY_DNS:3128/
# https_proxy = http://PROXY_DNS:3128/
+# Used to give all jobs of a bot instance the same name. Can be used to allow
+# multiple bot instances running on the same Slurm cluster.
+job_name = prod
+
# directory under which the bot prepares directories per job
# structure created is as follows: YYYY.MM/pr_PR_NUMBER/event_EVENT_ID/run_RUN_NUMBER/OS+SUBDIR
jobs_base_dir = $HOME/jobs
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 5677ed2c..d414f947 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -51,6 +51,7 @@
# config.BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS, # optional
# config.BUILDENV_SETTING_HTTPS_PROXY, # optional
# config.BUILDENV_SETTING_HTTP_PROXY, # optional
+ config.BUILDENV_SETTING_JOB_NAME, # required
config.BUILDENV_SETTING_JOBS_BASE_DIR, # required
# config.BUILDENV_SETTING_LOAD_MODULES, # optional
config.BUILDENV_SETTING_LOCAL_TMP, # required
diff --git a/eessi_bot_job_manager.py b/eessi_bot_job_manager.py
index e7473f00..aba40081 100644
--- a/eessi_bot_job_manager.py
+++ b/eessi_bot_job_manager.py
@@ -50,6 +50,8 @@
# settings that are required in 'app.cfg'
REQUIRED_CONFIG = {
+ config.SECTION_BUILDENV: [
+ config.BUILDENV_SETTING_JOB_NAME], # required
config.SECTION_FINISHED_JOB_COMMENTS: [
config.FINISHED_JOB_COMMENTS_SETTING_JOB_RESULT_UNKNOWN_FMT, # required
config.FINISHED_JOB_COMMENTS_SETTING_JOB_TEST_UNKNOWN_FMT], # required
@@ -85,6 +87,10 @@ def __init__(self):
cfg = config.read_config()
job_manager_cfg = cfg[config.SECTION_JOB_MANAGER]
self.logfile = job_manager_cfg.get(config.JOB_MANAGER_SETTING_LOG_PATH)
+ buildenv_cfg = cfg[config.SECTION_BUILDENV]
+ self.job_name = buildenv_cfg.get(config.BUILDENV_SETTING_JOB_NAME)
+ if len(self.job_name) < 3:
+ raise Exception(f"job name ({self.job_name}) is shorter than 3 characters")
def get_current_jobs(self):
"""
@@ -105,7 +111,7 @@ def get_current_jobs(self):
if username is None:
raise Exception("Unable to find username")
- squeue_cmd = "%s --long --noheader --user=%s" % (self.poll_command, username)
+ squeue_cmd = "%s --long --noheader --user=%s --name='%s'" % (self.poll_command, username, self.job_name)
squeue_output, squeue_err, squeue_exitcode = run_cmd(
squeue_cmd,
"get_current_jobs(): squeue command",
diff --git a/tasks/build.py b/tasks/build.py
index 82a0911e..5fa36076 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -67,6 +67,10 @@ def get_build_env_cfg(cfg):
buildenv = cfg[config.SECTION_BUILDENV]
+ job_name = buildenv.get(config.BUILDENV_SETTING_JOB_NAME)
+ log(f"{fn}(): job_name '{job_name}'")
+ config_data = {config.BUILDENV_SETTING_JOB_NAME: job_name}
+
jobs_base_dir = buildenv.get(config.BUILDENV_SETTING_JOBS_BASE_DIR)
log(f"{fn}(): jobs_base_dir '{jobs_base_dir}'")
config_data = {config.BUILDENV_SETTING_JOBS_BASE_DIR: jobs_base_dir}
@@ -640,6 +644,10 @@ def submit_job(job, cfg):
build_env_cfg = get_build_env_cfg(cfg)
+ # the job_name is used to filter jobs in case multiple bot
+ # instances run on the same system
+ job_name = cfg[config.SECTION_BUILDENV].get(config.BUILDENV_SETTING_JOB_NAME)
+
# add a default time limit of 24h to the job submit command if no other time
# limit is specified already
all_opts_str = " ".join([build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS], job.slurm_opts])
@@ -654,6 +662,7 @@ def submit_job(job, cfg):
build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS],
time_limit,
job.slurm_opts,
+ f"--job-name='{job_name}'",
build_env_cfg[config.BUILDENV_SETTING_BUILD_JOB_SCRIPT],
])
diff --git a/tools/config.py b/tools/config.py
index dcffe03d..11527702 100644
--- a/tools/config.py
+++ b/tools/config.py
@@ -43,6 +43,7 @@
BUILDENV_SETTING_CVMFS_CUSTOMIZATIONS = 'cvmfs_customizations'
BUILDENV_SETTING_HTTPS_PROXY = 'https_proxy'
BUILDENV_SETTING_HTTP_PROXY = 'http_proxy'
+BUILDENV_SETTING_JOB_NAME = 'job_name'
BUILDENV_SETTING_JOBS_BASE_DIR = 'jobs_base_dir'
BUILDENV_SETTING_LOAD_MODULES = 'load_modules'
BUILDENV_SETTING_LOCAL_TMP = 'local_tmp'
From 326dd38d55a7d937a7867767c184d4889f99aa93 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20R=C3=B6blitz?=
Date: Mon, 24 Jun 2024 21:20:04 +0200
Subject: [PATCH 22/64] Only use job_name if it is not None
Co-authored-by: Kenneth Hoste
---
eessi_bot_job_manager.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/eessi_bot_job_manager.py b/eessi_bot_job_manager.py
index aba40081..f8d4368a 100644
--- a/eessi_bot_job_manager.py
+++ b/eessi_bot_job_manager.py
@@ -89,7 +89,7 @@ def __init__(self):
self.logfile = job_manager_cfg.get(config.JOB_MANAGER_SETTING_LOG_PATH)
buildenv_cfg = cfg[config.SECTION_BUILDENV]
self.job_name = buildenv_cfg.get(config.BUILDENV_SETTING_JOB_NAME)
- if len(self.job_name) < 3:
+ if self.job_name and len(self.job_name) < 3:
raise Exception(f"job name ({self.job_name}) is shorter than 3 characters")
def get_current_jobs(self):
@@ -111,7 +111,9 @@ def get_current_jobs(self):
if username is None:
raise Exception("Unable to find username")
- squeue_cmd = "%s --long --noheader --user=%s --name='%s'" % (self.poll_command, username, self.job_name)
+ squeue_cmd = "%s --long --noheader --user=%s" % (self.poll_command, username)
+ if self.job_name:
+ squeue_cmd += "--name='%s'" % self.job_name
squeue_output, squeue_err, squeue_exitcode = run_cmd(
squeue_cmd,
"get_current_jobs(): squeue command",
From 76a263d0a6843c9acc5bd7a3b8be48f47f5fcd72 Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Mon, 24 Jun 2024 21:23:35 +0200
Subject: [PATCH 23/64] add space before name argument
---
eessi_bot_job_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/eessi_bot_job_manager.py b/eessi_bot_job_manager.py
index f8d4368a..bb0c6dd8 100644
--- a/eessi_bot_job_manager.py
+++ b/eessi_bot_job_manager.py
@@ -113,7 +113,7 @@ def get_current_jobs(self):
squeue_cmd = "%s --long --noheader --user=%s" % (self.poll_command, username)
if self.job_name:
- squeue_cmd += "--name='%s'" % self.job_name
+ squeue_cmd += " --name='%s'" % self.job_name
squeue_output, squeue_err, squeue_exitcode = run_cmd(
squeue_cmd,
"get_current_jobs(): squeue command",
From c614a96f801ae2c79f9f1fa9cd5d3cdecbc156db Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Mon, 24 Jun 2024 21:34:42 +0200
Subject: [PATCH 24/64] only add job-name submission arg if job_name is not
None
---
tasks/build.py | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/tasks/build.py b/tasks/build.py
index 5fa36076..b2de809d 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -661,10 +661,9 @@ def submit_job(job, cfg):
build_env_cfg[config.BUILDENV_SETTING_SUBMIT_COMMAND],
build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS],
time_limit,
- job.slurm_opts,
- f"--job-name='{job_name}'",
- build_env_cfg[config.BUILDENV_SETTING_BUILD_JOB_SCRIPT],
- ])
+ job.slurm_opts] +
+ ([f"--job-name='{job_name}'"] if job_name else []) +
+ [build_env_cfg[config.BUILDENV_SETTING_BUILD_JOB_SCRIPT]])
cmdline_output, cmdline_error, cmdline_exit_code = run_cmd(command_line,
"submit job for target '%s'" % job.arch_target,
From b285f81c59426854cb65a77ece21cce686e92127 Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Tue, 25 Jun 2024 08:43:52 +0200
Subject: [PATCH 25/64] add missing config section to app.cfg for tests
---
tests/test_app.cfg | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tests/test_app.cfg b/tests/test_app.cfg
index f940c1df..fd91ed8b 100644
--- a/tests/test_app.cfg
+++ b/tests/test_app.cfg
@@ -11,6 +11,8 @@
# sample config file for tests (some functions run config.read_config()
# which reads app.cfg by default)
+[buildenv]
+
[job_manager]
# variable 'comment' under 'submitted_job_comments' should not be changed as there are regular expression patterns matching it
From 087d6c553f628103c9a4a78ed1d79848ed5aeca1 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Mon, 1 Jul 2024 13:56:43 +0200
Subject: [PATCH 26/64] Best not to read comment from config
---
eessi_bot_event_handler.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index a832983a..01bb2a3f 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -654,7 +654,7 @@ def handle_pull_request_closed_event(self, event_info, pr):
gh = github.get_instance()
repo = gh.get_repo(repo_name)
pull_request = repo.get_pull(pr.number)
- moved_comment = clean_up_comments_cfg[config.CLEAN_UP_COMMENTS_SETTING_MOVED_COMMENT]
+ moved_comment = f"PR merged! Moved `{job_dirs}` to `{trash_bin_dir}`"
issue_comment = pull_request.create_issue_comment(moved_comment)
return issue_comment
From 5dbd132fe2151f7d9884d7dabd632ba2cb96a45f Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Mon, 1 Jul 2024 14:00:18 +0200
Subject: [PATCH 27/64] Remove local variable
---
eessi_bot_event_handler.py | 1 -
tools/config.py | 1 -
2 files changed, 2 deletions(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 01bb2a3f..51134e39 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -631,7 +631,6 @@ def handle_pull_request_closed_event(self, event_info, pr):
# NOTE: Permissions to merge are already handled through GitHub, we
# don't need to check here
- clean_up_comments_cfg = self.cfg[config.SECTION_CLEAN_UP_COMMENTS]
# 1) determine the jobs that have been run for the PR
job_dirs = determine_job_dirs(pr.number)
diff --git a/tools/config.py b/tools/config.py
index 8c0e9c00..d56eb123 100644
--- a/tools/config.py
+++ b/tools/config.py
@@ -109,7 +109,6 @@
MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR = 'trash_bin_dir'
SECTION_CLEAN_UP_COMMENTS = 'clean_up_comments'
-CLEAN_UP_COMMENTS_SETTING_MOVED_COMMENT = 'moved_comment'
def read_config(path='app.cfg'):
From b782889b1e758f8ae91c669985e62a6cc92eb439 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Mon, 1 Jul 2024 14:51:40 +0200
Subject: [PATCH 28/64] Restore cleanup_pr.sh
---
scripts/cleanup_pr.sh | 158 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 158 insertions(+)
create mode 100644 scripts/cleanup_pr.sh
diff --git a/scripts/cleanup_pr.sh b/scripts/cleanup_pr.sh
new file mode 100644
index 00000000..41527675
--- /dev/null
+++ b/scripts/cleanup_pr.sh
@@ -0,0 +1,158 @@
+#!/bin/bash
+#
+# GitHub App for the EESSI project
+#
+# A bot to help with requests to add software installations to the EESSI software layer,
+# see https://github.com/EESSI/software-layer
+
+# This script cleans up (deletes) all build artefacts and temporary storage tarballs of a given PR
+#
+# author: Thomas Roeblitz (@trz42)
+#
+# license: GPLv2
+#
+
+SCRIPT_DIR=$(dirname $(realpath $BASH_SOURCE))
+
+function display_help
+{
+ echo "Usage: $0 [OPTIONS] " >&2
+ echo " -b | --jobs-base-dir DIRECTORY - jobs base directory [default: reads" >&2
+ echo " value from bot config file app.cfg or .]" >&2
+ echo " -D | --dry-run - only show commands that would be run" >&2
+ echo " [default: false]" >&2
+ echo " -h | --help - display this usage information" >&2
+}
+
+function get_jobs_base_dir
+{
+ app_cfg_path=${1}
+ grep jobs_base_dir ${app_cfg_path} | grep -v '^[ ]*#' | sed -e 's/^[^=]*=[ ]*//'
+}
+
+echo
+
+if [[ $# -lt 1 ]]; then
+ display_help
+ exit 1
+fi
+
+# process command line args
+POSITIONAL_ARGS=()
+
+jobs_base_dir=
+dry_run=false
+
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ -b|--jobs-base-dir)
+ if [[ $# -gt 1 ]]; then
+ jobs_base_dir="$2"
+ shift 2
+ else
+ echo "Error: missing argument (directory) for parameter '${1}'"
+ exit 2
+ fi
+ ;;
+ -D|--dry-run)
+ dry_run=true
+ shift 1
+ ;;
+ -h|--help)
+ display_help
+ exit 0
+ ;;
+ -*|--*)
+ echo "Error: Unknown option: $1" >&2
+ exit 1
+ ;;
+ *) # No more options
+ POSITIONAL_ARGS+=("$1") # save positional arg
+ shift
+ ;;
+ esac
+done
+
+# restore potentially parsed filename(s) into $*
+set -- "${POSITIONAL_ARGS[@]}"
+
+if [[ $# -ne 1 ]]; then
+ echo "Error: exactly one PR number should be provided as argument"
+ display_help
+ exit 3
+fi
+
+pull_request=${1}
+
+if ${dry_run} = true ; then
+ echo "DRY_RUN: not removing any files"
+fi
+
+# determine jobs base dir if not given explicitly
+# 1. check for file app.cfg in SCRIPT_DIR
+# 2. check for file app.cfg in current dir
+# if found try to obtain value of jobs_base_dir setting
+# if not file not found or jobs_base_dir setting not found (or empty) --> error & exit
+if [[ -z ${jobs_base_dir} ]]; then
+ echo "jobs base directory not given explicitly, trying to determine it"
+ if [[ -e ${SCRIPT_DIR}/app.cfg ]]; then
+ echo "check for app.cfg in '${SCRIPT_DIR}'"
+ jobs_base_dir=$(get_jobs_base_dir ${SCRIPT_DIR}/app.cfg)
+ else
+ if [[ -e ./app.cfg ]]; then
+ echo "check for app.cfg in '${PWD}' (current directory)"
+ jobs_base_dir=$(get_jobs_base_dir ./app.cfg)
+ fi
+ fi
+fi
+if [[ -z ${jobs_base_dir} ]]; then
+ echo "Error: jobs base directory is empty, please specify it as argument"
+ display_help
+ exit 4
+fi
+
+echo "processing all directories for PR ${pull_request}:"
+find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request} | sed -e 's/^/ /'
+
+echo
+echo "disk usage of directories for PR ${pull_request} BEFORE removing build artefacts and tmp storage"
+for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done
+
+echo
+echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing tmp storage tarballs for PR ${pull_request}"
+for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request})
+do
+ for f in $(find $d -type f -wholename "*[0-9].tgz")
+ do
+ if ${dry_run} = true ; then
+ echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))"
+ else
+ echo "Removing file '$f'"
+ rm $f
+ fi
+ done
+done
+
+echo
+echo "disk usage of directories for PR ${pull_request} AFTER removing tmp storage tarballs"
+for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done
+
+echo
+echo "$([[ ${dry_run} = true ]] && echo "DRY_RUN: ")removing build artefacts for PR ${pull_request}"
+for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request})
+do
+ for f in $(find $d -type f -wholename "*tar.gz")
+ do
+ if ${dry_run} = true ; then
+ echo "DRY_RUN: rm '$f' ($(ls -lh $f | awk '{print $5}'))"
+ else
+ echo "Removing file '$f'"
+ rm $f
+ fi
+ done
+done
+
+echo
+echo "disk usage of directories for PR ${pull_request} AFTER removing build artefacts and tmp storage"
+for d in $(find ${jobs_base_dir}/* -maxdepth 1 -type d -wholename */pr_${pull_request}); do du -sh $d; done
+
From f4114b397704d9f5a1ff45977e66abcef286c0b0 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Mon, 1 Jul 2024 14:53:28 +0200
Subject: [PATCH 29/64] Restore cleanup_pr.sh
---
scripts/cleanup_pr.sh | 0
1 file changed, 0 insertions(+), 0 deletions(-)
mode change 100644 => 100755 scripts/cleanup_pr.sh
diff --git a/scripts/cleanup_pr.sh b/scripts/cleanup_pr.sh
old mode 100644
new mode 100755
From 283f602d4aefe64a562ee74dbdad150874ae2c37 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Mon, 1 Jul 2024 15:45:29 +0200
Subject: [PATCH 30/64] Also move pr event dirs
---
eessi_bot_event_handler.py | 3 ++-
tasks/clean_up.py | 10 ++++++++++
2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 51134e39..85230894 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -17,6 +17,7 @@
#
# Standard library imports
+import os
import sys
from datetime import datetime, timezone
@@ -639,7 +640,7 @@ def handle_pull_request_closed_event(self, event_info, pr):
repo_name = request_body['repository']['full_name']
dt = datetime.now(timezone.utc)
- trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y%m.%d')])
+ trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y.%m.%d')])
# Subdirectory with date of move. Also with repository name. Handle symbolic links (later?)
# cron job deletes symlinks?
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index 96b3b0c1..8a28a977 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -35,7 +35,17 @@ def move_to_trash_bin(trash_bin_dir, job_dirs):
log(f"{funcname}(): trash_bin_dir = {trash_bin_dir}")
os.makedirs(trash_bin_dir, exist_ok=True)
+ pr_dirs = []
for job_dir in job_dirs:
destination_dir = shutil.move(job_dir, trash_bin_dir)
log(f"{funcname}(): moved {job_dir} to {destination_dir}")
+ # Save upper directory above to remove later (pr_xx)
+ pr_dirs = os.path.dirname(job_dir)
+
+ # Remove event_xxx-yyy/run_nnn/ directories
+ pr_dirs = list(set(pr_dirs))
+ for pr_dir in pr_dirs:
+ destination_dir = shutil.move(pr_dir, trash_bin_dir)
+ log(f"{funcname}(): moved {pr_dir} to {destination_dir}")
+
return True
From c025746a8768a06bbe7f15178f3c2cbbfab643df Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Mon, 1 Jul 2024 15:49:11 +0200
Subject: [PATCH 31/64] Unnecessary library
---
eessi_bot_event_handler.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 85230894..2f94acda 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -17,7 +17,6 @@
#
# Standard library imports
-import os
import sys
from datetime import datetime, timezone
From e56e9edb6a4ffaf149742ffc6bbcefa9ee2e8f5f Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Mon, 1 Jul 2024 17:07:40 +0200
Subject: [PATCH 32/64] Use shutil.copy2 and os.remove instead
---
eessi_bot_event_handler.py | 2 +-
tasks/clean_up.py | 6 ++++--
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 2f94acda..d96ed3d9 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -648,7 +648,7 @@ def handle_pull_request_closed_event(self, event_info, pr):
self.log("Moving directories to trash_bin")
move_to_trash_bin(trash_bin_dir, job_dirs)
- # 4) report move to pull request?
+ # 4) report move to pull request
repo_name = pr.base.repo.full_name
gh = github.get_instance()
repo = gh.get_repo(repo_name)
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index 8a28a977..6dda6f20 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -45,7 +45,9 @@ def move_to_trash_bin(trash_bin_dir, job_dirs):
# Remove event_xxx-yyy/run_nnn/ directories
pr_dirs = list(set(pr_dirs))
for pr_dir in pr_dirs:
- destination_dir = shutil.move(pr_dir, trash_bin_dir)
- log(f"{funcname}(): moved {pr_dir} to {destination_dir}")
+ destination_dir = shutil.copy2(pr_dir, trash_bin_dir, follow_symlinks=True)
+ log(f"{funcname}(): copied {pr_dir} to {destination_dir}")
+ os.remove(pr_dir)
+ log(f"{funcname}(): removed {pr_dir}")
return True
From 00eeb89c5b3ca0e3b85ad7bc165894cf3d753200 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 2 Jul 2024 09:59:34 +0200
Subject: [PATCH 33/64] Move whole directory(ies) to trash
---
tasks/clean_up.py | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index 6dda6f20..174b0a61 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -37,13 +37,11 @@ def move_to_trash_bin(trash_bin_dir, job_dirs):
os.makedirs(trash_bin_dir, exist_ok=True)
pr_dirs = []
for job_dir in job_dirs:
- destination_dir = shutil.move(job_dir, trash_bin_dir)
- log(f"{funcname}(): moved {job_dir} to {destination_dir}")
- # Save upper directory above to remove later (pr_xx)
- pr_dirs = os.path.dirname(job_dir)
+ # Save upper directory to remove later (pr_xx)
+ pr_dirs.append(os.path.dirname(job_dir))
# Remove event_xxx-yyy/run_nnn/ directories
- pr_dirs = list(set(pr_dirs))
+ pr_dirs = list(set(pr_dirs)) # get only unique dirs
for pr_dir in pr_dirs:
destination_dir = shutil.copy2(pr_dir, trash_bin_dir, follow_symlinks=True)
log(f"{funcname}(): copied {pr_dir} to {destination_dir}")
From 4b360b349ee0a7fcc55e7cce7b441931455bb657 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 2 Jul 2024 10:00:06 +0200
Subject: [PATCH 34/64] lint
---
tasks/clean_up.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index 174b0a61..5853d003 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -41,7 +41,7 @@ def move_to_trash_bin(trash_bin_dir, job_dirs):
pr_dirs.append(os.path.dirname(job_dir))
# Remove event_xxx-yyy/run_nnn/ directories
- pr_dirs = list(set(pr_dirs)) # get only unique dirs
+ pr_dirs = list(set(pr_dirs)) # get only unique dirs
for pr_dir in pr_dirs:
destination_dir = shutil.copy2(pr_dir, trash_bin_dir, follow_symlinks=True)
log(f"{funcname}(): copied {pr_dir} to {destination_dir}")
From b8f6d5714d3b258cb6f65ec1091b5c8e6baf37f5 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 2 Jul 2024 10:13:19 +0200
Subject: [PATCH 35/64] Better formatting error messages
---
tasks/build.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/tasks/build.py b/tasks/build.py
index 527758d7..35b21494 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -381,19 +381,19 @@ def comment_download_pr(base_repo_name, pr, download_pr_exit_code, download_pr_e
download_pr_comments_cfg = config.read_config()[config.SECTION_DOWNLOAD_PR_COMMENTS]
if error_stage == _ERROR_GIT_CLONE:
- download_comment = (f"`{download_pr_error}`\n"
+ download_comment = (f"```{download_pr_error}```\n"
f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CLONE_FAILURE]}"
f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CLONE_TIP]}")
elif error_stage == _ERROR_GIT_CHECKOUT:
- download_comment = (f"`{download_pr_error}`\n"
+ download_comment = (f"```{download_pr_error}```\n"
f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CHECKOUT_FAILURE]}"
f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_CHECKOUT_TIP]}")
elif error_stage == _ERROR_CURL:
- download_comment = (f"`{download_pr_error}`\n"
+ download_comment = (f"```{download_pr_error}```\n"
f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_CURL_FAILURE]}"
f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_CURL_TIP]}")
elif error_stage == _ERROR_GIT_APPLY:
- download_comment = (f"`{download_pr_error}`\n"
+ download_comment = (f"```{download_pr_error}```\n"
f"{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_APPLY_FAILURE]}"
f"\n{download_pr_comments_cfg[config.DOWNLOAD_PR_COMMENTS_SETTING_GIT_APPLY_TIP]}")
From 59a337cf1ce2bcbf86adbfabc981434a02404c6e Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 2 Jul 2024 10:45:16 +0200
Subject: [PATCH 36/64] Need copytree instead
---
tasks/clean_up.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index 5853d003..8e2e5094 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -43,7 +43,7 @@ def move_to_trash_bin(trash_bin_dir, job_dirs):
# Remove event_xxx-yyy/run_nnn/ directories
pr_dirs = list(set(pr_dirs)) # get only unique dirs
for pr_dir in pr_dirs:
- destination_dir = shutil.copy2(pr_dir, trash_bin_dir, follow_symlinks=True)
+ destination_dir = shutil.copytree(pr_dir, trash_bin_dir)
log(f"{funcname}(): copied {pr_dir} to {destination_dir}")
os.remove(pr_dir)
log(f"{funcname}(): removed {pr_dir}")
From f80ab2ba3024e582201d67904fd0a2c9f431b386 Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Tue, 2 Jul 2024 13:30:33 +0200
Subject: [PATCH 37/64] Use rmtree
---
tasks/clean_up.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index 8e2e5094..bc78077f 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -45,7 +45,7 @@ def move_to_trash_bin(trash_bin_dir, job_dirs):
for pr_dir in pr_dirs:
destination_dir = shutil.copytree(pr_dir, trash_bin_dir)
log(f"{funcname}(): copied {pr_dir} to {destination_dir}")
- os.remove(pr_dir)
+ shutil.rmtree(pr_dir) # Use shutil.rmtree to remove directories recursively
log(f"{funcname}(): removed {pr_dir}")
return True
From da970e7e8d81131070389037b88ce34847192ddc Mon Sep 17 00:00:00 2001
From: Neves-P
Date: Fri, 5 Jul 2024 23:15:39 +0200
Subject: [PATCH 38/64] copytree with dirs_exist_ok True
---
tasks/clean_up.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index bc78077f..0e385629 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -43,7 +43,8 @@ def move_to_trash_bin(trash_bin_dir, job_dirs):
# Remove event_xxx-yyy/run_nnn/ directories
pr_dirs = list(set(pr_dirs)) # get only unique dirs
for pr_dir in pr_dirs:
- destination_dir = shutil.copytree(pr_dir, trash_bin_dir)
+ log(f"{funcname}(): attempting to copy {pr_dir} to {trash_bin_dir}")
+ destination_dir = shutil.copytree(pr_dir, trash_bin_dir, dirs_exist_ok=True)
log(f"{funcname}(): copied {pr_dir} to {destination_dir}")
shutil.rmtree(pr_dir) # Use shutil.rmtree to remove directories recursively
log(f"{funcname}(): removed {pr_dir}")
From 71e7b31bfd5a6a07710d243ebd8a163ebb31b4be Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Mon, 5 Aug 2024 14:15:45 +0200
Subject: [PATCH 39/64] tweak clean up method by
- putting configuration settings into a single section
- using shutil.move instead of shutil.copy (move should fall back to copy
if moving is not doable)
---
eessi_bot_event_handler.py | 8 ++++++--
tasks/clean_up.py | 31 +++++++++++++++++++++++--------
tools/config.py | 8 +++-----
3 files changed, 32 insertions(+), 15 deletions(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index d96ed3d9..d018bf19 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -60,6 +60,9 @@
config.BUILDENV_SETTING_SHARED_FS_PATH, # optional+recommended
# config.BUILDENV_SETTING_SLURM_PARAMS, # optional
config.BUILDENV_SETTING_SUBMIT_COMMAND], # required
+ config.SECTION_CLEAN_UP: [
+ config.CLEAN_UP_SETTING_TRASH_BIN_ROOT_DIR, # required
+ config.CLEAN_UP_SETTING_MOVED_JOB_DIRS_COMMENT], # required
config.SECTION_DEPLOYCFG: [
config.DEPLOYCFG_SETTING_ARTEFACT_PREFIX, # (required)
config.DEPLOYCFG_SETTING_ARTEFACT_UPLOAD_SCRIPT, # required
@@ -635,7 +638,7 @@ def handle_pull_request_closed_event(self, event_info, pr):
job_dirs = determine_job_dirs(pr.number)
# 2) Get trash_bin_dir from configs
- trash_bin_root_dir = self.cfg[config.SECTION_MERGED_PR][config.MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR]
+ trash_bin_root_dir = self.cfg[config.SECTION_CLEAN_UP][config.CLEAN_UP_SETTING_TRASH_BIN_ROOT_DIR]
repo_name = request_body['repository']['full_name']
dt = datetime.now(timezone.utc)
@@ -653,7 +656,8 @@ def handle_pull_request_closed_event(self, event_info, pr):
gh = github.get_instance()
repo = gh.get_repo(repo_name)
pull_request = repo.get_pull(pr.number)
- moved_comment = f"PR merged! Moved `{job_dirs}` to `{trash_bin_dir}`"
+ clean_up_comment = self.cfg[config.SECTION_CLEAN_UP][config.CLEAN_UP_SETTING_MOVED_JOB_DIRS_COMMENT]
+ moved_comment = clean_up_comment.format(job_dirs=job_dirs, trash_bin_dir=trash_bin_dir)
issue_comment = pull_request.create_issue_comment(moved_comment)
return issue_comment
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index 0e385629..ed8ca5f7 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -31,22 +31,37 @@ def move_to_trash_bin(trash_bin_dir, job_dirs):
Returns:
None (implicitly)
"""
+ # idea:
+ # - shutil.move YYYY.MM/pr_PR_NUM to trash_bin_dir
+ # - need to obtain list of YYYY.MM/pr_PR_NUM directories from job dirs
+ # - need to ensure that YYYY.MM under trash_bin_dir exists (or create it)
+ # - then we can just move YYYY.MM/pr_PR_NUM to trash_bin_dir/YYYY.MM
+ # - (LATER) we should also fix the symbolic links under job_ids_dir/finished
+ # (remove it for the job id and add a new one pointing to the new location)
funcname = sys._getframe().f_code.co_name
log(f"{funcname}(): trash_bin_dir = {trash_bin_dir}")
+ # ensure the 'trash_bin_dir' exists
os.makedirs(trash_bin_dir, exist_ok=True)
+
pr_dirs = []
for job_dir in job_dirs:
- # Save upper directory to remove later (pr_xx)
- pr_dirs.append(os.path.dirname(job_dir))
+ pr_dir = os.path.dirname(job_dir)
+ log(f"{funcname}(): adding PR dir '{pr_dir}' (from job dir '{job_dir}')")
+ pr_dirs.append(pr_dir)
+
- # Remove event_xxx-yyy/run_nnn/ directories
+ # Move (or copy as fallback) entire pr_PR_NUM directories to trash_bin_dir/YYYY.MM
pr_dirs = list(set(pr_dirs)) # get only unique dirs
for pr_dir in pr_dirs:
- log(f"{funcname}(): attempting to copy {pr_dir} to {trash_bin_dir}")
- destination_dir = shutil.copytree(pr_dir, trash_bin_dir, dirs_exist_ok=True)
- log(f"{funcname}(): copied {pr_dir} to {destination_dir}")
- shutil.rmtree(pr_dir) # Use shutil.rmtree to remove directories recursively
- log(f"{funcname}(): removed {pr_dir}")
+ # determine YYYY.MM parent of pr_dir
+ year_month_dir = pr_dir.split('/')[-2]
+ # make sure that directory exists under trash_bin_dir
+ target_bin_dir = os.path.join(trash_bin_dir, year_month_dir)
+ os.makedirs(target_bin_dir, exist_ok=True)
+
+ log(f"{funcname}(): attempting to move {pr_dir} to {target_bin_dir}")
+ destination_dir = shutil.move(pr_dir, target_bin_dir)
+ log(f"{funcname}(): moved {pr_dir} to {destination_dir}")
return True
diff --git a/tools/config.py b/tools/config.py
index d56eb123..79662249 100644
--- a/tools/config.py
+++ b/tools/config.py
@@ -105,11 +105,9 @@
SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT = 'initial_comment'
SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE = 'awaits_release'
-SECTION_MERGED_PR = 'merged_pr'
-MERGED_PR_SETTING_TRASH_BIN_ROOT_DIR = 'trash_bin_dir'
-
-SECTION_CLEAN_UP_COMMENTS = 'clean_up_comments'
-
+SECTION_CLEAN_UP = 'clean_up'
+CLEAN_UP_SETTING_TRASH_BIN_ROOT_DIR = 'trash_bin_dir'
+CLEAN_UP_SETTING_MOVED_JOB_DIRS_COMMENT = 'moved_job_dirs_comment'
def read_config(path='app.cfg'):
"""
From d1a02786e10e4a5efc381780203c1d73ef2f114d Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Mon, 5 Aug 2024 14:25:01 +0200
Subject: [PATCH 40/64] add information to README.md and app.cfg.example
---
README.md | 15 +++++++++++++++
app.cfg.example | 4 ++++
2 files changed, 19 insertions(+)
diff --git a/README.md b/README.md
index e268fc41..cfe425e5 100644
--- a/README.md
+++ b/README.md
@@ -720,6 +720,21 @@ git_apply_tip = _Tip: This can usually be resolved by syncing your branch and re
`git_apply_tip` should guide the contributor/maintainer about resolving the cause
of `git apply` failing.
+#### `[clean_up]` section
+
+The `[clean_up]` section includes settings related to cleaning up disk space used by merged (and closed) PRs.
+```
+trash_bin_dir = PATH/TO/TRASH_BIN_DIRECTORY
+```
+Ideally this is on the same filesystem used by `jobs_base_dir` and `job_ids_dir` to efficiently move data
+into the trash bin. If it resides on a different filesystem, the data will be copied.
+
+```
+moved_job_dirs_comment = PR merged! Moved `{job_dirs}` to `{trash_bin_dir}`
+```
+Template that is used by the bot to add a comment to a PR noting down which directories have been
+moved and where.
+
# Instructions to run the bot components
The bot consists of three components:
diff --git a/app.cfg.example b/app.cfg.example
index ae51ade6..dcb40f41 100644
--- a/app.cfg.example
+++ b/app.cfg.example
@@ -259,3 +259,7 @@ curl_failure = Unable to download the `.diff` file.
curl_tip = _Tip: This could be a connection failure. Try again and if the issue remains check if the address is correct_
git_apply_failure = Unable to download or merge changes between the source branch and the destination branch.
git_apply_tip = _Tip: This can usually be resolved by syncing your branch and resolving any merge conflicts._
+
+[clean_up]
+trash_bin_dir = $HOME/trash_bin
+moved_job_dirs_comment = PR merged! Moved `{job_dirs}` to `{trash_bin_dir}`
From d8d79b691d68c08e016a6eb8670f90d28e7b20c4 Mon Sep 17 00:00:00 2001
From: Pedro Santos Neves <10762799+Neves-P@users.noreply.github.com>
Date: Mon, 5 Aug 2024 15:31:45 +0200
Subject: [PATCH 41/64] Lint
---
tasks/clean_up.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/tasks/clean_up.py b/tasks/clean_up.py
index ed8ca5f7..093117eb 100644
--- a/tasks/clean_up.py
+++ b/tasks/clean_up.py
@@ -50,7 +50,6 @@ def move_to_trash_bin(trash_bin_dir, job_dirs):
log(f"{funcname}(): adding PR dir '{pr_dir}' (from job dir '{job_dir}')")
pr_dirs.append(pr_dir)
-
# Move (or copy as fallback) entire pr_PR_NUM directories to trash_bin_dir/YYYY.MM
pr_dirs = list(set(pr_dirs)) # get only unique dirs
for pr_dir in pr_dirs:
From f69c49c491978ee92c6e3f40d851d8ce4a27aaee Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Tue, 6 Aug 2024 13:53:23 +0200
Subject: [PATCH 42/64] add empty line
---
tools/config.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/config.py b/tools/config.py
index 79662249..4fd62c7c 100644
--- a/tools/config.py
+++ b/tools/config.py
@@ -109,6 +109,7 @@
CLEAN_UP_SETTING_TRASH_BIN_ROOT_DIR = 'trash_bin_dir'
CLEAN_UP_SETTING_MOVED_JOB_DIRS_COMMENT = 'moved_job_dirs_comment'
+
def read_config(path='app.cfg'):
"""
Read the config file
From 78e625e2e737192c4b6a45eeffbc237826a89170 Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Wed, 7 Aug 2024 11:04:08 +0200
Subject: [PATCH 43/64] fix issue overwriting of config_data entry
---
tasks/build.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tasks/build.py b/tasks/build.py
index b2de809d..46b9543a 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -65,15 +65,16 @@ def get_build_env_cfg(cfg):
"""
fn = sys._getframe().f_code.co_name
+ config_data = {}
buildenv = cfg[config.SECTION_BUILDENV]
job_name = buildenv.get(config.BUILDENV_SETTING_JOB_NAME)
log(f"{fn}(): job_name '{job_name}'")
- config_data = {config.BUILDENV_SETTING_JOB_NAME: job_name}
+ config_data[config.BUILDENV_SETTING_JOB_NAME] = job_name
jobs_base_dir = buildenv.get(config.BUILDENV_SETTING_JOBS_BASE_DIR)
log(f"{fn}(): jobs_base_dir '{jobs_base_dir}'")
- config_data = {config.BUILDENV_SETTING_JOBS_BASE_DIR: jobs_base_dir}
+ config_data[config.BUILDENV_SETTING_JOBS_BASE_DIR] = jobs_base_dir
local_tmp = buildenv.get(config.BUILDENV_SETTING_LOCAL_TMP)
log(f"{fn}(): local_tmp '{local_tmp}'")
From 3add12a1dc4ecb9ac0473379a1dce2c1cae86c3c Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Mon, 19 Aug 2024 10:18:27 +0200
Subject: [PATCH 44/64] handle closed PRs too
---
eessi_bot_event_handler.py | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index ab2ad0da..97cc369d 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -12,6 +12,7 @@
# author: Jonas Qvigstad (@jonas-lq)
# author: Lara Ramona Peeters (@laraPPr)
# author: Thomas Roeblitz (@trz42)
+# author: Pedro Santos Neves (@Neves-P)
#
# license: GPLv2
#
@@ -607,9 +608,9 @@ def start(self, app, port=3000):
def handle_pull_request_closed_event(self, event_info, pr):
"""
- Handle events of type pull_request with the action 'closed'. Main action
- is to scan directories used and move them to the trash_bin when the PR
- is merged.
+ Handle events of type pull_request with the action 'closed'. It
+ determines the directories used by the PR and moves them to the trash_bin. It also adds
+ information to the logs and a comment to the PR.
Args:
event_info (dict): event received by event_handler
@@ -623,34 +624,33 @@ def handle_pull_request_closed_event(self, event_info, pr):
# Detect event and only act if PR is merged
request_body = event_info['raw_request_body']
action = request_body['action']
- merged = request_body['pull_request']['merged']
+ # next value: True -> PR merged, False -> PR closed
+ mergedOrClosed = request_body['pull_request']['merged']
+ status = "merged" if mergedOrClosed else "closed"
- if merged:
- self.log("PR merged: scanning directories used by PR")
- self.log(f"pull_request event with action '{action}' and merged '{merged}' will be handled")
- else:
- self.log(f"Action '{action}' not handled as 'merged' is '{merged}'")
- return
- # at this point we know that we are handling a new merge
- # NOTE: Permissions to merge are already handled through GitHub, we
- # don't need to check here
+ self.log(f"PR {pr.number}: PR got {status} (json value: {mergedOrClosed})")
# 1) determine the jobs that have been run for the PR
+ self.log(f"PR {pr.number}: determining directories to be moved to trash bin")
job_dirs = determine_job_dirs(pr.number)
# 2) Get trash_bin_dir from configs
trash_bin_root_dir = self.cfg[config.SECTION_CLEAN_UP][config.CLEAN_UP_SETTING_TRASH_BIN_ROOT_DIR]
repo_name = request_body['repository']['full_name']
- dt = datetime.now(timezone.utc)
- trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt.strftime('%Y.%m.%d')])
+ dt_start = datetime.now(timezone.utc)
+ trash_bin_dir = "/".join([trash_bin_root_dir, repo_name, dt_start.strftime('%Y.%m.%d')])
# Subdirectory with date of move. Also with repository name. Handle symbolic links (later?)
# cron job deletes symlinks?
# 3) move the directories to the trash_bin
- self.log("Moving directories to trash_bin")
+ self.log(f"PR {pr.number}: moving directories to trash bin {trash_bin_dir}")
move_to_trash_bin(trash_bin_dir, job_dirs)
+ dt_end = datetime.now(timezone.utc)
+ dt_delta = dt_end - dt_start
+ seconds_elapsed = dt_delta.days * 24 * 3600 + dt_delta.seconds
+ self.log(f"PR {pr.number}: moved directories to trash bin {trash_bin_dir} (took {seconds_elapsed} seconds)")
# 4) report move to pull request
repo_name = pr.base.repo.full_name
From e14fe10de429ca1cae3a11e14ad5393df44cc99e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20R=C3=B6blitz?=
Date: Tue, 20 Aug 2024 10:56:54 +0200
Subject: [PATCH 45/64] improve comment and remove unused code
Co-authored-by: Pedro Santos Neves <10762799+Neves-P@users.noreply.github.com>
---
eessi_bot_event_handler.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 97cc369d..91274fbe 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -621,9 +621,8 @@ def handle_pull_request_closed_event(self, event_info, pr):
PyGithub, not the github from the internal connections module)
"""
- # Detect event and only act if PR is merged
+ # Detect event and report if PR was merged or closed
request_body = event_info['raw_request_body']
- action = request_body['action']
# next value: True -> PR merged, False -> PR closed
mergedOrClosed = request_body['pull_request']['merged']
status = "merged" if mergedOrClosed else "closed"
From 25410e76f53299881df03b53e2427d468f6ad24c Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Wed, 21 Aug 2024 14:52:40 +0200
Subject: [PATCH 46/64] add filter for accelerators and pytests
---
tests/test_tools_filter.py | 216 +++++++++++++++++++++++++++++++++----
tools/filter.py | 105 +++++++++++++-----
2 files changed, 275 insertions(+), 46 deletions(-)
diff --git a/tests/test_tools_filter.py b/tests/test_tools_filter.py
index 3cf4c699..b689aa60 100644
--- a/tests/test_tools_filter.py
+++ b/tests/test_tools_filter.py
@@ -10,12 +10,18 @@
#
# Standard library imports
+import copy
# Third party imports (anything installed into the local Python environment)
import pytest
# Local application imports (anything from EESSI/eessi-bot-software-layer)
-from tools.filter import EESSIBotActionFilter, EESSIBotActionFilterError
+from tools.filter import (COMPONENT_TOO_SHORT,
+ COMPONENT_UNKNOWN,
+ EESSIBotActionFilter,
+ EESSIBotActionFilterError,
+ FILTER_EMPTY_PATTERN,
+ FILTER_FORMAT_ERROR)
def test_empty_action_filter():
@@ -25,6 +31,45 @@ def test_empty_action_filter():
assert expected == actual
+def test_add_wellformed_filter_from_string():
+ af = EESSIBotActionFilter("")
+ component = 'acc'
+ pattern = 'nvidia/cc80'
+ af.add_filter_from_string(f"{component}:{pattern}")
+ expected = f"accelerator:{pattern}"
+ actual = af.to_string()
+ assert expected == actual
+
+
+def test_add_non_wellformed_filter_from_string():
+ af = EESSIBotActionFilter("")
+ component1 = 'acc'
+ filter_string1 = f"{component1}"
+ with pytest.raises(Exception) as err1:
+ af.add_filter_from_string(filter_string1)
+ assert err1.type == EESSIBotActionFilterError
+ expected_msg1 = FILTER_FORMAT_ERROR.format(filter_string=filter_string1)
+ assert str(err1.value) == expected_msg1
+
+ component2 = 'a'
+ pattern2 = 'zen4'
+ filter_string2 = f"{component2}:{pattern2}"
+ with pytest.raises(Exception) as err2:
+ af.add_filter_from_string(filter_string2)
+ assert err2.type == EESSIBotActionFilterError
+ expected_msg2 = COMPONENT_TOO_SHORT.format(component=component2, pattern=pattern2)
+ assert str(err2.value) == expected_msg2
+
+ component3 = 'arc'
+ pattern3 = ''
+ filter_string3 = f"{component3}:{pattern3}"
+ with pytest.raises(Exception) as err3:
+ af.add_filter_from_string(filter_string3)
+ assert err3.type == EESSIBotActionFilterError
+ expected_msg3 = FILTER_EMPTY_PATTERN.format(filter_string=filter_string3)
+ assert str(err3.value) == expected_msg3
+
+
def test_add_single_action_filter():
af = EESSIBotActionFilter("")
component = 'arch'
@@ -42,6 +87,106 @@ def test_add_non_supported_component():
with pytest.raises(Exception) as err:
af.add_filter(component, pattern)
assert err.type == EESSIBotActionFilterError
+ expected_msg = COMPONENT_UNKNOWN.format(component=component, pattern=pattern)
+ assert str(err.value) == expected_msg
+
+
+def test_add_too_short_supported_component():
+ af = EESSIBotActionFilter("")
+ component = 'a'
+ pattern = '.*intel.*'
+ with pytest.raises(Exception) as err:
+ af.add_filter(component, pattern)
+ assert err.type == EESSIBotActionFilterError
+ expected_msg = COMPONENT_TOO_SHORT.format(component=component, pattern=pattern)
+ assert str(err.value) == expected_msg
+
+
+# TODO tests for removing filters
+@pytest.fixture
+def complex_filter():
+ af = EESSIBotActionFilter("")
+ component1 = 'arch'
+ pattern1 = '.*intel.*'
+ af.add_filter(component1, pattern1)
+ component2 = 'repo'
+ pattern2 = 'nessi.no-2022.*'
+ af.add_filter(component2, pattern2)
+ component3 = 'inst'
+ pattern3 = '[aA]'
+ af.add_filter(component3, pattern3)
+ yield af
+
+
+def test_remove_existing_filter(complex_filter):
+ component1 = 'architecture'
+ pattern1 = '.*intel.*'
+ filter_string1 = f"{component1}:{pattern1}"
+ component2 = 'repository'
+ pattern2 = 'nessi.no-2022.*'
+ filter_string2 = f"{component2}:{pattern2}"
+ component3 = 'instance'
+ pattern3 = '[aA]'
+ filter_string3 = f"{component3}:{pattern3}"
+
+ # remove last filter
+ org_filter = copy.deepcopy(complex_filter)
+ org_filter.remove_filter(component3, pattern3)
+ expected = filter_string1
+ expected += f" {filter_string2}"
+ actual = org_filter.to_string()
+ assert expected == actual
+
+ # remove second last filter
+ org_filter = copy.deepcopy(complex_filter)
+ org_filter.remove_filter(component2, pattern2)
+ expected = filter_string1
+ expected += f" {filter_string3}"
+ actual = org_filter.to_string()
+ assert expected == actual
+
+ # remove first filter
+ org_filter = copy.deepcopy(complex_filter)
+ org_filter.remove_filter(component1, pattern1)
+ expected = filter_string2
+ expected += f" {filter_string3}"
+ actual = org_filter.to_string()
+ assert expected == actual
+
+
+def test_remove_non_existing_filter(complex_filter):
+ component = 'accel'
+ pattern = 'amd/gfx90a'
+
+ # remove non-existing filter
+ org_filter = copy.deepcopy(complex_filter)
+ org_filter.remove_filter(component, pattern)
+ org_filter_str = org_filter.to_string()
+ complex_filter_str = complex_filter.to_string()
+ assert org_filter_str == complex_filter_str
+
+
+def test_remove_filter_errors(complex_filter):
+ component1 = 'ac'
+ pattern1 = 'amd/gfx90a'
+ component2 = 'operating_system'
+ pattern2 = 'linux'
+
+ # remove filter using too short component name
+ org_filter = copy.deepcopy(complex_filter)
+ with pytest.raises(Exception) as err1:
+ org_filter.remove_filter(component1, pattern1)
+ assert err1.type == EESSIBotActionFilterError
+ expected_msg1 = COMPONENT_TOO_SHORT.format(component=component1, pattern=pattern1)
+ assert str(err1.value) == expected_msg1
+
+ # remove filter using unknown component name
+ org_filter = copy.deepcopy(complex_filter)
+ with pytest.raises(Exception) as err2:
+ org_filter.remove_filter(component2, pattern2)
+ assert err2.type == EESSIBotActionFilterError
+ expected_msg2 = COMPONENT_UNKNOWN.format(component=component2, pattern=pattern2)
+ assert str(err2.value) == expected_msg2
def test_check_matching_empty_filter():
@@ -71,21 +216,6 @@ def test_check_matching_simple_filter():
assert expected == actual
-@pytest.fixture
-def complex_filter():
- af = EESSIBotActionFilter("")
- component1 = 'arch'
- pattern1 = '.*intel.*'
- af.add_filter(component1, pattern1)
- component2 = 'repo'
- pattern2 = 'nessi.no-2022.*'
- af.add_filter(component2, pattern2)
- component3 = 'i'
- pattern3 = '[aA]'
- af.add_filter(component3, pattern3)
- yield af
-
-
def test_create_complex_filter(complex_filter):
expected = "architecture:.*intel.*"
expected += " repository:nessi.no-2022.*"
@@ -125,9 +255,9 @@ def test_non_match_architecture_repository_context(complex_filter):
@pytest.fixture
def arch_filter_slash_syntax():
af = EESSIBotActionFilter("")
- component1 = 'arch'
- pattern1 = '.*/intel/.*'
- af.add_filter(component1, pattern1)
+ component = 'arch'
+ pattern = '.*/intel/.*'
+ af.add_filter(component, pattern)
yield af
@@ -146,9 +276,9 @@ def test_match_architecture_syntax_slash(arch_filter_slash_syntax):
@pytest.fixture
def arch_filter_dash_syntax():
af = EESSIBotActionFilter("")
- component1 = 'arch'
- pattern1 = '.*-intel-.*'
- af.add_filter(component1, pattern1)
+ component = 'arch'
+ pattern = '.*-intel-.*'
+ af.add_filter(component, pattern)
yield af
@@ -162,3 +292,45 @@ def test_match_architecture_syntax_dash(arch_filter_dash_syntax):
expected = True
actual = arch_filter_dash_syntax.check_filters(context)
assert expected == actual
+
+
+@pytest.fixture
+def accel_filter_slash_syntax():
+ af = EESSIBotActionFilter("")
+ component = 'accel'
+ pattern = 'nvidia/.*'
+ af.add_filter(component, pattern)
+ yield af
+
+
+def test_match_accelerator_syntax_slash(accel_filter_slash_syntax):
+ context = {"accelerator": "nvidia/cc70", "repository": "EESSI"}
+ expected = True
+ actual = accel_filter_slash_syntax.check_filters(context)
+ assert expected == actual
+
+ context = {"accelerator": "nvidia=cc70"}
+ expected = True
+ actual = accel_filter_slash_syntax.check_filters(context)
+ assert expected == actual
+
+
+@pytest.fixture
+def accel_filter_equal_syntax():
+ af = EESSIBotActionFilter("")
+ component = 'accel'
+ pattern = 'amd=gfx90a'
+ af.add_filter(component, pattern)
+ yield af
+
+
+def test_match_accelerator_syntax_equal(accel_filter_equal_syntax):
+ context = {"accelerator": "amd=gfx90a", "repository": "EESSI"}
+ expected = True
+ actual = accel_filter_equal_syntax.check_filters(context)
+ assert expected == actual
+
+ context = {"accelerator": "amd/gfx90a"}
+ expected = True
+ actual = accel_filter_equal_syntax.check_filters(context)
+ assert expected == actual
diff --git a/tools/filter.py b/tools/filter.py
index 8d791936..721cf3d7 100644
--- a/tools/filter.py
+++ b/tools/filter.py
@@ -23,11 +23,22 @@
# NOTE because one can use any prefix of one of the four components below to
# define a filter, we need to make sure that no two filters share the same
# prefix OR we have to change the handling of filters.
+FILTER_COMPONENT_ACCEL = 'accelerator'
FILTER_COMPONENT_ARCH = 'architecture'
FILTER_COMPONENT_INST = 'instance'
FILTER_COMPONENT_JOB = 'job'
FILTER_COMPONENT_REPO = 'repository'
-FILTER_COMPONENTS = [FILTER_COMPONENT_ARCH, FILTER_COMPONENT_INST, FILTER_COMPONENT_JOB, FILTER_COMPONENT_REPO]
+FILTER_COMPONENTS = [FILTER_COMPONENT_ACCEL,
+ FILTER_COMPONENT_ARCH,
+ FILTER_COMPONENT_INST,
+ FILTER_COMPONENT_JOB,
+ FILTER_COMPONENT_REPO
+ ]
+
+COMPONENT_TOO_SHORT = "component in filter spec '{component}:{pattern}' is too short; must be 3 characters or longer"
+COMPONENT_UNKNOWN = "unknown component={component} in {component}:{pattern}"
+FILTER_EMPTY_PATTERN = "pattern in filter string '{filter_string}' is empty"
+FILTER_FORMAT_ERROR = "filter string '{filter_string}' does not conform to format 'component:pattern'"
Filter = namedtuple('Filter', ('component', 'pattern'))
@@ -37,7 +48,12 @@ class EESSIBotActionFilterError(Exception):
Exception to be raised when encountering an error in creating or adding a
filter
"""
- pass
+
+ def __init__(self, value):
+ self.value = value
+
+ def __str__(self):
+ return self.value
class EESSIBotActionFilter:
@@ -87,8 +103,8 @@ def add_filter(self, component, pattern):
Adds a filter given by a component and a pattern
Args:
- component (string): any prefix of known filter components (see
- FILTER_COMPONENTS)
+ component (string): any prefix (min 3 characters long) of known filter
+ components (see FILTER_COMPONENTS)
pattern (string): regular expression pattern that is applied to a
string representing the component
@@ -99,26 +115,36 @@ def add_filter(self, component, pattern):
EESSIBotActionFilterError: raised if unknown component is provided
as argument
"""
- # check if component is supported (i.e., it is a prefix of one of the
- # elements in FILTER_COMPONENTS)
+ # check if component is supported
+ # - it is a 3+ character-long string, _and_
+ # - it is a prefix of one of the elements in FILTER_COMPONENTS
+ if len(component) < 3:
+ msg = COMPONENT_TOO_SHORT.format(component=component, pattern=pattern)
+ log(msg)
+ raise EESSIBotActionFilterError(msg)
full_component = None
for cis in FILTER_COMPONENTS:
# NOTE the below code assumes that no two filter share the same
- # prefix (e.g., 'repository' and 'request' would have the same
- # prefixes 'r' and 're')
+ # 3-character-long prefix (e.g., 'repository' and 'repeat' would
+ # have the same prefix 'rep')
if cis.startswith(component):
full_component = cis
break
if full_component:
log(f"processing component {component}")
# replace '-' with '/' in pattern when using 'architecture' filter
- # component (done to make sure that values are comparable)
+ # component (done to make sure that values are comparable)
if full_component == FILTER_COMPONENT_ARCH:
pattern = pattern.replace('-', '/')
+ # replace '=' with '/' in pattern when using 'accelerator' filter
+ # component (done to make sure that values are comparable)
+ if full_component == FILTER_COMPONENT_ACCEL:
+ pattern = pattern.replace('=', '/')
self.action_filters.append(Filter(full_component, pattern))
else:
- log(f"component {component} is unknown")
- raise EESSIBotActionFilterError(f"unknown component={component} in {component}:{pattern}")
+ msg = COMPONENT_UNKNOWN.format(component=component, pattern=pattern)
+ log(msg)
+ raise EESSIBotActionFilterError(msg)
def add_filter_from_string(self, filter_string):
"""
@@ -136,11 +162,17 @@ def add_filter_from_string(self, filter_string):
"""
_filter_split = filter_string.split(':')
if len(_filter_split) != 2:
- log(f"filter string '{filter_string}' does not conform to format 'component:pattern'")
- raise EESSIBotActionFilterError(f"filter '{filter_string}' does not conform to format 'component:pattern'")
+ msg = FILTER_FORMAT_ERROR.format(filter_string=filter_string)
+ log(msg)
+ raise EESSIBotActionFilterError(msg)
+ if len(_filter_split[0]) < 3:
+ msg = COMPONENT_TOO_SHORT.format(component=_filter_split[0], pattern=_filter_split[1])
+ log(msg)
+ raise EESSIBotActionFilterError(msg)
if len(_filter_split[1]) == 0:
- log(f"pattern in filter string '{filter_string}' is empty")
- raise EESSIBotActionFilterError(f"pattern in filter string '{filter_string}' is empty")
+ msg = FILTER_EMPTY_PATTERN.format(filter_string=filter_string)
+ log(msg)
+ raise EESSIBotActionFilterError(msg)
self.add_filter(_filter_split[0], _filter_split[1])
def remove_filter(self, component, pattern):
@@ -148,22 +180,44 @@ def remove_filter(self, component, pattern):
Removes all elements matching the filter given by (component, pattern)
Args:
- component (string): any prefix of 'architecture', 'instance', 'job' or 'repository'
+ component (string): one of FILTER_COMPONENTS
pattern (string): regex that is applied to a string representing the component
Returns:
None (implicitly)
"""
- index = 0
- for _filter in self.action_filters:
+ if len(component) < 3:
+ msg = COMPONENT_TOO_SHORT.format(component=component, pattern=pattern)
+ log(msg)
+ raise EESSIBotActionFilterError(msg)
+ full_component = None
+ for cis in FILTER_COMPONENTS:
+ # NOTE the below code assumes that no two filter share the same
+ # 3-character-long prefix (e.g., 'repository' and 'repeat' would
+ # have the same prefix 'rep')
+ if cis.startswith(component):
+ full_component = cis
+ break
+ if not full_component:
+ # the component provided as argument is not in the list of FILTER_COMPONENTS
+ msg = COMPONENT_UNKNOWN.format(component=component, pattern=pattern)
+ log(msg)
+ raise EESSIBotActionFilterError(msg)
+
+ # need to traverse list from end or next elem after a removed item is
+ # skipped
+ num_filters = len(self.action_filters)
+ for idx in range(num_filters, 0, -1):
+ # idx runs from num_filters to 1; this needs to be corrected to
+ # num_filters-1 to 0
+ index = idx - 1
# NOTE the below code assumes that no two filter share the same
- # prefix (e.g., 'repository' and 'request' would have the same
- # prefixes 'r' and 're')
+ # 3-character-long prefix (e.g., 'repository' and 'repeat' would
+ # have the same prefix 'rep')
+ _filter = self.action_filters[index]
if _filter.component.startswith(component) and pattern == _filter.pattern:
log(f"removing filter ({_filter.component}, {pattern})")
self.action_filters.pop(index)
- else:
- index += 1
def to_string(self):
"""
@@ -184,8 +238,8 @@ def to_string(self):
def check_filters(self, context):
"""
- Checks filters for a given context which is defined by one to four
- components (architecture, instance, job, repository)
+ Checks filters for a given context which is defined by one to five
+ components (accelerator, architecture, instance, job, repository)
Args:
context (dict) : dictionary that maps components to their value
@@ -217,6 +271,9 @@ def check_filters(self, context):
# replace - with / in architecture component
if af.component == FILTER_COMPONENT_ARCH:
value = value.replace('-', '/')
+ # replace = with / in accelerator component
+ if af.component == FILTER_COMPONENT_ACCEL:
+ value = value.replace('=', '/')
if re.search(af.pattern, value):
# if the pattern of the filter matches
check = True
From 99f42b1daab8170e80c5f6e370f03ce7c8fcab4a Mon Sep 17 00:00:00 2001
From: sam
Date: Sun, 1 Sep 2024 12:53:36 +0200
Subject: [PATCH 47/64] add support for updating submit options
---
tasks/build.py | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/tasks/build.py b/tasks/build.py
index 059b30cc..39c4778e 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -658,6 +658,20 @@ def submit_job(job, cfg):
else:
time_limit = f"--time={DEFAULT_JOB_TIME_LIMIT}"
+ # update job.slurm_opts with det_submit_opts(job) in det_submit_opts.py if available
+ do_update_slurm_opts = False
+ sys.path.append(job.working_dir)
+
+ try:
+ from det_submit_opts import det_submit_opts # pylint:disable=import-outside-toplevel
+ do_update_slurm_opts = True
+ except ImportError:
+ log(f"{fn}(): not updating job.slurm_opts: cannot import function det_submit_opts from module det_submit_opts")
+
+ if do_update_slurm_opts:
+ job = job._replace(slurm_opts=det_submit_opts(job))
+ log(f"{fn}(): updated job.slurm_opts: {job.slurm_opts}")
+
command_line = ' '.join([
build_env_cfg[config.BUILDENV_SETTING_SUBMIT_COMMAND],
build_env_cfg[config.BUILDENV_SETTING_SLURM_PARAMS],
From 4517ab225231283653ffa97360bda0c4fba13719 Mon Sep 17 00:00:00 2001
From: sam
Date: Tue, 3 Sep 2024 20:45:59 +0200
Subject: [PATCH 48/64] make updating of submit options configurable
---
app.cfg.example | 3 +++
eessi_bot_event_handler.py | 1 +
tasks/build.py | 18 +++++++++++-------
tools/config.py | 1 +
4 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/app.cfg.example b/app.cfg.example
index a5a09649..b65ada67 100644
--- a/app.cfg.example
+++ b/app.cfg.example
@@ -128,6 +128,9 @@ build_permission =
# template for comment when user who set a label has no permission to trigger build jobs
no_build_permission_comment = Label `bot:build` has been set by user `{build_labeler}`, but this person does not have permission to trigger builds
+# whether or not to allow updating the submit options via custom module det_submit_opts
+allow_update_submit_opts = false
+
[deploycfg]
# script for uploading built software packages
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 91274fbe..a94e078e 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -47,6 +47,7 @@
config.BOT_CONTROL_SETTING_COMMAND_PERMISSION, # required
config.BOT_CONTROL_SETTING_COMMAND_RESPONSE_FMT], # required
config.SECTION_BUILDENV: [
+ # config.BUILDENV_SETTING_ALLOW_UPDATE_SUBMIT_OPTS # optional
config.BUILDENV_SETTING_BUILD_JOB_SCRIPT, # required
config.BUILDENV_SETTING_BUILD_LOGS_DIR, # optional+recommended
config.BUILDENV_SETTING_BUILD_PERMISSION, # optional+recommended
diff --git a/tasks/build.py b/tasks/build.py
index 39c4778e..903800b0 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -658,15 +658,19 @@ def submit_job(job, cfg):
else:
time_limit = f"--time={DEFAULT_JOB_TIME_LIMIT}"
- # update job.slurm_opts with det_submit_opts(job) in det_submit_opts.py if available
+ # update job.slurm_opts with det_submit_opts(job) in det_submit_opts.py if allowed and available
do_update_slurm_opts = False
- sys.path.append(job.working_dir)
+ allow_update_slurm_opts = cfg[config.SECTION_BUILDENV].getboolean(config.BUILDENV_SETTING_ALLOW_UPDATE_SUBMIT_OPTS)
- try:
- from det_submit_opts import det_submit_opts # pylint:disable=import-outside-toplevel
- do_update_slurm_opts = True
- except ImportError:
- log(f"{fn}(): not updating job.slurm_opts: cannot import function det_submit_opts from module det_submit_opts")
+ if allow_update_slurm_opts:
+ sys.path.append(job.working_dir)
+
+ try:
+ from det_submit_opts import det_submit_opts # pylint:disable=import-outside-toplevel
+ do_update_slurm_opts = True
+ except ImportError:
+ log(f"{fn}(): not updating job.slurm_opts: "
+ "cannot import function det_submit_opts from module det_submit_opts")
if do_update_slurm_opts:
job = job._replace(slurm_opts=det_submit_opts(job))
diff --git a/tools/config.py b/tools/config.py
index f1deb34a..6675fd87 100644
--- a/tools/config.py
+++ b/tools/config.py
@@ -36,6 +36,7 @@
BOT_CONTROL_SETTING_COMMAND_RESPONSE_FMT = 'command_response_fmt'
SECTION_BUILDENV = 'buildenv'
+BUILDENV_SETTING_ALLOW_UPDATE_SUBMIT_OPTS = 'allow_update_submit_opts'
BUILDENV_SETTING_BUILD_JOB_SCRIPT = 'build_job_script'
BUILDENV_SETTING_BUILD_LOGS_DIR = 'build_logs_dir'
BUILDENV_SETTING_BUILD_PERMISSION = 'build_permission'
From 62a493329e43d47d5b1a9078aa94fb9bd2628b2b Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Fri, 6 Sep 2024 11:51:17 +0200
Subject: [PATCH 49/64] update authors in changed files and add setting to
 README.md
---
README.md | 7 +++++++
app.cfg.example | 1 +
eessi_bot_event_handler.py | 1 +
tasks/build.py | 1 +
tools/config.py | 1 +
5 files changed, 11 insertions(+)
diff --git a/README.md b/README.md
index 2eb0de7c..8af58a92 100644
--- a/README.md
+++ b/README.md
@@ -426,6 +426,13 @@ no_build_permission_comment = The `bot: build ...` command has been used by user
`no_build_permission_comment` defines a comment (template) that is used when
the account trying to trigger build jobs has no permission to do so.
+```
+allow_update_submit_opts = false
+```
+`allow_update_submit_opts` determines whether or not to allow updating the submit
+options via custom module `det_submit_opts` provided by the pull request being
+processed.
+
#### `[bot_control]` section
diff --git a/app.cfg.example b/app.cfg.example
index b65ada67..25abf911 100644
--- a/app.cfg.example
+++ b/app.cfg.example
@@ -10,6 +10,7 @@
# author: Jonas Qvigstad (@jonas-lq)
# author: Pedro Santos Neves (@Neves-P)
# author: Thomas Roeblitz (@trz42)
+# author: Sam Moors (@smoors)
#
# license: GPLv2
#
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index a94e078e..ac943faf 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -13,6 +13,7 @@
# author: Lara Ramona Peeters (@laraPPr)
# author: Thomas Roeblitz (@trz42)
# author: Pedro Santos Neves (@Neves-P)
+# author: Sam Moors (@smoors)
#
# license: GPLv2
#
diff --git a/tasks/build.py b/tasks/build.py
index 903800b0..a967ef73 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -12,6 +12,7 @@
# author: Lara Ramona Peeters (@laraPPr)
# author: Pedro Santos Neves (@Neves-P)
# author: Thomas Roeblitz (@trz42)
+# author: Sam Moors (@smoors)
#
# license: GPLv2
#
diff --git a/tools/config.py b/tools/config.py
index 6675fd87..da8a2875 100644
--- a/tools/config.py
+++ b/tools/config.py
@@ -10,6 +10,7 @@
# author: Jacob Ziemke (@jacobz137)
# author: Jonas Qvigstad (@jonas-lq)
# author: Thomas Roeblitz (@trz42)
+# author: Sam Moors (@smoors)
#
# license: GPLv2
#
From b8c3d48b4e696cb8a5a7c7078bdb356af8940f6e Mon Sep 17 00:00:00 2001
From: sam
Date: Sun, 8 Sep 2024 16:57:47 +0200
Subject: [PATCH 50/64] use github API for downloading the diff
---
tasks/build.py | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/tasks/build.py b/tasks/build.py
index a967ef73..0bcd8535 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -343,7 +343,12 @@ def download_pr(repo_name, branch_name, pr, arch_job_dir):
error_stage = _ERROR_GIT_CHECKOUT
return checkout_output, checkout_err, checkout_exit_code, error_stage
- curl_cmd = f'curl -L https://github.com/{repo_name}/pull/{pr.number}.diff > {pr.number}.diff'
+ curl_cmd = ' '.join([
+ 'curl -L',
+ '-H "Accept: application/vnd.github.diff"',
+ '-H "X-GitHub-Api-Version: 2022-11-28"',
+ f'https://api.github.com/repos/{repo_name}/pulls/{pr.number} > {pr.number}.diff',
+ ])
log(f'curl with command {curl_cmd}')
curl_output, curl_error, curl_exit_code = run_cmd(
curl_cmd, "Obtain patch", arch_job_dir, raise_on_error=False
From cef70a74db5f303c15fc6706e3d036bc0b0ae4ac Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Tue, 10 Sep 2024 10:34:01 +0200
Subject: [PATCH 51/64] add function to return filter pattern for a component
---
tools/filter.py | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/tools/filter.py b/tools/filter.py
index 721cf3d7..8b14f5eb 100644
--- a/tools/filter.py
+++ b/tools/filter.py
@@ -39,6 +39,7 @@
COMPONENT_UNKNOWN = "unknown component={component} in {component}:{pattern}"
FILTER_EMPTY_PATTERN = "pattern in filter string '{filter_string}' is empty"
FILTER_FORMAT_ERROR = "filter string '{filter_string}' does not conform to format 'component:pattern'"
+UNKNOWN_COMPONENT_CONST = "unknown component constant {component}"
Filter = namedtuple('Filter', ('component', 'pattern'))
@@ -175,6 +176,26 @@ def add_filter_from_string(self, filter_string):
raise EESSIBotActionFilterError(msg)
self.add_filter(_filter_split[0], _filter_split[1])
+ def get_filter_by_component(self, component):
+ """
+ Returns filter pattern for component.
+
+ Args:
+ component (string): one of FILTER_COMPONENTS
+
+ Returns:
+ (list): list of pattern for filters whose component matches argument
+ """
+ if component not in FILTER_COMPONENTS:
+ msg = UNKNOWN_COMPONENT_CONST.format(component=component)
+ raise EESSIBotActionFilterError(msg)
+
+ pattern = []
+ for _filter in self.action_filters:
+ if component == _filter.component:
+ pattern.append(_filter.pattern)
+ return pattern
+
def remove_filter(self, component, pattern):
"""
Removes all elements matching the filter given by (component, pattern)
From 4faaa014480c86e2de62ba46fe2d46c015c9590d Mon Sep 17 00:00:00 2001
From: sam
Date: Tue, 10 Sep 2024 10:21:59 +0200
Subject: [PATCH 52/64] add docs on private repos
---
README.md | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/README.md b/README.md
index 8af58a92..081283d2 100644
--- a/README.md
+++ b/README.md
@@ -813,3 +813,26 @@ The job manager can run on a different machine than the event handler, as long a
For information on how to make pull requests and let the bot build software, see
[the bot section of the EESSI documentation](https://www.eessi.io/docs/bot/).
+
+# Private target repos
+
+Both Git and Curl need to have access to the target repo. A convenient way to
+access a private repo via a Github token is by adding the following lines to
+your `~/.netrc` and `~/.curlrc` files:
+
+```
+# ~/.netrc
+machine github.com
+login oauth
+password <your GitHub token>
+
+machine api.github.com
+login oauth
+password <your GitHub token>
+
+```
+# ~/.curlrc
+--netrc
+```
+
From 96b067dace1aac1441ac6f2450f7668c5bf5c62d Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Tue, 10 Sep 2024 11:38:01 +0200
Subject: [PATCH 53/64] updated and new settings to support accelerator build
arg
---
README.md | 17 ++++++++++++-----
app.cfg.example | 3 ++-
tasks/build.py | 42 ++++++++++++++++++++++++++++++++++++------
tools/config.py | 3 ++-
tools/job_metadata.py | 1 +
5 files changed, 53 insertions(+), 13 deletions(-)
diff --git a/README.md b/README.md
index 8af58a92..ebf4e29b 100644
--- a/README.md
+++ b/README.md
@@ -645,17 +645,24 @@ scontrol_command = /usr/bin/scontrol
#### `[submitted_job_comments]` section
The `[submitted_job_comments]` section specifies templates for messages about newly submitted jobs.
-```
-initial_comment = New job on instance `{app_name}` for architecture `{arch_name}` for repository `{repo_id}` in job dir `{symlink}`
-```
-`initial_comment` is used to create a comment to a PR when a new job has been created.
-
```
awaits_release = job id `{job_id}` awaits release by job manager
```
`awaits_release` is used to provide a status update of a job (shown as a row in the job's status
table).
+```
+initial_comment = New job on instance `{app_name}` for architecture `{arch_name}`{accelerator_spec} for repository `{repo_id}` in job dir `{symlink}`
+```
+`initial_comment` is used to create a comment to a PR when a new job has been
+created. Note, the part '{accelerator_spec}' is only filled-in by the bot if the
+argument 'accelerator' to the `bot: build` command have been used.
+```
+with_accelerator = " and accelerator {accelerator}"
+```
+`with_accelerator` is used to provide information about the accelerator the job
+should build for if and only if the argument `accelerator:X/Y` has been provided.
+
#### `[new_job_comments]` section
The `[new_job_comments]` section sets templates for messages about jobs whose `hold` flag was released.
diff --git a/app.cfg.example b/app.cfg.example
index 25abf911..2e50834c 100644
--- a/app.cfg.example
+++ b/app.cfg.example
@@ -243,8 +243,9 @@ scontrol_command = /usr/bin/scontrol
# are removed, the output (in PR comments) will lack important
# information.
[submitted_job_comments]
-initial_comment = New job on instance `{app_name}` for architecture `{arch_name}` for repository `{repo_id}` in job dir `{symlink}`
awaits_release = job id `{job_id}` awaits release by job manager
+initial_comment = New job on instance `{app_name}` for CPU micro-architecture `{arch_name}`{accelerator_spec} for repository `{repo_id}` in job dir `{symlink}`
+with_accelerator = " and accelerator {accelerator}"
[new_job_comments]
diff --git a/tasks/build.py b/tasks/build.py
index a967ef73..96d04fe4 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -33,6 +33,7 @@
# Local application imports (anything from EESSI/eessi-bot-software-layer)
from connections import github
from tools import config, cvmfs_repository, job_metadata, pr_comments, run_cmd
+import tools.filter as tools_filter
# defaults (used if not specified via, eg, 'app.cfg')
@@ -46,7 +47,7 @@
_ERROR_NONE = "none"
-Job = namedtuple('Job', ('working_dir', 'arch_target', 'repo_id', 'slurm_opts', 'year_month', 'pr_id'))
+Job = namedtuple('Job', ('working_dir', 'arch_target', 'repo_id', 'slurm_opts', 'year_month', 'pr_id', 'accelerator'))
# global repo_cfg
repo_cfg = {}
@@ -474,6 +475,12 @@ def prepare_jobs(pr, cfg, event_info, action_filter):
# call to just before download_pr
year_month, pr_id, run_dir = create_pr_dir(pr, cfg, event_info)
+ accelerator = "none"
+ # determine accelerator from action_filter argument
+ accelerators = action_filter.get_filter_by_component(tools_filter.FILTER_COMPONENT_ACCEL)
+ if len(accelerators) > 0:
+ accelerator = accelerators[0]
+
jobs = []
for arch, slurm_opt in arch_map.items():
arch_dir = arch.replace('/', '_')
@@ -501,6 +508,15 @@ def prepare_jobs(pr, cfg, event_info, action_filter):
continue
else:
log(f"{fn}(): context DOES satisfy filter(s), going on with job")
+ # we reached this point when the filter matched (otherwise we
+ # 'continue' with the next repository)
+ # for each match of the filter we create a specific job directory
+ # however, matching CPU architectures works differently to handling
+ # accelerators; multiple CPU architectures defined in arch_target_map
+ # can match the (CPU) architecture component of a filter; in
+ # contrast, the value of the accelerator filter is just passed down
+ # to scripts in bot/ directory of the pull request (see function
+ # prepare_job_cfg and creation of Job tuple below)
job_dir = os.path.join(run_dir, arch_dir, repo_id)
os.makedirs(job_dir, exist_ok=True)
log(f"{fn}(): job_dir '{job_dir}'")
@@ -514,10 +530,12 @@ def prepare_jobs(pr, cfg, event_info, action_filter):
cpu_target = '/'.join(arch.split('/')[1:])
os_type = arch.split('/')[0]
log(f"{fn}(): arch = '{arch}' => cpu_target = '{cpu_target}' , os_type = '{os_type}'")
- prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, cpu_target, os_type)
+
+ log(f"{fn}(): accelerator = '{accelerator}'")
+ prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, cpu_target, os_type, accelerator)
# enlist jobs to proceed
- job = Job(job_dir, arch, repo_id, slurm_opt, year_month, pr_id)
+ job = Job(job_dir, arch, repo_id, slurm_opt, year_month, pr_id, accelerator)
jobs.append(job)
log(f"{fn}(): {len(jobs)} jobs to proceed after applying white list")
@@ -527,7 +545,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter):
return jobs
-def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, os_type):
+def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, os_type, accelerator):
"""
Set up job configuration file 'job.cfg' in directory /cfg
@@ -538,6 +556,7 @@ def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir,
repo_id (string): identifier of the repository to build for
software_subdir (string): software subdirectory to build for (e.g., 'x86_64/generic')
os_type (string): type of the os (e.g., 'linux')
+ accelerator (string): defines accelerator to build for (e.g., 'nvidia/cc80')
Returns:
None (implicitly)
@@ -563,6 +582,7 @@ def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir,
# [architecture]
# software_subdir = software_subdir
# os_type = os_type
+ # accelerator = accelerator
job_cfg = configparser.ConfigParser()
job_cfg[job_metadata.JOB_CFG_SITE_CONFIG_SECTION] = {}
build_env_to_job_cfg_keys = {
@@ -607,6 +627,7 @@ def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir,
job_cfg[job_cfg_arch_section] = {}
job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_SOFTWARE_SUBDIR] = software_subdir
job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_OS_TYPE] = os_type
+ job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_ACCELERATOR] = accelerator
# copy contents of directory containing repository configuration to directory
# containing job configuration/metadata
@@ -726,11 +747,19 @@ def create_pr_comment(job, job_id, app_name, pr, gh, symlink):
# obtain arch from job.arch_target which has the format OS/ARCH
arch_name = '-'.join(job.arch_target.split('/')[1:])
+ submitted_job_comments_cfg = config.read_config()[config.SECTION_SUBMITTED_JOB_COMMENTS]
+
+ # obtain accelerator from job.accelerator
+ accelerator = job.accelerator
+ accelerator_spec_str = ''
+ if not accelerator is 'none':
+ accelerator_spec = f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR]}"
+ accelerator_spec_str = accelerator_spec.format(accelerator=accelerator)
+
# get current date and time
dt = datetime.now(timezone.utc)
# construct initial job comment
- submitted_job_comments_cfg = config.read_config()[config.SECTION_SUBMITTED_JOB_COMMENTS]
job_comment = (f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT]}"
f"\n|date|job status|comment|\n"
f"|----------|----------|------------------------|\n"
@@ -741,7 +770,8 @@ def create_pr_comment(job, job_id, app_name, pr, gh, symlink):
arch_name=arch_name,
symlink=symlink,
repo_id=job.repo_id,
- job_id=job_id)
+ job_id=job_id,
+ accelerator_spec=accelerator_spec_str)
# create comment to pull request
repo_name = pr.base.repo.full_name
diff --git a/tools/config.py b/tools/config.py
index da8a2875..ff641ebb 100644
--- a/tools/config.py
+++ b/tools/config.py
@@ -105,8 +105,9 @@
RUNNING_JOB_COMMENTS_SETTING_RUNNING_JOB = 'running_job'
SECTION_SUBMITTED_JOB_COMMENTS = 'submitted_job_comments'
-SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT = 'initial_comment'
SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE = 'awaits_release'
+SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT = 'initial_comment'
+SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR = 'with_accelerator'
SECTION_CLEAN_UP = 'clean_up'
CLEAN_UP_SETTING_TRASH_BIN_ROOT_DIR = 'trash_bin_dir'
diff --git a/tools/job_metadata.py b/tools/job_metadata.py
index 286f71cb..d4000199 100644
--- a/tools/job_metadata.py
+++ b/tools/job_metadata.py
@@ -33,6 +33,7 @@
JOB_CFG_ARCHITECTURE_SECTION = "architecture"
JOB_CFG_ARCHITECTURE_OS_TYPE = "os_type"
JOB_CFG_ARCHITECTURE_SOFTWARE_SUBDIR = "software_subdir"
+JOB_CFG_ARCHITECTURE_ACCELERATOR = "accelerator"
JOB_CFG_REPOSITORY_SECTION = "repository"
JOB_CFG_REPOSITORY_CONTAINER = "container"
From 815f8fe7a306ec57ccc98c26558c251ac512b51e Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Tue, 10 Sep 2024 11:56:06 +0200
Subject: [PATCH 54/64] fix comparison syntax
---
tasks/build.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tasks/build.py b/tasks/build.py
index 96d04fe4..5b214582 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -752,7 +752,7 @@ def create_pr_comment(job, job_id, app_name, pr, gh, symlink):
# obtain accelerator from job.accelerator
accelerator = job.accelerator
accelerator_spec_str = ''
- if not accelerator is 'none':
+ if accelerator != 'none':
accelerator_spec = f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR]}"
accelerator_spec_str = accelerator_spec.format(accelerator=accelerator)
From 9e4cbfdeb2ff3f76e61bf66abca0f42f4778cf4c Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Tue, 10 Sep 2024 12:19:24 +0200
Subject: [PATCH 55/64] improve settings' value for with_accelerator
---
README.md | 2 +-
app.cfg.example | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index ebf4e29b..04da8630 100644
--- a/README.md
+++ b/README.md
@@ -658,7 +658,7 @@ initial_comment = New job on instance `{app_name}` for architecture `{arch_name}
created. Note, the part '{accelerator_spec}' is only filled-in by the bot if the
argument 'accelerator' to the `bot: build` command have been used.
```
-with_accelerator = " and accelerator {accelerator}"
+with_accelerator = and accelerator `{accelerator}`
```
`with_accelerator` is used to provide information about the accelerator the job
should build for if and only if the argument `accelerator:X/Y` has been provided.
diff --git a/app.cfg.example b/app.cfg.example
index 2e50834c..152de2bc 100644
--- a/app.cfg.example
+++ b/app.cfg.example
@@ -245,7 +245,7 @@ scontrol_command = /usr/bin/scontrol
[submitted_job_comments]
awaits_release = job id `{job_id}` awaits release by job manager
initial_comment = New job on instance `{app_name}` for CPU micro-architecture `{arch_name}`{accelerator_spec} for repository `{repo_id}` in job dir `{symlink}`
-with_accelerator = " and accelerator {accelerator}"
+with_accelerator = and accelerator `{accelerator}`
[new_job_comments]
From 34534bb10edb8b1d3643e46bba61b5f6443bdd10 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20R=C3=B6blitz?=
Date: Tue, 10 Sep 2024 16:55:08 +0200
Subject: [PATCH 56/64] fix typo in README.md
Co-authored-by: Kenneth Hoste
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 04da8630..feb9d38e 100644
--- a/README.md
+++ b/README.md
@@ -656,7 +656,7 @@ initial_comment = New job on instance `{app_name}` for architecture `{arch_name}
```
`initial_comment` is used to create a comment to a PR when a new job has been
created. Note, the part '{accelerator_spec}' is only filled-in by the bot if the
-argument 'accelerator' to the `bot: build` command have been used.
+argument 'accelerator' to the `bot: build` command has been used.
```
with_accelerator = and accelerator `{accelerator}`
```
From 8f44ba9846c7d9eb37fefd232e0884ab9651e627 Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Tue, 10 Sep 2024 21:11:17 +0200
Subject: [PATCH 57/64] using None for undefined accelerator, improved logging
---
tasks/build.py | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/tasks/build.py b/tasks/build.py
index 5b214582..56068e04 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -475,11 +475,16 @@ def prepare_jobs(pr, cfg, event_info, action_filter):
# call to just before download_pr
year_month, pr_id, run_dir = create_pr_dir(pr, cfg, event_info)
- accelerator = "none"
# determine accelerator from action_filter argument
accelerators = action_filter.get_filter_by_component(tools_filter.FILTER_COMPONENT_ACCEL)
- if len(accelerators) > 0:
+ if len(accelerators) == 1:
accelerator = accelerators[0]
+ elif len(accelerators) > 1:
+ log(f"{fn}(): found more than one ({len(accelerators)}) accelerator requirement")
+ accelerator = None
+ else:
+ log(f"{fn}(): found no accelerator requirement")
+ accelerator = None
jobs = []
for arch, slurm_opt in arch_map.items():
@@ -529,9 +534,10 @@ def prepare_jobs(pr, cfg, event_info, action_filter):
# prepare job configuration file 'job.cfg' in directory /cfg
cpu_target = '/'.join(arch.split('/')[1:])
os_type = arch.split('/')[0]
- log(f"{fn}(): arch = '{arch}' => cpu_target = '{cpu_target}' , os_type = '{os_type}'")
- log(f"{fn}(): accelerator = '{accelerator}'")
+ log(f"{fn}(): arch = '{arch}' => cpu_target = '{cpu_target}' , os_type = '{os_type}'"
+ f", accelerator = '{accelerator}'")
+
prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, cpu_target, os_type, accelerator)
# enlist jobs to proceed
@@ -749,12 +755,11 @@ def create_pr_comment(job, job_id, app_name, pr, gh, symlink):
submitted_job_comments_cfg = config.read_config()[config.SECTION_SUBMITTED_JOB_COMMENTS]
- # obtain accelerator from job.accelerator
- accelerator = job.accelerator
+ # set string for accelerator if job.accelerator is defined/set (e.g., not None)
accelerator_spec_str = ''
- if accelerator != 'none':
+ if job.accelerator:
accelerator_spec = f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR]}"
- accelerator_spec_str = accelerator_spec.format(accelerator=accelerator)
+ accelerator_spec_str = accelerator_spec.format(accelerator=job.accelerator)
# get current date and time
dt = datetime.now(timezone.utc)
From 216e2dbc41a045ec92423b7b8509c7215650137e Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Tue, 10 Sep 2024 21:22:30 +0200
Subject: [PATCH 58/64] fix failing pytests for extended Job tuple
---
tests/test_task_build.py | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/tests/test_task_build.py b/tests/test_task_build.py
index 6f79fc74..f432d768 100644
--- a/tests/test_task_build.py
+++ b/tests/test_task_build.py
@@ -282,7 +282,7 @@ def test_create_pr_comment_succeeds(mocked_github, tmpdir):
print("CREATING PR COMMENT")
ym = datetime.today().strftime('%Y.%m')
pr_number = 1
- job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number)
+ job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic")
job_id = "123"
app_name = "pytest"
@@ -311,7 +311,7 @@ def test_create_pr_comment_succeeds_none(mocked_github, tmpdir):
print("CREATING PR COMMENT")
ym = datetime.today().strftime('%Y.%m')
pr_number = 1
- job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number)
+ job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic")
job_id = "123"
app_name = "pytest"
@@ -336,7 +336,7 @@ def test_create_pr_comment_raises_once_then_succeeds(mocked_github, tmpdir):
print("CREATING PR COMMENT")
ym = datetime.today().strftime('%Y.%m')
pr_number = 1
- job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number)
+ job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic")
job_id = "123"
app_name = "pytest"
@@ -361,7 +361,7 @@ def test_create_pr_comment_always_raises(mocked_github, tmpdir):
print("CREATING PR COMMENT")
ym = datetime.today().strftime('%Y.%m')
pr_number = 1
- job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number)
+ job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic")
job_id = "123"
app_name = "pytest"
@@ -387,7 +387,7 @@ def test_create_pr_comment_three_raises(mocked_github, tmpdir):
print("CREATING PR COMMENT")
ym = datetime.today().strftime('%Y.%m')
pr_number = 1
- job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number)
+ job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic")
job_id = "123"
app_name = "pytest"
@@ -409,7 +409,7 @@ def test_create_read_metadata_file(mocked_github, tmpdir):
# create some test data
ym = datetime.today().strftime('%Y.%m')
pr_number = 999
- job = Job(tmpdir, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number)
+ job = Job(tmpdir, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic")
job_id = "123"
@@ -440,14 +440,14 @@ def test_create_read_metadata_file(mocked_github, tmpdir):
# use directory that does not exist
dir_does_not_exist = os.path.join(tmpdir, "dir_does_not_exist")
- job2 = Job(dir_does_not_exist, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number)
+ job2 = Job(dir_does_not_exist, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic")
job_id2 = "222"
with pytest.raises(FileNotFoundError):
create_metadata_file(job2, job_id2, pr_comment)
# use directory without write permission
dir_without_write_perm = os.path.join("/")
- job3 = Job(dir_without_write_perm, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number)
+ job3 = Job(dir_without_write_perm, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic")
job_id3 = "333"
with pytest.raises(OSError):
create_metadata_file(job3, job_id3, pr_comment)
@@ -457,7 +457,7 @@ def test_create_read_metadata_file(mocked_github, tmpdir):
# use undefined values for parameters
# job_id = None
- job4 = Job(tmpdir, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number)
+ job4 = Job(tmpdir, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic")
job_id4 = None
create_metadata_file(job4, job_id4, pr_comment)
@@ -472,7 +472,7 @@ def test_create_read_metadata_file(mocked_github, tmpdir):
# use undefined values for parameters
# job.working_dir = None
- job5 = Job(None, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number)
+ job5 = Job(None, "test/architecture", "EESSI", "--speed_up_job", ym, pr_number, "fpga/magic")
job_id5 = "555"
with pytest.raises(TypeError):
create_metadata_file(job5, job_id5, pr_comment)
From 35dac28497b9380d89035e88a0710143a59bef0a Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Tue, 10 Sep 2024 21:30:08 +0200
Subject: [PATCH 59/64] adjust test data (app.cfg)
---
tests/test_app.cfg | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tests/test_app.cfg b/tests/test_app.cfg
index fd91ed8b..43e11bf9 100644
--- a/tests/test_app.cfg
+++ b/tests/test_app.cfg
@@ -17,8 +17,9 @@
# variable 'comment' under 'submitted_job_comments' should not be changed as there are regular expression patterns matching it
[submitted_job_comments]
-initial_comment = New job on instance `{app_name}` for architecture `{arch_name}` for repository `{repo_id}` in job dir `{symlink}`
awaits_release = job id `{job_id}` awaits release by job manager
+initial_comment = New job on instance `{app_name}` for CPU micro-architecture `{arch_name}`{accelerator_spec} for repository `{repo_id}` in job dir `{symlink}`
+with_accelerator = and accelerator `{accelerator}`
[new_job_comments]
awaits_lauch = job awaits launch by Slurm scheduler
From 687c256a5a6e5b4165ad09bc48dcab78d8fe425c Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Tue, 10 Sep 2024 21:57:35 +0200
Subject: [PATCH 60/64] cfg values cannot be `None`
---
tasks/build.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tasks/build.py b/tasks/build.py
index 56068e04..d70103ee 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -633,7 +633,7 @@ def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir,
job_cfg[job_cfg_arch_section] = {}
job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_SOFTWARE_SUBDIR] = software_subdir
job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_OS_TYPE] = os_type
- job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_ACCELERATOR] = accelerator
+ job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_ACCELERATOR] = accelerator if accelerator else ''
# copy contents of directory containing repository configuration to directory
# containing job configuration/metadata
From 9af672cf89cd87d65d4fefb135eebbb98371e5af Mon Sep 17 00:00:00 2001
From: Kenneth Hoste
Date: Tue, 17 Sep 2024 15:57:14 +0200
Subject: [PATCH 61/64] mark `with_accelerator` setting in
 `[submitted_job_comments]` section as required
---
eessi_bot_event_handler.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index ac943faf..00a1db81 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -97,7 +97,8 @@
config.REPO_TARGETS_SETTING_REPOS_CFG_DIR], # required
config.SECTION_SUBMITTED_JOB_COMMENTS: [
config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT, # required
- config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE] # required
+ config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE, # required
+ config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR], # required
}
From f172396dd2a2f76544c7e49a6744fd1d06c6fe9e Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Wed, 18 Sep 2024 20:02:15 +0200
Subject: [PATCH 62/64] release notes for v0.6.0
---
RELEASE_NOTES | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 20080acf..f203c1ed 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -1,6 +1,37 @@
This file contains a description of the major changes to the EESSI
build-and-deploy bot. For more detailed information, please see the git log.
+v0.6.0 (18 September 2024)
+--------------------------
+
+This is a minor release of the EESSI build-and-deploy bot.
+
+Bug fixes:
+* mark 'with_accelerator' setting as required (#282)
+
+Improvements:
+* move merged PR job directories to 'trash_bin_dir' (#271)
+ * the target directory can be defined with the 'app.cfg' setting 'trash_bin_dir'
+ * it uses 'shutil.move' which tries to use 'mv' if source and target are on the
+ same filesystem
+* add setting to give all jobs a unique name (#273)
+* move closed PR job directories to 'trash_bin_dir' (#275)
+* add filter for accelerators (#276)
+* add support for updating Slurm options through user-defined python module in
+ target PR (#277)
+* use GitHub API for downloading the diff of a PR (#278)
+* add documentation about private repos (#279)
+* pass accelerator value to job scripts (via job.cfg) and extend PR comment if
+ the 'accelerator' argument is used (#280)
+
+New 'app.cfg' settings (see README.md and app.cfg.example for details):
+* (optional) 'allow_update_submit_opts'
+* (required) 'job_name'
+* (required) 'moved_job_dirs_comment'
+* (required) 'trash_bin_dir'
+* (required) 'with_accelerator'
+
+
v0.5.0 (16 May 2024)
--------------------------
From 5cdc4baa9a21ac859dc9aac638a45b403a2bda7c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20R=C3=B6blitz?=
Date: Wed, 18 Sep 2024 20:16:13 +0200
Subject: [PATCH 63/64] merge info on 280 and 282
Co-authored-by: Kenneth Hoste
---
RELEASE_NOTES | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index f203c1ed..84ca89e8 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -22,7 +22,7 @@ Improvements:
* use GitHub API for downloading the diff of a PR (#278)
* add documentation about private repos (#279)
* pass accelerator value to job scripts (via job.cfg) and extend PR comment if
- the 'accelerator' argument is used (#280)
+ the 'accelerator' argument is used (#280, #282)
New 'app.cfg' settings (see README.md and app.cfg.example for details):
* (optional) 'allow_update_submit_opts'
From a22c4340bd5f6ee6e9984d6fa2bf785c385d546c Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Wed, 18 Sep 2024 20:26:03 +0200
Subject: [PATCH 64/64] update release notes
---
RELEASE_NOTES | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index f203c1ed..dd5378e1 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -6,9 +6,6 @@ v0.6.0 (18 September 2024)
This is a minor release of the EESSI build-and-deploy bot.
-Bug fixes:
-* mark 'with_accelerator' setting as required (#282)
-
Improvements:
* move merged PR job directories to 'trash_bin_dir' (#271)
* the target directory can be defined with the 'app.cfg' setting 'trash_bin_dir'
@@ -22,14 +19,14 @@ Improvements:
* use GitHub API for downloading the diff of a PR (#278)
* add documentation about private repos (#279)
* pass accelerator value to job scripts (via job.cfg) and extend PR comment if
- the 'accelerator' argument is used (#280)
+ the 'accelerator' argument is used (#280, #282)
New 'app.cfg' settings (see README.md and app.cfg.example for details):
-* (optional) 'allow_update_submit_opts'
-* (required) 'job_name'
-* (required) 'moved_job_dirs_comment'
-* (required) 'trash_bin_dir'
-* (required) 'with_accelerator'
+* (optional) 'allow_update_submit_opts' in section '[buildenv]'
+* (required) 'job_name' in section '[buildenv]'
+* (required) 'moved_job_dirs_comment' in section '[clean_up]'
+* (required) 'trash_bin_dir' in section '[clean_up]'
+* (required) 'with_accelerator' in section '[submitted_job_comments]'
v0.5.0 (16 May 2024)