diff --git a/README.md b/README.md
index d855ef43..ca96d2eb 100644
--- a/README.md
+++ b/README.md
@@ -404,6 +404,20 @@ submit_command = /usr/bin/sbatch
 ```
 `submit_command` is the full path to the Slurm job submission command used for
 submitting batch jobs. You may want to verify that `sbatch` is available at that
 path, or determine its actual location (using `which sbatch`).
+```
+build_permission = GH_ACCOUNT_1 GH_ACCOUNT_2 ...
+```
+`build_permission` defines which GitHub accounts have permission to trigger
+build jobs, i.e., for which accounts the bot acts on `bot: build ...` commands.
+If the value is left empty, everyone can trigger build jobs.
+
+```
+no_build_permission_comment = The `bot: build ...` command has been used by user `{build_labeler}`, but this person does not have permission to trigger builds.
+```
+`no_build_permission_comment` defines a comment (template) that is used when
+the account trying to trigger build jobs has no permission to do so.
+
+
 #### `[bot_control]` section
 
 The `[bot_control]` section contains settings for configuring the feature to
@@ -485,6 +499,43 @@ This defines a message that is added to the status table in a PR comment
 corresponding to a job whose tarball should have been uploaded (e.g., after
 setting the `bot:deploy` label).
 
+
+```
+metadata_prefix = LOCATION_WHERE_METADATA_FILE_GETS_DEPOSITED
+tarball_prefix = LOCATION_WHERE_TARBALL_GETS_DEPOSITED
+```
+
+These two settings define in which directory of the S3 bucket
+(see `bucket_name` above) the metadata file and the tarball are stored. The
+value `LOCATION...` can be a plain string to always use the same 'prefix'
+regardless of the target CVMFS repository, or a mapping of a target
+repository id (see also `repo_target_map` below) to a prefix.
+
+The prefix itself can use some (environment) variables that are set within
+the upload script (see `tarball_upload_script` above). Currently these are:
+ * `'${github_repository}'` (which is expanded to the full name of the GitHub
+   repository, e.g., `EESSI/software-layer`),
+ * `'${legacy_aws_path}'` (which expands to the legacy/old prefix used for
+   storing tarballs/metadata files; the old prefix is
+   `EESSI_VERSION/TARBALL_TYPE/OS_TYPE/CPU_ARCHITECTURE/TIMESTAMP/`), _and_
+ * `'${pull_request_number}'` (which is expanded to the number of the pull
+   request from which the tarball originates).
+Note: it is important to single-quote (`'`) the variables as shown above, because
+they are likely not yet defined when the bot calls the upload script.
+
+The list of supported variables can be shown by running
+`scripts/eessi-upload-to-staging --list-variables`.
+
+**Examples:**
+```
+metadata_prefix = {"eessi.io-2023.06": "new/${github_repository}/${pull_request_number}"}
+tarball_prefix = {
+    "eessi-pilot-2023.06": "",
+    "eessi.io-2023.06": "new/${github_repository}/${pull_request_number}"
+  }
+```
+If left empty, the old/legacy prefix is used.
+
 #### `[architecturetargets]` section
 
 The section `[architecturetargets]` defines for which targets (`OS/SUBDIR`, for
 example `linux/x86_64/amd/zen2`) the EESSI bot should submit jobs, and which
 additional `sbatch` parameters will be used for requesting a compute node with
 the CPU microarchitecture needed to build the software stack.
@@ -657,6 +708,53 @@
 job_test_unknown_fmt = &#13;:shrug: UNKNOWN _(click triangle for detailed information)_
 ```
 `job_test_unknown_fmt` is used in case no test file (produced by `bot/check-test.sh`
 provided by the target repository) was found.
+
+#### `[download_pr_comments]` section
+
+The `[download_pr_comments]` section sets templates for messages related to
+downloading the contents of a pull request.
+```
+git_clone_failure = Unable to clone the target repository.
+```
+`git_clone_failure` is shown when `git clone` failed.
+
+```
+git_clone_tip = _Tip: This could be a connection failure. Try again, and if the issue remains, check if the address is correct._
+```
+`git_clone_tip` should contain some hint on how to deal with the issue. It is
+shown when `git clone` failed.
+
+```
+git_checkout_failure = Unable to check out the correct branch.
+```
+`git_checkout_failure` is shown when `git checkout` failed.
+
+```
+git_checkout_tip = _Tip: Ensure that the branch name is correct and the target branch is available._
+```
+`git_checkout_tip` should contain some hint on how to deal with the failure. It
+is shown when `git checkout` failed.
+
+```
+curl_failure = Unable to download the `.diff` file.
+```
+`curl_failure` is shown when downloading the `PR_NUMBER.diff` file failed.
+
+```
+curl_tip = _Tip: This could be a connection failure. Try again, and if the issue remains, check if the address is correct._
+```
+`curl_tip` should give a hint on how to deal with failing downloads of the `.diff` file.
+
+```
+git_apply_failure = Unable to download or merge changes between the source branch and the destination branch.
+```
+`git_apply_failure` is shown when applying the `.diff` file with `git apply`
+failed.
+
+```
+git_apply_tip = _Tip: This can usually be resolved by syncing your branch and resolving any merge conflicts._
+```
+`git_apply_tip` should guide the contributor/maintainer in resolving the cause
+of the `git apply` failure.
+
 
 # Instructions to run the bot components
 
 The bot consists of three components:
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index b8d09e8c..bfc562ae 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -1,6 +1,22 @@
 This file contains a description of the major changes to the EESSI
 build-and-deploy bot. For more detailed information, please see the git log.
 
+v0.4.0 (28 February 2024)
+--------------------------
+
+This is a minor release of the EESSI build-and-deploy bot.
+
+Bug fixes:
+* fix an issue where the `bot: status` command used wrong values (#251)
+
+Improvements:
+* make bot report when preparing the job working directory failed, for example
+  due to a merge conflict in a pull request (#248)
+* add the pull request comment id to the metadata file that is uploaded to the
+  S3 bucket (#247, #249, #250, #253)
+* enable configurable upload directories for tarball and metadata file (#254)
+* only make bot respond to pull request comments that contain a bot command (#257)
+
+
 v0.3.0 (30 January 2024)
 --------------------------
diff --git a/app.cfg.example b/app.cfg.example
index 7a6a1073..f01c2be9 100644
--- a/app.cfg.example
+++ b/app.cfg.example
@@ -147,6 +147,28 @@ deploy_permission =
 # template for comment when user who set a label has no permission to trigger deploying tarballs
 no_deploy_permission_comment = Label `bot:deploy` has been set by user `{deploy_labeler}`, but this person does not have permission to trigger deployments
 
+# settings for where (directory) in the S3 bucket to store the metadata file and
+# the tarball
+# - Can be a string value to always use the same 'prefix' regardless of the target
+#   CVMFS repository, or can be a mapping of a target repository id (see also
+#   repo_target_map) to a prefix.
+# - The prefix itself can use some (environment) variables that are set within
+#   the script. Currently these are:
+#   * 'github_repository' (which is expanded to the full name of the GitHub
+#     repository, e.g., 'EESSI/software-layer'),
+#   * 'legacy_aws_path' (which expands to the legacy/old prefix used for
+#     storing tarballs/metadata files) and
+#   * 'pull_request_number' (which is expanded to the number of the pull
+#     request from which the tarball originates).
+# - The list of supported variables can be shown by running
+#   `scripts/eessi-upload-to-staging --list-variables`.
+# - Examples:
+#   metadata_prefix = {"eessi.io-2023.06": "new/${github_repository}/${pull_request_number}"}
+#   tarball_prefix = {"eessi-pilot-2023.06": "", "eessi.io-2023.06": "new/${github_repository}/${pull_request_number}"}
+# If left empty, the old/legacy prefix is used.
+metadata_prefix =
+tarball_prefix =
+
 [architecturetargets]
 
 # defines both for which architectures the bot will build
@@ -219,3 +241,13 @@ no_matching_tarball = No tarball matching `{tarball_pattern}` found in job dir.
 multiple_tarballs = Found {num_tarballs} tarballs in job dir - only 1 matching `{tarball_pattern}` expected.
 job_result_unknown_fmt = &#13;:shrug: UNKNOWN _(click triangle for detailed information)_&#13;&emsp;&bullet; Job results file `{filename}` does not exist in job directory, or parsing it failed.&#13;&emsp;&bullet; No artefacts were found/reported.&#13;
 job_test_unknown_fmt = &#13;:shrug: UNKNOWN _(click triangle for detailed information)_&#13;&emsp;&bullet; Job test file `{filename}` does not exist in job directory, or parsing it failed.&#13;
+
+[download_pr_comments]
+git_clone_failure = Unable to clone the target repository.
+git_clone_tip = _Tip: This could be a connection failure. Try again, and if the issue remains, check if the address is correct._
+git_checkout_failure = Unable to check out the correct branch.
+git_checkout_tip = _Tip: Ensure that the branch name is correct and the target branch is available._
+curl_failure = Unable to download the `.diff` file.
+curl_tip = _Tip: This could be a connection failure. Try again, and if the issue remains, check if the address is correct._
+git_apply_failure = Unable to download or merge changes between the source branch and the destination branch.
+git_apply_tip = _Tip: This can usually be resolved by syncing your branch and resolving any merge conflicts._
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index 024615ce..be48306f 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -31,7 +31,8 @@ from tasks.deploy import deploy_built_artefacts
 from tools import config
 from tools.args import event_handler_parse
-from tools.commands import EESSIBotCommand, EESSIBotCommandError, get_bot_command
+from tools.commands import EESSIBotCommand, EESSIBotCommandError, \
+    contains_any_bot_command, get_bot_command
 from tools.permissions import check_command_permission
 from tools.pr_comments import create_comment
 
@@ -113,7 +114,25 @@ def handle_issue_comment_event(self, event_info, log_file=None):
         # currently, only commands in new comments are supported
         # - commands have the syntax 'bot: COMMAND [ARGS*]'
 
-        # first check if sender is authorized to send any command
+        # only scan for commands in newly created comments
+        if action == 'created':
+            comment_received = request_body['comment']['body']
+            self.log(f"comment action '{action}' is handled")
+        else:
+            # NOTE we do not respond to an updated PR comment with yet another
+            # new PR comment, because it would make the bot very noisy or
+            # worse could result in letting the bot enter an endless loop
+            self.log(f"comment action '{action}' not handled")
+            return
+        # at this point we know that we are handling a new comment
+
+        # ignore the comment if it does not contain a bot command
+        if not contains_any_bot_command(comment_received):
+            self.log("comment does not contain a bot command; not processing it further")
+            return
+        # at this point we know that the comment contains a bot command
+
+        # check if sender is authorized to send any command
         # - this serves a double purpose:
         #    1. check permission
         #    2. skip any comment updates that were done by the bot itself
@@ -150,17 +169,6 @@ def handle_issue_comment_event(self, event_info, log_file=None):
         else:
             self.log(f"account `{sender}` has permission to send commands to bot")
 
-        # only scan for commands in newly created comments
-        if action == 'created':
-            comment_received = request_body['comment']['body']
-            self.log(f"comment action '{action}' is handled")
-        else:
-            # NOTE we do not respond to an updated PR comment with yet another
-            # new PR comment, because it would make the bot very noisy or
-            # worse could result in letting the bot enter an endless loop
-            self.log(f"comment action '{action}' not handled")
-            return
-
         # search for commands in comment
         comment_response = ''
         commands = []
diff --git a/eessi_bot_job_manager.py b/eessi_bot_job_manager.py
index 4830c1e3..5c475898 100644
--- a/eessi_bot_job_manager.py
+++ b/eessi_bot_job_manager.py
@@ -42,7 +42,7 @@ from connections import github
 from tools import config, run_cmd
 from tools.args import job_manager_parse
-from tools.job_metadata import read_metadata_file
+from tools.job_metadata import read_job_metadata_from_file, read_metadata_file
 from tools.pr_comments import get_submitted_job_comment, update_comment
 
@@ -251,24 +251,6 @@ def determine_finished_jobs(self, known_jobs, current_jobs):
 
         return finished_jobs
 
-    def read_job_pr_metadata(self, job_metadata_path):
-        """
-        Read job metadata file and return the contents of the 'PR' section.
-
-        Args:
-            job_metadata_path (string): path to job metadata file
-
-        Returns:
-            (ConfigParser): instance of ConfigParser corresponding to the 'PR'
-                section or None
-        """
-        # reuse function from module tools.job_metadata to read metadata file
-        metadata = read_metadata_file(job_metadata_path, self.logfile)
-        if metadata and "PR" in metadata:
-            return metadata["PR"]
-        else:
-            return None
-
     def read_job_result(self, job_result_file_path):
         """
         Read job result file and return the contents of the 'RESULT' section.
@@ -350,7 +332,7 @@ def process_new_job(self, new_job):
 
             # assuming that a bot job's working directory contains a metadata
             # file, its existence is used to check if the job belongs to the bot
-            metadata_pr = self.read_job_pr_metadata(job_metadata_path)
+            metadata_pr = read_job_metadata_from_file(job_metadata_path, self.logfile)
             if metadata_pr is None:
                 log(f"No metadata file found at {job_metadata_path} for job {job_id}, so skipping it",
@@ -446,7 +428,7 @@ def process_running_jobs(self, running_job):
             job_metadata_path = os.path.join(job_dir, metadata_file)
 
             # check if metadata file exist
-            metadata_pr = self.read_job_pr_metadata(job_metadata_path)
+            metadata_pr = read_job_metadata_from_file(job_metadata_path, self.logfile)
             if metadata_pr is None:
                 raise Exception("Unable to find metadata file")
 
@@ -591,7 +573,7 @@ def process_finished_job(self, finished_job):
         # obtain id of PR comment to be updated (from file '_bot_jobID.metadata')
         metadata_file = f"_bot_job{job_id}.metadata"
         job_metadata_path = os.path.join(new_symlink, metadata_file)
-        metadata_pr = self.read_job_pr_metadata(job_metadata_path)
+        metadata_pr = read_job_metadata_from_file(job_metadata_path, self.logfile)
         if metadata_pr is None:
skip updating PR comment") @@ -685,6 +667,13 @@ def main(): if max_iter != 0: known_jobs = job_manager.get_known_jobs() while max_iter < 0 or i < max_iter: + # sleep poll_interval seconds (not for the first iteration) + if i != 0: + log( + "job manager main loop: sleep %d seconds" % poll_interval, + job_manager.logfile, + ) + time.sleep(poll_interval) log("job manager main loop: iteration %d" % i, job_manager.logfile) log( "job manager main loop: known_jobs='%s'" % ",".join( @@ -692,7 +681,12 @@ def main(): job_manager.logfile, ) - current_jobs = job_manager.get_current_jobs() + try: + current_jobs = job_manager.get_current_jobs() + except RuntimeError: + i = i + 1 + continue + log( "job manager main loop: current_jobs='%s'" % ",".join( current_jobs.keys()), @@ -747,13 +741,7 @@ def main(): known_jobs = current_jobs - # sleep poll_interval seconds (only if at least one more iteration) - if max_iter < 0 or i + 1 < max_iter: - log( - "job manager main loop: sleep %d seconds" % poll_interval, - job_manager.logfile, - ) - time.sleep(poll_interval) + # add one iteration to the loop i = i + 1 diff --git a/scripts/eessi-upload-to-staging b/scripts/eessi-upload-to-staging index 16b9231e..45e52fbf 100755 --- a/scripts/eessi-upload-to-staging +++ b/scripts/eessi-upload-to-staging @@ -41,7 +41,8 @@ function create_metadata_file _tarball=$1 _url=$2 _repository=$3 - _pull_request=$4 + _pull_request_number=$4 + _pull_request_comment_id=$5 _tmpfile=$(mktemp) @@ -55,11 +56,12 @@ function create_metadata_file --arg sha256 "$(sha256sum "${_tarball}" | awk '{print $1}')" \ --arg url "${_url}" \ --arg repo "${_repository}" \ - --arg pr "${_pull_request}" \ + --arg pr "${_pull_request_number}" \ + --arg pr_comment_id "${_pull_request_comment_id}" \ '{ uploader: {username: $un, ip: $ip, hostname: $hn}, payload: {filename: $fn, size: $sz, ctime: $ct, sha256sum: $sha256, url: $url}, - link2pr: {repo: $repo, pr: $pr}, + link2pr: {repo: $repo, pr: $pr, pr_comment_id: $pr_comment_id}, }' > "${_tmpfile}" echo "${_tmpfile}" @@ -67,14 +69,30 @@ function create_metadata_file function display_help { - echo "Usage: $0 [OPTIONS] " >&2 - echo " -e | --endpoint-url URL - endpoint url (needed for non AWS S3)" >&2 - echo " -h | --help - display this usage information" >&2 - echo " -n | --bucket-name BUCKET - bucket name (same as BUCKET above)" >&2 - echo " -p | --pull-request NUMBER - a pull request NUMBER; used to" >&2 - echo " link the upload to a PR" >&2 - echo " -r | --repository FULL_NAME - a repository name ACCOUNT/REPONAME;" >&2 - echo " used to link the upload to a PR" >&2 + echo "Usage: $0 [OPTIONS] " >&2 + echo " -e | --endpoint-url URL - endpoint url (needed for non AWS S3)" >&2 + echo " -h | --help - display this usage information" >&2 + echo " -i | --pr-comment-id - identifier of a PR comment; may be" >&2 + echo " used to efficiently determine the PR" >&2 + echo " comment to be updated during the" >&2 + echo " ingestion procedure" >&2 + echo " -l | --list-variables - list variables that are available" >&2 + echo " for expansion" >&2 + echo " -m | --metadata-prefix PREFIX - a directory to which the metadata" >&2 + echo " file shall be uploaded; BASH variable" >&2 + echo " expansion will be applied; arg '-l'" >&2 + echo " lists variables that are defined at" >&2 + echo " the time of expansion" >&2 + echo " -n | --bucket-name BUCKET - bucket name (same as BUCKET above)" >&2 + echo " -p | --pull-request-number INT - a pull request number (INT); used to" >&2 + echo " link the upload to a PR" >&2 + echo " -r | 
+    echo "  -r | --repository FULL_NAME    - a repository name ACCOUNT/REPONAME;" >&2
+    echo "                                   used to link the upload to a PR" >&2
+    echo "  -t | --tarball-prefix PREFIX   - a directory to which the tarball" >&2
+    echo "                                   shall be uploaded; BASH variable" >&2
+    echo "                                   expansion will be applied; arg '-l'" >&2
+    echo "                                   lists variables that are defined at" >&2
+    echo "                                   the time of expansion" >&2
 }
 
 if [[ $# -lt 1 ]]; then
@@ -97,8 +115,19 @@ bucket_name="eessi-staging"
 # provided via options in the bot's config file app.cfg
 endpoint_url=
-pull_request=
-repository=
+
+# provided via command line arguments
+pr_comment_id="none"
+pull_request_number="none"
+github_repository="EESSI/software-layer"
+
+# provided via options in the bot's config file app.cfg and/or command line argument
+metadata_prefix=
+tarball_prefix=
+
+# other variables
+legacy_aws_path=
+variables="github_repository legacy_aws_path pull_request_number"
 
 while [[ $# -gt 0 ]]; do
   case $1 in
@@ -110,16 +139,36 @@ while [[ $# -gt 0 ]]; do
       display_help
       exit 0
       ;;
+    -l|--list-variables)
+      echo "variables that will be expanded: name (default value)"
+      for var in ${variables}
+      do
+          echo "    ${var} (${!var:-unset})"
+      done
+      exit 0
+      ;;
+    -i|--pr-comment-id)
+      pr_comment_id="$2"
+      shift 2
+      ;;
+    -m|--metadata-prefix)
+      metadata_prefix="$2"
+      shift 2
+      ;;
     -n|--bucket-name)
       bucket_name="$2"
      shift 2
      ;;
-    -p|--pull-request)
-      pull_request="$2"
+    -p|--pull-request-number)
+      pull_request_number="$2"
       shift 2
       ;;
     -r|--repository)
-      repository="$2"
+      github_repository="$2"
+      shift 2
+      ;;
+    -t|--tarball-prefix)
+      tarball_prefix="$2"
       shift 2
       ;;
     -*|--*)
@@ -155,22 +204,50 @@ for file in "$*"; do
     basefile=$( basename ${file} )
     if check_file_name ${basefile}; then
         if tar tf "${file}" | head -n1 > /dev/null; then
-            aws_path=$(basename ${file} | tr -s '-' '/' \
+            # 'legacy_aws_path' might be used in tarball_prefix or metadata_prefix
+            # its purpose is to support the old/legacy method to derive the location
+            # where to store the tarball and metadata file
+            export legacy_aws_path=$(basename ${file} | tr -s '-' '/' \
                 | perl -pe 's/^eessi.//;' | perl -pe 's/\.tar\.gz$//;' )
+            if [ -z "${tarball_prefix}" ]; then
+                aws_path=${legacy_aws_path}
+            else
+                export pull_request_number
+                export github_repository
+                aws_path=$(envsubst <<< "${tarball_prefix}")
+            fi
             aws_file=$(basename ${file})
             echo "Creating metadata file"
             url="${bucket_base}/${aws_path}/${aws_file}"
-            metadata_file=$(create_metadata_file "${file}" "${url}" \
-                            "${repository}" "${pull_request}")
+            echo "create_metadata_file file=${file} \
+                url=${url} \
+                github_repository=${github_repository} \
+                pull_request_number=${pull_request_number} \
+                pr_comment_id=${pr_comment_id}"
+            metadata_file=$(create_metadata_file "${file}" \
+                                                 "${url}" \
+                                                 "${github_repository}" \
+                                                 "${pull_request_number}" \
+                                                 "${pr_comment_id}")
             echo "metadata:"
             cat ${metadata_file}
 
             echo Uploading to "${url}"
+            echo "  store tarball at ${aws_path}/${aws_file}"
             upload_to_staging_bucket \
                 "${file}" \
                 "${bucket_name}" \
                 "${aws_path}/${aws_file}" \
                 "${endpoint_url}"
+
+            if [ -z "${metadata_prefix}" ]; then
+                aws_path=${legacy_aws_path}
+            else
+                export pull_request_number
+                export github_repository
+                aws_path=$(envsubst <<< "${metadata_prefix}")
+            fi
+            echo "  store metadata file at ${aws_path}/${aws_file}.meta.txt"
             upload_to_staging_bucket \
                 "${metadata_file}" \
                 "${bucket_name}" \
diff --git a/tasks/build.py b/tasks/build.py
index 1ce8e956..36cc08d1 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -40,9 +40,23 @@ BUILD_PERMISSION = "build_permission"
= "cfg" CONTAINER_CACHEDIR = "container_cachedir" +CURL_FAILURE = "curl_failure" +CURL_TIP = "curl_tip" CVMFS_CUSTOMIZATIONS = "cvmfs_customizations" DEFAULT_JOB_TIME_LIMIT = "24:00:00" +DOWNLOAD_PR_COMMENTS = "download_pr_comments" +ERROR_CURL = "curl" +ERROR_GIT_APPLY = "git apply" +ERROR_GIT_CHECKOUT = "git checkout" +ERROR_GIT_CLONE = "curl" +ERROR_NONE = "none" GITHUB = "github" +GIT_CLONE_FAILURE = "git_clone_failure" +GIT_CLONE_TIP = "git_clone_tip" +GIT_CHECKOUT_FAILURE = "git_checkout_failure" +GIT_CHECKOUT_TIP = "git_checkout_tip" +GIT_APPLY_FAILURE = "git_apply_failure" +GIT_APPLY_TIP = "git_apply_tip" HTTPS_PROXY = "https_proxy" HTTP_PROXY = "http_proxy" INITIAL_COMMENT = "initial_comment" @@ -337,7 +351,9 @@ def download_pr(repo_name, branch_name, pr, arch_job_dir): arch_job_dir (string): working directory of the job to be submitted Returns: - None (implicitly) + None (implicitly), in case an error is caught in the git clone, git checkout, curl, + or git apply commands, returns the output, stderror, exit code and a string + stating which of these commands failed. """ # download pull request to arch_job_dir # - 'git clone' repository into arch_job_dir (NOTE 'git clone' requires that @@ -346,20 +362,94 @@ def download_pr(repo_name, branch_name, pr, arch_job_dir): # - 'curl' diff for pull request # - 'git apply' diff file git_clone_cmd = ' '.join(['git clone', f'https://github.com/{repo_name}', arch_job_dir]) - clone_output, clone_error, clone_exit_code = run_cmd(git_clone_cmd, "Clone repo", arch_job_dir) + log(f'cloning with command {git_clone_cmd}') + clone_output, clone_error, clone_exit_code = run_cmd( + git_clone_cmd, "Clone repo", arch_job_dir, raise_on_error=False + ) + if clone_exit_code != 0: + error_stage = ERROR_GIT_CLONE + return clone_output, clone_error, clone_exit_code, error_stage git_checkout_cmd = ' '.join([ 'git checkout', branch_name, ]) - checkout_output, checkout_err, checkout_exit_code = run_cmd(git_checkout_cmd, - "checkout branch '%s'" % branch_name, arch_job_dir) + log(f'checking out with command {git_checkout_cmd}') + checkout_output, checkout_err, checkout_exit_code = run_cmd( + git_checkout_cmd, "checkout branch '%s'" % branch_name, arch_job_dir, raise_on_error=False + ) + if checkout_exit_code != 0: + error_stage = ERROR_GIT_CHECKOUT + return checkout_output, checkout_err, checkout_exit_code, error_stage curl_cmd = f'curl -L https://github.com/{repo_name}/pull/{pr.number}.diff > {pr.number}.diff' - curl_output, curl_error, curl_exit_code = run_cmd(curl_cmd, "Obtain patch", arch_job_dir) + log(f'curl with command {curl_cmd}') + curl_output, curl_error, curl_exit_code = run_cmd( + curl_cmd, "Obtain patch", arch_job_dir, raise_on_error=False + ) + if curl_exit_code != 0: + error_stage = ERROR_CURL + return curl_output, curl_error, curl_exit_code, error_stage git_apply_cmd = f'git apply {pr.number}.diff' - git_apply_output, git_apply_error, git_apply_exit_code = run_cmd(git_apply_cmd, "Apply patch", arch_job_dir) + log(f'git apply with command {git_apply_cmd}') + git_apply_output, git_apply_error, git_apply_exit_code = run_cmd( + git_apply_cmd, "Apply patch", arch_job_dir, raise_on_error=False + ) + if git_apply_exit_code != 0: + error_stage = ERROR_GIT_APPLY + return git_apply_output, git_apply_error, git_apply_exit_code, error_stage + + # need to return four items also in case everything went fine + return 'downloading PR succeeded', 'no error while downloading PR', 0, ERROR_NONE + + +def comment_download_pr(base_repo_name, pr, download_pr_exit_code, 
+                        download_pr_error, error_stage):
+    """
+    Handle download_pr() exit code and write helpful comment to PR in case of failure
+
+    Args:
+        base_repo_name (string): name of the repository (format USER_OR_ORGANISATION/REPOSITORY)
+        pr (github.PullRequest.PullRequest): instance representing the pull request
+        download_pr_exit_code (int): exit code from download_pr(); 0 if all tasks were
+            successful, otherwise the exit code of the failing git clone, git checkout,
+            curl or git apply command
+        download_pr_error (string): None, or the stderr output of the failing git clone,
+            git checkout, curl or git apply command
+        error_stage (string): the stage at which download_pr() failed; one of 'git clone',
+            'git checkout', 'curl' or 'git apply'
+
+    Returns:
+        None (implicitly). A comment is created in the appropriate PR.
+    """
+    if download_pr_exit_code != 0:
+        fn = sys._getframe().f_code.co_name
+
+        download_pr_comments_cfg = config.read_config()[DOWNLOAD_PR_COMMENTS]
+        if error_stage == ERROR_GIT_CLONE:
+            download_comment = (f"`{download_pr_error}`"
+                                f"{download_pr_comments_cfg[GIT_CLONE_FAILURE]}"
+                                f"{download_pr_comments_cfg[GIT_CLONE_TIP]}")
+        elif error_stage == ERROR_GIT_CHECKOUT:
+            download_comment = (f"`{download_pr_error}`"
+                                f"{download_pr_comments_cfg[GIT_CHECKOUT_FAILURE]}"
+                                f"{download_pr_comments_cfg[GIT_CHECKOUT_TIP]}")
+        elif error_stage == ERROR_CURL:
+            download_comment = (f"`{download_pr_error}`"
+                                f"{download_pr_comments_cfg[CURL_FAILURE]}"
+                                f"{download_pr_comments_cfg[CURL_TIP]}")
+        elif error_stage == ERROR_GIT_APPLY:
+            download_comment = (f"`{download_pr_error}`"
+                                f"{download_pr_comments_cfg[GIT_APPLY_FAILURE]}"
+                                f"{download_pr_comments_cfg[GIT_APPLY_TIP]}")
+
+        download_comment = pr_comments.create_comment(
+            repo_name=base_repo_name, pr_number=pr.number, comment=download_comment
+        )
+        if download_comment:
+            log(f"{fn}(): created PR issue comment with id {download_comment.id}")
+        else:
+            log(f"{fn}(): failed to create PR issue comment")
+        raise ValueError("Unable to download PR and/or sync changes")
 
 
 def apply_cvmfs_customizations(cvmfs_customizations, arch_job_dir):
@@ -454,8 +544,10 @@ def prepare_jobs(pr, cfg, event_info, action_filter):
         log(f"{fn}(): job_dir '{job_dir}'")
 
         # TODO optimisation? download once, copy and cleanup initial copy?
-        download_pr(base_repo_name, base_branch_name, pr, job_dir)
-
+        download_pr_output, download_pr_error, download_pr_exit_code, error_stage = download_pr(
+            base_repo_name, base_branch_name, pr, job_dir
+        )
+        comment_download_pr(base_repo_name, pr, download_pr_exit_code, download_pr_error, error_stage)
         # prepare job configuration file 'job.cfg' in directory <job_dir>/cfg
         cpu_target = '/'.join(arch.split('/')[1:])
         os_type = arch.split('/')[0]
@@ -774,6 +866,8 @@ def request_bot_build_issue_comments(repo_name, pr_number):
         status_table (dict): dictionary with 'arch', 'date', 'status', 'url' and 'result'
             for all the finished builds;
     """
+    fn = sys._getframe().f_code.co_name
+
     status_table = {'arch': [], 'date': [], 'status': [], 'url': [], 'result': []}
 
     cfg = config.read_config()
@@ -794,9 +888,12 @@ def request_bot_build_issue_comments(repo_name, pr_number):
         first_line = comment['body'].split('\n')[0]
         arch_map = get_architecture_targets(cfg)
         for arch in arch_map.keys():
-            target_arch = '/'.join(arch.split('/')[1:])
+            # drop the first element in arch (which names the OS type) and join the remaining items with '-'
+            target_arch = '-'.join(arch.split('/')[1:])
             if target_arch in first_line:
                 status_table['arch'].append(target_arch)
+            else:
+                log(f"{fn}(): target_arch '{target_arch}' not found in first line '{first_line}'")
 
         # get date, status, url and result from the markdown table
         comment_table = comment['body'][comment['body'].find('|'):comment['body'].rfind('|')+1]
@@ -826,7 +923,7 @@ def request_bot_build_issue_comments(repo_name, pr_number):
                 status_table['url'].append(comment['html_url'])
                 if 'FAILURE' in row['comment']:
                     status_table['result'].append(':cry: FAILURE')
-                elif 'SUCCESS' in value['comment']:
+                elif 'SUCCESS' in row['comment']:
                     status_table['result'].append(':grin: SUCCESS')
                 elif 'UNKNOWN' in row['comment']:
                     status_table['result'].append(':shrug: UNKNOWN')
diff --git a/tasks/deploy.py b/tasks/deploy.py
index 39b85a7b..8fc8d708 100644
--- a/tasks/deploy.py
+++ b/tasks/deploy.py
@@ -26,6 +26,7 @@
 from tasks.build import CFG_DIRNAME, JOB_CFG_FILENAME, JOB_REPO_ID, JOB_REPOSITORY
 from tasks.build import get_build_env_cfg
 from tools import config, pr_comments, run_cmd
+from tools.job_metadata import read_job_metadata_from_file
 
 
 BUCKET_NAME = "bucket_name"
@@ -33,7 +34,9 @@
 DEPLOY_PERMISSION = "deploy_permission"
 ENDPOINT_URL = "endpoint_url"
 JOBS_BASE_DIR = "jobs_base_dir"
+METADATA_PREFIX = "metadata_prefix"
 NO_DEPLOY_PERMISSION_COMMENT = "no_deploy_permission_comment"
+TARBALL_PREFIX = "tarball_prefix"
 TARBALL_UPLOAD_SCRIPT = "tarball_upload_script"
 UPLOAD_POLICY = "upload_policy"
 
@@ -74,6 +77,26 @@ def determine_job_dirs(pr_number):
     return job_directories
 
 
+def determine_pr_comment_id(job_dir):
+    """
+    Determine pr_comment_id by reading _bot_job{JOBID}.metadata in job_dir.
+
+    Args:
+        job_dir (string): working directory of the job
+
+    Returns:
+        (int): id of comment corresponding to job in pull request or -1
+    """
+    # assumes that last part of job_dir encodes the job's id
+    job_id = os.path.basename(os.path.normpath(job_dir))
+    job_metadata_file = os.path.join(job_dir, f"_bot_job{job_id}.metadata")
+    job_metadata = read_job_metadata_from_file(job_metadata_file)
+    if job_metadata and "pr_comment_id" in job_metadata:
+        return int(job_metadata["pr_comment_id"])
+    else:
+        return -1
+
+
 def determine_slurm_out(job_dir):
     """
     Determine path to job stdout/err output file for a given job directory.
@@ -167,9 +190,9 @@ def check_build_status(slurm_out, eessi_tarballs):
     return False
 
 
-def update_pr_comment(tarball, repo_name, pr_number, state, msg):
+def update_pr_comment(tarball, repo_name, pr_number, pr_comment_id, state, msg):
     """
-    Update pull request comment which contains specific tarball name.
+    Update pull request comment for the given comment id or tarball name.
 
     Args:
         tarball (string): name of tarball that is looked for in a PR comment
@@ -181,36 +204,18 @@ def update_pr_comment(tarball, repo_name, pr_number, state, msg):
     Returns:
         None (implicitly)
     """
-    funcname = sys._getframe().f_code.co_name
-
     gh = github.get_instance()
     repo = gh.get_repo(repo_name)
     pull_request = repo.get_pull(pr_number)
 
-    # TODO does this always return all comments?
-    comments = pull_request.get_issue_comments()
-    for comment in comments:
-        # NOTE
-        # adjust search string if format changed by event handler
-        # (separate process running eessi_bot_event_handler.py)
-        re_tarball = f".*{tarball}.*"
-        comment_match = re.search(re_tarball, comment.body)
-
-        if comment_match:
-            log(f"{funcname}(): found comment with id {comment.id}")
-
-            issue_comment = pull_request.get_issue_comment(int(comment.id))
-
-            dt = datetime.now(timezone.utc)
-            comment_update = (f"\n|{dt.strftime('%b %d %X %Z %Y')}|{state}|"
-                              f"transfer of `{tarball}` to S3 bucket {msg}|")
+    issue_comment = pr_comments.determine_issue_comment(pull_request, pr_comment_id, tarball)
+    if issue_comment:
+        dt = datetime.now(timezone.utc)
+        comment_update = (f"\n|{dt.strftime('%b %d %X %Z %Y')}|{state}|"
+                          f"transfer of `{tarball}` to S3 bucket {msg}|")
 
-            # append update to existing comment
-            issue_comment.edit(issue_comment.body + comment_update)
-
-            # leave 'for' loop (only update one comment, because tarball
-            # should only be referenced in one comment)
-            break
+        # append update to existing comment
+        issue_comment.edit(issue_comment.body + comment_update)
 
 
 def append_tarball_to_upload_log(tarball, job_dir):
@@ -232,7 +237,7 @@ def append_tarball_to_upload_log(tarball, job_dir):
         upload_log.write(f"{job_plus_tarball}\n")
 
 
-def upload_tarball(job_dir, build_target, timestamp, repo_name, pr_number):
+def upload_tarball(job_dir, build_target, timestamp, repo_name, pr_number, pr_comment_id):
     """
     Upload built tarball to an S3 bucket.
@@ -242,6 +247,7 @@ def upload_tarball(job_dir, build_target, timestamp, repo_name, pr_number):
         timestamp (int): timestamp of the tarball
         repo_name (string): repository of the pull request
         pr_number (int): number of the pull request
+        pr_comment_id (int): id of the pull request comment
 
     Returns:
         None (implicitly)
@@ -258,11 +264,21 @@ def upload_tarball(job_dir, build_target, timestamp, repo_name, pr_number):
     tarball_upload_script = deploycfg.get(TARBALL_UPLOAD_SCRIPT)
     endpoint_url = deploycfg.get(ENDPOINT_URL) or ''
     bucket_spec = deploycfg.get(BUCKET_NAME)
+    metadata_prefix = deploycfg.get(METADATA_PREFIX)
+    tarball_prefix = deploycfg.get(TARBALL_PREFIX)
 
     # if bucket_spec value looks like a dict, try parsing it as such
     if bucket_spec.lstrip().startswith('{'):
         bucket_spec = json.loads(bucket_spec)
 
+    # if metadata_prefix value looks like a dict, try parsing it as such
+    if metadata_prefix.lstrip().startswith('{'):
+        metadata_prefix = json.loads(metadata_prefix)
+
+    # if tarball_prefix value looks like a dict, try parsing it as such
+    if tarball_prefix.lstrip().startswith('{'):
+        tarball_prefix = json.loads(tarball_prefix)
+
     jobcfg_path = os.path.join(job_dir, CFG_DIRNAME, JOB_CFG_FILENAME)
     jobcfg = config.read_config(jobcfg_path)
     target_repo_id = jobcfg[JOB_REPOSITORY][JOB_REPO_ID]
@@ -274,16 +290,50 @@ def upload_tarball(job_dir, build_target, timestamp, repo_name, pr_number):
         # bucket spec may be a mapping of target repo id to bucket name
         bucket_name = bucket_spec.get(target_repo_id)
         if bucket_name is None:
-            update_pr_comment(tarball, repo_name, pr_number, "not uploaded",
+            update_pr_comment(tarball, repo_name, pr_number, pr_comment_id, "not uploaded",
                               f"failed (no bucket specified for {target_repo_id})")
             return
         else:
             log(f"Using bucket for {target_repo_id}: {bucket_name}")
     else:
-        update_pr_comment(tarball, repo_name, pr_number, "not uploaded",
+        update_pr_comment(tarball, repo_name, pr_number, pr_comment_id, "not uploaded",
                           f"failed (incorrect bucket spec: {bucket_spec})")
         return
 
+    if isinstance(metadata_prefix, str):
+        metadata_prefix_arg = metadata_prefix
+        log(f"Using specified metadata prefix: {metadata_prefix_arg}")
+    elif isinstance(metadata_prefix, dict):
+        # metadata prefix spec may be a mapping of target repo id to metadata prefix
+        metadata_prefix_arg = metadata_prefix.get(target_repo_id)
+        if metadata_prefix_arg is None:
+            update_pr_comment(tarball, repo_name, pr_number, pr_comment_id, "not uploaded",
+                              f"failed (no metadata prefix specified for {target_repo_id})")
+            return
+        else:
+            log(f"Using metadata prefix for {target_repo_id}: {metadata_prefix_arg}")
+    else:
+        update_pr_comment(tarball, repo_name, pr_number, pr_comment_id, "not uploaded",
+                          f"failed (incorrect metadata prefix spec: {metadata_prefix})")
+        return
+
+    if isinstance(tarball_prefix, str):
+        tarball_prefix_arg = tarball_prefix
+        log(f"Using specified tarball prefix: {tarball_prefix_arg}")
+    elif isinstance(tarball_prefix, dict):
+        # tarball prefix spec may be a mapping of target repo id to tarball prefix
+        tarball_prefix_arg = tarball_prefix.get(target_repo_id)
+        if tarball_prefix_arg is None:
+            update_pr_comment(tarball, repo_name, pr_number, pr_comment_id, "not uploaded",
+                              f"failed (no tarball prefix specified for {target_repo_id})")
+            return
+        else:
+            log(f"Using tarball prefix for {target_repo_id}: {tarball_prefix_arg}")
+    else:
+        update_pr_comment(tarball, repo_name, pr_number, pr_comment_id, "not uploaded",
+                          f"failed (incorrect tarball prefix spec: {tarball_prefix})")
+        return
+
     # run 'eessi-upload-to-staging {abs_path}'
     # (1) construct command line
     #     script assumes a few defaults:
     cmd_args.extend(['--bucket-name', bucket_name])
     if len(endpoint_url) > 0:
         cmd_args.extend(['--endpoint-url', endpoint_url])
+    if len(metadata_prefix_arg) > 0:
+        cmd_args.extend(['--metadata-prefix', metadata_prefix_arg])
     cmd_args.extend(['--repository', repo_name])
-    cmd_args.extend(['--pull-request', str(pr_number)])
+    cmd_args.extend(['--pull-request-number', str(pr_number)])
+    cmd_args.extend(['--pr-comment-id', str(pr_comment_id)])
+    if len(tarball_prefix_arg) > 0:
+        cmd_args.extend(['--tarball-prefix', tarball_prefix_arg])
     cmd_args.append(abs_path)
     upload_cmd = ' '.join(cmd_args)
 
@@ -307,11 +362,11 @@
         # add file to 'job_dir/../uploaded.txt'
         append_tarball_to_upload_log(tarball, job_dir)
         # update pull request comment
-        update_pr_comment(tarball, repo_name, pr_number, "uploaded",
+        update_pr_comment(tarball, repo_name, pr_number, pr_comment_id, "uploaded",
                           "succeeded")
     else:
         # update pull request comment
-        update_pr_comment(tarball, repo_name, pr_number, "not uploaded",
+        update_pr_comment(tarball, repo_name, pr_number, pr_comment_id, "not uploaded",
                           "failed")
 
 
@@ -371,10 +426,13 @@ def determine_successful_jobs(job_dirs):
     for job_dir in job_dirs:
         slurm_out = determine_slurm_out(job_dir)
         eessi_tarballs = determine_eessi_tarballs(job_dir)
+        pr_comment_id = determine_pr_comment_id(job_dir)
+
         if check_build_status(slurm_out, eessi_tarballs):
             log(f"{funcname}(): SUCCESSFUL build in '{job_dir}'")
             successes.append({'job_dir': job_dir,
                               'slurm_out': slurm_out,
+                              'pr_comment_id': pr_comment_id,
                               'eessi_tarballs': eessi_tarballs})
         else:
             log(f"{funcname}(): FAILED build in '{job_dir}'")
@@ -403,9 +461,9 @@ def determine_tarballs_to_deploy(successes, upload_policy):
     log(f"{funcname}(): num successful jobs {len(successes)}")
 
     to_be_deployed = {}
-    for s in successes:
+    for job in successes:
         # all tarballs for successful job
-        tarballs = s["eessi_tarballs"]
+        tarballs = job["eessi_tarballs"]
         log(f"{funcname}(): num tarballs {len(tarballs)}")
 
         # full path to first tarball for successful job
@@ -438,7 +496,7 @@ def determine_tarballs_to_deploy(successes, upload_policy):
             else:
                 deploy = True
         elif upload_policy == "once":
-            uploaded = uploaded_before(build_target, s["job_dir"])
+            uploaded = uploaded_before(build_target, job["job_dir"])
             if uploaded is None:
                 deploy = True
             else:
@@ -447,7 +505,8 @@ def determine_tarballs_to_deploy(successes, upload_policy):
                     f"{indent_fname}has been uploaded through '{uploaded}'")
 
         if deploy:
-            to_be_deployed[build_target] = {"job_dir": s["job_dir"],
+            to_be_deployed[build_target] = {"job_dir": job["job_dir"],
+                                            "pr_comment_id": job["pr_comment_id"],
                                             "timestamp": timestamp}
 
     return to_be_deployed
@@ -518,4 +577,5 @@ def deploy_built_artefacts(pr, event_info):
     for target, job in to_be_deployed.items():
         job_dir = job['job_dir']
         timestamp = job['timestamp']
-        upload_tarball(job_dir, target, timestamp, repo_name, pr.number)
+        pr_comment_id = job['pr_comment_id']
+        upload_tarball(job_dir, target, timestamp, repo_name, pr.number, pr_comment_id)
diff --git a/tests/test_eessi_bot_job_manager.py b/tests/test_eessi_bot_job_manager.py
index c04033b5..bd492919 100644
--- a/tests/test_eessi_bot_job_manager.py
+++ b/tests/test_eessi_bot_job_manager.py
@@ -10,35 +10,16 @@
 #
 # license: GPLv2
 #
-import os
+
 import shutil
 
 from eessi_bot_job_manager import EESSIBotSoftwareLayerJobManager
 
 
-def test_read_job_pr_metadata(tmpdir):
+def test_determine_running_jobs():
     # copy needed app.cfg from tests directory
     shutil.copyfile("tests/test_app.cfg", "app.cfg")
 
-    # if metadata file does not exist, we should get None as return value
-    job_manager = EESSIBotSoftwareLayerJobManager()
-    path = os.path.join(tmpdir, 'test.metadata')
-    assert job_manager.read_job_pr_metadata(path) is None
-
-    with open(path, 'w') as fp:
-        fp.write('''[PR]
-        repo=test
-        pr_number=12345''')
-
-    metadata_pr = job_manager.read_job_pr_metadata(path)
-    expected = {
-        "repo": "test",
-        "pr_number": "12345",
-    }
-    assert metadata_pr == expected
-
-
-def test_determine_running_jobs():
     job_manager = EESSIBotSoftwareLayerJobManager()
 
     assert job_manager.determine_running_jobs({}) == []
diff --git a/tests/test_tools_job_metadata.py b/tests/test_tools_job_metadata.py
new file mode 100644
index 00000000..f5542c6f
--- /dev/null
+++ b/tests/test_tools_job_metadata.py
@@ -0,0 +1,33 @@
+# Tests for 'tools/job_metadata.py' of the EESSI build-and-deploy bot,
+# see https://github.com/EESSI/eessi-bot-software-layer
+#
+# The bot helps with requests to add software installations to the
+# EESSI software layer, see https://github.com/EESSI/software-layer
+#
+# author: Thomas Roeblitz (@trz42)
+#
+# license: GPLv2
+#
+
+import os
+
+from tools.job_metadata import read_job_metadata_from_file
+
+
+def test_read_job_metadata_from_file(tmpdir):
+    logfile = os.path.join(tmpdir, 'test_read_job_metadata_from_file.log')
+    # if metadata file does not exist, we should get None as return value
+    path = os.path.join(tmpdir, 'test.metadata')
+    assert read_job_metadata_from_file(path, logfile) is None
+
+    with open(path, 'w') as fp:
+        fp.write('''[PR]
+        repo=test
+        pr_number=12345''')
+
+    metadata_pr = read_job_metadata_from_file(path, logfile)
+    expected = {
+        "repo": "test",
+        "pr_number": "12345",
+    }
+    assert metadata_pr == expected
diff --git a/tools/commands.py b/tools/commands.py
index cdc2c8f7..5db8f7f7 100644
--- a/tools/commands.py
+++ b/tools/commands.py
@@ -20,6 +20,19 @@
 from tools.filter import EESSIBotActionFilter, EESSIBotActionFilterError
 
 
+def contains_any_bot_command(body):
+    """
+    Checks if argument contains any bot command.
+
+    Args:
+        body (string): possibly multi-line string that may contain a bot command
+
+    Returns:
+        (bool): True if bot command found, False otherwise
+    """
+    return any(map(get_bot_command, body.split('\n')))
+
+
 def get_bot_command(line):
     """
     Retrieve bot command from a line.
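
Side note (illustrative, not part of the patch): `contains_any_bot_command` scans a comment line by line, so a command is honoured no matter where it appears in a multi-line comment body. A minimal sketch, assuming `get_bot_command` returns a (truthy) match for lines of the form `bot: COMMAND [ARGS*]` as described in the event handler:

```
# illustrative only: how the event handler's new guard filters comments
from tools.commands import contains_any_bot_command

# a bot command on any line of the comment body is enough to process it
assert contains_any_bot_command("Some context first.\nbot: status")
# comments without a bot command are ignored, keeping the bot quiet
assert not contains_any_bot_command("LGTM, nice work!")
```
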
diff --git a/tools/job_metadata.py b/tools/job_metadata.py
index a90cad90..b8cd4f0d 100644
--- a/tools/job_metadata.py
+++ b/tools/job_metadata.py
@@ -80,3 +80,28 @@ def read_metadata_file(metadata_path, log_file=None):
     else:
         log(f"No metadata file found at {metadata_path}.", log_file)
         return None
+
+
+def read_job_metadata_from_file(filepath, log_file=None):
+    """
+    Read job metadata from file
+
+    Args:
+        filepath (string): path to job metadata file
+        log_file (string): path to log file
+
+    Returns:
+        job_metadata (dict): dictionary containing the job metadata ('PR'
+            section) or None if the file could not be read
+    """
+
+    metadata = read_metadata_file(filepath, log_file=log_file)
+    if metadata:
+        # get PR section
+        if "PR" in metadata:
+            metadata_pr = metadata["PR"]
+        else:
+            metadata_pr = {}
+        return metadata_pr
+    else:
+        log(f"Metadata file '{filepath}' does not exist or could not be read", log_file)
+        return None
diff --git a/tools/pr_comments.py b/tools/pr_comments.py
index 5c5e6e9b..1b391ed7 100644
--- a/tools/pr_comments.py
+++ b/tools/pr_comments.py
@@ -48,6 +48,27 @@ def create_comment(repo_name, pr_number, comment):
     return pull_request.create_issue_comment(comment)
 
 
+def determine_issue_comment(pull_request, pr_comment_id, search_pattern=None):
+    """
+    Determine issue comment for a given id or using a search pattern.
+
+    Args:
+        pull_request (github.PullRequest.PullRequest): instance representing the pull request
+        pr_comment_id (int): id of the pull request comment to be returned;
+            -1 to search by search_pattern instead
+        search_pattern (string): pattern used to determine the comment to the pull request to be returned
+
+    Returns:
+        github.IssueComment.IssueComment instance or None (note, github refers to
+        PyGithub, not the github from the internal connections module)
+    """
+
+    if pr_comment_id != -1:
+        return pull_request.get_issue_comment(pr_comment_id)
+    else:
+        # use search pattern to determine issue comment
+        return get_comment(pull_request, search_pattern)
+
+
 @retry(Exception, tries=5, delay=1, backoff=2, max_delay=30)
 def get_comment(pr, search_pattern):
     """
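
Closing note (illustrative, not part of the patch): the pieces above fit together as follows. `eessi-upload-to-staging` writes the PR comment id into the `link2pr` section of the metadata file; at deploy time, `determine_pr_comment_id()` reads it back via `read_job_metadata_from_file()` so that `update_pr_comment()` can address the right comment directly, and `determine_issue_comment()` falls back to a search by tarball name otherwise. A hedged sketch of that lookup, where the job directory, job id and log file name are made up:

```
# illustrative only: mirrors determine_pr_comment_id() in tasks/deploy.py
import os
from tools.job_metadata import read_job_metadata_from_file

job_dir = "/path/to/jobs/2199/4370345"  # hypothetical job working directory
job_id = os.path.basename(os.path.normpath(job_dir))
job_metadata_file = os.path.join(job_dir, f"_bot_job{job_id}.metadata")

job_metadata = read_job_metadata_from_file(job_metadata_file, "bot.log")
if job_metadata and "pr_comment_id" in job_metadata:
    pr_comment_id = int(job_metadata["pr_comment_id"])  # update exactly this comment
else:
    pr_comment_id = -1  # fall back to searching comments by tarball name
```
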