From e705494722c82c773d6df63476c2b4ceef8a0292 Mon Sep 17 00:00:00 2001
From: Thomas Roeblitz
Date: Tue, 26 Mar 2024 13:18:18 +0100
Subject: [PATCH] replacing 'tarball' with 'artefact' where it makes most sense

---
 README.md                       | 39 +++++++++++---------
 app.cfg.example                 | 20 +++++------
 scripts/eessi-upload-to-staging | 40 ++++++++++-----------
 tasks/deploy.py                 | 64 ++++++++++++++++-----------------
 tests/test_app.cfg              |  6 ++--
 5 files changed, 87 insertions(+), 82 deletions(-)

diff --git a/README.md b/README.md
index ca96d2eb..b26748ea 100644
--- a/README.md
+++ b/README.md
@@ -175,7 +175,9 @@ You can exit the virtual environment simply by running `deactivate`.

### Step 4.1: Installing tools to access S3 bucket

-The [`scripts/eessi-upload-to-staging`](https://github.com/EESSI/eessi-bot-software-layer/blob/main/scripts/eessi-upload-to-staging) script uploads a tarball and an associated metadata file to an S3 bucket.
+The
+[`scripts/eessi-upload-to-staging`](https://github.com/EESSI/eessi-bot-software-layer/blob/main/scripts/eessi-upload-to-staging)
+script uploads an artefact and an associated metadata file to an S3 bucket.

It needs two tools for this:
* the `aws` command to actually upload the files;
@@ -444,14 +446,17 @@ information about the result of the command that was run (can be empty).

The `[deploycfg]` section defines settings for uploading built artefacts (tarballs).

```
-tarball_upload_script = PATH_TO_EESSI_BOT/scripts/eessi-upload-to-staging
+artefact_upload_script = PATH_TO_EESSI_BOT/scripts/eessi-upload-to-staging
```
-`tarball_upload_script` provides the location for the script used for uploading built software packages to an S3 bucket.
+`artefact_upload_script` provides the location for the script used for uploading built software packages to an S3 bucket.

```
endpoint_url = URL_TO_S3_SERVER
```
-`endpoint_url` provides an endpoint (URL) to a server hosting an S3 bucket. The server could be hosted by a commercial cloud provider like AWS or Azure, or running in a private environment, for example, using Minio. The bot uploads tarballs to the bucket which will be periodically scanned by the ingestion procedure at the Stratum 0 server.
+`endpoint_url` provides an endpoint (URL) to a server hosting an S3 bucket. The
+server could be hosted by a commercial cloud provider like AWS or Azure, or
+running in a private environment, for example, using Minio. The bot uploads
+artefacts to the bucket, which is periodically scanned by the ingestion procedure at the Stratum 0 server.

```ini
bucket_name = {
}
```

-`bucket_name` is the name of the bucket used for uploading of tarballs.
+`bucket_name` is the name of the bucket used for uploading artefacts.
The bucket must be available on the default server (`https://${bucket_name}.s3.amazonaws.com`), or the one provided via `endpoint_url`.

`bucket_name` can be specified as a string value to use the same bucket for all target repos, or it can be a mapping from target repo id to bucket name.
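For orientation: the transfer itself boils down to an `aws` S3 copy against the configured bucket and endpoint. A minimal sketch of such a call (all values below are hypothetical; the script's actual invocation is wrapped in its `upload_to_staging_bucket` function, which this patch leaves untouched):

```
# hypothetical example: copy an artefact into an S3 bucket on a non-AWS server;
# bucket name, endpoint URL, prefix, and file name are placeholders
aws --endpoint-url https://s3.example.org s3 cp \
    eessi-2023.06-software-linux-x86_64-1234567890.tar.gz \
    s3://eessi-staging/2023.06/software/linux/x86_64/1234567890/
```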
@@ -481,7 +486,7 @@ The `upload_policy` defines what policy is used for uploading built artefacts to

|`upload_policy` value|Policy|
|:--------|:--------------------------------|
|`all`|Upload all artefacts (multiple uploads of the same artefact possible).|
-|`latest`|For each build target (prefix in tarball name `eessi-VERSION-{software,init,compat}-OS-ARCH)` only upload the latest built artefact.|
+|`latest`|For each build target (prefix in artefact name `eessi-VERSION-{software,init,compat}-OS-ARCH`) only upload the latest built artefact.|
|`once`|Upload any built artefact for the build target only once.|
|`none`|Do not upload any built artefacts.|

@@ -496,30 +501,30 @@ deployment), or a space delimited list of GitHub accounts.

no_deploy_permission_comment = Label `bot:deploy` has been set by user `{deploy_labeler}`, but this person does not have permission to trigger deployments
```
This defines a message that is added to the status table in a PR comment
-corresponding to a job whose tarball should have been uploaded (e.g., after
+corresponding to a job whose artefact should have been uploaded (e.g., after
setting the `bot:deploy` label).

```
metadata_prefix = LOCATION_WHERE_METADATA_FILE_GETS_DEPOSITED
-tarball_prefix = LOCATION_WHERE_TARBALL_GETS_DEPOSITED
+artefact_prefix = LOCATION_WHERE_ARTEFACT_GETS_DEPOSITED
```

These two settings are used to define where (which directory) in the S3 bucket
-(see `bucket_name` above) the metadata file and the tarball will be stored. The
+(see `bucket_name` above) the metadata file and the artefact will be stored. The
value `LOCATION...` can be a string value to always use the same 'prefix'
regardless of the target CVMFS repository, or can be a mapping of a target
repository id (see also `repo_target_map` below) to a prefix.

The prefix itself can use some (environment) variables that are set within
-the upload script (see `tarball_upload_script` above). Currently those are:
+the upload script (see `artefact_upload_script` above). Currently those are:
* `'${github_repository}'` (which would be expanded to the full name of the
  GitHub repository, e.g., `EESSI/software-layer`),
* `'${legacy_aws_path}'` (which expands to the legacy/old prefix being used for
-  storing tarballs/metadata files, the old prefix is
+  storing artefacts/metadata files, the old prefix is
  `EESSI_VERSION/TARBALL_TYPE/OS_TYPE/CPU_ARCHITECTURE/TIMESTAMP/`), _and_
* `'${pull_request_number}'` (which would be expanded to the number of the pull
-  request from which the tarball originates).
+  request from which the artefact originates).

Note, it's important to single-quote (`'`) the variables as shown above, because
they are likely not yet defined when the bot calls the upload script.

The list of supported variables can be shown by running
`scripts/eessi-upload-to-staging --list-variables`.

**Examples:**
```
metadata_prefix = {"eessi.io-2023.06": "new/${github_repository}/${pull_request_number}"}
-tarball_prefix = {
+artefact_prefix = {
    "eessi-pilot-2023.06": "",
    "eessi.io-2023.06": "new/${github_repository}/${pull_request_number}"
}
```
@@ -657,9 +662,9 @@ running_job = job `{job_id}` is running

The `[finished_job_comments]` section sets templates for messages about finished jobs.

```
-success = :grin: SUCCESS tarball `{tarball_name}` ({tarball_size} GiB) in job dir
+success = :grin: SUCCESS tarball `{artefact_name}` ({artefact_size} GiB) in job dir
```
-`success` specifies the message for a successful job that produced a tarball.
+`success` specifies the message for a successful job that produced an artefact.
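Returning to the `artefact_prefix`/`metadata_prefix` settings above: the following sketch reproduces what the upload script does with them (the artefact name and PR number are hypothetical; the `legacy_aws_path` derivation is the `tr`/`perl` pipeline from `scripts/eessi-upload-to-staging`, which appears later in this patch):

```
# hypothetical artefact produced by a build job
file=eessi-2023.06-software-linux-x86_64-1234567890.tar.gz

# variables exported by the upload script before it calls envsubst
export github_repository="EESSI/software-layer"
export pull_request_number="42"
export legacy_aws_path=$(basename ${file} | tr -s '-' '/' \
    | perl -pe 's/^eessi.//;' | perl -pe 's/\.tar\.gz$//;')

echo "${legacy_aws_path}"
# -> 2023.06/software/linux/x86_64/1234567890

envsubst <<< 'new/${github_repository}/${pull_request_number}'
# -> new/EESSI/software-layer/42
```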
```
failure = :cry: FAILURE

@@ -687,12 +692,12 @@ no_tarball_message = Slurm output lacks message about created tarball.

`no_tarball_message` is used to signal the lack of a message about a created tarball.

```
-no_matching_tarball = No tarball matching `{tarball_pattern}` found in job dir.
+no_matching_tarball = No tarball matching `{artefact_pattern}` found in job dir.
```
`no_matching_tarball` is used to signal a missing tarball.

```
-multiple_tarballs = Found {num_tarballs} tarballs in job dir - only 1 matching `{tarball_pattern}` expected.
+multiple_tarballs = Found {num_artefacts} tarballs in job dir - only 1 matching `{artefact_pattern}` expected.
```
`multiple_tarballs` is used to report that multiple tarballs have been found.

diff --git a/app.cfg.example b/app.cfg.example
index 3f9b3cf5..867363bc 100644
--- a/app.cfg.example
+++ b/app.cfg.example
@@ -127,7 +127,7 @@ no_build_permission_comment = Label `bot:build` has been set by user `{build_lab

[deploycfg]
# script for uploading built software packages
-tarball_upload_script = PATH_TO_EESSI_BOT/scripts/eessi-upload-to-staging
+artefact_upload_script = PATH_TO_EESSI_BOT/scripts/eessi-upload-to-staging

# URL to S3/minio bucket
# if attribute is set, bucket_base will be constructed as follows
@@ -160,11 +160,11 @@ upload_policy = once
# value can be a space delimited list of GH accounts
deploy_permission =

-# template for comment when user who set a label has no permission to trigger deploying tarballs
+# template for comment when user who set a label has no permission to trigger deploying artefacts
no_deploy_permission_comment = Label `bot:deploy` has been set by user `{deploy_labeler}`, but this person does not have permission to trigger deployments

# settings for where (directory) in the S3 bucket to store the metadata file and
-# the tarball
+# the artefact
# - Can be a string value to always use the same 'prefix' regardless of the target
#   CVMFS repository, or can be a mapping of a target repository id (see also
#   repo_target_map) to a prefix.
@@ -173,17 +173,17 @@ no_deploy_permission_comment = Label `bot:deploy` has been set by user `{deploy_
#   * 'github_repository' (which would be expanded to the full name of the GitHub
#     repository, e.g., 'EESSI/software-layer'),
#   * 'legacy_aws_path' (which expands to the legacy/old prefix being used for
-#     storing tarballs/metadata files) and
+#     storing artefacts/metadata files) and
#   * 'pull_request_number' (which would be expanded to the number of the pull
-#     request from which the tarball originates).
+#     request from which the artefact originates).
# - The list of supported variables can be shown by running
#   `scripts/eessi-upload-to-staging --list-variables`.
# - Examples:
#   metadata_prefix = {"eessi.io-2023.06": "new/${github_repository}/${pull_request_number}"}
-#   tarball_prefix = {"eessi-pilot-2023.06": "", "eessi.io-2023.06": "new/${github_repository}/${pull_request_number}"}
+#   artefact_prefix = {"eessi-pilot-2023.06": "", "eessi.io-2023.06": "new/${github_repository}/${pull_request_number}"}
# If left empty, the old/legacy prefix is being used.
metadata_prefix =
-tarball_prefix =
+artefact_prefix =

[architecturetargets]

@@ -247,14 +247,14 @@ running_job = job `{job_id}` is running

[finished_job_comments]
-success = :grin: SUCCESS tarball `{tarball_name}` ({tarball_size} GiB) in job dir
+success = :grin: SUCCESS tarball `{artefact_name}` ({artefact_size} GiB) in job dir
failure = :cry: FAILURE
no_slurm_out = No slurm output `{slurm_out}` in job dir
slurm_out = Found slurm output `{slurm_out}` in job dir
missing_modules = Slurm output lacks message "No missing modules!".
no_tarball_message = Slurm output lacks message about created tarball.
-no_matching_tarball = No tarball matching `{tarball_pattern}` found in job dir.
-multiple_tarballs = Found {num_tarballs} tarballs in job dir - only 1 matching `{tarball_pattern}` expected.
+no_matching_tarball = No tarball matching `{artefact_pattern}` found in job dir.
+multiple_tarballs = Found {num_artefacts} tarballs in job dir - only 1 matching `{artefact_pattern}` expected.
job_result_unknown_fmt =
:shrug: UNKNOWN _(click triangle for detailed information)_
job_test_unknown_fmt =
:shrug: UNKNOWN _(click triangle for detailed information)_
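As a quick sanity check of a configured prefix, the upload script can report which variables will be defined at expansion time; judging from the `variables=...` assignment in the script diff below, that list covers `github_repository`, `legacy_aws_path`, and `pull_request_number` (the exact output format is not shown in this patch):

```
# list the variables available for expansion in artefact_prefix / metadata_prefix
PATH_TO_EESSI_BOT/scripts/eessi-upload-to-staging --list-variables
```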
diff --git a/scripts/eessi-upload-to-staging b/scripts/eessi-upload-to-staging
index 45e52fbf..b5e4482d 100755
--- a/scripts/eessi-upload-to-staging
+++ b/scripts/eessi-upload-to-staging
@@ -38,7 +38,7 @@ function check_file_name

function create_metadata_file
{
-    _tarball=$1
+    _artefact=$1
    _url=$2
    _repository=$3
    _pull_request_number=$4
@@ -50,10 +50,10 @@ function create_metadata_file
        --arg un $(whoami) \
        --arg ip $(curl -s https://checkip.amazonaws.com) \
        --arg hn "$(hostname -f)" \
-        --arg fn "$(basename ${_tarball})" \
-        --arg sz "$(du -b "${_tarball}" | awk '{print $1}')" \
-        --arg ct "$(date -r "${_tarball}")" \
-        --arg sha256 "$(sha256sum "${_tarball}" | awk '{print $1}')" \
+        --arg fn "$(basename ${_artefact})" \
+        --arg sz "$(du -b "${_artefact}" | awk '{print $1}')" \
+        --arg ct "$(date -r "${_artefact}")" \
+        --arg sha256 "$(sha256sum "${_artefact}" | awk '{print $1}')" \
        --arg url "${_url}" \
        --arg repo "${_repository}" \
        --arg pr "${_pull_request_number}" \
@@ -70,6 +70,11 @@ function create_metadata_file
function display_help
{
    echo "Usage: $0 [OPTIONS] " >&2
+    echo "  -a | --artefact-prefix PREFIX  -  a directory to which the artefact" >&2
+    echo "                                    shall be uploaded; BASH variable" >&2
+    echo "                                    expansion will be applied; arg '-l'" >&2
+    echo "                                    lists variables that are defined at" >&2
+    echo "                                    the time of expansion" >&2
    echo "  -e | --endpoint-url URL        -  endpoint url (needed for non AWS S3)" >&2
    echo "  -h | --help                    -  display this usage information" >&2
    echo "  -i | --pr-comment-id           -  identifier of a PR comment; may be" >&2
@@ -88,11 +93,6 @@ function display_help
    echo "                                    link the upload to a PR" >&2
    echo "  -r | --repository FULL_NAME    -  a repository name ACCOUNT/REPONAME;" >&2
    echo "                                    used to link the upload to a PR" >&2
-    echo "  -t | --tarball-prefix PREFIX  -  a directory to which the tarball" >&2
-    echo "                                   shall be uploaded; BASH variable" >&2
-    echo "                                   expansion will be applied; arg '-l'" >&2
-    echo "                                   lists variables that are defined at" >&2
-    echo "                                   the time of expansion" >&2
}

if [[ $# -lt 1 ]]; then
@@ -123,7 +123,7 @@ github_repository="EESSI/software-layer"

# provided via options in the bot's config file app.cfg and/or command line argument
metadata_prefix=
-tarball_prefix=
+artefact_prefix=

# other variables
legacy_aws_path=
@@ -131,6 +131,10 @@ variables="github_repository legacy_aws_path pull_request_number"

while [[ $# -gt 0 ]]; do
    case $1 in
+        -a|--artefact-prefix)
+            artefact_prefix="$2"
+            shift 2
+            ;;
        -e|--endpoint-url)
            endpoint_url="$2"
            shift 2
            ;;
@@ -167,10 +171,6 @@ while [[ $# -gt 0 ]]; do
            github_repository="$2"
            shift 2
            ;;
-        -t|--tarball-prefix)
-            tarball_prefix="$2"
-            shift 2
-            ;;
        -*|--*)
            echo "Error: Unknown option: $1" >&2
            exit 1
@@ -204,17 +204,17 @@ for file in "$*"; do
    basefile=$( basename ${file} )
    if check_file_name ${basefile}; then
        if tar tf "${file}" | head -n1 > /dev/null; then
-            # 'legacy_aws_path' might be used in tarball_prefix or metadata_prefix
+            # 'legacy_aws_path' might be used in artefact_prefix or metadata_prefix
            # its purpose is to support the old/legacy method to derive the location
-            # where to store the tarball and metadata file
+            # where to store the artefact and metadata file
            export legacy_aws_path=$(basename ${file} | tr -s '-' '/' \
                | perl -pe 's/^eessi.//;' | perl -pe 's/\.tar\.gz$//;' )
-            if [ -z ${tarball_prefix} ]; then
+            if [ -z ${artefact_prefix} ]; then
                aws_path=${legacy_aws_path}
            else
                export pull_request_number
                export github_repository
-                aws_path=$(envsubst <<< "${tarball_prefix}")
+                aws_path=$(envsubst <<< "${artefact_prefix}")
            fi
            aws_file=$(basename ${file})
            echo "Creating metadata file"
@@ -233,7 +233,7 @@ for file in "$*"; do
            cat ${metadata_file}

            echo Uploading to "${url}"
-            echo " store tarball at ${aws_path}/${aws_file}"
+            echo " store artefact at ${aws_path}/${aws_file}"
            upload_to_staging_bucket \
                "${file}" \
                "${bucket_name}" \

diff --git a/tasks/deploy.py b/tasks/deploy.py
index 70925453..afd61662 100644
--- a/tasks/deploy.py
+++ b/tasks/deploy.py
@@ -31,6 +31,8 @@

from tools import config, job_metadata, pr_comments, run_cmd

+ARTEFACT_PREFIX = "artefact_prefix"
+ARTEFACT_UPLOAD_SCRIPT = "artefact_upload_script"
BUCKET_NAME = "bucket_name"
DEPLOYCFG = "deploycfg"
DEPLOY_PERMISSION = "deploy_permission"
@@ -38,8 +40,6 @@
JOBS_BASE_DIR = "jobs_base_dir"
METADATA_PREFIX = "metadata_prefix"
NO_DEPLOY_PERMISSION_COMMENT = "no_deploy_permission_comment"
-TARBALL_PREFIX = "tarball_prefix"
-TARBALL_UPLOAD_SCRIPT = "tarball_upload_script"
UPLOAD_POLICY = "upload_policy"

@@ -201,12 +201,12 @@ def check_job_status(job_dir):
    return False


-def update_pr_comment(tarball, repo_name, pr_number, pr_comment_id, state, msg):
+def update_pr_comment(artefact, repo_name, pr_number, pr_comment_id, state, msg):
    """
-    Update pull request comment for the given comment id or tarball name
+    Update pull request comment for the given comment id or artefact name

    Args:
-        tarball (string): name of tarball that is looked for in a PR comment
+        artefact (string): name of artefact that is looked for in a PR comment
        repo_name (string): name of the repository (USER_ORG/REPOSITORY)
        pr_number (int): pull request number
        state (string): value for state column to be used in update
@@ -219,23 +219,23 @@ def update_pr_comment(tarball, repo_name, pr_number, pr_comment_id, state, msg):
    repo = gh.get_repo(repo_name)
    pull_request = repo.get_pull(pr_number)

-    issue_comment = pr_comments.determine_issue_comment(pull_request, pr_comment_id, tarball)
+    issue_comment = pr_comments.determine_issue_comment(pull_request, pr_comment_id, artefact)
    if issue_comment:
        dt = datetime.now(timezone.utc)
        comment_update = (f"\n|{dt.strftime('%b %d %X %Z %Y')}|{state}|"
-                          f"transfer of `{tarball}` to S3 bucket {msg}|")
+                          f"transfer of `{artefact}` to S3 bucket {msg}|")
        # append update to existing comment
        issue_comment.edit(issue_comment.body + comment_update)


-def append_tarball_to_upload_log(tarball, job_dir):
+def append_artefact_to_upload_log(artefact, job_dir):
    """
-    Append tarball to upload log.
+    Append artefact to upload log.
    Args:
-        tarball (string): name of tarball that has been uploaded
-        job_dir (string): directory of the job that built the tarball
+        artefact (string): name of artefact that has been uploaded
+        job_dir (string): directory of the job that built the artefact

    Returns:
        None (implicitly)
@@ -244,8 +244,8 @@
    pr_base_dir = os.path.dirname(job_dir)
    uploaded_txt = os.path.join(pr_base_dir, 'uploaded.txt')
    with open(uploaded_txt, "a") as upload_log:
-        job_plus_tarball = os.path.join(os.path.basename(job_dir), tarball)
-        upload_log.write(f"{job_plus_tarball}\n")
+        job_plus_artefact = os.path.join(os.path.basename(job_dir), artefact)
+        upload_log.write(f"{job_plus_artefact}\n")


def upload_artefact(job_dir, payload, timestamp, repo_name, pr_number, pr_comment_id):
@@ -273,11 +273,11 @@ def upload_artefact(job_dir, payload, timestamp, repo_name, pr_number, pr_commen
    # obtain config settings
    cfg = config.read_config()
    deploycfg = cfg[DEPLOYCFG]
-    tarball_upload_script = deploycfg.get(TARBALL_UPLOAD_SCRIPT)
+    artefact_upload_script = deploycfg.get(ARTEFACT_UPLOAD_SCRIPT)
    endpoint_url = deploycfg.get(ENDPOINT_URL) or ''
    bucket_spec = deploycfg.get(BUCKET_NAME)
    metadata_prefix = deploycfg.get(METADATA_PREFIX)
-    tarball_prefix = deploycfg.get(TARBALL_PREFIX)
+    artefact_prefix = deploycfg.get(ARTEFACT_PREFIX)

    # if bucket_spec value looks like a dict, try parsing it as such
    if bucket_spec.lstrip().startswith('{'):
@@ -287,9 +287,9 @@ def upload_artefact(job_dir, payload, timestamp, repo_name, pr_number, pr_commen
    if metadata_prefix.lstrip().startswith('{'):
        metadata_prefix = json.loads(metadata_prefix)

-    # if tarball_prefix value looks like a dict, try parsing it as such
-    if tarball_prefix.lstrip().startswith('{'):
-        tarball_prefix = json.loads(tarball_prefix)
+    # if artefact_prefix value looks like a dict, try parsing it as such
+    if artefact_prefix.lstrip().startswith('{'):
+        artefact_prefix = json.loads(artefact_prefix)

    jobcfg_path = os.path.join(job_dir, CFG_DIRNAME, JOB_CFG_FILENAME)
    jobcfg = config.read_config(jobcfg_path)
@@ -329,21 +329,21 @@ def upload_artefact(job_dir, payload, timestamp, repo_name, pr_number, pr_commen
                          f"failed (incorrect metadata prefix spec: {metadata_prefix_arg})")
        return

-    if isinstance(tarball_prefix, str):
-        tarball_prefix_arg = tarball_prefix
-        log(f"Using specified artefact prefix: {tarball_prefix_arg}")
-    elif isinstance(tarball_prefix, dict):
+    if isinstance(artefact_prefix, str):
+        artefact_prefix_arg = artefact_prefix
+        log(f"Using specified artefact prefix: {artefact_prefix_arg}")
+    elif isinstance(artefact_prefix, dict):
        # artefact prefix spec may be a mapping of target repo id to artefact prefix
-        tarball_prefix_arg = tarball_prefix.get(target_repo_id)
-        if tarball_prefix_arg is None:
+        artefact_prefix_arg = artefact_prefix.get(target_repo_id)
+        if artefact_prefix_arg is None:
            update_pr_comment(artefact, repo_name, pr_number, pr_comment_id, "not uploaded",
                              f"failed (no artefact prefix specified for {target_repo_id})")
            return
        else:
-            log(f"Using artefact prefix for {target_repo_id}: {tarball_prefix_arg}")
+            log(f"Using artefact prefix for {target_repo_id}: {artefact_prefix_arg}")
    else:
        update_pr_comment(artefact, repo_name, pr_number, pr_comment_id, "not uploaded",
-                          f"failed (incorrect artefact prefix spec: {tarball_prefix_arg})")
+                          f"failed (incorrect artefact prefix spec: {artefact_prefix})")
        return

    # run 'eessi-upload-to-staging {abs_path}'
@@ -352,18 +352,18 @@ def upload_artefact(job_dir, payload, timestamp, repo_name, pr_number, pr_commen
    #     bucket_name = 'eessi-staging'
    #     if endpoint_url not set use EESSI S3 bucket
    # (2) run command
-    cmd_args = [tarball_upload_script, ]
+    cmd_args = [artefact_upload_script, ]
+    if len(artefact_prefix_arg) > 0:
+        cmd_args.extend(['--artefact-prefix', artefact_prefix_arg])
    if len(bucket_name) > 0:
        cmd_args.extend(['--bucket-name', bucket_name])
    if len(endpoint_url) > 0:
        cmd_args.extend(['--endpoint-url', endpoint_url])
    if len(metadata_prefix_arg) > 0:
        cmd_args.extend(['--metadata-prefix', metadata_prefix_arg])
-    cmd_args.extend(['--repository', repo_name])
-    cmd_args.extend(['--pull-request-number', str(pr_number)])
    cmd_args.extend(['--pr-comment-id', str(pr_comment_id)])
-    if len(tarball_prefix_arg) > 0:
-        cmd_args.extend(['--tarball-prefix', tarball_prefix_arg])
+    cmd_args.extend(['--pull-request-number', str(pr_number)])
+    cmd_args.extend(['--repository', repo_name])
    cmd_args.append(abs_path)
    upload_cmd = ' '.join(cmd_args)

@@ -372,7 +372,7 @@ def upload_artefact(job_dir, payload, timestamp, repo_name, pr_number, pr_commen

    if ec == 0:
        # add file to 'job_dir/../uploaded.txt'
-        append_tarball_to_upload_log(artefact, job_dir)
+        append_artefact_to_upload_log(artefact, job_dir)
        # update pull request comment
        update_pr_comment(artefact, repo_name, pr_number, pr_comment_id, "uploaded",
                          "succeeded")

diff --git a/tests/test_app.cfg b/tests/test_app.cfg
index f9634422..31797fa6 100644
--- a/tests/test_app.cfg
+++ b/tests/test_app.cfg
@@ -25,11 +25,11 @@ awaits_lauch = job awaits launch by Slurm scheduler
running_job = job `{job_id}` is running

[finished_job_comments]
-success = :grin: SUCCESS tarball `{tarball_name}` ({tarball_size} GiB) in job dir
+success = :grin: SUCCESS tarball `{artefact_name}` ({artefact_size} GiB) in job dir
failure = :cry: FAILURE
no_slurm_out = No slurm output `{slurm_out}` in job dir
slurm_out = Found slurm output `{slurm_out}` in job dir
missing_modules = Slurm output lacks message "No missing modules!".
no_tarball_message = Slurm output lacks message about created tarball.
-no_matching_tarball = No tarball matching `{tarball_pattern}` found in job dir.
-multiple_tarballs = Found {num_tarballs} tarballs in job dir - only 1 matching `{tarball_pattern}` expected.
+no_matching_tarball = No tarball matching `{artefact_pattern}` found in job dir.
+multiple_tarballs = Found {num_artefacts} tarballs in job dir - only 1 matching `{artefact_pattern}` expected.
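Put together, a deployment with this patch applied would assemble an upload command along these lines (a sketch only; every value is hypothetical, while the flag names and their order follow the `cmd_args` construction in `tasks/deploy.py` above):

```
# hypothetical invocation assembled by upload_artefact() after this patch;
# prefixes are single-quoted so that the upload script expands them via envsubst
PATH_TO_EESSI_BOT/scripts/eessi-upload-to-staging \
    --artefact-prefix 'new/${github_repository}/${pull_request_number}' \
    --bucket-name eessi-staging \
    --endpoint-url https://s3.example.org \
    --metadata-prefix 'new/${github_repository}/${pull_request_number}' \
    --pr-comment-id 123456789 \
    --pull-request-number 42 \
    --repository EESSI/software-layer \
    /path/to/job-dir/eessi-2023.06-software-linux-x86_64-1234567890.tar.gz
```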