From db78b7c692a045736429ddaec3fccf19b322e6b5 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 21 Aug 2023 13:27:06 -0700 Subject: [PATCH 1/4] dev: Include devel/check-readme in its own checks No harm in describing it in the README, esp. as other development programs are to be added imminently. --- README.md | 6 ++++++ devel/check-readme | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a123677..7dd01cf 100644 --- a/README.md +++ b/README.md @@ -114,6 +114,12 @@ Text templates for messages and summaries in our workflows. - [attach-aws-batch](text-templates/attach-aws-batch.md) +## Development tools for this repo itself + +- Linting to ensure the README stays complete + ([devel/check-readme](devel/check-readme)) + + ## Configuration for this repo itself - [Dependabot configuration](.github/dependabot.yml) diff --git a/devel/check-readme b/devel/check-readme index 1b83235..9eb6e17 100755 --- a/devel/check-readme +++ b/devel/check-readme @@ -44,7 +44,6 @@ files-to-ignore() { git ls-files \ .gitignore \ README.md \ - devel/check-readme \ 'images/*' \ actions/setup-ssh/!(*.yaml|README.md) \ actions/setup-debugger/!(*.yaml|README.md) \ From edd329054b8cfcd0c45ad32588c1fa869aa0da1a Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 21 Aug 2023 12:14:40 -0700 Subject: [PATCH 2/4] pathogen-repo-build: Generate the workflow YAML from a separately-authored input YAML This allows us to author the workflow using YAML anchors/references (especially with merge keys) since GitHub Actions doesn't otherwise support those YAML features. The lack of support is a shame because GitHub Actions workflows can be very repetitive and anchors/references/merges are a decent solution to that. I'm about to add substantial conceptual replication that we won't want to maintain concretely replicated in the file. 
Since the generated file must be checked in, a new CI step ensures the generated file matches the authored file, and the generated file is excluded from git diffs by default. An optional local pre-commit hook is also available for making sure you craft commits that won't run afoul of the CI check later. Or you can, as necessary, run `make` manually before committing. --- .gitattributes | 4 + .github/workflows/ci.yaml | 1 + .github/workflows/pathogen-repo-build.yaml | 103 ++----- .github/workflows/pathogen-repo-build.yaml.in | 254 ++++++++++++++++++ Makefile | 25 ++ README.md | 12 +- devel/explode-yaml | 12 + devel/pre-commit | 33 +++ devel/regenerate-workflow | 99 +++++++ 9 files changed, 465 insertions(+), 78 deletions(-) create mode 100644 .gitattributes create mode 100644 .github/workflows/pathogen-repo-build.yaml.in create mode 100644 Makefile create mode 100755 devel/explode-yaml create mode 100755 devel/pre-commit create mode 100755 devel/regenerate-workflow diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..6bc8c68 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +# This is a large generated file that, while text, is not useful to +# routinely show the diff of. A diff can be forced as needed, e.g. with `git +# diff --text`. 
+/.github/workflows/pathogen-repo-build.yaml -diff diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b0b22a3..dc9705f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -74,5 +74,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - run: make check - run: ./devel/check-readme - uses: ./actions/shellcheck diff --git a/.github/workflows/pathogen-repo-build.yaml b/.github/workflows/pathogen-repo-build.yaml index 9e82f57..5ac8f27 100644 --- a/.github/workflows/pathogen-repo-build.yaml +++ b/.github/workflows/pathogen-repo-build.yaml @@ -1,8 +1,9 @@ +# DO NOT EDIT - GENERATED + # This workflow is intended to be called by workflows in our various pathogen # build repos. See workflow-templates/pathogen-repo-builds.yaml (a "starter" # workflow) in this repo for an example of what the caller workflow looks like. name: Pathogen repo build - defaults: run: # This is the same as GitHub Action's `bash` keyword as of 20 June 2023: @@ -11,62 +12,43 @@ defaults: # Completely spelling it out here so that GitHub can't change it out from under us # and we don't have to refer to the docs to know the expected behavior. shell: bash --noprofile --norc -eo pipefail {0} - on: workflow_call: inputs: repo: description: >- - Repository name with owner (e.g. nextstrain/zika). Defaults to the - repository of the caller workflow. + Repository name with owner (e.g. nextstrain/zika). Defaults to the repository of the caller workflow. type: string default: ${{ github.repository }} required: false - runtime: description: >- - Nextstrain runtime under which to run the build. - Currently only supports docker, conda, and aws-batch. - Defaults to "docker". + Nextstrain runtime under which to run the build. Currently only supports docker, conda, and aws-batch. Defaults to "docker". - The aws-batch runtime requires AWS credentials. These may come - directly from secrets or indirectly from assuming a role via GitHub - Actions' OIDC provider. 
+ The aws-batch runtime requires AWS credentials. These may come directly from secrets or indirectly from assuming a role via GitHub Actions' OIDC provider. The following secrets are used if present: - - AWS_ACCESS_KEY_ID - - AWS_SECRET_ACCESS_KEY + - AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY - They must be defined in the repo's Actions secrets and passed to this - workflow with `secrets: inherit`. + They must be defined in the repo's Actions secrets and passed to this workflow with `secrets: inherit`. - If no secrets are present, the GitHubActionsRoleNextstrainBatchJobs - role is assumed (in both senses of the verb). + If no secrets are present, the GitHubActionsRoleNextstrainBatchJobs role is assumed (in both senses of the verb). type: string default: docker required: false - run: description: >- - The full `nextstrain build` command to run for the build. - Defaults to `nextstrain build .` + The full `nextstrain build` command to run for the build. Defaults to `nextstrain build .` + + Use the runtime input to select the runtime for the build instead of the runtime selection options to ensure that the runtime is properly set up within the GitHub Action job. - Use the runtime input to select the runtime for the build instead of - the runtime selection options to ensure that the runtime is properly - set up within the GitHub Action job. + The pathogen repo is cloned to the top level of the working directory of the GitHub Action, so use `.` to point to the pathogen repo directory. - The pathogen repo is cloned to the top level of the working directory - of the GitHub Action, so use `.` to point to the pathogen repo directory. + If your build runs longer than the 6 hour limit for GitHub Action jobs, consider using the `--detach` flag for the aws-batch runtime. - If your build runs longer than the 6 hour limit for GitHub Action jobs, - consider using the `--detach` flag for the aws-batch runtime. 
+ All environment variables provided via the env input and all secrets provided via `secrets: inherit` can be passed to the build runtime via the `--env` option. If AWS credentials were acquired by the GitHub Action job via role assumption, the following environment variables are also available to be passed: - All environment variables provided via the env input and all secrets - provided via `secrets: inherit` can be passed to the build runtime - via the `--env` option. If AWS credentials were acquired by the - GitHub Action job via role assumption, the following environment - variables are also available to be passed: - AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY @@ -74,16 +56,12 @@ on: type: string default: nextstrain build . required: false - env: description: >- - Environment variables to set for this reusable workflow since - environment variables in the caller workflow are not propagated to - reusable workflows. This is expected to be a string containing YAML. + Environment variables to set for this reusable workflow since environment variables in the caller workflow are not propagated to reusable workflows. This is expected to be a string containing YAML. + + This is easily produced, for example, by pretending you're writing normal nested YAML within a literal multi-line block scalar (introduced by "|"): - This is easily produced, for example, by pretending - you're writing normal nested YAML within a literal multi-line block - scalar (introduced by "|"): with: env: | @@ -94,29 +72,22 @@ on: not yaml - Do not use for secrets! Instead, pass them via GitHub Action's - dedicated secrets mechanism. + Do not use for secrets! Instead, pass them via GitHub Action's dedicated secrets mechanism. type: string default: "" required: false - artifact-name: description: >- Name to use for the build output artifact uploaded at end of the workflow. - If you're invoking this workflow multiple times from the same calling - workflow, you should set this. 
Otherwise, the default "build-outputs" - is probably fine. + If you're invoking this workflow multiple times from the same calling workflow, you should set this. Otherwise, the default "build-outputs" is probably fine. type: string default: build-outputs required: false - artifact-paths: description: >- - List of paths to include in the build output artifact uploaded - at the end of the workflow, as a string following the format of the - `paths` input of the `actions/upload-artifact` action. - For example: + List of paths to include in the build output artifact uploaded at the end of the workflow, as a string following the format of the `paths` input of the `actions/upload-artifact` action. For example: + with: artifact-paths: | @@ -126,6 +97,7 @@ on: The default paths included in the artifact are: + build.log auspice/ results/ @@ -133,25 +105,15 @@ on: logs/ .snakemake/log/ - The "build.log" contains log messages from the `nextstrain build` command. - The other paths are common output paths for Nextstrain builds. - If a path does not exist in your build, then the action will still - succeed and will print out a warning for the non-existent file(s). - Use an exclude pattern for any of the default paths that you would like to - exclude from the artifact (e.g. !build.log). + The "build.log" contains log messages from the `nextstrain build` command. The other paths are common output paths for Nextstrain builds. If a path does not exist in your build, then the action will still succeed and will print out a warning for the non-existent file(s). Use an exclude pattern for any of the default paths that you would like to exclude from the artifact (e.g. !build.log). - This is not supported for builds on AWS Batch because the workflow - detaches from the build. Please use the `nextstrain build` command - locally to reattach to AWS Batch builds to download outputs. + This is not supported for builds on AWS Batch because the workflow detaches from the build. 
Please use the `nextstrain build` command locally to reattach to AWS Batch builds to download outputs. type: string required: false - env: NEXTSTRAIN_GITHUB_DIR: .git/nextstrain/.github - permissions: id-token: write - jobs: workflow-context: runs-on: ubuntu-latest @@ -161,7 +123,6 @@ jobs: outputs: repository: ${{ steps.workflow-context.outputs.repository }} sha: ${{ steps.workflow-context.outputs.sha }} - run-build: needs: workflow-context runs-on: ubuntu-latest @@ -170,7 +131,6 @@ jobs: uses: actions/checkout@v4 with: repository: ${{ inputs.repo }} - # Need to run this after the build repo is cloned so that cloning the # build repo does not overwrite the .git dir and remove the extra support files # that we need from nextstrain/.github repo @@ -180,7 +140,6 @@ jobs: repository: ${{ needs.workflow-context.outputs.repository }} ref: ${{ needs.workflow-context.outputs.sha }} path: ${{ env.NEXTSTRAIN_GITHUB_DIR }} - - if: inputs.env name: Set environment variables env: @@ -188,9 +147,7 @@ jobs: run: > # shellcheck disable=SC2154 - echo "$env" - | "$NEXTSTRAIN_GITHUB_DIR"/bin/yaml-to-envvars - | tee -a "$GITHUB_ENV" + echo "$env" | "$NEXTSTRAIN_GITHUB_DIR"/bin/yaml-to-envvars | tee -a "$GITHUB_ENV" - name: Set secrets as environment variables env: @@ -198,10 +155,7 @@ jobs: run: > # shellcheck disable=SC2154 - echo "$secrets" - | jq 'del(.github_token)' - | "$NEXTSTRAIN_GITHUB_DIR"/bin/json-to-envvars - | tee -a "$GITHUB_ENV" + echo "$secrets" | jq 'del(.github_token)' | "$NEXTSTRAIN_GITHUB_DIR"/bin/json-to-envvars | tee -a "$GITHUB_ENV" - if: inputs.runtime == 'aws-batch' uses: aws-actions/configure-aws-credentials@v4 @@ -211,13 +165,11 @@ jobs: aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} role-to-assume: ${{ secrets.AWS_ACCESS_KEY_ID == '' && 'arn:aws:iam::827581582529:role/GitHubActionsRoleNextstrainBatchJobs' || '' }} role-duration-seconds: 43200 # seconds, or 12 hours - - name: Setup runtime ${{ inputs.runtime }} uses: 
./.git/nextstrain/.github/actions/setup-nextstrain-cli with: cli-version: ">=7.1.0" runtime: ${{ inputs.runtime }} - - name: Run build via ${{ inputs.runtime }} env: NEXTSTRAIN_BUILD_COMMAND: ${{ inputs.run }} @@ -226,19 +178,16 @@ jobs: set -x eval "$NEXTSTRAIN_BUILD_COMMAND" |& tee build.log - - if: ${{ inputs.runtime == 'aws-batch' }} name: Get AWS Batch job id id: aws-batch run: | echo "AWS_BATCH_JOB_ID=$(sed -nE 's/.+AWS Batch Job ID\:.+ ([-a-f0-9]+)$/\1/p' < build.log)" >> "$GITHUB_ENV" - - if: env.AWS_BATCH_JOB_ID name: Generate AWS Batch summary run: | "$NEXTSTRAIN_GITHUB_DIR"/bin/interpolate-env < "$NEXTSTRAIN_GITHUB_DIR"/text-templates/attach-aws-batch.md \ > "$GITHUB_STEP_SUMMARY" - - if: always() uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/pathogen-repo-build.yaml.in b/.github/workflows/pathogen-repo-build.yaml.in new file mode 100644 index 0000000..9e82f57 --- /dev/null +++ b/.github/workflows/pathogen-repo-build.yaml.in @@ -0,0 +1,254 @@ +# This workflow is intended to be called by workflows in our various pathogen +# build repos. See workflow-templates/pathogen-repo-builds.yaml (a "starter" +# workflow) in this repo for an example of what the caller workflow looks like. +name: Pathogen repo build + +defaults: + run: + # This is the same as GitHub Action's `bash` keyword as of 20 June 2023: + # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsshell + # + # Completely spelling it out here so that GitHub can't change it out from under us + # and we don't have to refer to the docs to know the expected behavior. + shell: bash --noprofile --norc -eo pipefail {0} + +on: + workflow_call: + inputs: + repo: + description: >- + Repository name with owner (e.g. nextstrain/zika). Defaults to the + repository of the caller workflow. + type: string + default: ${{ github.repository }} + required: false + + runtime: + description: >- + Nextstrain runtime under which to run the build. 
+ Currently only supports docker, conda, and aws-batch. + Defaults to "docker". + + The aws-batch runtime requires AWS credentials. These may come + directly from secrets or indirectly from assuming a role via GitHub + Actions' OIDC provider. + + The following secrets are used if present: + + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + + They must be defined in the repo's Actions secrets and passed to this + workflow with `secrets: inherit`. + + If no secrets are present, the GitHubActionsRoleNextstrainBatchJobs + role is assumed (in both senses of the verb). + type: string + default: docker + required: false + + run: + description: >- + The full `nextstrain build` command to run for the build. + Defaults to `nextstrain build .` + + Use the runtime input to select the runtime for the build instead of + the runtime selection options to ensure that the runtime is properly + set up within the GitHub Action job. + + The pathogen repo is cloned to the top level of the working directory + of the GitHub Action, so use `.` to point to the pathogen repo directory. + + If your build runs longer than the 6 hour limit for GitHub Action jobs, + consider using the `--detach` flag for the aws-batch runtime. + + All environment variables provided via the env input and all secrets + provided via `secrets: inherit` can be passed to the build runtime + via the `--env` option. If AWS credentials were acquired by the + GitHub Action job via role assumption, the following environment + variables are also available to be passed: + + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + type: string + default: nextstrain build . + required: false + + env: + description: >- + Environment variables to set for this reusable workflow since + environment variables in the caller workflow are not propagated to + reusable workflows. This is expected to be a string containing YAML. 
+ + This is easily produced, for example, by pretending + you're writing normal nested YAML within a literal multi-line block + scalar (introduced by "|"): + + with: + env: | + FOO: bar + I_CANT_BELIEVE: "it's not YAML" + would_you_believe: | + it's + not + yaml + + Do not use for secrets! Instead, pass them via GitHub Action's + dedicated secrets mechanism. + type: string + default: "" + required: false + + artifact-name: + description: >- + Name to use for the build output artifact uploaded at end of the workflow. + + If you're invoking this workflow multiple times from the same calling + workflow, you should set this. Otherwise, the default "build-outputs" + is probably fine. + type: string + default: build-outputs + required: false + + artifact-paths: + description: >- + List of paths to include in the build output artifact uploaded + at the end of the workflow, as a string following the format of the + `paths` input of the `actions/upload-artifact` action. + For example: + + with: + artifact-paths: | + results/ + auspice/ + logs/ + + The default paths included in the artifact are: + + build.log + auspice/ + results/ + benchmarks/ + logs/ + .snakemake/log/ + + The "build.log" contains log messages from the `nextstrain build` command. + The other paths are common output paths for Nextstrain builds. + If a path does not exist in your build, then the action will still + succeed and will print out a warning for the non-existent file(s). + Use an exclude pattern for any of the default paths that you would like to + exclude from the artifact (e.g. !build.log). + + This is not supported for builds on AWS Batch because the workflow + detaches from the build. Please use the `nextstrain build` command + locally to reattach to AWS Batch builds to download outputs. 
+ type: string + required: false + +env: + NEXTSTRAIN_GITHUB_DIR: .git/nextstrain/.github + +permissions: + id-token: write + +jobs: + workflow-context: + runs-on: ubuntu-latest + steps: + - id: workflow-context + uses: nextstrain/.github/actions/workflow-context@master + outputs: + repository: ${{ steps.workflow-context.outputs.repository }} + sha: ${{ steps.workflow-context.outputs.sha }} + + run-build: + needs: workflow-context + runs-on: ubuntu-latest + steps: + - name: Checkout build repository + uses: actions/checkout@v4 + with: + repository: ${{ inputs.repo }} + + # Need to run this after the build repo is cloned so that cloning the + # build repo does not overwrite the .git dir and remove the extra support files + # that we need from nextstrain/.github repo + - name: Checkout ${{ needs.workflow-context.outputs.repository }} (sha ${{ needs.workflow-context.outputs.sha }}) + uses: actions/checkout@v4 + with: + repository: ${{ needs.workflow-context.outputs.repository }} + ref: ${{ needs.workflow-context.outputs.sha }} + path: ${{ env.NEXTSTRAIN_GITHUB_DIR }} + + - if: inputs.env + name: Set environment variables + env: + env: ${{ inputs.env }} + run: > + # shellcheck disable=SC2154 + + echo "$env" + | "$NEXTSTRAIN_GITHUB_DIR"/bin/yaml-to-envvars + | tee -a "$GITHUB_ENV" + + - name: Set secrets as environment variables + env: + secrets: ${{ toJson(secrets) }} + run: > + # shellcheck disable=SC2154 + + echo "$secrets" + | jq 'del(.github_token)' + | "$NEXTSTRAIN_GITHUB_DIR"/bin/json-to-envvars + | tee -a "$GITHUB_ENV" + + - if: inputs.runtime == 'aws-batch' + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: us-east-1 + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + role-to-assume: ${{ secrets.AWS_ACCESS_KEY_ID == '' && 'arn:aws:iam::827581582529:role/GitHubActionsRoleNextstrainBatchJobs' || '' }} + role-duration-seconds: 43200 # seconds, or 12 hours + + - name: Setup 
runtime ${{ inputs.runtime }} + uses: ./.git/nextstrain/.github/actions/setup-nextstrain-cli + with: + cli-version: ">=7.1.0" + runtime: ${{ inputs.runtime }} + + - name: Run build via ${{ inputs.runtime }} + env: + NEXTSTRAIN_BUILD_COMMAND: ${{ inputs.run }} + run: | + # shellcheck disable=SC2154 + set -x + + eval "$NEXTSTRAIN_BUILD_COMMAND" |& tee build.log + + - if: ${{ inputs.runtime == 'aws-batch' }} + name: Get AWS Batch job id + id: aws-batch + run: | + echo "AWS_BATCH_JOB_ID=$(sed -nE 's/.+AWS Batch Job ID\:.+ ([-a-f0-9]+)$/\1/p' < build.log)" >> "$GITHUB_ENV" + + - if: env.AWS_BATCH_JOB_ID + name: Generate AWS Batch summary + run: | + "$NEXTSTRAIN_GITHUB_DIR"/bin/interpolate-env < "$NEXTSTRAIN_GITHUB_DIR"/text-templates/attach-aws-batch.md \ + > "$GITHUB_STEP_SUMMARY" + + - if: always() + uses: actions/upload-artifact@v4 + with: + if-no-files-found: warn + name: ${{ inputs.artifact-name }} + path: | + build.log + auspice/ + results/ + benchmarks/ + logs/ + .snakemake/log/ + ${{ inputs.artifact-paths }} diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..cf89763 --- /dev/null +++ b/Makefile @@ -0,0 +1,25 @@ +SHELL := /bin/bash -euo pipefail +.SILENT: + +workflows := .github/workflows/*.yaml.in + +inputs := $(sort $(wildcard $(workflows)) $(shell git ls-files --cached -- '$(workflows)')) +outputs := $(inputs:.in=) + +## Regenerate all the things. +all: $(outputs) + +## Regenerate all the things and error if anything changed. +check: $(outputs) + git diff --exit-code --text HEAD -- $(outputs) + +## Regenerate an exploded workflow YAML. +.github/workflows/%.yaml: .github/workflows/%.yaml.in PHONY + ./devel/regenerate-workflow $< + +## Print this help message. +help: + @perl -ne 'print if /^## / ... s/^(? +# ² +# +set -euo pipefail + +docker run --rm --interactive mikefarah/yq ' + explode(.) 
head_comment="DO NOT EDIT - GENERATED\n\n" + head_comment +' diff --git a/devel/pre-commit b/devel/pre-commit new file mode 100755 index 0000000..78ebb25 --- /dev/null +++ b/devel/pre-commit @@ -0,0 +1,33 @@ +#!/bin/bash +# Git pre-commit hook to forcibly regenerate generated files on every commit. +# +# Optional, but helps keep things in sync locally so that our check doesn't +# fail in CI after the push. +# +# To use: +# +# ln -sv ../../devel/pre-commit .git/hooks/pre-commit +# +set -euo pipefail + +main() { + exec 3>&1 1> >(prefix-output) 2> >(prefix-output >&2) + + # Don't error, just quietly quit, if we're installed but operating on an + # older version of the repo before the Makefile existed. + [[ -f Makefile ]] || exit 0 + + make + + echo >&3 +} + +prefix-output() { + local line + IFS=$'\0' + while read -r line; do + echo "[pre-commit] $line" + done +} + +main "$@" diff --git a/devel/regenerate-workflow b/devel/regenerate-workflow new file mode 100755 index 0000000..0386be5 --- /dev/null +++ b/devel/regenerate-workflow @@ -0,0 +1,99 @@ +#!/bin/bash +set -euo pipefail + +devel="$(dirname "$0")" + +main() { + local in out + + if [[ "$1" == *.in ]]; then + in="$1" + out="${1%.in}" + else + in="$1.in" + out="$1" + fi + + local generated=0 + echo "generating $out" + + if git-unmerged "$in"; then + echo "error: input file $in is unmerged; please resolve conflicts first" >&2 + exit 1 + fi + + if [[ -f "$in" ]]; then + generated=1 + "$devel"/explode-yaml < "$in" > "$out" & + fi + + if git-tracked "$in"; then + generated=1 + "$devel"/explode-yaml \ + < <(git cat-file blob :"$in") \ + > >(git-add-stdin-as "$out") \ + & + fi + + wait + + if [[ "$generated" -eq 0 ]]; then + echo "error: input file $in neither exists on disk nor is it tracked by git" >&2 + exit 1 + fi +} + +git-unmerged() { + # See git-ls-files(1) and git-read-tree(1) for more details on what we're + # reading here from Git's index. 
The gist of it is that when a file X is + # unmerged, it will show up in the index as three entries with the third + # field (stage) being 1, 2, and 3: + # + # 100644 78981922613b2afb6025042ff6bd878ac1994e85 1 X + # 100644 d00491fd7e5bb6fa28c517a0bb32b8b506539d4d 2 X + # 100644 0cfbf08886fca9a91cb753ec8734c84fcbe52c9f 3 X + # + # In a normal state, i.e. without merge conflicts or after conflicts have + # been resolved and recorded, then it shows up as a single entry where + # stage is 0: + # + # 100644 78981922613b2afb6025042ff6bd878ac1994e85 0 X + # + while read -rd $'\0' mode object stage path; do + if [[ "$stage" != 0 ]]; then + return 0 + fi + done < <(git ls-files --stage -z -- "$1") # not a pipe: `return` must run in this shell, not a pipeline subshell + return 1 +} + +git-tracked() { + git ls-files --stage --error-unmatch -- "$1" >/dev/null 2>&1 +} + +git-add-stdin-as() { + # Stages the contents on stdin as the given path in Git's index, without + # touching the working tree. + # + # Originally written for . + local path="$1" + local repo_path mode object + + # Convert filesystem $path to a canonicalized path from the root of the + # repo. This is required for the commands below. + repo_path="$(git ls-files --full-name --error-unmatch -- "$path")" + + # Use existing mode (e.g. 100644) + mode="$(git ls-tree --format "%(objectmode)" HEAD :/"$repo_path")" + + # Create new object in git's object database from the contents on stdin. + # Using --path ensures that any filters (e.g. eol textconv or otherwise) + # that would apply to $path are applied to the contents on stdin too. + object="$(git hash-object -w --stdin --path "$repo_path")" + + # Stage the new object as an update to $path (as if with `git add` after + # actually modifying $path). 
+ git update-index --cacheinfo "$mode,$object,$repo_path" +} + +main "$@" From 1f41bea321677acee4400ece21de37e664608372 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 21 Aug 2023 14:09:56 -0700 Subject: [PATCH 3/4] pathogen-repo-build: Support manual triggering with workflow_dispatch This lets us more easily test it in development. --- .github/workflows/pathogen-repo-build.yaml | 97 +++++++++++++++++++ .github/workflows/pathogen-repo-build.yaml.in | 12 ++- 2 files changed, 108 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pathogen-repo-build.yaml b/.github/workflows/pathogen-repo-build.yaml index 5ac8f27..fab63f5 100644 --- a/.github/workflows/pathogen-repo-build.yaml +++ b/.github/workflows/pathogen-repo-build.yaml @@ -110,6 +110,103 @@ on: This is not supported for builds on AWS Batch because the workflow detaches from the build. Please use the `nextstrain build` command locally to reattach to AWS Batch builds to download outputs. type: string required: false + workflow_dispatch: + inputs: + runtime: + description: >- + Nextstrain runtime under which to run the build. Currently only supports docker, conda, and aws-batch. Defaults to "docker". + + The aws-batch runtime requires AWS credentials. These may come directly from secrets or indirectly from assuming a role via GitHub Actions' OIDC provider. + + The following secrets are used if present: + + - AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY + + They must be defined in the repo's Actions secrets and passed to this workflow with `secrets: inherit`. + + If no secrets are present, the GitHubActionsRoleNextstrainBatchJobs role is assumed (in both senses of the verb). + type: string + default: docker + required: false + run: + description: >- + The full `nextstrain build` command to run for the build. 
Defaults to `nextstrain build .` + + Use the runtime input to select the runtime for the build instead of the runtime selection options to ensure that the runtime is properly set up within the GitHub Action job. + + The pathogen repo is cloned to the top level of the working directory of the GitHub Action, so use `.` to point to the pathogen repo directory. + + If your build runs longer than the 6 hour limit for GitHub Action jobs, consider using the `--detach` flag for the aws-batch runtime. + + All environment variables provided via the env input and all secrets provided via `secrets: inherit` can be passed to the build runtime via the `--env` option. If AWS credentials were acquired by the GitHub Action job via role assumption, the following environment variables are also available to be passed: + + + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + type: string + default: nextstrain build . + required: false + env: + description: >- + Environment variables to set for this reusable workflow since environment variables in the caller workflow are not propagated to reusable workflows. This is expected to be a string containing YAML. + + This is easily produced, for example, by pretending you're writing normal nested YAML within a literal multi-line block scalar (introduced by "|"): + + + with: + env: | + FOO: bar + I_CANT_BELIEVE: "it's not YAML" + would_you_believe: | + it's + not + yaml + + Do not use for secrets! Instead, pass them via GitHub Action's dedicated secrets mechanism. + type: string + default: "" + required: false + artifact-name: + description: >- + Name to use for the build output artifact uploaded at end of the workflow. + + If you're invoking this workflow multiple times from the same calling workflow, you should set this. Otherwise, the default "build-outputs" is probably fine. 
+ type: string + default: build-outputs + required: false + artifact-paths: + description: >- + List of paths to include in the build output artifact uploaded at the end of the workflow, as a string following the format of the `paths` input of the `actions/upload-artifact` action. For example: + + + with: + artifact-paths: | + results/ + auspice/ + logs/ + + The default paths included in the artifact are: + + + build.log + auspice/ + results/ + benchmarks/ + logs/ + .snakemake/log/ + + The "build.log" contains log messages from the `nextstrain build` command. The other paths are common output paths for Nextstrain builds. If a path does not exist in your build, then the action will still succeed and will print out a warning for the non-existent file(s). Use an exclude pattern for any of the default paths that you would like to exclude from the artifact (e.g. !build.log). + + This is not supported for builds on AWS Batch because the workflow detaches from the build. Please use the `nextstrain build` command locally to reattach to AWS Batch builds to download outputs. + type: string + required: false + repo: + description: >- + Repository name with owner (e.g. nextstrain/zika). + type: string + default: "" + required: true env: NEXTSTRAIN_GITHUB_DIR: .git/nextstrain/.github permissions: diff --git a/.github/workflows/pathogen-repo-build.yaml.in b/.github/workflows/pathogen-repo-build.yaml.in index 9e82f57..334a4f9 100644 --- a/.github/workflows/pathogen-repo-build.yaml.in +++ b/.github/workflows/pathogen-repo-build.yaml.in @@ -14,7 +14,7 @@ defaults: on: workflow_call: - inputs: + inputs: &inputs repo: description: >- Repository name with owner (e.g. nextstrain/zika). Defaults to the @@ -146,6 +146,16 @@ on: type: string required: false + workflow_dispatch: + inputs: + <<: *inputs + repo: + description: >- + Repository name with owner (e.g. nextstrain/zika). 
+ type: string + default: "" + required: true + env: NEXTSTRAIN_GITHUB_DIR: .git/nextstrain/.github From 57eb5e53e628782f9470db2d262e4d5ee1622eae Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Tue, 12 Sep 2023 15:42:18 -0700 Subject: [PATCH 4/4] pathogen-repo-build: Wait for AWS Batch jobs to finish MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This avoids the disconnect between the success/failure status of a GitHub Actions workflow run and the actual AWS Batch job, which makes for easier reporting and debugging and generally less cognitive dissonance. GitHub Actions workflow _runs_ have a very high max timeout of 35 days, but each _job_ in a workflow has a much lower max timeout of 6 hours. Many of our builds should be less than 6 hours¹, but here we support builds up to 24 hours by chaining together 4 GitHub Actions jobs. We can add more jobs to the chain if we need to, but I don't foresee that. Nearly all of our builds are in public repos, which means they won't consume usage minutes from our GitHub Actions quota. However, they _will_ consume concurrency limits from the quota. We were already frequently bumping into the default free-tier quota of 20 concurrent jobs, so adding more long-running jobs (to wait around for the AWS Batch jobs) was a nonstarter until we upgraded to a Team plan with its corresponding quota of 60 concurrent jobs.² ¹ As of 24 Jan, all Batch jobs except one in the prior week were sub 12 hours. The exception was the GISAID ncov-ingest job launched on 16 Jan. Next longest jobs were 10.5 hours, all GenBank ncov-ingest jobs. 
² --- .github/workflows/pathogen-repo-build.yaml | 319 +++++++++++++++++- .github/workflows/pathogen-repo-build.yaml.in | 138 +++++++- 2 files changed, 444 insertions(+), 13 deletions(-) diff --git a/.github/workflows/pathogen-repo-build.yaml b/.github/workflows/pathogen-repo-build.yaml index fab63f5..ce5efe9 100644 --- a/.github/workflows/pathogen-repo-build.yaml +++ b/.github/workflows/pathogen-repo-build.yaml @@ -45,7 +45,7 @@ on: The pathogen repo is cloned to the top level of the working directory of the GitHub Action, so use `.` to point to the pathogen repo directory. - If your build runs longer than the 6 hour limit for GitHub Action jobs, consider using the `--detach` flag for the aws-batch runtime. + If your build runs longer than the 6 hour limit for a single GitHub Action job, then use the aws-batch runtime and the `--detach` flag. Subsequent chained jobs will be automatically used to wait on the remote build for up to 24 hours total. All environment variables provided via the env input and all secrets provided via `secrets: inherit` can be passed to the build runtime via the `--env` option. If AWS credentials were acquired by the GitHub Action job via role assumption, the following environment variables are also available to be passed: @@ -136,7 +136,7 @@ on: The pathogen repo is cloned to the top level of the working directory of the GitHub Action, so use `.` to point to the pathogen repo directory. - If your build runs longer than the 6 hour limit for GitHub Action jobs, consider using the `--detach` flag for the aws-batch runtime. + If your build runs longer than the 6 hour limit for a single GitHub Action job, then use the aws-batch runtime and the `--detach` flag. Subsequent chained jobs will be automatically used to wait on the remote build for up to 24 hours total. All environment variables provided via the env input and all secrets provided via `secrets: inherit` can be passed to the build runtime via the `--env` option. 
If AWS credentials were acquired by the GitHub Action job via role assumption, the following environment variables are also available to be passed: @@ -228,10 +228,10 @@ jobs: uses: actions/checkout@v4 with: repository: ${{ inputs.repo }} - # Need to run this after the build repo is cloned so that cloning the - # build repo does not overwrite the .git dir and remove the extra support files - # that we need from nextstrain/.github repo - - name: Checkout ${{ needs.workflow-context.outputs.repository }} (sha ${{ needs.workflow-context.outputs.sha }}) + - # Need to run this after the build repo is cloned so that cloning the + # build repo does not overwrite the .git dir and remove the extra support files + # that we need from nextstrain/.github repo + name: Checkout ${{ needs.workflow-context.outputs.repository }} (sha ${{ needs.workflow-context.outputs.sha }}) uses: actions/checkout@v4 with: repository: ${{ needs.workflow-context.outputs.repository }} @@ -265,7 +265,7 @@ jobs: - name: Setup runtime ${{ inputs.runtime }} uses: ./.git/nextstrain/.github/actions/setup-nextstrain-cli with: - cli-version: ">=7.1.0" + cli-version: ">=7.4.0" runtime: ${{ inputs.runtime }} - name: Run build via ${{ inputs.runtime }} env: @@ -298,3 +298,308 @@ jobs: logs/ .snakemake/log/ ${{ inputs.artifact-paths }} + outputs: + AWS_BATCH_JOB_ID: ${{ env.AWS_BATCH_JOB_ID }} + # Wait for up to 6 hours (the maximum GitHub Actions job timeout¹) for the + # AWS Batch job to finish. 
+ # + # ¹ + # + wait-1: + needs: [run-build, workflow-context] + if: needs.run-build.outputs.AWS_BATCH_JOB_ID + runs-on: ubuntu-latest + timeout-minutes: 360 + steps: + # Uses needs.workflow-context.outputs + - # Need to run this after the build repo is cloned so that cloning the + # build repo does not overwrite the .git dir and remove the extra support files + # that we need from nextstrain/.github repo + name: Checkout ${{ needs.workflow-context.outputs.repository }} (sha ${{ needs.workflow-context.outputs.sha }}) + uses: actions/checkout@v4 + with: + repository: ${{ needs.workflow-context.outputs.repository }} + ref: ${{ needs.workflow-context.outputs.sha }} + path: ${{ env.NEXTSTRAIN_GITHUB_DIR }} + - if: inputs.runtime == 'aws-batch' + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: us-east-1 + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + role-to-assume: ${{ secrets.AWS_ACCESS_KEY_ID == '' && 'arn:aws:iam::827581582529:role/GitHubActionsRoleNextstrainBatchJobs' || '' }} + role-duration-seconds: 43200 # seconds, or 12 hours + - name: Setup runtime ${{ inputs.runtime }} + uses: ./.git/nextstrain/.github/actions/setup-nextstrain-cli + with: + cli-version: ">=7.4.0" + runtime: ${{ inputs.runtime }} + - id: attach + name: Attach to AWS Batch job + env: + AWS_BATCH_JOB_ID: ${{ needs.run-build.outputs.AWS_BATCH_JOB_ID }} + run: | + # When a running process is to be cancelled (e.g. due to either user + # request or job timeout), GitHub Actions sends it SIGINT and waits + # 7.5s for it to exit.¹ We immediately detach on interrupt, letting + # the next wait-N job in our chain pick it back up (if this job timed + # out) or the cancellation job (if by user request). 
+ # + # ¹ + exec nextstrain build \ + --aws-batch \ + --attach "$AWS_BATCH_JOB_ID" \ + --detach-on-interrupt \ + --no-download + # Allow the workflow to be considered successful even if this job errors + # due to cancellation (timing out). Unfortunately, this doesn't + # distinguish between error from cancellation and error from command + # failure, so we work around that below. + continue-on-error: true + # Emit a "conclusion" output for the job that's based on the built-in + # conclusion (success, failure, cancelled) of the "attach" step above. + # This is the conclusion we care about for the job since the job's own + # "conclusion" is masked/transformed by "continue-on-error: true" above. + outputs: + attach-step-conclusion: ${{ steps.attach.conclusion }} + # Wait for up to another 6 hours (hours 6–12) if the preceding wait-N job + # timed out while attached to the AWS Batch job. + wait-2: + runs-on: ubuntu-latest + timeout-minutes: 360 + steps: + # Uses needs.workflow-context.outputs + - # Need to run this after the build repo is cloned so that cloning the + # build repo does not overwrite the .git dir and remove the extra support files + # that we need from nextstrain/.github repo + name: Checkout ${{ needs.workflow-context.outputs.repository }} (sha ${{ needs.workflow-context.outputs.sha }}) + uses: actions/checkout@v4 + with: + repository: ${{ needs.workflow-context.outputs.repository }} + ref: ${{ needs.workflow-context.outputs.sha }} + path: ${{ env.NEXTSTRAIN_GITHUB_DIR }} + - if: inputs.runtime == 'aws-batch' + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: us-east-1 + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + role-to-assume: ${{ secrets.AWS_ACCESS_KEY_ID == '' && 'arn:aws:iam::827581582529:role/GitHubActionsRoleNextstrainBatchJobs' || '' }} + role-duration-seconds: 43200 # seconds, or 12 hours + - name: Setup runtime ${{ inputs.runtime }} + uses: 
./.git/nextstrain/.github/actions/setup-nextstrain-cli + with: + cli-version: ">=7.4.0" + runtime: ${{ inputs.runtime }} + - id: attach + name: Attach to AWS Batch job + env: + AWS_BATCH_JOB_ID: ${{ needs.run-build.outputs.AWS_BATCH_JOB_ID }} + run: | + # When a running process is to be cancelled (e.g. due to either user + # request or job timeout), GitHub Actions sends it SIGINT and waits + # 7.5s for it to exit.¹ We immediately detach on interrupt, letting + # the next wait-N job in our chain pick it back up (if this job timed + # out) or the cancellation job (if by user request). + # + # ¹ + exec nextstrain build \ + --aws-batch \ + --attach "$AWS_BATCH_JOB_ID" \ + --detach-on-interrupt \ + --no-download + # Allow the workflow to be considered successful even if this job errors + # due to cancellation (timing out). Unfortunately, this doesn't + # distinguish between error from cancellation and error from command + # failure, so we work around that below. + continue-on-error: true + # Emit a "conclusion" output for the job that's based on the built-in + # conclusion (success, failure, cancelled) of the "attach" step above. + # This is the conclusion we care about for the job since the job's own + # "conclusion" is masked/transformed by "continue-on-error: true" above. 
+ outputs: + attach-step-conclusion: ${{ steps.attach.conclusion }} + needs: [wait-1, run-build, workflow-context] + if: needs.wait-1.outputs.attach-step-conclusion == 'cancelled' + # 12–18 hours + wait-3: + runs-on: ubuntu-latest + timeout-minutes: 360 + steps: + # Uses needs.workflow-context.outputs + - # Need to run this after the build repo is cloned so that cloning the + # build repo does not overwrite the .git dir and remove the extra support files + # that we need from nextstrain/.github repo + name: Checkout ${{ needs.workflow-context.outputs.repository }} (sha ${{ needs.workflow-context.outputs.sha }}) + uses: actions/checkout@v4 + with: + repository: ${{ needs.workflow-context.outputs.repository }} + ref: ${{ needs.workflow-context.outputs.sha }} + path: ${{ env.NEXTSTRAIN_GITHUB_DIR }} + - if: inputs.runtime == 'aws-batch' + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: us-east-1 + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + role-to-assume: ${{ secrets.AWS_ACCESS_KEY_ID == '' && 'arn:aws:iam::827581582529:role/GitHubActionsRoleNextstrainBatchJobs' || '' }} + role-duration-seconds: 43200 # seconds, or 12 hours + - name: Setup runtime ${{ inputs.runtime }} + uses: ./.git/nextstrain/.github/actions/setup-nextstrain-cli + with: + cli-version: ">=7.4.0" + runtime: ${{ inputs.runtime }} + - id: attach + name: Attach to AWS Batch job + env: + AWS_BATCH_JOB_ID: ${{ needs.run-build.outputs.AWS_BATCH_JOB_ID }} + run: | + # When a running process is to be cancelled (e.g. due to either user + # request or job timeout), GitHub Actions sends it SIGINT and waits + # 7.5s for it to exit.¹ We immediately detach on interrupt, letting + # the next wait-N job in our chain pick it back up (if this job timed + # out) or the cancellation job (if by user request). 
+ # + # ¹ + exec nextstrain build \ + --aws-batch \ + --attach "$AWS_BATCH_JOB_ID" \ + --detach-on-interrupt \ + --no-download + # Allow the workflow to be considered successful even if this job errors + # due to cancellation (timing out). Unfortunately, this doesn't + # distinguish between error from cancellation and error from command + # failure, so we work around that below. + continue-on-error: true + # Emit a "conclusion" output for the job that's based on the built-in + # conclusion (success, failure, cancelled) of the "attach" step above. + # This is the conclusion we care about for the job since the job's own + # "conclusion" is masked/transformed by "continue-on-error: true" above. + outputs: + attach-step-conclusion: ${{ steps.attach.conclusion }} + needs: [wait-2, run-build, workflow-context] + if: needs.wait-2.outputs.attach-step-conclusion == 'cancelled' + # 18–24 hours + wait-4: + runs-on: ubuntu-latest + timeout-minutes: 360 + steps: + # Uses needs.workflow-context.outputs + - # Need to run this after the build repo is cloned so that cloning the + # build repo does not overwrite the .git dir and remove the extra support files + # that we need from nextstrain/.github repo + name: Checkout ${{ needs.workflow-context.outputs.repository }} (sha ${{ needs.workflow-context.outputs.sha }}) + uses: actions/checkout@v4 + with: + repository: ${{ needs.workflow-context.outputs.repository }} + ref: ${{ needs.workflow-context.outputs.sha }} + path: ${{ env.NEXTSTRAIN_GITHUB_DIR }} + - if: inputs.runtime == 'aws-batch' + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: us-east-1 + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + role-to-assume: ${{ secrets.AWS_ACCESS_KEY_ID == '' && 'arn:aws:iam::827581582529:role/GitHubActionsRoleNextstrainBatchJobs' || '' }} + role-duration-seconds: 43200 # seconds, or 12 hours + - name: Setup runtime ${{ inputs.runtime }} + uses: 
./.git/nextstrain/.github/actions/setup-nextstrain-cli + with: + cli-version: ">=7.4.0" + runtime: ${{ inputs.runtime }} + - id: attach + name: Attach to AWS Batch job + env: + AWS_BATCH_JOB_ID: ${{ needs.run-build.outputs.AWS_BATCH_JOB_ID }} + run: | + # When a running process is to be cancelled (e.g. due to either user + # request or job timeout), GitHub Actions sends it SIGINT and waits + # 7.5s for it to exit.¹ We immediately detach on interrupt, letting + # the next wait-N job in our chain pick it back up (if this job timed + # out) or the cancellation job (if by user request). + # + # ¹ + exec nextstrain build \ + --aws-batch \ + --attach "$AWS_BATCH_JOB_ID" \ + --detach-on-interrupt \ + --no-download + # Allow the workflow to be considered successful even if this job errors + # due to cancellation (timing out). Unfortunately, this doesn't + # distinguish between error from cancellation and error from command + # failure, so we work around that below. + continue-on-error: true + # Emit a "conclusion" output for the job that's based on the built-in + # conclusion (success, failure, cancelled) of the "attach" step above. + # This is the conclusion we care about for the job since the job's own + # "conclusion" is masked/transformed by "continue-on-error: true" above. + outputs: + attach-step-conclusion: ${{ steps.attach.conclusion }} + needs: [wait-3, run-build, workflow-context] + if: needs.wait-3.outputs.attach-step-conclusion == 'cancelled' + # Since the wait-N jobs use "continue-on-error: true" out of necessity (to + # avoid failing the whole workflow when they time out and get cancelled), we + # use a final job here to succeed or fail the whole workflow based on the + # aggregate of their "attach" step conclusions. 
+ wait-conclusion: + needs: [wait-1, wait-2, wait-3, wait-4] + if: always() + runs-on: ubuntu-latest + steps: + - name: All attach steps in wait-N jobs were successful (or skipped) + run: | + # shellcheck disable=SC2242 + + exit ${{ contains(needs.*.outputs.attach-step-conclusion, 'failure') && '1' || '0' }} + # XXX TODO: Jobs can fall off the end of our wait-N chain and appear to be + # successful/complete in GitHub but still running on AWS. Probably very + # rare in reality, though, for an AWS job to take longer than 24h? + # -trs, 12 Sept 2023 + # Cancel the AWS Batch job if the GitHub workflow run is cancelled. + # + # We depend on the last wait-N job (wait-4) so that this job doesn't get + # skipped immediately after run-build. It needs to be at the end of the + # chain. + cancellation: + needs: [wait-4, run-build, workflow-context] + if: cancelled() + runs-on: ubuntu-latest + steps: + # Uses needs.workflow-context.outputs + - # Need to run this after the build repo is cloned so that cloning the + # build repo does not overwrite the .git dir and remove the extra support files + # that we need from nextstrain/.github repo + name: Checkout ${{ needs.workflow-context.outputs.repository }} (sha ${{ needs.workflow-context.outputs.sha }}) + uses: actions/checkout@v4 + with: + repository: ${{ needs.workflow-context.outputs.repository }} + ref: ${{ needs.workflow-context.outputs.sha }} + path: ${{ env.NEXTSTRAIN_GITHUB_DIR }} + - if: inputs.runtime == 'aws-batch' + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: us-east-1 + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + role-to-assume: ${{ secrets.AWS_ACCESS_KEY_ID == '' && 'arn:aws:iam::827581582529:role/GitHubActionsRoleNextstrainBatchJobs' || '' }} + role-duration-seconds: 43200 # seconds, or 12 hours + - name: Setup runtime ${{ inputs.runtime }} + uses: ./.git/nextstrain/.github/actions/setup-nextstrain-cli + with: + 
cli-version: ">=7.4.0" + runtime: ${{ inputs.runtime }} + - id: cancel + name: Cancel AWS Batch job + env: + AWS_BATCH_JOB_ID: ${{ needs.run-build.outputs.AWS_BATCH_JOB_ID }} + run: | + # `nextstrain` will stay attached while it waits for cancellation to + # occur, before finally exiting non-zero. In the unlikely event that + # the job completes before cancellation can occur, it'll exit 0, and + # we want to treat that as an error. + nextstrain build --aws-batch --attach "$AWS_BATCH_JOB_ID" --cancel \ + && exit 1 \ + || exit 0 + # The cancellation job may fail, but we don't want that to impact the + # overall workflow run status. + continue-on-error: true diff --git a/.github/workflows/pathogen-repo-build.yaml.in b/.github/workflows/pathogen-repo-build.yaml.in index 334a4f9..d647e39 100644 --- a/.github/workflows/pathogen-repo-build.yaml.in +++ b/.github/workflows/pathogen-repo-build.yaml.in @@ -59,8 +59,10 @@ on: The pathogen repo is cloned to the top level of the working directory of the GitHub Action, so use `.` to point to the pathogen repo directory. - If your build runs longer than the 6 hour limit for GitHub Action jobs, - consider using the `--detach` flag for the aws-batch runtime. + If your build runs longer than the 6 hour limit for a single GitHub + Action job, then use the aws-batch runtime and the `--detach` flag. + Subsequent chained jobs will be automatically used to wait on the + remote build for up to 24 hours total. 
All environment variables provided via the env input and all secrets provided via `secrets: inherit` can be passed to the build runtime @@ -184,7 +186,8 @@ jobs: # Need to run this after the build repo is cloned so that cloning the # build repo does not overwrite the .git dir and remove the extra support files # that we need from nextstrain/.github repo - - name: Checkout ${{ needs.workflow-context.outputs.repository }} (sha ${{ needs.workflow-context.outputs.sha }}) + - &checkout-workflow-support + name: Checkout ${{ needs.workflow-context.outputs.repository }} (sha ${{ needs.workflow-context.outputs.sha }}) uses: actions/checkout@v4 with: repository: ${{ needs.workflow-context.outputs.repository }} @@ -213,7 +216,8 @@ jobs: | "$NEXTSTRAIN_GITHUB_DIR"/bin/json-to-envvars | tee -a "$GITHUB_ENV" - - if: inputs.runtime == 'aws-batch' + - &setup-aws-credentials + if: inputs.runtime == 'aws-batch' uses: aws-actions/configure-aws-credentials@v4 with: aws-region: us-east-1 @@ -222,10 +226,11 @@ jobs: role-to-assume: ${{ secrets.AWS_ACCESS_KEY_ID == '' && 'arn:aws:iam::827581582529:role/GitHubActionsRoleNextstrainBatchJobs' || '' }} role-duration-seconds: 43200 # seconds, or 12 hours - - name: Setup runtime ${{ inputs.runtime }} + - &setup-runtime + name: Setup runtime ${{ inputs.runtime }} uses: ./.git/nextstrain/.github/actions/setup-nextstrain-cli with: - cli-version: ">=7.1.0" + cli-version: ">=7.4.0" runtime: ${{ inputs.runtime }} - name: Run build via ${{ inputs.runtime }} @@ -262,3 +267,124 @@ jobs: logs/ .snakemake/log/ ${{ inputs.artifact-paths }} + + outputs: + AWS_BATCH_JOB_ID: ${{ env.AWS_BATCH_JOB_ID }} + + # Wait for up to 6 hours (the maximum GitHub Actions job timeout¹) for the + # AWS Batch job to finish. 
+ # + # ¹ + # + wait-1: &wait + needs: [run-build, workflow-context] + if: needs.run-build.outputs.AWS_BATCH_JOB_ID + runs-on: ubuntu-latest + timeout-minutes: 360 + steps: + # Uses needs.workflow-context.outputs + - *checkout-workflow-support + - *setup-aws-credentials + - *setup-runtime + + - id: attach + name: Attach to AWS Batch job + env: + AWS_BATCH_JOB_ID: ${{ needs.run-build.outputs.AWS_BATCH_JOB_ID }} + run: | + # When a running process is to be cancelled (e.g. due to either user + # request or job timeout), GitHub Actions sends it SIGINT and waits + # 7.5s for it to exit.¹ We immediately detach on interrupt, letting + # the next wait-N job in our chain pick it back up (if this job timed + # out) or the cancellation job (if by user request). + # + # ¹ + exec nextstrain build \ + --aws-batch \ + --attach "$AWS_BATCH_JOB_ID" \ + --detach-on-interrupt \ + --no-download + + # Allow the workflow to be considered successful even if this job errors + # due to cancellation (timing out). Unfortunately, this doesn't + # distinguish between error from cancellation and error from command + # failure, so we work around that below. + continue-on-error: true + + # Emit a "conclusion" output for the job that's based on the built-in + # conclusion (success, failure, cancelled) of the "attach" step above. + # This is the conclusion we care about for the job since the job's own + # "conclusion" is masked/transformed by "continue-on-error: true" above. + outputs: + attach-step-conclusion: ${{ steps.attach.conclusion }} + + # Wait for up to another 6 hours (hours 6–12) if the preceding wait-N job + # timed out while attached to the AWS Batch job. 
+ wait-2: + <<: *wait + needs: [wait-1, run-build, workflow-context] + if: needs.wait-1.outputs.attach-step-conclusion == 'cancelled' + + # 12–18 hours + wait-3: + <<: *wait + needs: [wait-2, run-build, workflow-context] + if: needs.wait-2.outputs.attach-step-conclusion == 'cancelled' + + # 18–24 hours + wait-4: + <<: *wait + needs: [wait-3, run-build, workflow-context] + if: needs.wait-3.outputs.attach-step-conclusion == 'cancelled' + + # Since the wait-N jobs use "continue-on-error: true" out of necessity (to + # avoid failing the whole workflow when they time out and get cancelled), we + # use a final job here to succeed or fail the whole workflow based on the + # aggregate of their "attach" step conclusions. + wait-conclusion: + needs: [wait-1, wait-2, wait-3, wait-4] + if: always() + runs-on: ubuntu-latest + steps: + - name: All attach steps in wait-N jobs were successful (or skipped) + run: | + # shellcheck disable=SC2242 + + exit ${{ contains(needs.*.outputs.attach-step-conclusion, 'failure') && '1' || '0' }} + + # XXX TODO: Jobs can fall off the end of our wait-N chain and appear to be + # successful/complete in GitHub but still running on AWS. Probably very + # rare in reality, though, for an AWS job to take longer than 24h? + # -trs, 12 Sept 2023 + + # Cancel the AWS Batch job if the GitHub workflow run is cancelled. + # + # We depend on the last wait-N job (wait-4) so that this job doesn't get + # skipped immediately after run-build. It needs to be at the end of the + # chain. + cancellation: + needs: [wait-4, run-build, workflow-context] + if: cancelled() + runs-on: ubuntu-latest + steps: + # Uses needs.workflow-context.outputs + - *checkout-workflow-support + - *setup-aws-credentials + - *setup-runtime + + - id: cancel + name: Cancel AWS Batch job + env: + AWS_BATCH_JOB_ID: ${{ needs.run-build.outputs.AWS_BATCH_JOB_ID }} + run: | + # `nextstrain` will stay attached while it waits for cancellation to + # occur, before finally exiting non-zero. 
In the unlikely event that + # the job completes before cancellation can occur, it'll exit 0, and + # we want to treat that as an error. + nextstrain build --aws-batch --attach "$AWS_BATCH_JOB_ID" --cancel \ + && exit 1 \ + || exit 0 + + # The cancellation job may fail, but we don't want that to impact the + # overall workflow run status. + continue-on-error: true