From ea15eb2631c24e8185ba6332c04b5898ccdbac98 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Thu, 20 Jun 2024 17:06:40 -0700 Subject: [PATCH 1/6] Add `deploy_all` rule This is a shared rule between the default builds and the full genome build and it works because both Snakemake workflows independently define their own `all` rule. As usual, this requires AWS credentials to upload the Auspice JSONs to the nextstrain-data S3 bucket. --- README.md | 9 +++++++++ Snakefile | 3 +++ Snakefile.genome | 3 +++ rules/deploy.smk | 15 +++++++++++++++ 4 files changed, 30 insertions(+) create mode 100644 rules/deploy.smk diff --git a/README.md b/README.md index 66f749a..3ba2d86 100755 --- a/README.md +++ b/README.md @@ -16,6 +16,15 @@ nextstrain build --aws-batch --aws-batch-cpus 16 --aws-batch-memory 28800 . --jo Please see [nextstrain.org/docs](https://nextstrain.org/docs) for details about augur and pathogen builds. +### Deploying builds + +The pipeline can automatically deploy resulting builds within the auspice folder +to nextstrain.org by running: + +``` +nextstrain build . deploy_all +``` + ## Creating a custom build The easiest way to generate your own, custom avian-flu build is to use the quickstart-build as a starting template. Simply clone the quickstart-build, run with the example data, and edit the Snakefile to customize. This build includes example data and a simplified, heavily annotated Snakefile that goes over the structure of Snakefiles and annotates rules and inputs/outputs that can be modified. This build, with it's own readme, is available [here](https://github.com/nextstrain/avian-flu/tree/master/quickstart-build). diff --git a/Snakefile b/Snakefile index 9f1a9a1..0b19545 100755 --- a/Snakefile +++ b/Snakefile @@ -22,6 +22,9 @@ rule all: input: auspice_json = all_targets() +# This must be after the `all` rule above since it depends on its inputs +include: "rules/deploy.smk" + rule test_target: """ For testing purposes such as CI workflows. 
diff --git a/Snakefile.genome b/Snakefile.genome index 27bd8be..a1a63e9 100644 --- a/Snakefile.genome +++ b/Snakefile.genome @@ -33,6 +33,9 @@ def subtype(build_name): rule all: input: expand("auspice/avian-flu_{build_name}_genome.json", build_name=BUILD_NAME) +# This must be after the `all` rule above since it depends on its inputs +include: "rules/deploy.smk" + rule files: params: reference = lambda w: f"config/reference_{subtype(w.build_name)}_{{segment}}.gb", diff --git a/rules/deploy.smk b/rules/deploy.smk new file mode 100644 index 0000000..101b8be --- /dev/null +++ b/rules/deploy.smk @@ -0,0 +1,15 @@ +DEPLOY_URL = config.get('deploy_url', "s3://nextstrain-data") + + +rule deploy_all: + """ + Upload all builds to AWS S3 + Depends on each independent Snakemake workflow's defined `all` rule + """ + input: rules.all.input + params: + s3_dst = DEPLOY_URL + shell: + """ + nextstrain remote upload {params.s3_dst:q} {input} + """ From 759b94506f0a1cc5045fc86304ab53a42b70e82a Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Thu, 20 Jun 2024 13:28:41 -0700 Subject: [PATCH 2/6] Add GH Action for NCBI build Currently runs the full genome build and automatically uploads the resulting build to `s3://nextstrain-data`. 
--- .github/workflows/phylogenetic-ncbi.yaml | 29 ++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/workflows/phylogenetic-ncbi.yaml diff --git a/.github/workflows/phylogenetic-ncbi.yaml b/.github/workflows/phylogenetic-ncbi.yaml new file mode 100644 index 0000000..a7dab78 --- /dev/null +++ b/.github/workflows/phylogenetic-ncbi.yaml @@ -0,0 +1,29 @@ +name: Phylogenetic NCBI + +defaults: + run: + # This is the same as GitHub Action's `bash` keyword as of 20 June 2023: + # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsshell + # + # Completely spelling it out here so that GitHub can't change it out from under us + # and we don't have to refer to the docs to know the expected behavior. + shell: bash --noprofile --norc -eo pipefail {0} + +on: + workflow_dispatch: + +jobs: + phylogenetic: + permissions: + id-token: write + uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master + secrets: inherit + with: + runtime: docker + run: | + nextstrain build + . \ + deploy_all \ + --snakefile Snakefile.genome \ + --config s3_src=s3://nextstrain-data/files/workflows/avian-flu/h5n1 + artifact-name: phylogenetic-full-genome-build-output From 7f2a2395c3a5f8ae104b4e79540e8ef5aa06a990 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Fri, 21 Jun 2024 13:15:50 -0700 Subject: [PATCH 3/6] phylogenetic-ncbi: Add `trial_name` input Allows us to test builds and deploy them to staging instead of production. --- .github/workflows/phylogenetic-ncbi.yaml | 27 ++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/.github/workflows/phylogenetic-ncbi.yaml b/.github/workflows/phylogenetic-ncbi.yaml index a7dab78..7bfc085 100644 --- a/.github/workflows/phylogenetic-ncbi.yaml +++ b/.github/workflows/phylogenetic-ncbi.yaml @@ -11,6 +11,14 @@ defaults: on: workflow_dispatch: + inputs: + trial-name: + description: | + Trial name for deploying builds. 
+ If not set, builds will overwrite existing builds at s3://nextstrain-data/avian-flu* + If set, builds will be deployed to s3://nextstrain-staging/avian-flu_trials_<trial-name>_* + required: false + type: string jobs: phylogenetic: @@ -21,9 +29,24 @@ jobs: with: runtime: docker run: | - nextstrain build + declare -a config; + + config+=( + s3_src="s3://nextstrain-data/files/workflows/avian-flu/h5n1" + ); + + if [[ "$TRIAL_NAME" ]]; then + config+=( + deploy_url="s3://nextstrain-staging/avian-flu_trials_${TRIAL_NAME}_" + ) + fi; + + nextstrain build \ . \ deploy_all \ --snakefile Snakefile.genome \ - --config s3_src=s3://nextstrain-data/files/workflows/avian-flu/h5n1 + --config "${config[@]}" + + env: | + TRIAL_NAME: ${{ inputs.trial-name }} artifact-name: phylogenetic-full-genome-build-output From f652842b18badfcb7517e1cd4b66014373cc09c1 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Fri, 21 Jun 2024 13:53:06 -0700 Subject: [PATCH 4/6] phylogenetic-ncbi: Add `image` input to customize Docker image --- .github/workflows/phylogenetic-ncbi.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/phylogenetic-ncbi.yaml b/.github/workflows/phylogenetic-ncbi.yaml index 7bfc085..45e286b 100644 --- a/.github/workflows/phylogenetic-ncbi.yaml +++ b/.github/workflows/phylogenetic-ncbi.yaml @@ -12,6 +12,10 @@ defaults: on: workflow_dispatch: inputs: + image: + description: 'Specific container image to use for phylogenetic workflow (will override the default of "nextstrain build")' + required: false + type: string trial-name: description: | Trial name for deploying builds. 
@@ -48,5 +52,6 @@ jobs: --config "${config[@]}" env: | + NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }} TRIAL_NAME: ${{ inputs.trial-name }} artifact-name: phylogenetic-full-genome-build-output From 4a12b55da7b355bffd906ccef7e3368da74b2968 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Fri, 21 Jun 2024 14:12:38 -0700 Subject: [PATCH 5/6] Copy phylogenetic-ncbi to phylogenetic-fauna Will make edits in subsequent commit to reflect the default fauna build --- .github/workflows/phylogenetic-fauna.yaml | 57 +++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 .github/workflows/phylogenetic-fauna.yaml diff --git a/.github/workflows/phylogenetic-fauna.yaml b/.github/workflows/phylogenetic-fauna.yaml new file mode 100644 index 0000000..45e286b --- /dev/null +++ b/.github/workflows/phylogenetic-fauna.yaml @@ -0,0 +1,57 @@ +name: Phylogenetic NCBI + +defaults: + run: + # This is the same as GitHub Action's `bash` keyword as of 20 June 2023: + # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsshell + # + # Completely spelling it out here so that GitHub can't change it out from under us + # and we don't have to refer to the docs to know the expected behavior. + shell: bash --noprofile --norc -eo pipefail {0} + +on: + workflow_dispatch: + inputs: + image: + description: 'Specific container image to use for phylogenetic workflow (will override the default of "nextstrain build")' + required: false + type: string + trial-name: + description: | + Trial name for deploying builds. 
+ If not set, builds will overwrite existing builds at s3://nextstrain-data/avian-flu* + If set, builds will be deployed to s3://nextstrain-staging/avian-flu_trials_<trial-name>_* + required: false + type: string + +jobs: + phylogenetic: + permissions: + id-token: write + uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master + secrets: inherit + with: + runtime: docker + run: | + declare -a config; + + config+=( + s3_src="s3://nextstrain-data/files/workflows/avian-flu/h5n1" + ); + + if [[ "$TRIAL_NAME" ]]; then + config+=( + deploy_url="s3://nextstrain-staging/avian-flu_trials_${TRIAL_NAME}_" + ) + fi; + + nextstrain build \ + . \ + deploy_all \ + --snakefile Snakefile.genome \ + --config "${config[@]}" + + env: | + NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }} + TRIAL_NAME: ${{ inputs.trial-name }} + artifact-name: phylogenetic-full-genome-build-output From 08ae7e03face1928e58b00eb567dc6ccbd92e114 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Fri, 21 Jun 2024 14:13:54 -0700 Subject: [PATCH 6/6] phylogenetic-fauna: Update for default builds with fauna data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I think the diff in this commit warrants maintaining separate GH Action workflows for NCBI vs fauna data builds instead of trying to shoehorn both into a single complicated GH Action workflow. 
Uses AWS Batch runtime with cpus/memory according to the repo's README instructions.¹ ¹ --- .github/workflows/phylogenetic-fauna.yaml | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/phylogenetic-fauna.yaml b/.github/workflows/phylogenetic-fauna.yaml index 45e286b..482de73 100644 --- a/.github/workflows/phylogenetic-fauna.yaml +++ b/.github/workflows/phylogenetic-fauna.yaml @@ -1,4 +1,4 @@ -name: Phylogenetic NCBI +name: Phylogenetic Fauna defaults: run: @@ -31,14 +31,10 @@ jobs: uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master secrets: inherit with: - runtime: docker + runtime: aws-batch run: | declare -a config; - config+=( - s3_src="s3://nextstrain-data/files/workflows/avian-flu/h5n1" - ); - if [[ "$TRIAL_NAME" ]]; then config+=( deploy_url="s3://nextstrain-staging/avian-flu_trials_${TRIAL_NAME}_" @@ -46,12 +42,15 @@ jobs: fi; nextstrain build \ + --detach \ + --no-download \ + --cpus 16 \ + --memory 28800mib \ . \ deploy_all \ - --snakefile Snakefile.genome \ --config "${config[@]}" env: | NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }} TRIAL_NAME: ${{ inputs.trial-name }} - artifact-name: phylogenetic-full-genome-build-output + artifact-name: phylogenetic-fauna-build-output