Skip to content

Commit

Permalink
Merge pull request #55 from nextstrain/reusable-workflows
Browse files Browse the repository at this point in the history
Add independent ingest and phylogenetic GH Action workflows
  • Loading branch information
joverlee521 authored Apr 18, 2024
2 parents 0afe650 + 267e72d commit fe37759
Show file tree
Hide file tree
Showing 5 changed files with 203 additions and 48 deletions.
49 changes: 4 additions & 45 deletions .github/workflows/ingest-to-phylogenetic.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,30 +42,10 @@ jobs:
ingest:
permissions:
id-token: write
uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
uses: ./.github/workflows/ingest.yaml
secrets: inherit
with:
# Starting with the default docker runtime
# We can migrate to AWS Batch when/if we need to for more resources or if
# the job runs longer than the GH Action limit of 6 hours.
runtime: docker
env: |
NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.ingest_image }}
run: |
nextstrain build \
--env AWS_ACCESS_KEY_ID \
--env AWS_SECRET_ACCESS_KEY \
ingest \
upload_all \
--configfile build-configs/nextstrain-automation/config.yaml
# Specifying artifact name to differentiate ingest build outputs from
# the phylogenetic build outputs
artifact-name: ingest-build-output
artifact-paths: |
ingest/results/
ingest/benchmarks/
ingest/logs/
ingest/.snakemake/log/
image: ${{ inputs.ingest_image }}

# Check if ingest results include new data by checking for the cache
# of the file with the results' Metadata.sha256sum (which should have been added within upload-to-s3)
Expand Down Expand Up @@ -114,28 +94,7 @@ jobs:
if: ${{ needs.check-new-data.outputs.cache-hit != 'true' }}
permissions:
id-token: write
uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
uses: ./.github/workflows/phylogenetic.yaml
secrets: inherit
with:
# Starting with the default docker runtime
# We can migrate to AWS Batch when/if we need to for more resources or if
# the job runs longer than the GH Action limit of 6 hours.
runtime: docker
env: |
NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.phylogenetic_image }}
run: |
nextstrain build \
--env AWS_ACCESS_KEY_ID \
--env AWS_SECRET_ACCESS_KEY \
phylogenetic \
deploy_all \
--configfile build-configs/nextstrain-automation/config.yaml
# Specifying artifact name to differentiate ingest build outputs from
# the phylogenetic build outputs
artifact-name: phylogenetic-build-output
artifact-paths: |
phylogenetic/auspice/
phylogenetic/results/
phylogenetic/benchmarks/
phylogenetic/logs/
phylogenetic/.snakemake/log/
image: ${{ inputs.phylogenetic_image }}
82 changes: 82 additions & 0 deletions .github/workflows/ingest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
name: Ingest

defaults:
  run:
    # Spell out the full shell invocation rather than using the `bash` keyword.
    # As of 20 June 2023 this is exactly what GitHub Actions' `bash` keyword means:
    # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsshell
    #
    # Writing it out here means GitHub can't change it out from under us, and
    # the expected behavior can be read here without consulting the docs.
    shell: bash --noprofile --norc -eo pipefail {0}

on:
  # Reusable entry point: lets other workflows in this repo run the ingest
  # build via `uses: ./.github/workflows/ingest.yaml`.
  workflow_call:
    inputs:
      image:
        description: >-
          Specific container image to use for ingest workflow
          (will override the default of "nextstrain build")
        required: false
        type: string

  # Manual entry point: additionally accepts a trial name so that outputs can
  # be uploaded to a trial location instead of the default one.
  workflow_dispatch:
    inputs:
      image:
        description: >-
          Specific container image to use for ingest workflow
          (will override the default of "nextstrain build")
        required: false
        type: string
      trial_name:
        description: |
          Trial name for outputs.
          If not set, outputs will overwrite files at s3://nextstrain-data/files/workflows/zika/
          If set, outputs will be uploaded to s3://nextstrain-data/files/workflows/zika/trials/<trial_name>/
        required: false
        type: string

jobs:
  # Turns the optional `trial_name` input into extra command-line arguments
  # for the ingest build. The output is an empty string when no trial name
  # was provided.
  set_config_overrides:
    runs-on: ubuntu-latest
    outputs:
      config_overrides: ${{ steps.config.outputs.config }}
    steps:
      - id: config
        name: Set config overrides
        env:
          # The input is handed to the script as an environment variable
          # instead of being interpolated into the script text.
          TRIAL_NAME: ${{ inputs.trial_name }}
        run: |
          overrides=""
          if [[ -n "$TRIAL_NAME" ]]; then
            overrides="--config s3_dst='s3://nextstrain-data/files/workflows/zika/trials/${TRIAL_NAME}'"
          fi
          echo "config=${overrides}" >> "$GITHUB_OUTPUT"

  # Runs the ingest build through the shared pathogen-repo-build workflow,
  # appending any config overrides computed by the job above.
  ingest:
    needs:
      - set_config_overrides
    permissions:
      id-token: write
    uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
    secrets: inherit
    with:
      # Use the default docker runtime for now.
      # A move to AWS Batch is possible when/if more resources are needed or
      # the job runs longer than the GH Action limit of 6 hours.
      runtime: docker
      env: |
        NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }}
        CONFIG_OVERRIDES: ${{ needs.set_config_overrides.outputs.config_overrides }}
      # $CONFIG_OVERRIDES is left unquoted so that it splits into separate
      # arguments (`--config` and its key=value pair).
      run: |
        nextstrain build \
          --env AWS_ACCESS_KEY_ID \
          --env AWS_SECRET_ACCESS_KEY \
          ingest \
            upload_all \
            --configfile build-configs/nextstrain-automation/config.yaml \
            $CONFIG_OVERRIDES
      # The artifact name distinguishes the ingest build outputs from the
      # phylogenetic build outputs.
      artifact-name: ingest-build-output
      artifact-paths: |
        ingest/results/
        ingest/benchmarks/
        ingest/logs/
        ingest/.snakemake/log/
109 changes: 109 additions & 0 deletions .github/workflows/phylogenetic.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
name: Phylogenetic

defaults:
  run:
    # Spell out the full shell invocation rather than using the `bash` keyword.
    # As of 20 June 2023 this is exactly what GitHub Actions' `bash` keyword means:
    # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsshell
    #
    # Writing it out here means GitHub can't change it out from under us, and
    # the expected behavior can be read here without consulting the docs.
    shell: bash --noprofile --norc -eo pipefail {0}

on:
  # Reusable entry point: lets other workflows in this repo run the
  # phylogenetic build via `uses: ./.github/workflows/phylogenetic.yaml`.
  workflow_call:
    inputs:
      image:
        description: 'Specific container image to use for phylogenetic workflow (will override the default of "nextstrain build")'
        required: false
        type: string

  # Manual entry point: additionally accepts a trial name and alternative
  # input data URLs, which are turned into `--config` overrides by the
  # set_config_overrides job.
  workflow_dispatch:
    inputs:
      image:
        # This previously said "ingest workflow" (copied from ingest.yaml);
        # the image here is used for the phylogenetic workflow.
        description: 'Specific container image to use for phylogenetic workflow (will override the default of "nextstrain build")'
        required: false
        type: string
      trial_name:
        description: |
          Trial name for deploying builds.
          If not set, builds will overwrite existing builds at s3://nextstrain-data/zika*
          If set, builds will be deployed to s3://nextstrain-staging/zika_trials_<trial_name>_*
        required: false
        type: string
      sequences_url:
        description: |
          URL for a sequences.fasta.zst file.
          If not provided, will use default sequences_url from phylogenetic/defaults/config_zika.yaml
        required: false
        type: string
      metadata_url:
        description: |
          URL for a metadata.tsv.zst file.
          If not provided, will use default metadata_url from phylogenetic/defaults/config_zika.yaml
        required: false
        type: string

jobs:
  # Turns the optional `trial_name`, `sequences_url` and `metadata_url`
  # inputs into a single `--config ...` argument string for the phylogenetic
  # build. The output is an empty string when none of them were provided.
  set_config_overrides:
    runs-on: ubuntu-latest
    outputs:
      config_overrides: ${{ steps.config.outputs.config }}
    steps:
      - id: config
        name: Set config overrides
        env:
          # Inputs are handed to the script as environment variables instead
          # of being interpolated into the script text.
          TRIAL_NAME: ${{ inputs.trial_name }}
          SEQUENCES_URL: ${{ inputs.sequences_url }}
          METADATA_URL: ${{ inputs.metadata_url }}
        run: |
          # Each override is appended with a leading space; the `--config`
          # flag is only prepended if at least one override was collected.
          overrides=""
          if [[ -n "$TRIAL_NAME" ]]; then
            overrides+=" deploy_url='s3://nextstrain-staging/zika_trials_${TRIAL_NAME}_'"
          fi
          if [[ -n "$SEQUENCES_URL" ]]; then
            overrides+=" sequences_url='${SEQUENCES_URL}'"
          fi
          if [[ -n "$METADATA_URL" ]]; then
            overrides+=" metadata_url='${METADATA_URL}'"
          fi
          if [[ -n "$overrides" ]]; then
            overrides="--config ${overrides}"
          fi
          echo "config=${overrides}" >> "$GITHUB_OUTPUT"

  # Runs the phylogenetic build through the shared pathogen-repo-build
  # workflow, appending any config overrides computed by the job above.
  phylogenetic:
    needs:
      - set_config_overrides
    permissions:
      id-token: write
    uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
    secrets: inherit
    with:
      # Use the default docker runtime for now.
      # A move to AWS Batch is possible when/if more resources are needed or
      # the job runs longer than the GH Action limit of 6 hours.
      runtime: docker
      env: |
        NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }}
        CONFIG_OVERRIDES: ${{ needs.set_config_overrides.outputs.config_overrides }}
      # $CONFIG_OVERRIDES is left unquoted so that it splits into separate
      # arguments (`--config` and its key=value pairs).
      run: |
        nextstrain build \
          --env AWS_ACCESS_KEY_ID \
          --env AWS_SECRET_ACCESS_KEY \
          phylogenetic \
            deploy_all \
            --configfile build-configs/nextstrain-automation/config.yaml \
            $CONFIG_OVERRIDES
      # The artifact name distinguishes the phylogenetic build outputs from
      # the ingest build outputs.
      artifact-name: phylogenetic-build-output
      artifact-paths: |
        phylogenetic/auspice/
        phylogenetic/results/
        phylogenetic/benchmarks/
        phylogenetic/logs/
        phylogenetic/.snakemake/log/
5 changes: 5 additions & 0 deletions phylogenetic/defaults/config_zika.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Default locations of the input data fetched by the `download` rule.
# Either value can be replaced at run time with a Snakemake `--config`
# override (e.g. `--config sequences_url='<url>' metadata_url='<url>'`).
# Sequences must be FASTA and metadata must be TSV
# Both files must be zstd compressed
sequences_url: "https://data.nextstrain.org/files/workflows/zika/sequences.fasta.zst"
metadata_url: "https://data.nextstrain.org/files/workflows/zika/metadata.tsv.zst"

strain_id_field: "accession"
display_strain_field: "strain"

Expand Down
6 changes: 3 additions & 3 deletions phylogenetic/rules/prepare_sequences.smk
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ rule download:
sequences = "data/sequences.fasta.zst",
metadata = "data/metadata.tsv.zst"
params:
sequences_url = "https://data.nextstrain.org/files/workflows/zika/sequences.fasta.zst",
metadata_url = "https://data.nextstrain.org/files/workflows/zika/metadata.tsv.zst"
sequences_url = config["sequences_url"],
metadata_url = config["metadata_url"],
shell:
"""
curl -fsSL --compressed {params.sequences_url:q} --output {output.sequences}
Expand Down Expand Up @@ -101,4 +101,4 @@ rule align:
--output {output.alignment} \
--fill-gaps \
--remove-reference
"""
"""

0 comments on commit fe37759

Please sign in to comment.