Ingest to phylogenetic #10
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Manually-dispatched workflow that runs the ingest build and then runs the
# phylogenetic build only when the ingest outputs on S3 have changed.
name: Ingest to phylogenetic

on:
  workflow_dispatch:

jobs:
  # Runs the ingest workflow's `upload_all` target through the shared
  # pathogen-repo-build reusable workflow.
  ingest:
    permissions:
      id-token: write
    uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
    secrets: inherit
    with:
      # Starting with the default docker runtime
      # We can migrate to AWS Batch when/if we need to for more resources or if
      # the job runs longer than the GH Action limit of 6 hours.
      runtime: docker
      run: |
        nextstrain build \
          --env AWS_ACCESS_KEY_ID \
          --env AWS_SECRET_ACCESS_KEY \
          ingest \
            upload_all \
            --configfile build-configs/nextstrain-automation/config.yaml
      # Specifying artifact name to differentiate ingest build outputs from
      # the phylogenetic build outputs
      artifact-name: ingest-build-output
      artifact-paths: |
        ingest/results/
        ingest/benchmarks/
        ingest/logs/
        ingest/.snakemake/log/

  # Check if ingest results include new data by checking for the cache
  # of the file with the results' Metadata.sha256sum (which should have been
  # added within upload-to-s3).
  # GitHub will remove any cache entries that have not been accessed in over
  # 7 days, so if the workflow has not been run over 7 days then it will
  # trigger phylogenetic.
  check-new-data:
    needs: [ingest]
    runs-on: ubuntu-latest
    outputs:
      # 'true' when a cache entry already exists for the current hash file.
      cache-hit: ${{ steps.check-cache.outputs.cache-hit }}
    steps:
      # Writes one sha256sum per S3 object, in order, to
      # `ingest-output-sha256sum`.
      - name: Get sha256sum
        id: get-sha256sum
        run: |
          s3_urls=(
            "s3://nextstrain-data/files/workflows/zika/metadata.tsv.zst"
            "s3://nextstrain-data/files/workflows/zika/sequences.fasta.zst"
          )

          # Placeholder recorded when the hash cannot be read from S3, so the
          # hash file always holds one well-formed line per object rather than
          # an empty line.
          no_hash=0000000000000000000000000000000000000000000000000000000000000000

          for s3_url in "${s3_urls[@]}"; do
            # Split "s3://<bucket>/<key>" into its bucket and key parts.
            s3path="${s3_url#s3://}"
            bucket="${s3path%%/*}"
            key="${s3path#*/}"

            # Any head-object failure is silenced and replaced by the
            # placeholder hash.
            s3_hash="$(aws s3api head-object --no-sign-request --bucket "$bucket" --key "$key" --query Metadata.sha256sum --output text 2>/dev/null || echo "$no_hash")"
            echo "${s3_hash}" >> ingest-output-sha256sum
          done
      # The cache key is derived from the hash file's contents, so a cache hit
      # means these exact hashes were already recorded by a previous run.
      # `lookup-only` checks for the entry without downloading it.
      - name: Check cache
        id: check-cache
        uses: actions/cache@v4
        with:
          path: ingest-output-sha256sum
          key: ingest-output-sha256sum-${{ hashFiles('ingest-output-sha256sum') }}
          lookup-only: true

  # Runs the phylogenetic workflow's `deploy_all` target, but only when
  # check-new-data did not report a cache hit.
  phylogenetic:
    needs: [check-new-data]
    if: ${{ needs.check-new-data.outputs.cache-hit != 'true' }}
    permissions:
      id-token: write
    uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
    secrets: inherit
    with:
      # Starting with the default docker runtime
      # We can migrate to AWS Batch when/if we need to for more resources or if
      # the job runs longer than the GH Action limit of 6 hours.
      runtime: docker
      run: |
        nextstrain build \
          --env AWS_ACCESS_KEY_ID \
          --env AWS_SECRET_ACCESS_KEY \
          phylogenetic \
            deploy_all \
            --configfile build-configs/nextstrain-automation/config.yaml
      # Specifying artifact name to differentiate ingest build outputs from
      # the phylogenetic build outputs
      artifact-name: phylogenetic-build-output
      artifact-paths: |
        phylogenetic/auspice/
        phylogenetic/results/
        phylogenetic/benchmarks/
        phylogenetic/logs/
        phylogenetic/.snakemake/log/