From 377759d0b016489f88bb83b4edc3c33cf034cde4 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Mon, 24 Jun 2024 12:47:24 -0700 Subject: [PATCH] ingest-fauna/ingest-ncbi: Add inputs for trial_name and image Motivated by my own need to test the ingest workflows for the latest addition of Nextclade outputs in #62. --- .github/workflows/ingest-fauna.yaml | 28 ++++++++++++++++++++++- .github/workflows/ingest-ncbi.yaml | 35 ++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ingest-fauna.yaml b/.github/workflows/ingest-fauna.yaml index 9621c44..bf64bcf 100644 --- a/.github/workflows/ingest-fauna.yaml +++ b/.github/workflows/ingest-fauna.yaml @@ -11,6 +11,18 @@ defaults: on: workflow_dispatch: + inputs: + image: + description: 'Specific container image to use for ingest workflow (will override the default of "nextstrain build")' + required: false + type: string + trial-name: + description: | + Trial name for outputs. + If not set, outputs will overwrite files at s3://nextstrain-data-private/files/workflows/avian-flu/ + If set, outputs will be uploaded to s3://nextstrain-data-private/files/workflows/avian-flu/trial/<trial-name>/ + required: false + type: string jobs: ingest: @@ -24,11 +36,25 @@ jobs: # the job runs longer than the GH Action limit of 6 hours. 
runtime: docker run: | + declare -a config; + + if [[ "$TRIAL_NAME" ]]; then + # Create JSON string for the nested upload config + S3_DST="s3://nextstrain-data-private/files/workflows/avian-flu/trial/$TRIAL_NAME" + config+=( + s3_dst=$(jq -cn --arg S3_DST "$S3_DST" '{"fauna": $S3_DST}') + ) + fi; + nextstrain build \ --env RETHINK_HOST \ --env RETHINK_AUTH_KEY \ ingest \ - upload_all + upload_all \ + --config "${config[@]}" + env: | + NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }} + TRIAL_NAME: ${{ inputs.trial-name }} # Specifying artifact name to differentiate ingest build outputs from # the phylogenetic build outputs artifact-name: ingest-fauna-build-output diff --git a/.github/workflows/ingest-ncbi.yaml b/.github/workflows/ingest-ncbi.yaml index e4a6256..07e749b 100644 --- a/.github/workflows/ingest-ncbi.yaml +++ b/.github/workflows/ingest-ncbi.yaml @@ -32,6 +32,18 @@ on: - cron: '0 17 * * *' workflow_dispatch: + inputs: + image: + description: 'Specific container image to use for ingest workflow (will override the default of "nextstrain build")' + required: false + type: string + trial-name: + description: | + Trial name for outputs. + If not set, outputs will overwrite files at s3://nextstrain-data/files/workflows/avian-flu/ + If set, outputs will be uploaded to s3://nextstrain-data/files/workflows/avian-flu/trial/<trial-name>/ + required: false + type: string jobs: ingest: @@ -45,10 +57,31 @@ jobs: # the job runs longer than the GH Action limit of 6 hours. 
runtime: docker run: | + declare -a config; + + if [[ "$TRIAL_NAME" ]]; then + # Create JSON string for the nested upload config + S3_DST_BASE="s3://nextstrain-data/files/workflows/avian-flu/trial/$TRIAL_NAME" + + config+=( + s3_dst=$( + jq -cn --arg S3_DST_BASE "$S3_DST_BASE" '{ + "joined-ncbi": "\($S3_DST_BASE)/h5n1", + "ncbi": "\($S3_DST_BASE)/h5n1/ncbi", + "andersen-lab": "\($S3_DST_BASE)/h5n1/andersen-lab" + }' + ) + ) + fi; + nextstrain build \ ingest \ upload_all_ncbi \ - --configfile build-configs/ncbi/defaults/config.yaml + --configfile build-configs/ncbi/defaults/config.yaml \ + --config "${config[@]}" + env: | + NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }} + TRIAL_NAME: ${{ inputs.trial-name }} # Specifying artifact name to differentiate ingest build outputs from # the phylogenetic build outputs artifact-name: ingest-ncbi-build-output