diff --git a/hubverse-aws-upload/README.md b/hubverse-aws-upload/README.md
new file mode 100644
index 0000000..7778967
--- /dev/null
+++ b/hubverse-aws-upload/README.md
@@ -0,0 +1,30 @@
+# hubverse-aws-upload
+
+
+This action uploads hub data to Hubverse-hosted cloud storage. Currently, the workflow has a single job, `upload`,
+that pushes data to an AWS S3 bucket.
+
+The `upload` job performs the following steps:
+
+1. Inspect the hub's admin config (`hub-config/admin.json`) for a `cloud` group.
+2. If `cloud.enabled` is set to `true`:
+   - authenticate to the Hubverse AWS account
+   - use `cloud.host.storage_location` to determine the name of the hub's S3 bucket
+   - sync the hub's `auxiliary-data`, `hub-config`, `model-abstracts`, `model-metadata`, `model-output`, and `target-data` directories (those that exist) to the S3 bucket
+
+**Note**: This action is safe to use with hubs that are not cloud-enabled.
+If the hub's `admin.json` does not contain a `cloud` group or has `cloud.enabled` set to anything other than `true`,
+the action will skip AWS-related steps.
+
+
+## AWS setup
+
+Before using this action, a member of the Hubverse development team will need to "onboard" the hub to AWS. Onboarding is
+a one-time process that creates:
+
+- An AWS S3 bucket for the hub
+- A set of AWS permissions that allow the repo's GitHub workflows to write to the bucket
+
+**Important**: The repo's write permissions are limited to the `main` branch. Running this action on another branch
+or on a fork will fail.
+
diff --git a/hubverse-aws-upload/hubverse-aws-upload.yaml b/hubverse-aws-upload/hubverse-aws-upload.yaml
new file mode 100644
index 0000000..e90ffec
--- /dev/null
+++ b/hubverse-aws-upload/hubverse-aws-upload.yaml
@@ -0,0 +1,81 @@
+name: Upload hub data to a hubverse-hosted AWS S3 bucket
+
+# Triggered only by pushes to the main branch.
+on:
+  push:
+    branches:
+      - main
+
+env:
+  # Hubverse AWS account number. Quoted so the all-digit ID is always
+  # treated as a string rather than parsed as a YAML number.
+  AWS_ACCOUNT: "767397675902"
+
+permissions:
+  contents: read
+  # id-token write required for AWS auth
+  id-token: write
+
+jobs:
+  upload:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Get hub cloud config
+        # Save cloud-related fields from the admin config as environment
+        # variables for later steps (jq is installed on GitHub-hosted runners).
+        # A field that is absent from admin.json is stored as the string "null";
+        # if jq cannot read the file at all, the field is stored empty. Either
+        # way CLOUD_ENABLED is not 'true', so the AWS steps below are skipped.
+        run: |
+          admin_config='./hub-config/admin.json'
+          # each entry is "<ENV_VAR>=<jq path into admin.json>"
+          for entry in \
+            'CLOUD_ENABLED=.cloud.enabled' \
+            'CLOUD_HOST=.cloud.host.name' \
+            'CLOUD_STORAGE_SERVICE=.cloud.host.storage_service' \
+            'CLOUD_STORAGE_LOCATION=.cloud.host.storage_location'
+          do
+            var_name="${entry%%=*}"
+            jq_path="${entry#*=}"
+            value=$(jq -r "$jq_path" "$admin_config") || value=''
+            # print the setting to the job log and persist it for later steps
+            echo "$var_name=$value" | tee -a "$GITHUB_ENV"
+          done
+
+      - name: Configure AWS credentials
+        # request credentials to assume the hub's AWS role via OpenID Connect
+        # (the role is named after the hub's storage location)
+        if: env.CLOUD_ENABLED == 'true'
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: arn:aws:iam::${{ env.AWS_ACCOUNT }}:role/${{ env.CLOUD_STORAGE_LOCATION }}
+          aws-region: us-east-1
+
+      - name: Sync files to cloud storage
+        # sync specified hub directories to S3
+        # (to exclude a directory, remove it from the hub_directories list below)
+        if: env.CLOUD_ENABLED == 'true'
+        run: |
+          hub_directories=(
+            'auxiliary-data'
+            'hub-config'
+            'model-abstracts'
+            'model-metadata'
+            'model-output'
+            'target-data'
+          )
+          for DIRECTORY in "${hub_directories[@]}"
+          do
+            # directories missing from the hub are skipped; --delete removes
+            # bucket objects that no longer exist in the local directory
+            if [ -d "./$DIRECTORY" ]; then
+              aws s3 sync "./$DIRECTORY" "s3://$BUCKET_NAME/$DIRECTORY" --delete
+            fi
+          done
+        shell: bash
+        env:
+          BUCKET_NAME: ${{ env.CLOUD_STORAGE_LOCATION }}