From 51795d1c9786737fdc93c13ffd9027543e0dbeab Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 15 May 2024 11:11:22 -0700 Subject: [PATCH 1/3] Set up Terraform provider for GitHub Will let us manage (and self-document) our GitHub organization and repo settings via this Terraform configuration. --- README.md | 2 ++ env/production/.terraform.lock.hcl | 24 ++++++++++++++++++++++++ env/production/terraform.tf | 9 +++++++++ 3 files changed, 35 insertions(+) diff --git a/README.md b/README.md index 45c9359..22c083b 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,8 @@ Make those changes so: > You'll need ambiently-configured AWS credentials with broad admin-level > access to read (and optionally modify) resources in our account. > +> You'll also need a `GITHUB_TOKEN` in the environment. +> > Please step cautiously and be careful when using them! diff --git a/env/production/.terraform.lock.hcl b/env/production/.terraform.lock.hcl index 7769532..af16f2f 100644 --- a/env/production/.terraform.lock.hcl +++ b/env/production/.terraform.lock.hcl @@ -25,3 +25,27 @@ provider "registry.terraform.io/hashicorp/aws" { "zh:fac0d2ddeadf9ec53da87922f666e1e73a603a611c57bcbc4b86ac2821619b1d", ] } + +provider "registry.terraform.io/integrations/github" { + version = "6.2.1" + constraints = "~> 6.0" + hashes = [ + "h1:ip7024qn1ewDqlNucxh07DHvuhSLZSqtTGewxNLeYYU=", + "h1:rY+q+OhJm90R900HvO05YNH7Tl0EOnbCLAoG+5niLX8=", + "h1:uDerb9YJo3vAO+wKw+Z064InX5aXom+nKLDry2eGf14=", + "zh:172aa5141c525174f38504a0d2e69d0d16c0a0b941191b7170fe6ae4d7282e30", + "zh:1a098b731fa658c808b591d030cc17cc7dfca1bf001c3c32e596f8c1bf980e9f", + "zh:245d6a1c7e632d8ae4bdd2da2516610c50051e81505cf420a140aa5fa076ea90", + "zh:43c61c230fb4ed26ff1b04b857778e65be3d8f80292759abbe2a9eb3c95f6d97", + "zh:59bb7dd509004921e4322a196be476a2f70471b462802f09d03d6ce96f959860", + "zh:5cb2ab8035d015c0732107c109210243650b6eb115e872091b0f7b98c2763777", + "zh:69d2a6acfcd686f7e859673d1c8a07fc1fc1598a881493f19d0401eb74c0f325", + 
"zh:77f36d3f46911ace5c50dee892076fddfd64a289999a5099f8d524c0143456d1", + "zh:87df41097dfcde72a1fbe89caca882af257a4763c2e1af669c74dcb8530f9932", + "zh:899dbe621f32d58cb7c6674073a6db8328a9db66eecfb0cc3fc13299fd4e62e7", + "zh:ad2eb7987f02f7dd002076f65a685730705d04435313b5cf44d3a6923629fb29", + "zh:b2145ae7134dba893c7f74ad7dfdc65fdddf6c7b1d0ce7e2f3baa96212322fd8", + "zh:bd6bae3ac5c3f96ad9219d3404aa006ef1480e9041d4c95df1808737e37d911b", + "zh:e89758b20ae59f1b9a6d32c107b17846ddca9634b868cf8f5c927cbb894b1b1f", + ] +} diff --git a/env/production/terraform.tf b/env/production/terraform.tf index c620d66..6e8d63a 100644 --- a/env/production/terraform.tf +++ b/env/production/terraform.tf @@ -5,6 +5,10 @@ terraform { source = "registry.terraform.io/hashicorp/aws" version = "~> 4.32" } + github = { + source = "integrations/github" + version = "~> 6.0" + } } backend "s3" { @@ -25,3 +29,8 @@ terraform { provider "aws" { region = "us-east-1" } + +provider "github" { + # Authn is via GITHUB_TOKEN + owner = "nextstrain" +} From a5883bd50bd9ddfc0a1990673e392cb8fe420ccb Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 15 May 2024 11:44:33 -0700 Subject: [PATCH 2/3] Roles for our pathogen-repo-build GitHub Actions workflow A collection of templated repo-specific roles for inside a Nextstrain runtime and one role for the GitHub Actions job itself, i.e. outside the runtime. The inside-the-runtime roles are given pathogen-specific permissions necessary for the ingest and phylogenetic workflows of a pathogen repo. The outside-the-runtime role is only necessary/used at the moment for access to AWS Batch. The roles can only be assumed by specific repos when performed by our centralized pathogen-repo-build workflow. While this doesn't completely prevent off-label use by other GitHub Actions workflows launched from a pathogen repo, it does make it more involved to do so, hopefully to the point of discouragement. 
The associated GitHub repository configuration is managed by Terraform now as well since the customization of the "sub" claim in GitHub Action's OIDC token is tightly coupled to our AWS role trust policies. Resolves: Related-to: , Supersedes: , --- README.md | 3 +- .../aws-iam-policy-NextstrainPathogen@.tf | 75 +++++++++++++++++++ ...am-policy-NextstrainPathogenNcovPrivate.tf | 43 +++++++++++ ...le-GitHubActionsRoleNextstrainBatchJobs.tf | 8 +- ...m-role-GitHubActionsRoleNextstrainRepo@.tf | 43 +++++++++++ env/production/github-oidc.tf | 15 ++++ env/production/locals.tf | 32 ++++++++ 7 files changed, 216 insertions(+), 3 deletions(-) create mode 100644 env/production/aws-iam-policy-NextstrainPathogen@.tf create mode 100644 env/production/aws-iam-policy-NextstrainPathogenNcovPrivate.tf create mode 100644 env/production/aws-iam-role-GitHubActionsRoleNextstrainRepo@.tf create mode 100644 env/production/github-oidc.tf create mode 100644 env/production/locals.tf diff --git a/README.md b/README.md index 22c083b..a87295b 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,8 @@ Make those changes so: > You'll need ambiently-configured AWS credentials with broad admin-level > access to read (and optionally modify) resources in our account. > -> You'll also need a `GITHUB_TOKEN` in the environment. +> You'll also need a `GITHUB_TOKEN` in the environment with the `actions:write` +> fine-grained token permission on our repos. > > Please step cautiously and be careful when using them! 
diff --git a/env/production/aws-iam-policy-NextstrainPathogen@.tf b/env/production/aws-iam-policy-NextstrainPathogen@.tf new file mode 100644 index 0000000..384dc9c --- /dev/null +++ b/env/production/aws-iam-policy-NextstrainPathogen@.tf @@ -0,0 +1,75 @@ +# Per-pathogen policy, granting access to a single pathogen's data +resource "aws_iam_policy" "NextstrainPathogen" { + for_each = local.pathogen_repos + + name = "NextstrainPathogen@${each.key}" + description = "Provides permissions to upload datasets, workflow files, etc. for a Nextstrain pathogen" + + policy = jsonencode({ + "Version": "2012-10-17", + "Statement": [ + # Technically we don't need to include the public buckets + # nextstrain-data and nextstrain-staging in this statement since they + # already allow a superset of this with their bucket policies, but it's + # good to be explicit about what permissions we require. + # -trs, 16 Feb 2024 + { + "Sid": "List", + "Effect": "Allow", + "Action": [ + "s3:ListBucket", + "s3:ListBucketVersions", + "s3:GetBucketLocation", + "s3:GetBucketVersioning", + ], + "Resource": [ + "arn:aws:s3:::nextstrain-data", + "arn:aws:s3:::nextstrain-data-private", + "arn:aws:s3:::nextstrain-staging", + ], + "Condition": { + "StringLike": { + "s3:prefix": [ + "${each.key}.json", + "${each.key}_*.json", + "files/workflows/${each.key}/*", + "files/datasets/${each.key}/*", + ] + } + } + }, + { + "Sid": "ReadWrite", + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:GetObjectTagging", + "s3:GetObjectVersion", + "s3:GetObjectVersionTagging", + "s3:PutObject", + "s3:PutObjectTagging", + "s3:DeleteObject", + # but NOT s3:DeleteObjectVersion so objects can't be completely wiped + ], + "Resource": [ + # Auspice dataset JSONs + "arn:aws:s3:::nextstrain-data/${each.key}.json", + "arn:aws:s3:::nextstrain-data/${each.key}_*.json", + "arn:aws:s3:::nextstrain-staging/${each.key}.json", + "arn:aws:s3:::nextstrain-staging/${each.key}_*.json", + 
"arn:aws:s3:::nextstrain-staging/trial_*_${each.key}.json", + "arn:aws:s3:::nextstrain-staging/trial_*_${each.key}_*.json", + + # Associated data files + # + "arn:aws:s3:::nextstrain-data/files/workflows/${each.key}/*", + "arn:aws:s3:::nextstrain-data/files/datasets/${each.key}/*", + "arn:aws:s3:::nextstrain-data-private/files/workflows/${each.key}/*", + "arn:aws:s3:::nextstrain-data-private/files/datasets/${each.key}/*", + "arn:aws:s3:::nextstrain-staging/files/workflows/${each.key}/*", + "arn:aws:s3:::nextstrain-staging/files/datasets/${each.key}/*", + ], + }, + ] + }) +} diff --git a/env/production/aws-iam-policy-NextstrainPathogenNcovPrivate.tf b/env/production/aws-iam-policy-NextstrainPathogenNcovPrivate.tf new file mode 100644 index 0000000..ffb4ec5 --- /dev/null +++ b/env/production/aws-iam-policy-NextstrainPathogenNcovPrivate.tf @@ -0,0 +1,43 @@ +# Single-pathogen policy, special-case for the historical reason that +# nextstrain-ncov-private predates the more general nextstrain-data-private. +resource "aws_iam_policy" "NextstrainPathogenNcovPrivate" { + name = "NextstrainPathogen@ncov+private" + description = "Provides permissions to upload datasets, workflow files, etc. to the ncov-private bucket for the Nextstrain ncov pathogen" + + policy = jsonencode({ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "NcovPrivateList", + "Effect": "Allow", + "Action": [ + "s3:ListBucket", + "s3:ListBucketVersions", + "s3:GetBucketLocation", + "s3:GetBucketVersioning", + ], + "Resource": [ + "arn:aws:s3:::nextstrain-ncov-private", + ], + }, + { + "Sid": "NcovPrivateReadWrite", + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:GetObjectTagging", + "s3:GetObjectVersion", + "s3:GetObjectVersionTagging", + "s3:PutObject", + "s3:PutObjectTagging", + "s3:DeleteObject", + # but NOT s3:DeleteObjectVersion so objects can't be completely wiped + ], + "Resource": [ + # This bucket is akin to nextstrain-data-private/files/{workflows,datasets}/ncov/. 
+ "arn:aws:s3:::nextstrain-ncov-private/*", + ], + }, + ] + }) +} diff --git a/env/production/aws-iam-role-GitHubActionsRoleNextstrainBatchJobs.tf b/env/production/aws-iam-role-GitHubActionsRoleNextstrainBatchJobs.tf index 124cb58..76c4920 100644 --- a/env/production/aws-iam-role-GitHubActionsRoleNextstrainBatchJobs.tf +++ b/env/production/aws-iam-role-GitHubActionsRoleNextstrainBatchJobs.tf @@ -3,9 +3,10 @@ import { id = "GitHubActionsRoleNextstrainBatchJobs" } +# Multi-repo role, granting access to Batch resource "aws_iam_role" "GitHubActionsRoleNextstrainBatchJobs" { name = "GitHubActionsRoleNextstrainBatchJobs" - description = "Provides permissions to run jobs on AWS Batch via the Nextstrain CLI to select GitHub Actions OIDC workflows." + description = "Provides permissions to launch and monitor jobs on AWS Batch via the Nextstrain CLI to select GitHub Actions OIDC workflows." max_session_duration = 43200 # seconds (12 hours) @@ -21,7 +22,10 @@ resource "aws_iam_role" "GitHubActionsRoleNextstrainBatchJobs" { "Condition": { "StringLike": { "token.actions.githubusercontent.com:aud": "sts.amazonaws.com", - "token.actions.githubusercontent.com:sub": "repo:nextstrain/.github:*" + "token.actions.githubusercontent.com:sub": [ + for repo in keys(local.repo_pathogens): + "repo:nextstrain/${repo}:*:job_workflow_ref:nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@*" + ] } }, } diff --git a/env/production/aws-iam-role-GitHubActionsRoleNextstrainRepo@.tf b/env/production/aws-iam-role-GitHubActionsRoleNextstrainRepo@.tf new file mode 100644 index 0000000..7f63a1a --- /dev/null +++ b/env/production/aws-iam-role-GitHubActionsRoleNextstrainRepo@.tf @@ -0,0 +1,43 @@ +# Per-repo role, granting access to pathogens +resource "aws_iam_role" "GitHubActionsRoleNextstrainRepo" { + for_each = local.repo_pathogens + + name = "GitHubActionsRoleNextstrainRepo@${each.key}" + description = "Provides permissions to upload datasets, workflow files, etc. 
for a Nextstrain pathogen to select repos and select GitHub Actions OIDC workflows." + + max_session_duration = 43200 # seconds (12 hours) + + assume_role_policy = jsonencode({ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Federated": aws_iam_openid_connect_provider.github-actions.arn + }, + "Action": "sts:AssumeRoleWithWebIdentity", + "Condition": { + "StringLike": { + "token.actions.githubusercontent.com:aud": "sts.amazonaws.com", + "token.actions.githubusercontent.com:sub": "repo:nextstrain/${each.key}:*:job_workflow_ref:nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@*" + } + }, + } + ] + }) + + managed_policy_arns = flatten([ + # Pathogen-specific permissions to standard public/private buckets + [for p in each.value: aws_iam_policy.NextstrainPathogen[p].arn], + + # Special-case permissions to nextstrain-ncov-private bucket + contains(each.value, "ncov") + ? [aws_iam_policy.NextstrainPathogenNcovPrivate.arn] + : [], + + # Builds inside the AWS Batch runtime need access to the jobs bucket. + aws_iam_policy.NextstrainJobsAccessToBucket.arn, + ]) + + inline_policy {} +} diff --git a/env/production/github-oidc.tf b/env/production/github-oidc.tf new file mode 100644 index 0000000..b6494bf --- /dev/null +++ b/env/production/github-oidc.tf @@ -0,0 +1,15 @@ +resource "github_actions_repository_oidc_subject_claim_customization_template" "nextstrain" { + for_each = toset(keys(local.repo_pathogens)) + repository = each.key + + # + use_default = false + include_claim_keys = [ + # The GitHub default… + "repo", + "context", + + # …plus the {owner}/{repo}/{path}@{ref} of the workflow obtaining the token, if any.
+ "job_workflow_ref", + ] +} diff --git a/env/production/locals.tf b/env/production/locals.tf new file mode 100644 index 0000000..1c84bc3 --- /dev/null +++ b/env/production/locals.tf @@ -0,0 +1,32 @@ +locals { + # By design our repo names are usually equal to the pathogen names, but + # they're two separate things/namespaces and linkages don't always line up + # 1:1. Some resources (roles, policies, etc) are more naturally oriented + # per-pathogen (logical), some per-repo (physical). Use two maps to support + # this more easily. This will likely evolve in the future to better support + # our needs. + # -trs, 20 May 2024 + + pathogen_repos = tomap({ + # pathogen name = [repo name, …] + "dengue" = ["dengue"], + "forecasts-ncov" = ["forecasts-ncov"], + "measles" = ["measles"], + "mpox" = ["mpox"], + "ncov" = ["ncov", "ncov-ingest"], + "rsv" = ["rsv"], + "seasonal-flu" = ["seasonal-flu"], + "zika" = ["zika"], + }) + + repo_pathogens = merge( + # repo name = [pathogen name, …] + transpose(local.pathogen_repos), + + tomap({ + # For testing. Ensures a role exists but without any pathogen-specific + # permissions. + ".github" = [], + }), + ) +} From 90fec81fcad333f91dd67db5fe8be9dd74f6855d Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Tue, 21 May 2024 16:02:55 -0700 Subject: [PATCH 3/3] README: Document how to configure a new pathogen repository --- README.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/README.md b/README.md index a87295b..a10b93e 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,34 @@ less understanding about Terraform state management. 
[`terraform import` command]: https://developer.hashicorp.com/terraform/cli/commands/import +### How to add a new pathogen repository for use with `pathogen-repo-build` + +Some changes are necessary to support a repository's use of our [centralized +pathogen-repo-build.yaml GitHub Actions workflow](https://github.com/nextstrain/.github/blob/HEAD/.github/workflows/pathogen-repo-build.yaml.in). + +1. Add the repository by its short name to the `pathogen_repos` local value in + `env/production/locals.tf`. In most cases, this will be a line like: + + ```hcl + "repo-name" = ["repo-name"], + ``` + +2. Plan, review, and apply changes using the `terraform` command. See the synopsis + above, as well as [nextstrain.org's Terraform documentation][]. + + The plan summary should be "3 to add, 1 to change, 0 to destroy". Added + should be: + + - `aws_iam_policy.NextstrainPathogen["repo-name"]` + - `aws_iam_role.GitHubActionsRoleNextstrainRepo["repo-name"]` + - `github_actions_repository_oidc_subject_claim_customization_template.nextstrain["repo-name"]` + + Changed should be: + + - `aws_iam_role.GitHubActionsRoleNextstrainBatchJobs`, a new condition + value entry like `repo:nextstrain/repo-name:*:job_workflow_ref:…`. + + ## Rule of thumb _from [previous discussion](https://github.com/nextstrain/nextstrain.org/issues/748#issuecomment-1792842452)_