From 91448ebeb93e1654072be50495311e6c613edafb Mon Sep 17 00:00:00 2001 From: Fan Ting Wei Date: Fri, 1 Dec 2023 17:06:03 +0800 Subject: [PATCH] feat(nomad): add metadata_options to enable imdsv2 (#297) * add metadata_options * nomad server to use copied nomad-cluster * add spot price * empty string for dedicated instance price * copy vault cluster source code * inout for vault-cluster * add in vault-security-group * add in copied consul-cluster source code * fmt * add consul-client-sg-rules * remove unsupported tags --- .../INOUT.md | 20 + .../README.md | 47 ++ .../main.tf | 81 +++ .../variables.tf | 36 ++ modules/consul-cluster/INOUT.md | 72 +++ modules/consul-cluster/README.md | 395 ++++++++++++++ modules/consul-cluster/main.tf | 242 +++++++++ modules/consul-cluster/outputs.tf | 40 ++ modules/consul-cluster/variables.tf | 291 ++++++++++ modules/consul-iam-policies/INOUT.md | 17 + modules/consul-iam-policies/README.md | 47 ++ modules/consul-iam-policies/main.tf | 36 ++ modules/consul-iam-policies/variables.tf | 16 + modules/consul-security-group-rules/INOUT.md | 27 + modules/consul-security-group-rules/README.md | 47 ++ modules/consul-security-group-rules/main.tf | 288 ++++++++++ .../consul-security-group-rules/variables.tf | 80 +++ modules/core/consul.tf | 4 +- modules/core/nomad_servers.tf | 4 +- modules/core/vault.tf | 4 +- modules/nomad-cluster/main.tf | 6 + modules/vault-cluster/INOUT.md | 73 +++ modules/vault-cluster/README.md | 498 ++++++++++++++++++ modules/vault-cluster/main.tf | 411 +++++++++++++++ modules/vault-cluster/outputs.tf | 52 ++ modules/vault-cluster/variables.tf | 244 +++++++++ modules/vault-security-group-rules/INOUT.md | 21 + modules/vault-security-group-rules/README.md | 48 ++ modules/vault-security-group-rules/main.tf | 53 ++ .../vault-security-group-rules/variables.tf | 38 ++ 30 files changed, 3232 insertions(+), 6 deletions(-) create mode 100644 modules/consul-client-security-group-rules/INOUT.md create mode 100644 
modules/consul-client-security-group-rules/README.md create mode 100644 modules/consul-client-security-group-rules/main.tf create mode 100644 modules/consul-client-security-group-rules/variables.tf create mode 100644 modules/consul-cluster/INOUT.md create mode 100644 modules/consul-cluster/README.md create mode 100644 modules/consul-cluster/main.tf create mode 100644 modules/consul-cluster/outputs.tf create mode 100644 modules/consul-cluster/variables.tf create mode 100644 modules/consul-iam-policies/INOUT.md create mode 100644 modules/consul-iam-policies/README.md create mode 100644 modules/consul-iam-policies/main.tf create mode 100644 modules/consul-iam-policies/variables.tf create mode 100644 modules/consul-security-group-rules/INOUT.md create mode 100644 modules/consul-security-group-rules/README.md create mode 100644 modules/consul-security-group-rules/main.tf create mode 100644 modules/consul-security-group-rules/variables.tf create mode 100644 modules/vault-cluster/INOUT.md create mode 100644 modules/vault-cluster/README.md create mode 100644 modules/vault-cluster/main.tf create mode 100644 modules/vault-cluster/outputs.tf create mode 100644 modules/vault-cluster/variables.tf create mode 100644 modules/vault-security-group-rules/INOUT.md create mode 100644 modules/vault-security-group-rules/README.md create mode 100644 modules/vault-security-group-rules/main.tf create mode 100644 modules/vault-security-group-rules/variables.tf diff --git a/modules/consul-client-security-group-rules/INOUT.md b/modules/consul-client-security-group-rules/INOUT.md new file mode 100644 index 00000000..cdd68bb8 --- /dev/null +++ b/modules/consul-client-security-group-rules/INOUT.md @@ -0,0 +1,20 @@ +## Providers + +| Name | Version | +|------|---------| +| aws | n/a | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:-----:| +| allowed\_inbound\_cidr\_blocks | A list of CIDR-formatted IP address ranges from which the EC2 
Instances will allow connections to Consul | `list(string)` | `[]` | no | +| allowed\_inbound\_security\_group\_count | The number of entries in var.allowed\_inbound\_security\_group\_ids. Ideally, this value could be computed dynamically, but we pass this variable to a Terraform resource's 'count' property and Terraform requires that 'count' be computed with literals or data sources only. | `number` | `0` | no | +| allowed\_inbound\_security\_group\_ids | A list of security group IDs that will be allowed to connect to Consul | `list(string)` | `[]` | no | +| security\_group\_id | The ID of the security group to which we should add the Consul security group rules | `any` | n/a | yes | +| serf\_lan\_port | The port used to handle gossip in the LAN. Required by all agents. | `number` | `8301` | no | + +## Outputs + +No output. + diff --git a/modules/consul-client-security-group-rules/README.md b/modules/consul-client-security-group-rules/README.md new file mode 100644 index 00000000..777b679a --- /dev/null +++ b/modules/consul-client-security-group-rules/README.md @@ -0,0 +1,47 @@ +# Consul Client Security Group Rules Module + +This folder contains a [Terraform](https://www.terraform.io/) module that defines the security group rules used by a +[Consul](https://www.consul.io/) client to control the traffic that is allowed to go in and out. + +Normally, you'd get these rules by default if you're using the [consul-cluster module](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/consul-cluster), but if +you're running Consul on top of a different cluster, then you can use this module to add the necessary security group +rules to that cluster. 
For example, imagine you were using the [vault-cluster +module](https://github.com/hashicorp/terraform-aws-vault/tree/master/modules/vault-cluster) to run a cluster of +servers that have both Vault and Consul agent on each node: + +```hcl +module "vault_servers" { + source = "git::git@github.com:hashicorp/terraform-aws-vault.git//modules/vault-cluster?ref=v0.0.1" + + # This AMI has both Vault and Consul installed + ami_id = "ami-1234abcd" +} +``` + +The `vault-cluster` module will provide the security group rules for Vault, but not for the Consul agent. To ensure those servers +have the necessary ports open for using Consul, you can use this module as follows: + +```hcl +module "security_group_rules" { + source = "git::git@github.com:hashicorp/terraform-aws-consul.git//modules/consul-client-security-group-rules?ref=v0.0.2" + + security_group_id = "${module.vault_servers.security_group_id}" + + # ... (other params omitted) ... +} +``` + +Note the following parameters: + +* `source`: Use this parameter to specify the URL of this module. The double slash (`//`) is intentional + and required. Terraform uses it to specify subfolders within a Git repo (see [module + sources](https://www.terraform.io/docs/modules/sources.html)). The `ref` parameter specifies a specific Git tag in + this repo. That way, instead of using the latest version of this module from the `master` branch, which + will change every time you run Terraform, you're using a fixed version of the repo. + +* `security_group_id`: Use this parameter to specify the ID of the security group to which the rules in this module + should be added. + +You can find the other parameters in [variables.tf](variables.tf). + +Check out the [consul-cluster module](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/consul-cluster) for working sample code. 
diff --git a/modules/consul-client-security-group-rules/main.tf b/modules/consul-client-security-group-rules/main.tf new file mode 100644 index 00000000..74df0909 --- /dev/null +++ b/modules/consul-client-security-group-rules/main.tf @@ -0,0 +1,81 @@ +## --------------------------------------------------------------------------------------------------------------------- +# REQUIRE A SPECIFIC TERRAFORM VERSION OR HIGHER +# --------------------------------------------------------------------------------------------------------------------- + +terraform { + # This module is now only being tested with Terraform 0.13.x. However, to make upgrading easier, we are setting + # 0.12.26 as the minimum version, as that version added support for required_providers with source URLs, making it + # forwards compatible with 0.13.x code. + required_version = ">= 0.12.26" +} + +## --------------------------------------------------------------------------------------------------------------------- +# CREATE THE SECURITY GROUP RULES THAT CONTROL WHAT TRAFFIC CAN GO IN AND OUT OF A CONSUL AGENT CLUSTER +# --------------------------------------------------------------------------------------------------------------------- + +resource "aws_security_group_rule" "allow_serf_lan_tcp_inbound" { + count = length(var.allowed_inbound_cidr_blocks) >= 1 ? 1 : 0 + type = "ingress" + from_port = var.serf_lan_port + to_port = var.serf_lan_port + protocol = "tcp" + cidr_blocks = var.allowed_inbound_cidr_blocks + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_serf_lan_udp_inbound" { + count = length(var.allowed_inbound_cidr_blocks) >= 1 ? 
1 : 0 + type = "ingress" + from_port = var.serf_lan_port + to_port = var.serf_lan_port + protocol = "udp" + cidr_blocks = var.allowed_inbound_cidr_blocks + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_serf_lan_tcp_inbound_from_security_group_ids" { + count = var.allowed_inbound_security_group_count + type = "ingress" + from_port = var.serf_lan_port + to_port = var.serf_lan_port + protocol = "tcp" + source_security_group_id = element(var.allowed_inbound_security_group_ids, count.index) + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_serf_lan_udp_inbound_from_security_group_ids" { + count = var.allowed_inbound_security_group_count + type = "ingress" + from_port = var.serf_lan_port + to_port = var.serf_lan_port + protocol = "udp" + source_security_group_id = element(var.allowed_inbound_security_group_ids, count.index) + + security_group_id = var.security_group_id +} + +# Similar to the *_inbound_from_security_group_ids rules, allow inbound from ourself + +resource "aws_security_group_rule" "allow_serf_lan_tcp_inbound_from_self" { + type = "ingress" + from_port = var.serf_lan_port + to_port = var.serf_lan_port + protocol = "tcp" + self = true + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_serf_lan_udp_inbound_from_self" { + type = "ingress" + from_port = var.serf_lan_port + to_port = var.serf_lan_port + protocol = "udp" + self = true + + security_group_id = var.security_group_id +} + diff --git a/modules/consul-client-security-group-rules/variables.tf b/modules/consul-client-security-group-rules/variables.tf new file mode 100644 index 00000000..c9c25d00 --- /dev/null +++ b/modules/consul-client-security-group-rules/variables.tf @@ -0,0 +1,36 @@ +# --------------------------------------------------------------------------------------------------------------------- +# REQUIRED PARAMETERS +# You must provide a value for each of these 
parameters. +# --------------------------------------------------------------------------------------------------------------------- + +variable "security_group_id" { + description = "The ID of the security group to which we should add the Consul security group rules" +} + +variable "allowed_inbound_cidr_blocks" { + description = "A list of CIDR-formatted IP address ranges from which the EC2 Instances will allow connections to Consul" + type = list(string) + default = [] +} + +# --------------------------------------------------------------------------------------------------------------------- +# OPTIONAL PARAMETERS +# These parameters have reasonable defaults. +# --------------------------------------------------------------------------------------------------------------------- + +variable "allowed_inbound_security_group_ids" { + description = "A list of security group IDs that will be allowed to connect to Consul" + type = list(string) + default = [] +} + +variable "allowed_inbound_security_group_count" { + description = "The number of entries in var.allowed_inbound_security_group_ids. Ideally, this value could be computed dynamically, but we pass this variable to a Terraform resource's 'count' property and Terraform requires that 'count' be computed with literals or data sources only." + default = 0 +} + +variable "serf_lan_port" { + description = "The port used to handle gossip in the LAN. Required by all agents." 
+ default = 8301 +} + diff --git a/modules/consul-cluster/INOUT.md b/modules/consul-cluster/INOUT.md new file mode 100644 index 00000000..419122bd --- /dev/null +++ b/modules/consul-cluster/INOUT.md @@ -0,0 +1,72 @@ +## Providers + +| Name | Version | +|------|---------| +| aws | n/a | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:-----:| +| additional\_security\_group\_ids | A list of additional security group IDs to add to Consul EC2 Instances | `list(string)` | `[]` | no | +| allowed\_inbound\_cidr\_blocks | A list of CIDR-formatted IP address ranges from which the EC2 Instances will allow connections to Consul | `list(string)` | n/a | yes | +| allowed\_inbound\_security\_group\_count | The number of entries in var.allowed\_inbound\_security\_group\_ids. Ideally, this value could be computed dynamically, but we pass this variable to a Terraform resource's 'count' property and Terraform requires that 'count' be computed with literals or data sources only. | `number` | `0` | no | +| allowed\_inbound\_security\_group\_ids | A list of security group IDs that will be allowed to connect to Consul | `list(string)` | `[]` | no | +| allowed\_ssh\_cidr\_blocks | A list of CIDR-formatted IP address ranges from which the EC2 Instances will allow SSH connections | `list(string)` | `[]` | no | +| allowed\_ssh\_security\_group\_count | The number of entries in var.allowed\_ssh\_security\_group\_ids. Ideally, this value could be computed dynamically, but we pass this variable to a Terraform resource's 'count' property and Terraform requires that 'count' be computed with literals or data sources only. | `number` | `0` | no | +| allowed\_ssh\_security\_group\_ids | A list of security group IDs from which the EC2 Instances will allow SSH connections | `list(string)` | `[]` | no | +| ami\_id | The ID of the AMI to run in this cluster. Should be an AMI that had Consul installed and configured by the install-consul module. 
| `string` | n/a | yes | +| associate\_public\_ip\_address | If set to true, associate a public IP address with each EC2 Instance in the cluster. | `bool` | `false` | no | +| availability\_zones | The availability zones into which the EC2 Instances should be deployed. We recommend one availability zone per node in the cluster\_size variable. At least one of var.subnet\_ids or var.availability\_zones must be non-empty. | `list(string)` | n/a | yes | +| cli\_rpc\_port | The port used by all agents to handle RPC from the CLI. | `number` | `8400` | no | +| cluster\_name | The name of the Consul cluster (e.g. consul-stage). This variable is used to namespace all resources created by this module. | `string` | n/a | yes | +| cluster\_size | The number of nodes to have in the Consul cluster. We strongly recommend that you use either 3 or 5. | `number` | `3` | no | +| cluster\_tag\_key | Add a tag with this key and the value var.cluster\_tag\_value to each Instance in the ASG. This can be used to automatically find other Consul nodes and form a cluster. | `string` | `"consul-servers"` | no | +| cluster\_tag\_value | Add a tag with key var.cluster\_tag\_key and this value to each Instance in the ASG. This can be used to automatically find other Consul nodes and form a cluster. | `string` | `"auto-join"` | no | +| dns\_port | The port used to resolve DNS queries. | `number` | `8600` | no | +| enable\_https\_port | If set to true, allow access to the Consul HTTPS port defined via the https\_api\_port variable. | `bool` | `false` | no | +| enable\_iam\_setup | If true, create the IAM Role, IAM Instance Profile, and IAM Policies. If false, these will not be created, and you can pass in your own IAM Instance Profile via var.iam\_instance\_profile\_name. | `bool` | `true` | no | +| enabled\_metrics | List of autoscaling group metrics to enable. 
| `list(string)` | `[]` | no | +| health\_check\_grace\_period | Time, in seconds, after instance comes into service before checking health. | `number` | `300` | no | +| health\_check\_type | Controls how health checking is done. Must be one of EC2 or ELB. | `string` | `"EC2"` | no | +| http\_api\_port | The port used by clients to talk to the HTTP API | `number` | `8500` | no | +| https\_api\_port | The port used by clients to talk to the HTTPS API. Only used if enable\_https\_port is set to true. | `number` | `8501` | no | +| iam\_instance\_profile\_name | If enable\_iam\_setup is false then this will be the name of the IAM instance profile to attach | `string` | n/a | yes | +| iam\_permissions\_boundary | If set, restricts the created IAM role to the given permissions boundary | `string` | n/a | yes | +| instance\_profile\_path | Path in which to create the IAM instance profile. | `string` | `"/"` | no | +| instance\_type | The type of EC2 Instances to run for each node in the cluster (e.g. t2.micro). | `string` | n/a | yes | +| protect\_from\_scale\_in | (Optional) Allows setting instance protection. The autoscaling group will not select instances with this setting for termination during scale in events. | `bool` | `false` | no | +| root\_volume\_delete\_on\_termination | Whether the volume should be destroyed on instance termination. | `bool` | `true` | no | +| root\_volume\_ebs\_optimized | If true, the launched EC2 instance will be EBS-optimized. | `bool` | `false` | no | +| root\_volume\_encrypted | Encrypt the root volume at rest | `bool` | `false` | no | +| root\_volume\_size | The size, in GB, of the root EBS volume. | `number` | `50` | no | +| root\_volume\_type | The type of volume. Must be one of: standard, gp2, or io1. | `string` | `"standard"` | no | +| security\_group\_tags | Tags to be applied to the LC security group | `map(string)` | `{}` | no | +| serf\_lan\_port | The port used to handle gossip in the LAN. Required by all agents. 
| `number` | `8301` | no | +| serf\_wan\_port | The port used by servers to gossip over the WAN to other servers. | `number` | `8302` | no | +| server\_rpc\_port | The port used by servers to handle incoming requests from other agents. | `number` | `8300` | no | +| service\_linked\_role\_arn | The ARN of the service-linked role that the ASG will use to call other AWS services | `string` | n/a | yes | +| spot\_price | The maximum hourly price to pay for EC2 Spot Instances. | `number` | n/a | yes | +| ssh\_key\_name | The name of an EC2 Key Pair that can be used to SSH to the EC2 Instances in this cluster. Set to an empty string to not associate a Key Pair. | `string` | n/a | yes | +| ssh\_port | The port used for SSH connections | `number` | `22` | no | +| subnet\_ids | The subnet IDs into which the EC2 Instances should be deployed. We recommend one subnet ID per node in the cluster\_size variable. At least one of var.subnet\_ids or var.availability\_zones must be non-empty. | `list(string)` | n/a | yes | +| tags | List of extra tag blocks added to the autoscaling group configuration. Each element in the list is a map containing keys 'key', 'value', and 'propagate\_at\_launch' mapped to the respective values. | `list(object({ key : string, value : string, propagate_at_launch : bool }))` | `[]` | no | +| tenancy | The tenancy of the instance. Must be one of: null, default or dedicated. For EC2 Spot Instances only null or dedicated can be used. | `string` | n/a | yes | +| termination\_policies | A list of policies to decide how the instances in the auto scale group should be terminated. The allowed values are OldestInstance, NewestInstance, OldestLaunchConfiguration, ClosestToNextInstanceHour, Default. | `string` | `"Default"` | no | +| user\_data | A User Data script to execute while the server is booting. We recommend passing in a bash script that executes the run-consul script, which should have been installed in the Consul AMI by the install-consul module. 
| `string` | n/a | yes | +| vpc\_id | The ID of the VPC in which to deploy the Consul cluster | `string` | n/a | yes | +| wait\_for\_capacity\_timeout | A maximum duration that Terraform should wait for ASG instances to be healthy before timing out. Setting this to '0' causes Terraform to skip all Capacity Waiting behavior. | `string` | `"10m"` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| asg\_name | This is the name for the autoscaling group generated by the module | +| cluster\_size | This is the desired size of the consul cluster in the autoscaling group | +| cluster\_tag\_key | This is the tag key used to allow the consul servers to autojoin | +| cluster\_tag\_value | This is the tag value used to allow the consul servers to autojoin | +| iam\_role\_arn | This is the arn of instance role if enable\_iam\_setup variable is set to true | +| iam\_role\_id | This is the id of instance role if enable\_iam\_setup variable is set to true | +| launch\_config\_name | This is the name of the launch\_configuration used to bootstrap the cluster instances | +| security\_group\_id | This is the id of security group that governs ingress and egress for the cluster instances | + diff --git a/modules/consul-cluster/README.md b/modules/consul-cluster/README.md new file mode 100644 index 00000000..b9a3c559 --- /dev/null +++ b/modules/consul-cluster/README.md @@ -0,0 +1,395 @@ +# Consul Cluster + +This folder contains a [Terraform](https://www.terraform.io/) module to deploy a +[Consul](https://www.consul.io/) cluster in [AWS](https://aws.amazon.com/) on top of an Auto Scaling Group. This module +is designed to deploy an [Amazon Machine Image (AMI)](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html) +that has Consul installed via the [install-consul](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/install-consul) module in this Module. + + + +## How do you use this module? 
+ +This folder defines a [Terraform module](https://www.terraform.io/docs/modules/usage.html), which you can use in your +code by adding a `module` configuration and setting its `source` parameter to URL of this folder: + +```hcl +module "consul_cluster" { + # TODO: update this to the final URL + # Use version v0.0.5 of the consul-cluster module + source = "github.com/hashicorp/terraform-aws-consul//modules/consul-cluster?ref=v0.0.5" + + # Specify the ID of the Consul AMI. You should build this using the scripts in the install-consul module. + ami_id = "ami-abcd1234" + + # Add this tag to each node in the cluster + cluster_tag_key = "consul-cluster" + cluster_tag_value = "consul-cluster-example" + + # Configure and start Consul during boot. It will automatically form a cluster with all nodes that have that same tag. + user_data = <<-EOF + #!/bin/bash + /opt/consul/bin/run-consul --server --cluster-tag-key consul-cluster --cluster-tag-value consul-cluster-example + EOF + + # ... See variables.tf for the other parameters you must define for the consul-cluster module +} +``` + +Note the following parameters: + +* `source`: Use this parameter to specify the URL of the consul-cluster module. The double slash (`//`) is intentional + and required. Terraform uses it to specify subfolders within a Git repo (see [module + sources](https://www.terraform.io/docs/modules/sources.html)). The `ref` parameter specifies a specific Git tag in + this repo. That way, instead of using the latest version of this module from the `master` branch, which + will change every time you run Terraform, you're using a fixed version of the repo. + +* `ami_id`: Use this parameter to specify the ID of a Consul [Amazon Machine Image + (AMI)](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html) to deploy on each server in the cluster. 
You + should install Consul in this AMI using the scripts in the [install-consul](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/install-consul) module. + +* `user_data`: Use this parameter to specify a [User + Data](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html#user-data-shell-scripts) script that each + server will run during boot. This is where you can use the [run-consul script](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/run-consul) to configure and + run Consul. The `run-consul` script is one of the scripts installed by the [install-consul](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/install-consul) + module. + +You can find the other parameters in [variables.tf](variables.tf). + +Check out the [consul-cluster example](https://github.com/hashicorp/terraform-aws-consul/tree/master/examples/root-example) for fully-working sample code. + + + + +## How do you connect to the Consul cluster? + +### Using the HTTP API from your own computer + +If you want to connect to the cluster from your own computer, the easiest way is to use the [HTTP +API](https://www.consul.io/docs/agent/http.html). Note that this only works if the Consul cluster is running in public +subnets and/or your default VPC (as in the [consul-cluster example](https://github.com/hashicorp/terraform-aws-consul/tree/master/examples/root-example)), which is OK for testing +and experimentation, but NOT recommended for production usage. + +To use the HTTP API, you first need to get the public IP address of one of the Consul Servers. You can find Consul +servers by using AWS tags. 
If you're running the [consul-cluster example](https://github.com/hashicorp/terraform-aws-consul/tree/master/examples/root-example), the +[consul-examples-helper.sh script](https://github.com/hashicorp/terraform-aws-consul/tree/master/examples/consul-examples-helper/consul-examples-helper.sh) will do the tag lookup +for you automatically (note, you must have the [AWS CLI](https://aws.amazon.com/cli/), +[jq](https://stedolan.github.io/jq/), and the [Consul agent](https://www.consul.io/) installed locally): + +``` +> ../consul-examples-helper/consul-examples-helper.sh + +Your Consul servers are running at the following IP addresses: + +34.200.218.123 +34.205.127.138 +34.201.165.11 +``` + +You can use one of these IP addresses with the `members` command to see a list of cluster nodes: + +``` +> consul members -http-addr=11.22.33.44:8500 + +Node Address Status Type Build Protocol DC +i-0051c3ea00e9691a0 172.31.35.148:8301 alive client 0.8.0 2 us-east-1 +i-00aea529cce1761d4 172.31.47.236:8301 alive client 0.8.0 2 us-east-1 +i-01bc94ccfa032d82d 172.31.27.193:8301 alive client 0.8.0 2 us-east-1 +i-04271e97808f15d63 172.31.25.174:8301 alive server 0.8.0 2 us-east-1 +i-0483b07abe49ea7ff 172.31.5.42:8301 alive client 0.8.0 2 us-east-1 +i-098fb1ebd5ca443bf 172.31.55.203:8301 alive client 0.8.0 2 us-east-1 +i-0eb961b6825f7871c 172.31.65.9:8301 alive client 0.8.0 2 us-east-1 +i-0ee6dcf715adbff5f 172.31.67.235:8301 alive server 0.8.0 2 us-east-1 +i-0fd0e63682a94b245 172.31.54.84:8301 alive server 0.8.0 2 us-east-1 +``` + +You can also try inserting a value: + +``` +> consul kv put -http-addr=11.22.33.44:8500 foo bar + +Success! Data written to: foo +``` + +And reading that value back: + +``` +> consul kv get -http-addr=11.22.33.44:8500 foo + +bar +``` + +Finally, you can try opening up the Consul UI in your browser at the URL `http://11.22.33.44:8500/ui/`. 
+ +![Consul UI](https://github.com/hashicorp/terraform-aws-consul/blob/master/_docs/consul-ui-screenshot.png?raw=true) + + +### Using the Consul agent on another EC2 Instance + +The easiest way to run [Consul agent](https://www.consul.io/docs/agent/basics.html) and have it connect to the Consul +cluster is to use the same EC2 tags the Consul servers use to discover each other during bootstrapping. + +For example, imagine you deployed a Consul cluster in `us-east-1` as follows: + + + +```hcl +module "consul_cluster" { + source = "github.com/hashicorp/terraform-aws-consul//modules/consul-cluster?ref=v0.0.5" + + # Add this tag to each node in the cluster + cluster_tag_key = "consul-cluster" + cluster_tag_value = "consul-cluster-example" + + # ... Other params omitted ... +} +``` + +Using the `retry-join-ec2-xxx` params, you can run a Consul agent on an EC2 Instance and have it join the cluster as follows: + +``` +consul agent -retry-join-ec2-tag-key=consul-cluster -retry-join-ec2-tag-value=consul-cluster-example -data-dir=/tmp/consul +``` + +Two important notes about this command: + +1. By default, the Consul cluster nodes advertise their *private* IP addresses, so the command above only works from + EC2 Instances inside the same VPC (or any VPC with proper peering connections and route table entries). +1. In order to look up the EC2 tags, the EC2 Instance where you're running this command must have an IAM role with + the `ec2:DescribeInstances` permission. + + + +## How do you connect load balancers to the Auto Scaling Group (ASG)? + +You can use the [`aws_autoscaling_attachment`](https://www.terraform.io/docs/providers/aws/r/autoscaling_attachment.html) resource. + +For example, if you are using the new application or network load balancers: + +```hcl +resource "aws_lb_target_group" "test" { + // ... +} + +# Create a new Consul Cluster +module "consul" { + source ="..." + // ... 
+} + +# Create a new load balancer attachment +resource "aws_autoscaling_attachment" "asg_attachment_bar" { + autoscaling_group_name = "${module.consul.asg_name}" + alb_target_group_arn = "${aws_lb_target_group.test.arn}" +} +``` + +If you are using a "classic" load balancer: + +```hcl +# Create a new load balancer +resource "aws_elb" "bar" { + // ... +} + +# Create a new Consul Cluster +module "consul" { + source ="..." + // ... +} + +# Create a new load balancer attachment +resource "aws_autoscaling_attachment" "asg_attachment_bar" { + autoscaling_group_name = "${module.consul.asg_name}" + elb = "${aws_elb.bar.id}" +} +``` + + + +## What's included in this module? + +This module creates the following architecture: + +![Consul architecture](https://github.com/hashicorp/terraform-aws-consul/blob/master/_docs/architecture.png?raw=true) + +This architecture consists of the following resources: + +* [Auto Scaling Group](#auto-scaling-group) +* [EC2 Instance Tags](#ec2-instance-tags) +* [Security Group](#security-group) +* [IAM Role and Permissions](#iam-role-and-permissions) + + +### Auto Scaling Group + +This module runs Consul on top of an [Auto Scaling Group (ASG)](https://aws.amazon.com/autoscaling/). Typically, you +should run the ASG with 3 or 5 EC2 Instances spread across multiple [Availability +Zones](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html). Each of the EC2 +Instances should be running an AMI that has Consul installed via the [install-consul](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/install-consul) +module. You pass in the ID of the AMI to run using the `ami_id` input parameter. + + +### EC2 Instance Tags + +This module allows you to specify a tag to add to each EC2 instance in the ASG. 
We recommend using this tag with the +[retry_join_ec2](https://www.consul.io/docs/agent/options.html?#retry_join_ec2) configuration to allow the EC2 +Instances to find each other and automatically form a cluster. + + +### Security Group + +Each EC2 Instance in the ASG has a Security Group that allows: + +* All outbound requests +* All the inbound ports specified in the [Consul documentation](https://www.consul.io/docs/agent/options.html?#ports-used) + +The Security Group ID is exported as an output variable if you need to add additional rules. + +Check out the [Security section](#security) for more details. + + +### IAM Role and Permissions + +Each EC2 Instance in the ASG has an [IAM Role](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html) attached. +We give this IAM role a small set of IAM permissions that each EC2 Instance can use to automatically discover the other +Instances in its ASG and form a cluster with them. See the [run-consul required permissions +docs](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/run-consul#required-permissions) for details. + +The IAM Role ARN is exported as an output variable if you need to add additional permissions. + +You can disable the creation of the IAM role and policies if needed by setting the `enable_iam_setup` variable to false. This allows you to create the role separately from this module and supply your own IAM instance profile via the `iam_instance_profile_name` variable. + + +## How do you roll out updates? + +If you want to deploy a new version of Consul across the cluster, the best way to do that is to: + +1. Build a new AMI. +1. Set the `ami_id` parameter to the ID of the new AMI. +1. Run `terraform apply`. + +This updates the Launch Configuration of the ASG, so any new Instances in the ASG will have your new AMI, but it does +NOT actually deploy those new instances. To make that happen, you should do the following: + +1. 
Issue an API call to one of the old Instances in the ASG to have it leave gracefully. E.g.: + + ``` + curl -X PUT <OLD_INSTANCE_IP>:8500/v1/agent/leave + ``` + +1. Once the instance has left the cluster, terminate it: + + ``` + aws ec2 terminate-instances --instance-ids <OLD_INSTANCE_ID> + ``` + +1. After a minute or two, the ASG should automatically launch a new Instance, with the new AMI, to replace the old one. + +1. Wait for the new Instance to boot and join the cluster. + +1. Repeat these steps for each of the other old Instances in the ASG. + +We will add a script in the future to automate this process (PRs are welcome!). + + + + +## What happens if a node crashes? + +There are two ways a Consul node may go down: + +1. The Consul process may crash. In that case, `systemd` should restart it automatically. +1. The EC2 Instance running Consul dies. In that case, the Auto Scaling Group should launch a replacement automatically. + Note that in this case, since the Consul agent did not exit gracefully, and the replacement will have a different ID, + you may have to manually clean out the old nodes using the [force-leave + command](https://www.consul.io/docs/commands/force-leave.html). We may add a script to do this + automatically in the future. For more info, see the [Consul Outage + documentation](https://www.consul.io/docs/guides/outage.html). + + + + +## Security + +Here are some of the main security considerations to keep in mind when using this module: + +1. [Encryption in transit](#encryption-in-transit) +1. [Encryption at rest](#encryption-at-rest) +1. [Dedicated instances](#dedicated-instances) +1. [Security groups](#security-groups) +1. [SSH access](#ssh-access) + + +### Encryption in transit + +Consul can encrypt all of its network traffic. For instructions on enabling network encryption, have a look at the +[How do you handle encryption documentation](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/run-consul#how-do-you-handle-encryption). 
+ + +### Encryption at rest + +The EC2 Instances in the cluster store all their data on the root EBS Volume. To enable encryption for the data at +rest, you must enable encryption in your Consul AMI. If you're creating the AMI using Packer (e.g. as shown in +the [consul-ami example](https://github.com/hashicorp/terraform-aws-consul/tree/master/examples/consul-ami)), you need to set the [encrypt_boot +parameter](https://www.packer.io/docs/builders/amazon-ebs.html#encrypt_boot) to `true`. + + +### Dedicated instances + +If you wish to use dedicated instances, you can set the `tenancy` parameter to `"dedicated"` in this module. + + +### Security groups + +This module attaches a security group to each EC2 Instance that allows inbound requests as follows: + +* **Consul**: For all the [ports used by Consul](https://www.consul.io/docs/agent/options.html#ports), you can + use the `allowed_inbound_cidr_blocks` parameter to control the list of + [CIDR blocks](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) that will be allowed access and the `allowed_inbound_security_group_ids` parameter to control the security groups that will be allowed access. + +* **SSH**: For the SSH port (default: 22), you can use the `allowed_ssh_cidr_blocks` parameter to control the list of + [CIDR blocks](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) that will be allowed access. You can use the `allowed_ssh_security_group_ids` parameter (with `allowed_ssh_security_group_count` set to the number of entries in that list) to control the list of source Security Groups that will be allowed access. + +Note that all the ports mentioned above are configurable via the `xxx_port` variables (e.g. `server_rpc_port`). See +[variables.tf](variables.tf) for the full list. + + + +### SSH access + +You can associate an [EC2 Key Pair](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html) with each +of the EC2 Instances in this cluster by specifying the Key Pair's name in the `ssh_key_name` variable. 
If you don't +want to associate a Key Pair with these servers, set `ssh_key_name` to an empty string. + + + + + +## What's NOT included in this module? + +This module does NOT handle the following items, which you may want to provide on your own: + +* [Monitoring, alerting, log aggregation](#monitoring-alerting-log-aggregation) +* [VPCs, subnets, route tables](#vpcs-subnets-route-tables) +* [DNS entries](#dns-entries) + + +### Monitoring, alerting, log aggregation + +This module does not include anything for monitoring, alerting, or log aggregation. All ASGs and EC2 Instances come +with limited [CloudWatch](https://aws.amazon.com/cloudwatch/) metrics built-in, but beyond that, you will have to +provide your own solutions. + + +### VPCs, subnets, route tables + +This module assumes you've already created your network topology (VPC, subnets, route tables, etc). You will need to +pass in the relevant info about your network topology (e.g. `vpc_id`, `subnet_ids`) as input variables to this +module. + + +### DNS entries + +This module does not create any DNS entries for Consul (e.g. in Route 53). + + diff --git a/modules/consul-cluster/main.tf b/modules/consul-cluster/main.tf new file mode 100644 index 00000000..645b39d5 --- /dev/null +++ b/modules/consul-cluster/main.tf @@ -0,0 +1,242 @@ +# ---------------------------------------------------------------------------------------------------------------------- +# REQUIRE A SPECIFIC TERRAFORM VERSION OR HIGHER +# ---------------------------------------------------------------------------------------------------------------------- + +terraform { + # This module is now only being tested with Terraform 0.13.x. However, to make upgrading easier, we are setting + # 0.12.26 as the minimum version, as that version added support for required_providers with source URLs, making it + # forwards compatible with 0.13.x code. 
+ required_version = ">= 0.12.26" +} + +# --------------------------------------------------------------------------------------------------------------------- +# CREATE AN AUTO SCALING GROUP (ASG) TO RUN CONSUL +# --------------------------------------------------------------------------------------------------------------------- + +resource "aws_autoscaling_group" "autoscaling_group" { + name_prefix = var.cluster_name + + launch_configuration = aws_launch_configuration.launch_configuration.name + + availability_zones = var.availability_zones + vpc_zone_identifier = var.subnet_ids + + # Run a fixed number of instances in the ASG + min_size = var.cluster_size + max_size = var.cluster_size + desired_capacity = var.cluster_size + termination_policies = [var.termination_policies] + + health_check_type = var.health_check_type + health_check_grace_period = var.health_check_grace_period + wait_for_capacity_timeout = var.wait_for_capacity_timeout + service_linked_role_arn = var.service_linked_role_arn + + enabled_metrics = var.enabled_metrics + + protect_from_scale_in = var.protect_from_scale_in + + lifecycle { + # As of AWS Provider 3.x, inline load_balancers and target_group_arns + # in an aws_autoscaling_group take precedence over attachment resources. + # Since the consul-cluster module does not define any Load Balancers, + # it's safe to assume that we will always want to favor an attachment + # over these inline properties. 
+ # + # For further discussion and links to relevant documentation, see + # https://github.com/hashicorp/terraform-aws-vault/issues/210 + ignore_changes = [load_balancers, target_group_arns] + } +} + +# --------------------------------------------------------------------------------------------------------------------- +# CREATE LAUNCH CONFIGURATION TO DEFINE WHAT RUNS ON EACH INSTANCE IN THE ASG +# --------------------------------------------------------------------------------------------------------------------- + +resource "aws_launch_configuration" "launch_configuration" { + name_prefix = "${var.cluster_name}-" + image_id = var.ami_id + instance_type = var.instance_type + user_data = var.user_data + spot_price = var.spot_price + + # added to https://github.com/hashicorp/terraform-aws-vault/tree/v0.14.1/modules/vault-cluster + metadata_options { + http_endpoint = "enabled" + http_tokens = "required" + http_put_response_hop_limit = 3 + } + + iam_instance_profile = var.enable_iam_setup ? element( + concat(aws_iam_instance_profile.instance_profile.*.name, [""]), + 0, + ) : var.iam_instance_profile_name + key_name = var.ssh_key_name + + security_groups = concat( + [aws_security_group.lc_security_group.id], + var.additional_security_group_ids, + ) + placement_tenancy = var.tenancy + associate_public_ip_address = var.associate_public_ip_address + + ebs_optimized = var.root_volume_ebs_optimized + + root_block_device { + volume_type = var.root_volume_type + volume_size = var.root_volume_size + delete_on_termination = var.root_volume_delete_on_termination + encrypted = var.root_volume_encrypted + } + + # Important note: whenever using a launch configuration with an auto scaling group, you must set + # create_before_destroy = true. However, as soon as you set create_before_destroy = true in one resource, you must + # also set it in every resource that it depends on, or you'll get an error about cyclic dependencies (especially when + # removing resources). 
For more info, see: + # + # https://www.terraform.io/docs/providers/aws/r/launch_configuration.html + # https://terraform.io/docs/configuration/resources.html + lifecycle { + create_before_destroy = true + } +} + +# --------------------------------------------------------------------------------------------------------------------- +# CREATE A SECURITY GROUP TO CONTROL WHAT REQUESTS CAN GO IN AND OUT OF EACH EC2 INSTANCE +# --------------------------------------------------------------------------------------------------------------------- + +resource "aws_security_group" "lc_security_group" { + name_prefix = var.cluster_name + description = "Security group for the ${var.cluster_name} launch configuration" + vpc_id = var.vpc_id + + # aws_launch_configuration.launch_configuration in this module sets create_before_destroy to true, which means + # everything it depends on, including this resource, must set it as well, or you'll get cyclic dependency errors + # when you try to do a terraform destroy. + lifecycle { + create_before_destroy = true + } + + tags = merge( + { + "Name" = var.cluster_name + }, + var.security_group_tags, + ) +} + +resource "aws_security_group_rule" "allow_ssh_inbound" { + count = length(var.allowed_ssh_cidr_blocks) >= 1 ? 
1 : 0 + type = "ingress" + from_port = var.ssh_port + to_port = var.ssh_port + protocol = "tcp" + cidr_blocks = var.allowed_ssh_cidr_blocks + + security_group_id = aws_security_group.lc_security_group.id +} + +resource "aws_security_group_rule" "allow_ssh_inbound_from_security_group_ids" { + count = var.allowed_ssh_security_group_count + type = "ingress" + from_port = var.ssh_port + to_port = var.ssh_port + protocol = "tcp" + source_security_group_id = element(var.allowed_ssh_security_group_ids, count.index) + + security_group_id = aws_security_group.lc_security_group.id +} + +resource "aws_security_group_rule" "allow_all_outbound" { + type = "egress" + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + + security_group_id = aws_security_group.lc_security_group.id +} + +# --------------------------------------------------------------------------------------------------------------------- +# THE CONSUL-SPECIFIC INBOUND/OUTBOUND RULES COME FROM THE CONSUL-SECURITY-GROUP-RULES MODULE +# --------------------------------------------------------------------------------------------------------------------- + +module "security_group_rules" { + source = "../consul-security-group-rules" + + security_group_id = aws_security_group.lc_security_group.id + allowed_inbound_cidr_blocks = var.allowed_inbound_cidr_blocks + allowed_inbound_security_group_ids = var.allowed_inbound_security_group_ids + allowed_inbound_security_group_count = var.allowed_inbound_security_group_count + + server_rpc_port = var.server_rpc_port + cli_rpc_port = var.cli_rpc_port + serf_lan_port = var.serf_lan_port + serf_wan_port = var.serf_wan_port + http_api_port = var.http_api_port + https_api_port = var.https_api_port + dns_port = var.dns_port + + enable_https_port = var.enable_https_port +} + +# --------------------------------------------------------------------------------------------------------------------- +# ATTACH AN IAM ROLE TO EACH EC2 INSTANCE +# We can use the IAM 
role to grant the instance IAM permissions so we can use the AWS CLI without having to figure out +# how to get our secret AWS access keys onto the box. +# --------------------------------------------------------------------------------------------------------------------- + +resource "aws_iam_instance_profile" "instance_profile" { + count = var.enable_iam_setup ? 1 : 0 + + name_prefix = var.cluster_name + path = var.instance_profile_path + role = element(concat(aws_iam_role.instance_role.*.name, [""]), 0) + + # aws_launch_configuration.launch_configuration in this module sets create_before_destroy to true, which means + # everything it depends on, including this resource, must set it as well, or you'll get cyclic dependency errors + # when you try to do a terraform destroy. + lifecycle { + create_before_destroy = true + } +} + +resource "aws_iam_role" "instance_role" { + count = var.enable_iam_setup ? 1 : 0 + + name_prefix = var.cluster_name + assume_role_policy = data.aws_iam_policy_document.instance_role.json + + permissions_boundary = var.iam_permissions_boundary + + # aws_iam_instance_profile.instance_profile in this module sets create_before_destroy to true, which means + # everything it depends on, including this resource, must set it as well, or you'll get cyclic dependency errors + # when you try to do a terraform destroy. 
+ lifecycle { + create_before_destroy = true + } +} + +data "aws_iam_policy_document" "instance_role" { + statement { + effect = "Allow" + actions = ["sts:AssumeRole"] + + principals { + type = "Service" + identifiers = ["ec2.amazonaws.com"] + } + } +} + +# --------------------------------------------------------------------------------------------------------------------- +# THE IAM POLICIES COME FROM THE CONSUL-IAM-POLICIES MODULE +# --------------------------------------------------------------------------------------------------------------------- + +module "iam_policies" { + source = "../consul-iam-policies" + + enabled = var.enable_iam_setup + iam_role_id = element(concat(aws_iam_role.instance_role.*.id, [""]), 0) +} + diff --git a/modules/consul-cluster/outputs.tf b/modules/consul-cluster/outputs.tf new file mode 100644 index 00000000..a980d658 --- /dev/null +++ b/modules/consul-cluster/outputs.tf @@ -0,0 +1,40 @@ +output "asg_name" { + value = aws_autoscaling_group.autoscaling_group.name + description = "This is the name for the autoscaling group generated by the module" +} + +output "cluster_size" { + value = aws_autoscaling_group.autoscaling_group.desired_capacity + description = "This is the desired size of the consul cluster in the autoscaling group" +} + +output "launch_config_name" { + value = aws_launch_configuration.launch_configuration.name + description = "This is the name of the launch_configuration used to bootstrap the cluster instances" +} + +output "iam_role_arn" { + value = element(concat(aws_iam_role.instance_role.*.arn, [""]), 0) + description = "This is the arn of instance role if enable_iam_setup variable is set to true" +} + +output "iam_role_id" { + value = element(concat(aws_iam_role.instance_role.*.id, [""]), 0) + description = "This is the id of instance role if enable_iam_setup variable is set to true" +} + +output "security_group_id" { + value = aws_security_group.lc_security_group.id + description = "This is the id of security 
group that governs ingress and egress for the cluster instances" +} + +output "cluster_tag_key" { + value = var.cluster_tag_key + description = "This is the tag key used to allow the consul servers to autojoin" +} + +output "cluster_tag_value" { + value = var.cluster_tag_value + description = "This is the tag value used to allow the consul servers to autojoin" +} + diff --git a/modules/consul-cluster/variables.tf b/modules/consul-cluster/variables.tf new file mode 100644 index 00000000..eb7a941b --- /dev/null +++ b/modules/consul-cluster/variables.tf @@ -0,0 +1,291 @@ +# --------------------------------------------------------------------------------------------------------------------- +# REQUIRED PARAMETERS +# You must provide a value for each of these parameters. +# --------------------------------------------------------------------------------------------------------------------- + +variable "cluster_name" { + description = "The name of the Consul cluster (e.g. consul-stage). This variable is used to namespace all resources created by this module." + type = string +} + +variable "ami_id" { + description = "The ID of the AMI to run in this cluster. Should be an AMI that had Consul installed and configured by the install-consul module." + type = string +} + +variable "instance_type" { + description = "The type of EC2 Instances to run for each node in the cluster (e.g. t2.micro)." + type = string +} + +variable "vpc_id" { + description = "The ID of the VPC in which to deploy the Consul cluster" + type = string +} + +variable "allowed_inbound_cidr_blocks" { + description = "A list of CIDR-formatted IP address ranges from which the EC2 Instances will allow connections to Consul" + type = list(string) +} + +variable "user_data" { + description = "A User Data script to execute while the server is booting. We recommend passing in a bash script that executes the run-consul script, which should have been installed in the Consul AMI by the install-consul module." 
+ type = string +} + +# --------------------------------------------------------------------------------------------------------------------- +# OPTIONAL PARAMETERS +# These parameters have reasonable defaults. +# --------------------------------------------------------------------------------------------------------------------- + +variable "cluster_size" { + description = "The number of nodes to have in the Consul cluster. We strongly recommended that you use either 3 or 5." + type = number + default = 3 +} + +variable "cluster_tag_key" { + description = "Add a tag with this key and the value var.cluster_tag_value to each Instance in the ASG. This can be used to automatically find other Consul nodes and form a cluster." + type = string + default = "consul-servers" +} + +variable "cluster_tag_value" { + description = "Add a tag with key var.clsuter_tag_key and this value to each Instance in the ASG. This can be used to automatically find other Consul nodes and form a cluster." + type = string + default = "auto-join" +} + +variable "subnet_ids" { + description = "The subnet IDs into which the EC2 Instances should be deployed. We recommend one subnet ID per node in the cluster_size variable. At least one of var.subnet_ids or var.availability_zones must be non-empty." + type = list(string) + default = null +} + +variable "availability_zones" { + description = "The availability zones into which the EC2 Instances should be deployed. We recommend one availability zone per node in the cluster_size variable. At least one of var.subnet_ids or var.availability_zones must be non-empty." + type = list(string) + default = null +} + +variable "ssh_key_name" { + description = "The name of an EC2 Key Pair that can be used to SSH to the EC2 Instances in this cluster. Set to an empty string to not associate a Key Pair." 
+ type = string + default = null +} + +variable "allowed_ssh_cidr_blocks" { + description = "A list of CIDR-formatted IP address ranges from which the EC2 Instances will allow SSH connections" + type = list(string) + default = [] +} + +variable "allowed_ssh_security_group_ids" { + description = "A list of security group IDs from which the EC2 Instances will allow SSH connections" + type = list(string) + default = [] +} + +variable "allowed_ssh_security_group_count" { + description = "The number of entries in var.allowed_ssh_security_group_ids. Ideally, this value could be computed dynamically, but we pass this variable to a Terraform resource's 'count' property and Terraform requires that 'count' be computed with literals or data sources only." + type = number + default = 0 +} + +variable "allowed_inbound_security_group_ids" { + description = "A list of security group IDs that will be allowed to connect to Consul" + type = list(string) + default = [] +} + +variable "allowed_inbound_security_group_count" { + description = "The number of entries in var.allowed_inbound_security_group_ids. Ideally, this value could be computed dynamically, but we pass this variable to a Terraform resource's 'count' property and Terraform requires that 'count' be computed with literals or data sources only." + type = number + default = 0 +} + +variable "additional_security_group_ids" { + description = "A list of additional security group IDs to add to Consul EC2 Instances" + type = list(string) + default = [] +} + +variable "security_group_tags" { + description = "Tags to be applied to the LC security group" + type = map(string) + default = {} +} + +variable "termination_policies" { + description = "A list of policies to decide how the instances in the auto scale group should be terminated. The allowed values are OldestInstance, NewestInstance, OldestLaunchConfiguration, ClosestToNextInstanceHour, Default." 
+ type = string + default = "Default" +} + +variable "associate_public_ip_address" { + description = "If set to true, associate a public IP address with each EC2 Instance in the cluster." + type = bool + default = false +} + +variable "spot_price" { + description = "The maximum hourly price to pay for EC2 Spot Instances." + type = number + default = null +} + +variable "tenancy" { + description = "The tenancy of the instance. Must be one of: null, default or dedicated. For EC2 Spot Instances only null or dedicated can be used." + type = string + default = null +} + +variable "root_volume_ebs_optimized" { + description = "If true, the launched EC2 instance will be EBS-optimized." + type = bool + default = false +} + +variable "root_volume_type" { + description = "The type of volume. Must be one of: standard, gp2, or io1." + type = string + default = "standard" +} + +variable "root_volume_size" { + description = "The size, in GB, of the root EBS volume." + type = number + default = 50 +} + +variable "root_volume_delete_on_termination" { + description = "Whether the volume should be destroyed on instance termination." + type = bool + default = true +} + +variable "root_volume_encrypted" { + description = "Encrypt the root volume at rest" + type = bool + default = false +} + +variable "wait_for_capacity_timeout" { + description = "A maximum duration that Terraform should wait for ASG instances to be healthy before timing out. Setting this to '0' causes Terraform to skip all Capacity Waiting behavior." + type = string + default = "10m" +} + +variable "service_linked_role_arn" { + description = "The ARN of the service-linked role that the ASG will use to call other AWS services" + type = string + default = null +} + +variable "health_check_type" { + description = "Controls how health checking is done. Must be one of EC2 or ELB." 
+ type = string + default = "EC2" +} + +variable "health_check_grace_period" { + description = "Time, in seconds, after instance comes into service before checking health." + type = number + default = 300 +} + +variable "instance_profile_path" { + description = "Path in which to create the IAM instance profile." + type = string + default = "/" +} + +variable "server_rpc_port" { + description = "The port used by servers to handle incoming requests from other agents." + type = number + default = 8300 +} + +variable "cli_rpc_port" { + description = "The port used by all agents to handle RPC from the CLI." + type = number + default = 8400 +} + +variable "serf_lan_port" { + description = "The port used to handle gossip in the LAN. Required by all agents." + type = number + default = 8301 +} + +variable "serf_wan_port" { + description = "The port used by servers to gossip over the WAN to other servers." + type = number + default = 8302 +} + +variable "http_api_port" { + description = "The port used by clients to talk to the HTTP API" + type = number + default = 8500 +} + +variable "https_api_port" { + description = "The port used by clients to talk to the HTTPS API. Only used if enable_https_port is set to true." + type = number + default = 8501 +} + +variable "dns_port" { + description = "The port used to resolve DNS queries." + type = number + default = 8600 +} + +variable "ssh_port" { + description = "The port used for SSH connections" + type = number + default = 22 +} + +variable "tags" { + description = "List of extra tag blocks added to the autoscaling group configuration. Each element in the list is a map containing keys 'key', 'value', and 'propagate_at_launch' mapped to the respective values." + type = list(object({ key : string, value : string, propagate_at_launch : bool })) + default = [] +} + +variable "enabled_metrics" { + description = "List of autoscaling group metrics to enable." 
+ type = list(string) + default = [] +} + +variable "enable_iam_setup" { + description = "If true, create the IAM Role, IAM Instance Profile, and IAM Policies. If false, these will not be created, and you can pass in your own IAM Instance Profile via var.iam_instance_profile_name." + type = bool + default = true +} + +variable "enable_https_port" { + description = "If set to true, allow access to the Consul HTTPS port defined via the https_api_port variable." + type = bool + default = false +} + +variable "iam_instance_profile_name" { + description = "If enable_iam_setup is false then this will be the name of the IAM instance profile to attach" + type = string + default = null +} + +variable "iam_permissions_boundary" { + description = "If set, restricts the created IAM role to the given permissions boundary" + type = string + default = null +} + +variable "protect_from_scale_in" { + description = "(Optional) Allows setting instance protection. The autoscaling group will not select instances with this setting for termination during scale in events." + type = bool + default = false +} diff --git a/modules/consul-iam-policies/INOUT.md b/modules/consul-iam-policies/INOUT.md new file mode 100644 index 00000000..74d6b468 --- /dev/null +++ b/modules/consul-iam-policies/INOUT.md @@ -0,0 +1,17 @@ +## Providers + +| Name | Version | +|------|---------| +| aws | n/a | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:-----:| +| enabled | Give the option to disable this module if required | `bool` | `true` | no | +| iam\_role\_id | The ID of the IAM Role to which these IAM policies should be attached | `string` | n/a | yes | + +## Outputs + +No output. 
+ diff --git a/modules/consul-iam-policies/README.md b/modules/consul-iam-policies/README.md new file mode 100644 index 00000000..7725295f --- /dev/null +++ b/modules/consul-iam-policies/README.md @@ -0,0 +1,47 @@ +# Consul IAM Policies + +This folder contains a [Terraform](https://www.terraform.io/) module that defines the IAM Policies used by a +[Consul](https://www.consul.io/) cluster. + +Normally, you'd get these policies by default if you're using the [consul-cluster submodule](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/consul-cluster), +but if you're running Consul on top of a different cluster (e.g. you're co-locating Consul with Nomad), then you can +use this module to add the necessary IAM policies to that cluster. For example, imagine you were using the +[nomad-cluster module](https://github.com/hashicorp/terraform-aws-nomad/tree/master/modules/nomad-cluster) to run a +cluster of servers that have both Nomad and Consul on each node: + +```hcl +module "nomad_servers" { + source = "git::git@github.com:hashicorp/terraform-aws-nomad.git//modules/nomad-cluster?ref=v0.0.1" + + # This AMI has both Nomad and Consul installed + ami_id = "ami-1234abcd" +} +``` + +The `nomad-cluster` module will provide the IAM policies for Nomad, but not for Consul. To ensure those servers +have the necessary IAM permissions to run Consul, you can use this module as follows: + +```hcl +module "iam_policies" { + source = "git::git@github.com:hashicorp/terraform-aws-consul.git//modules/consul-iam-policies?ref=v0.0.2" + + iam_role_id = "${module.nomad_servers.iam_role_id}" + + # ... (other params omitted) ... +} +``` + +Note the following parameters: + +* `source`: Use this parameter to specify the URL of this module. The double slash (`//`) is intentional + and required. Terraform uses it to specify subfolders within a Git repo (see [module + sources](https://www.terraform.io/docs/modules/sources.html)). 
The `ref` parameter specifies a specific Git tag in + this repo. That way, instead of using the latest version of this module from the `master` branch, which + will change every time you run Terraform, you're using a fixed version of the repo. + +* `iam_role_id`: Use this parameter to specify the ID of the IAM Role to which the rules in this module + should be added. + +You can find the other parameters in [variables.tf](variables.tf). + +Check out the [consul-cluster example](https://github.com/hashicorp/terraform-aws-consul/tree/master/examples/root-example) for working sample code. diff --git a/modules/consul-iam-policies/main.tf b/modules/consul-iam-policies/main.tf new file mode 100644 index 00000000..24f1547b --- /dev/null +++ b/modules/consul-iam-policies/main.tf @@ -0,0 +1,36 @@ +# --------------------------------------------------------------------------------------------------------------------- +# REQUIRE A SPECIFIC TERRAFORM VERSION OR HIGHER +# --------------------------------------------------------------------------------------------------------------------- + +terraform { + # This module is now only being tested with Terraform 0.13.x. However, to make upgrading easier, we are setting + # 0.12.26 as the minimum version, as that version added support for required_providers with source URLs, making it + # forwards compatible with 0.13.x code. + required_version = ">= 0.12.26" +} + +# --------------------------------------------------------------------------------------------------------------------- +# ATTACH AN IAM POLICY THAT ALLOWS THE CONSUL NODES TO AUTOMATICALLY DISCOVER EACH OTHER AND FORM A CLUSTER +# --------------------------------------------------------------------------------------------------------------------- + +resource "aws_iam_role_policy" "auto_discover_cluster" { + count = var.enabled ? 
1 : 0 + name = "auto-discover-cluster" + role = var.iam_role_id + policy = data.aws_iam_policy_document.auto_discover_cluster.json +} + +data "aws_iam_policy_document" "auto_discover_cluster" { + statement { + effect = "Allow" + + actions = [ + "ec2:DescribeInstances", + "ec2:DescribeTags", + "autoscaling:DescribeAutoScalingGroups", + ] + + resources = ["*"] + } +} + diff --git a/modules/consul-iam-policies/variables.tf b/modules/consul-iam-policies/variables.tf new file mode 100644 index 00000000..1bda7959 --- /dev/null +++ b/modules/consul-iam-policies/variables.tf @@ -0,0 +1,16 @@ +# --------------------------------------------------------------------------------------------------------------------- +# REQUIRED PARAMETERS +# You must provide a value for each of these parameters. +# --------------------------------------------------------------------------------------------------------------------- + +variable "iam_role_id" { + description = "The ID of the IAM Role to which these IAM policies should be attached" + type = string +} + +variable "enabled" { + description = "Give the option to disable this module if required" + type = bool + default = true +} + diff --git a/modules/consul-security-group-rules/INOUT.md b/modules/consul-security-group-rules/INOUT.md new file mode 100644 index 00000000..11498e82 --- /dev/null +++ b/modules/consul-security-group-rules/INOUT.md @@ -0,0 +1,27 @@ +## Providers + +| Name | Version | +|------|---------| +| aws | n/a | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:-----:| +| allowed\_inbound\_cidr\_blocks | A list of CIDR-formatted IP address ranges from which the EC2 Instances will allow connections to Consul | `list(string)` | `[]` | no | +| allowed\_inbound\_security\_group\_count | The number of entries in var.allowed\_inbound\_security\_group\_ids. 
Ideally, this value could be computed dynamically, but we pass this variable to a Terraform resource's 'count' property and Terraform requires that 'count' be computed with literals or data sources only. | `number` | `0` | no | +| allowed\_inbound\_security\_group\_ids | A list of security group IDs that will be allowed to connect to Consul | `list(string)` | `[]` | no | +| cli\_rpc\_port | The port used by all agents to handle RPC from the CLI. | `number` | `8400` | no | +| dns\_port | The port used to resolve DNS queries. | `number` | `8600` | no | +| enable\_https\_port | If set to true, allow access to the Consul HTTPS port defined via the https\_api\_port variable. | `bool` | `false` | no | +| http\_api\_port | The port used by clients to talk to the HTTP API | `number` | `8500` | no | +| https\_api\_port | The port used by clients to talk to the HTTPS API. Only used if enable\_https\_port is set to true. | `number` | `8501` | no | +| security\_group\_id | The ID of the security group to which we should add the Consul security group rules | `string` | n/a | yes | +| serf\_lan\_port | The port used to handle gossip in the LAN. Required by all agents. | `number` | `8301` | no | +| serf\_wan\_port | The port used by servers to gossip over the WAN to other servers. | `number` | `8302` | no | +| server\_rpc\_port | The port used by servers to handle incoming requests from other agents. | `number` | `8300` | no | + +## Outputs + +No output. + diff --git a/modules/consul-security-group-rules/README.md b/modules/consul-security-group-rules/README.md new file mode 100644 index 00000000..31515518 --- /dev/null +++ b/modules/consul-security-group-rules/README.md @@ -0,0 +1,47 @@ +# Consul Security Group Rules Module + +This folder contains a [Terraform](https://www.terraform.io/) module that defines the security group rules used by a +[Consul](https://www.consul.io/) cluster to control the traffic that is allowed to go in and out of the cluster. 
+ +Normally, you'd get these rules by default if you're using the [consul-cluster module](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/consul-cluster), but if +you're running Consul on top of a different cluster, then you can use this module to add the necessary security group +rules to that cluster. For example, imagine you were using the [nomad-cluster +module](https://github.com/hashicorp/terraform-aws-nomad/tree/master/modules/nomad-cluster) to run a cluster of +servers that have both Nomad and Consul on each node: + +```hcl +module "nomad_servers" { + source = "git::git@github.com:hashicorp/terraform-aws-nomad.git//modules/nomad-cluster?ref=v0.0.1" + + # This AMI has both Nomad and Consul installed + ami_id = "ami-1234abcd" +} +``` + +The `nomad-cluster` module will provide the security group rules for Nomad, but not for Consul. To ensure those servers +have the necessary ports open for using Consul, you can use this module as follows: + +```hcl +module "security_group_rules" { + source = "git::git@github.com:hashicorp/terraform-aws-consul.git//modules/consul-security-group-rules?ref=v0.0.2" + + security_group_id = "${module.nomad_servers.security_group_id}" + + # ... (other params omitted) ... +} +``` + +Note the following parameters: + +* `source`: Use this parameter to specify the URL of this module. The double slash (`//`) is intentional + and required. Terraform uses it to specify subfolders within a Git repo (see [module + sources](https://www.terraform.io/docs/modules/sources.html)). The `ref` parameter specifies a specific Git tag in + this repo. That way, instead of using the latest version of this module from the `master` branch, which + will change every time you run Terraform, you're using a fixed version of the repo. + +* `security_group_id`: Use this parameter to specify the ID of the security group to which the rules in this module + should be added. + +You can find the other parameters in [variables.tf](variables.tf). 
+ +Check out the [consul-cluster example](https://github.com/hashicorp/terraform-aws-consul/tree/master/examples/root-example) for working sample code. diff --git a/modules/consul-security-group-rules/main.tf b/modules/consul-security-group-rules/main.tf new file mode 100644 index 00000000..4db0c42b --- /dev/null +++ b/modules/consul-security-group-rules/main.tf @@ -0,0 +1,288 @@ +# ---------------------------------------------------------------------------------------------------------------------- +# REQUIRE A SPECIFIC TERRAFORM VERSION OR HIGHER +# ---------------------------------------------------------------------------------------------------------------------- +terraform { + # This module is now only being tested with Terraform 0.13.x. However, to make upgrading easier, we are setting + # 0.12.26 as the minimum version, as that version added support for required_providers with source URLs, making it + # forwards compatible with 0.13.x code. + required_version = ">= 0.12.26" +} + +# --------------------------------------------------------------------------------------------------------------------- +# CREATE THE SECURITY GROUP RULES THAT CONTROL WHAT TRAFFIC CAN GO IN AND OUT OF A CONSUL CLUSTER +# --------------------------------------------------------------------------------------------------------------------- + +resource "aws_security_group_rule" "allow_server_rpc_inbound" { + count = length(var.allowed_inbound_cidr_blocks) >= 1 ? 1 : 0 + type = "ingress" + from_port = var.server_rpc_port + to_port = var.server_rpc_port + protocol = "tcp" + cidr_blocks = var.allowed_inbound_cidr_blocks + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_cli_rpc_inbound" { + count = length(var.allowed_inbound_cidr_blocks) >= 1 ? 
1 : 0 + type = "ingress" + from_port = var.cli_rpc_port + to_port = var.cli_rpc_port + protocol = "tcp" + cidr_blocks = var.allowed_inbound_cidr_blocks + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_serf_wan_tcp_inbound" { + count = length(var.allowed_inbound_cidr_blocks) >= 1 ? 1 : 0 + type = "ingress" + from_port = var.serf_wan_port + to_port = var.serf_wan_port + protocol = "tcp" + cidr_blocks = var.allowed_inbound_cidr_blocks + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_serf_wan_udp_inbound" { + count = length(var.allowed_inbound_cidr_blocks) >= 1 ? 1 : 0 + type = "ingress" + from_port = var.serf_wan_port + to_port = var.serf_wan_port + protocol = "udp" + cidr_blocks = var.allowed_inbound_cidr_blocks + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_http_api_inbound" { + count = length(var.allowed_inbound_cidr_blocks) >= 1 ? 1 : 0 + type = "ingress" + from_port = var.http_api_port + to_port = var.http_api_port + protocol = "tcp" + cidr_blocks = var.allowed_inbound_cidr_blocks + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_https_api_inbound" { + count = var.enable_https_port && length(var.allowed_inbound_cidr_blocks) >= 1 ? 1 : 0 # only create when HTTPS is enabled AND CIDR blocks are supplied, like the sibling CIDR rules + type = "ingress" + from_port = var.https_api_port + to_port = var.https_api_port + protocol = "tcp" + cidr_blocks = var.allowed_inbound_cidr_blocks + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_dns_tcp_inbound" { + count = length(var.allowed_inbound_cidr_blocks) >= 1 ? 
1 : 0 + type = "ingress" + from_port = var.dns_port + to_port = var.dns_port + protocol = "tcp" + cidr_blocks = var.allowed_inbound_cidr_blocks + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_dns_udp_inbound" { + count = length(var.allowed_inbound_cidr_blocks) >= 1 ? 
1 : 0 + type = "ingress" + from_port = var.dns_port + to_port = var.dns_port + protocol = "udp" + cidr_blocks = var.allowed_inbound_cidr_blocks + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_server_rpc_inbound_from_security_group_ids" { + count = var.allowed_inbound_security_group_count + type = "ingress" + from_port = var.server_rpc_port + to_port = var.server_rpc_port + protocol = "tcp" + source_security_group_id = element(var.allowed_inbound_security_group_ids, count.index) + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_cli_rpc_inbound_from_security_group_ids" { + count = var.allowed_inbound_security_group_count + type = "ingress" + from_port = var.cli_rpc_port + to_port = var.cli_rpc_port + protocol = "tcp" + source_security_group_id = element(var.allowed_inbound_security_group_ids, count.index) + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_serf_wan_tcp_inbound_from_security_group_ids" { + count = var.allowed_inbound_security_group_count + type = "ingress" + from_port = var.serf_wan_port + to_port = var.serf_wan_port + protocol = "tcp" + source_security_group_id = element(var.allowed_inbound_security_group_ids, count.index) + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_serf_wan_udp_inbound_from_security_group_ids" { + count = var.allowed_inbound_security_group_count + type = "ingress" + from_port = var.serf_wan_port + to_port = var.serf_wan_port + protocol = "udp" + source_security_group_id = element(var.allowed_inbound_security_group_ids, count.index) + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_http_api_inbound_from_security_group_ids" { + count = var.allowed_inbound_security_group_count + type = "ingress" + from_port = var.http_api_port + to_port = var.http_api_port + protocol = "tcp" + source_security_group_id = 
element(var.allowed_inbound_security_group_ids, count.index) + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_https_api_inbound_from_security_group_ids" { + count = var.enable_https_port ? var.allowed_inbound_security_group_count : 0 + type = "ingress" + from_port = var.https_api_port + to_port = var.https_api_port + protocol = "tcp" + source_security_group_id = element(var.allowed_inbound_security_group_ids, count.index) + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_dns_tcp_inbound_from_security_group_ids" { + count = var.allowed_inbound_security_group_count + type = "ingress" + from_port = var.dns_port + to_port = var.dns_port + protocol = "tcp" + source_security_group_id = element(var.allowed_inbound_security_group_ids, count.index) + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_dns_udp_inbound_from_security_group_ids" { + count = var.allowed_inbound_security_group_count + type = "ingress" + from_port = var.dns_port + to_port = var.dns_port + protocol = "udp" + source_security_group_id = element(var.allowed_inbound_security_group_ids, count.index) + + security_group_id = var.security_group_id +} + +# Similar to the *_inbound_from_security_group_ids rules, allow inbound from ourself + +resource "aws_security_group_rule" "allow_server_rpc_inbound_from_self" { + type = "ingress" + from_port = var.server_rpc_port + to_port = var.server_rpc_port + protocol = "tcp" + self = true + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_cli_rpc_inbound_from_self" { + type = "ingress" + from_port = var.cli_rpc_port + to_port = var.cli_rpc_port + protocol = "tcp" + self = true + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_serf_wan_tcp_inbound_from_self" { + type = "ingress" + from_port = var.serf_wan_port + to_port = var.serf_wan_port + protocol = 
"tcp" + self = true + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_serf_wan_udp_inbound_from_self" { + type = "ingress" + from_port = var.serf_wan_port + to_port = var.serf_wan_port + protocol = "udp" + self = true + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_http_api_inbound_from_self" { + type = "ingress" + from_port = var.http_api_port + to_port = var.http_api_port + protocol = "tcp" + self = true + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_https_api_inbound_from_self" { + count = var.enable_https_port ? 1 : 0 + type = "ingress" + from_port = var.https_api_port + to_port = var.https_api_port + protocol = "tcp" + self = true + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_dns_tcp_inbound_from_self" { + type = "ingress" + from_port = var.dns_port + to_port = var.dns_port + protocol = "tcp" + self = true + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_dns_udp_inbound_from_self" { + type = "ingress" + from_port = var.dns_port + to_port = var.dns_port + protocol = "udp" + self = true + + security_group_id = var.security_group_id +} + +# --------------------------------------------------------------------------------------------------------------------- +# THE CONSUL-CLIENT SPECIFIC INBOUND/OUTBOUND RULES COME FROM THE CONSUL-CLIENT-SECURITY-GROUP-RULES MODULE +# --------------------------------------------------------------------------------------------------------------------- + +module "client_security_group_rules" { + source = "../consul-client-security-group-rules" + + security_group_id = var.security_group_id + allowed_inbound_cidr_blocks = var.allowed_inbound_cidr_blocks + allowed_inbound_security_group_ids = var.allowed_inbound_security_group_ids + allowed_inbound_security_group_count = var.allowed_inbound_security_group_count 
+ + serf_lan_port = var.serf_lan_port +} + diff --git a/modules/consul-security-group-rules/variables.tf b/modules/consul-security-group-rules/variables.tf new file mode 100644 index 00000000..90e1bec3 --- /dev/null +++ b/modules/consul-security-group-rules/variables.tf @@ -0,0 +1,80 @@ +# --------------------------------------------------------------------------------------------------------------------- +# REQUIRED PARAMETERS +# You must provide a value for each of these parameters. +# --------------------------------------------------------------------------------------------------------------------- + +variable "security_group_id" { + description = "The ID of the security group to which we should add the Consul security group rules" + type = string +} + +variable "allowed_inbound_cidr_blocks" { + description = "A list of CIDR-formatted IP address ranges from which the EC2 Instances will allow connections to Consul" + type = list(string) + default = [] +} + +# --------------------------------------------------------------------------------------------------------------------- +# OPTIONAL PARAMETERS +# These parameters have reasonable defaults. +# --------------------------------------------------------------------------------------------------------------------- + +variable "allowed_inbound_security_group_ids" { + description = "A list of security group IDs that will be allowed to connect to Consul" + type = list(string) + default = [] +} + +variable "allowed_inbound_security_group_count" { + description = "The number of entries in var.allowed_inbound_security_group_ids. Ideally, this value could be computed dynamically, but we pass this variable to a Terraform resource's 'count' property and Terraform requires that 'count' be computed with literals or data sources only." + type = number + default = 0 +} + +variable "server_rpc_port" { + description = "The port used by servers to handle incoming requests from other agents." 
+ type = number + default = 8300 +} + +variable "cli_rpc_port" { + description = "The port used by all agents to handle RPC from the CLI." + type = number + default = 8400 +} + +variable "serf_lan_port" { + description = "The port used to handle gossip in the LAN. Required by all agents." + type = number + default = 8301 +} + +variable "serf_wan_port" { + description = "The port used by servers to gossip over the WAN to other servers." + type = number + default = 8302 +} + +variable "http_api_port" { + description = "The port used by clients to talk to the HTTP API" + type = number + default = 8500 +} + +variable "https_api_port" { + description = "The port used by clients to talk to the HTTPS API. Only used if enable_https_port is set to true." + type = number + default = 8501 +} + +variable "dns_port" { + description = "The port used to resolve DNS queries." + type = number + default = 8600 +} + +variable "enable_https_port" { + description = "If set to true, allow access to the Consul HTTPS port defined via the https_api_port variable." 
+ type = bool + default = false +} diff --git a/modules/core/consul.tf b/modules/core/consul.tf index 772826a4..1baa8fa3 100644 --- a/modules/core/consul.tf +++ b/modules/core/consul.tf @@ -8,8 +8,8 @@ locals { } module "consul_servers" { - source = "hashicorp/consul/aws//modules/consul-cluster" - version = "0.8.4" + # copy of https://github.com/hashicorp/terraform-aws-consul/tree/v0.8.4/modules/consul-cluster + source = "../consul-cluster" cluster_name = var.consul_cluster_name cluster_size = var.consul_cluster_size diff --git a/modules/core/nomad_servers.tf b/modules/core/nomad_servers.tf index 6c1f0610..7b8ca3c5 100644 --- a/modules/core/nomad_servers.tf +++ b/modules/core/nomad_servers.tf @@ -9,8 +9,7 @@ locals { } module "nomad_servers" { - source = "hashicorp/nomad/aws//modules/nomad-cluster" - version = "0.7.1" + source = "../nomad-cluster" asg_name = local.nomad_server_cluster_name cluster_name = local.nomad_server_cluster_name @@ -21,6 +20,7 @@ module "nomad_servers" { min_size = var.nomad_servers_num max_size = var.nomad_servers_num desired_capacity = var.nomad_servers_num + spot_price = "" ami_id = var.nomad_servers_ami_id user_data = local.nomad_server_user_data diff --git a/modules/core/vault.tf b/modules/core/vault.tf index ff600c87..55da387d 100644 --- a/modules/core/vault.tf +++ b/modules/core/vault.tf @@ -20,8 +20,8 @@ EOF } module "vault" { - source = "hashicorp/vault/aws//modules/vault-cluster" - version = "0.14.1" + # copy of https://github.com/hashicorp/terraform-aws-vault/tree/v0.14.1/modules/vault-cluster + source = "../vault-cluster" cluster_name = var.vault_cluster_name cluster_size = var.vault_cluster_size diff --git a/modules/nomad-cluster/main.tf b/modules/nomad-cluster/main.tf index 9212d4f9..a7edf692 100644 --- a/modules/nomad-cluster/main.tf +++ b/modules/nomad-cluster/main.tf @@ -76,6 +76,12 @@ resource "aws_launch_configuration" "launch_configuration" { spot_price = var.spot_price user_data = var.user_data + metadata_options { + 
http_endpoint = "enabled" + http_tokens = "required" + http_put_response_hop_limit = 3 + } + iam_instance_profile = aws_iam_instance_profile.instance_profile.name key_name = var.ssh_key_name diff --git a/modules/vault-cluster/INOUT.md b/modules/vault-cluster/INOUT.md new file mode 100644 index 00000000..aac282e1 --- /dev/null +++ b/modules/vault-cluster/INOUT.md @@ -0,0 +1,73 @@ +## Providers + +| Name | Version | +|------|---------| +| aws | n/a | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:-----:| +| additional\_security\_group\_ids | A list of additional security group IDs to add to Vault EC2 Instances | `list(string)` | `[]` | no | +| allowed\_inbound\_cidr\_blocks | A list of CIDR-formatted IP address ranges from which the EC2 Instances will allow connections to Vault | `list(string)` | n/a | yes | +| allowed\_inbound\_security\_group\_count | The number of entries in var.allowed\_inbound\_security\_group\_ids. Ideally, this value could be computed dynamically, but we pass this variable to a Terraform resource's 'count' property and Terraform requires that 'count' be computed with literals or data sources only. | `any` | n/a | yes | +| allowed\_inbound\_security\_group\_ids | A list of security group IDs that will be allowed to connect to Vault | `list(string)` | n/a | yes | +| allowed\_ssh\_cidr\_blocks | A list of CIDR-formatted IP address ranges from which the EC2 Instances will allow SSH connections | `list(string)` | `[]` | no | +| allowed\_ssh\_security\_group\_ids | A list of security group IDs from which the EC2 Instances will allow SSH connections | `list(string)` | `[]` | no | +| ami\_id | The ID of the AMI to run in this cluster. Should be an AMI that had Vault installed and configured by the install-vault module. 
| `any` | n/a | yes | +| api\_port | The port to use for Vault API calls | `number` | `8200` | no | +| associate\_public\_ip\_address | If set to true, associate a public IP address with each EC2 Instance in the cluster. We strongly recommend against making Vault nodes publicly accessible, except through an ELB (see the vault-elb module). | `bool` | `false` | no | +| auto\_unseal\_kms\_key\_arn | (Vault Enterprise only) The arn of the KMS key used for unsealing the Vault cluster | `string` | `""` | no | +| availability\_zones | The availability zones into which the EC2 Instances should be deployed. You should typically pass in one availability zone per node in the cluster\_size variable. We strongly recommend against passing in only a list of availability zones, as that will run Vault in the default (and most likely public) subnets in your VPC. At least one of var.subnet\_ids or var.availability\_zones must be non-empty. | `list(string)` | n/a | yes | +| cluster\_extra\_tags | A list of additional tags to add to each Instance in the ASG. Each element in the list must be a map with the keys key, value, and propagate\_at\_launch | `list(object({ key : string, value : string, propagate_at_launch : bool }))` | `[]` | no | +| cluster\_name | The name of the Vault cluster (e.g. vault-stage). This variable is used to namespace all resources created by this module. | `any` | n/a | yes | +| cluster\_port | The port to use for Vault server-to-server communication. | `number` | `8201` | no | +| cluster\_size | The number of nodes to have in the cluster. We strongly recommend setting this to 3 or 5. | `any` | n/a | yes | +| cluster\_tag\_key | Add a tag with this key and the value var.cluster\_name to each Instance in the ASG. | `string` | `"Name"` | no | +| dynamo\_table\_name | Table name for the storage backend, required if `enable_dynamo_backend = true` | `string` | `""` | no | +| dynamo\_table\_region | Table region used for the instance policy. 
Uses the current region if not supplied. Global tables should use `*` to allow for a cross region deployment to write to their respective table | `string` | `""` | no | +| enable\_auto\_unseal | (Vault Enterprise only) Enable auto unseal of the Vault cluster | `bool` | `false` | no | +| enable\_dynamo\_backend | Whether to use a DynamoDB storage backend instead of Consul | `bool` | `false` | no | +| enable\_s3\_backend | Whether to configure an S3 storage backend in addition to Consul. | `bool` | `false` | no | +| enable\_s3\_bucket\_versioning | Whether to enable bucket versioning for the S3 bucket. | `bool` | `false` | no | +| enabled\_metrics | List of autoscaling group metrics to enable. | `list(string)` | `[]` | no | +| force\_destroy\_s3\_bucket | If 'enable\_s3\_backend' is enabled and you set this to true, when you run terraform destroy, this tells Terraform to delete all the objects in the S3 bucket used for backend storage. You should NOT set this to true in production or you risk losing all your data! This property is only here so automated tests of this module can clean up after themselves. Only used if 'enable\_s3\_backend' is set to true. | `bool` | `false` | no | +| health\_check\_grace\_period | Time, in seconds, after instance comes into service before checking health. | `number` | `300` | no | +| health\_check\_type | Controls how health checking is done. Must be one of EC2 or ELB. | `string` | `"EC2"` | no | +| iam\_permissions\_boundary | If set, restricts the created IAM role to the given permissions boundary | `string` | n/a | yes | +| instance\_profile\_path | Path in which to create the IAM instance profile. | `string` | `"/"` | no | +| instance\_type | The type of EC2 Instances to run for each node in the cluster (e.g. t2.micro). | `any` | n/a | yes | +| root\_volume\_delete\_on\_termination | Whether the volume should be destroyed on instance termination. 
| `bool` | `true` | no | +| root\_volume\_ebs\_optimized | If true, the launched EC2 instance will be EBS-optimized. | `bool` | `false` | no | +| root\_volume\_size | The size, in GB, of the root EBS volume. | `number` | `50` | no | +| root\_volume\_type | The type of volume. Must be one of: standard, gp2, or io1. | `string` | `"standard"` | no | +| s3\_bucket\_name | The name of the S3 bucket to create and use as a storage backend. Only used if 'enable\_s3\_backend' is set to true. | `string` | `""` | no | +| s3\_bucket\_tags | Tags to be applied to the S3 bucket. | `map(string)` | `{}` | no | +| security\_group\_tags | Tags to be applied to the LC security group | `map(string)` | `{}` | no | +| ssh\_key\_name | The name of an EC2 Key Pair that can be used to SSH to the EC2 Instances in this cluster. Set to an empty string to not associate a Key Pair. | `string` | `""` | no | +| ssh\_port | The port used for SSH connections. | `number` | `22` | no | +| subnet\_ids | The subnet IDs into which the EC2 Instances should be deployed. You should typically pass in one subnet ID per node in the cluster\_size variable. We strongly recommend that you run Vault in private subnets. At least one of var.subnet\_ids or var.availability\_zones must be non-empty. | `list(string)` | n/a | yes | +| tenancy | The tenancy of the instance. Must be one of: default or dedicated. | `string` | `"default"` | no | +| termination\_policies | A list of policies to decide how the instances in the auto scale group should be terminated. The allowed values are OldestInstance, NewestInstance, OldestLaunchConfiguration, ClosestToNextInstanceHour, Default. | `string` | `"Default"` | no | +| user\_data | A User Data script to execute while the server is booting. We recommend passing in a bash script that executes the run-vault script, which should have been installed in the AMI by the install-vault module. 
| `any` | n/a | yes | +| vpc\_id | The ID of the VPC in which to deploy the cluster | `any` | n/a | yes | +| wait\_for\_capacity\_timeout | A maximum duration that Terraform should wait for ASG instances to be healthy before timing out. Setting this to '0' causes Terraform to skip all Capacity Waiting behavior. | `string` | `"10m"` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| asg\_name | n/a | +| cluster\_size | n/a | +| cluster\_tag\_key | n/a | +| cluster\_tag\_value | n/a | +| iam\_instance\_profile\_arn | n/a | +| iam\_instance\_profile\_id | n/a | +| iam\_instance\_profile\_name | n/a | +| iam\_role\_arn | n/a | +| iam\_role\_id | n/a | +| iam\_role\_name | n/a | +| launch\_config\_name | n/a | +| s3\_bucket\_arn | n/a | +| security\_group\_id | n/a | + diff --git a/modules/vault-cluster/README.md b/modules/vault-cluster/README.md new file mode 100644 index 00000000..7b4c4ee5 --- /dev/null +++ b/modules/vault-cluster/README.md @@ -0,0 +1,498 @@ +# Vault Cluster + +This folder contains a [Terraform](https://www.terraform.io/) module that can be used to deploy a +[Vault](https://www.vaultproject.io/) cluster in [AWS](https://aws.amazon.com/) on top of an Auto Scaling Group. This +module is designed to deploy an [Amazon Machine Image (AMI)](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html) +that had Vault installed via the [install-vault](https://github.com/hashicorp/terraform-aws-vault/tree/master/modules/install-vault) module in this Module. + + + + +## How do you use this module? 
+ +This folder defines a [Terraform module](https://www.terraform.io/docs/modules/usage.html), which you can use in your +code by adding a `module` configuration and setting its `source` parameter to the URL of this folder: + +```hcl +module "vault_cluster" { + # Use version v0.0.1 of the vault-cluster module + source = "github.com/hashicorp/terraform-aws-vault//modules/vault-cluster?ref=v0.0.1" + + # Specify the ID of the Vault AMI. 
You should build this using the scripts in the install-vault module. + ami_id = "ami-abcd1234" + + # Configure and start Vault during boot. + user_data = <<-EOF + #!/bin/bash + /opt/vault/bin/run-vault --tls-cert-file /opt/vault/tls/vault.crt.pem --tls-key-file /opt/vault/tls/vault.key.pem + EOF + + # Add tag to each node in the cluster with value set to var.cluster_name + cluster_tag_key = "Name" + + # Optionally add extra tags to each node in the cluster + cluster_extra_tags = [ + { + key = "Environment" + value = "Dev" + propagate_at_launch = true + }, + { + key = "Department" + value = "Ops" + propagate_at_launch = true + } + ] + + # ... See variables.tf for the other parameters you must define for the vault-cluster module +} +``` + +Note the following parameters: + +* `source`: Use this parameter to specify the URL of the vault-cluster module. The double slash (`//`) is intentional + and required. Terraform uses it to specify subfolders within a Git repo (see [module + sources](https://www.terraform.io/docs/modules/sources.html)). The `ref` parameter specifies a specific Git tag in + this repo. That way, instead of using the latest version of this module from the `master` branch, which + will change every time you run Terraform, you're using a fixed version of the repo. + +* `ami_id`: Use this parameter to specify the ID of a Vault [Amazon Machine Image + (AMI)](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html) to deploy on each server in the cluster. You + should install Vault in this AMI using the scripts in the [install-vault](https://github.com/hashicorp/terraform-aws-vault/tree/master/modules/install-vault) module. + +* `user_data`: Use this parameter to specify a [User + Data](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html#user-data-shell-scripts) script that each + server will run during boot. 
This is where you can use the [run-vault script](https://github.com/hashicorp/terraform-aws-vault/tree/master/modules/run-vault) to configure and + run Vault. The `run-vault` script is one of the scripts installed by the [install-vault](https://github.com/hashicorp/terraform-aws-vault/tree/master/modules/install-vault) + module. + +You can find the other parameters in [variables.tf](variables.tf). + +Check out the [root example](https://github.com/hashicorp/terraform-aws-vault/tree/master/examples/root-example) and +[vault-cluster-private](https://github.com/hashicorp/terraform-aws-vault/tree/master/examples/vault-cluster-private) examples for working sample code. + + + + + +## How do you use the Vault cluster? + +To use the Vault cluster, you will typically need to SSH to each of the Vault servers. If you deployed the +[vault-cluster-private](https://github.com/hashicorp/terraform-aws-vault/tree/master/examples/vault-cluster-private) or [the root example](https://github.com/hashicorp/terraform-aws-vault/tree/master/examples/root-example) +examples, the [vault-examples-helper.sh script](https://github.com/hashicorp/terraform-aws-vault/tree/master/examples/vault-examples-helper/vault-examples-helper.sh) will do the +tag lookup for you automatically (note, you must have the [AWS CLI](https://aws.amazon.com/cli/) and +[jq](https://stedolan.github.io/jq/) installed locally): + +``` +> ../vault-examples-helper/vault-examples-helper.sh + +Your Vault servers are running at the following IP addresses: + +11.22.33.44 +11.22.33.55 +11.22.33.66 +``` + +### Initializing the Vault cluster + +The very first time you deploy a new Vault cluster, you need to [initialize the +Vault](https://www.vaultproject.io/intro/getting-started/deploy.html#initializing-the-vault). 
The easiest way to do +this is to SSH to one of the servers that has Vault installed and run: + +``` +vault operator init + +Key 1: 427cd2c310be3b84fe69372e683a790e01 +Key 2: 0e2b8f3555b42a232f7ace6fe0e68eaf02 +Key 3: 37837e5559b322d0585a6e411614695403 +Key 4: 8dd72fd7d1af254de5f82d1270fd87ab04 +Key 5: b47fdeb7dda82dbe92d88d3c860f605005 +Initial Root Token: eaf5cc32-b48f-7785-5c94-90b5ce300e9b + +Vault initialized with 5 keys and a key threshold of 3! +``` + +Vault will print out the [unseal keys](https://www.vaultproject.io/docs/concepts/seal.html) and a [root +token](https://www.vaultproject.io/docs/concepts/tokens.html#root-tokens). This is the **only time ever** that all of +this data is known by Vault, so you **MUST** save it in a secure place immediately! Also, this is the only time that +the unseal keys should ever be so close together. You should distribute each one to a different, trusted administrator +for safe keeping in completely separate secret stores and NEVER store them all in the same place. + +In fact, a better option is to initialize Vault with [PGP, GPG, or +Keybase](https://www.vaultproject.io/docs/concepts/pgp-gpg-keybase.html) so that each unseal key is encrypted with a +different user's public key. That way, no one, not even the operator running the `init` command can see all the keys +in one place: + +``` +vault operator init -pgp-keys="keybase:jefferai,keybase:vishalnayak,keybase:sethvargo" + +Key 1: wcBMA37rwGt6FS1VAQgAk1q8XQh6yc... +Key 2: wcBMA0wwnMXgRzYYAQgAavqbTCxZGD... +Key 3: wcFMA2DjqDb4YhTAARAAeTFyYxPmUd... +... +``` + +See [Using PGP, GPG, and Keybase](https://www.vaultproject.io/docs/concepts/pgp-gpg-keybase.html) for more info. + + +### Unsealing the Vault cluster + +Now that you have the unseal keys, you can [unseal Vault](https://www.vaultproject.io/docs/concepts/seal.html) by +having 3 out of the 5 administrators (or whatever your key shard threshold is) do the following: + +1. SSH to a Vault server. +1. 
Run `vault operator unseal`. +1. Enter the unseal key when prompted. +1. Repeat for each of the other Vault servers. + +Once this process is complete, all the Vault servers will be unsealed and you will be able to start reading and writing +secrets. + + +### Setting up a secrets engine + +In previous versions of Vault (< 1.1.0), a key-value secrets engine was automatically mounted at the path `secret/`. This is no +longer the case. The examples in this module use versions >= 1.1.0 and thus mount a key-value secrets engine at `secret/` explicitly. + +``` +vault secrets enable -version=1 -path=secret kv +``` + + +### Connecting to the Vault cluster to read and write secrets + +There are three ways to connect to Vault: + +1. [Access Vault from a Vault server](#access-vault-from-a-vault-server) +1. [Access Vault from other servers in the same AWS account](#access-vault-from-other-servers-in-the-same-aws-account) +1. [Access Vault from the public Internet](#access-vault-from-the-public-internet) + + +#### Access Vault from a Vault server + +When you SSH to a Vault server, the Vault client is already configured to talk to the Vault server on localhost, so +you can directly run Vault commands: + +``` +vault read secret/foo + +Key Value +--- ----- +refresh_interval 768h0m0s +value bar +``` + + +#### Access Vault from other servers in the same AWS account + +To access Vault from a different server in the same account, you need to specify the URL of the Vault cluster. You +could manually look up the Vault cluster's IP address, but since this module uses Consul not only as a [storage +backend](https://www.vaultproject.io/docs/configuration/storage/consul.html) but also as a way to register [DNS +entries](https://www.consul.io/docs/guides/forwarding.html), you can access Vault +using a nice domain name instead, such as `vault.service.consul`.
+ +To set this up, use the [install-dnsmasq +module](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/install-dnsmasq) on each server that +needs to access Vault or [setup-systemd-resolved](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/setup-systemd-resolved) if using Ubuntu 18.04. This allows you to access Vault from your EC2 Instances as follows: + +``` +vault -address=https://vault.service.consul:8200 read secret/foo + +Key Value +--- ----- +refresh_interval 768h0m0s +value bar +``` + +You can configure the Vault address as an environment variable: + +``` +export VAULT_ADDR=https://vault.service.consul:8200 +``` + +That way, you don't have to remember to pass the Vault address every time: + +``` +vault read secret/foo + +Key Value +--- ----- +refresh_interval 768h0m0s +value bar +``` + +Note that if you're using a self-signed TLS cert (e.g. generated from the [private-tls-cert +module](https://github.com/hashicorp/terraform-aws-vault/tree/master/modules/private-tls-cert)), you'll need to have the public key of the CA that signed that cert or you'll get +an "x509: certificate signed by unknown authority" error. You could pass the certificate manually: + +``` +vault read -ca-cert=/opt/vault/tls/ca.crt.pem secret/foo + +Key Value +--- ----- +refresh_interval 768h0m0s +value bar +``` + +However, to avoid having to add the `-ca-cert` argument to every single call, you can use the [update-certificate-store +module](https://github.com/hashicorp/terraform-aws-vault/tree/master/modules/update-certificate-store) to configure the server to trust the CA. + +Check out the [vault-cluster-private example](https://github.com/hashicorp/terraform-aws-vault/tree/master/examples/vault-cluster-private) for working sample code. + + +#### Access Vault from the public Internet + +We **strongly** recommend only running Vault in private subnets. 
That means it is not directly accessible from the +public Internet, which reduces your surface area to attackers. If you need users to be able to access Vault from +outside of AWS, we recommend using VPN to connect to AWS. + +If VPN is not an option, and Vault must be accessible from the public Internet, you can use the [vault-elb +module](https://github.com/hashicorp/terraform-aws-vault/tree/master/modules/vault-elb) to deploy an [Elastic Load Balancer +(ELB)](https://aws.amazon.com/elasticloadbalancing/classicloadbalancer/) in your public subnets, and have all your users +access Vault via this ELB: + +``` +vault -address=https://<ELB_DNS_NAME> read secret/foo +``` + +Where `ELB_DNS_NAME` is the DNS name for your ELB, such as `vault.example.com`. You can configure the Vault address as +an environment variable: + +``` +export VAULT_ADDR=https://vault.example.com +``` + +That way, you don't have to remember to pass the Vault address every time: + +``` +vault read secret/foo +``` + + + + + + +## What's included in this module? + +This module creates the following architecture: + +![Vault architecture](https://github.com/hashicorp/terraform-aws-vault/blob/master/_docs/architecture.png?raw=true) + +This architecture consists of the following resources: + +* [Auto Scaling Group](#auto-scaling-group) +* [Security Group](#security-group) +* [IAM Role and Permissions](#iam-role-and-permissions) +* [S3 bucket](#s3-bucket-optional) (Optional) + + +### Auto Scaling Group + +This module runs Vault on top of an [Auto Scaling Group (ASG)](https://aws.amazon.com/autoscaling/). Typically, you +should run the ASG with 3 or 5 EC2 Instances spread across multiple [Availability +Zones](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html). Each of the EC2 +Instances should be running an AMI that has had Vault installed via the [install-vault](https://github.com/hashicorp/terraform-aws-vault/tree/master/modules/install-vault) +module.
You pass in the ID of the AMI to run using the `ami_id` input parameter. + + +### Security Group + +Each EC2 Instance in the ASG has a Security Group that allows: + +* All outbound requests +* Inbound requests on Vault's API port (default: port 8200) +* Inbound requests on Vault's cluster port for server-to-server communication (default: port 8201) +* Inbound SSH requests (default: port 22) + +The Security Group ID is exported as an output variable if you need to add additional rules. + +Check out the [Security section](#security) for more details. + + +### IAM Role and Permissions + +Each EC2 Instance in the ASG has an [IAM Role](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html) attached. +The IAM Role ARN is exported as an output variable so you can add custom permissions. + + +### S3 bucket (Optional) + +If `enable_s3_backend` is set to `true`, this module will create an [S3 bucket](https://aws.amazon.com/s3/) that Vault +can use as a storage backend. S3 is a good choice for storage because it provides outstanding durability (99.999999999%) +and availability (99.99%). Unfortunately, S3 cannot be used for Vault High Availability coordination, so this module expects +a separate Consul server cluster to be deployed as a high availability backend. + + + +## How do you roll out updates? + +Please note that Vault does not support true zero-downtime upgrades, but with a proper upgrade procedure the downtime +should be very short (a few hundred milliseconds to a second depending on the speed of access to the storage +backend). See the [Vault upgrade guide instructions](https://www.vaultproject.io/docs/guides/upgrading/index.html) for +details. + +If you want to deploy a new version of Vault across a cluster deployed with this module, the best way to do that is to: + +1. Build a new AMI. +1. Set the `ami_id` parameter to the ID of the new AMI. +1. Run `terraform apply`.
+ +This updates the Launch Configuration of the ASG, so any new Instances in the ASG will have your new AMI, but it does +NOT actually deploy those new instances. To make that happen, you need to: + +1. [Replace the standby nodes](#replace-the-standby-nodes) +1. [Replace the primary node](#replace-the-primary-node) + + +### Replace the standby nodes + +For each of the standby nodes: + +1. SSH to the EC2 Instance where the Vault standby is running. +1. Execute `sudo systemctl stop vault` to have Vault shut down gracefully. +1. Terminate the EC2 Instance. +1. After a minute or two, the ASG should automatically launch a new Instance, with the new AMI, to replace the old one. +1. Have each Vault admin SSH to the new EC2 Instance and unseal it. + + +### Replace the primary node + +The procedure for the primary node is the same, but should be done LAST, after all the standbys have already been +upgraded: + +1. SSH to the EC2 Instance where the Vault primary is running. This should be the last server that has the old version + of your AMI. +1. Execute `sudo systemctl stop vault` to have Vault shut down gracefully. +1. Terminate the EC2 Instance. +1. After a minute or two, the ASG should automatically launch a new Instance, with the new AMI, to replace the old one. +1. Have each Vault admin SSH to the new EC2 Instance and unseal it. + + + + + +## What happens if a node crashes? + +There are two ways a Vault node may go down: + +1. The Vault process may crash. In that case, `systemd` should restart it automatically. At this point, you will + need to have each Vault admin SSH to the Instance to unseal it again. +1. The EC2 Instance running Vault dies. In that case, the Auto Scaling Group should launch a replacement automatically. + Once again, the Vault admins will have to SSH to the replacement Instance and unseal it. + +Given the need for manual intervention, you will want to have alarms set up that go off any time a Vault node gets +restarted. 
+ + + + +## Security + +Here are some of the main security considerations to keep in mind when using this module: + +1. [Encryption in transit](#encryption-in-transit) +1. [Encryption at rest](#encryption-at-rest) +1. [Dedicated instances](#dedicated-instances) +1. [Security groups](#security-groups) +1. [SSH access](#ssh-access) + + +### Encryption in transit + +Vault uses TLS to encrypt its network traffic. For instructions on configuring TLS, have a look at the +[How do you handle encryption documentation](https://github.com/hashicorp/terraform-aws-vault/tree/master/modules/run-vault#how-do-you-handle-encryption). + + +### Encryption at rest + +Vault servers keep everything in memory and do not write any data to the local hard disk. To persist data, Vault +encrypts it, and sends it off to its storage backends, so no matter how the backend stores that data, it is already +encrypted. By default, this Module uses Consul as a storage backend, so if you want an additional layer of +protection, you can check out the [official Consul encryption docs](https://www.consul.io/docs/agent/encryption.html) +and the Consul AWS Module [How do you handle encryption +docs](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/run-consul#how-do-you-handle-encryption) +for more info. + +Note that if you want to enable encryption for the root EBS Volume for your Vault Instances (despite the fact that +Vault itself doesn't write anything to this volume), you need to enable that in your AMI. If you're creating the AMI +using Packer (e.g. as shown in the [vault-consul-ami example](https://github.com/hashicorp/terraform-aws-vault/tree/master/examples/vault-consul-ami)), you need to set the [encrypt_boot +parameter](https://www.packer.io/docs/builders/amazon-ebs.html#encrypt_boot) to `true`. + + +### Dedicated instances + +If you wish to use dedicated instances, you can set the `tenancy` parameter to `"dedicated"` in this module.
+ + +### Security groups + +This module attaches a security group to each EC2 Instance that allows inbound requests as follows: + +* **Vault**: For the Vault API port (default: 8200), you can use the `allowed_inbound_cidr_blocks` parameter to control + the list of [CIDR blocks](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) that will be allowed access + and the `allowed_inbound_security_group_ids` parameter to control the security groups that will be allowed access. + +* **SSH**: For the SSH port (default: 22), you can use the `allowed_ssh_cidr_blocks` parameter to control the list of + [CIDR blocks](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) that will be allowed access. You can use the `allowed_ssh_security_group_ids` parameter to control the list of source Security Groups that will be allowed access. + +Note that all the ports mentioned above are configurable via the `xxx_port` variables (e.g. `api_port`). See +[variables.tf](variables.tf) for the full list. + + + +### SSH access + +You can associate an [EC2 Key Pair](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html) with each +of the EC2 Instances in this cluster by specifying the Key Pair's name in the `ssh_key_name` variable. If you don't +want to associate a Key Pair with these servers, set `ssh_key_name` to an empty string. + + + + + +## What's NOT included in this module? + +This module does NOT handle the following items, which you may want to provide on your own: + +* [Consul](#consul) +* [Monitoring, alerting, log aggregation](#monitoring-alerting-log-aggregation) +* [VPCs, subnets, route tables](#vpcs-subnets-route-tables) + + +### Consul + +This module configures Vault to use Consul as a high availability storage backend. This module assumes you already +have Consul servers deployed in a separate cluster. We do not recommend co-locating Vault and Consul servers in the +same cluster because: + +1. 
Vault is a tool built specifically for security, and running any other software on the same server increases its + surface area to attackers. +1. This Vault Module uses Consul as a high availability storage backend and both Vault and Consul keep their working + set in memory. That means for every 1 byte of data in Vault, you'd also have 1 byte of data in Consul, doubling + your memory consumption on each server. + +Check out the [Consul AWS Module](https://github.com/hashicorp/terraform-aws-consul) for how to deploy a Consul +server cluster in AWS. See the [root example](https://github.com/hashicorp/terraform-aws-vault/tree/master/examples/root-example) and +[vault-cluster-private](https://github.com/hashicorp/terraform-aws-vault/tree/master/examples/vault-cluster-private) examples for sample code that shows how to run both a +Vault server cluster and Consul server cluster. + + +### Monitoring, alerting, log aggregation + +This module does not include anything for monitoring, alerting, or log aggregation. All ASGs and EC2 Instances come +with limited [CloudWatch](https://aws.amazon.com/cloudwatch/) metrics built-in, but beyond that, you will have to +provide your own solutions. We especially recommend looking into Vault's [Audit +backends](https://www.vaultproject.io/docs/audit/index.html) for how you can capture detailed logging and audit +information. + +Given that any time Vault crashes, reboots, or restarts, you have to have the Vault admins manually unseal it (see +[What happens if a node crashes?](#what-happens-if-a-node-crashes)), we **strongly** recommend configuring alerts that +notify these admins whenever they need to take action! + + +### VPCs, subnets, route tables + +This module assumes you've already created your network topology (VPC, subnets, route tables, etc). You will need to +pass in the relevant info about your network topology (e.g. `vpc_id`, `subnet_ids`) as input variables to this +module.
diff --git a/modules/vault-cluster/main.tf b/modules/vault-cluster/main.tf new file mode 100644 index 00000000..e8941d57 --- /dev/null +++ b/modules/vault-cluster/main.tf @@ -0,0 +1,411 @@ +# ---------------------------------------------------------------------------------------------------------------------- +# REQUIRE A SPECIFIC TERRAFORM VERSION OR HIGHER +# ---------------------------------------------------------------------------------------------------------------------- +terraform { + # This module is now only being tested with Terraform 0.13.x. However, to make upgrading easier, we are setting + # 0.12.26 as the minimum version, as that version added support for required_providers with source URLs, making it + # forwards compatible with 0.13.x code. + required_version = ">= 0.12.26" +} + +data "aws_region" "current" {} + +data "aws_caller_identity" "current" {} + +# --------------------------------------------------------------------------------------------------------------------- +# CREATE AN AUTO SCALING GROUP (ASG) TO RUN VAULT +# --------------------------------------------------------------------------------------------------------------------- + +resource "aws_autoscaling_group" "autoscaling_group" { + name_prefix = var.cluster_name + + launch_configuration = aws_launch_configuration.launch_configuration.name + + availability_zones = var.availability_zones + vpc_zone_identifier = var.subnet_ids + + # Use a fixed-size cluster + min_size = var.cluster_size + max_size = var.cluster_size + desired_capacity = var.cluster_size + termination_policies = [var.termination_policies] + + health_check_type = var.health_check_type + health_check_grace_period = var.health_check_grace_period + wait_for_capacity_timeout = var.wait_for_capacity_timeout + + enabled_metrics = var.enabled_metrics + + # Use bucket and policies names in tags for depending on them when they are there + # And only create the cluster after S3 bucket and policies exist + # Otherwise Vault 
might boot and not find the bucket or not yet have the necessary permissions + # Not using `depends_on` because these resources might not exist + tag { + key = var.cluster_tag_key + value = var.cluster_name + propagate_at_launch = true + } + + tag { + key = "using_s3_bucket_backend" + value = element(concat(aws_iam_role_policy.vault_s3.*.name, [""]), 0) + propagate_at_launch = true + } + + tag { + key = "s3_bucket_id" + value = element(concat(aws_s3_bucket.vault_storage.*.id, [""]), 0) + propagate_at_launch = true + } + + + # Use table policy name in tags for depending on them when they are there + # And only create the cluster after dynamo exists + # Otherwise Vault might boot and not find the bucket or not yet have the necessary permissions + # Not using `depends_on` because these resources might not exist + tag { + key = "using_dynamodb_backend" + value = element(concat(aws_iam_role_policy.vault_dynamo.*.name, [""]), 0) + propagate_at_launch = true + } + + tag { + key = "using_auto_unseal" + value = element(concat(aws_iam_role_policy.vault_auto_unseal_kms.*.name, [""]), 0) + propagate_at_launch = true + } + + dynamic "tag" { + for_each = var.cluster_extra_tags + + content { + key = tag.value.key + value = tag.value.value + propagate_at_launch = tag.value.propagate_at_launch + } + } + + + lifecycle { + # aws_launch_configuration.launch_configuration in this module sets create_before_destroy to true, which means + # everything it depends on, including this resource, must set it as well, or you'll get cyclic dependency errors + # when you try to do a terraform destroy. + create_before_destroy = true + + # As of AWS Provider 3.x, inline load_balancers and target_group_arns + # in an aws_autoscaling_group take precedence over attachment resources. + # Since the vault-cluster module does not define any Load Balancers, + # it's safe to assume that we will always want to favor an attachment + # over these inline properties. 
+ # + # For further discussion and links to relevant documentation, see + # https://github.com/hashicorp/terraform-aws-vault/issues/210 + ignore_changes = [load_balancers, target_group_arns] + } +} + +# --------------------------------------------------------------------------------------------------------------------- +# CREATE LAUNCH CONFIGURATION TO DEFINE WHAT RUNS ON EACH INSTANCE IN THE ASG +# --------------------------------------------------------------------------------------------------------------------- + +resource "aws_launch_configuration" "launch_configuration" { + name_prefix = "${var.cluster_name}-" + image_id = var.ami_id + instance_type = var.instance_type + user_data = var.user_data + + # added to https://github.com/hashicorp/terraform-aws-vault/tree/v0.14.1/modules/vault-cluster + metadata_options { + http_endpoint = "enabled" + http_tokens = "required" + http_put_response_hop_limit = 3 + } + + iam_instance_profile = aws_iam_instance_profile.instance_profile.name + key_name = var.ssh_key_name + # TF-UPGRADE-TODO: In Terraform v0.10 and earlier, it was sometimes necessary to + # force an interpolation expression to be interpreted as a list by wrapping it + # in an extra set of list brackets. That form was supported for compatibilty in + # v0.11, but is no longer supported in Terraform v0.12. + # + # If the expression in the following list itself returns a list, remove the + # brackets to avoid interpretation as a list of lists. If the expression + # returns a single list item then leave it as-is and remove this TODO comment. 
+ security_groups = concat( + [aws_security_group.lc_security_group.id], + var.additional_security_group_ids, + ) + placement_tenancy = var.tenancy + associate_public_ip_address = var.associate_public_ip_address + + ebs_optimized = var.root_volume_ebs_optimized + + root_block_device { + volume_type = var.root_volume_type + volume_size = var.root_volume_size + delete_on_termination = var.root_volume_delete_on_termination + } + + # Important note: whenever using a launch configuration with an auto scaling group, you must set + # create_before_destroy = true. However, as soon as you set create_before_destroy = true in one resource, you must + # also set it in every resource that it depends on, or you'll get an error about cyclic dependencies (especially when + # removing resources). For more info, see: + # + # https://www.terraform.io/docs/providers/aws/r/launch_configuration.html + # https://terraform.io/docs/configuration/resources.html + lifecycle { + create_before_destroy = true + } +} + +# --------------------------------------------------------------------------------------------------------------------- +# CREATE A SECURITY GROUP TO CONTROL WHAT REQUESTS CAN GO IN AND OUT OF EACH EC2 INSTANCE +# --------------------------------------------------------------------------------------------------------------------- + +resource "aws_security_group" "lc_security_group" { + name_prefix = var.cluster_name + description = "Security group for the ${var.cluster_name} launch configuration" + vpc_id = var.vpc_id + + # aws_launch_configuration.launch_configuration in this module sets create_before_destroy to true, which means + # everything it depends on, including this resource, must set it as well, or you'll get cyclic dependency errors + # when you try to do a terraform destroy. 
+ lifecycle { + create_before_destroy = true + } + + tags = merge( + { + "Name" = var.cluster_name + }, + var.security_group_tags, + ) +} + +resource "aws_security_group_rule" "allow_ssh_inbound_from_cidr_blocks" { + count = length(var.allowed_ssh_cidr_blocks) >= 1 ? 1 : 0 + type = "ingress" + from_port = var.ssh_port + to_port = var.ssh_port + protocol = "tcp" + cidr_blocks = var.allowed_ssh_cidr_blocks + + security_group_id = aws_security_group.lc_security_group.id +} + +resource "aws_security_group_rule" "allow_ssh_inbound_from_security_group_ids" { + count = length(var.allowed_ssh_security_group_ids) + type = "ingress" + from_port = var.ssh_port + to_port = var.ssh_port + protocol = "tcp" + source_security_group_id = element(var.allowed_ssh_security_group_ids, count.index) + + security_group_id = aws_security_group.lc_security_group.id +} + +resource "aws_security_group_rule" "allow_all_outbound" { + type = "egress" + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + + security_group_id = aws_security_group.lc_security_group.id +} + +# --------------------------------------------------------------------------------------------------------------------- +# THE INBOUND/OUTBOUND RULES FOR THE SECURITY GROUP COME FROM THE VAULT-SECURITY-GROUP-RULES MODULE +# --------------------------------------------------------------------------------------------------------------------- + +module "security_group_rules" { + source = "../vault-security-group-rules" + + security_group_id = aws_security_group.lc_security_group.id + allowed_inbound_cidr_blocks = var.allowed_inbound_cidr_blocks + allowed_inbound_security_group_ids = var.allowed_inbound_security_group_ids + allowed_inbound_security_group_count = var.allowed_inbound_security_group_count + + api_port = var.api_port + cluster_port = var.cluster_port +} + +# --------------------------------------------------------------------------------------------------------------------- +# ATTACH AN IAM ROLE 
TO EACH EC2 INSTANCE +# We can use the IAM role to grant the instance IAM permissions so we can use the AWS APIs without having to figure out +# how to get our secret AWS access keys onto the box. +# --------------------------------------------------------------------------------------------------------------------- + +resource "aws_iam_instance_profile" "instance_profile" { + name_prefix = var.cluster_name + path = var.instance_profile_path + role = aws_iam_role.instance_role.name + + # aws_launch_configuration.launch_configuration in this module sets create_before_destroy to true, which means + # everything it depends on, including this resource, must set it as well, or you'll get cyclic dependency errors + # when you try to do a terraform destroy. + lifecycle { + create_before_destroy = true + } +} + +resource "aws_iam_role" "instance_role" { + name_prefix = var.cluster_name + assume_role_policy = data.aws_iam_policy_document.instance_role.json + + permissions_boundary = var.iam_permissions_boundary + + # aws_iam_instance_profile.instance_profile in this module sets create_before_destroy to true, which means + # everything it depends on, including this resource, must set it as well, or you'll get cyclic dependency errors + # when you try to do a terraform destroy. + lifecycle { + create_before_destroy = true + } +} + +data "aws_iam_policy_document" "instance_role" { + statement { + effect = "Allow" + actions = ["sts:AssumeRole"] + + principals { + type = "Service" + identifiers = ["ec2.amazonaws.com"] + } + } +} + +resource "aws_s3_bucket" "vault_storage" { + count = var.enable_s3_backend ? 1 : 0 + bucket = var.s3_bucket_name + force_destroy = var.force_destroy_s3_bucket + + tags = merge( + { + "Description" = "Used for secret storage with Vault. DO NOT DELETE this Bucket unless you know what you are doing." 
+ }, + var.s3_bucket_tags, + ) + + versioning { + enabled = var.enable_s3_bucket_versioning + } + + # aws_launch_configuration.launch_configuration in this module sets create_before_destroy to true, which means + # everything it depends on, including this resource, must set it as well, or you'll get cyclic dependency errors + # when you try to do a terraform destroy. + lifecycle { + create_before_destroy = true + } +} + +resource "aws_iam_role_policy" "vault_s3" { + count = var.enable_s3_backend ? 1 : 0 + name = "vault_s3" + role = aws_iam_role.instance_role.id + policy = element( + concat(data.aws_iam_policy_document.vault_s3.*.json, [""]), + 0, + ) + + # aws_launch_configuration.launch_configuration in this module sets create_before_destroy to true, which means + # everything it depends on, including this resource, must set it as well, or you'll get cyclic dependency errors + # when you try to do a terraform destroy. + lifecycle { + create_before_destroy = true + } +} + +data "aws_iam_policy_document" "vault_s3" { + count = var.enable_s3_backend ? 1 : 0 + + statement { + effect = "Allow" + actions = ["s3:*"] + + resources = [ + aws_s3_bucket.vault_storage[0].arn, + "${aws_s3_bucket.vault_storage[0].arn}/*", + ] + } +} + +data "aws_iam_policy_document" "vault_dynamo" { + count = var.enable_dynamo_backend ? 1 : 0 + statement { + effect = "Allow" + actions = [ + "dynamodb:DescribeLimits", + "dynamodb:DescribeTimeToLive", + "dynamodb:ListTagsOfResource", + "dynamodb:DescribeReservedCapacityOfferings", + "dynamodb:DescribeReservedCapacity", + "dynamodb:ListTables", + "dynamodb:BatchGetItem", + "dynamodb:BatchWriteItem", + "dynamodb:CreateTable", + "dynamodb:DeleteItem", + "dynamodb:GetItem", + "dynamodb:GetRecords", + "dynamodb:PutItem", + "dynamodb:Query", + "dynamodb:UpdateItem", + "dynamodb:Scan", + "dynamodb:DescribeTable" + ] + resources = [ + format("arn:aws:dynamodb:%s:%s:table/%s", + var.dynamo_table_region == "" ? 
data.aws_region.current.name : var.dynamo_table_region, + data.aws_caller_identity.current.account_id, + var.dynamo_table_name + ) + ] + } +} + +resource "aws_iam_role_policy" "vault_dynamo" { + count = var.enable_dynamo_backend ? 1 : 0 + name = "vault_dynamo" + role = aws_iam_role.instance_role.id + policy = element( + concat(data.aws_iam_policy_document.vault_dynamo.*.json, [""]), + 0, + ) +} + +data "aws_iam_policy_document" "vault_auto_unseal_kms" { + count = var.enable_auto_unseal ? 1 : 0 + + statement { + effect = "Allow" + + actions = [ + "kms:Encrypt", + "kms:Decrypt", + "kms:DescribeKey", + ] + + resources = [var.auto_unseal_kms_key_arn] + } +} + +resource "aws_iam_role_policy" "vault_auto_unseal_kms" { + count = var.enable_auto_unseal ? 1 : 0 + name = "vault_auto_unseal_kms" + role = aws_iam_role.instance_role.id + policy = element( + concat( + data.aws_iam_policy_document.vault_auto_unseal_kms.*.json, + [""], + ), + 0, + ) + + # aws_launch_configuration.launch_configuration in this module sets create_before_destroy to true, which means + # everything it depends on, including this resource, must set it as well, or you'll get cyclic dependency errors + # when you try to do a terraform destroy. 
+ lifecycle { + create_before_destroy = true + } +} + diff --git a/modules/vault-cluster/outputs.tf b/modules/vault-cluster/outputs.tf new file mode 100644 index 00000000..4aab60f1 --- /dev/null +++ b/modules/vault-cluster/outputs.tf @@ -0,0 +1,52 @@ +output "asg_name" { + value = aws_autoscaling_group.autoscaling_group.name +} + +output "cluster_tag_key" { + value = var.cluster_tag_key +} + +output "cluster_tag_value" { + value = var.cluster_name +} + +output "cluster_size" { + value = aws_autoscaling_group.autoscaling_group.desired_capacity +} + +output "launch_config_name" { + value = aws_launch_configuration.launch_configuration.name +} + +output "iam_instance_profile_arn" { + value = aws_iam_instance_profile.instance_profile.arn +} + +output "iam_instance_profile_id" { + value = aws_iam_instance_profile.instance_profile.id +} + +output "iam_instance_profile_name" { + value = aws_iam_instance_profile.instance_profile.name +} + +output "iam_role_arn" { + value = aws_iam_role.instance_role.arn +} + +output "iam_role_id" { + value = aws_iam_role.instance_role.id +} + +output "iam_role_name" { + value = aws_iam_role.instance_role.name +} + +output "security_group_id" { + value = aws_security_group.lc_security_group.id +} + +output "s3_bucket_arn" { + value = join(",", aws_s3_bucket.vault_storage.*.arn) +} + diff --git a/modules/vault-cluster/variables.tf b/modules/vault-cluster/variables.tf new file mode 100644 index 00000000..df58075c --- /dev/null +++ b/modules/vault-cluster/variables.tf @@ -0,0 +1,244 @@ +# --------------------------------------------------------------------------------------------------------------------- +# REQUIRED PARAMETERS +# You must provide a value for each of these parameters. +# --------------------------------------------------------------------------------------------------------------------- + +variable "cluster_name" { + description = "The name of the Vault cluster (e.g. vault-stage). 
This variable is used to namespace all resources created by this module." +} + +variable "ami_id" { + description = "The ID of the AMI to run in this cluster. Should be an AMI that had Vault installed and configured by the install-vault module." +} + +variable "instance_type" { + description = "The type of EC2 Instances to run for each node in the cluster (e.g. t2.micro)." +} + +variable "vpc_id" { + description = "The ID of the VPC in which to deploy the cluster" +} + +variable "allowed_inbound_cidr_blocks" { + description = "A list of CIDR-formatted IP address ranges from which the EC2 Instances will allow connections to Vault" + type = list(string) +} + +variable "allowed_inbound_security_group_ids" { + description = "A list of security group IDs that will be allowed to connect to Vault" + type = list(string) +} + +variable "allowed_inbound_security_group_count" { + description = "The number of entries in var.allowed_inbound_security_group_ids. Ideally, this value could be computed dynamically, but we pass this variable to a Terraform resource's 'count' property and Terraform requires that 'count' be computed with literals or data sources only." +} + +variable "user_data" { + description = "A User Data script to execute while the server is booting. We recommend passing in a bash script that executes the run-vault script, which should have been installed in the AMI by the install-vault module." +} + +variable "cluster_size" { + description = "The number of nodes to have in the cluster. We strongly recommend setting this to 3 or 5." +} + +# --------------------------------------------------------------------------------------------------------------------- +# OPTIONAL PARAMETERS +# These parameters have reasonable defaults. 
+# --------------------------------------------------------------------------------------------------------------------- + +variable "enable_auto_unseal" { + description = "(Vault Enterprise only) Enable auto unseal of the Vault cluster" + default = false +} + +variable "auto_unseal_kms_key_arn" { + description = "(Vault Enterprise only) The arn of the KMS key used for unsealing the Vault cluster" + default = "" +} + +variable "subnet_ids" { + description = "The subnet IDs into which the EC2 Instances should be deployed. You should typically pass in one subnet ID per node in the cluster_size variable. We strongly recommend that you run Vault in private subnets. At least one of var.subnet_ids or var.availability_zones must be non-empty." + type = list(string) + default = null +} + +variable "availability_zones" { + description = "The availability zones into which the EC2 Instances should be deployed. You should typically pass in one availability zone per node in the cluster_size variable. We strongly recommend against passing in only a list of availability zones, as that will run Vault in the default (and most likely public) subnets in your VPC. At least one of var.subnet_ids or var.availability_zones must be non-empty." + type = list(string) + default = null +} + +variable "ssh_key_name" { + description = "The name of an EC2 Key Pair that can be used to SSH to the EC2 Instances in this cluster. Set to an empty string to not associate a Key Pair."
+ default = "" +} + +variable "allowed_ssh_cidr_blocks" { + description = "A list of CIDR-formatted IP address ranges from which the EC2 Instances will allow SSH connections" + type = list(string) + default = [] +} + +variable "allowed_ssh_security_group_ids" { + description = "A list of security group IDs from which the EC2 Instances will allow SSH connections" + type = list(string) + default = [] +} + +variable "additional_security_group_ids" { + description = "A list of additional security group IDs to add to Vault EC2 Instances" + type = list(string) + default = [] +} + +variable "security_group_tags" { + description = "Tags to be applied to the LC security group" + type = map(string) + default = {} +} + +variable "cluster_tag_key" { + description = "Add a tag with this key and the value var.cluster_name to each Instance in the ASG." + default = "Name" +} + +variable "cluster_extra_tags" { + description = "A list of additional tags to add to each Instance in the ASG. Each element in the list must be a map with the keys key, value, and propagate_at_launch" + type = list(object({ key : string, value : string, propagate_at_launch : bool })) + + #example: + # default = [ + # { + # key = "Environment" + # value = "Dev" + # propagate_at_launch = true + # } + # ] + default = [] +} + +variable "termination_policies" { + description = "A list of policies to decide how the instances in the auto scale group should be terminated. The allowed values are OldestInstance, NewestInstance, OldestLaunchConfiguration, ClosestToNextInstanceHour, Default." + default = "Default" +} + +variable "associate_public_ip_address" { + description = "If set to true, associate a public IP address with each EC2 Instance in the cluster. We strongly recommend against making Vault nodes publicly accessible, except through an ELB (see the vault-elb module)." + default = false +} + +variable "tenancy" { + description = "The tenancy of the instance. Must be one of: default or dedicated." 
+ default = "default" +} + +variable "root_volume_ebs_optimized" { + description = "If true, the launched EC2 instance will be EBS-optimized." + default = false +} + +variable "root_volume_type" { + description = "The type of volume. Must be one of: standard, gp2, or io1." + default = "standard" +} + +variable "root_volume_size" { + description = "The size, in GB, of the root EBS volume." + default = 50 +} + +variable "root_volume_delete_on_termination" { + description = "Whether the volume should be destroyed on instance termination." + default = true +} + +variable "wait_for_capacity_timeout" { + description = "A maximum duration that Terraform should wait for ASG instances to be healthy before timing out. Setting this to '0' causes Terraform to skip all Capacity Waiting behavior." + default = "10m" +} + +variable "health_check_type" { + description = "Controls how health checking is done. Must be one of EC2 or ELB." + default = "EC2" +} + +variable "health_check_grace_period" { + description = "Time, in seconds, after instance comes into service before checking health." + default = 300 +} + +variable "instance_profile_path" { + description = "Path in which to create the IAM instance profile." + default = "/" +} + +variable "api_port" { + description = "The port to use for Vault API calls" + default = 8200 +} + +variable "cluster_port" { + description = "The port to use for Vault server-to-server communication." + default = 8201 +} + +variable "ssh_port" { + description = "The port used for SSH connections." + default = 22 +} + +variable "enable_s3_backend" { + description = "Whether to configure an S3 storage backend in addition to Consul." + default = false +} + +variable "s3_bucket_name" { + description = "The name of the S3 bucket to create and use as a storage backend. Only used if 'enable_s3_backend' is set to true." + default = "" +} + +variable "s3_bucket_tags" { + description = "Tags to be applied to the S3 bucket." 
+ type = map(string) + default = {} +} + +variable "enable_s3_bucket_versioning" { + description = "Whether to enable bucket versioning for the S3 bucket." + default = false +} + +variable "force_destroy_s3_bucket" { + description = "If 'enable_s3_backend' is enabled and you set this to true, when you run terraform destroy, this tells Terraform to delete all the objects in the S3 bucket used for backend storage. You should NOT set this to true in production or you risk losing all your data! This property is only here so automated tests of this module can clean up after themselves. Only used if 'enable_s3_backend' is set to true." + default = false +} + +variable "enabled_metrics" { + description = "List of autoscaling group metrics to enable." + type = list(string) + default = [] +} + +variable "enable_dynamo_backend" { + description = "Whether to use a DynamoDB storage backend instead of Consul" + type = bool + default = false +} + +variable "dynamo_table_name" { + description = "Table name for the storage backend, required if `enable_dynamo_backend = true`" + type = string + default = "" +} + +variable "dynamo_table_region" { + description = "Table region used for the instance policy. Uses the current region if not supplied.
Global tables should use `*` to allow for a cross region deployment to write to their respective table" + type = string + default = "" +} + +variable "iam_permissions_boundary" { + description = "If set, restricts the created IAM role to the given permissions boundary" + type = string + default = null +} diff --git a/modules/vault-security-group-rules/INOUT.md b/modules/vault-security-group-rules/INOUT.md new file mode 100644 index 00000000..73f70d9e --- /dev/null +++ b/modules/vault-security-group-rules/INOUT.md @@ -0,0 +1,21 @@ +## Providers + +| Name | Version | +|------|---------| +| aws | n/a | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:-----:| +| allowed\_inbound\_cidr\_blocks | A list of CIDR-formatted IP address ranges from which the EC2 Instances will allow connections to Vault | `list(string)` | n/a | yes | +| allowed\_inbound\_security\_group\_count | The number of entries in var.allowed\_inbound\_security\_group\_ids. Ideally, this value could be computed dynamically, but we pass this variable to a Terraform resource's 'count' property and Terraform requires that 'count' be computed with literals or data sources only. | `any` | n/a | yes | +| allowed\_inbound\_security\_group\_ids | A list of security group IDs that will be allowed to connect to Vault | `list(string)` | n/a | yes | +| api\_port | The port to use for Vault API calls | `number` | `8200` | no | +| cluster\_port | The port to use for Vault server-to-server communication | `number` | `8201` | no | +| security\_group\_id | The ID of the security group to which we should add the Vault security group rules | `any` | n/a | yes | + +## Outputs + +No output. 
+ diff --git a/modules/vault-security-group-rules/README.md b/modules/vault-security-group-rules/README.md new file mode 100644 index 00000000..48df1258 --- /dev/null +++ b/modules/vault-security-group-rules/README.md @@ -0,0 +1,48 @@ +# Vault Security Group Rules Module + +This folder contains a [Terraform](https://www.terraform.io/) module that defines the security group rules used by a +[Vault](https://www.vaultproject.io/) cluster to control the traffic that is allowed to go in and out of the cluster. + +Normally, you'd get these rules by default if you're using the [vault-cluster module](https://github.com/hashicorp/terraform-aws-vault/tree/master/modules/vault-cluster), but if +you're running Vault on top of a different cluster, then you can use this module to add the necessary security group +rules to that cluster. For example, imagine you were using the [consul-cluster +module](https://github.com/hashicorp/terraform-aws-consul/tree/master/modules/consul-cluster) to run a cluster of +servers that have both Vault and Consul on each node: + +```hcl +module "cluster" { + source = "github.com/hashicorp/terraform-aws-consul.git//modules/consul-cluster?ref=v0.0.1" + + # This AMI has both Vault and Consul installed + ami_id = "ami-1234abcd" +} +``` + +The `consul-cluster` module will provide the security group rules for Consul, but not for Vault. To ensure those +servers have the necessary ports open for using Vault, you can use this module as follows: + + +```hcl +module "security_group_rules" { + source = "github.com/hashicorp/terraform-aws-vault.git//modules/vault-security-group-rules?ref=v0.0.1" + + security_group_id = "${module.cluster.security_group_id}" + + # ... (other params omitted) ... +} +``` + +Note the following parameters: + +* `source`: Use this parameter to specify the URL of this module. The double slash (`//`) is intentional + and required.
Terraform uses it to specify subfolders within a Git repo (see [module + sources](https://www.terraform.io/docs/modules/sources.html)). The `ref` parameter specifies a specific Git tag in + this repo. That way, instead of using the latest version of this module from the `master` branch, which + will change every time you run Terraform, you're using a fixed version of the repo. + +* `security_group_id`: Use this parameter to specify the ID of the security group to which the rules in this module + should be added. + +You can find the other parameters in [variables.tf](variables.tf). + +Check out the [vault-cluster module](https://github.com/hashicorp/terraform-aws-vault/tree/master/modules/vault-cluster) for example usage. diff --git a/modules/vault-security-group-rules/main.tf b/modules/vault-security-group-rules/main.tf new file mode 100644 index 00000000..2b821613 --- /dev/null +++ b/modules/vault-security-group-rules/main.tf @@ -0,0 +1,53 @@ +terraform { + # This module is now only being tested with Terraform 0.13.x. However, to make upgrading easier, we are setting + # 0.12.26 as the minimum version, as that version added support for required_providers with source URLs, making it + # forwards compatible with 0.13.x code. + required_version = ">= 0.12.26" +} + +# --------------------------------------------------------------------------------------------------------------------- +# CREATE THE SECURITY GROUP RULES THAT CONTROL WHAT TRAFFIC CAN GO IN AND OUT OF A VAULT CLUSTER +# --------------------------------------------------------------------------------------------------------------------- + +resource "aws_security_group_rule" "allow_api_inbound_from_cidr_blocks" { + count = length(var.allowed_inbound_cidr_blocks) >= 1 ? 
1 : 0 + type = "ingress" + from_port = var.api_port + to_port = var.api_port + protocol = "tcp" + cidr_blocks = var.allowed_inbound_cidr_blocks + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_api_inbound_from_security_group_ids" { + count = var.allowed_inbound_security_group_count + type = "ingress" + from_port = var.api_port + to_port = var.api_port + protocol = "tcp" + source_security_group_id = element(var.allowed_inbound_security_group_ids, count.index) + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_cluster_inbound_from_self" { + type = "ingress" + from_port = var.cluster_port + to_port = var.cluster_port + protocol = "tcp" + self = true + + security_group_id = var.security_group_id +} + +resource "aws_security_group_rule" "allow_cluster_inbound_from_self_api" { + type = "ingress" + from_port = var.api_port + to_port = var.api_port + protocol = "tcp" + self = true + + security_group_id = var.security_group_id +} + diff --git a/modules/vault-security-group-rules/variables.tf b/modules/vault-security-group-rules/variables.tf new file mode 100644 index 00000000..2e18f3fe --- /dev/null +++ b/modules/vault-security-group-rules/variables.tf @@ -0,0 +1,38 @@ +# --------------------------------------------------------------------------------------------------------------------- +# REQUIRED PARAMETERS +# You must provide a value for each of these parameters. 
+# --------------------------------------------------------------------------------------------------------------------- + +variable "security_group_id" { + description = "The ID of the security group to which we should add the Vault security group rules" +} + +variable "allowed_inbound_cidr_blocks" { + description = "A list of CIDR-formatted IP address ranges from which the EC2 Instances will allow connections to Vault" + type = list(string) +} + +variable "allowed_inbound_security_group_ids" { + description = "A list of security group IDs that will be allowed to connect to Vault" + type = list(string) +} + +# --------------------------------------------------------------------------------------------------------------------- +# OPTIONAL PARAMETERS +# These parameters have reasonable defaults. +# --------------------------------------------------------------------------------------------------------------------- + +variable "allowed_inbound_security_group_count" { + description = "The number of entries in var.allowed_inbound_security_group_ids. Ideally, this value could be computed dynamically, but we pass this variable to a Terraform resource's 'count' property and Terraform requires that 'count' be computed with literals or data sources only." +} + +variable "api_port" { + description = "The port to use for Vault API calls" + default = 8200 +} + +variable "cluster_port" { + description = "The port to use for Vault server-to-server communication" + default = 8201 +} +