diff --git a/README.md b/README.md
index 3602eb0..1e62c80 100644
--- a/README.md
+++ b/README.md
@@ -59,18 +59,22 @@ This project creates and manages resources within an AWS account for infrastruct
| [aws_cloudfront_distribution.infrastructure_ecs_cluster_service_cloudfront](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudfront_distribution) | resource |
| [aws_cloudfront_function.custom_s3_buckets_viewer_request](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudfront_function) | resource |
| [aws_cloudfront_origin_access_control.custom_s3_buckets](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudfront_origin_access_control) | resource |
+| [aws_cloudwatch_event_rule.ecs_cluster_infrastructure_ecs_asg_diff_metric_1_min_cron](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource |
| [aws_cloudwatch_event_rule.ecs_cluster_infrastructure_pending_task_metric_1_min_cron](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource |
| [aws_cloudwatch_event_rule.infrastructure_ecs_cluster_service_ecr_scan](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource |
| [aws_cloudwatch_event_rule.infrastructure_ecs_cluster_service_scheduled_task](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource |
| [aws_cloudwatch_event_target.ecr_scan_event_target](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource |
+| [aws_cloudwatch_event_target.ecs_cluster_infrastructure_ecs_asg_diff_metric_1_min_cron](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource |
| [aws_cloudwatch_event_target.ecs_cluster_infrastructure_pending_task_metric_1_min_cron](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource |
| [aws_cloudwatch_event_target.infrastructure_ecs_cluster_service_scheduled_task](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource |
| [aws_cloudwatch_log_group.ecs_cluster_infrastructure_draining_lambda_log_group](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource |
+| [aws_cloudwatch_log_group.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda_log_group](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource |
| [aws_cloudwatch_log_group.ecs_cluster_infrastructure_pending_task_metric_lambda_log_group](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource |
| [aws_cloudwatch_log_group.infrastructure_ecs_cluster_service](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource |
| [aws_cloudwatch_log_group.infrastructure_rds_exports](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource |
| [aws_cloudwatch_log_group.infrastructure_vpc_flow_logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource |
| [aws_cloudwatch_metric_alarm.infrastructure_ecs_cluster_asg_cpu](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) | resource |
+| [aws_cloudwatch_metric_alarm.infrastructure_ecs_cluster_ecs_asg_diff](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) | resource |
| [aws_cloudwatch_metric_alarm.infrastructure_ecs_cluster_pending_task](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) | resource |
| [aws_codebuild_project.infrastructure_ecs_cluster_service_build](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/codebuild_project) | resource |
| [aws_codedeploy_app.infrastructure_ecs_cluster_service_blue_green](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/codedeploy_app) | resource |
@@ -112,6 +116,11 @@ This project creates and manages resources within an AWS account for infrastruct
| [aws_iam_policy.ecs_cluster_infrastructure_draining_kms_encrypt](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
| [aws_iam_policy.ecs_cluster_infrastructure_draining_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
| [aws_iam_policy.ecs_cluster_infrastructure_draining_sns_publish_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
+| [aws_iam_policy.ecs_cluster_infrastructure_ecs_asg_diff_metric_asg_describe_asg_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
+| [aws_iam_policy.ecs_cluster_infrastructure_ecs_asg_diff_metric_cloudwatch_put_metric_data_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
+| [aws_iam_policy.ecs_cluster_infrastructure_ecs_asg_diff_metric_ecs_describe_cluster_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
+| [aws_iam_policy.ecs_cluster_infrastructure_ecs_asg_diff_metric_kms_encrypt](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
+| [aws_iam_policy.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
| [aws_iam_policy.ecs_cluster_infrastructure_pending_task_metric_cloudwatch_put_metric_data_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
| [aws_iam_policy.ecs_cluster_infrastructure_pending_task_metric_ecs_describe_cluster_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
| [aws_iam_policy.ecs_cluster_infrastructure_pending_task_metric_kms_encrypt](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
@@ -140,6 +149,7 @@ This project creates and manages resources within an AWS account for infrastruct
| [aws_iam_policy.infrastructure_ecs_cluster_ssm_service_setting_rw](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
| [aws_iam_policy.infrastructure_rds_monitoring](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
| [aws_iam_role.ecs_cluster_infrastructure_draining_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
+| [aws_iam_role.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
| [aws_iam_role.ecs_cluster_infrastructure_pending_task_metric_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
| [aws_iam_role.infrastructure_ecs_cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
| [aws_iam_role.infrastructure_ecs_cluster_autoscaling_lifecycle_termination](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
@@ -156,6 +166,11 @@ This project creates and manages resources within an AWS account for infrastruct
| [aws_iam_role_policy_attachment.ecs_cluster_infrastructure_draining_kms_encrypt](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
| [aws_iam_role_policy_attachment.ecs_cluster_infrastructure_draining_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
| [aws_iam_role_policy_attachment.ecs_cluster_infrastructure_draining_sns_publish_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
+| [aws_iam_role_policy_attachment.ecs_cluster_infrastructure_ecs_asg_diff_cloudwatch_metric_put_metric_data_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
+| [aws_iam_role_policy_attachment.ecs_cluster_infrastructure_ecs_asg_diff_kms_encrypt](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
+| [aws_iam_role_policy_attachment.ecs_cluster_infrastructure_ecs_asg_diff_metric_asg_describe_asg_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
+| [aws_iam_role_policy_attachment.ecs_cluster_infrastructure_ecs_asg_diff_metric_ecs_describe_cluster_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
+| [aws_iam_role_policy_attachment.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
| [aws_iam_role_policy_attachment.ecs_cluster_infrastructure_pending_task_cloudwatch_metric_put_metric_data_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
| [aws_iam_role_policy_attachment.ecs_cluster_infrastructure_pending_task_kms_encrypt](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
| [aws_iam_role_policy_attachment.ecs_cluster_infrastructure_pending_task_metric_ecs_describe_cluster_lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
@@ -190,8 +205,10 @@ This project creates and manages resources within an AWS account for infrastruct
| [aws_kms_key.custom_s3_buckets](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_key) | resource |
| [aws_kms_key.infrastructure](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_key) | resource |
| [aws_lambda_function.ecs_cluster_infrastructure_draining](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource |
+| [aws_lambda_function.ecs_cluster_infrastructure_ecs_asg_diff_metric](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource |
| [aws_lambda_function.ecs_cluster_infrastructure_pending_task_metric](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource |
| [aws_lambda_permission.ecs_cluster_infrastructure_draining_allow_sns_execution](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource |
+| [aws_lambda_permission.ecs_cluster_infrastructure_ecs_asg_diff_metric_allow_cloudwatch_execution](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource |
| [aws_lambda_permission.ecs_cluster_infrastructure_pending_task_metric_allow_cloudwatch_execution](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource |
| [aws_launch_template.infrastructure_ecs_cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/launch_template) | resource |
| [aws_lb_listener_certificate.service_shared_alb_certificate](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lb_listener_certificate) | resource |
@@ -321,6 +338,7 @@ This project creates and manages resources within an AWS account for infrastruct
| [random_password.infrastructure_ecs_cluster_service_cloudfront_bypass_protection_secret](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/password) | resource |
| [random_password.infrastructure_rds_root](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/password) | resource |
| [archive_file.ecs_cluster_infrastructure_draining_lambda](https://registry.terraform.io/providers/hashicorp/archive/latest/docs/data-sources/file) | data source |
+| [archive_file.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda](https://registry.terraform.io/providers/hashicorp/archive/latest/docs/data-sources/file) | data source |
| [archive_file.ecs_cluster_infrastructure_pending_task_metric_lambda](https://registry.terraform.io/providers/hashicorp/archive/latest/docs/data-sources/file) | data source |
| [aws_ami.ecs_cluster_ami](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ami) | data source |
| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
@@ -351,6 +369,7 @@ This project creates and manages resources within an AWS account for infrastruct
| [enable\_cloudformatian\_s3\_template\_store](#input\_enable\_cloudformatian\_s3\_template\_store) | Creates an S3 bucket to store custom CloudFormation templates, which can then be referenced in `custom_cloudformation_stacks`. A user with RW access to the bucket is also created. | `bool` | n/a | yes |
| [enable\_infrastructure\_ecs\_cluster](#input\_enable\_infrastructure\_ecs\_cluster) | Enable creation of infrastructure ECS cluster, to place ECS services | `bool` | n/a | yes |
| [enable\_infrastructure\_ecs\_cluster\_asg\_cpu\_alert](#input\_enable\_infrastructure\_ecs\_cluster\_asg\_cpu\_alert) | Enable a CPU alert for the ECS cluster's Autoscaling Group | `bool` | n/a | yes |
+| [enable\_infrastructure\_ecs\_cluster\_ecs\_asg\_diff\_alert](#input\_enable\_infrastructure\_ecs\_cluster\_ecs\_asg\_diff\_alert) | Enable the ECS Cluster Container Instance / ASG instance diff alert | `bool` | n/a | yes |
| [enable\_infrastructure\_ecs\_cluster\_efs](#input\_enable\_infrastructure\_ecs\_cluster\_efs) | Conditionally create and mount EFS to the ECS cluster instances | `bool` | n/a | yes |
| [enable\_infrastructure\_ecs\_cluster\_pending\_task\_alert](#input\_enable\_infrastructure\_ecs\_cluster\_pending\_task\_alert) | Enable the ECS Cluster pending task alert | `bool` | n/a | yes |
| [enable\_infrastructure\_ecs\_cluster\_services\_alb\_logs](#input\_enable\_infrastructure\_ecs\_cluster\_services\_alb\_logs) | Enable Infrastructure ECS cluster services ALB logs | `bool` | n/a | yes |
@@ -374,6 +393,12 @@ This project creates and manages resources within an AWS account for infrastruct
| [infrastructure\_ecs\_cluster\_draining\_lambda\_log\_retention](#input\_infrastructure\_ecs\_cluster\_draining\_lambda\_log\_retention) | Log retention for the ECS cluster draining Lambda | `number` | n/a | yes |
| [infrastructure\_ecs\_cluster\_ebs\_docker\_storage\_volume\_size](#input\_infrastructure\_ecs\_cluster\_ebs\_docker\_storage\_volume\_size) | Size of EBS volume for Docker storage on the infrastructure ECS instances | `number` | n/a | yes |
| [infrastructure\_ecs\_cluster\_ebs\_docker\_storage\_volume\_type](#input\_infrastructure\_ecs\_cluster\_ebs\_docker\_storage\_volume\_type) | Type of EBS volume for Docker storage on the infrastructure ECS instances (eg. gp3) | `string` | n/a | yes |
+| [infrastructure\_ecs\_cluster\_ecs\_asg\_diff\_alert\_evaluation\_periods](#input\_infrastructure\_ecs\_cluster\_ecs\_asg\_diff\_alert\_evaluation\_periods) | Evaluation periods for the ECS cluster's Container Instance / ASG instance diff alert | `number` | n/a | yes |
+| [infrastructure\_ecs\_cluster\_ecs\_asg\_diff\_alert\_opsgenie](#input\_infrastructure\_ecs\_cluster\_ecs\_asg\_diff\_alert\_opsgenie) | Enable Opsgenie alerts for the ECS cluster's Container Instance / ASG instance diff alert | `bool` | n/a | yes |
+| [infrastructure\_ecs\_cluster\_ecs\_asg\_diff\_alert\_period](#input\_infrastructure\_ecs\_cluster\_ecs\_asg\_diff\_alert\_period) | Period (in seconds) for the ECS cluster's Container Instance / ASG instance diff alert | `number` | n/a | yes |
+| [infrastructure\_ecs\_cluster\_ecs\_asg\_diff\_alert\_slack](#input\_infrastructure\_ecs\_cluster\_ecs\_asg\_diff\_alert\_slack) | Enable Slack alerts for the ECS cluster's Container Instance / ASG instance diff alert | `bool` | n/a | yes |
+| [infrastructure\_ecs\_cluster\_ecs\_asg\_diff\_alert\_threshold](#input\_infrastructure\_ecs\_cluster\_ecs\_asg\_diff\_alert\_threshold) | Threshold (ASG instance count minus registered ECS container instance count) for the ECS cluster's Container Instance / ASG instance diff alert | `number` | n/a | yes |
+| [infrastructure\_ecs\_cluster\_ecs\_asg\_diff\_metric\_lambda\_log\_retention](#input\_infrastructure\_ecs\_cluster\_ecs\_asg\_diff\_metric\_lambda\_log\_retention) | Log retention for the ECS cluster Container Instance / ASG instance diff metric Lambda | `number` | n/a | yes |
| [infrastructure\_ecs\_cluster\_instance\_type](#input\_infrastructure\_ecs\_cluster\_instance\_type) | The instance type for EC2 instances launched in the ECS cluster | `string` | n/a | yes |
| [infrastructure\_ecs\_cluster\_max\_instance\_lifetime](#input\_infrastructure\_ecs\_cluster\_max\_instance\_lifetime) | Maximum lifetime in seconds of an instance within the ECS cluster | `number` | n/a | yes |
| [infrastructure\_ecs\_cluster\_max\_size](#input\_infrastructure\_ecs\_cluster\_max\_size) | Maximum number of instances for the ECS cluster | `number` | n/a | yes |
diff --git a/ecs-cluster-infrastructure-alert-ecs-asg-diff.tf b/ecs-cluster-infrastructure-alert-ecs-asg-diff.tf
new file mode 100644
index 0000000..63f275a
--- /dev/null
+++ b/ecs-cluster-infrastructure-alert-ecs-asg-diff.tf
@@ -0,0 +1,30 @@
+# CloudWatch alarm on the custom "ContainerInstanceAsgInstanceDiff" metric
+# (namespace "ECS", dimension ClusterName) that the ecs-asg-diff-metric Lambda
+# publishes. The Lambda computes the value as ASG instance count minus
+# registered ECS container instance count, so the alarm fires when that
+# difference is greater than or equal to the configured threshold.
+resource "aws_cloudwatch_metric_alarm" "infrastructure_ecs_cluster_ecs_asg_diff" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  alarm_name          = "${local.resource_prefix}-infrastructure-ecs-cluster-infrastructure-ecs-asg-diff"
+  comparison_operator = "GreaterThanOrEqualToThreshold"
+  evaluation_periods  = local.infrastructure_ecs_cluster_ecs_asg_diff_alert_evaluation_periods
+  metric_name         = "ContainerInstanceAsgInstanceDiff"
+  namespace           = "ECS"
+  period              = local.infrastructure_ecs_cluster_ecs_asg_diff_alert_period
+  statistic           = "Maximum"
+  threshold           = local.infrastructure_ecs_cluster_ecs_asg_diff_alert_threshold
+  alarm_description   = "Container Instance / ASG Instance Difference for ${aws_ecs_cluster.infrastructure[0].name} Cluster"
+  actions_enabled     = true
+  alarm_actions = concat(
+    local.infrastructure_ecs_cluster_ecs_asg_diff_alert_slack ? [data.aws_sns_topic.infrastructure_slack_sns_topic[0].arn] : [],
+    local.infrastructure_ecs_cluster_ecs_asg_diff_alert_opsgenie ? [data.aws_sns_topic.infrastructure_opsgenie_sns_topic[0].arn] : []
+  )
+  ok_actions = concat(
+    local.infrastructure_ecs_cluster_ecs_asg_diff_alert_slack ? [data.aws_sns_topic.infrastructure_slack_sns_topic[0].arn] : [],
+    local.infrastructure_ecs_cluster_ecs_asg_diff_alert_opsgenie ? [data.aws_sns_topic.infrastructure_opsgenie_sns_topic[0].arn] : []
+  )
+  dimensions = {
+    ClusterName = aws_ecs_cluster.infrastructure[0].name
+  }
+}
diff --git a/ecs-cluster-infrastructure-ecs-asg-diff-lambda.tf b/ecs-cluster-infrastructure-ecs-asg-diff-lambda.tf
new file mode 100644
index 0000000..0772bc1
--- /dev/null
+++ b/ecs-cluster-infrastructure-ecs-asg-diff-lambda.tf
@@ -0,0 +1,195 @@
+# Lambda that publishes the "ContainerInstanceAsgInstanceDiff" custom metric
+# (ASG instance count minus registered ECS container instances) once a minute,
+# together with its log group, IAM role/policies and EventBridge schedule.
+# Every resource here is gated on local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert.
+
+# The name follows the /aws/lambda/<function_name> pattern of the function defined below.
+resource "aws_cloudwatch_log_group" "ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda_log_group" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  name              = "/aws/lambda/${local.resource_prefix_hash}-ecs-cluster-infrastructure-ecs-asg-diff-metric"
+  kms_key_id        = local.infrastructure_kms_encryption ? aws_kms_key.infrastructure[0].arn : null
+  retention_in_days = local.infrastructure_ecs_cluster_ecs_asg_diff_metric_lambda_log_retention
+}
+
+# Role name ends in a 6-character hash of the descriptive name; the full name is kept in description.
+resource "aws_iam_role" "ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  name        = "${local.resource_prefix}-${substr(sha512("ecs-cluster-infrastructure-ecs-asg-diff-metric"), 0, 6)}"
+  description = "${local.resource_prefix}-ecs-cluster-infrastructure-ecs-asg-diff-metric"
+  assume_role_policy = templatefile(
+    "${path.root}/policies/assume-roles/service-principle-standard.json.tpl",
+    { services = jsonencode(["lambda.amazonaws.com"]) }
+  )
+}
+
+# Baseline Lambda policy rendered from policies/lambda-default.json.tpl for this function name.
+resource "aws_iam_policy" "ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  name = "${local.resource_prefix}-ecs-cluster-infrastructure-ecs-asg-diff-metric"
+  policy = templatefile(
+    "${path.root}/policies/lambda-default.json.tpl",
+    {
+      region        = local.aws_region
+      account_id    = local.aws_account_id
+      function_name = "${local.resource_prefix_hash}-ecs-cluster-infrastructure-ecs-asg-diff-metric"
+    }
+  )
+}
+
+resource "aws_iam_role_policy_attachment" "ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  role       = aws_iam_role.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda[0].name
+  policy_arn = aws_iam_policy.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda[0].arn
+}
+
+resource "aws_iam_policy" "ecs_cluster_infrastructure_ecs_asg_diff_metric_cloudwatch_put_metric_data_lambda" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  name = "${local.resource_prefix}-ecs-cluster-infrastructure-ecs-asg-diff-metric-cloudwatch-put-metric-data"
+  policy = templatefile(
+    "${path.root}/policies/cloudwatch-put-metric-data.json.tpl",
+    {
+      region     = local.aws_region
+      account_id = local.aws_account_id
+      namespaces = ["ECS"]
+    }
+  )
+}
+
+resource "aws_iam_role_policy_attachment" "ecs_cluster_infrastructure_ecs_asg_diff_cloudwatch_metric_put_metric_data_lambda" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  role       = aws_iam_role.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda[0].name
+  policy_arn = aws_iam_policy.ecs_cluster_infrastructure_ecs_asg_diff_metric_cloudwatch_put_metric_data_lambda[0].arn
+}
+
+resource "aws_iam_policy" "ecs_cluster_infrastructure_ecs_asg_diff_metric_ecs_describe_cluster_lambda" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  name = "${local.resource_prefix}-ecs-cluster-infrastructure-ecs-asg-diff-metric-ecs-describe-cluster"
+  policy = templatefile(
+    "${path.root}/policies/ecs-describe-cluster.json.tpl",
+    {
+      region        = local.aws_region
+      account_id    = local.aws_account_id
+      cluster_names = [local.infrastructure_ecs_cluster_name]
+    }
+  )
+}
+
+resource "aws_iam_role_policy_attachment" "ecs_cluster_infrastructure_ecs_asg_diff_metric_ecs_describe_cluster_lambda" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  role       = aws_iam_role.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda[0].name
+  policy_arn = aws_iam_policy.ecs_cluster_infrastructure_ecs_asg_diff_metric_ecs_describe_cluster_lambda[0].arn
+}
+
+resource "aws_iam_policy" "ecs_cluster_infrastructure_ecs_asg_diff_metric_asg_describe_asg_lambda" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  name = "${local.resource_prefix}-ecs-cluster-infrastructure-ecs-asg-diff-metric-asg-describe-asg"
+  policy = templatefile(
+    "${path.root}/policies/asg-describe-asg.json.tpl", {}
+  )
+}
+
+resource "aws_iam_role_policy_attachment" "ecs_cluster_infrastructure_ecs_asg_diff_metric_asg_describe_asg_lambda" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  role       = aws_iam_role.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda[0].name
+  policy_arn = aws_iam_policy.ecs_cluster_infrastructure_ecs_asg_diff_metric_asg_describe_asg_lambda[0].arn
+}
+
+# The KMS policy and its attachment are only created when infrastructure KMS encryption is enabled.
+resource "aws_iam_policy" "ecs_cluster_infrastructure_ecs_asg_diff_metric_kms_encrypt" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert && local.infrastructure_kms_encryption ? 1 : 0
+
+  name = "${local.resource_prefix}-ecs-cluster-infrastructure-ecs-asg-diff-metric-kms-encrypt"
+  policy = templatefile(
+    "${path.root}/policies/kms-encrypt.json.tpl",
+    { kms_key_arn = aws_kms_key.infrastructure[0].arn }
+  )
+}
+
+resource "aws_iam_role_policy_attachment" "ecs_cluster_infrastructure_ecs_asg_diff_kms_encrypt" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert && local.infrastructure_kms_encryption ? 1 : 0
+
+  role       = aws_iam_role.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda[0].name
+  policy_arn = aws_iam_policy.ecs_cluster_infrastructure_ecs_asg_diff_metric_kms_encrypt[0].arn
+}
+
+data "archive_file" "ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  type        = "zip"
+  source_dir  = "lambdas/ecs-asg-diff-metric"
+  output_path = "lambdas/.zip-cache/ecs-asg-diff-metric.zip"
+}
+
+# NOTE(review): timeout is 900s while the schedule below fires every minute, so a
+# slow invocation could overlap with the next one - confirm this is intended.
+resource "aws_lambda_function" "ecs_cluster_infrastructure_ecs_asg_diff_metric" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  filename         = data.archive_file.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda[0].output_path
+  function_name    = "${local.resource_prefix_hash}-ecs-cluster-infrastructure-ecs-asg-diff-metric"
+  description      = "${local.resource_prefix} ECS Cluster Infrastructure Container Instance / ASG Instance Difference Metric"
+  handler          = "function.lambda_handler"
+  runtime          = "python3.11"
+  role             = aws_iam_role.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda[0].arn
+  source_code_hash = data.archive_file.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda[0].output_base64sha256
+  memory_size      = 128
+  package_type     = "Zip"
+  timeout          = 900
+
+  environment {
+    variables = {
+      ecsClusterName = local.infrastructure_ecs_cluster_name
+      asgName        = aws_autoscaling_group.infrastructure_ecs_cluster[0].name
+    }
+  }
+
+  tracing_config {
+    mode = "Active"
+  }
+
+  # Every policy attachment on the role is listed, including the ASG describe one the handler needs.
+  depends_on = [
+    aws_iam_role_policy_attachment.ecs_cluster_infrastructure_ecs_asg_diff_metric_lambda,
+    aws_iam_role_policy_attachment.ecs_cluster_infrastructure_ecs_asg_diff_cloudwatch_metric_put_metric_data_lambda,
+    aws_iam_role_policy_attachment.ecs_cluster_infrastructure_ecs_asg_diff_metric_ecs_describe_cluster_lambda,
+    aws_iam_role_policy_attachment.ecs_cluster_infrastructure_ecs_asg_diff_metric_asg_describe_asg_lambda,
+    aws_iam_role_policy_attachment.ecs_cluster_infrastructure_ecs_asg_diff_kms_encrypt
+  ]
+}
+
+resource "aws_cloudwatch_event_rule" "ecs_cluster_infrastructure_ecs_asg_diff_metric_1_min_cron" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  name                = "${local.resource_prefix_hash}-ecs-cluster-infrastructure-ecs-asg-diff-metric-1-min"
+  description         = "Triggers the ${aws_lambda_function.ecs_cluster_infrastructure_ecs_asg_diff_metric[0].function_name} Lambda every 1 minute"
+  schedule_expression = "rate(1 minute)"
+}
+
+resource "aws_cloudwatch_event_target" "ecs_cluster_infrastructure_ecs_asg_diff_metric_1_min_cron" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  rule      = aws_cloudwatch_event_rule.ecs_cluster_infrastructure_ecs_asg_diff_metric_1_min_cron[0].name
+  target_id = "lambda"
+  arn       = aws_lambda_function.ecs_cluster_infrastructure_ecs_asg_diff_metric[0].arn
+}
+
+# Grants events.amazonaws.com permission to invoke the function, scoped to the 1-minute rule above.
+resource "aws_lambda_permission" "ecs_cluster_infrastructure_ecs_asg_diff_metric_allow_cloudwatch_execution" {
+  count = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert ? 1 : 0
+
+  statement_id  = "AllowExecutionFromCloudWatch"
+  action        = "lambda:InvokeFunction"
+  function_name = aws_lambda_function.ecs_cluster_infrastructure_ecs_asg_diff_metric[0].function_name
+  principal     = "events.amazonaws.com"
+  source_arn    = aws_cloudwatch_event_rule.ecs_cluster_infrastructure_ecs_asg_diff_metric_1_min_cron[0].arn
+}
diff --git a/kms-infrastructure.tf b/kms-infrastructure.tf
index 83c4cfb..67ddcb3 100644
--- a/kms-infrastructure.tf
+++ b/kms-infrastructure.tf
@@ -29,6 +29,11 @@ resource "aws_kms_key" "infrastructure" {
{
log_group_arn = local.enable_infrastructure_ecs_cluster_pending_task_alert && local.infrastructure_kms_encryption ? "arn:aws:logs:${local.aws_region}:${local.aws_account_id}:log-group:/aws/lambda/${local.resource_prefix_hash}-ecs-cluster-infrastructure-pending-task-metric" : ""
}
+ )}${local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert && local.infrastructure_kms_encryption ? "," : ""}
+ ${templatefile("${path.root}/policies/kms-key-policy-statements/cloudwatch-logs-allow.json.tpl",
+ {
+ log_group_arn = local.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert && local.infrastructure_kms_encryption ? "arn:aws:logs:${local.aws_region}:${local.aws_account_id}:log-group:/aws/lambda/${local.resource_prefix_hash}-ecs-cluster-infrastructure-ecs-asg-diff-metric" : ""
+ }
)}${length(local.infrastructure_ecs_cluster_services) > 0 && local.infrastructure_kms_encryption ? "," : ""}
${templatefile("${path.root}/policies/kms-key-policy-statements/cloudwatch-logs-allow.json.tpl",
{
diff --git a/lambdas/ecs-asg-diff-metric/function.py b/lambdas/ecs-asg-diff-metric/function.py
new file mode 100644
index 0000000..24ac07c
--- /dev/null
+++ b/lambdas/ecs-asg-diff-metric/function.py
@@ -0,0 +1,52 @@
+# Publishes the ContainerInstanceAsgInstanceDiff metric: the number of instances
+# in an Auto Scaling Group minus the number of container instances registered
+# in an ECS cluster, written to CloudWatch under the "ECS" namespace.
+import boto3
+import os
+
+# Both names are read once at import time; a missing variable raises KeyError.
+CLUSTER_NAME = os.environ['ecsClusterName']
+ASG_NAME = os.environ['asgName']
+
+ecs = boto3.client('ecs')
+autoscaling = boto3.client('autoscaling')
+cloudwatch = boto3.client('cloudwatch')
+
+
+def lambda_handler(event, context):
+    """Compute the ASG / ECS instance difference and publish it as a metric.
+
+    Args:
+        event: Invocation payload; not read by this handler.
+        context: Lambda context object; not read by this handler.
+
+    Returns:
+        A dict with 'statusCode' 200 and a 'body' message. When the cluster or
+        the Auto Scaling Group is not found the handler returns early and no
+        metric is published.
+    """
+    clusters = ecs.describe_clusters(clusters=[CLUSTER_NAME])['clusters']
+    if not clusters:
+        return {'statusCode': 200, 'body': 'No ECS cluster found with the given name.'}
+    registered_instances = clusters[0]['registeredContainerInstancesCount']
+
+    groups = autoscaling.describe_auto_scaling_groups(
+        AutoScalingGroupNames=[ASG_NAME],
+    )['AutoScalingGroups']
+    if not groups:
+        return {'statusCode': 200, 'body': 'No Auto Scaling Group found with the given name.'}
+    asg_instances = len(groups[0]['Instances'])
+
+    # Positive when the ASG has more instances than the cluster has registered.
+    instance_diff = asg_instances - registered_instances
+
+    datum = {
+        'MetricName': "ContainerInstanceAsgInstanceDiff",
+        'Dimensions': [{'Name': 'ClusterName', 'Value': CLUSTER_NAME}],
+        'Value': instance_diff,
+        'Unit': 'Count',
+    }
+    cloudwatch.put_metric_data(Namespace="ECS", MetricData=[datum])
+
+    body = f'Container Instance / ASG Instance difference ({instance_diff}) calculated and published successfully.'
+    return {'statusCode': 200, 'body': body}
diff --git a/locals.tf b/locals.tf
index 7bf6e4c..2ff71bc 100644
--- a/locals.tf
+++ b/locals.tf
@@ -15,11 +15,13 @@ locals {
infrastructure_opsgenie_sns_topic_name = "${local.project_name}-cloudwatch-opsgenie-alerts"
infrastructure_slack_sns_topic_in_use = (
local.infrastructure_ecs_cluster_asg_cpu_alert_slack ||
- local.infrastructure_ecs_cluster_pending_task_alert_slack
+ local.infrastructure_ecs_cluster_pending_task_alert_slack ||
+ local.infrastructure_ecs_cluster_ecs_asg_diff_alert_slack
)
infrastructure_opsgenie_sns_topic_in_use = (
local.infrastructure_ecs_cluster_asg_cpu_alert_opsgenie ||
- local.infrastructure_ecs_cluster_pending_task_alert_opsgenie
+ local.infrastructure_ecs_cluster_pending_task_alert_opsgenie ||
+ local.infrastructure_ecs_cluster_ecs_asg_diff_alert_opsgenie
)
enable_infrastructure_logs_bucket = (
@@ -156,6 +158,13 @@ locals {
infrastructure_ecs_cluster_pending_task_alert_threshold = var.infrastructure_ecs_cluster_pending_task_alert_threshold
infrastructure_ecs_cluster_pending_task_alert_slack = var.infrastructure_ecs_cluster_pending_task_alert_slack
infrastructure_ecs_cluster_pending_task_alert_opsgenie = var.infrastructure_ecs_cluster_pending_task_alert_opsgenie
+ enable_infrastructure_ecs_cluster_ecs_asg_diff_alert = var.enable_infrastructure_ecs_cluster_ecs_asg_diff_alert && local.enable_infrastructure_ecs_cluster
+ infrastructure_ecs_cluster_ecs_asg_diff_metric_lambda_log_retention = var.infrastructure_ecs_cluster_ecs_asg_diff_metric_lambda_log_retention
+ infrastructure_ecs_cluster_ecs_asg_diff_alert_evaluation_periods = var.infrastructure_ecs_cluster_ecs_asg_diff_alert_evaluation_periods
+ infrastructure_ecs_cluster_ecs_asg_diff_alert_period = var.infrastructure_ecs_cluster_ecs_asg_diff_alert_period
+ infrastructure_ecs_cluster_ecs_asg_diff_alert_threshold = var.infrastructure_ecs_cluster_ecs_asg_diff_alert_threshold
+ infrastructure_ecs_cluster_ecs_asg_diff_alert_slack = var.infrastructure_ecs_cluster_ecs_asg_diff_alert_slack
+ infrastructure_ecs_cluster_ecs_asg_diff_alert_opsgenie = var.infrastructure_ecs_cluster_ecs_asg_diff_alert_opsgenie
infrastructure_ecs_cluster_wafs = var.infrastructure_ecs_cluster_wafs
infrastructure_ecs_cluster_enable_ssm_dhmc = local.enable_infrastructure_ecs_cluster ? data.external.ssm_dhmc_setting[0].result.setting_value != "$None" : false
infrastructure_ecs_cluster_user_data = base64encode(
diff --git a/policies/asg-describe-asg.json.tpl b/policies/asg-describe-asg.json.tpl
new file mode 100644
index 0000000..441f778
--- /dev/null
+++ b/policies/asg-describe-asg.json.tpl
@@ -0,0 +1,12 @@
+{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Action": [
+ "autoscaling:DescribeAutoScalingGroups"
+ ],
+ "Effect": "Allow",
+ "Resource": "*"
+ }
+ ]
+}
diff --git a/variables.tf b/variables.tf
index 8a94f8a..25c343d 100644
--- a/variables.tf
+++ b/variables.tf
@@ -378,6 +378,41 @@ variable "infrastructure_ecs_cluster_pending_task_alert_opsgenie" {
type = bool
}
+variable "enable_infrastructure_ecs_cluster_ecs_asg_diff_alert" {
+  description = "Enable the ECS Cluster Container Instance / ASG instance diff alert"
+  type        = bool
+}
+
+variable "infrastructure_ecs_cluster_ecs_asg_diff_metric_lambda_log_retention" {
+  description = "Log retention for the ECS cluster Container Instance / ASG instance diff metric Lambda"
+  type        = number
+}
+
+variable "infrastructure_ecs_cluster_ecs_asg_diff_alert_evaluation_periods" {
+  description = "Evaluation periods for the ECS cluster's Container Instance / ASG instance diff alert"
+  type        = number
+}
+
+variable "infrastructure_ecs_cluster_ecs_asg_diff_alert_period" {
+  description = "Period (in seconds) for the ECS cluster's Container Instance / ASG instance diff alert"
+  type        = number
+}
+
+variable "infrastructure_ecs_cluster_ecs_asg_diff_alert_threshold" {
+  description = "Threshold (ASG instance count minus registered ECS container instance count) for the ECS cluster's Container Instance / ASG instance diff alert"
+  type        = number
+}
+
+variable "infrastructure_ecs_cluster_ecs_asg_diff_alert_slack" {
+  description = "Enable Slack alerts for the ECS cluster's Container Instance / ASG instance diff alert"
+  type        = bool
+}
+
+variable "infrastructure_ecs_cluster_ecs_asg_diff_alert_opsgenie" {
+  description = "Enable Opsgenie alerts for the ECS cluster's Container Instance / ASG instance diff alert"
+  type        = bool
+}
+
variable "infrastructure_ecs_cluster_wafs" {
description = "Map of WAF ACLs to craete, which can be used with service CloudFront distributions"
type = map(object({