diff --git a/README.md b/README.md index 9cec794..26a3ed0 100644 --- a/README.md +++ b/README.md @@ -36,16 +36,6 @@ The module includes configurations for IAM roles, KMS keys, VPC settings, and va | `node_taints` | `map(string)` | `{}` | The taints to apply to the EKS nodes. | No | | `enable_eks_pod_identities` | `bool` | `true` | Enable EKS Pod Identities. | No | | `pod_identity_tags` | `map(string)` | `{}` | The tags to apply to the Pod Identities. | No | -| `fb_chart_version` | `string` | `"0.1.33"` | Fluent-bit helm chart version. | No | -| `fb_log_encryption` | `bool` | `true` | Enable Fluent-bit log encryption. | No | -| `fb_log_systemd` | `bool` | `true` | Enable Fluent-bit cloudwatch logging for systemd. | No | -| `fb_tags` | `map(string)` | `{}` | The tags to apply to the fluent-bit deployment. | No | -| `fb_log_retention` | `number` | `7` | Days to retain Fluent-bit logs. | No | -| `fb_system_log_retention` | `number` | `7` | Days to retain Fluent-bit systemd logs. | No | -| `fb_drop_namespaces` | `list(string)` | `["kube-system", "cert-manager"]` | Fluent-bit doesn't send logs for these namespaces. | No | -| `fb_kube_namespaces` | `list(string)` | `["kube.*", "cert-manager.*"]` | Kubernetes namespaces. | No | -| `fb_log_filters` | `list(string)` | `["kube-probe", "health", "prometheus", "liveness"]` | Fluent-bit doesn't send logs if message consists of these values. | No | -| `fb_additional_log_filters` | `list(string)` | `["ELB-HealthChecker", "Amazon-Route53-Health-Check-Service"]` | Fluent-bit doesn't send logs if message consists of these values. | No | | `kp_chart_version` | `string` | `"0.37.0"` | Karpenter helm chart version. | No | | `karpenter_tags` | `map(string)` | `{}` | The tags to apply to the Karpenter deployment. | No | | `main_bucket_tags` | `map(string)` | `{}` | The tags to apply to the main bucket. | No | @@ -139,6 +129,23 @@ env = "dev" project = "batcave" ``` +3. I am seeing the following error, what does it mean? 
+ +```bash +[error] [aws_client] connection initialization error +[error] [output:cloudwatch_logs:cloudwatch_logs.1] Failed to create log stream +[error] [output:cloudwatch_logs:cloudwatch_logs.1] Failed to send events +``` + +You will see this error during the initial stand-up of the Fluent Bit pod(s). +This error should eventually resolve itself as the Fluent Bit pod(s) come up and start sending logs to CloudWatch. +Look for the following CloudWatch log groups (where `<cluster_name>` is the name of your EKS cluster) to validate that logs are being sent to CloudWatch as expected: + +- `/aws/containerinsights/<cluster_name>/application` +- `/aws/containerinsights/<cluster_name>/dataplane` +- `/aws/containerinsights/<cluster_name>/performance` +- `/aws/containerinsights/<cluster_name>/host` + ### Explanation: 1. **Terraform Configuration**: diff --git a/addons/fluentbit.tf b/addons/fluentbit.tf deleted file mode 100644 index 59834ec..0000000 --- a/addons/fluentbit.tf +++ /dev/null @@ -1,45 +0,0 @@ -#Cloudwatch Log Group -resource "aws_cloudwatch_log_group" "fluent-bit" { - name = local.fluentbit_log_name - retention_in_days = var.fluentbit_log_retention - kms_key_id = var.fluentbit_log_encryption ? var.cloudwatch_kms_key_arn : null - tags = var.fluentbit_tags -} - -resource "aws_cloudwatch_log_group" "fluent-bit-system" { - count = var.fluentbit_log_systemd ? 1 : 0 - name = local.fluentbit_system_log_name - retention_in_days = var.fluentbit_system_log_retention - kms_key_id = var.fluentbit_log_encryption ? 
var.cloudwatch_kms_key_arn : null - tags = var.fluentbit_tags -} - -#Fluentbit HELM -resource "helm_release" "fluent-bit" { - atomic = true - name = "fluentbit" - repository = "https://aws.github.io/eks-charts" - chart = "aws-for-fluent-bit" - version = var.fluentbit_chart_version - create_namespace = true - namespace = local.fluentbit_namespace - - values = [ - local.values - ] - - set { - name = "clusterName" - value = var.eks_cluster_name - } - - set { - name = "serviceAccount.name" - value = local.fluentbit_service_account_name - } - - set{ - name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" - value = aws_iam_role.fluentbit.arn - } -} diff --git a/addons/iam.tf b/addons/iam.tf deleted file mode 100644 index bd13df7..0000000 --- a/addons/iam.tf +++ /dev/null @@ -1,33 +0,0 @@ -resource "aws_iam_role" "fluentbit" { - name = "${var.eks_cluster_name}-fluentbit" - path = var.iam_path - permissions_boundary = var.iam_permissions_boundary_arn - assume_role_policy = data.aws_iam_policy_document.fluentbit_trust.json -} - -resource "aws_iam_role_policy_attachment" "fluentbit" { - role = aws_iam_role.fluentbit.name - policy_arn = "arn:${var.aws_partition}:iam::aws:policy/CloudWatchAgentServerPolicy" -} - -data "aws_iam_policy_document" "fluentbit_trust" { - statement { - sid = "AllowEKSForFluentbit" - actions = ["sts:AssumeRoleWithWebIdentity"] - effect = "Allow" - principals { - type = "Federated" - identifiers = [var.eks_oidc_provider_arn] - } - condition { - test = "StringEquals" - values = ["system:serviceaccount:${local.fluentbit_namespace}:${local.fluentbit_service_account_name}"] - variable = "${var.eks_oidc_provider}:sub" - } - condition { - test = "StringEquals" - values = ["sts.amazonaws.com"] - variable = "${var.eks_oidc_provider}:aud" - } - } -} \ No newline at end of file diff --git a/addons/settings.tf b/addons/settings.tf index 56ae335..a89eebd 100644 --- a/addons/settings.tf +++ b/addons/settings.tf @@ -1,23 +1,4 @@ locals { - 
################################## Fluentbit Settings ################################## - fluentbit_log_name = "${var.eks_cluster_name}-fluent-bit" - fluentbit_namespace = "fluentbit" - fluentbit_service_account_name = "fluent-bit" - fluentbit_system_log_name = "${var.eks_cluster_name}-fluent-bit-systemd" - - config_settings = { - log_group_name = local.fluentbit_log_name - system_log_group_name = local.fluentbit_system_log_name - region = var.aws_region - log_retention_days = var.fluentbit_log_retention - drop_namespaces = "(${join("|", var.fluentbit_drop_namespaces)})" - log_filters = "(${join("|", var.fluentbit_log_filters)})" - additional_log_filters = "(${join("|", var.fluentbit_additional_log_filters)})" - kube_namespaces = var.fluentbit_kube_namespaces - } - - values = templatefile("${path.module}/values/fluentbit/values.yaml.tpl", local.config_settings) - ################################## Karpenter Settings ################################## karpenter_namespace = "karpenter" karpenter_service_account_name = "karpenter" diff --git a/addons/values/fluentbit/values.yaml.tpl b/addons/values/fluentbit/values.yaml.tpl deleted file mode 100644 index 5459848..0000000 --- a/addons/values/fluentbit/values.yaml.tpl +++ /dev/null @@ -1,75 +0,0 @@ -config: - ## https://docs.fluentbit.io/manual/pipeline/inputs - inputs: | - [INPUT] - Name tail - Tag kube.* - Path /var/log/containers/*.log - Read_from_head true - multiline.parser docker, cri - Docker_Mode On - Parser docker - Mem_Buf_Limit 50MB - - [INPUT] - Name systemd - Tag host.* - Systemd_Filter _SYSTEMD_UNIT=kubelet.service - Read_From_Tail On - - ## https://docs.fluentbit.io/manual/pipeline/filters - filters: | - [FILTER] - Name kubernetes - Match kube.* - Merge_Log On - Keep_Log Off - K8S-Logging.Parser On - K8S-Logging.Exclude On - - [FILTER] - Name grep - Match kube.* - Exclude $log ${log_filters} - - [FILTER] - Name grep - Match kube.* - Exclude $log ${additional_log_filters} - -%{ for value in 
kube_namespaces } - [FILTER] - Name rewrite_tag - Match kube.* - Rule $kubernetes['namespace_name'] ^${value}$ system.$TAG false -%{ endfor ~} - - [FILTER] - Name grep - Match * - Exclude $kubernetes['namespace_name'] ${drop_namespaces} - - outputs: | - [OUTPUT] - Name cloudwatch_logs - Match kube.* - region ${region} - log_group_name ${log_group_name} - log_stream_prefix from-fluent-bit- - log_retention_days ${log_retention_days} - - [OUTPUT] - Name cloudwatch_logs - Match host.* - region ${region} - log_group_name ${system_log_group_name} - log_stream_prefix eks- - log_retention_days ${log_retention_days} - - [OUTPUT] - Name cloudwatch_logs - Match system.* - region ${region} - log_group_name ${system_log_group_name} - log_stream_prefix from-fluent-bit- - log_retention_days ${log_retention_days} \ No newline at end of file diff --git a/addons/variables.tf b/addons/variables.tf index 0b60e47..81632c3 100644 --- a/addons/variables.tf +++ b/addons/variables.tf @@ -109,56 +109,6 @@ variable "enable_bootstrap_user_data" { type = bool } -variable "fluentbit_additional_log_filters" { - description = "Additional log filters to use for Fluentbit" - type = list(string) -} - -variable "fluentbit_chart_version" { - description = "The version of the Fluentbit chart to use" - type = string -} - -variable "fluentbit_drop_namespaces" { - description = "Namespaces to drop from Fluentbit logs" - type = list(string) -} - -variable "fluentbit_kube_namespaces" { - description = "Kubernetes namespaces to use for Fluentbit" - type = list(string) -} - -variable "fluentbit_log_encryption" { - description = "Whether to encrypt Fluentbit logs" - type = bool -} - -variable "fluentbit_log_filters" { - description = "Log filters to use for Fluentbit" - type = list(string) -} - -variable "fluentbit_log_retention" { - description = "The number of days to retain Fluentbit logs" - type = number -} - -variable "fluentbit_log_systemd" { - description = "Whether to log systemd messages with 
Fluentbit" - type = bool -} - -variable "fluentbit_system_log_retention" { - description = "The number of days to retain Fluentbit systemd logs" - type = number -} - -variable "fluentbit_tags" { - description = "The tags to use for Fluentbit" - type = map(string) -} - variable "gold_image_ami_id" { description = "The AMI ID to use for the gold image" type = string diff --git a/efs.tf b/efs.tf index 7f38d44..c51bc91 100644 --- a/efs.tf +++ b/efs.tf @@ -21,7 +21,7 @@ resource "aws_efs_file_system" "main" { replication_overwrite = var.efs_protection_replication_overwrite } - tags = merge(var.efs_tags, { "Name" = "efs-${module.eks.cluster_name}" }) + tags = merge(var.efs_tags, local.tags_for_all_resources, { "Name" = "efs-${module.eks.cluster_name}" }) } resource "aws_efs_mount_target" "main" { diff --git a/eks.tf b/eks.tf index ed31255..fab6804 100644 --- a/eks.tf +++ b/eks.tf @@ -40,7 +40,7 @@ module "eks" { node_security_group_name = "eks-${local.cluster_name}-node-sg" node_security_group_use_name_prefix = false subnet_ids = local.all_private_subnet_ids - tags = merge(var.eks_cluster_tags, { Name = local.cluster_name }) + tags = merge(var.eks_cluster_tags, local.tags_for_all_resources, { Name = local.cluster_name }) vpc_id = data.aws_vpc.vpc.id cluster_enabled_log_types = [ @@ -67,7 +67,6 @@ module "main_nodes" { create_iam_role = true enable_bootstrap_user_data = var.gold_image_date != "" ? 
true : false - iam_role_additional_policies = { ssm = "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonSSMManagedInstanceCore" } iam_role_description = "IAM role for EKS nodes for cluster ${local.cluster_name}" iam_role_name = "eks-nodes-${local.cluster_name}" iam_role_path = local.iam_path @@ -91,14 +90,19 @@ module "main_nodes" { pre_bootstrap_user_data = local.pre_bootstrap_user_data taints = var.node_taints - tags = merge(var.eks_node_tags, { + iam_role_additional_policies = { + ssm = "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonSSMManagedInstanceCore" + cloudwatch = "arn:${data.aws_partition.current.partition}:iam::aws:policy/CloudWatchAgentServerPolicy" + } + + tags = merge(var.eks_node_tags, local.tags_for_all_resources, { Name = "eks-main-${var.cluster_custom_name}" }) } module "eks_addons" { source = "./addons" - depends_on = [module.main_nodes] + depends_on = [module.main_nodes, null_resource.sleep] available_availability_zones = local.available_availability_zone_names aws_partition = data.aws_partition.current.partition @@ -122,16 +126,6 @@ module "eks_addons" { eks_oidc_provider = module.eks.oidc_provider eks_oidc_provider_arn = module.eks.oidc_provider_arn enable_bootstrap_user_data = local.enable_bootstrap_user_data - fluentbit_additional_log_filters = var.fb_additional_log_filters - fluentbit_chart_version = var.fb_chart_version - fluentbit_drop_namespaces = var.fb_drop_namespaces - fluentbit_kube_namespaces = var.fb_kube_namespaces - fluentbit_log_encryption = var.fb_log_encryption - fluentbit_log_filters = var.fb_log_filters - fluentbit_log_retention = var.fb_log_retention - fluentbit_log_systemd = var.fb_log_systemd - fluentbit_system_log_retention = var.fb_system_log_retention - fluentbit_tags = var.fb_tags gold_image_ami_id = var.gold_image_date != "" ? 
data.aws_ami.gold_image[0].id : "" iam_path = local.iam_path iam_permissions_boundary_arn = local.permissions_boundary_arn @@ -169,14 +163,14 @@ module "eks_base" { secrets_store_csi_driver_provider_aws = { atomic = true - tags = { + tags = merge(local.tags_for_all_resources, { Name = "secrets-store-csi-driver-${module.eks.cluster_name}" - } + }) } - tags = { + tags = merge(local.tags_for_all_resources, { service = "eks" - } + }) depends_on = [ module.main_nodes, @@ -261,7 +255,7 @@ resource "aws_eks_addon" "aws_cloudwatch_observability" { } } containerLogs = { - enabled = false + enabled = true } }) @@ -284,7 +278,7 @@ module "aws_ebs_csi_pod_identity" { permissions_boundary_arn = local.permissions_boundary_arn policy_name_prefix = "${module.eks.cluster_name}-" - tags = merge(var.pod_identity_tags, { Cluster = local.cluster_name }) + tags = merge(var.pod_identity_tags, local.tags_for_all_resources, { Cluster = local.cluster_name }) depends_on = [aws_eks_addon.eks-pod-identity-agent] } @@ -303,7 +297,7 @@ module "aws_efs_csi_pod_identity" { permissions_boundary_arn = local.permissions_boundary_arn policy_name_prefix = "${module.eks.cluster_name}-" - tags = merge(var.pod_identity_tags, { Cluster = local.cluster_name }) + tags = merge(var.pod_identity_tags, local.tags_for_all_resources, { Cluster = local.cluster_name }) depends_on = [aws_eks_addon.eks-pod-identity-agent] } @@ -322,7 +316,7 @@ module "aws_lb_controller_pod_identity" { permissions_boundary_arn = local.permissions_boundary_arn policy_name_prefix = "${module.eks.cluster_name}-" - tags = merge(var.pod_identity_tags, { Cluster = local.cluster_name }) + tags = merge(var.pod_identity_tags, local.tags_for_all_resources, { Cluster = local.cluster_name }) # Pod Identity Associations association_defaults = { @@ -352,7 +346,7 @@ module "aws_cloudwatch_observability_pod_identity" { permissions_boundary_arn = local.permissions_boundary_arn policy_name_prefix = "${module.eks.cluster_name}-" - tags = 
merge(var.pod_identity_tags, { Cluster = local.cluster_name }) + tags = merge(var.pod_identity_tags, local.tags_for_all_resources, { Cluster = local.cluster_name }) associations = { "amazon-cloudwatch-observability-controller-manager" = { @@ -419,6 +413,14 @@ resource "aws_security_group_rule" "https-vpc-ingress" { cidr_blocks = data.aws_vpc.vpc.cidr_block_associations.*.cidr_block } +resource "null_resource" "sleep" { + provisioner "local-exec" { + command = "sleep 15" + } + + depends_on = [module.eks_base] +} + resource "null_resource" "terminate_nodes" { provisioner "local-exec" { command = <