From 7df9a8f05fdb4820345db926c42a2d237587e380 Mon Sep 17 00:00:00 2001
From: hms-tkl
Date: Wed, 22 Nov 2023 20:19:24 +0100
Subject: [PATCH] add code path to allow AKS with byoCNI and cilium without
 kube-proxy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* update main.tf with the helm provider and a helm_release for cilium
* update main.tf with the kube-proxy configuration for cilium
* update variables.tf to account for byoCNI settings
* update modules/azure_aks/main.tf to account for byoCNI settings
* add byocni-values.yaml for the Helm configuration of cilium 1.14.4
* add kube-proxy.json to allow enabling eBPF mode for byoCNI (i.e., removing kube-proxy)

Signed-off-by: Tilo Kleßen
---
 files/cilium/byocni-values.yaml | 46 ++++++++++++++++++
 files/cilium/kube-proxy.json    | 10 ++++
 input-minimal.tfvars            | 85 +++++++++++++++++++++++++++++++++
 main.tf                         | 42 +++++++++++++++-
 modules/azure_aks/main.tf       |  2 +-
 variables.tf                    |  8 ++--
 6 files changed, 187 insertions(+), 6 deletions(-)
 create mode 100644 files/cilium/byocni-values.yaml
 create mode 100644 files/cilium/kube-proxy.json
 create mode 100644 input-minimal.tfvars

diff --git a/files/cilium/byocni-values.yaml b/files/cilium/byocni-values.yaml
new file mode 100644
index 00000000..05f7f94a
--- /dev/null
+++ b/files/cilium/byocni-values.yaml
@@ -0,0 +1,46 @@
+version: "1.14.4"
+kube-version: "1.27.7"
+kubeProxyReplacement: "strict"
+namespace: "kube-system"
+aksbyocni:
+  enabled: true
+nodeinit:
+  enabled: true
+cluster:
+  name: "cilium-AKS-cluster"
+  id: 123
+gatewayAPI:
+  enabled: false
+pmtuDiscovery:
+  enabled: true
+healthPort: 9877
+ingressController:
+  enabled: true
+  loadbalancerMode: "shared"
+  ingressLBAnnotationPrefixes: "service.beta.kubernetes.io service.kubernetes.io cloud.google.com io.cilium"
+kubeProxyReplacementHealthzBindAddr: "0.0.0.0:10256"
+operator:
+  prometheus:
+    enabled: true
+installIptablesRules: true
+l7Proxy: true
+ipMasqAgent:
+  enabled: true
+socketLB:
+  hostNamespaceOnly: true
+enableCiliumEndpointSlice: true
+prometheus:
+  enabled: true
+hubble:
+  enabled: true
+  relay:
+    enabled: true
+  ui:
+    enabled: true
+cgroup:
+  hostRoot: "/sys/fs/cgroup"
+bpf:
+  masquerade: true
+  tproxy: true
+enableIPv4Masquerade: true
+k8sServicePort: 443
diff --git a/files/cilium/kube-proxy.json b/files/cilium/kube-proxy.json
new file mode 100644
index 00000000..d764938a
--- /dev/null
+++ b/files/cilium/kube-proxy.json
@@ -0,0 +1,10 @@
+{
+  "enabled": false,
+  "mode": "IPVS",
+  "ipvsConfig": {
+    "scheduler": "LeastConnection",
+    "TCPTimeoutSeconds": 900,
+    "TCPFINTimeoutSeconds": 120,
+    "UDPTimeoutSeconds": 300
+  }
+}
diff --git a/input-minimal.tfvars b/input-minimal.tfvars
new file mode 100644
index 00000000..9181cc4d
--- /dev/null
+++ b/input-minimal.tfvars
@@ -0,0 +1,85 @@
+# !NOTE! - These are only a subset of CONFIG-VARS.md provided as examples.
+# Customize this file to add any variables from 'CONFIG-VARS.md' whose default
+# values you want to change.
+
+# **************** REQUIRED VARIABLES ****************
+# These required variables' values MUST be provided by the User
+prefix = "hms-tkl-northeurope" # this is a prefix that you assign for the resources to be created
+location = "northeurope" # e.g., "eastus2"
+# **************** REQUIRED VARIABLES ****************
+
+# !NOTE! - Without specifying your CIDR block access rules, ingress traffic
+# to your cluster will be blocked by default.
+
+# ************** RECOMMENDED VARIABLES ***************
+default_public_access_cidrs = ["0.0.0.0/0"] # e.g., ["123.45.6.89/32"]
+ssh_public_key = "~/.ssh/id_rsa.pub"
+# ************** RECOMMENDED VARIABLES ***************
+
+# Tags can be specified matching your tagging strategy.
+tags = {} # for example: { "owner|email" = "@.", "key1" = "value1", "key2" = "value2" }
+
+# Postgres config - By having this entry, a database server is created. If you do not
+# need an external database server, remove the 'postgres_servers'
+# block below.
+postgres_servers = {
+  default = {},
+}
+
+# Azure Container Registry config
+create_container_registry = false
+container_registry_sku = "Standard"
+container_registry_admin_enabled = false
+
+# AKS config
+kubernetes_version = "1.27"
+default_nodepool_min_nodes = 2
+default_nodepool_vm_type = "Standard_D4_v3"
+# v3 VM sizes still have local temp storage
+aks_cluster_sku_tier = "Standard"
+aks_identity = "sp"
+aks_network_plugin = "none"
+aks_network_policy = "none"
+node_pools_availability_zones = ["2"]
+
+# AKS Node Pools config - minimal
+cluster_node_pool_mode = "minimal"
+node_pools = {
+  cas = {
+    "machine_type" = "Standard_E4s_v3"
+    "os_disk_size" = 200
+    "min_nodes"    = 0
+    "max_nodes"    = 5
+    "max_pods"     = 110
+    "node_taints"  = ["workload.sas.com/class=cas:NoSchedule"]
+    "node_labels" = {
+      "workload.sas.com/class" = "cas"
+    }
+  },
+  generic = {
+    "machine_type" = "Standard_D8s_v3"
+    "os_disk_size" = 200
+    "min_nodes"    = 0
+    "max_nodes"    = 5
+    "max_pods"     = 110
+    "node_taints"  = []
+    "node_labels" = {
+      "workload.sas.com/class"        = "compute"
+      "launcher.sas.com/prepullImage" = "sas-programming-environment"
+    }
+  }
+}
+
+# Jump Box
+create_jump_public_ip = true
+jump_vm_admin = "hms"
+jump_vm_machine_type = "Standard_B2s"
+
+# Storage for SAS Viya CAS/Compute
+storage_type = "standard"
+# required ONLY when storage_type is "standard", which creates an NFS server VM
+create_nfs_public_ip = false
+nfs_vm_admin = "hms"
+nfs_vm_machine_type = "Standard_D4s_v4"
+nfs_raid_disk_size = 128
+nfs_raid_disk_type = "Standard_LRS"
diff --git a/main.tf b/main.tf
index 0d716e77..f17b4d18 100644
--- a/main.tf
+++ b/main.tf
@@ -31,6 +31,17 @@ provider "kubernetes" {
   cluster_ca_certificate = base64decode(module.aks.cluster_ca_certificate)
 }
 
+provider "helm" {
+  kubernetes {
+    host = module.aks.host
+
+    client_key             = base64decode(module.aks.client_key)
+    client_certificate     = base64decode(module.aks.client_certificate)
+    cluster_ca_certificate = base64decode(module.aks.cluster_ca_certificate)
+  }
+}
+
 data "azurerm_subscription" "current" {}
 
 data "azurerm_resource_group" "network_rg" {
@@ -179,7 +190,36 @@ module "kubeconfig" {
   client_crt = module.aks.client_certificate
   client_key = module.aks.client_key
   token      = module.aks.cluster_password
-  depends_on = [module.aks]
+  depends_on = [helm_release.cilium]
+}
+
+resource "null_resource" "kube-proxy-remover" {
+  provisioner "local-exec" {
+    command = "az aks update -g ${azurerm_resource_group.aks_rg[0].name} -n ${module.aks.name} --kube-proxy-config ./files/cilium/kube-proxy.json"
+  }
+  depends_on = [module.aks, module.node_pools]
+}
+
+resource "helm_release" "cilium" {
+  name       = "cilium"
+  repository = "https://helm.cilium.io/"
+  chart      = "cilium"
+  namespace  = "kube-system"
+  version    = "v1.14.4"
+  depends_on = [null_resource.kube-proxy-remover]
+  values = [
+    file("files/cilium/byocni-values.yaml")
+  ]
+
+  set {
+    name  = "k8sServiceHost"
+    value = trimsuffix(trimprefix(module.aks.host, "https://"), ":443")
+  }
+
+  set {
+    name  = "hubble.metrics.enabled"
+    value = "{dns,drop,tcp,flow,port-distribution,icmp,httpV2:exemplars=true;labelsContext=source_ip\\,source_namespace\\,source_workload\\,destination_ip\\,destination_namespace\\,destination_workload\\,traffic_direction}"
+  }
 }
 
 module "node_pools" {
diff --git a/modules/azure_aks/main.tf b/modules/azure_aks/main.tf
index d1d0098c..a5f6d742 100644
--- a/modules/azure_aks/main.tf
+++ b/modules/azure_aks/main.tf
@@ -22,7 +22,7 @@ resource "azurerm_kubernetes_cluster" "aks" {
 
   network_profile {
     network_plugin = var.aks_network_plugin
-    network_policy = var.aks_network_plugin == "kubenet" && var.aks_network_policy == "azure" ? null : var.aks_network_policy
+    network_policy = (var.aks_network_plugin == "kubenet" && var.aks_network_policy == "azure") || var.aks_network_plugin == "none" ? null : var.aks_network_policy
 
     # Docs on AKS Advanced Networking config
     # https://docs.microsoft.com/en-us/azure/architecture/aws-professional/networking
diff --git a/variables.tf b/variables.tf
index ee188417..e0003198 100644
--- a/variables.tf
+++ b/variables.tf
@@ -161,8 +161,8 @@ variable "aks_network_plugin" {
   default = "kubenet"
 
   validation {
-    condition     = contains(["kubenet", "azure"], var.aks_network_plugin)
-    error_message = "Error: Currently the supported values are 'kubenet' and 'azure'."
+    condition     = contains(["kubenet", "azure", "none"], var.aks_network_plugin)
+    error_message = "Error: Currently the supported values are 'kubenet', 'azure', and 'none'."
   }
 }
 
@@ -172,8 +172,8 @@ variable "aks_network_policy" {
   default = "azure"
 
   validation {
-    condition     = contains(["azure", "calico"], var.aks_network_policy)
-    error_message = "Error: Currently the supported values are 'calico' and 'azure'."
+    condition     = contains(["azure", "calico", "none"], var.aks_network_policy)
+    error_message = "Error: Currently the supported values are 'azure', 'calico', and 'none'."
  }
 }