From 201137b716fccded53f072d6265653b67298e37f Mon Sep 17 00:00:00 2001 From: Camil Blanaru Date: Wed, 24 May 2017 00:03:02 +0200 Subject: [PATCH] Add Kubernetes 1.6 support. --- README.md | 6 +- definitions/cleanup.sh => cleanup.sh | 5 +- {definitions/grafana => grafana}/Dockerfile | 2 +- .../grafana-config/grafana.ini | 0 .../grafana-dashboards/ec2-instances.json | 0 .../grafana-dashboards/k8s-cluster.json | 0 .../grafana-dashboards/k8s-nodes.json | 0 .../prometheus-data-exploration.json | 0 .../grafana-dashboards/prometheus-stats.json | 0 definitions/init.sh => init.sh | 58 ++++++--- .../grafana/grafana.svc.deployment.yaml | 2 +- .../ingress/01-basic-auth.secret.yaml | 0 .../ingress/02-nginx-lb.svc.deployment.yaml | 26 ++-- .../ingress/03-prometheus.ing.yaml | 0 .../kube-state-metrics/deployment.yml | 4 +- .../kube-state-metrics/service.yaml | 0 .../prometheus/00-alerts.cm.yaml | 112 ++---------------- k8s/prometheus/01-prometheus.configmap.yaml | 79 ++++++++++++ .../02-prometheus.svc.statefulset.yaml | 8 +- .../prometheus/03-alertmanager.configmap.yaml | 0 .../04-alertmanager.svc.deployment.yaml | 2 +- .../05-node-exporter.svc.daemonset.yaml | 7 +- k8s/rbac/01-prometheus-rbac-config.yaml | 5 + k8s/rbac/02-nginx-ingress-rbac-config.yaml | 100 ++++++++++++++++ definitions/remove.sh => remove.sh | 5 +- {definitions => tools}/alertmanager_proxy.sh | 0 {definitions => tools}/grafana_proxy.sh | 0 {definitions => tools}/prometheus_proxy.sh | 0 28 files changed, 277 insertions(+), 144 deletions(-) rename definitions/cleanup.sh => cleanup.sh (50%) rename {definitions/grafana => grafana}/Dockerfile (85%) rename {definitions/grafana => grafana}/grafana-config/grafana.ini (100%) rename {definitions/grafana => grafana}/grafana-dashboards/ec2-instances.json (100%) rename {definitions/grafana => grafana}/grafana-dashboards/k8s-cluster.json (100%) rename {definitions/grafana => grafana}/grafana-dashboards/k8s-nodes.json (100%) rename {definitions/grafana => 
grafana}/grafana-dashboards/prometheus-data-exploration.json (100%) rename {definitions/grafana => grafana}/grafana-dashboards/prometheus-stats.json (100%) rename definitions/init.sh => init.sh (80%) rename {definitions/k8s => k8s}/grafana/grafana.svc.deployment.yaml (92%) rename {definitions/k8s => k8s}/ingress/01-basic-auth.secret.yaml (100%) rename definitions/k8s/ingress/02-nginx-lb.svc.rc.yaml => k8s/ingress/02-nginx-lb.svc.deployment.yaml (84%) rename {definitions/k8s => k8s}/ingress/03-prometheus.ing.yaml (100%) rename {definitions/k8s => k8s}/kube-state-metrics/deployment.yml (79%) rename {definitions/k8s => k8s}/kube-state-metrics/service.yaml (100%) rename definitions/k8s/prometheus/01-prometheus.configmap.yaml => k8s/prometheus/00-alerts.cm.yaml (79%) create mode 100644 k8s/prometheus/01-prometheus.configmap.yaml rename {definitions/k8s => k8s}/prometheus/02-prometheus.svc.statefulset.yaml (86%) rename {definitions/k8s => k8s}/prometheus/03-alertmanager.configmap.yaml (100%) rename {definitions/k8s => k8s}/prometheus/04-alertmanager.svc.deployment.yaml (93%) rename {definitions/k8s => k8s}/prometheus/05-node-exporter.svc.daemonset.yaml (78%) create mode 100644 k8s/rbac/01-prometheus-rbac-config.yaml create mode 100644 k8s/rbac/02-nginx-ingress-rbac-config.yaml rename definitions/remove.sh => remove.sh (55%) rename {definitions => tools}/alertmanager_proxy.sh (100%) rename {definitions => tools}/grafana_proxy.sh (100%) rename {definitions => tools}/prometheus_proxy.sh (100%) diff --git a/README.md b/README.md index 971190c..12e48c4 100644 --- a/README.md +++ b/README.md @@ -31,13 +31,13 @@ _____________________________________________________________________ Clone repository - git clone github.com/camilb/prometheus-kubernetes && cd prometehus-kubernetes/definitions + git clone https://github.com/camilb/prometheus-kubernetes && cd prometheus-kubernetes Change these values in `init.sh`. 
-`GRAFANA_VERSION=4.1.0-beta1` +`GRAFANA_VERSION=4.3.0` -`PROMETHEUS_VERSION=v1.4.1` +`PROMETHEUS_VERSION=v1.6.3` `DOCKER_USER=your_dockerhub_user` diff --git a/definitions/cleanup.sh b/cleanup.sh similarity index 50% rename from definitions/cleanup.sh rename to cleanup.sh index 51385f1..714fdcb 100755 --- a/definitions/cleanup.sh +++ b/cleanup.sh @@ -1,6 +1,9 @@ git checkout k8s/ingress/01-basic-auth.secret.yaml git checkout k8s/prometheus/01-prometheus.configmap.yaml +git checkout k8s/prometheus/02-prometheus.svc.statefulset.yaml git checkout k8s/prometheus/03-alertmanager.configmap.yaml +git checkout k8s/prometheus/04-alertmanager.svc.deployment.yaml +git checkout k8s/prometheus/05-node-exporter.svc.daemonset.yaml git checkout k8s/grafana/grafana.svc.deployment.yaml +git checkout grafana/Dockerfile rm auth -rm dhparam.pem diff --git a/definitions/grafana/Dockerfile b/grafana/Dockerfile similarity index 85% rename from definitions/grafana/Dockerfile rename to grafana/Dockerfile index 390ad15..bf7900d 100644 --- a/definitions/grafana/Dockerfile +++ b/grafana/Dockerfile @@ -1,4 +1,4 @@ -FROM grafana/grafana:4.1.1 +FROM grafana/grafana:GRAFANA_VERSION MAINTAINER Camil Blanaru ADD grafana-config/grafana.ini /etc/grafana/grafana.ini diff --git a/definitions/grafana/grafana-config/grafana.ini b/grafana/grafana-config/grafana.ini similarity index 100% rename from definitions/grafana/grafana-config/grafana.ini rename to grafana/grafana-config/grafana.ini diff --git a/definitions/grafana/grafana-dashboards/ec2-instances.json b/grafana/grafana-dashboards/ec2-instances.json similarity index 100% rename from definitions/grafana/grafana-dashboards/ec2-instances.json rename to grafana/grafana-dashboards/ec2-instances.json diff --git a/definitions/grafana/grafana-dashboards/k8s-cluster.json b/grafana/grafana-dashboards/k8s-cluster.json similarity index 100% rename from definitions/grafana/grafana-dashboards/k8s-cluster.json rename to grafana/grafana-dashboards/k8s-cluster.json 
diff --git a/definitions/grafana/grafana-dashboards/k8s-nodes.json b/grafana/grafana-dashboards/k8s-nodes.json similarity index 100% rename from definitions/grafana/grafana-dashboards/k8s-nodes.json rename to grafana/grafana-dashboards/k8s-nodes.json diff --git a/definitions/grafana/grafana-dashboards/prometheus-data-exploration.json b/grafana/grafana-dashboards/prometheus-data-exploration.json similarity index 100% rename from definitions/grafana/grafana-dashboards/prometheus-data-exploration.json rename to grafana/grafana-dashboards/prometheus-data-exploration.json diff --git a/definitions/grafana/grafana-dashboards/prometheus-stats.json b/grafana/grafana-dashboards/prometheus-stats.json similarity index 100% rename from definitions/grafana/grafana-dashboards/prometheus-stats.json rename to grafana/grafana-dashboards/prometheus-stats.json diff --git a/definitions/init.sh b/init.sh similarity index 80% rename from definitions/init.sh rename to init.sh index bf5eeab..1e3680f 100755 --- a/definitions/init.sh +++ b/init.sh @@ -1,7 +1,9 @@ #!/bin/bash -GRAFANA_DEFAULT_VERSION=4.2.0 +GRAFANA_DEFAULT_VERSION=4.3.0 PROMETHEUS_DEFAULT_VERSION=v1.6.3 +ALERT_MANAGER_DEFAULT_VERSION=v0.7.0-rc.0 +NODE_EXPORTER_DEFAULT_VERSION=v0.14.0 DOCKER_USER_DEFAULT=$(docker info|grep Username:|awk '{print $2}') RED='\033[0;31m' GREEN='\033[0;32m' @@ -25,6 +27,17 @@ echo read -p "Enter Prometheus version [$PROMETHEUS_DEFAULT_VERSION]: " PROMETHEUS_VERSION PROMETHEUS_VERSION=${PROMETHEUS_VERSION:-$PROMETHEUS_DEFAULT_VERSION} +#Ask for alertmanager version or apply default +echo +read -p "Enter Alert Manager version [$ALERT_MANAGER_DEFAULT_VERSION]: " ALERT_MANAGER_VERSION +ALERT_MANAGER_VERSION=${ALERT_MANAGER_VERSION:-$ALERT_MANAGER_DEFAULT_VERSION} + + +#Ask for node exporter version or apply default +echo +read -p "Enter Node Exporter version [$NODE_EXPORTER_DEFAULT_VERSION]: " NODE_EXPORTER_VERSION +NODE_EXPORTER_VERSION=${NODE_EXPORTER_VERSION:-$NODE_EXPORTER_DEFAULT_VERSION} + #Ask 
for dockerhub user or apply default of the current logged-in username echo read -p "Enter Dockerhub username [$DOCKER_USER_DEFAULT]: " DOCKER_USER @@ -87,13 +100,13 @@ tput sgr0 echo if [ ! -z $AWS_ACCESS_KEY_ID ] && [ ! -z $AWS_SECRET_ACCESS_KEY ]; then aws_access_key=$AWS_ACCESS_KEY_ID - aws_access_password=$AWS_SECRET_ACCESS_KEY + aws_secret_key=$AWS_SECRET_ACCESS_KEY echo -e "${ORANGE}AWS_ACCESS_KEY_ID found, using $aws_access_key." tput sgr0 echo elif [ ! -z $AWS_ACCESS_KEY ] && [ ! -z $AWS_SECRET_KEY ]; then aws_access_key=$AWS_ACCESS_KEY - aws_access_password=$AWS_SECRET_KEY + aws_secret_key=$AWS_SECRET_KEY echo -e "${ORANGE}AWS_ACCESS_KEY found, using $aws_access_key." tput sgr0 echo @@ -129,7 +142,7 @@ else break fi prompt='*' - aws_access_password+="$char" + aws_secret_key+="$char" done echo fi @@ -137,7 +150,7 @@ fi #sed in the AWS credentials. this looks odd because aws secret access keys can have '/' as a valid character #so we use ',' as a delimiter for sed, since that won't appear in the secret key sed -i -e 's/aws_access_key/'"$aws_access_key"'/g' k8s/prometheus/01-prometheus.configmap.yaml -sed -i -e 's,aws_access_password,'"$aws_access_password"',g' k8s/prometheus/01-prometheus.configmap.yaml +sed -i -e 's,aws_secret_key,'"$aws_secret_key"',g' k8s/prometheus/01-prometheus.configmap.yaml #slack channel echo -e "${PURPLE}Insert your slack channel name where you wish to receive alerts and press [ENTER]:" @@ -156,21 +169,34 @@ do done echo sed -i -e 's/slack_channel/'"$slack_channel"'/g' k8s/prometheus/03-alertmanager.configmap.yaml +echo + +read -r -p "Is the RBAC plugin enabled? [y/N] " response +if [[ $response =~ ^([yY][eE][sS]|[yY])$ ]] +then + kubectl create -f ./k8s/rbac + sed -i -e 's/default/'prometheus'/g' k8s/prometheus/02-prometheus.svc.statefulset.yaml +else + echo -e "${GREEN}Skipping RBAC configuration." 
+ tput sgr0 +fi +#set prometheus version +sed -i -e 's/PROMETHEUS_VERSION/'"$PROMETHEUS_VERSION"'/g' k8s/prometheus/02-prometheus.svc.statefulset.yaml -#remove "sed" generated files -rm k8s/prometheus/*.yaml-e && rm k8s/ingress/*.yaml-e && rm k8s/grafana/*.yaml-e +#set grafana version +sed -i -e 's/GRAFANA_VERSION/'"$GRAFANA_VERSION"'/g' grafana/Dockerfile +sed -i -e 's/GRAFANA_VERSION/'"$GRAFANA_VERSION"'/g' k8s/grafana/grafana.svc.deployment.yaml -echo +#set alertmanager version +sed -i -e 's/ALERT_MANAGER_VERSION/'"$ALERT_MANAGER_VERSION"'/g' k8s/prometheus/04-alertmanager.svc.deployment.yaml -#nginx load balancer display errors if dhparam is not set -echo -e "${RED}Generate DH parameters for nginx." -openssl dhparam -out dhparam.pem 1024 -tput sgr0 +#set node-exporter version +sed -i -e 's/NODE_EXPORTER_VERSION/'"$NODE_EXPORTER_VERSION"'/g' k8s/prometheus/05-node-exporter.svc.daemonset.yaml -echo -e "${BLUE}Create dhparam secret." -tput sgr0 -kubectl create secret generic dhparam --from-file=dhparam.pem -n monitoring + +#remove "*-e" backup files that BSD/macOS "sed -i -e" leaves behind (none exist with GNU sed, hence -f) +rm -f k8s/prometheus/*.yaml-e k8s/ingress/*.yaml-e k8s/grafana/*.yaml-e grafana/*-e echo @@ -210,7 +236,7 @@ echo #deploy prometheus echo -e "${ORANGE}Deploying Prometheus" tput sgr0 -kubectl create -f ./k8s/prometheus +kubectl create -R -f ./k8s/prometheus echo diff --git a/definitions/k8s/grafana/grafana.svc.deployment.yaml b/k8s/grafana/grafana.svc.deployment.yaml similarity index 92% rename from definitions/k8s/grafana/grafana.svc.deployment.yaml rename to k8s/grafana/grafana.svc.deployment.yaml index abaabcf..e4aec1e 100644 --- a/definitions/k8s/grafana/grafana.svc.deployment.yaml +++ b/k8s/grafana/grafana.svc.deployment.yaml @@ -26,7 +26,7 @@ spec: app: grafana spec: containers: - - image: DOCKER_USER/grafana:4.1.1 + - image: DOCKER_USER/grafana:GRAFANA_VERSION name: grafana imagePullPolicy: Always ports: diff --git a/definitions/k8s/ingress/01-basic-auth.secret.yaml 
b/k8s/ingress/01-basic-auth.secret.yaml similarity index 100% rename from definitions/k8s/ingress/01-basic-auth.secret.yaml rename to k8s/ingress/01-basic-auth.secret.yaml diff --git a/definitions/k8s/ingress/02-nginx-lb.svc.rc.yaml b/k8s/ingress/02-nginx-lb.svc.deployment.yaml similarity index 84% rename from definitions/k8s/ingress/02-nginx-lb.svc.rc.yaml rename to k8s/ingress/02-nginx-lb.svc.deployment.yaml index 47e9832..5307d22 100644 --- a/definitions/k8s/ingress/02-nginx-lb.svc.rc.yaml +++ b/k8s/ingress/02-nginx-lb.svc.deployment.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: Service metadata: name: default-http-backend - namespace: monitoring + namespace: nginx-ingress labels: k8s-app: default-http-backend spec: @@ -18,9 +18,13 @@ apiVersion: extensions/v1beta1 kind: Deployment metadata: name: default-http-backend - namespace: monitoring + namespace: nginx-ingress spec: replicas: 1 + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 selector: matchLabels: k8s-app: default-http-backend @@ -29,10 +33,11 @@ spec: labels: k8s-app: default-http-backend spec: + serviceAccountName: default terminationGracePeriodSeconds: 60 containers: - name: default-http-backend - image: gcr.io/google_containers/defaultbackend:1.0 + image: gcr.io/google_containers/defaultbackend:1.3 livenessProbe: httpGet: path: /healthz @@ -42,6 +47,7 @@ spec: timeoutSeconds: 5 ports: - containerPort: 8080 + name: http resources: limits: cpu: 10m @@ -54,7 +60,7 @@ apiVersion: extensions/v1beta1 kind: Deployment metadata: name: nginx-ingress-controller - namespace: monitoring + namespace: nginx-ingress labels: k8s-app: nginx-ingress-lb spec: @@ -70,12 +76,8 @@ spec: spec: terminationGracePeriodSeconds: 60 hostNetwork: true - volumes: - - name: dhparam - secret: - secretName: dhparam containers: - - image: gcr.io/google_containers/nginx-ingress-controller:0.8.3 + - image: gcr.io/google_containers/nginx-ingress-controller:0.9.0-beta.5 name: nginx-ingress-lb imagePullPolicy: Always 
livenessProbe: @@ -101,10 +103,6 @@ spec: - containerPort: 443 hostPort: 443 - containerPort: 8080 - hostPort: 8080 - volumeMounts: - - mountPath: /etc/nginx-ssl/dhparam - name: dhparam args: - /nginx-ingress-controller - - --default-backend-service=monitoring/default-http-backend + - --default-backend-service=$(POD_NAMESPACE)/default-http-backend diff --git a/definitions/k8s/ingress/03-prometheus.ing.yaml b/k8s/ingress/03-prometheus.ing.yaml similarity index 100% rename from definitions/k8s/ingress/03-prometheus.ing.yaml rename to k8s/ingress/03-prometheus.ing.yaml diff --git a/definitions/k8s/kube-state-metrics/deployment.yml b/k8s/kube-state-metrics/deployment.yml similarity index 79% rename from definitions/k8s/kube-state-metrics/deployment.yml rename to k8s/kube-state-metrics/deployment.yml index d373366..bd72593 100644 --- a/definitions/k8s/kube-state-metrics/deployment.yml +++ b/k8s/kube-state-metrics/deployment.yml @@ -9,11 +9,11 @@ spec: metadata: labels: app: kube-state-metrics - version: "v0.3.0" + version: "v0.5.0" spec: containers: - name: kube-state-metrics - image: gcr.io/google_containers/kube-state-metrics:v0.3.0 + image: gcr.io/google_containers/kube-state-metrics:v0.5.0 ports: - containerPort: 8080 imagePullPolicy: Always diff --git a/definitions/k8s/kube-state-metrics/service.yaml b/k8s/kube-state-metrics/service.yaml similarity index 100% rename from definitions/k8s/kube-state-metrics/service.yaml rename to k8s/kube-state-metrics/service.yaml diff --git a/definitions/k8s/prometheus/01-prometheus.configmap.yaml b/k8s/prometheus/00-alerts.cm.yaml similarity index 79% rename from definitions/k8s/prometheus/01-prometheus.configmap.yaml rename to k8s/prometheus/00-alerts.cm.yaml index 051ebdf..9923edf 100644 --- a/definitions/k8s/prometheus/01-prometheus.configmap.yaml +++ b/k8s/prometheus/00-alerts.cm.yaml @@ -1,108 +1,10 @@ apiVersion: v1 kind: ConfigMap metadata: - name: prometheus + name: alerts namespace: monitoring data: - prometheus.yml: |- - 
global: - evaluation_interval: 30s - scrape_configs: - - - job_name: kubelets - - scrape_interval: 20s - scheme: https - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecure_skip_verify: true - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - kubernetes_sd_configs: - - role: node - - - job_name: standard-endpoints - - scrape_interval: 20s - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecure_skip_verify: true - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - kubernetes_sd_configs: - - role: endpoints - relabel_configs: - - action: keep - source_labels: [__meta_kubernetes_service_name] - regex: kubernetes|node-exporter|kube-state-metrics|etcd-k8s|prometheus - - action: replace - source_labels: [__meta_kubernetes_service_name] - target_label: job - - action: replace - source_labels: [__meta_kubernetes_service_name] - regex: kubernetes - target_label: __scheme__ - replacement: https - - - job_name: kube-components - - scrape_interval: 20s - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - kubernetes_sd_configs: - - role: endpoints - relabel_configs: - - action: replace - source_labels: [__meta_kubernetes_service_name] - target_label: job - regex: "kube-(.*)-prometheus-discovery" - replacement: "kube-${1}" - - action: keep - source_labels: [__meta_kubernetes_service_name] - regex: "kube-(.*)-prometheus-discovery" - - action: keep - source_labels: [__meta_kubernetes_endpoint_port_name] - regex: "prometheus" - - - job_name: 'kubernetes-pods' - - kubernetes_sd_configs: - - role: pod - relabel_configs: - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] - action: keep - regex: true - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - - 
source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] - action: replace - regex: (.+):(?:\d+);(\d+) - replacement: ${1}:${2} - target_label: __address__ - - action: labelmap - regex: __meta_kubernetes_pod_label_(.+) - - source_labels: [__meta_kubernetes_pod_namespace] - action: replace - target_label: kubernetes_namespace - - source_labels: [__meta_kubernetes_pod_name] - action: replace - target_label: kubernetes_pod_name - - - job_name: ec2 - - ec2_sd_configs: - - region: us-east-1 - access_key: aws_access_key - secret_key: aws_secret_key - refresh_interval: 60s - port: 9100 - relabel_configs: - - action: labelmap - regex: __meta_ec2_tag_(.+) - rule_files: - - '/etc/prometheus/alert.rules' - alert.rules: |- + kubernetes.rules: |- ### Container resources ### cluster_namespace_controller_pod_container:spec_memory_limit_bytes = sum by (cluster,namespace,controller,pod_name,container_name) ( @@ -303,7 +205,7 @@ data: } ALERT K8SApiserverDown IF up{job="kubernetes"} == 0 - FOR 5m + FOR 10m LABELS { service = "k8s", severity = "warning" @@ -417,3 +319,11 @@ data: summary = "Kubelet is close to pod limit", description = "Kubelet {{$labels.instance}} is running {{$value}} pods, close to the limit of 110", } + ALERT PodRestartingTooMuch + IF rate(kube_pod_container_status_restarts[10m])*600 > 2 + FOR 5m + LABELS { severity="warning" } + ANNOTATIONS { + summary = "Pod {{$labels.namespace}}/{{$labels.pod}} restarting too much.", + description = "Pod {{$labels.namespace}}/{{$labels.pod}} restarting too much.", + } diff --git a/k8s/prometheus/01-prometheus.configmap.yaml b/k8s/prometheus/01-prometheus.configmap.yaml new file mode 100644 index 0000000..66971ca --- /dev/null +++ b/k8s/prometheus/01-prometheus.configmap.yaml @@ -0,0 +1,79 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus + namespace: monitoring +data: + prometheus.yml: |- + global: + scrape_interval: 15s + evaluation_interval: 15s + + rule_files: + - 
/etc/alertmanager/*.rules + + scrape_configs: + - job_name: kubelets + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + + - job_name: standard-endpoints + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: endpoints + + relabel_configs: + - action: keep + source_labels: [__meta_kubernetes_service_name] + regex: prometheus|kubernetes|node-exporter|kube-state-metrics|etcd-k8s + - action: replace + source_labels: [__meta_kubernetes_service_name] + target_label: job + - action: replace + source_labels: [__meta_kubernetes_service_name] + regex: kubernetes + target_label: __scheme__ + replacement: https + + - job_name: kube-components + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: endpoints + + relabel_configs: + - action: replace + source_labels: [__meta_kubernetes_service_name] + target_label: job + regex: "kube-(.*)-prometheus-discovery" + replacement: "kube-${1}" + - action: keep + source_labels: [__meta_kubernetes_service_name] + regex: "kube-(.*)-prometheus-discovery" + - action: keep + source_labels: [__meta_kubernetes_endpoint_port_name] + regex: "prometheus" + + - job_name: ec2 + + ec2_sd_configs: + - region: us-east-1 + access_key: aws_access_key + secret_key: aws_secret_key + refresh_interval: 60s + port: 9100 + relabel_configs: + - action: labelmap + regex: __meta_ec2_tag_(.+) diff --git a/definitions/k8s/prometheus/02-prometheus.svc.statefulset.yaml b/k8s/prometheus/02-prometheus.svc.statefulset.yaml similarity index 86% rename from 
definitions/k8s/prometheus/02-prometheus.svc.statefulset.yaml rename to k8s/prometheus/02-prometheus.svc.statefulset.yaml index 8623196..53a6f29 100644 --- a/definitions/k8s/prometheus/02-prometheus.svc.statefulset.yaml +++ b/k8s/prometheus/02-prometheus.svc.statefulset.yaml @@ -32,10 +32,11 @@ spec: annotations: pod.alpha.kubernetes.io/initialized: "true" spec: + serviceAccountName: default terminationGracePeriodSeconds: 0 containers: - name: prometheus-prom-1 - image: prom/prometheus:v1.5.0 + image: prom/prometheus:PROMETHEUS_VERSION args: - '-storage.local.retention=720h' - '-storage.local.memory-chunks=500000' @@ -49,10 +50,15 @@ spec: mountPath: /etc/prometheus - name: prometheus-prom-1 mountPath: /prometheus + - name: alerts-volume + mountPath: /etc/alertmanager volumes: - name: config-volume configMap: name: prometheus + - name: alerts-volume + configMap: + name: alerts volumeClaimTemplates: - metadata: name: prometheus-prom-1 diff --git a/definitions/k8s/prometheus/03-alertmanager.configmap.yaml b/k8s/prometheus/03-alertmanager.configmap.yaml similarity index 100% rename from definitions/k8s/prometheus/03-alertmanager.configmap.yaml rename to k8s/prometheus/03-alertmanager.configmap.yaml diff --git a/definitions/k8s/prometheus/04-alertmanager.svc.deployment.yaml b/k8s/prometheus/04-alertmanager.svc.deployment.yaml similarity index 93% rename from definitions/k8s/prometheus/04-alertmanager.svc.deployment.yaml rename to k8s/prometheus/04-alertmanager.svc.deployment.yaml index b3fbd9e..571864a 100644 --- a/definitions/k8s/prometheus/04-alertmanager.svc.deployment.yaml +++ b/k8s/prometheus/04-alertmanager.svc.deployment.yaml @@ -28,7 +28,7 @@ spec: spec: containers: - name: alertmanager - image: prom/alertmanager:v0.5.1 + image: prom/alertmanager:ALERT_MANAGER_VERSION ports: - containerPort: 9093 imagePullPolicy: Always diff --git a/definitions/k8s/prometheus/05-node-exporter.svc.daemonset.yaml b/k8s/prometheus/05-node-exporter.svc.daemonset.yaml similarity 
index 78% rename from definitions/k8s/prometheus/05-node-exporter.svc.daemonset.yaml rename to k8s/prometheus/05-node-exporter.svc.daemonset.yaml index 44c7cc1..b5cd298 100644 --- a/definitions/k8s/prometheus/05-node-exporter.svc.daemonset.yaml +++ b/k8s/prometheus/05-node-exporter.svc.daemonset.yaml @@ -30,8 +30,13 @@ spec: app: node-exporter name: node-exporter spec: + tolerations: + - key: "node.alpha.kubernetes.io/role" + operator: "Equal" + value: "master" + effect: "NoSchedule" containers: - - image: prom/node-exporter + - image: prom/node-exporter:NODE_EXPORTER_VERSION name: node-exporter ports: - containerPort: 9100 diff --git a/k8s/rbac/01-prometheus-rbac-config.yaml b/k8s/rbac/01-prometheus-rbac-config.yaml new file mode 100644 index 0000000..f4c5f20 --- /dev/null +++ b/k8s/rbac/01-prometheus-rbac-config.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus + namespace: monitoring diff --git a/k8s/rbac/02-nginx-ingress-rbac-config.yaml b/k8s/rbac/02-nginx-ingress-rbac-config.yaml new file mode 100644 index 0000000..781e642 --- /dev/null +++ b/k8s/rbac/02-nginx-ingress-rbac-config.yaml @@ -0,0 +1,100 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: nginx-ingress +--- + +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRole +metadata: + name: nginx-ingress-clusterrole +rules: + - apiGroups: + - "" + resources: + - configmaps + - endpoints + - nodes + - pods + - secrets + verbs: + - list + - watch + - apiGroups: + - "" + resources: + - services + verbs: + - get + - list + - watch + - apiGroups: + - "extensions" + resources: + - ingresses + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - events + verbs: + - create + - apiGroups: + - "extensions" + resources: + - ingresses/status + verbs: + - update +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: Role +metadata: + name: nginx-ingress-role + namespace: nginx-ingress +rules: + - apiGroups: + - "" + resources: + - configmaps + 
- pods + - secrets + verbs: + - get + - apiGroups: + - "" + resources: + - endpoints + verbs: + - get + - create + - update +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: RoleBinding +metadata: + name: nginx-ingress-role-nisa-binding + namespace: nginx-ingress +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: nginx-ingress-role +subjects: + - kind: ServiceAccount + name: default + namespace: nginx-ingress +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: nginx-ingress-clusterrole-nisa-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: nginx-ingress-clusterrole +subjects: + - kind: ServiceAccount + name: default + namespace: nginx-ingress diff --git a/definitions/remove.sh b/remove.sh similarity index 55% rename from definitions/remove.sh rename to remove.sh index 3093db7..adf9015 100755 --- a/definitions/remove.sh +++ b/remove.sh @@ -2,6 +2,7 @@ kubectl delete -f ./k8s/grafana kubectl delete -f ./k8s/ingress -kubectl delete -f ./k8s/prometheus +kubectl delete -R -f ./k8s/prometheus kubectl delete -f ./k8s/kube-state-metrics -kubectl delete secret dhparam -n monitoring +kubectl delete -f ./k8s/rbac +kubectl delete ns monitoring diff --git a/definitions/alertmanager_proxy.sh b/tools/alertmanager_proxy.sh similarity index 100% rename from definitions/alertmanager_proxy.sh rename to tools/alertmanager_proxy.sh diff --git a/definitions/grafana_proxy.sh b/tools/grafana_proxy.sh similarity index 100% rename from definitions/grafana_proxy.sh rename to tools/grafana_proxy.sh diff --git a/definitions/prometheus_proxy.sh b/tools/prometheus_proxy.sh similarity index 100% rename from definitions/prometheus_proxy.sh rename to tools/prometheus_proxy.sh