diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index 4e0b677..60d09c1 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -38,7 +38,8 @@ jobs: if: github.ref == 'refs/heads/main' run: | helm upgrade --install bitcoind charts/bitcoind/ -f charts/bitcoind/values.yaml --namespace bitcoin-mainnet --create-namespace - # Add more Helm upgrade/install commands as needed for other charts + # NOTE(review): assumes the prometheus-community Helm repo is added earlier in this job; confirm + helm upgrade --install kube-prometheus-stack prometheus-community/kube-prometheus-stack --namespace kube-prometheus-stack --create-namespace --values charts/monitoring/values.yaml - name: Deploy Testnet if: github.ref == 'refs/heads/development' run: |
diff --git a/charts/bitcoind/Chart.yaml b/charts/bitcoind/Chart.yaml index 3b4a8a9..5df523c 100644 --- a/charts/bitcoind/Chart.yaml +++ b/charts/bitcoind/Chart.yaml @@ -13,7 +13,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 +version: 1.0.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using.
diff --git a/charts/bitcoind/templates/statefulset.yaml b/charts/bitcoind/templates/statefulset.yaml index 812c4f8..7edc0a8 100644 --- a/charts/bitcoind/templates/statefulset.yaml +++ b/charts/bitcoind/templates/statefulset.yaml @@ -103,6 +103,7 @@ spec: - getblockchaininfo failureThreshold: {{.Values.startupProbe.failureThreshold}} periodSeconds: 10 + timeoutSeconds: 3 livenessProbe: exec: command: @@ -114,6 +115,8 @@ spec: - getblockchaininfo initialDelaySeconds: 120 periodSeconds: 30 + timeoutSeconds: 3 + failureThreshold: {{.Values.livenessProbe.failureThreshold}} readinessProbe: exec: command: @@ -123,8 +126,11 @@ spec: - -{{.Values.global.network}} {{- end }} - getblockchaininfo - periodSeconds: 4 - successThreshold: 3 + periodSeconds: 30 + successThreshold: 1 + timeoutSeconds: 3 + # NOTE(review): readiness reuses .Values.livenessProbe.failureThreshold; confirm that sharing the liveness setting is intended + failureThreshold: {{.Values.livenessProbe.failureThreshold}} resources: {{- toYaml .Values.resources | nindent 12 }} {{- if and .Values.descriptor.secretName .Values.descriptor.secretKey }} @@ -193,10 +199,20 @@ spec: httpGet: path: / port: metrics + initialDelaySeconds: 120 + periodSeconds: 300 + timeoutSeconds: 5 + failureThreshold: 3 readinessProbe: httpGet: path: / port: metrics + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + periodSeconds: 300 + # Pod-level field: it belongs in spec.template.spec (sibling of 'volumes'), not in the StatefulSet spec + terminationGracePeriodSeconds: {{.Values.terminationGracePeriodSeconds}} volumes: - name: config emptyDir: {}
diff --git a/charts/bitcoind/testnet-values.yaml b/charts/bitcoind/testnet-values.yaml index 8685d4c..bbb7f3a 100644 --- a/charts/bitcoind/testnet-values.yaml +++ b/charts/bitcoind/testnet-values.yaml @@ -23,3 +23,9 @@ bitcoindCustomConfig: bind: 0.0.0.0 rpcbind: 0.0.0.0 rpcallowip: 0.0.0.0/0 + +startupProbe: + failureThreshold: 90 + +livenessProbe: + failureThreshold: 3
diff --git a/charts/bitcoind/values.yaml b/charts/bitcoind/values.yaml index 39c2889..2e20edb 100644 --- a/charts/bitcoind/values.yaml +++ b/charts/bitcoind/values.yaml @@ -8,6 +8,7 @@ secrets: create: true replicaCount: 1 +terminationGracePeriodSeconds: 60 image: repository: lncm/bitcoind @@ -65,18 +66,15 @@ ingress: # hosts: # - chart-example.local -resources: {} +resources: - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. - # We recommend using the following values due to the resource intensive nature of bitcoind + # Explicit defaults are set because of the resource intensive nature of bitcoind. + # Adjust them as necessary; lower or remove them on small environments such as Minikube. - # limits: - # cpu: 3000m - # memory: 5120Mi - # requests: - # cpu: 100m - # memory: 4096Mi + limits: + cpu: 3000m + memory: 5200Mi + requests: + cpu: 1000m + memory: 4096Mi persistence: enabled: true @@ -99,7 +97,8 @@ affinity: {} labels: {} -podLabels: {} +podLabels: + app: bitcoind bitcoindGenericConfig: - debug=mempool @@ -112,7 +111,6 @@ bitcoindGenericConfig: - zmqpubrawtx=tcp://0.0.0.0:28333 - zmqpubrawblock=tcp://0.0.0.0:28332 - blockfilterindex=1 -- dbcache=8192 # these flags need to be here and not in bitcoindGenericConfig because they have to be present under a separate section inside bitcoind.conf when in testnet/regtest mode bitcoindCustomConfig: @@ -126,3 +124,6 @@ descriptor: startupProbe: failureThreshold: 90 + +livenessProbe: + failureThreshold: 3
diff --git a/charts/monitoring/Chart.lock b/charts/monitoring/Chart.lock index f362bd4..acbe915 100644 --- a/charts/monitoring/Chart.lock +++ b/charts/monitoring/Chart.lock @@ -5,8 +5,5 @@ dependencies: - name: prometheus repository: https://prometheus-community.github.io/helm-charts version: 25.17.0 -- name: kubernetes-dashboard - repository: https://kubernetes.github.io/dashboard/ - version: 7.1.2 -digest: sha256:a7bc54cdc759b95792c1f453fb97630f573b39590a8fc29ebfc909908a9ea861 -generated: "2024-03-15T01:52:03.270815+01:00" +digest: sha256:08bd3956345350e4dab8c3b3b426fe44b14bac03f5f6e29a70e70b76c3d6167d +generated: "2024-03-15T15:29:29.409577+01:00"
diff --git a/charts/monitoring/Chart.yaml b/charts/monitoring/Chart.yaml index b9cb09b..5185b8b 100644 ---
a/charts/monitoring/Chart.yaml +++ b/charts/monitoring/Chart.yaml @@ -13,18 +13,16 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 +version: 1.0.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. -appVersion: 0.1.0 +# NOTE(review): CI installs prometheus-community/kube-prometheus-stack directly with charts/monitoring/values.yaml; confirm the grafana/prometheus dependencies below are still needed +appVersion: 55.7.0 dependencies: - name: grafana repository: https://grafana.github.io/helm-charts version: 7.3.5 - name: prometheus repository: https://prometheus-community.github.io/helm-charts version: 25.17.0 - - name: kubernetes-dashboard - version: 7.1.2 - repository: "https://kubernetes.github.io/dashboard/" \ No newline at end of file
diff --git a/charts/monitoring/charts/kubernetes-dashboard-7.1.2.tgz b/charts/monitoring/charts/kubernetes-dashboard-7.1.2.tgz deleted file mode 100644 index efd47f5..0000000 Binary files a/charts/monitoring/charts/kubernetes-dashboard-7.1.2.tgz and /dev/null differ
diff --git a/charts/monitoring/values.yaml b/charts/monitoring/values.yaml index 0743170..94a846c 100644 --- a/charts/monitoring/values.yaml +++ b/charts/monitoring/values.yaml @@ -1,174 +1,87 @@ -grafana: - persistence: - enabled: true - ingress: - enabled: false - resources: {} - deploymentStrategy: - type: Recreate - plugins: - - https://github.com/doitintl/bigquery-grafana/archive/master.zip;doit-bigquery-datasource - grafana.ini: - plugins: - allow_loading_unsigned_plugins: "doitintl-bigquery-datasource" - datasources: - datasources.yaml: - apiVersion: 1 - datasources: - - name: Prometheus - type: prometheus - url: http://monitoring-prometheus-server - isDefault: true
+## Stack name: prometheus-community/kube-prometheus-stack +## Ref: https://github.com/prometheus-community/helm-charts/tree/kube-prometheus-stack-35.5.1/charts/kube-prometheus-stack +## -prometheus: - alertmanager: - enabled: false - ## kube-state-metrics sub-chart configurable values - ## Please see https://github.com/helm/charts/tree/master/stable/kube-state-metrics - ## - kube-state-metrics: - podAnnotations: - prometheus.io/path: /metrics - prometheus.io/scrape: "true" - prometheus.io/port: "8080" - - server: - statefulSet: - enabled: true - resources: {} - retention: "365d" - prometheus-pushgateway: - enabled: false +## Manages Prometheus and Alertmanager components +## +prometheusOperator: + enabled: true - ## Prometheus server ConfigMap entries +## Deploy a Prometheus instance +## +prometheus: + enabled: true + additionalServiceMonitors: + # NOTE(review): ServiceMonitor selectors match Service labels, not pod labels; confirm the bitcoind Service carries app.kubernetes.io/name=bitcoind and exposes a port named "metrics" + - name: "bitcoin-exporter" + selector: + matchLabels: + app.kubernetes.io/name: bitcoind + namespaceSelector: + matchNames: + - bitcoin-mainnet + endpoints: + - port: "metrics" + interval: 180s + path: "/" + + ## Prometheus StorageSpec for persistent data + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/storage.md ## - serverFiles: - prometheus.yml: - rule_files: - - /etc/config/recording_rules.yml - - /etc/config/alerting_rules.yml - ## Below two files are DEPRECATED will be removed from this default values file - - /etc/config/rules - - /etc/config/alerts - - scrape_configs: - - job_name: 'cln' - - scrape_interval: 300s - scrape_timeout: 60s - - kubernetes_sd_configs: - - role: pod - - relabel_configs: - - source_labels: [__meta_kubernetes_pod_name] - regex: '.*cln.*' - action: keep - - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] - action: replace - target_label: pod - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] - action: keep - regex: true - - source_labels:
[__meta_kubernetes_pod_annotation_prometheus_io_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - target_label: __address__ - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: namespace - - - job_name: 'kube-state-metrics' - - scrape_interval: 5s - scrape_timeout: 4s - - kubernetes_sd_configs: - - role: pod - - relabel_configs: - - source_labels: [__meta_kubernetes_pod_name] - regex: 'prometheus-kube-state-metrics-.*' - action: keep - - - job_name: 'kubernetes-service-endpoints' - - kubernetes_sd_configs: - - role: endpoints - - relabel_configs: - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] - action: keep - regex: true - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] - action: replace - target_label: __address__ - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - - action: labelmap - regex: __meta_kubernetes_service_label_(.+) - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: namespace - - source_labels: [__meta_kubernetes_service_name] - action: replace - target_label: service - - source_labels: [__meta_kubernetes_pod_node_name] - action: replace - target_label: kubernetes_node - - source_labels: [__meta_kubernetes_pod_name] - regex: 'prometheus-kube-state-metrics-.*' - action: drop - - - job_name: 'kubernetes-pods' - - scrape_interval: 10m - scrape_timeout: 120s - - kubernetes_sd_configs: - - role: pod - - relabel_configs: - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] - action: keep - regex: true - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] - action:
replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - target_label: __address__ - - action: labelmap - regex: __meta_kubernetes_pod_label_(.+) - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: namespace - - source_labels: [__meta_kubernetes_pod_name] - action: replace - target_label: pod - - source_labels: [__meta_kubernetes_pod_name] - regex: 'prometheus-kube-state-metrics-.*' - action: drop - - source_labels: [__meta_kubernetes_pod_name] - regex: 'cln.*' - action: drop - - source_labels: [__meta_kubernetes_pod_name] - regex: 'dealer.*' - action: drop - - # adds additional scrape configs to prometheus.yml - # must be a string so you have to add a | after extraScrapeConfigs: - extraScrapeConfigs: "" + prometheusSpec: + storageSpec: + volumeClaimTemplate: + spec: + storageClassName: do-block-storage + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 5Gi + +## Configuration for Grafana +## ref: https://grafana.com/ +## +## Deploy a Grafana instance +## +grafana: + enabled: true + adminPassword: "" # NOTE(review): emptied so no well-known default is committed; relies on the Grafana chart generating a random admin password when this is empty (confirm for the deployed chart version, or use grafana.admin.existingSecret)
+ persistence: + enabled: true + storageClassName: do-block-storage + accessModes: ["ReadWriteOnce"] + size: 5Gi + +## Configuration for Alertmanager +## ref: https://prometheus.io/docs/alerting/alertmanager/ +## +## Deploy an Alertmanager instance +## +alertmanager: + enabled: true + + +## Create default rules for monitoring the cluster +## +## Disable `etcd` and `kubeScheduler` rules (managed by DOKS, so metrics are not accessible) +## +defaultRules: + create: true + rules: + etcd: false + kubeScheduler: false + +## Component scraping kube scheduler +## +## Disabled because it's being managed by DOKS, so it's not accessible +## +kubeScheduler: + enabled: false + +## Component scraping etcd +## +## Disabled because it's being managed by DOKS, so it's not accessible +## +kubeEtcd: + enabled: false