Merge pull request #2442 from rexagod/readyz

fix: add `readyz` endpoint
kubernetes · Jul 15, 2024 · f7618df · f7618df
2 parents a1fb0ce + dbb0276
commit f7618df
Show file tree

Hide file tree

Showing 10 changed files with 76 additions and 39 deletions.
diff --git a/README.md b/README.md
@@ -346,11 +346,13 @@ After running the above, if you see `Clusterrolebinding "cluster-admin-binding"
 
 #### Healthcheck Endpoints
 
-The following healthcheck endpoints are available, some of which are used to determine the result of the aforementioned probes:
+The following healthcheck endpoints are available (`self` refers to the telemetry port, while `main` refers to the exposition port):
 
-* `/livez`: Returns a 200 status code if the application is not affected by an outage of the Kubernetes API Server. We recommend to use this as a liveness probe.
-* `/metrics`: Returns a 200 status code if the application is able to serve metrics. While this is available for both ports, we recommend to use the telemetry metrics endpoint as a readiness probe.
-* `/healthz`: Returns a 200 status code if the application is running. We recommend to use this as a startup probe.
+* `/healthz` (exposed on `main`): Returns a 200 status code if the application is running. We recommend using this for the startup probe.
+* `/livez` (exposed on `main`): Returns a 200 status code if the application is not affected by an outage of the Kubernetes API Server. We recommend using this for the liveness probe.
+* `/readyz` (exposed on `self`): Returns a 200 status code if the application is ready to accept requests and expose metrics. We recommend using this for the readiness probe.
+
+Note that it is discouraged to use the telemetry metrics endpoint for any probe when proxying the exposition data.
 
 #### Limited privileges environment
 

diff --git a/README.md.tpl b/README.md.tpl
@@ -347,11 +347,13 @@ After running the above, if you see `Clusterrolebinding "cluster-admin-binding"
 
 #### Healthcheck Endpoints
 
-The following healthcheck endpoints are available, some of which are used to determine the result of the aforementioned probes:
+The following healthcheck endpoints are available (`self` refers to the telemetry port, while `main` refers to the exposition port):
 
-* `/livez`: Returns a 200 status code if the application is not affected by an outage of the Kubernetes API Server. We recommend to use this as a liveness probe.
-* `/metrics`: Returns a 200 status code if the application is able to serve metrics. While this is available for both ports, we recommend to use the telemetry metrics endpoint as a readiness probe.
-* `/healthz`: Returns a 200 status code if the application is running. We recommend to use this as a startup probe.
+* `/healthz` (exposed on `main`): Returns a 200 status code if the application is running. We recommend using this for the startup probe.
+* `/livez` (exposed on `main`): Returns a 200 status code if the application is not affected by an outage of the Kubernetes API Server. We recommend using this for the liveness probe.
+* `/readyz` (exposed on `self`): Returns a 200 status code if the application is ready to accept requests and expose metrics. We recommend using this for the readiness probe.
+
+Note that it is discouraged to use the telemetry metrics endpoint for any probe when proxying the exposition data.
 
 #### Limited privileges environment
 

diff --git a/examples/autosharding/statefulset.yaml b/examples/autosharding/statefulset.yaml
@@ -38,7 +38,7 @@ spec:
         livenessProbe:
           httpGet:
             path: /livez
-            port: 8080
+            port: http-metrics
           initialDelaySeconds: 5
           timeoutSeconds: 5
         name: kube-state-metrics
@@ -49,8 +49,8 @@ spec:
           name: telemetry
         readinessProbe:
           httpGet:
-            path: /metrics
-            port: 8081
+            path: /readyz
+            port: telemetry
           initialDelaySeconds: 5
           timeoutSeconds: 5
         securityContext:

diff --git a/examples/daemonsetsharding/daemonset.yaml b/examples/daemonsetsharding/daemonset.yaml
@@ -33,7 +33,7 @@ spec:
         livenessProbe:
           httpGet:
             path: /livez
-            port: 8080
+            port: http-metrics
           initialDelaySeconds: 5
           timeoutSeconds: 5
         name: kube-state-metrics-shard
@@ -44,8 +44,8 @@ spec:
           name: telemetry
         readinessProbe:
           httpGet:
-            path: /metrics
-            port: 8081
+            path: /readyz
+            port: telemetry
           initialDelaySeconds: 5
           timeoutSeconds: 5
         securityContext:

diff --git a/examples/daemonsetsharding/deployment-no-node-pods.yaml b/examples/daemonsetsharding/deployment-no-node-pods.yaml
@@ -28,7 +28,7 @@ spec:
         livenessProbe:
           httpGet:
             path: /livez
-            port: 8080
+            port: http-metrics
           initialDelaySeconds: 5
           timeoutSeconds: 5
         name: kube-state-metrics
@@ -39,8 +39,8 @@ spec:
           name: telemetry
         readinessProbe:
           httpGet:
-            path: /metrics
-            port: 8081
+            path: /readyz
+            port: telemetry
           initialDelaySeconds: 5
           timeoutSeconds: 5
         securityContext:

diff --git a/examples/daemonsetsharding/deployment.yaml b/examples/daemonsetsharding/deployment.yaml
@@ -27,7 +27,7 @@ spec:
         livenessProbe:
           httpGet:
             path: /livez
-            port: 8080
+            port: http-metrics
           initialDelaySeconds: 5
           timeoutSeconds: 5
         name: kube-state-metrics
@@ -38,8 +38,8 @@ spec:
           name: telemetry
         readinessProbe:
           httpGet:
-            path: /metrics
-            port: 8081
+            path: /readyz
+            port: telemetry
           initialDelaySeconds: 5
           timeoutSeconds: 5
         securityContext:

diff --git a/examples/standard/deployment.yaml b/examples/standard/deployment.yaml
@@ -25,7 +25,7 @@ spec:
         livenessProbe:
           httpGet:
             path: /livez
-            port: 8080
+            port: http-metrics
           initialDelaySeconds: 5
           timeoutSeconds: 5
         name: kube-state-metrics
@@ -36,8 +36,8 @@ spec:
           name: telemetry
         readinessProbe:
           httpGet:
-            path: /metrics
-            port: 8081
+            path: /readyz
+            port: telemetry
           initialDelaySeconds: 5
           timeoutSeconds: 5
         securityContext:

diff --git a/jsonnet/kube-state-metrics/kube-state-metrics.libsonnet b/jsonnet/kube-state-metrics/kube-state-metrics.libsonnet
@@ -192,12 +192,12 @@
         seccompProfile: { type: 'RuntimeDefault' },
       },
       livenessProbe: { timeoutSeconds: 5, initialDelaySeconds: 5, httpGet: {
-        port: 8080,
+        port: "http-metrics",
         path: '/livez',
       } },
       readinessProbe: { timeoutSeconds: 5, initialDelaySeconds: 5, httpGet: {
-        port: 8081,
-        path: '/metrics',
+        port: "telemetry",
+        path: '/readyz',
       } },
     };
 

diff --git a/pkg/app/server.go b/pkg/app/server.go
@@ -62,6 +62,7 @@ const (
 	metricsPath = "/metrics"
 	healthzPath = "/healthz"
 	livezPath   = "/livez"
+	readyzPath  = "/readyz"
 )
 
 // promLogger implements promhttp.Logger
@@ -376,6 +377,18 @@ func buildTelemetryServer(registry prometheus.Gatherer) *http.ServeMux {
 	// Add metricsPath
 	mux.Handle(metricsPath, promhttp.HandlerFor(registry, promhttp.HandlerOpts{ErrorLog: promLogger{}}))
 
+	// Add readyzPath
+	mux.Handle(readyzPath, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		count, err := util.GatherAndCount(registry)
+		if err != nil || count == 0 {
+			w.WriteHeader(http.StatusServiceUnavailable)
+			w.Write([]byte(http.StatusText(http.StatusServiceUnavailable)))
+			return
+		}
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(http.StatusText(http.StatusOK)))
+	}))
+
 	// Add index
 	landingConfig := web.LandingConfig{
 		Name:        "kube-state-metrics",
@@ -396,6 +409,19 @@ func buildTelemetryServer(registry prometheus.Gatherer) *http.ServeMux {
 	return mux
 }
 
+func handleClusterDelegationForProber(client kubernetes.Interface, probeType string) http.HandlerFunc {
+	return func(w http.ResponseWriter, _ *http.Request) {
+		got := client.CoreV1().RESTClient().Get().AbsPath(probeType).Do(context.Background()) // GET probeType through the client; uses a background context, not the incoming request's
+		if got.Error() != nil { // any error from that request is reported to the prober as 503
+			w.WriteHeader(http.StatusServiceUnavailable)
+			w.Write([]byte(http.StatusText(http.StatusServiceUnavailable)))
+			return
+		}
+		w.WriteHeader(http.StatusOK) // the delegated request succeeded, so report 200
+		w.Write([]byte(http.StatusText(http.StatusOK)))
+	}
+}
+
 func buildMetricsServer(m *metricshandler.MetricsHandler, durationObserver prometheus.ObserverVec, client kubernetes.Interface) *http.ServeMux {
 	mux := http.NewServeMux()
 
@@ -410,18 +436,7 @@ func buildMetricsServer(m *metricshandler.MetricsHandler, durationObserver prome
 	mux.Handle(metricsPath, promhttp.InstrumentHandlerDuration(durationObserver, m))
 
 	// Add livezPath
-	mux.Handle(livezPath, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
-
-		// Query the Kube API to make sure we are not affected by a network outage.
-		got := client.CoreV1().RESTClient().Get().AbsPath("/livez").Do(context.Background())
-		if got.Error() != nil {
-			w.WriteHeader(http.StatusServiceUnavailable)
-			w.Write([]byte(http.StatusText(http.StatusServiceUnavailable)))
-			return
-		}
-		w.WriteHeader(http.StatusOK)
-		w.Write([]byte(http.StatusText(http.StatusOK)))
-	}))
+	mux.Handle(livezPath, handleClusterDelegationForProber(client, livezPath))
 
 	// Add healthzPath
 	mux.HandleFunc(healthzPath, func(w http.ResponseWriter, _ *http.Request) {

diff --git a/pkg/util/utils.go b/pkg/util/utils.go
@@ -21,7 +21,6 @@ import (
 	"runtime"
 	"strings"
 
-	"github.com/prometheus/common/version"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/client-go/discovery"
@@ -32,6 +31,9 @@ import (
 	"k8s.io/klog/v2"
 	testUnstructuredMock "k8s.io/sample-controller/pkg/apis/samplecontroller/v1alpha1"
 
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/common/version"
+
 	"k8s.io/kube-state-metrics/v2/pkg/customresource"
 )
 
@@ -154,3 +156,19 @@ func GVRFromType(resourceName string, expectedType interface{}) *schema.GroupVer
 		Resource: r,
 	}
 }
+
+// GatherAndCount gathers every metric family from the provided Gatherer and
+// returns the total number of metric children across all of them. If the
+// gather fails it returns 0 and an error wrapping the underlying failure.
+func GatherAndCount(g prometheus.Gatherer) (int, error) {
+	got, err := g.Gather()
+	if err != nil {
+		return 0, fmt.Errorf("gathering metrics failed: %w", err)
+	}
+
+	result := 0
+	for _, mf := range got {
+		result += len(mf.GetMetric())
+	}
+	return result, nil
+}