diff --git a/cli/pkg/healthcheck/healthcheck.go b/cli/pkg/healthcheck/healthcheck.go index df8f46b..9647c72 100644 --- a/cli/pkg/healthcheck/healthcheck.go +++ b/cli/pkg/healthcheck/healthcheck.go @@ -5,11 +5,11 @@ import ( "fmt" "net/http" + agentk8s "github.com/buoyantio/linkerd-buoyant/agent/pkg/k8s" "github.com/buoyantio/linkerd-buoyant/cli/pkg/k8s" "github.com/buoyantio/linkerd-buoyant/cli/pkg/version" "github.com/linkerd/linkerd2/pkg/healthcheck" l5dk8s "github.com/linkerd/linkerd2/pkg/k8s" - appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" ) @@ -31,6 +31,8 @@ type HealthChecker struct { ns *v1.Namespace } +type getWorkloadLabelsFn func(ctx context.Context) (map[string]string, error) + // NewHealthChecker returns an initialized HealthChecker for linkerd-buoyant. // The returned instance does not contain any linkerd-buoyant Categories. // Categories are to be explicitly added by using hc.AppendCategories @@ -55,10 +57,10 @@ func (hc *HealthChecker) L5dBuoyantCategory() *healthcheck.Category { hc.globalChecks(), append( hc.deploymentChecks(k8s.AgentName), - hc.deploymentChecks(k8s.MetricsName)..., + hc.daemonSetChecks(k8s.MetricsName)..., )..., ) - return healthcheck.NewCategory(categoryID, checks, true) + return healthcheck.NewCategory(categoryID, checks, true).WithHintBaseURL("https://linkerd.io/checks#l5d-buoyant") } func (hc *HealthChecker) globalChecks() []healthcheck.Checker { @@ -141,46 +143,86 @@ func (hc *HealthChecker) globalChecks() []healthcheck.Checker { } func (hc *HealthChecker) deploymentChecks(name string) []healthcheck.Checker { - var deploy *appsv1.Deployment - var pod v1.Pod + getWorkloadLabels := func(ctx context.Context) (map[string]string, error) { + deploy, err := hc.k8s.Deployment(ctx, name) + if err != nil { + return nil, err + } + return deploy.GetLabels(), nil + } - return []healthcheck.Checker{ - *healthcheck.NewChecker(fmt.Sprintf("%s Deployment exists", name)). + return hc.workloadChecks(name, agentk8s.Deployment, getWorkloadLabels, true) +} + +func (hc *HealthChecker) daemonSetChecks(name string) []healthcheck.Checker { + getWorkloadLabels := func(ctx context.Context) (map[string]string, error) { + ds, err := hc.k8s.DaemonSet(ctx, name) + if err != nil { + return nil, err + } + return ds.GetLabels(), nil + } + + return hc.workloadChecks(name, agentk8s.DaemonSet, getWorkloadLabels, false) +} + +func (hc *HealthChecker) workloadChecks( + name, kind string, getWorkloadLabels getWorkloadLabelsFn, singleton bool, +) []healthcheck.Checker { + var labels map[string]string + var pods []v1.Pod + + checks := []healthcheck.Checker{ + *healthcheck.NewChecker(fmt.Sprintf("%s %s exists", name, kind)). Fatal(). WithCheck(func(ctx context.Context) error { var err error - deploy, err = hc.k8s.Deployment(ctx, name) + labels, err = getWorkloadLabels(ctx) if err != nil { return err } - return checkLabel(deploy.GetLabels(), k8s.PartOfKey, k8s.PartOfVal) + return checkLabel(labels, k8s.PartOfKey, k8s.PartOfVal) }), - *healthcheck.NewChecker(fmt.Sprintf("%s Deployment is running", name)). + *healthcheck.NewChecker(fmt.Sprintf("%s %s is running", name, kind)). WithCheck(func(ctx context.Context) error { labelSelector := fmt.Sprintf("app=%s", name) - pods, err := hc.k8s.Pods(ctx, labelSelector) + podList, err := hc.k8s.Pods(ctx, labelSelector) if err != nil { return err } - if len(pods.Items) != 1 { - return fmt.Errorf("expected 1 %s pod, found %d", name, len(pods.Items)) - } + pods = podList.Items - pod = pods.Items[0] + if len(pods) == 0 { + return fmt.Errorf("no running pods for %s %s", name, kind) + } - return healthcheck.CheckPodsRunning(pods.Items, "") + return healthcheck.CheckPodsRunning(pods, "") }), - *healthcheck.NewChecker(fmt.Sprintf("%s Deployment is injected", name)). + *healthcheck.NewChecker(fmt.Sprintf("%s %s is injected", name, kind)). WithCheck(func(ctx context.Context) error { - return healthcheck.CheckIfDataPlanePodsExist([]v1.Pod{pod}) + return healthcheck.CheckIfDataPlanePodsExist(pods) }), - *healthcheck.NewChecker(fmt.Sprintf("%s is up-to-date", name)). + *healthcheck.NewChecker(fmt.Sprintf("%s %s is up-to-date", name, kind)). Warning(). WithCheck(func(ctx context.Context) error { - return checkLabel(deploy.GetLabels(), k8s.VersionLabel, hc.version) + return checkLabel(labels, k8s.VersionLabel, hc.version) }), } + + if singleton { + checks = append(checks, + *healthcheck.NewChecker(fmt.Sprintf("%s %s is running a single pod", name, kind)). + WithCheck(func(ctx context.Context) error { + if len(pods) != 1 { + return fmt.Errorf("expected 1 %s pod, found %d", name, len(pods)) + } + return nil + }), + ) + } + + return checks } func checkLabel(labels map[string]string, key, val string) error { diff --git a/cli/pkg/healthcheck/healthcheck_test.go b/cli/pkg/healthcheck/healthcheck_test.go index 495b906..1abb085 100644 --- a/cli/pkg/healthcheck/healthcheck_test.go +++ b/cli/pkg/healthcheck/healthcheck_test.go @@ -75,7 +75,7 @@ func TestHealthChecker(t *testing.T) { √ buoyant-cloud Namespace exists × buoyant-cloud Namespace has correct labels missing linkerd.io/extension label - see https://linkerd.io/2/checks/# for hints + see https://linkerd.io/checks#l5d-buoyant for hints Status check results are × `, @@ -116,6 +116,9 @@ Status check results are × MockServiceAccount: &v1.ServiceAccount{ ObjectMeta: objMeta, }, + MockDaemonSet: &appsv1.DaemonSet{ + ObjectMeta: objMetaDeploy, + }, MockDeployment: &appsv1.Deployment{ ObjectMeta: objMetaDeploy, }, @@ -157,11 +160,12 @@ Status check results are × √ buoyant-cloud-agent Deployment exists √ buoyant-cloud-agent Deployment is running √ buoyant-cloud-agent Deployment is injected -√ buoyant-cloud-agent is up-to-date -√ buoyant-cloud-metrics Deployment exists -√ buoyant-cloud-metrics Deployment is running -√ buoyant-cloud-metrics Deployment is injected -√ buoyant-cloud-metrics is up-to-date +√ buoyant-cloud-agent Deployment is up-to-date +√ buoyant-cloud-agent Deployment is running a single pod +√ buoyant-cloud-metrics DaemonSet exists +√ buoyant-cloud-metrics DaemonSet is running +√ buoyant-cloud-metrics DaemonSet is injected +√ buoyant-cloud-metrics DaemonSet is up-to-date Status check results are √ `, diff --git a/cli/pkg/k8s/client.go b/cli/pkg/k8s/client.go index 1321d9e..a14d3d4 100644 --- a/cli/pkg/k8s/client.go +++ b/cli/pkg/k8s/client.go @@ -24,6 +24,8 @@ type ( Secret(ctx context.Context) (*v1.Secret, error) // ServiceAccount retrieves the buoyant-cloud-agent ServiceAccount. ServiceAccount(ctx context.Context) (*v1.ServiceAccount, error) + // DaemonSet retrieves a DaemonSet by name in the buoyant-cloud namespace. + DaemonSet(ctx context.Context, name string) (*appsv1.DaemonSet, error) // Deployment retrieves a Deployment by name in the buoyant-cloud namespace. Deployment(ctx context.Context, name string) (*appsv1.Deployment, error) // Pods retrieves a PodList by labelSelector from the buoyant-cloud @@ -107,6 +109,13 @@ func (c *client) ServiceAccount(ctx context.Context) (*v1.ServiceAccount, error) Get(ctx, AgentName, metav1.GetOptions{}) } +func (c *client) DaemonSet(ctx context.Context, name string) (*appsv1.DaemonSet, error) { + return c. + AppsV1(). + DaemonSets(Namespace). + Get(ctx, name, metav1.GetOptions{}) +} + func (c *client) Deployment(ctx context.Context, name string) (*appsv1.Deployment, error) { return c. AppsV1(). diff --git a/cli/pkg/k8s/mock_client.go b/cli/pkg/k8s/mock_client.go index 423cec1..9d8db9c 100644 --- a/cli/pkg/k8s/mock_client.go +++ b/cli/pkg/k8s/mock_client.go @@ -15,6 +15,7 @@ type MockClient struct { MockClusterRoleBinding *rbacv1.ClusterRoleBinding MockSecret *v1.Secret MockServiceAccount *v1.ServiceAccount + MockDaemonSet *appsv1.DaemonSet MockDeployment *appsv1.Deployment MockPods *v1.PodList @@ -47,6 +48,11 @@ func (m *MockClient) ServiceAccount(ctx context.Context) (*v1.ServiceAccount, er return m.MockServiceAccount, nil } +// DaemonSet returns a mock DaemonSet object. +func (m *MockClient) DaemonSet(ctx context.Context, name string) (*appsv1.DaemonSet, error) { + return m.MockDaemonSet, nil +} + // Deployment returns a mock Deployment object. func (m *MockClient) Deployment(ctx context.Context, name string) (*appsv1.Deployment, error) { return m.MockDeployment, nil