diff --git a/pkg/kube/controller.go b/pkg/kube/controller.go index 63c0aba..bcf67fe 100644 --- a/pkg/kube/controller.go +++ b/pkg/kube/controller.go @@ -28,6 +28,7 @@ const ( annotationRunnerGroup = "github-actions-manager.oursky.com/runner-group" annotationRunnerLabels = "github-actions-manager.oursky.com/runner-labels" annotationRunnerState = "github-actions-manager.oursky.com/runner-state" + annotationBusy = "github-actions-manager.oursky.com/busy" finalizer = "github-actions-manager.oursky.com/finalizer" ) @@ -179,9 +180,11 @@ func (p *ControllerProvider) CheckAgent(ctx context.Context, agent *controller.A func (p *ControllerProvider) updateAgentPod(ctx context.Context, pod *corev1.Pod, runnerName string, isBusy bool) error { deletionCost := "" safeToEvict := "" + busy := "" if isBusy { deletionCost = "100" safeToEvict = "false" + busy = "true" } var patches []jsonPatch @@ -191,6 +194,9 @@ func (p *ControllerProvider) updateAgentPod(ctx context.Context, pod *corev1.Pod if pod.Annotations[annotationSafeToEvict] != safeToEvict { patches = append(patches, annotationPatch(annotationSafeToEvict, safeToEvict)) } + if pod.Annotations[annotationBusy] != busy { + patches = append(patches, annotationPatch(annotationBusy, busy)) + } patches = append(patches, addFinalizerPatch(pod.ObjectMeta, finalizer)...) return patchPod(p.ctx, p.kube, pod, patches) diff --git a/pkg/kube/metrics.go b/pkg/kube/metrics.go index 27173b5..dbbd9b0 100644 --- a/pkg/kube/metrics.go +++ b/pkg/kube/metrics.go @@ -1,17 +1,23 @@ package kube import ( + "regexp" "strconv" "github.com/oursky/github-actions-manager/pkg/utils/promutil" "github.com/prometheus/client_golang/prometheus" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" ) +var statefulPodRegex = regexp.MustCompile("(.*)-([0-9]+)$") + type metrics struct { state *ControllerState - kubePod *promutil.MetricDesc + kubePod *promutil.MetricDesc + statefulSetBusyRunnerOrd *promutil.MetricDesc } func newMetrics(state *ControllerState) *metrics { @@ -24,6 +30,13 @@ func newMetrics(state *ControllerState) *metrics { Name: "pod", Help: "Describes the associated pod of runner.", }), + + statefulSetBusyRunnerOrd: promutil.NewMetricDesc(prometheus.Opts{ + Namespace: "github_actions", + Subsystem: "kube", + Name: "stateful_set_busy_runner_ord", + Help: "The highest ordinal of busy runners pod in StatefulSet.", + }), } return m } @@ -35,6 +48,12 @@ func (m *metrics) Collect(ch chan<- prometheus.Metric) { labelRunner: "true", })) + type statefulSetPod struct { + *v1.Pod + Ord int + } + statefulSetBusyRunnerOrds := make(map[string]statefulSetPod) + for _, pod := range pods { agent := m.state.decodeState(pod) if agent == nil || agent.RunnerID == nil { @@ -48,5 +67,39 @@ func (m *metrics) Collect(ch chan<- prometheus.Metric) { "namespace": pod.Namespace, "node": pod.Spec.NodeName, }) + + if pod.Annotations[annotationBusy] == "true" { + ctrl := metav1.GetControllerOf(pod) + if ctrl != nil && ctrl.Kind == "StatefulSet" { + parent, ord := getStatefulSetPodInfo(pod) + if parent != "" && statefulSetBusyRunnerOrds[parent].Ord <= ord { + statefulSetBusyRunnerOrds[parent] = statefulSetPod{ + Pod: pod, + Ord: ord, + } + } + } + } + } + + for set, info := range statefulSetBusyRunnerOrds { + ch <- m.statefulSetBusyRunnerOrd.Gauge(float64(info.Ord), prometheus.Labels{ + "statefulset": set, + "namespace": info.Pod.Namespace, + }) + } +} + +func getStatefulSetPodInfo(pod *v1.Pod) (string, int) { + parent := "" + ordinal := -1 + subMatches := statefulPodRegex.FindStringSubmatch(pod.Name) + if len(subMatches) < 3 { + return parent, ordinal + } + parent = subMatches[1] + if i, err := strconv.ParseInt(subMatches[2], 10, 32); err == nil { + ordinal = int(i) } + return parent, ordinal }