Skip to content

Commit

Permalink
ProviderReconciler: health check scheduling
Browse files Browse the repository at this point in the history
  • Loading branch information
andmat900 committed Dec 19, 2024
1 parent a2d6e0d commit 75934d7
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 6 deletions.
3 changes: 2 additions & 1 deletion api/v1alpha1/provider_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ type ProviderSpec struct {

// ProviderStatus defines the observed state of Provider
type ProviderStatus struct {
Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type" protobuf:"bytes,1,rep,name=conditions"`
Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type" protobuf:"bytes,1,rep,name=conditions"`
LastHealthCheckTime string `json:"lastHealthCheckTime,omitempty"` // RFC3339 timestamp
}

// +kubebuilder:object:root=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,8 @@ spec:
- type
type: object
type: array
lastHealthCheckTime:
type: string
type: object
type: object
served: true
Expand Down
35 changes: 30 additions & 5 deletions internal/controller/provider_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,22 +61,47 @@ func (r *ProviderReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
logger.Error(err, "failed to get provider")
return ctrl.Result{}, err
}
logger.V(2).Info("Checking availability of provider", "provider", req.NamespacedName)

// TODO: Schedule checks instead of doing it every time something happens
intervalNoError := time.Duration(provider.Spec.Healthcheck.IntervalSeconds) * time.Second

// force initial health check:
if provider.Status.LastHealthCheckTime == "" {
provider.Status.LastHealthCheckTime = time.Now().Format(time.RFC3339)
if err = r.Status().Update(ctx, provider); err != nil {
logger.Error(err, "failed to update provider status")
return ctrl.Result{}, err
}
return ctrl.Result{RequeueAfter: time.Duration(1) * time.Second}, nil
}

lastHealthCheckTime, err := time.Parse(time.RFC3339, provider.Status.LastHealthCheckTime)
if err != nil {
logger.Error(err, "failed to parse provider's last health check time")
return ctrl.Result{}, err
}

// Calculate intervalOnError as the difference since the last check
intervalOnError := intervalNoError - time.Since(lastHealthCheckTime)
if intervalOnError < 0 {
intervalOnError = time.Duration(10) * time.Second // fallback to a default value if the interval is negative
}

logger.V(2).Info("Checking availability of provider", "provider", req.NamespacedName)
// We don't check the availability of JSONTas as it is not yet running as a service we can check.
if provider.Spec.JSONTas == nil {
logger.V(2).Info("Healthcheck", "endpoint", fmt.Sprintf("%s/%s", provider.Spec.Host, provider.Spec.Healthcheck.Endpoint))
resp, err := http.Get(fmt.Sprintf("%s/%s", provider.Spec.Host, provider.Spec.Healthcheck.Endpoint))
now := time.Now()
provider.Status.LastHealthCheckTime = now.Format(time.RFC3339)

if err != nil {
meta.SetStatusCondition(&provider.Status.Conditions, metav1.Condition{Type: StatusAvailable, Status: metav1.ConditionFalse, Reason: "Error", Message: "Could not communicate with host"})
if err = r.Status().Update(ctx, provider); err != nil {
logger.Error(err, "failed to update provider status")
return ctrl.Result{}, err
}
logger.Info("Provider did not respond", "provider", req.NamespacedName)
return ctrl.Result{RequeueAfter: time.Duration(provider.Spec.Healthcheck.IntervalSeconds) * time.Second}, nil
return ctrl.Result{RequeueAfter: intervalOnError}, nil
}
if resp.StatusCode != 204 {
meta.SetStatusCondition(&provider.Status.Conditions, metav1.Condition{Type: StatusAvailable, Status: metav1.ConditionFalse, Reason: "Error", Message: fmt.Sprintf("Wrong status code (%d) from health check endpoint", resp.StatusCode)})
Expand All @@ -85,7 +110,7 @@ func (r *ProviderReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
return ctrl.Result{}, err
}
logger.Info("Provider responded with a bad status code", "provider", req.NamespacedName, "status", resp.StatusCode)
return ctrl.Result{RequeueAfter: time.Duration(provider.Spec.Healthcheck.IntervalSeconds) * time.Second}, nil
return ctrl.Result{RequeueAfter: intervalOnError}, nil
}
}
meta.SetStatusCondition(&provider.Status.Conditions, metav1.Condition{Type: StatusAvailable, Status: metav1.ConditionTrue, Reason: "OK", Message: "Provider is up and running"})
Expand All @@ -94,7 +119,7 @@ func (r *ProviderReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
return ctrl.Result{}, err
}
logger.V(2).Info("Provider is available", "provider", req.NamespacedName)
return ctrl.Result{RequeueAfter: time.Duration(provider.Spec.Healthcheck.IntervalSeconds) * time.Second}, nil
return ctrl.Result{RequeueAfter: intervalNoError}, nil
}

// SetupWithManager sets up the controller with the Manager.
Expand Down

0 comments on commit 75934d7

Please sign in to comment.