Skip to content

Commit

Permalink
feat: add config to recover from previously failed attempt
Browse files Browse the repository at this point in the history
Signed-off-by: Lukas Wöhrl <[email protected]>

sync crd

Signed-off-by: Lukas Wöhrl <[email protected]>

add stable itter for tests

Signed-off-by: Lukas Wöhrl <[email protected]>

test: more debug output to find out what's wrong

Signed-off-by: Lukas Wöhrl <[email protected]>

fix: invalid helm default

Signed-off-by: Lukas Wöhrl <[email protected]>

fix: remove default helm value for maxRetryDuration

Signed-off-by: Lukas Wöhrl <[email protected]>
  • Loading branch information
woehrl01 committed Oct 22, 2024
1 parent 235aa48 commit edb1182
Show file tree
Hide file tree
Showing 7 changed files with 238 additions and 39 deletions.
3 changes: 3 additions & 0 deletions charts/aws-pca-issuer/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ spec:
{{- if .Values.disableApprovedCheck }}
- -disable-approved-check
{{- end }}
{{- if .Values.maxRetryDuration }}
- -max-retry-duration={{ .Values.maxRetryDuration }}
{{- end }}
{{- if .Values.disableClientSideRateLimiting }}
- -disable-client-side-rate-limiting
{{- end }}
Expand Down
11 changes: 7 additions & 4 deletions charts/aws-pca-issuer/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ disableApprovedCheck: false
# Disables Kubernetes client-side rate limiting (only use if API Priority & Fairness is enabled on the cluster).
disableClientSideRateLimiting: false

# Maxium duration to retry fullfilling a CertificateRequest
#maxRetryDuration: 180s

# Optional secrets used for pulling the container image
#
# For example:
Expand Down Expand Up @@ -55,12 +58,12 @@ service:
# Annotations to add to the issuer Pod
podAnnotations: {}

# Pod security context
# Pod security context
# +docs:property
podSecurityContext:
runAsUser: 65532

# Container security context
# Container security context
# +docs:property
securityContext:
allowPrivilegeEscalation: false
Expand Down Expand Up @@ -129,7 +132,7 @@ volumeMounts: []
# Configures a disruption budget for the deployment.
#
# Expects input structure similar to https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#poddisruptionbudgetspec-v1-policy
# WITHOUT the pod selector, which is handled by the chart.
# WITHOUT the pod selector, which is handled by the chart.
# Per https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#poddisruptionbudgetspec-v1-policy, `maxUnavailable` is mutually
# exclusive with `minAvailable`, you cannot set both.
#
Expand Down Expand Up @@ -177,7 +180,7 @@ approverRole:
# +docs:section=Monitoring

serviceMonitor:
# Create Prometheus ServiceMonitor
# Create Prometheus ServiceMonitor
create: false
# Annotations to add to the Prometheus ServiceMonitor
annotations: {}
Expand Down
16 changes: 16 additions & 0 deletions e2e/k8_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ func waitForIssuerReady(ctx context.Context, client *clientV1beta1.Client, name
return true, nil
}
}

fmt.Println("Issuer not ready yet - Current conditions:")
for _, condition := range issuer.Status.Conditions {
fmt.Printf("Type: %s, Status: %s, Reason: %s, Message: %s\n", condition.Type, condition.Status, condition.Reason, condition.Message)
}
return false, nil
})
}
Expand All @@ -52,6 +57,11 @@ func waitForClusterIssuerReady(ctx context.Context, client *clientV1beta1.Client
}
}

fmt.Println("ClusterIssuer not ready yet - Current conditions:")
for _, condition := range issuer.Status.Conditions {
fmt.Printf("Type: %s, Status: %s, Reason: %s, Message: %s\n", condition.Type, condition.Status, condition.Reason, condition.Message)
}

return false, nil
})
}
Expand All @@ -71,6 +81,12 @@ func waitForCertificateReady(ctx context.Context, client *cmclientv1.Certmanager
return true, nil
}
}

fmt.Println("Certifiate not ready yet - Current conditions:")
for _, condition := range certificate.Status.Conditions {
fmt.Printf("Type: %s, Status: %s, Reason: %s, Message: %s\n", condition.Type, condition.Status, condition.Reason, condition.Message)
}

return false, nil
})
}
5 changes: 5 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package main
import (
"flag"
"os"
"time"

certmanager "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1"

Expand Down Expand Up @@ -58,6 +59,7 @@ func main() {
var enableLeaderElection bool
var probeAddr string
var disableApprovedCheck bool
var maxRetryDuration time.Duration
var disableClientSideRateLimiting bool

flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
Expand All @@ -67,6 +69,7 @@ func main() {
"Enabling this will ensure there is only one active controller manager.")
flag.BoolVar(&disableApprovedCheck, "disable-approved-check", false,
"Disables waiting for CertificateRequests to have an approved condition before signing.")
flag.DurationVar(&maxRetryDuration, "max-retry-duration", 180, "Maximum duration to retry failed CertificateRequests. Set to 0 to disable.")
flag.BoolVar(&disableClientSideRateLimiting, "disable-client-side-rate-limiting", false,
"Disables Kubernetes client-side rate limiting (only use if API Priority & Fairness is enabled on the cluster).")

Expand Down Expand Up @@ -135,7 +138,9 @@ func main() {
Recorder: mgr.GetEventRecorderFor("awspcaissuer-controller"),

Clock: clock.RealClock{},
RequeueItter: controllers.NewRequeueItter(),
CheckApprovedCondition: !disableApprovedCheck,
MaxRetryDuration: maxRetryDuration,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "CertificateRequest")
os.Exit(1)
Expand Down
3 changes: 3 additions & 0 deletions pkg/api/v1beta1/awspcaissuer_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ type AWSPCAIssuerStatus struct {
// ConditionTypeReady is the default condition type for the CRs
const ConditionTypeReady = "Ready"

// ConditionTypeIssuing is the condition type for the CRs when they are issuing a certificate
const ConditionTypeIssuing = "Issuing"

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status

Expand Down
89 changes: 70 additions & 19 deletions pkg/controllers/certificaterequest_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"context"
"errors"
"fmt"
"math/rand"
"time"

acmpcatypes "github.com/aws/aws-sdk-go-v2/service/acmpca/types"
"github.com/cert-manager/aws-privateca-issuer/pkg/aws"
Expand All @@ -42,6 +44,21 @@ import (
cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1"
)

type RequeueItter interface {
RequeueAfter() time.Duration
}

type requeueItter struct {
}

func (r *requeueItter) RequeueAfter() time.Duration {
return 1*time.Minute + time.Duration(rand.Intn(60))*time.Second
}

func NewRequeueItter() RequeueItter {
return &requeueItter{}
}

// CertificateRequestReconciler reconciles a AWSPCAIssuer object
type CertificateRequestReconciler struct {
client.Client
Expand All @@ -50,7 +67,9 @@ type CertificateRequestReconciler struct {
Recorder record.EventRecorder

Clock clock.Clock
RequeueItter RequeueItter
CheckApprovedCondition bool
MaxRetryDuration time.Duration
}

// +kubebuilder:rbac:groups=cert-manager.io,resources=certificaterequests,verbs=get;list;watch;update
Expand Down Expand Up @@ -118,7 +137,7 @@ func (r *CertificateRequestReconciler) Reconcile(ctx context.Context, req ctrl.R
}

message := "The CertificateRequest was denied by an approval controller"
return ctrl.Result{}, r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonDenied, message)
return ctrl.Result{}, r.setPermanentStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonDenied, message)
}

if r.CheckApprovedCondition {
Expand All @@ -142,53 +161,67 @@ func (r *CertificateRequestReconciler) Reconcile(ctx context.Context, req ctrl.R
issuerName.Namespace = ""
}

retry, requeue, err := r.HandleSignRequest(ctx, log, issuerName, cr)

if err != nil {
now := r.Clock.Now()
creationTime := cr.GetCreationTimestamp()
pastMaxRetryDuration := now.After(creationTime.Add(r.MaxRetryDuration))

if pastMaxRetryDuration || !retry {
return ctrl.Result{Requeue: requeue}, r.setPermanentStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "Permanent error signing certificate: %s", err.Error())
}

return ctrl.Result{
RequeueAfter: r.RequeueItter.RequeueAfter(),
}, r.setTemporaryStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "Temporary error signing certificate, retry again: %s", err.Error())
}

return ctrl.Result{Requeue: requeue}, r.setPermanentStatus(ctx, cr, cmmeta.ConditionTrue, cmapi.CertificateRequestReasonIssued, "Certificate issued")
}

func (r *CertificateRequestReconciler) HandleSignRequest(ctx context.Context, log logr.Logger, issuerName types.NamespacedName, cr *cmapi.CertificateRequest) (retry bool, requeue bool, error) {
iss, err := util.GetIssuer(ctx, r.Client, issuerName)
if err != nil {
log.Error(err, "failed to retrieve Issuer resource")
_ = r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "issuer could not be found")
return ctrl.Result{}, err
return true, false, fmt.Errorf("failed to retrieve Issuer resource: %w", err)
}

if !isReady(iss) {
err := fmt.Errorf("issuer %s is not ready", iss.GetName())
_ = r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "issuer is not ready")
return ctrl.Result{}, err
return true, false, fmt.Errorf("issuer %s is not ready", iss.GetName())
}

provisioner, ok := aws.GetProvisioner(issuerName)
if !ok {
err := fmt.Errorf("provisioner for %s not found", issuerName)
log.Error(err, "failed to retrieve provisioner")
_ = r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "failed to retrieve provisioner")
return ctrl.Result{}, err
return true, false, fmt.Errorf("provisioner for %s not found (name: %s, namespace: %s)", issuerName, issuerName.Name, issuerName.Namespace)
}

certArn, exists := cr.ObjectMeta.GetAnnotations()["aws-privateca-issuer/certificate-arn"]
if !exists {
err := provisioner.Sign(ctx, cr, log)
if err != nil {
log.Error(err, "failed to request certificate from PCA")
return ctrl.Result{}, r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "failed to request certificate from PCA: "+err.Error())
return false, fmt.Errorf("failed to sign certificat from PCA: %w", err)
}

return ctrl.Result{Requeue: true}, r.Client.Update(ctx, cr)
return true, true, nil
}

pem, ca, err := provisioner.Get(ctx, cr, certArn, log)
if err != nil {
var errorType *acmpcatypes.RequestInProgressException
if errors.As(err, &errorType) {
log.Info("certificate is still issuing")
return ctrl.Result{Requeue: true}, r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonPending, "waiting for certificate to be issued")
return false, false,fmt.Errorf("waiting for certificate to be issued: %w", err)
}

log.Error(err, "failed to issue certificate from PCA")
return ctrl.Result{}, r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "failed to issue certificate from PCA: "+err.Error())
return false,true, fmt.Errorf("failed to issue certificate from PCA: %w", err)
}

cr.Status.Certificate = pem
cr.Status.CA = ca
return ctrl.Result{}, r.setStatus(ctx, cr, cmmeta.ConditionTrue, cmapi.CertificateRequestReasonIssued, "certificate issued")

return true, false, nil
}

// SetupWithManager sets up the controller with the Manager.
Expand All @@ -207,14 +240,32 @@ func isReady(issuer api.GenericIssuer) bool {
return false
}

func (r *CertificateRequestReconciler) setStatus(ctx context.Context, cr *cmapi.CertificateRequest, status cmmeta.ConditionStatus, reason, message string, args ...interface{}) error {
func (r *CertificateRequestReconciler) setStatusInternal(ctx context.Context, cr *cmapi.CertificateRequest, permanent bool, status cmmeta.ConditionStatus, reason, message string, args ...interface{}) error {
completeMessage := fmt.Sprintf(message, args...)
cmutil.SetCertificateRequestCondition(cr, "Ready", status, reason, completeMessage)

eventType := core.EventTypeNormal
if status == cmmeta.ConditionFalse {
eventType = core.EventTypeWarning
}
r.Recorder.Event(cr, eventType, reason, completeMessage)
return r.Client.Status().Update(ctx, cr)

cmutil.SetCertificateRequestCondition(cr, api.ConditionTypeIssuing, status, reason, completeMessage)
if permanent {
cmutil.SetCertificateRequestCondition(cr, api.ConditionTypeReady, status, reason, completeMessage)
}
r.Client.Status().Update(ctx, cr)

if reason == cmapi.CertificateRequestReasonFailed {
return fmt.Errorf(completeMessage)
}

return nil
}

func (r *CertificateRequestReconciler) setPermanentStatus(ctx context.Context, cr *cmapi.CertificateRequest, status cmmeta.ConditionStatus, reason, message string, args ...interface{}) error {
return r.setStatusInternal(ctx, cr, true, status, reason, message, args...)
}

func (r *CertificateRequestReconciler) setTemporaryStatus(ctx context.Context, cr *cmapi.CertificateRequest, status cmmeta.ConditionStatus, reason, message string, args ...interface{}) error {
return r.setStatusInternal(ctx, cr, false, status, reason, message, args...)
}
Loading

0 comments on commit edb1182

Please sign in to comment.