Skip to content

Commit

Permalink
K8SPG-600: Fix failed backup status (#820)
Browse files Browse the repository at this point in the history
* Fix failed backup status

* fix lint

* fix fmt

* fix regression and status update
  • Loading branch information
pooknull authored and ptankov committed Jun 25, 2024
1 parent c39ebed commit bc2c56e
Show file tree
Hide file tree
Showing 13 changed files with 117 additions and 94 deletions.
2 changes: 1 addition & 1 deletion config/rbac/cluster/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ rules:
resources:
- crunchybridgeclusters/finalizers
- crunchybridgeclusters/status
- postgresclusters/status
verbs:
- patch
- update
Expand All @@ -205,7 +206,6 @@ rules:
resources:
- pgadmins/status
- pgupgrades/status
- postgresclusters/status
verbs:
- patch
- apiGroups:
Expand Down
2 changes: 1 addition & 1 deletion config/rbac/namespace/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ rules:
resources:
- crunchybridgeclusters/finalizers
- crunchybridgeclusters/status
- postgresclusters/status
verbs:
- patch
- update
Expand All @@ -205,7 +206,6 @@ rules:
resources:
- pgadmins/status
- pgupgrades/status
- postgresclusters/status
verbs:
- patch
- apiGroups:
Expand Down
2 changes: 1 addition & 1 deletion deploy/bundle.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45321,6 +45321,7 @@ rules:
resources:
- crunchybridgeclusters/finalizers
- crunchybridgeclusters/status
- postgresclusters/status
verbs:
- patch
- update
Expand All @@ -45345,7 +45346,6 @@ rules:
resources:
- pgadmins/status
- pgupgrades/status
- postgresclusters/status
verbs:
- patch
- apiGroups:
Expand Down
2 changes: 1 addition & 1 deletion deploy/cw-bundle.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45321,6 +45321,7 @@ rules:
resources:
- crunchybridgeclusters/finalizers
- crunchybridgeclusters/status
- postgresclusters/status
verbs:
- patch
- update
Expand All @@ -45345,7 +45346,6 @@ rules:
resources:
- pgadmins/status
- pgupgrades/status
- postgresclusters/status
verbs:
- patch
- apiGroups:
Expand Down
2 changes: 1 addition & 1 deletion deploy/cw-rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ rules:
resources:
- crunchybridgeclusters/finalizers
- crunchybridgeclusters/status
- postgresclusters/status
verbs:
- patch
- update
Expand All @@ -209,7 +210,6 @@ rules:
resources:
- pgadmins/status
- pgupgrades/status
- postgresclusters/status
verbs:
- patch
- apiGroups:
Expand Down
2 changes: 1 addition & 1 deletion deploy/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ rules:
resources:
- crunchybridgeclusters/finalizers
- crunchybridgeclusters/status
- postgresclusters/status
verbs:
- patch
- update
Expand All @@ -209,7 +210,6 @@ rules:
resources:
- pgadmins/status
- pgupgrades/status
- postgresclusters/status
verbs:
- patch
- apiGroups:
Expand Down
2 changes: 1 addition & 1 deletion internal/controller/postgrescluster/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ type Reconciler struct {

// +kubebuilder:rbac:groups="",resources="events",verbs={create,patch}
// +kubebuilder:rbac:groups="postgres-operator.crunchydata.com",resources="postgresclusters",verbs={get,list,watch}
// +kubebuilder:rbac:groups="postgres-operator.crunchydata.com",resources="postgresclusters/status",verbs={patch}
// +kubebuilder:rbac:groups="postgres-operator.crunchydata.com",resources="postgresclusters/status",verbs={patch,update}

// Reconcile reconciles a ConfigMap in a namespace managed by the PostgreSQL Operator
func (r *Reconciler) Reconcile(
Expand Down
60 changes: 30 additions & 30 deletions percona/controller/pgbackup/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package pgbackup
import (
"context"
"path"
"strings"
"time"

"github.com/pkg/errors"
Expand All @@ -23,6 +22,7 @@ import (
"github.com/percona/percona-postgresql-operator/internal/naming"
"github.com/percona/percona-postgresql-operator/percona/clientcmd"
"github.com/percona/percona-postgresql-operator/percona/controller"
pNaming "github.com/percona/percona-postgresql-operator/percona/naming"
"github.com/percona/percona-postgresql-operator/percona/pgbackrest"
"github.com/percona/percona-postgresql-operator/percona/watcher"
v2 "github.com/percona/percona-postgresql-operator/pkg/apis/pgv2.percona.com/v2"
Expand Down Expand Up @@ -156,7 +156,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re
if bcp.Annotations == nil {
bcp.Annotations = make(map[string]string)
}
bcp.Annotations[v2.AnnotationPGBackrestBackupJobType] = job.Labels[naming.LabelPGBackRestBackup]
bcp.Annotations[pNaming.AnnotationPGBackrestBackupJobType] = job.Labels[naming.LabelPGBackRestBackup]
default:
return errors.Errorf("unknown value for %s label: %s", naming.LabelPGBackRestBackup, job.Labels[naming.LabelPGBackRestBackup])
}
Expand Down Expand Up @@ -257,9 +257,8 @@ func getBackupInProgress(ctx context.Context, c client.Client, clusterName, ns s
return "", errors.Wrap(err, "get PostgresCluster")
}

annotation := v2.AnnotationBackupInProgress
spl := strings.Split(annotation, "/")
crunchyAnnotation := v2.CrunchyAnnotationPrefix + spl[1]
annotation := pNaming.AnnotationBackupInProgress
crunchyAnnotation := pNaming.ToCrunchyAnnotation(annotation)

if crunchyCluster.Annotations[crunchyAnnotation] != "" {
return crunchyCluster.Annotations[crunchyAnnotation], nil
Expand Down Expand Up @@ -324,7 +323,7 @@ func updatePGBackrestInfo(ctx context.Context, c client.Client, pod *corev1.Pod,
}

backupType := backup.Annotation[v2.PGBackrestAnnotationJobType]
if (backupType != pgBackup.Annotations[v2.AnnotationPGBackrestBackupJobType] ||
if (backupType != pgBackup.Annotations[pNaming.AnnotationPGBackrestBackupJobType] ||
backupType != string(naming.BackupReplicaCreate)) && backup.Annotation[v2.PGBackrestAnnotationBackupName] != pgBackup.Name {
continue
}
Expand Down Expand Up @@ -358,14 +357,17 @@ func updatePGBackrestInfo(ctx context.Context, c client.Client, pod *corev1.Pod,
}

func finishBackup(ctx context.Context, c client.Client, pgBackup *v2.PerconaPGBackup, job *batchv1.Job) (*reconcile.Result, error) {
readyPod, err := controller.GetReadyInstancePod(ctx, c, pgBackup.Spec.PGCluster, pgBackup.Namespace)
if err != nil {
return nil, errors.Wrap(err, "get ready instance pod")
}
if checkBackupJob(job) == v2.BackupSucceeded {
readyPod, err := controller.GetReadyInstancePod(ctx, c, pgBackup.Spec.PGCluster, pgBackup.Namespace)
if err != nil {
return nil, errors.Wrap(err, "get ready instance pod")
}

if err := updatePGBackrestInfo(ctx, c, readyPod, pgBackup); err != nil {
return nil, errors.Wrap(err, "update pgbackrest info")
if err := updatePGBackrestInfo(ctx, c, readyPod, pgBackup); err != nil {
return nil, errors.Wrap(err, "update pgbackrest info")
}
}

if job.Labels[naming.LabelPGBackRestBackup] != string(naming.BackupManual) {
return nil, nil
}
Expand All @@ -383,11 +385,7 @@ func finishBackup(ctx context.Context, c client.Client, pgBackup *v2.PerconaPGBa
return false, errors.Wrap(err, "get PostgresCluster")
}

crunchyAnnotation := annotation
if strings.HasPrefix(annotation, v2.AnnotationPrefix) {
spl := strings.Split(annotation, "/")
crunchyAnnotation = v2.CrunchyAnnotationPrefix + spl[1]
}
crunchyAnnotation := pNaming.ToCrunchyAnnotation(annotation)
// We should be sure that annotation is deleted from crunchy's cluster
_, ok := pgCluster.Annotations[crunchyAnnotation]
if !ok {
Expand Down Expand Up @@ -442,19 +440,21 @@ func finishBackup(ctx context.Context, c client.Client, pgBackup *v2.PerconaPGBa
return nil, errors.Wrap(err, "update backup job labels")
}

pgCluster := new(v1beta1.PostgresCluster)
if err := c.Get(ctx, types.NamespacedName{Name: pgBackup.Spec.PGCluster, Namespace: pgBackup.Namespace}, pgCluster); err != nil {
return nil, errors.Wrap(err, "get PostgresCluster")
}
origPGCluster := pgCluster.DeepCopy()
pgCluster.Status.PGBackRest.ManualBackup = nil
if err := c.Status().Patch(ctx, pgCluster, client.MergeFrom(origPGCluster)); err != nil {
return nil, errors.Wrap(err, "failed to patch PostgresCluster")
if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
pgCluster := new(v1beta1.PostgresCluster)
if err := c.Get(ctx, types.NamespacedName{Name: pgBackup.Spec.PGCluster, Namespace: pgBackup.Namespace}, pgCluster); err != nil {
return errors.Wrap(err, "get PostgresCluster")
}
pgCluster.Status.PGBackRest.ManualBackup = nil

return c.Status().Update(ctx, pgCluster)
}); err != nil {
return nil, errors.Wrap(err, "update postgrescluster")
}

deleted, err = deleteAnnotation(v2.AnnotationBackupInProgress)
deleted, err = deleteAnnotation(pNaming.AnnotationBackupInProgress)
if err != nil {
return nil, errors.Wrapf(err, "delete %s annotation", v2.AnnotationBackupInProgress)
return nil, errors.Wrapf(err, "delete %s annotation", pNaming.AnnotationBackupInProgress)
}
if !deleted {
return &reconcile.Result{RequeueAfter: time.Second * 5}, nil
Expand All @@ -470,14 +470,14 @@ func startBackup(ctx context.Context, c client.Client, pb *v2.PerconaPGBackup) e
if err != nil {
return err
}
if a := pg.Annotations[v2.AnnotationBackupInProgress]; a != "" && a != pb.Name {
if a := pg.Annotations[pNaming.AnnotationBackupInProgress]; a != "" && a != pb.Name {
return errors.Errorf("backup %s already in progress", a)
}
if pg.Annotations == nil {
pg.Annotations = make(map[string]string)
}
pg.Annotations[naming.PGBackRestBackup] = pb.Name
pg.Annotations[v2.AnnotationBackupInProgress] = pb.Name
pg.Annotations[pNaming.AnnotationBackupInProgress] = pb.Name

if pg.Spec.Backups.PGBackRest.Manual == nil {
pg.Spec.Backups.PGBackRest.Manual = new(v1beta1.PGBackRestManualBackup)
Expand All @@ -495,7 +495,7 @@ func startBackup(ctx context.Context, c client.Client, pb *v2.PerconaPGBackup) e
}

func findBackupJob(ctx context.Context, c client.Client, pg *v2.PerconaPGCluster, pb *v2.PerconaPGBackup) (*batchv1.Job, error) {
if jobName := pb.GetAnnotations()[v2.AnnotationPGBackrestBackupJobName]; jobName != "" {
if jobName := pb.GetAnnotations()[pNaming.AnnotationPGBackrestBackupJobName]; jobName != "" {
job := new(batchv1.Job)
err := c.Get(ctx, types.NamespacedName{Name: jobName, Namespace: pb.Namespace}, job)
if err != nil {
Expand Down
15 changes: 10 additions & 5 deletions percona/controller/pgcluster/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,22 @@ import (
"github.com/percona/percona-postgresql-operator/internal/logging"
"github.com/percona/percona-postgresql-operator/internal/naming"
"github.com/percona/percona-postgresql-operator/percona/controller"
pNaming "github.com/percona/percona-postgresql-operator/percona/naming"
"github.com/percona/percona-postgresql-operator/percona/pgbackrest"
v2 "github.com/percona/percona-postgresql-operator/pkg/apis/pgv2.percona.com/v2"
)

func (r *PGClusterReconciler) reconcileBackups(ctx context.Context, cr *v2.PerconaPGCluster) error {
log := logging.FromContext(ctx)

if err := r.reconcileBackupJobs(ctx, cr); err != nil {
return errors.Wrap(err, "reconcile backup jobs")
}

if err := r.cleanupOutdatedBackups(ctx, cr); err != nil {
return errors.Wrap(err, "cleanup outdated backups")
// If the user has invalid pgbackrest credentials, this could stop the reconcile.
// We should just print an error message.
log.Error(err, "failed to cleanup outdated backups")
}

return nil
Expand Down Expand Up @@ -117,7 +122,7 @@ func (r *PGClusterReconciler) reconcileBackupJobs(ctx context.Context, cr *v2.Pe
}

func checkBackupAnnotations(ctx context.Context, cl client.Client, cr *v2.PerconaPGCluster) error {
backupName := cr.Annotations[v2.AnnotationBackupInProgress]
backupName := cr.Annotations[pNaming.AnnotationBackupInProgress]
if backupName == "" {
return nil
}
Expand All @@ -137,7 +142,7 @@ func checkBackupAnnotations(ctx context.Context, cl client.Client, cr *v2.Percon
return err
}

delete(cr.Annotations, v2.AnnotationBackupInProgress)
delete(cr.Annotations, pNaming.AnnotationBackupInProgress)
delete(cr.Annotations, naming.PGBackRestBackup)

return cl.Update(ctx, cr)
Expand Down Expand Up @@ -169,7 +174,7 @@ func reconcileBackupJob(ctx context.Context, cl client.Client, cr *v2.PerconaPGC
GenerateName: job.Name + "-",
Namespace: job.Namespace,
Annotations: map[string]string{
v2.AnnotationPGBackrestBackupJobName: job.Name,
pNaming.AnnotationPGBackrestBackupJobName: job.Name,
},
},
Spec: v2.PerconaPGBackupSpec{
Expand Down Expand Up @@ -212,7 +217,7 @@ func findPGBackup(ctx context.Context, cl client.Reader, cr *v2.PerconaPGCluster

for _, pb := range pbList {
pb := pb
if pb.GetAnnotations()[v2.AnnotationPGBackrestBackupJobName] == job.Name || pb.Status.JobName == job.Name {
if pb.GetAnnotations()[pNaming.AnnotationPGBackrestBackupJobName] == job.Name || pb.Status.JobName == job.Name {
return &pb, nil
}
}
Expand Down
5 changes: 3 additions & 2 deletions percona/controller/pgcluster/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
perconaController "github.com/percona/percona-postgresql-operator/percona/controller"
"github.com/percona/percona-postgresql-operator/percona/extensions"
"github.com/percona/percona-postgresql-operator/percona/k8s"
pNaming "github.com/percona/percona-postgresql-operator/percona/naming"
"github.com/percona/percona-postgresql-operator/percona/pmm"
"github.com/percona/percona-postgresql-operator/percona/utils/registry"
"github.com/percona/percona-postgresql-operator/percona/watcher"
Expand Down Expand Up @@ -337,7 +338,7 @@ func (r *PGClusterReconciler) addPMMSidecar(ctx context.Context, cr *v2.PerconaP
if set.Metadata.Annotations == nil {
set.Metadata.Annotations = make(map[string]string)
}
set.Metadata.Annotations[v2.AnnotationPMMSecretHash] = pmmSecretHash
set.Metadata.Annotations[pNaming.AnnotationPMMSecretHash] = pmmSecretHash

set.Sidecars = append(set.Sidecars, pmm.SidecarContainer(cr))
}
Expand Down Expand Up @@ -381,7 +382,7 @@ func (r *PGClusterReconciler) handleMonitorUserPassChange(ctx context.Context, c
}

// If the currentHash is the same is the on the STS, restart will not happen
set.Metadata.Annotations[v2.AnnotationMonitorUserSecretHash] = currentHash
set.Metadata.Annotations[pNaming.AnnotationMonitorUserSecretHash] = currentHash
}

return nil
Expand Down
7 changes: 4 additions & 3 deletions percona/controller/pgcluster/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/percona/percona-postgresql-operator/internal/controller/runtime"
"github.com/percona/percona-postgresql-operator/internal/naming"
perconaController "github.com/percona/percona-postgresql-operator/percona/controller"
pNaming "github.com/percona/percona-postgresql-operator/percona/naming"
v2 "github.com/percona/percona-postgresql-operator/pkg/apis/pgv2.percona.com/v2"
"github.com/percona/percona-postgresql-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
)
Expand Down Expand Up @@ -257,7 +258,7 @@ var _ = Describe("PMM sidecar", Ordered, func() {

It("should have PMM secret hash", func() {
Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(&sts), &sts)).Should(Succeed())
Expect(sts.Spec.Template.ObjectMeta.Annotations).To(HaveKey(v2.AnnotationPMMSecretHash))
Expect(sts.Spec.Template.ObjectMeta.Annotations).To(HaveKey(pNaming.AnnotationPMMSecretHash))
})

It("should label PMM secret", func() {
Expand Down Expand Up @@ -383,7 +384,7 @@ var _ = Describe("Monitor user password change", Ordered, func() {

Expect(stsList.Items).Should(ContainElement(gs.MatchFields(gs.IgnoreExtras, gs.Fields{
"ObjectMeta": gs.MatchFields(gs.IgnoreExtras, gs.Fields{
"Annotations": HaveKeyWithValue(v2.AnnotationMonitorUserSecretHash, currentHash),
"Annotations": HaveKeyWithValue(pNaming.AnnotationMonitorUserSecretHash, currentHash),
}),
})))
})
Expand Down Expand Up @@ -416,7 +417,7 @@ var _ = Describe("Monitor user password change", Ordered, func() {

Expect(stsList.Items).Should(ContainElement(gs.MatchFields(gs.IgnoreExtras, gs.Fields{
"ObjectMeta": gs.MatchFields(gs.IgnoreExtras, gs.Fields{
"Annotations": HaveKeyWithValue(v2.AnnotationMonitorUserSecretHash, currentHash),
"Annotations": HaveKeyWithValue(pNaming.AnnotationMonitorUserSecretHash, currentHash),
}),
})))
})
Expand Down
Loading

0 comments on commit bc2c56e

Please sign in to comment.