Skip to content

Commit

Permalink
pgupgrade: Ensure that old pg deployments are terminated before upgra…
Browse files Browse the repository at this point in the history
…de job is run (PROJQUAY-8092) (#991)

Explicitly check that the previous deployment's pods have stopped running before the upgrade job is started.
  • Loading branch information
jonathankingfc authored Oct 22, 2024
1 parent cd60da9 commit 9af8f3f
Show file tree
Hide file tree
Showing 8 changed files with 58 additions and 26 deletions.
44 changes: 40 additions & 4 deletions controllers/quay/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ func (r *QuayRegistryReconciler) checkMonitoringAvailable(
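// checkNeedsPostgresUpgradeForComponent checks whether the postgres deployment of the given component needs an
// upgrade and, if so, scales it down to 0 replicas. scaledDown is true when no upgrade is needed or when no
// running pods of the deployment remain; it is false while pods are still running.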
func (r *QuayRegistryReconciler) checkNeedsPostgresUpgradeForComponent(
ctx context.Context, qctx *quaycontext.QuayRegistryContext, quay *v1.QuayRegistry, component v1.ComponentKind,
) error {
) (err error, scaledDown bool) {
componentInfo := map[v1.ComponentKind]struct {
deploymentSuffix string
upgradeField *bool
Expand All @@ -419,7 +419,7 @@ func (r *QuayRegistryReconciler) checkNeedsPostgresUpgradeForComponent(

info, ok := componentInfo[component]
if !ok {
return fmt.Errorf("invalid component kind: %s", component)
return fmt.Errorf("invalid component kind: %s", component), false
}

deploymentName := fmt.Sprintf("%s-%s", quay.GetName(), info.deploymentSuffix)
Expand All @@ -435,7 +435,7 @@ func (r *QuayRegistryReconciler) checkNeedsPostgresUpgradeForComponent(
postgresDeployment,
); err != nil {
r.Log.Info(fmt.Sprintf("%s deployment not found, skipping", component))
return nil
return nil, true
}

deployedImageName := postgresDeployment.Spec.Template.Spec.Containers[0].Image
Expand All @@ -458,9 +458,45 @@ func (r *QuayRegistryReconciler) checkNeedsPostgresUpgradeForComponent(
*info.upgradeField = true
} else {
r.Log.Info(fmt.Sprintf("%s does not need to perform an upgrade", component))
return nil, true
}

return nil
// at this point we have determined that these postgres deployments need to be upgraded and can set them to 0 replicas
// so that the upgrade job can run with no interference
r.Log.Info(fmt.Sprintf("scaling down %s deployment", component))
postgresDeployment.Spec.Replicas = &[]int32{0}[0]
postgresDeployment.Spec.Template.Spec.TerminationGracePeriodSeconds = &[]int64{600}[0]
if err := r.Client.Update(ctx, postgresDeployment); err != nil {
r.Log.Error(err, "unable to update postgres deployment replicas")
}
// now we check (without blocking) whether the deployment has scaled down; while any of its pods are still
// running we return scaledDown=false

terminatingPods := []corev1.Pod{}
podList := &corev1.PodList{}
labelSelector, err := metav1.LabelSelectorAsSelector(postgresDeployment.Spec.Selector)
if err != nil {
r.Log.Error(err, "unable to get label selector for postgres deployment")
}
err = r.Client.List(ctx, podList, &client.ListOptions{
LabelSelector: labelSelector,
})
if err != nil {
r.Log.Error(err, "unable to list pods for postgres deployment")
return err, false
}

for _, pod := range podList.Items {
if pod.Status.Phase == corev1.PodRunning {
terminatingPods = append(terminatingPods, pod)
}
}

if len(terminatingPods) > 0 {
r.Log.Info(fmt.Sprintf("Found %d pods in terminating status", len(terminatingPods)))
return nil, false
}

return nil, true
}

func extractImageName(imageName string) string {
Expand Down
10 changes: 8 additions & 2 deletions controllers/quay/quayregistry_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ func (r *QuayRegistryReconciler) Reconcile(ctx context.Context, req ctrl.Request

// Populate the QuayContext with whether or not the QuayRegistry needs an upgrade
if v1.ComponentIsManaged(updatedQuay.Spec.Components, v1.ComponentPostgres) {
err := r.checkNeedsPostgresUpgradeForComponent(ctx, quayContext, updatedQuay, v1.ComponentPostgres)
err, scaledDown := r.checkNeedsPostgresUpgradeForComponent(ctx, quayContext, updatedQuay, v1.ComponentPostgres)
if err != nil {
return r.reconcileWithCondition(
ctx,
Expand All @@ -564,11 +564,14 @@ func (r *QuayRegistryReconciler) Reconcile(ctx context.Context, req ctrl.Request
fmt.Sprintf("error checking for pg upgrade: %s", err),
)
}
if !scaledDown {
return r.Requeue, nil
}
}

// Populate the QuayContext with whether or not the QuayRegistry needs an upgrade
if v1.ComponentIsManaged(updatedQuay.Spec.Components, v1.ComponentClairPostgres) {
err := r.checkNeedsPostgresUpgradeForComponent(ctx, quayContext, updatedQuay, v1.ComponentClairPostgres)
err, scaledDown := r.checkNeedsPostgresUpgradeForComponent(ctx, quayContext, updatedQuay, v1.ComponentClairPostgres)
if err != nil {
return r.reconcileWithCondition(
ctx,
Expand All @@ -579,6 +582,9 @@ func (r *QuayRegistryReconciler) Reconcile(ctx context.Context, req ctrl.Request
fmt.Sprintf("error checking for pg upgrade: %s", err),
)
}
if !scaledDown {
return r.Requeue, nil
}
}

if err := r.checkBuildManagerAvailable(quayContext, cbundle); err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
labels:
quay-component: clair-postgres-old
spec:
terminationGracePeriodSeconds: 180
terminationGracePeriodSeconds: 600
serviceAccountName: clair-postgres
volumes:
- name: clair-postgres-conf-sample
Expand All @@ -27,15 +27,6 @@ spec:
- name: postgres-data
persistentVolumeClaim:
claimName: clair-postgres-13
initContainers:
- name: check-postgres-scale-down
image: quay.io/sclorg/postgresql-13-c9s:latest
command:
- /bin/sh
- -c
- |
echo "Waiting for 30 seconds before starting the main container..."
sleep 30
containers:
- name: postgres
image: quay.io/sclorg/postgresql-13-c9s:latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ spec:
template:
spec:
restartPolicy: OnFailure
terminationGracePeriodSeconds: 180
terminationGracePeriodSeconds: 600
serviceAccountName: clair-postgres
volumes:
- name: clair-postgres-conf-sample
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
labels:
quay-component: clair-postgres
spec:
terminationGracePeriodSeconds: 180
terminationGracePeriodSeconds: 600
serviceAccountName: clair-postgres
volumes:
- name: clair-postgres-conf-sample
Expand Down
2 changes: 1 addition & 1 deletion kustomize/components/pgupgrade/quay-pg-old.deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
labels:
quay-component: postgres
spec:
terminationGracePeriodSeconds: 180
terminationGracePeriodSeconds: 600
serviceAccountName: quay-database
volumes:
- name: postgres-conf-sample
Expand Down
11 changes: 5 additions & 6 deletions kustomize/components/pgupgrade/quay-pg-upgrade.job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ spec:
template:
spec:
restartPolicy: OnFailure
terminationGracePeriodSeconds: 180
terminationGracePeriodSeconds: 600
serviceAccountName: quay-database
volumes:
- name: postgres-conf-sample
Expand Down Expand Up @@ -55,10 +55,9 @@ spec:
cpu: 500m
memory: 2Gi
command:
- "/bin/sh"
- "-c"
- "/bin/sh"
- "-c"
args:
- >
run-postgresql --version || (echo "postgres migration command failed, cleaning up..." && rm -rf /var/lib/pgsql/data/* && exit 1)
- >
run-postgresql --version || (echo "postgres migration command failed, cleaning up..." && rm -rf /var/lib/pgsql/data/* && exit 1)
backoffLimit: 50

2 changes: 1 addition & 1 deletion kustomize/components/postgres/postgres.deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
labels:
quay-component: postgres
spec:
terminationGracePeriodSeconds: 180
terminationGracePeriodSeconds: 600
serviceAccountName: quay-database
volumes:
- name: postgres-conf-sample
Expand Down

0 comments on commit 9af8f3f

Please sign in to comment.