Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix race condition in PVC deletion #7149

Merged
merged 1 commit into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions pkg/reconciler/pipelinerun/affinity_assistant_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,7 @@ func TestThatAffinityAssistantNameIsNoLongerThan53(t *testing.T) {
t.Errorf("affinity assistant name can not be longer than 53 chars")
}
}

func TestCleanupAffinityAssistants_Success(t *testing.T) {
workspaces := []v1.WorkspaceBinding{
{
Expand Down Expand Up @@ -866,6 +867,20 @@ func TestCleanupAffinityAssistants_Success(t *testing.T) {
_, c, _ := seedTestData(data)
ctx := cfgtesting.SetFeatureFlags(context.Background(), t, tc.cfgMap)

// mocks `kubernetes.io/pvc-protection` finalizer behavior by adding DeletionTimestamp when deleting pvcs with the finalizer
// see details in: https://kubernetes.io/docs/concepts/overview/working-with-objects/finalizers/#how-finalizers-work
if tc.aaBehavior == aa.AffinityAssistantPerPipelineRun {
for _, pvc := range pvcs {
pvc.DeletionTimestamp = &metav1.Time{
Time: metav1.Now().Time,
}
c.KubeClientSet.CoreV1().(*fake.FakeCoreV1).PrependReactor("delete", "persistentvolumeclaims",
func(action testing2.Action) (handled bool, ret runtime.Object, err error) {
return true, pvc, nil
})
}
}

// call clean up
err := c.cleanupAffinityAssistantsAndPVCs(ctx, pr)
if err != nil {
Expand All @@ -889,9 +904,13 @@ func TestCleanupAffinityAssistants_Success(t *testing.T) {
t.Errorf("unexpected err when getting PersistentVolumeClaims, err: %v", err)
}
} else {
_, err = c.KubeClientSet.CoreV1().PersistentVolumeClaims(pr.Namespace).Get(ctx, expectedPVCName, metav1.GetOptions{})
if !apierrors.IsNotFound(err) {
t.Errorf("failed to clean up PersistentVolumeClaim")
pvc, err := c.KubeClientSet.CoreV1().PersistentVolumeClaims(pr.Namespace).Get(ctx, expectedPVCName, metav1.GetOptions{})
if err != nil {
t.Fatalf("unexpect err when getting PersistentVolumeClaims, err: %v", err)
}
// validate the finalizer is removed, which mocks the pvc is deleted properly
if len(pvc.Finalizers) > 0 {
t.Errorf("pvc %s kubernetes.io/pvc-protection finalizer is not removed properly", pvc.Name)
}
}
}
Expand Down
14 changes: 7 additions & 7 deletions pkg/reconciler/volumeclaim/pvchandler.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func (c *defaultPVCHandler) CreatePVCFromVolumeClaimTemplate(ctx context.Context
return nil
}

// PurgeFinalizerAndDeletePVCForWorkspace purges the `kubernetes.io/pvc-protection` finalizer protection of the given pvc and then deletes it.
// PurgeFinalizerAndDeletePVCForWorkspace deletes pvcs and then purges the `kubernetes.io/pvc-protection` finalizer protection.
// Purging the `kubernetes.io/pvc-protection` finalizer allows the pvc to be deleted even when it is referenced by a taskrun pod.
// See mode details in https://kubernetes.io/docs/concepts/storage/persistent-volumes/#storage-object-in-use-protection.
func (c *defaultPVCHandler) PurgeFinalizerAndDeletePVCForWorkspace(ctx context.Context, pvcName, namespace string) error {
Expand Down Expand Up @@ -113,18 +113,18 @@ func (c *defaultPVCHandler) PurgeFinalizerAndDeletePVCForWorkspace(ctx context.C
return fmt.Errorf("failed to marshal jsonpatch: %w", err)
}

// patch the existing PVC to update the finalizers
QuanZhang-William marked this conversation as resolved.
Show resolved Hide resolved
_, err = c.clientset.CoreV1().PersistentVolumeClaims(namespace).Patch(ctx, pvcName, types.JSONPatchType, removeFinalizerBytes, metav1.PatchOptions{})
if err != nil {
return fmt.Errorf("failed to patch the PVC %s: %w", pvcName, err)
}

// delete PVC
err = c.clientset.CoreV1().PersistentVolumeClaims(namespace).Delete(ctx, pvcName, metav1.DeleteOptions{})
if err != nil {
return fmt.Errorf("failed to delete the PVC %s: %w", pvcName, err)
}

// remove finalizer
_, err = c.clientset.CoreV1().PersistentVolumeClaims(namespace).Patch(ctx, pvcName, types.JSONPatchType, removeFinalizerBytes, metav1.PatchOptions{})
if err != nil {
return fmt.Errorf("failed to patch the PVC %s: %w", pvcName, err)
}

return nil
}

Expand Down
25 changes: 20 additions & 5 deletions pkg/reconciler/volumeclaim/pvchandler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ import (
"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
fakek8s "k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/kubernetes/typed/core/v1/fake"
client_go_testing "k8s.io/client-go/testing"
)

Expand Down Expand Up @@ -263,15 +263,30 @@ func TestPurgeFinalizerAndDeletePVCForWorkspace(t *testing.T) {
t.Fatalf("unexpected error when creating PVC: %v", err)
}

// mocks `kubernetes.io/pvc-protection` finalizer behavior by adding DeletionTimestamp when deleting pvcs with the finalizer
// see details in: https://kubernetes.io/docs/concepts/overview/working-with-objects/finalizers/#how-finalizers-work
pvc.DeletionTimestamp = &metav1.Time{
Time: metav1.Now().Time,
}
kubeClientSet.CoreV1().(*fake.FakeCoreV1).PrependReactor("delete", "persistentvolumeclaims",
func(action client_go_testing.Action) (handled bool, ret runtime.Object, err error) {
return true, pvc, nil
})

// call PurgeFinalizerAndDeletePVCForWorkspace to delete pvc
// note that the pvcs are not actually deleted in the unit test due to the mock limitation of fakek8s.NewSimpleClientset();
// full pvc lifecycle is tested in TestAffinityAssistant_PerPipelineRun integration test
pvcHandler := defaultPVCHandler{kubeClientSet, zap.NewExample().Sugar()}
if err := pvcHandler.PurgeFinalizerAndDeletePVCForWorkspace(ctx, pvcName, namespace); err != nil {
t.Fatalf("unexpected error when calling PurgeFinalizerAndDeletePVCForWorkspace: %v", err)
}

// validate pvc is deleted
_, err := kubeClientSet.CoreV1().PersistentVolumeClaims(namespace).Get(ctx, pvcName, metav1.GetOptions{})
if !apierrors.IsNotFound(err) {
t.Errorf("expected a NotFound response of PersistentVolumeClaims, got: %v", err)
// validate the `kubernetes.io/pvc-protection` finalizer is removed
pvc, err := kubeClientSet.CoreV1().PersistentVolumeClaims(namespace).Get(ctx, pvcName, metav1.GetOptions{})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(pvc.Finalizers) > 0 {
t.Errorf("pvc %s kubernetes.io/pvc-protection finalizer is not removed properly", pvcName)
}
}
6 changes: 6 additions & 0 deletions test/affinity_assistant_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,12 @@ spec:
t.Errorf("Error waiting for PipelineRun to finish: %s", err)
}

// wait and check PVCs are deleted
t.Logf("Waiting for PVC in namespace %s to delete", namespace)
if err := WaitForPVCIsDeleted(ctx, c, timeout, prName, namespace, "PVCDeleted"); err != nil {
t.Errorf("Error waiting for PVC to be deleted: %s", err)
}

// validate PipelineRun pods sharing the same PVC are scheduled to the same node
podNames := []string{fmt.Sprintf("%v-foo-pod", prName), fmt.Sprintf("%v-bar-pod", prName), fmt.Sprintf("%v-double-ws-pod", prName), fmt.Sprintf("%v-no-ws-pod", prName)}
validatePodAffinity(t, ctx, podNames, namespace, c.KubeClient)
Expand Down
25 changes: 25 additions & 0 deletions test/wait.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,31 @@ func WaitForPodState(ctx context.Context, c *clients, name string, namespace str
})
}

// WaitForPVCIsDeleted polls the number of the PVC in the namespace from client every
// interval until all the PVCs in the namespace are deleted. It returns an
// error or timeout. desc will be used to name the metric that is emitted to
// track how long it took to delete all the PVCs in the namespace.
func WaitForPVCIsDeleted(ctx context.Context, c *clients, polltimeout time.Duration, name, namespace, desc string) error {
metricName := fmt.Sprintf("WaitForPVCIsDeleted/%s/%s", name, desc)
_, span := trace.StartSpan(context.Background(), metricName)
defer span.End()

ctx, cancel := context.WithTimeout(ctx, polltimeout)
defer cancel()

return pollImmediateWithContext(ctx, func() (bool, error) {
pvcList, err := c.KubeClient.CoreV1().PersistentVolumeClaims(namespace).List(ctx, metav1.ListOptions{})
if err != nil {
return true, err
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you mind adding docString here for the indications of the bool error being returned?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it just follows the expected function signature, and k8s does have docstring for it:

// ConditionFunc returns true if the condition is satisfied, or an error

}
if len(pvcList.Items) > 0 {
return false, nil
}

return true, nil
})
}

// WaitForPipelineRunState polls the status of the PipelineRun called name from client every
// interval until inState returns `true` indicating it is done, returns an
// error or timeout. desc will be used to name the metric that is emitted to
Expand Down
Loading