From a807a54b24d924dd9951a669a5f4c8334684a7f1 Mon Sep 17 00:00:00 2001 From: stevenhorsman Date: Wed, 18 Dec 2024 15:16:41 +0000 Subject: [PATCH] test/e2e: Add debug of failed pods Print the logs of failed pods and of the CAA pod when a test fails, to make debugging easier. Signed-off-by: stevenhorsman --- .../test/e2e/assessment_helpers.go | 43 ++++++++++++--- .../test/e2e/assessment_runner.go | 52 +++++++++++++++---- 2 files changed, 77 insertions(+), 18 deletions(-) diff --git a/src/cloud-api-adaptor/test/e2e/assessment_helpers.go b/src/cloud-api-adaptor/test/e2e/assessment_helpers.go index 10976f332..162b5396b 100644 --- a/src/cloud-api-adaptor/test/e2e/assessment_helpers.go +++ b/src/cloud-api-adaptor/test/e2e/assessment_helpers.go @@ -355,25 +355,36 @@ func GetNodeNameFromPod(ctx context.Context, client klient.Client, customPod *v1 return getStringFromPod(ctx, client, customPod, getNodeName) } +func GetPodsFromJob(ctx context.Context, t *testing.T, client klient.Client, job *batchv1.Job) (*v1.PodList, error) { + clientset, err := kubernetes.NewForConfig(client.RESTConfig()) + if err != nil { + return nil, fmt.Errorf("GetPodFromJob: get Kubernetes clientSet failed: %v", err) + } + + pods, err := clientset.CoreV1().Pods(job.Namespace).List(context.TODO(), metav1.ListOptions{LabelSelector: "job-name=" + job.Name}) + if err != nil { + return nil, fmt.Errorf("GetPodFromJob: get pod list failed: %v", err) + } + + return pods, nil +} + func GetSuccessfulAndErroredPods(ctx context.Context, t *testing.T, client klient.Client, job batchv1.Job) (int, int, string, error) { podLogString := "" errorPod := 0 successPod := 0 - var podlist v1.PodList clientset, err := kubernetes.NewForConfig(client.RESTConfig()) if err != nil { return 0, 0, "", err } - if err := client.Resources(job.Namespace).List(ctx, &podlist); err != nil { + podList, err := GetPodsFromJob(ctx, t, client, &job) + if err != nil { return 0, 0, "", err } - for _, pod := range podlist.Items { - if 
pod.ObjectMeta.Labels["job-name"] != job.Name { - continue - } + for _, pod := range podList.Items { if pod.Status.Phase == v1.PodPending { if pod.Status.ContainerStatuses[0].State.Waiting.Reason == "ContainerCreating" { - return 0, 0, "", errors.New("Failed to Create PodVM") + return 0, 0, "", errors.New("failed to Create PodVM") } } if pod.Status.ContainerStatuses[0].State.Terminated.Reason == "StartError" { @@ -413,6 +424,22 @@ func GetSuccessfulAndErroredPods(ctx context.Context, t *testing.T, client klien return successPod, errorPod, podLogString, nil } +func GetCaaPodLog(ctx context.Context, t *testing.T, client klient.Client, pod *v1.Pod) (string, error) { + nodeName, err := GetNodeNameFromPod(ctx, client, pod) + if err != nil { + return "", fmt.Errorf("GetCaaPodLog: GetNodeNameFromPod failed with %v", err) + } + caaPod, err := getCaaPod(ctx, client, t, nodeName) + if err != nil { + return "", fmt.Errorf("GetCaaPodLog: failed to getCaaPod: %v", err) + } + podLogString, err := getStringFromPod(ctx, client, caaPod, GetPodLog) + if err != nil { + return "", fmt.Errorf("GetCaaPodLog: failed to getStringFromPod: %v", err) + } + return podLogString, nil +} + // SkipTestOnCI skips the test if running on CI func SkipTestOnCI(t *testing.T) { ci := os.Getenv("CI") @@ -665,7 +692,7 @@ func GetPodNamesByLabel(ctx context.Context, client klient.Client, t *testing.T, clientset, err := kubernetes.NewForConfig(client.RESTConfig()) if err != nil { - return nil, fmt.Errorf("GetPodNamesByLabel: get Kubernetes clientSef failed: %v", err) + return nil, fmt.Errorf("GetPodNamesByLabel: get Kubernetes clientSet failed: %v", err) } nodeSelector := fmt.Sprintf("spec.nodeName=%s", nodeName) diff --git a/src/cloud-api-adaptor/test/e2e/assessment_runner.go b/src/cloud-api-adaptor/test/e2e/assessment_runner.go index 15e75f723..21e76da23 100644 --- a/src/cloud-api-adaptor/test/e2e/assessment_runner.go +++ b/src/cloud-api-adaptor/test/e2e/assessment_runner.go @@ -460,6 +460,7 @@ func (tc 
*TestCase) Run() { if err != nil { t.Fatal(err) } + if tc.configMap != nil { if err = client.Resources().Delete(ctx, tc.configMap); err != nil { t.Fatal(err) @@ -498,27 +499,58 @@ func (tc *TestCase) Run() { } if tc.job != nil { - var podlist v1.PodList - if err := client.Resources(tc.job.Namespace).List(ctx, &podlist); err != nil { - t.Fatal(err) + podList, err := GetPodsFromJob(ctx, t, client, tc.job) + if err != nil { + t.Error(err) + } + + if t.Failed() { + if len(podList.Items) > 0 { + jobPod := podList.Items[0] + + podLogString, err := GetPodLog(ctx, client, &jobPod) + if err != nil { + t.Error(err) + } + t.Logf("Job pod log: %s\n", podLogString) + + caaPodLog, err := GetCaaPodLog(ctx, t, client, &jobPod) + if err != nil { + t.Error(err) + } + t.Logf("CAA log: %s\n", caaPodLog) + } } + if err = client.Resources().Delete(ctx, tc.job); err != nil { t.Fatal(err) } else { t.Logf("Deleting Job... %s", tc.job.Name) } - for _, pod := range podlist.Items { - if pod.ObjectMeta.Labels["job-name"] == tc.job.Name { - if err = client.Resources().Delete(ctx, &pod); err != nil { - t.Fatal(err) - } - t.Logf("Deleting pods created by job... %s", pod.ObjectMeta.Name) - + for _, pod := range podList.Items { + if err = client.Resources().Delete(ctx, &pod); err != nil { + t.Fatal(err) } + t.Logf("Deleting pods created by job... %s", pod.ObjectMeta.Name) } } if tc.pod != nil { + + if t.Failed() { + podLogString, err := GetPodLog(ctx, client, tc.pod) + if err != nil { + t.Error(err) + } + t.Logf("Pod log: %s\n", podLogString) + + caaPodLog, err := GetCaaPodLog(ctx, t, client, tc.pod) + if err != nil { + t.Error(err) + } + t.Logf("CAA log: %s\n", caaPodLog) + } + if err = client.Resources().Delete(ctx, tc.pod); err != nil { t.Fatal(err) }