From 57093a20fbf399aa538c76c50a60ae696a192d0b Mon Sep 17 00:00:00 2001
From: gujing
Date: Thu, 7 Mar 2024 17:10:15 +0800
Subject: [PATCH] delete cm if job failed (#1051)

Signed-off-by: zibai
---
 pkg/util/kubeclient/configmap.go |  4 ++--
 pkg/workflow/workflow.go         | 15 +++++++++++----
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/pkg/util/kubeclient/configmap.go b/pkg/util/kubeclient/configmap.go
index b7969cd29..abdb54d40 100644
--- a/pkg/util/kubeclient/configmap.go
+++ b/pkg/util/kubeclient/configmap.go
@@ -77,7 +77,7 @@ func CheckJobIsOwnedByUser(namespace, jobName string, jobType types.TrainingJobT
 	return true, nil
 }
 
-func CreateAppConfigmap(name, trainingType, namespace, configFileName, appInfoFileName, chartName, chartVersion string) (err error) {
+func CreateAppConfigmap(name, namespace, configFileName, appInfoFileName, chartName, chartVersion string) (err error) {
 	data := map[string]string{
 		chartName: chartVersion,
 	}
@@ -97,7 +97,7 @@ func CreateAppConfigmap(name, trainingType, namespace, configFileName, appInfoFi
 		return err
 	}
 	configmap := obj.(*corev1.ConfigMap)
-	configmap.Name = fmt.Sprintf("%v-%v", name, trainingType)
+	configmap.Name = name
 	configmap.Namespace = namespace
 	configmap.Data = data
 	arenaConfiger := config.GetArenaConfiger()
diff --git a/pkg/workflow/workflow.go b/pkg/workflow/workflow.go
index 0c61ad6a7..40544e0f5 100644
--- a/pkg/workflow/workflow.go
+++ b/pkg/workflow/workflow.go
@@ -141,8 +141,9 @@ func SubmitJob(name string, trainingType string, namespace string, values interf
 	if err != nil {
 		return err
 	}
-	err = kubeclient.CreateAppConfigmap(name,
-		trainingType,
+
+	configName := fmt.Sprintf("%v-%v", name, trainingType)
+	err = kubeclient.CreateAppConfigmap(configName,
 		namespace,
 		valueFileName,
 		appInfoFileName,
@@ -161,8 +162,14 @@ func SubmitJob(name string, trainingType string, namespace string, values interf
 	fmt.Printf("%s", result)
 	if err != nil {
 		// clean configmap
-		log.Infof("clean up the config map %s because creating application failed.", name)
-		log.Warnf("Please clean up the training job by using `arena delete %s --type %s`", name, trainingType)
+		delErr := kubeclient.DeleteConfigMap(namespace, configName)
+		if delErr != nil {
+			log.Errorf("Failed to clean up configmap %s in namespace %s, error: %s", configName, namespace, delErr.Error())
+		} else {
+			log.Infof("Successfully clean up the config map %s in namespace %s because creating application failed.", configName, namespace)
+		}
+
+		log.Warnf("Please clean up the %s job", name)
 		return err
 	}
 