From 3d876e1d90d2def0bd499eb6f82f574d96d8ad1d Mon Sep 17 00:00:00 2001
From: James <370036720@qq.com>
Date: Wed, 11 Sep 2024 14:11:01 +0800
Subject: [PATCH] fix svc status and remove svc immediately (#114)

* fix-deploy-status

* disable ssl by default

* delete svc immediately
---
 builder/deploy/deployer.go | 7 ++++---
 common/config/config.go | 4 ++--
 servicerunner/handler/k8s.go | 32 ++++++++++++++++++++++++++++----
 3 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/builder/deploy/deployer.go b/builder/deploy/deployer.go
index 96c8d775..cd5ebdc4 100644
--- a/builder/deploy/deployer.go
+++ b/builder/deploy/deployer.go
@@ -262,7 +262,7 @@ func (d *deployer) Status(ctx context.Context, dr types.DeployRepo, needDetails
 		}
 		return svcName, deploy.Status, nil, nil
 	}
-
+	deployStatus := rstatus.Code
 	if dr.ModelID > 0 {
 		targetID := dr.DeployID // support model deploy with multi-instance
 		status, err := d.ir.Status(ctx, &types.StatusRequest{
@@ -278,12 +278,13 @@ func (d *deployer) Status(ctx context.Context, dr types.DeployRepo, needDetails
 			return "", common.RunTimeError, nil, fmt.Errorf("can't get deploy status, %w", err)
 		}
 		rstatus.Instances = status.Instances
+		deployStatus = status.Code
 	}

 	if rstatus.DeployID == 0 || rstatus.DeployID >= deploy.ID {
-		return svcName, rstatus.Code, rstatus.Instances, nil
+		return svcName, deployStatus, rstatus.Instances, nil
 	}
-	return svcName, deploy.Status, rstatus.Instances, nil
+	return svcName, deployStatus, rstatus.Instances, nil
 }

 func (d *deployer) Logs(ctx context.Context, dr types.DeployRepo) (*MultiLogReader, error) {
diff --git a/common/config/config.go b/common/config/config.go
index 393f29fc..601c6c53 100644
--- a/common/config/config.go
+++ b/common/config/config.go
@@ -80,7 +80,7 @@ type Config struct {
 		Region string `envconfig:"STARHUB_SERVER_S3_REGION"`
 		Endpoint string `envconfig:"STARHUB_SERVER_S3_ENDPOINT" default:"oss-cn-beijing.aliyuncs.com"`
 		Bucket string `envconfig:"STARHUB_SERVER_S3_BUCKET" default:"opencsg-test"`
-		EnableSSL bool `envconfig:"STARHUB_SERVER_S3_ENABLE_SSL" default:"true"`
+		EnableSSL bool `envconfig:"STARHUB_SERVER_S3_ENABLE_SSL" default:"false"`
 	}

 	SensitiveCheck struct {
@@ -126,7 +126,7 @@ type Config struct {
 	}

 	Model struct {
-		DeployTimeoutInMin int `envconfig:"STARHUB_SERVER_MODEL_DEPLOY_TIMEOUT_IN_MINUTES" default:"30"`
+		DeployTimeoutInMin int `envconfig:"STARHUB_SERVER_MODEL_DEPLOY_TIMEOUT_IN_MINUTES" default:"60"`
 		DownloadEndpoint string `envconfig:"STARHUB_SERVER_MODEL_DOWNLOAD_ENDPOINT" default:"https://hub.opencsg.com"`
 		DockerRegBase string `envconfig:"STARHUB_SERVER_MODEL_DOCKER_REG_BASE" default:"opencsg-registry.cn-beijing.cr.aliyuncs.com/public/"`
 	}
diff --git a/servicerunner/handler/k8s.go b/servicerunner/handler/k8s.go
index 313da6e2..588f66c9 100644
--- a/servicerunner/handler/k8s.go
+++ b/servicerunner/handler/k8s.go
@@ -70,7 +70,7 @@ func (s *K8sHander) RunService(c *gin.Context) {
 	// check if the ksvc exists
 	_, err = cluster.KnativeClient.ServingV1().Services(s.k8sNameSpace).Get(c.Request.Context(), srvName, metav1.GetOptions{})
 	if err == nil {
-		cluster.KnativeClient.ServingV1().Services(s.k8sNameSpace).Delete(c, srvName, *metav1.NewDeleteOptions(0))
+		s.removeServiceForcely(c, cluster, srvName)
 		slog.Info("service already exists,delete it first", slog.String("srv_name", srvName), slog.Any("image_id", request.ImageID))
 	}
 	service, err := s.s.GenerateService(*request, srvName)
@@ -185,8 +185,7 @@ func (s *K8sHander) StopService(c *gin.Context) {
 		c.JSON(http.StatusOK, resp)
 		return
 	}
-
-	err = cluster.KnativeClient.ServingV1().Services(s.k8sNameSpace).Delete(c, srvName, *metav1.NewDeleteOptions(0))
+	err = s.removeServiceForcely(c, cluster, srvName)
 	if err != nil {
 		slog.Error("stop image failed, cannot delete service ", slog.String("srv_name", srvName), slog.Any("error", err),
 			slog.String("srv_name", srvName))
@@ -202,6 +201,31 @@ func (s *K8sHander) StopService(c *gin.Context) {
 	c.JSON(http.StatusOK, resp)
 }

+func (s *K8sHander) removeServiceForcely(c *gin.Context, cluster *cluster.Cluster, svcName string) error {
+	err := cluster.KnativeClient.ServingV1().Services(s.k8sNameSpace).Delete(context.Background(), svcName, *metav1.NewDeleteOptions(0))
+	if err != nil {
+		return err
+	}
+	podNames, _ := s.GetServicePods(c.Request.Context(), *cluster, svcName, s.k8sNameSpace, -1)
+	if podNames == nil {
+		return nil
+	}
+	gracePeriodSeconds := int64(0)
+	deletePolicy := metav1.DeletePropagationForeground
+	deleteOptions := metav1.DeleteOptions{
+		GracePeriodSeconds: &gracePeriodSeconds,
+		PropagationPolicy: &deletePolicy,
+	}
+
+	for _, podName := range podNames {
+		errForce := cluster.Client.CoreV1().Pods(s.k8sNameSpace).Delete(c.Request.Context(), podName, deleteOptions)
+		if errForce != nil {
+			slog.Error("removeServiceForcely failed to delete pod", slog.String("pod_name", podName), slog.Any("error", errForce))
+		}
+	}
+	return nil
+}
+
 func (s *K8sHander) UpdateService(c *gin.Context) {
 	var resp types.ModelUpdateResponse
 	var request = &types.ModelUpdateRequest{}
@@ -824,7 +848,7 @@ func (s *K8sHander) PurgeService(c *gin.Context) {
 			slog.String("srv_name", srvName))
 	} else {
 		// 1 delete service
-		err = cluster.KnativeClient.ServingV1().Services(s.k8sNameSpace).Delete(c, srvName, *metav1.NewDeleteOptions(0))
+		err = s.removeServiceForcely(c, cluster, srvName)
 		if err != nil {
 			slog.Error("failed to delete service ", slog.String("srv_name", srvName), slog.Any("error", err),
 				slog.String("srv_name", srvName))