diff --git a/api/api/version_endpoints_api.go b/api/api/version_endpoints_api.go index f6fe5ef31..eb6e0c4c6 100644 --- a/api/api/version_endpoints_api.go +++ b/api/api/version_endpoints_api.go @@ -279,7 +279,8 @@ func (c *EndpointsController) UpdateEndpoint(r *http.Request, vars map[string]st // Should not allow changing the deployment mode of a pending/running/serving model for 2 reasons: // * For "serving" models it's risky as, we can't guarantee graceful re-deployment // * Kserve uses slightly different deployment resource naming under the hood and doesn't clean up the older deployment - if (endpoint.IsRunning() || endpoint.IsServing()) && newEndpoint.DeploymentMode != endpoint.DeploymentMode { + if (endpoint.IsRunning() || endpoint.IsServing()) && newEndpoint.DeploymentMode != "" && + newEndpoint.DeploymentMode != endpoint.DeploymentMode { return BadRequest(fmt.Sprintf("Changing deployment type of a %s model is not allowed, please terminate it first.", endpoint.Status)) } diff --git a/api/api/version_endpoints_api_test.go b/api/api/version_endpoints_api_test.go index ab124e8a7..e7c0e781c 100644 --- a/api/api/version_endpoints_api_test.go +++ b/api/api/version_endpoints_api_test.go @@ -5055,6 +5055,183 @@ func TestUpdateEndpoint(t *testing.T) { data: Error{Message: "Error validating request: Updating endpoint status to running is not allowed when the endpoint is currently in the pending state"}, }, }, + { + desc: "Should success without changing deployment mode if request does not specify new deployment mode", + vars: map[string]string{ + "model_id": "1", + "version_id": "1", + "endpoint_id": uuid.String(), + }, + requestBody: &models.VersionEndpoint{ + ID: uuid, + VersionID: models.ID(1), + VersionModelID: models.ID(1), + Status: models.EndpointRunning, + ServiceName: "sample", + Namespace: "sample", + EnvironmentName: "dev", + Message: "", + ResourceRequest: &models.ResourceRequest{ + MinReplica: 1, + MaxReplica: 4, + CPURequest: resource.MustParse("1"), + MemoryRequest: resource.MustParse("1Gi"), + }, + EnvVars: models.EnvVars([]models.EnvVar{ + { + Name: "WORKER", + Value: "1", + }, + }), + }, + modelService: func() *mocks.ModelsService { + svc := &mocks.ModelsService{} + svc.On("FindByID", context.Background(), models.ID(1)).Return(&models.Model{ + ID: models.ID(1), + Name: "model-1", + ProjectID: models.ID(1), + Project: mlp.Project{}, + ExperimentID: 1, + Type: "pyfunc", + MlflowURL: "", + Endpoints: nil, + }, nil) + return svc + }, + versionService: func() *mocks.VersionsService { + svc := &mocks.VersionsService{} + svc.On("FindByID", context.Background(), models.ID(1), models.ID(1), mock.Anything).Return(&models.Version{ + ID: models.ID(1), + ModelID: models.ID(1), + Model: &models.Model{ + ID: models.ID(1), + Name: "model-1", + ProjectID: models.ID(1), + Project: mlp.Project{}, + ExperimentID: 1, + Type: "pyfunc", + MlflowURL: "", + Endpoints: nil, + }, + }, nil) + return svc + }, + envService: func() *mocks.EnvironmentService { + svc := &mocks.EnvironmentService{} + svc.On("GetEnvironment", "dev").Return(&models.Environment{ + ID: models.ID(1), + Name: "dev", + Cluster: "dev", + IsDefault: &trueBoolean, + Region: "id", + GcpProject: "dev-proj", + MaxCPU: "1", + MaxMemory: "1Gi", + }, nil) + return svc + }, + endpointService: func() *mocks.EndpointsService { + svc := &mocks.EndpointsService{} + svc.On("FindByID", context.Background(), uuid).Return(&models.VersionEndpoint{ + ID: uuid, + VersionID: models.ID(1), + VersionModelID: models.ID(1), + Status: models.EndpointFailed, + ServiceName: "sample", + InferenceServiceName: "sample", + Namespace: "sample", + URL: "http://endpoint.svc", + MonitoringURL: "http://monitoring.com", + Environment: &models.Environment{ + ID: models.ID(1), + Name: "dev", + Cluster: "dev", + IsDefault: &trueBoolean, + Region: "id", + GcpProject: "dev-proj", + MaxCPU: "1", + MaxMemory: "1Gi", + }, EnvironmentName: "dev", + Message: "", + ResourceRequest: nil, + EnvVars: models.EnvVars([]models.EnvVar{ + { + Name: "WORKER", + Value: "1", + }, + }), + DeploymentMode: deployment.ServerlessDeploymentMode, + }, nil) + svc.On("DeployEndpoint", context.Background(), mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(&models.VersionEndpoint{ + ID: uuid, + VersionID: models.ID(1), + VersionModelID: models.ID(1), + Status: models.EndpointRunning, + URL: "http://endpoint.svc", + ServiceName: "sample", + InferenceServiceName: "sample", + Namespace: "sample", + MonitoringURL: "http://monitoring.com", + Environment: &models.Environment{ + ID: models.ID(1), + Name: "dev", + Cluster: "dev", + IsDefault: &trueBoolean, + Region: "id", + GcpProject: "dev-proj", + MaxCPU: "1", + MaxMemory: "1Gi", + }, + EnvironmentName: "dev", + Message: "", + ResourceRequest: nil, + EnvVars: models.EnvVars([]models.EnvVar{ + { + Name: "WORKER", + Value: "1", + }, + }), + DeploymentMode: deployment.RawDeploymentMode, + CreatedUpdated: models.CreatedUpdated{}, + }, nil) + return svc + }, + expected: &Response{ + code: http.StatusOK, + data: &models.VersionEndpoint{ + ID: uuid, + VersionID: models.ID(1), + VersionModelID: models.ID(1), + Status: models.EndpointRunning, + URL: "http://endpoint.svc", + ServiceName: "sample", + InferenceServiceName: "sample", + Namespace: "sample", + MonitoringURL: "http://monitoring.com", + Environment: &models.Environment{ + ID: models.ID(1), + Name: "dev", + Cluster: "dev", + IsDefault: &trueBoolean, + Region: "id", + GcpProject: "dev-proj", + MaxCPU: "1", + MaxMemory: "1Gi", + }, + EnvironmentName: "dev", + Message: "", + ResourceRequest: nil, + EnvVars: models.EnvVars([]models.EnvVar{ + { + Name: "WORKER", + Value: "1", + }, + }), + DeploymentMode: deployment.RawDeploymentMode, + CreatedUpdated: models.CreatedUpdated{}, + }, + }, + }, } for _, tC := range testCases { t.Run(tC.desc, func(t *testing.T) {