diff --git a/api/api/deployment_api.go b/api/api/deployment_api.go new file mode 100644 index 000000000..3ca79edb0 --- /dev/null +++ b/api/api/deployment_api.go @@ -0,0 +1,19 @@ +package api + +import ( + "fmt" + "net/http" +) + +type DeploymentController struct { + *AppContext +} + +func (c *DeploymentController) ListDeployments(r *http.Request, vars map[string]string, _ interface{}) *Response { + deployments, err := c.DeploymentService.ListDeployments(vars["model_id"], vars["version_id"], vars["endpoint_id"]) + if err != nil { + return InternalServerError(fmt.Sprintf("Error listing deployments: %v", err)) + } + + return Ok(deployments) +} diff --git a/api/api/deployment_api_test.go b/api/api/deployment_api_test.go new file mode 100644 index 000000000..69ab53ed8 --- /dev/null +++ b/api/api/deployment_api_test.go @@ -0,0 +1,100 @@ +package api + +import ( + "fmt" + "net/http" + "testing" + "time" + + "github.com/caraml-dev/merlin/models" + "github.com/caraml-dev/merlin/service/mocks" + "github.com/google/uuid" +) + +func TestDeploymentController_ListDeployments(t *testing.T) { + endpointUUID := uuid.New() + endpointUUIDString := fmt.Sprint(endpointUUID) + + createdUpdated := models.CreatedUpdated{ + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + } + + testCases := []struct { + desc string + vars map[string]string + deploymentService func() *mocks.DeploymentService + expected *Response + }{ + { + desc: "Should success list deployments", + vars: map[string]string{ + "model_id": "model", + "version_id": "1", + "endpoint_id": endpointUUIDString, + }, + deploymentService: func() *mocks.DeploymentService { + mockSvc := &mocks.DeploymentService{} + mockSvc.On("ListDeployments", "model", "1", endpointUUIDString).Return([]*models.Deployment{ + { + ID: models.ID(1), + ProjectID: models.ID(1), + VersionModelID: models.ID(1), + VersionID: models.ID(1), + VersionEndpointID: endpointUUID, + Status: models.EndpointRunning, + Error: "", + CreatedUpdated: createdUpdated, + }, + 
}, nil) + return mockSvc + }, + expected: &Response{ + code: http.StatusOK, + data: []*models.Deployment{ + { + ID: models.ID(1), + ProjectID: models.ID(1), + VersionModelID: models.ID(1), + VersionID: models.ID(1), + VersionEndpointID: endpointUUID, + Status: models.EndpointRunning, + Error: "", + CreatedUpdated: createdUpdated, + }, + }, + }, + }, + { + desc: "Should return 500 when failed fetching list of deployments", + vars: map[string]string{ + "model_id": "model", + "version_id": "1", + "endpoint_id": endpointUUIDString, + }, + deploymentService: func() *mocks.DeploymentService { + mockSvc := &mocks.DeploymentService{} + mockSvc.On("ListDeployments", "model", "1", endpointUUIDString).Return(nil, fmt.Errorf("Database is down")) + return mockSvc + }, + expected: &Response{ + code: http.StatusInternalServerError, + data: Error{ + Message: "Error listing deployments: Database is down", + }, + }, + }, + } + for _, tC := range testCases { + t.Run(tC.desc, func(t *testing.T) { + mockSvc := tC.deploymentService() + ctl := &DeploymentController{ + AppContext: &AppContext{ + DeploymentService: mockSvc, + }, + } + resp := ctl.ListDeployments(&http.Request{}, tC.vars, nil) + assertEqualResponses(t, tC.expected, resp) + }) + } +} diff --git a/api/api/router.go b/api/api/router.go index 6ce13d324..45aad0f9d 100644 --- a/api/api/router.go +++ b/api/api/router.go @@ -53,6 +53,7 @@ type AppContext struct { DB *gorm.DB Enforcer enforcer.Enforcer + DeploymentService service.DeploymentService EnvironmentService service.EnvironmentService ProjectsService service.ProjectsService ModelsService service.ModelsService @@ -154,6 +155,7 @@ func NewRouter(appCtx AppContext) (*mux.Router, error) { if err != nil { return nil, err } + deploymentController := DeploymentController{&appCtx} environmentController := EnvironmentController{&appCtx} projectsController := ProjectsController{&appCtx} modelEndpointsController := ModelEndpointsController{&appCtx} @@ -206,6 +208,9 @@ func 
NewRouter(appCtx AppContext) (*mux.Router, error) { // To maintain backward compatibility with SDK v0.1.0 {http.MethodDelete, "/models/{model_id:[0-9]+}/versions/{version_id:[0-9]+}/endpoint", nil, endpointsController.DeleteEndpoint, "DeleteDefaultEndpoint"}, + // Deployments API + {http.MethodGet, "/models/{model_id:[0-9]+}/versions/{version_id:[0-9]+}/endpoints/{endpoint_id}/deployments", nil, deploymentController.ListDeployments, "ListDeployments"}, + {http.MethodGet, "/models/{model_id:[0-9]+}/versions/{version_id:[0-9]+}/endpoint/{endpoint_id}", nil, endpointsController.GetEndpoint, "GetEndpoint"}, {http.MethodPut, "/models/{model_id:[0-9]+}/versions/{version_id:[0-9]+}/endpoint/{endpoint_id}", models.VersionEndpoint{}, endpointsController.UpdateEndpoint, "UpdateEndpoint"}, {http.MethodDelete, "/models/{model_id:[0-9]+}/versions/{version_id:[0-9]+}/endpoint/{endpoint_id}", nil, endpointsController.DeleteEndpoint, "DeleteEndpoint"}, diff --git a/api/api/version_endpoints_api.go b/api/api/version_endpoints_api.go index 98022671b..f6fe5ef31 100644 --- a/api/api/version_endpoints_api.go +++ b/api/api/version_endpoints_api.go @@ -379,12 +379,16 @@ func (c *EndpointsController) ListContainers(r *http.Request, vars map[string]st if err != nil { return NotFound(fmt.Sprintf("Version not found: %v", err)) } + endpoint, err := c.EndpointsService.FindByID(ctx, endpointID) + if err != nil { + return NotFound(fmt.Sprintf("Endpoint not found: %v", err)) + } - endpoint, err := c.EndpointsService.ListContainers(ctx, model, version, endpointID) + containers, err := c.EndpointsService.ListContainers(ctx, model, version, endpoint) if err != nil { return InternalServerError(fmt.Sprintf("Error while getting container for endpoint: %v", err)) } - return Ok(endpoint) + return Ok(containers) } func validateUpdateRequest(prev *models.VersionEndpoint, new *models.VersionEndpoint) error { diff --git a/api/api/version_endpoints_api_test.go b/api/api/version_endpoints_api_test.go index 
e61dffade..ab124e8a7 100644 --- a/api/api/version_endpoints_api_test.go +++ b/api/api/version_endpoints_api_test.go @@ -536,7 +536,13 @@ func TestListContainers(t *testing.T) { }, endpointService: func() *mocks.EndpointsService { svc := &mocks.EndpointsService{} - svc.On("ListContainers", context.Background(), mock.Anything, mock.Anything, uuid).Return([]*models.Container{ + svc.On("FindByID", context.Background(), uuid).Return(&models.VersionEndpoint{ + ID: uuid, + VersionModelID: models.ID(1), + VersionID: models.ID(1), + RevisionID: models.ID(1), + }, nil) + svc.On("ListContainers", context.Background(), mock.Anything, mock.Anything, mock.Anything).Return([]*models.Container{ { Name: "pod-1", PodName: "pod-1-1", @@ -656,7 +662,13 @@ func TestListContainers(t *testing.T) { }, endpointService: func() *mocks.EndpointsService { svc := &mocks.EndpointsService{} - svc.On("ListContainers", context.Background(), mock.Anything, mock.Anything, uuid).Return(nil, fmt.Errorf("Error creating secret: db is down")) + svc.On("FindByID", context.Background(), uuid).Return(&models.VersionEndpoint{ + ID: uuid, + VersionModelID: models.ID(1), + VersionID: models.ID(1), + RevisionID: models.ID(1), + }, nil) + svc.On("ListContainers", context.Background(), mock.Anything, mock.Anything, mock.Anything).Return(nil, fmt.Errorf("Error creating secret: db is down")) return svc }, expected: &Response{ diff --git a/api/cluster/container_test.go b/api/cluster/container_test.go index d98e65610..110688585 100644 --- a/api/cluster/container_test.go +++ b/api/cluster/container_test.go @@ -88,7 +88,7 @@ func TestContainer_GetContainers(t *testing.T) { clusterMetadata := Metadata{GcpProject: "my-gcp", ClusterName: "my-cluster"} containerFetcher := NewContainerFetcher(v1Client, clusterMetadata) - ctl, _ := newController(knClient, kfClient, v1Client, nil, policyV1Client, config.DeploymentConfig{}, containerFetcher, nil) + ctl, _ := newController(knClient, kfClient, v1Client, nil, policyV1Client, nil, 
config.DeploymentConfig{}, containerFetcher, nil) containers, err := ctl.GetContainers(context.Background(), tt.args.namespace, tt.args.labelSelector) if !tt.wantError { assert.NoErrorf(t, err, "expected no error got %v", err) diff --git a/api/cluster/controller.go b/api/cluster/controller.go index efcc054f1..960c78fef 100644 --- a/api/cluster/controller.go +++ b/api/cluster/controller.go @@ -16,12 +16,14 @@ package cluster import ( "context" + "fmt" "io" "time" kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" kservev1beta1client "github.com/kserve/kserve/pkg/client/clientset/versioned/typed/serving/v1beta1" "github.com/pkg/errors" + networkingv1beta1 "istio.io/client-go/pkg/clientset/versioned/typed/networking/v1beta1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" @@ -83,6 +85,7 @@ type controller struct { clusterClient corev1client.CoreV1Interface batchClient batchv1client.BatchV1Interface policyClient policyv1client.PolicyV1Interface + istioClient networkingv1beta1.NetworkingV1beta1Interface namespaceCreator NamespaceCreator deploymentConfig *config.DeploymentConfig kfServingResourceTemplater *resource.InferenceServiceTemplater @@ -126,6 +129,11 @@ func NewController(clusterConfig Config, deployConfig config.DeploymentConfig, s return nil, err } + istioClient, err := networkingv1beta1.NewForConfig(cfg) + if err != nil { + return nil, err + } + containerFetcher := NewContainerFetcher(coreV1Client, Metadata{ ClusterName: clusterConfig.ClusterName, GcpProject: clusterConfig.GcpProject, @@ -138,6 +146,7 @@ func NewController(clusterConfig Config, deployConfig config.DeploymentConfig, s coreV1Client, batchV1Client, policyV1Client, + istioClient, deployConfig, containerFetcher, kfServingResourceTemplater, @@ -150,6 +159,7 @@ func newController( coreV1Client corev1client.CoreV1Interface, batchV1Client batchv1client.BatchV1Interface, policyV1Client policyv1client.PolicyV1Interface, + istioClient 
networkingv1beta1.NetworkingV1beta1Interface, deploymentConfig config.DeploymentConfig, containerFetcher ContainerFetcher, templater *resource.InferenceServiceTemplater, @@ -160,6 +170,7 @@ func newController( clusterClient: coreV1Client, batchClient: batchV1Client, policyClient: policyV1Client, + istioClient: istioClient, namespaceCreator: NewNamespaceCreator(coreV1Client, deploymentConfig.NamespaceTimeout), deploymentConfig: &deploymentConfig, ContainerFetcher: containerFetcher, @@ -186,56 +197,43 @@ func (c *controller) Deploy(ctx context.Context, modelService *models.Service) ( _, err := c.namespaceCreator.CreateNamespace(ctx, modelService.Namespace) if err != nil { log.Errorf("unable to create namespace %s %v", modelService.Namespace, err) - return nil, ErrUnableToCreateNamespace + return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToCreateNamespace, modelService.Namespace)) } isvcName := modelService.Name - s, err := c.kserveClient.InferenceServices(modelService.Namespace).Get(isvcName, metav1.GetOptions{}) - if err != nil { - if !kerrors.IsNotFound(err) { - log.Errorf("unable to check inference service %s %v", isvcName, err) - return nil, ErrUnableToGetInferenceServiceStatus - } - - // create new resource - spec, err := c.kfServingResourceTemplater.CreateInferenceServiceSpec(modelService, c.deploymentConfig) - if err != nil { - log.Errorf("unable to create inference service spec %s %v", isvcName, err) - return nil, ErrUnableToCreateInferenceService - } - s, err = c.kserveClient.InferenceServices(modelService.Namespace).Create(spec) - if err != nil { - log.Errorf("unable to create inference service %s %v", isvcName, err) - return nil, ErrUnableToCreateInferenceService - } - } else { - // Get current scale of the existing deployment - deploymentScale := resource.DeploymentScale{} + // Get current scale of the existing deployment + deploymentScale := resource.DeploymentScale{} + if modelService.CurrentIsvcName != "" { if modelService.DeploymentMode 
== deployment.ServerlessDeploymentMode || modelService.DeploymentMode == deployment.EmptyDeploymentMode { - deploymentScale = c.GetCurrentDeploymentScale(ctx, modelService.Namespace, s.Status.Components) - } + currentIsvc, err := c.kserveClient.InferenceServices(modelService.Namespace).Get(modelService.CurrentIsvcName, metav1.GetOptions{}) + if err != nil && !kerrors.IsNotFound(err) { + return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToGetInferenceServiceStatus, isvcName)) + } - patchedSpec, err := c.kfServingResourceTemplater.PatchInferenceServiceSpec(s, modelService, c.deploymentConfig, deploymentScale) - if err != nil { - log.Errorf("unable to update inference service %s %v", isvcName, err) - return nil, ErrUnableToUpdateInferenceService + deploymentScale = c.GetCurrentDeploymentScale(ctx, modelService.Namespace, currentIsvc.Status.Components) } + } - // existing resource found, do update - s, err = c.kserveClient.InferenceServices(modelService.Namespace).Update(patchedSpec) - if err != nil { - log.Errorf("unable to update inference service %s %v", isvcName, err) - return nil, ErrUnableToUpdateInferenceService - } + // create new resource + spec, err := c.kfServingResourceTemplater.CreateInferenceServiceSpec(modelService, c.deploymentConfig, deploymentScale) + if err != nil { + log.Errorf("unable to create inference service spec %s: %v", isvcName, err) + return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToCreateInferenceService, isvcName)) + } + + s, err := c.kserveClient.InferenceServices(modelService.Namespace).Create(spec) + if err != nil { + log.Errorf("unable to create inference service %s: %v", isvcName, err) + return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToCreateInferenceService, isvcName)) } if c.deploymentConfig.PodDisruptionBudget.Enabled { pdbs := createPodDisruptionBudgets(modelService, c.deploymentConfig.PodDisruptionBudget) if err := c.deployPodDisruptionBudgets(ctx, pdbs); err != nil { - 
log.Errorf("unable to create pdb %v", err) - return nil, ErrUnableToCreatePDB + log.Errorf("unable to create pdb: %v", err) + return nil, errors.Wrapf(err, fmt.Sprintf("%v", ErrUnableToCreatePDB)) } } @@ -243,19 +241,46 @@ func (c *controller) Deploy(ctx context.Context, modelService *models.Service) ( if err != nil { // remove created inferenceservice when got error if err := c.deleteInferenceService(isvcName, modelService.Namespace); err != nil { - log.Warnf("unable to delete inference service %s with error %v", isvcName, err) + log.Errorf("unable to delete inference service %s with error %v", isvcName, err) } - return nil, err + return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToGetInferenceServiceStatus, isvcName)) } inferenceURL := models.GetInferenceURL(s.Status.URL, isvcName, modelService.Protocol) + + // Create / update virtual service + vsCfg, err := NewVirtualService(modelService, inferenceURL) + if err != nil { + log.Errorf("unable to initialize virtual service builder: %v", err) + return nil, errors.Wrapf(err, fmt.Sprintf("%v", ErrUnableToCreateVirtualService)) + } + + vs, err := c.deployVirtualService(ctx, vsCfg) + if err != nil { + log.Errorf("unable to create virtual service: %v", err) + return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToCreateVirtualService, vsCfg.Name)) + } + + if vs != nil && len(vs.Spec.Hosts) > 0 { + inferenceURL = vsCfg.getInferenceURL(vs) + } + + // Delete previous inference service + if modelService.CurrentIsvcName != "" { + if err := c.deleteInferenceService(modelService.CurrentIsvcName, modelService.Namespace); err != nil { + log.Errorf("unable to delete prevision revision %s with error %v", modelService.CurrentIsvcName, err) + return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToDeletePreviousInferenceService, modelService.CurrentIsvcName)) + } + } + return &models.Service{ - Name: s.Name, - Namespace: s.Namespace, - ServiceName: s.Status.URL.Host, - URL: inferenceURL, - Metadata: 
modelService.Metadata, + Name: s.Name, + Namespace: s.Namespace, + ServiceName: s.Status.URL.Host, + URL: inferenceURL, + Metadata: modelService.Metadata, + CurrentIsvcName: s.Name, }, nil } @@ -280,6 +305,14 @@ func (c *controller) Delete(ctx context.Context, modelService *models.Service) ( } } + if modelService.RevisionID > 1 { + vsName := fmt.Sprintf("%s-%s-%s", modelService.ModelName, modelService.ModelVersion, models.VirtualServiceComponentType) + if err := c.deleteVirtualService(ctx, vsName, modelService.Namespace); err != nil { + log.Errorf("unable to delete virtual service %v", err) + return nil, ErrUnableToDeleteVirtualService + } + } + return modelService, nil } diff --git a/api/cluster/controller_test.go b/api/cluster/controller_test.go index c6e107eb6..fadbb3a34 100644 --- a/api/cluster/controller_test.go +++ b/api/cluster/controller_test.go @@ -25,6 +25,9 @@ import ( fakekserve "github.com/kserve/kserve/pkg/client/clientset/versioned/fake" fakekservev1beta1 "github.com/kserve/kserve/pkg/client/clientset/versioned/typed/serving/v1beta1/fake" "github.com/stretchr/testify/assert" + istiov1beta1 "istio.io/client-go/pkg/apis/networking/v1beta1" + fakeistio "istio.io/client-go/pkg/clientset/versioned/fake" + fakeistionetworking "istio.io/client-go/pkg/clientset/versioned/typed/networking/v1beta1/fake" corev1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" @@ -54,6 +57,7 @@ const ( listMethod = "list" getMethod = "get" createMethod = "create" + patchMethod = "patch" updateMethod = "update" deleteMethod = "delete" deleteCollectionMethod = "delete-collection" @@ -63,6 +67,7 @@ const ( knativeVersion = "v1" inferenceServiceResource = "inferenceservices" revisionResource = "revisions" + virtualServiceResource = "virtualservices" coreGroup = "" namespaceResource = "namespaces" @@ -83,8 +88,13 @@ type inferenceServiceReactor struct { err error } -type knativeRevisionReactor struct { - rev *knservingv1.Revision +type 
pdbReactor struct { + pdb *policyv1.PodDisruptionBudget + err error +} + +type vsReactor struct { + vs *istiov1beta1.VirtualService err error } @@ -93,6 +103,7 @@ var clusterMetadata = Metadata{GcpProject: "my-gcp", ClusterName: "my-cluster"} // TestDeployInferenceServiceNamespaceCreation test namespaceResource creation when deploying inference service func TestController_DeployInferenceService_NamespaceCreation(t *testing.T) { nsTimeout := 2 * tickDurationSecond * time.Second + model := &models.Model{ Name: "my-model", } @@ -102,13 +113,19 @@ func TestController_DeployInferenceService_NamespaceCreation(t *testing.T) { version := &models.Version{ ID: 1, } + revisionID := models.ID(1) modelOpt := &models.ModelOption{} - isvc := fakeInferenceService(model.Name, version.ID.String(), project.Name) + + isvc := fakeInferenceService(model.Name, version.ID.String(), revisionID.String(), project.Name) + vs := fakeVirtualService(model.Name, version.ID.String()) modelSvc := &models.Service{ - Name: isvc.Name, - Namespace: project.Name, - Options: modelOpt, + Name: isvc.Name, + ModelName: model.Name, + ModelVersion: version.ID.String(), + RevisionID: revisionID, + Namespace: project.Name, + Options: modelOpt, } tests := []struct { @@ -257,18 +274,17 @@ func TestController_DeployInferenceService_NamespaceCreation(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { knClient := knservingfake.NewSimpleClientset().ServingV1() + kfClient := fakekserve.NewSimpleClientset().ServingV1beta1().(*fakekservev1beta1.FakeServingV1beta1) kfClient.PrependReactor(getMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { - kfClient.PrependReactor(getMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { - return true, isvc, nil - }) - return true, nil, kerrors.NewNotFound(schema.GroupResource{Group: kfservingGroup, Resource: inferenceServiceResource}, 
isvc.Name) + return true, isvc, nil }) kfClient.PrependReactor(createMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { return true, isvc, nil }) v1Client := fake.NewSimpleClientset().CoreV1() + nsClient := v1Client.Namespaces().(*fakecorev1.FakeNamespaces) nsClient.Fake.PrependReactor(getMethod, namespaceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { nsClient.Fake.PrependReactor(getMethod, namespaceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { @@ -282,6 +298,11 @@ func TestController_DeployInferenceService_NamespaceCreation(t *testing.T) { policyV1Client := fake.NewSimpleClientset().PolicyV1() + istioClient := fakeistio.NewSimpleClientset().NetworkingV1beta1().(*fakeistionetworking.FakeNetworkingV1beta1) + istioClient.PrependReactor(patchMethod, virtualServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { + return true, vs, nil + }) + deployConfig := config.DeploymentConfig{ NamespaceTimeout: tt.nsTimeout, DeploymentTimeout: 2 * tickDurationSecond * time.Second, @@ -289,7 +310,8 @@ func TestController_DeployInferenceService_NamespaceCreation(t *testing.T) { } containerFetcher := NewContainerFetcher(v1Client, clusterMetadata) - ctl, _ := newController(knClient, kfClient, v1Client, nil, policyV1Client, deployConfig, containerFetcher, nil) + + ctl, _ := newController(knClient, kfClient, v1Client, nil, policyV1Client, istioClient, deployConfig, containerFetcher, nil) iSvc, err := ctl.Deploy(context.Background(), modelSvc) if tt.wantError { @@ -297,6 +319,7 @@ func TestController_DeployInferenceService_NamespaceCreation(t *testing.T) { assert.Nil(t, iSvc) return } + assert.NoError(t, err) assert.NotNil(t, iSvc) }) @@ -305,8 +328,8 @@ func TestController_DeployInferenceService_NamespaceCreation(t *testing.T) { func TestController_DeployInferenceService(t *testing.T) { defaultMaxUnavailablePDB 
:= 20 - deployTimeout := 2 * tickDurationSecond * time.Second + model := &models.Model{ Name: "my-model", } @@ -316,81 +339,59 @@ func TestController_DeployInferenceService(t *testing.T) { version := &models.Version{ ID: 1, } + revisionID := models.ID(1) modelOpt := &models.ModelOption{} - isvcName := models.CreateInferenceServiceName(model.Name, version.ID.String()) + + isvcName := models.CreateInferenceServiceName(model.Name, version.ID.String(), revisionID.String()) statusReady := createServiceReadyStatus(isvcName, project.Name, baseUrl) namespace := &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{Name: project.Name}, Status: corev1.NamespaceStatus{Phase: corev1.NamespaceActive}, } pdb := &policyv1.PodDisruptionBudget{} + vs := fakeVirtualService(model.Name, version.ID.String()) modelSvc := &models.Service{ - Name: isvcName, - Namespace: project.Name, - Options: modelOpt, + Name: isvcName, + ModelName: model.Name, + ModelVersion: version.ID.String(), + RevisionID: revisionID, + Namespace: project.Name, + Options: modelOpt, } tests := []struct { - name string - modelService *models.Service - getRevResult *knativeRevisionReactor - getResult *inferenceServiceReactor - createResult *inferenceServiceReactor - updateResult *inferenceServiceReactor - checkResult *inferenceServiceReactor - deployTimeout time.Duration - wantError bool + name string + modelService *models.Service + createResult *inferenceServiceReactor + checkResult *inferenceServiceReactor + createPdbResult *pdbReactor + createVsResult *vsReactor + deployTimeout time.Duration + wantError bool }{ { - "success: create inference service", - modelSvc, - &knativeRevisionReactor{}, - &inferenceServiceReactor{ - nil, - kerrors.NewNotFound(schema.GroupResource{Group: kfservingGroup, Resource: inferenceServiceResource}, isvcName), - }, - &inferenceServiceReactor{ + name: "success: create inference service", + modelService: modelSvc, + createResult: &inferenceServiceReactor{ 
&kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}}, nil, }, - nil, - &inferenceServiceReactor{ + checkResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}, Status: statusReady, }, nil, }, - deployTimeout, - false, + deployTimeout: deployTimeout, + createPdbResult: &pdbReactor{pdb, nil}, + createVsResult: &vsReactor{vs, nil}, + wantError: false, }, { - "success: update inference service", - modelSvc, - &knativeRevisionReactor{err: kerrors.NewNotFound(schema.GroupResource{}, "test service")}, - &inferenceServiceReactor{ - &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}}, - nil, - }, - nil, - &inferenceServiceReactor{ - &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}}, - nil, - }, - &inferenceServiceReactor{ - &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}, - Status: statusReady, - }, - nil, - }, - deployTimeout, - false, - }, - { - "success: deploying service", - &models.Service{ + name: "success: deploying service", + modelService: &models.Service{ Name: isvcName, Namespace: project.Name, Options: modelOpt, @@ -401,29 +402,25 @@ func TestController_DeployInferenceService(t *testing.T) { MemoryRequest: resource.MustParse("1Gi"), }, }, - &knativeRevisionReactor{}, - &inferenceServiceReactor{ - nil, - kerrors.NewNotFound(schema.GroupResource{Group: kfservingGroup, Resource: inferenceServiceResource}, isvcName), - }, - &inferenceServiceReactor{ + createResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}}, nil, }, - nil, - &inferenceServiceReactor{ + checkResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: 
project.Name}, Status: statusReady, }, nil, }, - deployTimeout, - false, + deployTimeout: deployTimeout, + createPdbResult: &pdbReactor{pdb, nil}, + createVsResult: &vsReactor{vs, nil}, + wantError: false, }, { - "success: create inference service with transformer", - &models.Service{ + name: "success: create inference service with transformer", + modelService: &models.Service{ Name: isvcName, Namespace: project.Name, Options: modelOpt, @@ -433,187 +430,147 @@ func TestController_DeployInferenceService(t *testing.T) { Image: "ghcr.io/caraml-dev/merlin-transformer-test", }, }, - &knativeRevisionReactor{}, - &inferenceServiceReactor{ - nil, - kerrors.NewNotFound(schema.GroupResource{Group: kfservingGroup, Resource: inferenceServiceResource}, isvcName), - }, - &inferenceServiceReactor{ + createResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}}, nil, }, - nil, - &inferenceServiceReactor{ + checkResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}, Status: statusReady, }, nil, }, - deployTimeout, - false, + deployTimeout: deployTimeout, + createPdbResult: &pdbReactor{pdb, nil}, + createVsResult: &vsReactor{vs, nil}, + wantError: false, }, { - "error: failed get", - modelSvc, - &knativeRevisionReactor{}, - &inferenceServiceReactor{ - nil, - errors.New("error"), - }, - &inferenceServiceReactor{ - &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}}, - nil, - }, - nil, - &inferenceServiceReactor{ - &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}, - Status: statusReady, - }, + name: "error: failed create", + modelService: modelSvc, + createResult: &inferenceServiceReactor{ nil, + errors.New("error creating inference service"), }, - deployTimeout, - true, + checkResult: nil, + deployTimeout: 
deployTimeout, + createPdbResult: &pdbReactor{pdb, nil}, + createVsResult: &vsReactor{vs, nil}, + wantError: true, }, { - "error: failed create", - modelSvc, - &knativeRevisionReactor{}, - &inferenceServiceReactor{ - nil, - kerrors.NewNotFound(schema.GroupResource{Group: "kubeflow.com/kfserving", Resource: "inferenceservices"}, isvcName), - }, - &inferenceServiceReactor{ + name: "error: failed check", + modelService: modelSvc, + createResult: &inferenceServiceReactor{ + &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName}}, nil, - errors.New("error creating inference service"), }, - nil, - &inferenceServiceReactor{ - &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}, - Status: statusReady, - }, + checkResult: &inferenceServiceReactor{ nil, + errors.New("error check"), }, - deployTimeout, - true, + deployTimeout: deployTimeout, + createPdbResult: &pdbReactor{pdb, nil}, + createVsResult: &vsReactor{vs, nil}, + wantError: true, }, { - "error: failed update", - modelSvc, - &knativeRevisionReactor{err: kerrors.NewNotFound(schema.GroupResource{}, "test service")}, - &inferenceServiceReactor{ - &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}}, - nil, - }, - nil, - &inferenceServiceReactor{ + name: "error: predictor error", + modelService: modelSvc, + createResult: &inferenceServiceReactor{ + &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName}}, nil, - errors.New("error updating inference service"), }, - &inferenceServiceReactor{ + checkResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}, - Status: statusReady, + Status: createPredErrorCond(), }, nil, }, - deployTimeout, - true, + deployTimeout: deployTimeout, + createPdbResult: &pdbReactor{pdb, nil}, + createVsResult: &vsReactor{vs, nil}, + wantError: true, }, { - "error: failed 
check", - modelSvc, - &knativeRevisionReactor{}, - &inferenceServiceReactor{ - nil, - kerrors.NewNotFound(schema.GroupResource{Group: "kubeflow.com/kfserving", Resource: "inferenceservices"}, isvcName), - }, - &inferenceServiceReactor{ + name: "error: routes error", + modelService: modelSvc, + createResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName}}, nil, }, - nil, - &inferenceServiceReactor{ + checkResult: &inferenceServiceReactor{ + &kservev1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}, + Status: createRoutesErrorCond(), + }, nil, - errors.New("error check"), }, - deployTimeout, - true, + deployTimeout: deployTimeout, + createPdbResult: &pdbReactor{pdb, nil}, + createVsResult: &vsReactor{vs, nil}, + wantError: true, }, { - "error: predictor error", - modelSvc, - &knativeRevisionReactor{}, - &inferenceServiceReactor{ - nil, - kerrors.NewNotFound(schema.GroupResource{Group: "kubeflow.com/kfserving", Resource: "inferenceservices"}, isvcName), - }, - &inferenceServiceReactor{ + name: "error: pdb error", + modelService: modelSvc, + createResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName}}, nil, }, - nil, - &inferenceServiceReactor{ + checkResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}, - Status: createPredErrorCond(), + Status: createRoutesErrorCond(), }, nil, }, - deployTimeout, - true, + deployTimeout: deployTimeout, + createPdbResult: &pdbReactor{nil, ErrUnableToCreatePDB}, + createVsResult: &vsReactor{vs, nil}, + wantError: true, }, { - "error: routes error", - modelSvc, - &knativeRevisionReactor{}, - &inferenceServiceReactor{ - nil, - kerrors.NewNotFound(schema.GroupResource{Group: "kubeflow.com/kfserving", Resource: "inferenceservices"}, isvcName), - }, - &inferenceServiceReactor{ + name: "error: vs 
error", + modelService: modelSvc, + createResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName}}, nil, }, - nil, - &inferenceServiceReactor{ + checkResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}, Status: createRoutesErrorCond(), }, nil, }, - deployTimeout, - true, + deployTimeout: deployTimeout, + createPdbResult: &pdbReactor{pdb, nil}, + createVsResult: &vsReactor{nil, ErrUnableToCreateVirtualService}, + wantError: true, }, { - "error: timeout", - modelSvc, - &knativeRevisionReactor{}, - &inferenceServiceReactor{ - nil, - kerrors.NewNotFound(schema.GroupResource{Group: kfservingGroup, Resource: inferenceServiceResource}, isvcName), - }, - &inferenceServiceReactor{ + name: "error: timeout", + modelService: modelSvc, + createResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}}, nil, }, - nil, - &inferenceServiceReactor{ + checkResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}, Status: statusReady, }, nil, }, - 1 * time.Millisecond, - true, + deployTimeout: 1 * time.Millisecond, + wantError: true, }, { - "error: deploying service due to insufficient CPU", - &models.Service{ + name: "error: deploying service due to insufficient CPU", + modelService: &models.Service{ Name: isvcName, Namespace: project.Name, Options: modelOpt, @@ -624,29 +581,25 @@ func TestController_DeployInferenceService(t *testing.T) { MemoryRequest: resource.MustParse("1Gi"), }, }, - &knativeRevisionReactor{}, - &inferenceServiceReactor{ - nil, - kerrors.NewNotFound(schema.GroupResource{Group: kfservingGroup, Resource: inferenceServiceResource}, isvcName), - }, - &inferenceServiceReactor{ + createResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ObjectMeta: 
metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}}, nil, }, - nil, - &inferenceServiceReactor{ + checkResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}, Status: statusReady, }, nil, }, - deployTimeout, - true, + deployTimeout: deployTimeout, + createPdbResult: &pdbReactor{pdb, nil}, + createVsResult: &vsReactor{vs, nil}, + wantError: true, }, { - "error: deploying service due to insufficient memory", - &models.Service{ + name: "error: deploying service due to insufficient memory", + modelService: &models.Service{ Name: isvcName, Namespace: project.Name, Options: modelOpt, @@ -657,48 +610,35 @@ func TestController_DeployInferenceService(t *testing.T) { MemoryRequest: resource.MustParse("10Gi"), }, }, - &knativeRevisionReactor{}, - &inferenceServiceReactor{ - nil, - kerrors.NewNotFound(schema.GroupResource{Group: kfservingGroup, Resource: inferenceServiceResource}, isvcName), - }, - &inferenceServiceReactor{ + createResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}}, nil, }, - nil, - &inferenceServiceReactor{ + checkResult: &inferenceServiceReactor{ &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}, Status: statusReady, }, nil, }, - deployTimeout, - true, + deployTimeout: deployTimeout, + createPdbResult: &pdbReactor{pdb, nil}, + createVsResult: &vsReactor{vs, nil}, + wantError: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { knClient := knservingfake.NewSimpleClientset() - knClient.PrependReactor(getMethod, revisionResource, func(action k8stesting.Action) (bool, runtime.Object, error) { - return true, tt.getRevResult.rev, tt.getRevResult.err - }) kfClient := fakekserve.NewSimpleClientset().ServingV1beta1().(*fakekservev1beta1.FakeServingV1beta1) kfClient.PrependReactor(getMethod, inferenceServiceResource, 
func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { - kfClient.PrependReactor(getMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { - return true, tt.checkResult.isvc, tt.checkResult.err - }) - return true, tt.getResult.isvc, tt.getResult.err + return true, tt.checkResult.isvc, tt.checkResult.err }) kfClient.PrependReactor(createMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { return true, tt.createResult.isvc, tt.createResult.err }) - kfClient.PrependReactor(updateMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { - return true, tt.updateResult.isvc, tt.updateResult.err - }) kfClient.PrependReactor(deleteMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { return true, nil, nil @@ -711,8 +651,13 @@ func TestController_DeployInferenceService(t *testing.T) { }) policyV1Client := fake.NewSimpleClientset().PolicyV1().(*fakepolicyv1.FakePolicyV1) - policyV1Client.Fake.PrependReactor("patch", pdbResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { - return true, pdb, nil + policyV1Client.Fake.PrependReactor(patchMethod, pdbResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { + return true, tt.createPdbResult.pdb, tt.createPdbResult.err + }) + + istioClient := fakeistio.NewSimpleClientset().NetworkingV1beta1().(*fakeistionetworking.FakeNetworkingV1beta1) + istioClient.PrependReactor(patchMethod, virtualServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { + return true, tt.createVsResult.vs, tt.createVsResult.err }) deployConfig := config.DeploymentConfig{ @@ -734,7 +679,7 @@ func TestController_DeployInferenceService(t *testing.T) { FeastServingKeepAlive: &config.FeastServingKeepAliveConfig{}, }) - ctl, 
_ := newController(knClient.ServingV1(), kfClient, v1Client, nil, policyV1Client, deployConfig, containerFetcher, templater) + ctl, _ := newController(knClient.ServingV1(), kfClient, v1Client, nil, policyV1Client, istioClient, deployConfig, containerFetcher, templater) iSvc, err := ctl.Deploy(context.Background(), tt.modelService) if tt.wantError { @@ -867,7 +812,7 @@ func TestGetCurrentDeploymentScale(t *testing.T) { }) // Create test controller - ctl, _ := newController(knClient.ServingV1(), kfClient, v1Client, nil, policyV1Client, deployConfig, containerFetcher, templater) + ctl, _ := newController(knClient.ServingV1(), kfClient, v1Client, nil, policyV1Client, nil, deployConfig, containerFetcher, templater) desiredReplicas := ctl.GetCurrentDeploymentScale(context.TODO(), testNamespace, tt.components) assert.Equal(t, tt.expectedScale, desiredReplicas) @@ -875,12 +820,16 @@ func TestGetCurrentDeploymentScale(t *testing.T) { } } -func fakeInferenceService(model, version, project string) *kservev1beta1.InferenceService { - svcName := models.CreateInferenceServiceName(model, version) +func fakeInferenceService(model, version, revisionID, project string) *kservev1beta1.InferenceService { + svcName := models.CreateInferenceServiceName(model, version, revisionID) status := createServiceReadyStatus(svcName, project, baseUrl) return &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: svcName, Namespace: project}, Status: status} } +func fakeVirtualService(model, version string) *istiov1beta1.VirtualService { + return &istiov1beta1.VirtualService{ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%s-%s", model, version)}} +} + func createServiceReadyStatus(iSvcName, namespace, baseUrl string) kservev1beta1.InferenceServiceStatus { status := kservev1beta1.InferenceServiceStatus{} status.InitializeConditions() @@ -1016,9 +965,10 @@ func Test_controller_ListPods(t *testing.T) { } func TestController_Delete(t *testing.T) { - isvcName := 
models.CreateInferenceServiceName("my-model", "1") + isvcName := models.CreateInferenceServiceName("my-model", "1", "1") projectName := "my-project" pdb := &policyv1.PodDisruptionBudget{} + vs := fakeVirtualService("my-model", "1") tests := []struct { name string @@ -1187,11 +1137,16 @@ func TestController_Delete(t *testing.T) { return true, pdb, nil }) + istioClient := fakeistio.NewSimpleClientset().NetworkingV1beta1().(*fakeistionetworking.FakeNetworkingV1beta1) + istioClient.PrependReactor(deleteMethod, virtualServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { + return true, vs, nil + }) + containerFetcher := NewContainerFetcher(v1Client, clusterMetadata) templater := clusterresource.NewInferenceServiceTemplater(config.StandardTransformerConfig{}) - ctl, _ := newController(knClient, kfClient, v1Client, nil, policyV1Client, tt.deployConfig, containerFetcher, templater) + ctl, _ := newController(knClient, kfClient, v1Client, nil, policyV1Client, istioClient, tt.deployConfig, containerFetcher, templater) mSvc, err := ctl.Delete(context.Background(), tt.modelService) if tt.wantError { diff --git a/api/cluster/errors.go b/api/cluster/errors.go index 67f1cbe28..506f111ba 100644 --- a/api/cluster/errors.go +++ b/api/cluster/errors.go @@ -17,15 +17,19 @@ package cluster import "errors" var ( - ErrInsufficientCPU = errors.New("CPU request is too large") - ErrInsufficientMem = errors.New("memory request too large") - ErrTimeoutNamespace = errors.New("timeout creating namespace") - ErrUnableToCreateNamespace = errors.New("error creating namespace") - ErrUnableToGetNamespaceStatus = errors.New("error retrieving namespace status") - ErrUnableToGetInferenceServiceStatus = errors.New("error retrieving inference service status") - ErrUnableToCreateInferenceService = errors.New("error creating inference service") - ErrUnableToUpdateInferenceService = errors.New("error updating inference service") - ErrTimeoutCreateInferenceService = 
errors.New("timeout creating inference service") - ErrUnableToCreatePDB = errors.New("error creating pod disruption budget") - ErrUnableToDeletePDB = errors.New("error deleting pod disruption budget") + ErrInsufficientCPU = errors.New("CPU request is too large") + ErrInsufficientMem = errors.New("memory request too large") + ErrTimeoutNamespace = errors.New("timeout creating namespace") + ErrUnableToCreateNamespace = errors.New("error creating namespace") + ErrUnableToGetNamespaceStatus = errors.New("error retrieving namespace status") + ErrUnableToGetInferenceServiceStatus = errors.New("error retrieving inference service status") + ErrUnableToCreateInferenceService = errors.New("error creating inference service") + ErrUnableToUpdateInferenceService = errors.New("error updating inference service") + ErrUnableToDeleteInferenceService = errors.New("error deleting inference service") + ErrUnableToDeletePreviousInferenceService = errors.New("error deleting previous inference service") + ErrTimeoutCreateInferenceService = errors.New("timeout creating inference service") + ErrUnableToCreatePDB = errors.New("error creating pod disruption budget") + ErrUnableToDeletePDB = errors.New("error deleting pod disruption budget") + ErrUnableToCreateVirtualService = errors.New("error creating virtual service") + ErrUnableToDeleteVirtualService = errors.New("error deleting virtual service") ) diff --git a/api/cluster/resource/templater.go b/api/cluster/resource/templater.go index 41f995240..225830d62 100644 --- a/api/cluster/resource/templater.go +++ b/api/cluster/resource/templater.go @@ -82,28 +82,13 @@ const ( grpcHealthProbeCommand = "grpc_health_probe" ) -var ( - // list of configuration stored as annotations - configAnnotationKeys = []string{ - annotationPrometheusScrapeFlag, - annotationPrometheusScrapePort, - knserving.QueueSidecarResourcePercentageAnnotationKey, - kserveconstant.AutoscalerClass, - kserveconstant.AutoscalerMetrics, - 
kserveconstant.TargetUtilizationPercentage, - knautoscaling.ClassAnnotationKey, - knautoscaling.MetricAnnotationKey, - knautoscaling.TargetAnnotationKey, - } - - grpcContainerPorts = []corev1.ContainerPort{ - { - ContainerPort: defaultGRPCPort, - Name: "h2c", - Protocol: corev1.ProtocolTCP, - }, - } -) +var grpcContainerPorts = []corev1.ContainerPort{ + { + ContainerPort: defaultGRPCPort, + Name: "h2c", + Protocol: corev1.ProtocolTCP, + }, +} type DeploymentScale struct { Predictor *int @@ -118,10 +103,26 @@ func NewInferenceServiceTemplater(standardTransformerConfig config.StandardTrans return &InferenceServiceTemplater{standardTransformerConfig: standardTransformerConfig} } -func (t *InferenceServiceTemplater) CreateInferenceServiceSpec(modelService *models.Service, config *config.DeploymentConfig) (*kservev1beta1.InferenceService, error) { +func (t *InferenceServiceTemplater) CreateInferenceServiceSpec(modelService *models.Service, config *config.DeploymentConfig, currentReplicas DeploymentScale) (*kservev1beta1.InferenceService, error) { applyDefaults(modelService, config) - annotations, err := createAnnotations(modelService, config, nil) + // Identify the desired initial scale of the new deployment + var initialScale *int + if currentReplicas.Predictor != nil { + // The desired scale of the new deployment is a single value, applicable to both the predictor and the transformer. + // Set the desired scale of the new deployment by taking the max of the 2 values. + // Consider the transformer's scale only if it is also enabled in the new spec. 
+ if modelService.Transformer != nil && + modelService.Transformer.Enabled && + currentReplicas.Transformer != nil && + *currentReplicas.Transformer > *currentReplicas.Predictor { + initialScale = currentReplicas.Transformer + } else { + initialScale = currentReplicas.Predictor + } + } + + annotations, err := createAnnotations(modelService, config, initialScale) if err != nil { return nil, fmt.Errorf("unable to create inference service spec: %w", err) } @@ -168,73 +169,6 @@ func (t *InferenceServiceTemplater) CreateInferenceServiceSpec(modelService *mod return inferenceService, nil } -func (t *InferenceServiceTemplater) PatchInferenceServiceSpec( - orig *kservev1beta1.InferenceService, - modelService *models.Service, - config *config.DeploymentConfig, - currentReplicas DeploymentScale, -) (*kservev1beta1.InferenceService, error) { - // Identify the desired initial scale of the new deployment - var initialScale *int - if currentReplicas.Predictor != nil { - // The desired scale of the new deployment is a single value, applicable to both the predictor and the transformer. - // Set the desired scale of the new deployment by taking the max of the 2 values. - // Consider the transformer's scale only if it is also enabled in the new spec. 
- if modelService.Transformer != nil && - modelService.Transformer.Enabled && - currentReplicas.Transformer != nil && - *currentReplicas.Transformer > *currentReplicas.Predictor { - initialScale = currentReplicas.Transformer - } else { - initialScale = currentReplicas.Predictor - } - } - - applyDefaults(modelService, config) - - orig.ObjectMeta.Labels = modelService.Metadata.ToLabel() - annotations, err := createAnnotations(modelService, config, initialScale) - if err != nil { - return nil, fmt.Errorf("unable to patch inference service spec: %w", err) - } - orig.ObjectMeta.Annotations = utils.MergeMaps(utils.ExcludeKeys(orig.ObjectMeta.Annotations, configAnnotationKeys), annotations) - - orig.Spec.Predictor = createPredictorSpec(modelService, config) - orig.Spec.Predictor.TopologySpreadConstraints, err = updateExistingInferenceServiceTopologySpreadConstraints( - orig, - modelService, - config, - kservev1beta1.PredictorComponent, - ) - if err != nil { - return nil, fmt.Errorf("unable to create predictor topology spread constraints: %w", err) - } - - orig.Spec.Transformer = nil - if modelService.Transformer != nil && modelService.Transformer.Enabled { - orig.Spec.Transformer = t.createTransformerSpec(modelService, modelService.Transformer) - if _, ok := orig.Status.Components[kservev1beta1.TransformerComponent]; !ok || - orig.Status.Components[kservev1beta1.TransformerComponent].LatestCreatedRevision == "" { - orig.Spec.Transformer.TopologySpreadConstraints, err = createNewInferenceServiceTopologySpreadConstraints( - modelService, - config, - kservev1beta1.TransformerComponent, - ) - } else { - orig.Spec.Transformer.TopologySpreadConstraints, err = updateExistingInferenceServiceTopologySpreadConstraints( - orig, - modelService, - config, - kservev1beta1.TransformerComponent, - ) - } - if err != nil { - return nil, fmt.Errorf("unable to create transformer topology spread constraints: %w", err) - } - } - return orig, nil -} - func createPredictorSpec(modelService 
*models.Service, config *config.DeploymentConfig) kservev1beta1.PredictorSpec { envVars := modelService.EnvVars @@ -684,39 +618,6 @@ func createNewInferenceServiceTopologySpreadConstraints( ) } -// updateExistingInferenceServiceTopologySpreadConstraints creates topology spread constraints for a component of a new -// inference service -func updateExistingInferenceServiceTopologySpreadConstraints( - orig *kservev1beta1.InferenceService, - modelService *models.Service, - config *config.DeploymentConfig, - component kservev1beta1.ComponentType, -) ([]corev1.TopologySpreadConstraint, error) { - if len(config.TopologySpreadConstraints) == 0 { - var topologySpreadConstraints []corev1.TopologySpreadConstraint - return topologySpreadConstraints, nil - } - var newRevisionName string - if modelService.DeploymentMode == deployment.RawDeploymentMode { - newRevisionName = fmt.Sprintf("isvc.%s-%s", modelService.Name, component) - } else if modelService.DeploymentMode == deployment.ServerlessDeploymentMode || - modelService.DeploymentMode == deployment.EmptyDeploymentMode { - var err error - newRevisionName, err = getNewRevisionNameForExistingServerlessDeployment( - orig.Status.Components[component].LatestCreatedRevision, - ) - if err != nil { - return nil, fmt.Errorf("unable to generate new revision name: %w", err) - } - } else { - return nil, fmt.Errorf("invalid deployment mode: %s", modelService.DeploymentMode) - } - return appendPodSpreadingLabelSelectorsToTopologySpreadConstraints( - config.TopologySpreadConstraints, - newRevisionName, - ) -} - // appendPodSpreadingLabelSelectorsToTopologySpreadConstraints makes a deep copy of the config topology spread // constraints and then adds the given revisionName as a label to the match labels of each topology spread constraint // to spread out all the pods across the specified topologyKey @@ -758,24 +659,6 @@ func copyTopologySpreadConstraints( return topologySpreadConstraints, nil } -// 
getNewRevisionNameForExistingServerlessDeployment examines the current revision name of an inference service ( -// serverless deployment) app name that is given to it and increments the last value of the revision number by 1, e.g. -// sklearn-sample-predictor-00001 -> sklearn-sample-predictor-00002 -func getNewRevisionNameForExistingServerlessDeployment(currentRevisionName string) (string, error) { - revisionNameElements := strings.Split(currentRevisionName, "-") - if len(revisionNameElements) < 4 { - return "", fmt.Errorf("unexpected revision name format that is not in at least 3 parts: %s", - currentRevisionName) - } - currentRevisionNumber, err := strconv.Atoi(revisionNameElements[len(revisionNameElements)-1]) - if err != nil { - return "", err - } - - revisionNameElements[len(revisionNameElements)-1] = fmt.Sprintf("%05d", currentRevisionNumber+1) - return strings.Join(revisionNameElements, "-"), nil -} - func createDefaultTransformerEnvVars(modelService *models.Service) models.EnvVars { defaultEnvVars := models.EnvVars{} @@ -869,7 +752,7 @@ func createPyFuncDefaultEnvVars(svc *models.Service) models.EnvVars { envVars := models.EnvVars{ models.EnvVar{ Name: envPyFuncModelName, - Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion), + Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion, svc.RevisionID.String()), }, models.EnvVar{ Name: envModelName, @@ -881,7 +764,7 @@ func createPyFuncDefaultEnvVars(svc *models.Service) models.EnvVars { }, models.EnvVar{ Name: envModelFullName, - Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion), + Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion, svc.RevisionID.String()), }, models.EnvVar{ Name: envHTTPPort, diff --git a/api/cluster/resource/templater_gpu_test.go b/api/cluster/resource/templater_gpu_test.go index ed6c05b2f..5af4e17aa 100644 --- a/api/cluster/resource/templater_gpu_test.go +++ b/api/cluster/resource/templater_gpu_test.go @@ 
-116,6 +116,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { name string modelSvc *models.Service resourcePercentage string + deploymentScale DeploymentScale exp *kservev1beta1.InferenceService wantErr bool }{ @@ -134,6 +135,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -141,6 +143,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -200,6 +203,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -207,6 +211,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -266,6 +271,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -325,6 +331,7 @@ func 
TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -332,6 +339,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -439,6 +447,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -446,6 +455,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -497,6 +507,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -504,6 +515,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: 
fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -554,6 +566,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -561,6 +574,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -616,6 +630,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -625,6 +640,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { "prometheus.io/scrape": "true", "prometheus.io/port": "8080", kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -677,6 +693,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -686,6 +703,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { "prometheus.io/scrape": "true", "prometheus.io/port": "8080", kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + 
knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -747,6 +765,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -754,6 +773,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -808,6 +828,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -815,6 +836,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -875,6 +897,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -941,6 +964,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: 
queueResourcePercentage, + deploymentScale: defaultDeploymentScale, wantErr: true, }, { @@ -963,6 +987,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -970,6 +995,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), knautoscaling.ClassAnnotationKey: knautoscaling.HPA, knautoscaling.MetricAnnotationKey: knautoscaling.CPU, knautoscaling.TargetAnnotationKey: "30", @@ -1029,6 +1055,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1036,6 +1063,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), knautoscaling.ClassAnnotationKey: knautoscaling.HPA, knautoscaling.MetricAnnotationKey: knautoscaling.Memory, knautoscaling.TargetAnnotationKey: "150", @@ -1095,6 +1123,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1102,6 +1131,7 @@ func 
TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), knautoscaling.ClassAnnotationKey: knautoscaling.HPA, knautoscaling.MetricAnnotationKey: knautoscaling.Memory, knautoscaling.TargetAnnotationKey: "100", @@ -1161,6 +1191,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1168,6 +1199,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), knautoscaling.ClassAnnotationKey: knautoscaling.KPA, knautoscaling.MetricAnnotationKey: knautoscaling.Concurrency, knautoscaling.TargetAnnotationKey: "2", @@ -1227,6 +1259,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1234,6 +1267,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), knautoscaling.ClassAnnotationKey: knautoscaling.KPA, knautoscaling.MetricAnnotationKey: knautoscaling.RPS, 
knautoscaling.TargetAnnotationKey: "10", @@ -1288,6 +1322,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1295,6 +1330,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1349,6 +1385,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1358,6 +1395,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { "prometheus.io/scrape": "true", "prometheus.io/port": "8080", kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1409,6 +1447,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1416,6 +1455,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + 
knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1479,6 +1519,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { ResourceRequest: modelSvc.ResourceRequest, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1486,6 +1527,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1533,7 +1575,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) { } tpl := NewInferenceServiceTemplater(standardTransformerConfig) - infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig) + infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig, tt.deploymentScale) if tt.wantErr { assert.Error(t, err) return diff --git a/api/cluster/resource/templater_test.go b/api/cluster/resource/templater_test.go index 929fc0462..96851fdd1 100644 --- a/api/cluster/resource/templater_test.go +++ b/api/cluster/resource/templater_test.go @@ -97,6 +97,13 @@ var ( }, } + testPredictorScale, testTransformerScale = 3, 5 + + defaultDeploymentScale = DeploymentScale{ + Predictor: &testPredictorScale, + Transformer: &testTransformerScale, + } + oneMinuteDuration = time.Minute * 1 twoMinuteDuration = time.Minute * 2 standardTransformerConfig = config.StandardTransformerConfig{ @@ -158,7 +165,6 @@ func TestCreateInferenceServiceSpec(t *testing.T) { project := mlp.Project{ Name: "project", } - modelSvc := &models.Service{ Name: "my-model-1", ModelName: "my-model", @@ -190,6 +196,7 @@ func 
TestCreateInferenceServiceSpec(t *testing.T) { name string modelSvc *models.Service resourcePercentage string + deploymentScale DeploymentScale exp *kservev1beta1.InferenceService wantErr bool }{ @@ -207,6 +214,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -214,6 +222,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -268,6 +277,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -275,6 +285,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -329,6 +340,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -383,6 +395,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, 
exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -390,6 +403,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -435,12 +449,14 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Metadata: modelSvc.Metadata, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, Namespace: project.Name, Annotations: map[string]string{ - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -487,6 +503,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -494,6 +511,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -540,6 +558,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ 
ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -547,6 +566,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -592,6 +612,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -599,6 +620,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -649,6 +671,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -658,6 +681,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { "prometheus.io/scrape": "true", "prometheus.io/port": "8080", kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -707,6 +731,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: 
metav1.ObjectMeta{ Name: modelSvc.Name, @@ -716,6 +741,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { "prometheus.io/scrape": "true", "prometheus.io/port": "8080", kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -761,6 +787,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -768,6 +795,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -825,6 +853,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -832,6 +861,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -884,6 +914,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ 
Name: modelSvc.Name, @@ -891,6 +922,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -948,6 +980,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1030,6 +1063,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1040,6 +1074,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { knautoscaling.ClassAnnotationKey: knautoscaling.HPA, knautoscaling.MetricAnnotationKey: knautoscaling.CPU, knautoscaling.TargetAnnotationKey: "30", + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1091,6 +1126,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1101,6 +1137,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { knautoscaling.ClassAnnotationKey: knautoscaling.HPA, knautoscaling.MetricAnnotationKey: knautoscaling.Memory, knautoscaling.TargetAnnotationKey: "150", // 30% * default memory request (500Mi) = 150Mi + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: 
map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1153,6 +1190,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { ResourceRequest: userResourceRequests, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1163,6 +1201,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { knautoscaling.ClassAnnotationKey: knautoscaling.HPA, knautoscaling.MetricAnnotationKey: knautoscaling.Memory, knautoscaling.TargetAnnotationKey: "205", // 20% * (1Gi) ~= 205Mi + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1214,6 +1253,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1224,6 +1264,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { knautoscaling.ClassAnnotationKey: knautoscaling.KPA, knautoscaling.MetricAnnotationKey: knautoscaling.Concurrency, knautoscaling.TargetAnnotationKey: "2", + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1275,6 +1316,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.HttpJson, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1285,6 +1327,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { knautoscaling.ClassAnnotationKey: knautoscaling.KPA, knautoscaling.MetricAnnotationKey: knautoscaling.RPS, knautoscaling.TargetAnnotationKey: "10", + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: 
map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1331,6 +1374,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.UpiV1, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1338,6 +1382,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1387,6 +1432,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.UpiV1, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1396,6 +1442,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { "prometheus.io/scrape": "true", "prometheus.io/port": "8080", kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1444,6 +1491,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.UpiV1, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1451,6 +1499,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: 
map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1509,6 +1558,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Protocol: protocol.UpiV1, }, resourcePercentage: queueResourcePercentage, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1516,6 +1566,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1560,7 +1611,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) { } tpl := NewInferenceServiceTemplater(standardTransformerConfig) - infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig) + infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig, tt.deploymentScale) if tt.wantErr { assert.Error(t, err) return @@ -1638,10 +1689,11 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { transformerProbeConfig := createLivenessProbeSpec(protocol.HttpJson, "/") transformerProbeConfigUPI := createLivenessProbeSpec(protocol.UpiV1, "/") tests := []struct { - name string - modelSvc *models.Service - exp *kservev1beta1.InferenceService - wantErr bool + name string + modelSvc *models.Service + deploymentScale DeploymentScale + exp *kservev1beta1.InferenceService + wantErr bool }{ { name: "custom transformer with default resource request", @@ -1667,6 +1719,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { }, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1674,6 +1727,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { 
Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1752,6 +1806,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { }, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1759,6 +1814,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1834,6 +1890,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { }, Protocol: protocol.UpiV1, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1841,6 +1898,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -1924,6 +1982,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { }, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -1931,6 +1990,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t 
*testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -2027,6 +2087,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { }, Protocol: protocol.UpiV1, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -2034,6 +2095,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -2140,6 +2202,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { }, Protocol: protocol.UpiV1, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -2149,6 +2212,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { kserveconstant.DeploymentMode: string(kserveconstant.Serverless), annotationPrometheusScrapeFlag: "true", annotationPrometheusScrapePort: "8080", + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -2239,7 +2303,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { } tpl := NewInferenceServiceTemplater(standardTransformerConfig) - infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig) + infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig, tt.deploymentScale) if tt.wantErr { 
assert.Error(t, err) return @@ -2295,10 +2359,11 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { transformerProbeConfig := createLivenessProbeSpec(protocol.HttpJson, "/") tests := []struct { - name string - modelSvc *models.Service - exp *kservev1beta1.InferenceService - wantErr bool + name string + modelSvc *models.Service + deploymentScale DeploymentScale + exp *kservev1beta1.InferenceService + wantErr bool }{ { name: "model logger enabled", @@ -2320,6 +2385,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { }, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -2327,6 +2393,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -2389,6 +2456,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { }, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -2396,6 +2464,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -2477,6 +2546,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { }, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: 
metav1.ObjectMeta{ Name: modelSvc.Name, @@ -2484,6 +2554,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -2561,6 +2632,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { }, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -2568,6 +2640,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -2649,6 +2722,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { }, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -2656,6 +2730,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -2718,7 +2793,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { } tpl := NewInferenceServiceTemplater(standardTransformerConfig) - infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig) 
+ infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig, tt.deploymentScale) if tt.wantErr { assert.Error(t, err) return @@ -2772,10 +2847,11 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) { transformerProbeConfig := createLivenessProbeSpec(protocol.HttpJson, "/") tests := []struct { - name string - modelSvc *models.Service - exp *kservev1beta1.InferenceService - wantErr bool + name string + modelSvc *models.Service + deploymentScale DeploymentScale + exp *kservev1beta1.InferenceService + wantErr bool }{ { name: "predictor with unspecified deployment mode (serverless)", @@ -2790,6 +2866,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) { Metadata: modelSvc.Metadata, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -2797,6 +2874,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -2892,6 +2970,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) { DeploymentMode: deployment.ServerlessDeploymentMode, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -2899,6 +2978,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + 
knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -2994,6 +3074,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) { DeploymentMode: deployment.RawDeploymentMode, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -3101,6 +3182,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) { }, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -3108,6 +3190,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -3275,6 +3358,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) { DeploymentMode: deployment.ServerlessDeploymentMode, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -3282,6 +3366,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) { Annotations: map[string]string{ knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, kserveconstant.DeploymentMode: string(kserveconstant.Serverless), + knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale), }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, @@ -3449,6 +3534,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t 
*testing.T) { DeploymentMode: deployment.RawDeploymentMode, Protocol: protocol.HttpJson, }, + deploymentScale: defaultDeploymentScale, exp: &kservev1beta1.InferenceService{ ObjectMeta: metav1.ObjectMeta{ Name: modelSvc.Name, @@ -3652,1961 +3738,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) { } tpl := NewInferenceServiceTemplater(standardTransformerConfig) - infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig) - if tt.wantErr { - assert.Error(t, err) - return - } - assert.NoError(t, err) - assert.Equal(t, tt.exp, infSvcSpec) - }) - } -} - -func TestPatchInferenceServiceSpec(t *testing.T) { - err := models.InitKubernetesLabeller("gojek.com/", testEnvironmentName) - assert.NoError(t, err) - - defer func() { - _ = models.InitKubernetesLabeller("", "") - }() - - project := mlp.Project{ - Name: "project", - } - - modelSvc := &models.Service{ - Name: "model-1", - ModelName: "model", - ModelVersion: "1", - Namespace: project.Name, - ArtifactURI: "gs://my-artifacet", - Metadata: models.Metadata{ - App: "model", - Component: models.ComponentModelVersion, - Stream: "dsp", - Team: "dsp", - Labels: mlp.Labels{ - { - Key: "sample", - Value: "true", - }, - }, - }, - Protocol: protocol.HttpJson, - } - - storageUri := fmt.Sprintf("%s/model", modelSvc.ArtifactURI) - - // Liveness probe config for the model containers - probeConfig := createLivenessProbeSpec(protocol.HttpJson, fmt.Sprintf("/v1/models/%s", modelSvc.Name)) - - // Liveness probe config for the transformers - transformerProbeConfig := createLivenessProbeSpec(protocol.HttpJson, "/") - - one := 1 - minReplica := 1 - maxReplica := 10 - cpuRequest := resource.MustParse("1") - memoryRequest := resource.MustParse("1Gi") - cpuLimit := cpuRequest.DeepCopy() - cpuLimit.Add(cpuRequest) - memoryLimit := memoryRequest.DeepCopy() - memoryLimit.Add(memoryRequest) - queueResourcePercentage := "2" - - resourceRequests := corev1.ResourceRequirements{ - Requests: 
corev1.ResourceList{ - corev1.ResourceCPU: cpuRequest, - corev1.ResourceMemory: memoryRequest, - }, - Limits: corev1.ResourceList{ - corev1.ResourceCPU: cpuLimit, - corev1.ResourceMemory: memoryLimit, - }, - } - - testPredictorScale, testTransformerScale := 3, 5 - - tests := []struct { - name string - modelSvc *models.Service - deploymentScale DeploymentScale - original *kservev1beta1.InferenceService - exp *kservev1beta1.InferenceService - wantErr bool - }{ - { - name: "tensorflow spec", - modelSvc: &models.Service{ - Name: modelSvc.Name, - ModelName: modelSvc.ModelName, - ModelVersion: modelSvc.ModelVersion, - Namespace: project.Name, - ArtifactURI: modelSvc.ArtifactURI, - Type: models.ModelTypeTensorflow, - Options: &models.ModelOption{}, - Metadata: modelSvc.Metadata, - Protocol: protocol.HttpJson, - }, - original: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - }, - }, - exp: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - }, 
- Labels: map[string]string{ - "gojek.com/app": modelSvc.Metadata.App, - "gojek.com/component": models.ComponentModelVersion, - "gojek.com/environment": testEnvironmentName, - "gojek.com/orchestrator": testOrchestratorName, - "gojek.com/stream": modelSvc.Metadata.Stream, - "gojek.com/team": modelSvc.Metadata.Team, - "sample": "true", - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - }, - }, - }, - { - name: "tensorflow + transformer spec", - modelSvc: &models.Service{ - Name: modelSvc.Name, - ModelName: modelSvc.ModelName, - ModelVersion: modelSvc.ModelVersion, - Namespace: project.Name, - ArtifactURI: modelSvc.ArtifactURI, - Type: models.ModelTypeTensorflow, - Options: &models.ModelOption{}, - Metadata: modelSvc.Metadata, - Transformer: &models.Transformer{ - Enabled: true, - Image: "ghcr.io/gojek/merlin-transformer-test", - Command: "python", - Args: "main.py", - ResourceRequest: &models.ResourceRequest{ - MinReplica: 1, - MaxReplica: 1, - CPURequest: cpuRequest, - MemoryRequest: memoryRequest, - }, - }, - Protocol: protocol.HttpJson, - }, - original: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: 
&kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - }, - }, - exp: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - }, - Labels: map[string]string{ - "gojek.com/app": modelSvc.Metadata.App, - "gojek.com/component": models.ComponentModelVersion, - "gojek.com/environment": testEnvironmentName, - "gojek.com/orchestrator": testOrchestratorName, - "gojek.com/stream": modelSvc.Metadata.Stream, - "gojek.com/team": modelSvc.Metadata.Team, - "sample": "true", - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - Transformer: &kservev1beta1.TransformerSpec{ - PodSpec: kservev1beta1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "transformer", - Image: "ghcr.io/gojek/merlin-transformer-test", - Command: []string{"python"}, - Args: []string{"main.py"}, - Env: createDefaultTransformerEnvVars(modelSvc).ToKubernetesEnvVars(), - Resources: 
corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: cpuRequest, - corev1.ResourceMemory: memoryRequest, - }, - Limits: corev1.ResourceList{ - corev1.ResourceCPU: cpuLimit, - corev1.ResourceMemory: memoryLimit, - }, - }, - LivenessProbe: transformerProbeConfig, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &one, - MaxReplicas: one, - }, - }, - }, - }, - }, - { - name: "tensorflow + transformer spec to tensorflow spec only", - modelSvc: &models.Service{ - Name: modelSvc.Name, - Namespace: project.Name, - ArtifactURI: modelSvc.ArtifactURI, - Type: models.ModelTypeTensorflow, - Options: &models.ModelOption{}, - Metadata: modelSvc.Metadata, - Transformer: &models.Transformer{ - Enabled: false, - }, - Protocol: protocol.HttpJson, - }, - deploymentScale: DeploymentScale{ - Predictor: &testPredictorScale, - Transformer: &testTransformerScale, - }, - original: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - Transformer: &kservev1beta1.TransformerSpec{ - PodSpec: kservev1beta1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "transformer", - Image: "ghcr.io/gojek/merlin-transformer-test", - Command: 
[]string{"python"}, - Args: []string{"main.py"}, - Env: []corev1.EnvVar{ - {Name: envTransformerPort, Value: fmt.Sprint(defaultHTTPPort)}, - {Name: envTransformerModelName, Value: "model-1"}, - {Name: envTransformerPredictURL, Value: "model-1-predictor.project"}, - }, - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: cpuRequest, - corev1.ResourceMemory: memoryRequest, - }, - Limits: corev1.ResourceList{ - corev1.ResourceCPU: cpuLimit, - corev1.ResourceMemory: memoryLimit, - }, - }, - LivenessProbe: transformerProbeConfig, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &one, - MaxReplicas: one, - }, - }, - }, - }, - exp: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - knautoscaling.InitialScaleAnnotationKey: "3", - }, - Labels: map[string]string{ - "gojek.com/app": modelSvc.Metadata.App, - "gojek.com/component": models.ComponentModelVersion, - "gojek.com/environment": testEnvironmentName, - "gojek.com/orchestrator": testOrchestratorName, - "gojek.com/stream": modelSvc.Metadata.Stream, - "gojek.com/team": modelSvc.Metadata.Team, - "sample": "true", - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - Transformer: nil, - }, - }, - }, - { - 
name: "custom spec", - modelSvc: &models.Service{ - Name: modelSvc.Name, - ModelName: modelSvc.ModelName, - ModelVersion: modelSvc.ModelVersion, - Namespace: project.Name, - ArtifactURI: modelSvc.ArtifactURI, - Type: models.ModelTypeCustom, - Options: &models.ModelOption{ - CustomPredictor: &models.CustomPredictor{ - Image: "gcr.io/custom-model:v0.2", - Command: "./run-1.sh", - Args: "firstArg secondArg", - }, - }, - Metadata: modelSvc.Metadata, - ResourceRequest: userResourceRequests, - Protocol: protocol.HttpJson, - }, - original: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - }, - Labels: map[string]string{ - "gojek.com/app": modelSvc.Metadata.App, - "gojek.com/component": models.ComponentModelVersion, - "gojek.com/environment": testEnvironmentName, - "gojek.com/orchestrator": testOrchestratorName, - "gojek.com/stream": modelSvc.Metadata.Stream, - "gojek.com/team": modelSvc.Metadata.Team, - "sample": "true", - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - PodSpec: kservev1beta1.PodSpec{ - Containers: []corev1.Container{ - { - Name: kserveconstant.InferenceServiceContainerName, - Image: "gcr.io/custom-model:v0.1", - Env: createDefaultPredictorEnvVars(modelSvc).ToKubernetesEnvVars(), - Resources: expUserResourceRequests, - Command: []string{ - "./run.sh", - }, - Args: []string{ - "firstArg", - "secondArg", - }, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &userResourceRequests.MinReplica, - MaxReplicas: userResourceRequests.MaxReplica, - }, - }, - }, - }, - exp: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - 
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - }, - Labels: map[string]string{ - "gojek.com/app": modelSvc.Metadata.App, - "gojek.com/component": models.ComponentModelVersion, - "gojek.com/environment": testEnvironmentName, - "gojek.com/orchestrator": testOrchestratorName, - "gojek.com/stream": modelSvc.Metadata.Stream, - "gojek.com/team": modelSvc.Metadata.Team, - "sample": "true", - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - PodSpec: kservev1beta1.PodSpec{ - Containers: []corev1.Container{ - { - Name: kserveconstant.InferenceServiceContainerName, - Image: "gcr.io/custom-model:v0.2", - Env: createDefaultPredictorEnvVars(modelSvc).ToKubernetesEnvVars(), - Resources: expUserResourceRequests, - Command: []string{ - "./run-1.sh", - }, - Args: []string{ - "firstArg", - "secondArg", - }, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &userResourceRequests.MinReplica, - MaxReplicas: userResourceRequests.MaxReplica, - }, - }, - }, - }, - }, - { - name: "patch deployment mode from serverless to raw_deployment", - modelSvc: &models.Service{ - Name: modelSvc.Name, - Namespace: project.Name, - ArtifactURI: modelSvc.ArtifactURI, - Type: models.ModelTypeTensorflow, - Options: &models.ModelOption{}, - Metadata: modelSvc.Metadata, - DeploymentMode: deployment.RawDeploymentMode, - AutoscalingPolicy: &autoscaling.AutoscalingPolicy{ - MetricsType: autoscaling.CPUUtilization, - TargetValue: 30, - }, - Protocol: protocol.HttpJson, - }, - original: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - }, - }, - Spec: 
kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - }, - }, - exp: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.RawDeployment), - kserveconstant.AutoscalerClass: string(kserveconstant.AutoscalerClassHPA), - kserveconstant.AutoscalerMetrics: string(kserveconstant.AutoScalerMetricsCPU), - kserveconstant.TargetUtilizationPercentage: "30", - }, - Labels: map[string]string{ - "gojek.com/app": modelSvc.Metadata.App, - "gojek.com/component": models.ComponentModelVersion, - "gojek.com/environment": testEnvironmentName, - "gojek.com/orchestrator": testOrchestratorName, - "gojek.com/stream": modelSvc.Metadata.Stream, - "gojek.com/team": modelSvc.Metadata.Team, - "sample": "true", - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - }, - }, - }, - { - name: "patch deployment mode 
from raw_deployment to serverless_deployment", - modelSvc: &models.Service{ - Name: modelSvc.Name, - Namespace: project.Name, - ArtifactURI: modelSvc.ArtifactURI, - Type: models.ModelTypeTensorflow, - Options: &models.ModelOption{}, - Metadata: modelSvc.Metadata, - DeploymentMode: deployment.ServerlessDeploymentMode, - AutoscalingPolicy: &autoscaling.AutoscalingPolicy{ - MetricsType: autoscaling.Concurrency, - TargetValue: 2, - }, - Protocol: protocol.HttpJson, - }, - original: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.RawDeployment), - kserveconstant.AutoscalerClass: string(kserveconstant.AutoscalerClassHPA), - kserveconstant.AutoscalerMetrics: string(kserveconstant.AutoScalerMetricsCPU), - kserveconstant.TargetUtilizationPercentage: "30", - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - }, - }, - exp: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - knautoscaling.ClassAnnotationKey: knautoscaling.KPA, - knautoscaling.MetricAnnotationKey: knautoscaling.Concurrency, - 
knautoscaling.TargetAnnotationKey: "2", - }, - Labels: map[string]string{ - "gojek.com/app": modelSvc.Metadata.App, - "gojek.com/component": models.ComponentModelVersion, - "gojek.com/environment": testEnvironmentName, - "gojek.com/orchestrator": testOrchestratorName, - "gojek.com/stream": modelSvc.Metadata.Stream, - "gojek.com/team": modelSvc.Metadata.Team, - "sample": "true", - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - }, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - deployConfig := &config.DeploymentConfig{ - DefaultModelResourceRequests: &config.ResourceRequests{ - MinReplica: minReplica, - MaxReplica: maxReplica, - CPURequest: cpuRequest, - MemoryRequest: memoryRequest, - }, - QueueResourcePercentage: queueResourcePercentage, - } - - tpl := NewInferenceServiceTemplater(standardTransformerConfig) - infSvcSpec, err := tpl.PatchInferenceServiceSpec(tt.original, tt.modelSvc, deployConfig, tt.deploymentScale) - if tt.wantErr { - assert.Error(t, err) - return - } - assert.NoError(t, err) - assert.Equal(t, tt.exp, infSvcSpec) - }) - } -} - -func TestPatchInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) { - err := models.InitKubernetesLabeller("gojek.com/", testEnvironmentName) - assert.NoError(t, err) - - defer func() { - _ = models.InitKubernetesLabeller("", "") - }() - - project := mlp.Project{ - Name: "project", - } - - modelSvc := &models.Service{ - Name: "model-1", - ModelName: "model", - ModelVersion: "1", - 
Namespace: project.Name, - ArtifactURI: "gs://my-artifacet", - Metadata: models.Metadata{ - App: "model", - Component: models.ComponentModelVersion, - Stream: "dsp", - Team: "dsp", - Labels: mlp.Labels{ - { - Key: "sample", - Value: "true", - }, - }, - }, - Protocol: protocol.HttpJson, - } - - storageUri := fmt.Sprintf("%s/model", modelSvc.ArtifactURI) - - // Liveness probe config for the model containers - probeConfig := createLivenessProbeSpec(protocol.HttpJson, fmt.Sprintf("/v1/models/%s", modelSvc.Name)) - - // Liveness probe config for the transformers - transformerProbeConfig := createLivenessProbeSpec(protocol.HttpJson, "/") - - one := 1 - minReplica := 1 - maxReplica := 10 - cpuRequest := resource.MustParse("1") - memoryRequest := resource.MustParse("1Gi") - cpuLimit := cpuRequest.DeepCopy() - cpuLimit.Add(cpuRequest) - memoryLimit := memoryRequest.DeepCopy() - memoryLimit.Add(memoryRequest) - queueResourcePercentage := "2" - - resourceRequests := corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: cpuRequest, - corev1.ResourceMemory: memoryRequest, - }, - Limits: corev1.ResourceList{ - corev1.ResourceCPU: cpuLimit, - corev1.ResourceMemory: memoryLimit, - }, - } - - testPredictorScale, testTransformerScale := 3, 5 - - tests := []struct { - name string - modelSvc *models.Service - deploymentScale DeploymentScale - original *kservev1beta1.InferenceService - exp *kservev1beta1.InferenceService - wantErr bool - }{ - { - name: "predictor with unspecified deployment mode (serverless)", - modelSvc: &models.Service{ - Name: modelSvc.Name, - ModelName: modelSvc.ModelName, - ModelVersion: modelSvc.ModelVersion, - Namespace: project.Name, - ArtifactURI: modelSvc.ArtifactURI, - Type: models.ModelTypeTensorflow, - Options: &models.ModelOption{}, - Metadata: modelSvc.Metadata, - Protocol: protocol.HttpJson, - }, - original: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - 
Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - }, - Status: kservev1beta1.InferenceServiceStatus{ - Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{ - kservev1beta1.PredictorComponent: { - LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name), - }, - }, - }, - }, - exp: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - }, - Labels: map[string]string{ - "gojek.com/app": modelSvc.Metadata.App, - "gojek.com/component": models.ComponentModelVersion, - "gojek.com/environment": testEnvironmentName, - "gojek.com/orchestrator": testOrchestratorName, - "gojek.com/stream": modelSvc.Metadata.Stream, - "gojek.com/team": modelSvc.Metadata.Team, - "sample": "true", - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - 
LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - PodSpec: kservev1beta1.PodSpec{ - TopologySpreadConstraints: []corev1.TopologySpreadConstraint{ - { - MaxSkew: 1, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.ScheduleAnyway, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "model-1-predictor-00002", - }, - }, - }, - { - MaxSkew: 2, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "model-1-predictor-00002", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - { - MaxSkew: 3, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app-label": "spread", - "app": "model-1-predictor-00002", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - }, - }, - }, - }, - Status: kservev1beta1.InferenceServiceStatus{ - Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{ - kservev1beta1.PredictorComponent: { - LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name), - }, - }, - }, - }, - }, - { - name: "predictor with serverless deployment mode", - modelSvc: &models.Service{ - Name: modelSvc.Name, - ModelName: modelSvc.ModelName, - ModelVersion: modelSvc.ModelVersion, - Namespace: project.Name, - ArtifactURI: modelSvc.ArtifactURI, - Type: models.ModelTypeTensorflow, - Options: &models.ModelOption{}, - Metadata: modelSvc.Metadata, - DeploymentMode: deployment.ServerlessDeploymentMode, 
- Protocol: protocol.HttpJson, - }, - deploymentScale: DeploymentScale{ - Predictor: &testPredictorScale, - }, - original: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - }, - Status: kservev1beta1.InferenceServiceStatus{ - Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{ - kservev1beta1.PredictorComponent: { - LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name), - }, - }, - }, - }, - exp: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - knautoscaling.InitialScaleAnnotationKey: "3", - }, - Labels: map[string]string{ - "gojek.com/app": modelSvc.Metadata.App, - "gojek.com/component": models.ComponentModelVersion, - "gojek.com/environment": testEnvironmentName, - "gojek.com/orchestrator": testOrchestratorName, - "gojek.com/stream": modelSvc.Metadata.Stream, - "gojek.com/team": modelSvc.Metadata.Team, - "sample": "true", - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: 
kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - PodSpec: kservev1beta1.PodSpec{ - TopologySpreadConstraints: []corev1.TopologySpreadConstraint{ - { - MaxSkew: 1, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.ScheduleAnyway, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "model-1-predictor-00002", - }, - }, - }, - { - MaxSkew: 2, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "model-1-predictor-00002", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - { - MaxSkew: 3, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app-label": "spread", - "app": "model-1-predictor-00002", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - }, - }, - }, - }, - Status: kservev1beta1.InferenceServiceStatus{ - Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{ - kservev1beta1.PredictorComponent: { - LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name), - }, - }, - }, - }, - }, - { - name: "predictor with raw deployment mode", - modelSvc: &models.Service{ - Name: modelSvc.Name, - ModelName: 
modelSvc.ModelName, - ModelVersion: modelSvc.ModelVersion, - Namespace: project.Name, - ArtifactURI: modelSvc.ArtifactURI, - Type: models.ModelTypeTensorflow, - Options: &models.ModelOption{}, - Metadata: modelSvc.Metadata, - DeploymentMode: deployment.RawDeploymentMode, - Protocol: protocol.HttpJson, - }, - original: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.RawDeployment), - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - }, - }, - exp: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.RawDeployment), - }, - Labels: map[string]string{ - "gojek.com/app": modelSvc.Metadata.App, - "gojek.com/component": models.ComponentModelVersion, - "gojek.com/environment": testEnvironmentName, - "gojek.com/orchestrator": testOrchestratorName, - "gojek.com/stream": modelSvc.Metadata.Stream, - "gojek.com/team": modelSvc.Metadata.Team, - "sample": "true", - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: 
kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - PodSpec: kservev1beta1.PodSpec{ - TopologySpreadConstraints: []corev1.TopologySpreadConstraint{ - { - MaxSkew: 1, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.ScheduleAnyway, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "isvc.model-1-predictor", - }, - }, - }, - { - MaxSkew: 2, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "isvc.model-1-predictor", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - { - MaxSkew: 3, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app-label": "spread", - "app": "isvc.model-1-predictor", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - { - name: "predictor and transformer with unspecified deployment mode (serverless)", - modelSvc: &models.Service{ - Name: modelSvc.Name, - ModelName: modelSvc.ModelName, - ModelVersion: modelSvc.ModelVersion, - Namespace: project.Name, - ArtifactURI: modelSvc.ArtifactURI, - Type: models.ModelTypeTensorflow, - Options: &models.ModelOption{}, - Metadata: modelSvc.Metadata, - Transformer: &models.Transformer{ - Enabled: true, - Image: 
"ghcr.io/gojek/merlin-transformer-test", - Command: "python", - Args: "main.py", - ResourceRequest: &models.ResourceRequest{ - MinReplica: 1, - MaxReplica: 1, - CPURequest: cpuRequest, - MemoryRequest: memoryRequest, - }, - }, - Protocol: protocol.HttpJson, - }, - original: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - }, - Status: kservev1beta1.InferenceServiceStatus{ - Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{ - kservev1beta1.PredictorComponent: { - LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name), - }, - }, - }, - }, - exp: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - }, - Labels: map[string]string{ - "gojek.com/app": modelSvc.Metadata.App, - "gojek.com/component": models.ComponentModelVersion, - "gojek.com/environment": testEnvironmentName, - "gojek.com/orchestrator": testOrchestratorName, - "gojek.com/stream": modelSvc.Metadata.Stream, - "gojek.com/team": 
modelSvc.Metadata.Team, - "sample": "true", - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - PodSpec: kservev1beta1.PodSpec{ - TopologySpreadConstraints: []corev1.TopologySpreadConstraint{ - { - MaxSkew: 1, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.ScheduleAnyway, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "model-1-predictor-00002", - }, - }, - }, - { - MaxSkew: 2, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "model-1-predictor-00002", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - { - MaxSkew: 3, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app-label": "spread", - "app": "model-1-predictor-00002", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - }, - }, - }, - Transformer: &kservev1beta1.TransformerSpec{ - PodSpec: kservev1beta1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "transformer", - Image: "ghcr.io/gojek/merlin-transformer-test", - Command: []string{"python"}, - Args: []string{"main.py"}, - Env: 
createDefaultTransformerEnvVars(modelSvc).ToKubernetesEnvVars(), - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: cpuRequest, - corev1.ResourceMemory: memoryRequest, - }, - Limits: corev1.ResourceList{ - corev1.ResourceCPU: cpuLimit, - corev1.ResourceMemory: memoryLimit, - }, - }, - LivenessProbe: transformerProbeConfig, - }, - }, - TopologySpreadConstraints: []corev1.TopologySpreadConstraint{ - { - MaxSkew: 1, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.ScheduleAnyway, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "model-1-transformer-00001", - }, - }, - }, - { - MaxSkew: 2, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "model-1-transformer-00001", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - { - MaxSkew: 3, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app-label": "spread", - "app": "model-1-transformer-00001", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &one, - MaxReplicas: one, - }, - }, - }, - Status: kservev1beta1.InferenceServiceStatus{ - Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{ - kservev1beta1.PredictorComponent: { - LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name), - }, - }, - }, - }, - }, - { - name: "predictor and transformer with serverless deployment mode", - modelSvc: &models.Service{ - Name: modelSvc.Name, - 
ModelName: modelSvc.ModelName, - ModelVersion: modelSvc.ModelVersion, - Namespace: project.Name, - ArtifactURI: modelSvc.ArtifactURI, - Type: models.ModelTypeTensorflow, - Options: &models.ModelOption{}, - Metadata: modelSvc.Metadata, - Transformer: &models.Transformer{ - Enabled: true, - Image: "ghcr.io/gojek/merlin-transformer-test", - Command: "python", - Args: "main.py", - ResourceRequest: &models.ResourceRequest{ - MinReplica: 1, - MaxReplica: 1, - CPURequest: cpuRequest, - MemoryRequest: memoryRequest, - }, - }, - DeploymentMode: deployment.ServerlessDeploymentMode, - Protocol: protocol.HttpJson, - }, - deploymentScale: DeploymentScale{ - Predictor: &testPredictorScale, - Transformer: &testTransformerScale, - }, - original: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - }, - Status: kservev1beta1.InferenceServiceStatus{ - Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{ - kservev1beta1.PredictorComponent: { - LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name), - }, - }, - }, - }, - exp: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: 
map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.Serverless), - knautoscaling.InitialScaleAnnotationKey: "5", - }, - Labels: map[string]string{ - "gojek.com/app": modelSvc.Metadata.App, - "gojek.com/component": models.ComponentModelVersion, - "gojek.com/environment": testEnvironmentName, - "gojek.com/orchestrator": testOrchestratorName, - "gojek.com/stream": modelSvc.Metadata.Stream, - "gojek.com/team": modelSvc.Metadata.Team, - "sample": "true", - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - PodSpec: kservev1beta1.PodSpec{ - TopologySpreadConstraints: []corev1.TopologySpreadConstraint{ - { - MaxSkew: 1, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.ScheduleAnyway, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "model-1-predictor-00002", - }, - }, - }, - { - MaxSkew: 2, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "model-1-predictor-00002", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - { - MaxSkew: 3, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - 
"app-label": "spread", - "app": "model-1-predictor-00002", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - }, - }, - }, - Transformer: &kservev1beta1.TransformerSpec{ - PodSpec: kservev1beta1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "transformer", - Image: "ghcr.io/gojek/merlin-transformer-test", - Command: []string{"python"}, - Args: []string{"main.py"}, - Env: createDefaultTransformerEnvVars(modelSvc).ToKubernetesEnvVars(), - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: cpuRequest, - corev1.ResourceMemory: memoryRequest, - }, - Limits: corev1.ResourceList{ - corev1.ResourceCPU: cpuLimit, - corev1.ResourceMemory: memoryLimit, - }, - }, - LivenessProbe: transformerProbeConfig, - }, - }, - TopologySpreadConstraints: []corev1.TopologySpreadConstraint{ - { - MaxSkew: 1, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.ScheduleAnyway, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "model-1-transformer-00001", - }, - }, - }, - { - MaxSkew: 2, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "model-1-transformer-00001", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - { - MaxSkew: 3, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app-label": "spread", - "app": "model-1-transformer-00001", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - }, - }, - 
ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &one, - MaxReplicas: one, - }, - }, - }, - Status: kservev1beta1.InferenceServiceStatus{ - Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{ - kservev1beta1.PredictorComponent: { - LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name), - }, - }, - }, - }, - }, - { - name: "predictor and transformer with raw deployment mode", - modelSvc: &models.Service{ - Name: modelSvc.Name, - ModelName: modelSvc.ModelName, - ModelVersion: modelSvc.ModelVersion, - Namespace: project.Name, - ArtifactURI: modelSvc.ArtifactURI, - Type: models.ModelTypeTensorflow, - Options: &models.ModelOption{}, - Metadata: modelSvc.Metadata, - Transformer: &models.Transformer{ - Enabled: true, - Image: "ghcr.io/gojek/merlin-transformer-test", - Command: "python", - Args: "main.py", - ResourceRequest: &models.ResourceRequest{ - MinReplica: 1, - MaxReplica: 1, - CPURequest: cpuRequest, - MemoryRequest: memoryRequest, - }, - }, - DeploymentMode: deployment.RawDeploymentMode, - Protocol: protocol.HttpJson, - }, - original: &kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.RawDeployment), - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - }, - }, - }, - exp: 
&kservev1beta1.InferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: modelSvc.Name, - Namespace: project.Name, - Annotations: map[string]string{ - knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage, - kserveconstant.DeploymentMode: string(kserveconstant.RawDeployment), - }, - Labels: map[string]string{ - "gojek.com/app": modelSvc.Metadata.App, - "gojek.com/component": models.ComponentModelVersion, - "gojek.com/environment": testEnvironmentName, - "gojek.com/orchestrator": testOrchestratorName, - "gojek.com/stream": modelSvc.Metadata.Stream, - "gojek.com/team": modelSvc.Metadata.Team, - "sample": "true", - }, - }, - Spec: kservev1beta1.InferenceServiceSpec{ - Predictor: kservev1beta1.PredictorSpec{ - Tensorflow: &kservev1beta1.TFServingSpec{ - PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{ - StorageURI: &storageUri, - Container: corev1.Container{ - Name: kserveconstant.InferenceServiceContainerName, - Resources: resourceRequests, - LivenessProbe: probeConfig, - Env: []corev1.EnvVar{}, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &minReplica, - MaxReplicas: maxReplica, - }, - PodSpec: kservev1beta1.PodSpec{ - TopologySpreadConstraints: []corev1.TopologySpreadConstraint{ - { - MaxSkew: 1, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.ScheduleAnyway, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "isvc.model-1-predictor", - }, - }, - }, - { - MaxSkew: 2, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "isvc.model-1-predictor", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - { - MaxSkew: 3, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - 
LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app-label": "spread", - "app": "isvc.model-1-predictor", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - }, - }, - }, - Transformer: &kservev1beta1.TransformerSpec{ - PodSpec: kservev1beta1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "transformer", - Image: "ghcr.io/gojek/merlin-transformer-test", - Command: []string{"python"}, - Args: []string{"main.py"}, - Env: createDefaultTransformerEnvVars(modelSvc).ToKubernetesEnvVars(), - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: cpuRequest, - corev1.ResourceMemory: memoryRequest, - }, - Limits: corev1.ResourceList{ - corev1.ResourceCPU: cpuLimit, - corev1.ResourceMemory: memoryLimit, - }, - }, - LivenessProbe: transformerProbeConfig, - }, - }, - TopologySpreadConstraints: []corev1.TopologySpreadConstraint{ - { - MaxSkew: 1, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.ScheduleAnyway, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "isvc.model-1-transformer", - }, - }, - }, - { - MaxSkew: 2, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "isvc.model-1-transformer", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - { - MaxSkew: 3, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app-label": "spread", - "app": "isvc.model-1-transformer", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: 
metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - }, - }, - ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{ - MinReplicas: &one, - MaxReplicas: one, - }, - }, - }, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - deployConfig := &config.DeploymentConfig{ - DefaultModelResourceRequests: &config.ResourceRequests{ - MinReplica: minReplica, - MaxReplica: maxReplica, - CPURequest: cpuRequest, - MemoryRequest: memoryRequest, - }, - QueueResourcePercentage: queueResourcePercentage, - TopologySpreadConstraints: []corev1.TopologySpreadConstraint{ - { - MaxSkew: 1, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.ScheduleAnyway, - }, - { - MaxSkew: 2, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - { - MaxSkew: 3, - TopologyKey: "kubernetes.io/hostname", - WhenUnsatisfiable: corev1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app-label": "spread", - }, - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app-expression", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"1"}, - }, - }, - }, - }, - }, - } - - tpl := NewInferenceServiceTemplater(standardTransformerConfig) - infSvcSpec, err := tpl.PatchInferenceServiceSpec(tt.original, tt.modelSvc, deployConfig, tt.deploymentScale) + infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig, tt.deploymentScale) if tt.wantErr { assert.Error(t, err) return @@ -5801,7 +3933,7 @@ func createPyFuncDefaultEnvVarsWithProtocol(svc *models.Service, protocolValue p envVars := models.EnvVars{ models.EnvVar{ Name: envPyFuncModelName, - Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion), + Value: 
models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion, svc.RevisionID.String()), }, models.EnvVar{ Name: envModelName, @@ -5813,7 +3945,7 @@ func createPyFuncDefaultEnvVarsWithProtocol(svc *models.Service, protocolValue p }, models.EnvVar{ Name: envModelFullName, - Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion), + Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion, svc.RevisionID.String()), }, models.EnvVar{ Name: envHTTPPort, diff --git a/api/cluster/virtual_service.go b/api/cluster/virtual_service.go new file mode 100644 index 000000000..d7e07a84b --- /dev/null +++ b/api/cluster/virtual_service.go @@ -0,0 +1,227 @@ +package cluster + +import ( + "context" + "encoding/json" + "fmt" + "net/url" + "strings" + + istiov1beta1 "istio.io/api/networking/v1beta1" + v1beta1 "istio.io/client-go/pkg/apis/networking/v1beta1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + "github.com/caraml-dev/merlin/log" + "github.com/caraml-dev/merlin/models" + "github.com/caraml-dev/merlin/pkg/protocol" + "github.com/mitchellh/copystructure" +) + +const ( + // TODO: Make these configurable + knativeIngressGateway = "knative-serving/knative-ingress-gateway" + defaultIstioIngressGatewayHost = "istio-ingressgateway.istio-system.svc.cluster.local" +) + +type VirtualService struct { + Name string + Namespace string + ModelName string + VersionID string + RevisionID models.ID + Labels map[string]string + Protocol protocol.Protocol + ModelVersionRevisionURL *url.URL +} + +func NewVirtualService(modelService *models.Service, isvcURL string) (*VirtualService, error) { + modelVersionRevisionURL, err := url.Parse(isvcURL) + if err != nil { + return nil, fmt.Errorf("failed to parse model version revision url: %s", isvcURL) + } + + if modelVersionRevisionURL.Scheme == "" { + veURL := "//" + isvcURL + modelVersionRevisionURL, err = url.Parse(veURL) + if err != nil { + return nil, fmt.Errorf("failed to 
parse model version revision url: %s", isvcURL) + } + } + + return &VirtualService{ + Name: fmt.Sprintf("%s-%s-%s", modelService.ModelName, modelService.ModelVersion, models.VirtualServiceComponentType), + Namespace: modelService.Namespace, + ModelName: modelService.ModelName, + VersionID: modelService.ModelVersion, + RevisionID: modelService.RevisionID, + Labels: modelService.Metadata.ToLabel(), + Protocol: modelService.Protocol, + ModelVersionRevisionURL: modelVersionRevisionURL, + }, nil +} + +func (cfg VirtualService) BuildVirtualServiceSpec() (*v1beta1.VirtualService, error) { + modelVersionHost, err := cfg.getModelVersionHost() + if err != nil { + return nil, err + } + + modelVersionRevisionHost := cfg.ModelVersionRevisionURL.Hostname() + modelVersionRevisionPath := cfg.ModelVersionRevisionURL.Path + + vs := &v1beta1.VirtualService{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "networking.istio.io/v1beta1", + Kind: "VirtualService", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: cfg.Name, + Namespace: cfg.Namespace, + Labels: cfg.Labels, + }, + Spec: istiov1beta1.VirtualService{ + Gateways: []string{knativeIngressGateway}, + Hosts: []string{modelVersionHost}, + Http: cfg.createHttpRoutes(modelVersionRevisionHost, modelVersionRevisionPath), + }, + } + + return vs, nil +} + +// getModelVersionHost creates model version endpoint host based on version endpoint's url +func (cfg *VirtualService) getModelVersionHost() (string, error) { + host := strings.Split(cfg.ModelVersionRevisionURL.Hostname(), fmt.Sprintf(".%s.", cfg.Namespace)) + if len(host) != 2 { + return "", fmt.Errorf("invalid version endpoint url: %s. 
failed to split domain: %+v", cfg.ModelVersionRevisionURL, host) + } + + domain := host[1] + return fmt.Sprintf("%s-%s.%s.%s", cfg.ModelName, cfg.VersionID, cfg.Namespace, domain), nil +} + +func (cfg *VirtualService) createHttpRoutes(modelVersionRevisionHost, modelVersionRevisionPath string) []*istiov1beta1.HTTPRoute { + routeDestinations := []*istiov1beta1.HTTPRouteDestination{ + { + Destination: &istiov1beta1.Destination{ + Host: defaultIstioIngressGatewayHost, + }, + Headers: &istiov1beta1.Headers{ + Request: &istiov1beta1.Headers_HeaderOperations{ + Set: map[string]string{ + "Host": modelVersionRevisionHost, + }, + }, + }, + Weight: 100, + }, + } + + switch cfg.Protocol { + case protocol.UpiV1: + return []*istiov1beta1.HTTPRoute{ + { + Route: routeDestinations, + }, + } + + default: + routeDestinationsWithContentType, err := copyRouteDestinations(routeDestinations) + if err != nil { + log.Errorf("failed to copy routeDestinations: %+v", err) + return nil + } + routeDestinationsWithContentType[0].Headers.Request.Set["Content-Type"] = "application/json" + + uri := &istiov1beta1.StringMatch{ + MatchType: &istiov1beta1.StringMatch_Exact{ + Exact: fmt.Sprintf("/v1/models/%s-%s:predict", cfg.ModelName, cfg.VersionID), + }, + } + rewrite := &istiov1beta1.HTTPRewrite{ + Uri: fmt.Sprintf("%s:predict", modelVersionRevisionPath), + } + + return []*istiov1beta1.HTTPRoute{ + // For request to the Predict API without Content-Type header, set the header to application/json + { + Match: []*istiov1beta1.HTTPMatchRequest{ + { + Uri: uri, + Headers: map[string]*istiov1beta1.StringMatch{ + "content-type": {}, + }, + }, + }, + Route: routeDestinationsWithContentType, + Rewrite: rewrite, + }, + // For request to the Predict API with Content-Type header, forward the request to the model version revision + { + Match: []*istiov1beta1.HTTPMatchRequest{ + { + Uri: uri, + }, + }, + Route: routeDestinations, + Rewrite: rewrite, + }, + // For other request (e.g. 
List Models API), forward the request + // Note that we are currently using Kserve V1 Inference protocol (https://kserve.github.io/website/0.11/modelserving/data_plane/v1_protocol/), + // and we are not using API other than Predict API, this route is used as fallback. + { + Route: routeDestinations, + }, + } + } +} + +func (cfg *VirtualService) getInferenceURL(vs *v1beta1.VirtualService) string { + modelVersionHost := vs.Spec.Hosts[0] + + switch cfg.Protocol { + case protocol.UpiV1: + // return only host name + return modelVersionHost + default: + return fmt.Sprintf("http://%s/v1/models/%s-%s:predict", modelVersionHost, cfg.ModelName, cfg.VersionID) + } +} + +// copyTopologySpreadConstraints copies the topology spread constraints using the service builder's as a template +func copyRouteDestinations(src []*istiov1beta1.HTTPRouteDestination) ([]*istiov1beta1.HTTPRouteDestination, error) { + destRaw, err := copystructure.Copy(src) + if err != nil { + return nil, fmt.Errorf("error copying []*HTTPRouteDestination: %w", err) + } + + dest, ok := destRaw.([]*istiov1beta1.HTTPRouteDestination) + if !ok { + return nil, fmt.Errorf("error in type assertion of copied []*HTTPRouteDestination interface: %w", err) + } + + return dest, nil +} + +func (c *controller) deployVirtualService(ctx context.Context, vsCfg *VirtualService) (*v1beta1.VirtualService, error) { + vsSpec, err := vsCfg.BuildVirtualServiceSpec() + if err != nil { + return nil, err + } + + vsJSON, err := json.Marshal(vsSpec) + if err != nil { + return nil, err + } + + forceEnabled := true + + return c.istioClient. + VirtualServices(vsSpec.Namespace). 
+ Patch(ctx, vsCfg.Name, types.ApplyPatchType, vsJSON, metav1.PatchOptions{FieldManager: "application/apply-patch", Force: &forceEnabled}) +} + +func (c *controller) deleteVirtualService(ctx context.Context, name, namespace string) error { + return c.istioClient.VirtualServices(namespace).Delete(ctx, name, metav1.DeleteOptions{}) +} diff --git a/api/cluster/virtual_service_test.go b/api/cluster/virtual_service_test.go new file mode 100644 index 000000000..4acebf6cd --- /dev/null +++ b/api/cluster/virtual_service_test.go @@ -0,0 +1,253 @@ +package cluster + +import ( + "fmt" + "net/url" + "reflect" + "testing" + + "github.com/caraml-dev/merlin/models" + "github.com/caraml-dev/merlin/pkg/protocol" + istiov1beta1 "istio.io/api/networking/v1beta1" +) + +func TestVirtualService_getModelVersionHost(t *testing.T) { + defaultModelVersionRevisionURL, _ := url.Parse("http://test-model-1-1.test-namespace.caraml.dev") + + type fields struct { + Name string + Namespace string + ModelName string + VersionID string + RevisionID models.ID + Labels map[string]string + Protocol protocol.Protocol + ModelVersionRevisionURL *url.URL + } + tests := []struct { + name string + fields fields + want string + wantErr bool + }{ + { + name: "1", + fields: fields{ + Name: "test-model-1", + Namespace: "test-namespace", + ModelName: "test-model", + VersionID: "1", + RevisionID: models.ID(1), + Labels: map[string]string{}, + Protocol: protocol.HttpJson, + ModelVersionRevisionURL: defaultModelVersionRevisionURL, + }, + want: "test-model-1.test-namespace.caraml.dev", + wantErr: false, + }, + { + name: "2", + fields: fields{ + Name: "test-model-1", + Namespace: "test-namespace", + ModelName: "test-model", + VersionID: "1", + RevisionID: models.ID(1), + Labels: map[string]string{}, + Protocol: protocol.HttpJson, + ModelVersionRevisionURL: defaultModelVersionRevisionURL, + }, + want: "test-model-1.test-namespace.caraml.dev", + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, 
func(t *testing.T) { + cfg := &VirtualService{ + Name: tt.fields.Name, + Namespace: tt.fields.Namespace, + ModelName: tt.fields.ModelName, + VersionID: tt.fields.VersionID, + RevisionID: tt.fields.RevisionID, + Labels: tt.fields.Labels, + Protocol: tt.fields.Protocol, + ModelVersionRevisionURL: tt.fields.ModelVersionRevisionURL, + } + got, err := cfg.getModelVersionHost() + if (err != nil) != tt.wantErr { + t.Errorf("VirtualService.getModelVersionHost() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("VirtualService.getModelVersionHost() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestVirtualService_createHttpRoutes(t *testing.T) { + type fields struct { + Name string + Namespace string + ModelName string + VersionID string + RevisionID models.ID + Labels map[string]string + Protocol protocol.Protocol + ModelVersionRevisionURL *url.URL + } + type args struct { + modelVersionRevisionHost string + modelVersionRevisionPath string + } + tests := []struct { + name string + fields fields + args args + want []*istiov1beta1.HTTPRoute + }{ + { + name: "http", + fields: fields{ + Name: "test-model-1", + ModelName: "test-model", + VersionID: "1", + Protocol: protocol.HttpJson, + }, + args: args{ + modelVersionRevisionHost: "test-model-1-1.test-namespace.caraml.dev", + modelVersionRevisionPath: "/v1/models/test-model-1-1", + }, + want: []*istiov1beta1.HTTPRoute{ + { + Match: []*istiov1beta1.HTTPMatchRequest{ + { + Uri: &istiov1beta1.StringMatch{ + MatchType: &istiov1beta1.StringMatch_Exact{ + Exact: "/v1/models/test-model-1:predict", + }, + }, + Headers: map[string]*istiov1beta1.StringMatch{ + "content-type": {}, + }, + }, + }, + Route: []*istiov1beta1.HTTPRouteDestination{ + { + Destination: &istiov1beta1.Destination{ + Host: defaultIstioIngressGatewayHost, + }, + Headers: &istiov1beta1.Headers{ + Request: &istiov1beta1.Headers_HeaderOperations{ + Set: map[string]string{ + "Content-Type": "application/json", + "Host": 
"test-model-1-1.test-namespace.caraml.dev", + }, + }, + }, + Weight: 100, + }, + }, + Rewrite: &istiov1beta1.HTTPRewrite{ + Uri: fmt.Sprintf("%s:predict", "/v1/models/test-model-1-1"), + }, + }, + { + Match: []*istiov1beta1.HTTPMatchRequest{ + { + Uri: &istiov1beta1.StringMatch{ + MatchType: &istiov1beta1.StringMatch_Exact{ + Exact: "/v1/models/test-model-1:predict", + }, + }, + }, + }, + Route: []*istiov1beta1.HTTPRouteDestination{ + { + Destination: &istiov1beta1.Destination{ + Host: defaultIstioIngressGatewayHost, + }, + Headers: &istiov1beta1.Headers{ + Request: &istiov1beta1.Headers_HeaderOperations{ + Set: map[string]string{ + "Host": "test-model-1-1.test-namespace.caraml.dev", + }, + }, + }, + Weight: 100, + }, + }, + Rewrite: &istiov1beta1.HTTPRewrite{ + Uri: fmt.Sprintf("%s:predict", "/v1/models/test-model-1-1"), + }, + }, + { + Route: []*istiov1beta1.HTTPRouteDestination{ + { + Destination: &istiov1beta1.Destination{ + Host: defaultIstioIngressGatewayHost, + }, + Headers: &istiov1beta1.Headers{ + Request: &istiov1beta1.Headers_HeaderOperations{ + Set: map[string]string{ + "Host": "test-model-1-1.test-namespace.caraml.dev", + }, + }, + }, + Weight: 100, + }, + }, + }, + }, + }, + { + name: "upi", + fields: fields{ + Name: "test-model-1", + ModelName: "test-model", + VersionID: "1", + Protocol: protocol.UpiV1, + }, + args: args{ + modelVersionRevisionHost: "test-model-1-1.test-namespace.caraml.dev", + modelVersionRevisionPath: "/v1/models/test-model-1-1", + }, + want: []*istiov1beta1.HTTPRoute{ + { + Route: []*istiov1beta1.HTTPRouteDestination{ + { + Destination: &istiov1beta1.Destination{ + Host: defaultIstioIngressGatewayHost, + }, + Headers: &istiov1beta1.Headers{ + Request: &istiov1beta1.Headers_HeaderOperations{ + Set: map[string]string{ + "Host": "test-model-1-1.test-namespace.caraml.dev", + }, + }, + }, + Weight: 100, + }, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cfg := &VirtualService{ + Name: 
tt.fields.Name, + Namespace: tt.fields.Namespace, + ModelName: tt.fields.ModelName, + VersionID: tt.fields.VersionID, + RevisionID: tt.fields.RevisionID, + Labels: tt.fields.Labels, + Protocol: tt.fields.Protocol, + ModelVersionRevisionURL: tt.fields.ModelVersionRevisionURL, + } + if got := cfg.createHttpRoutes(tt.args.modelVersionRevisionHost, tt.args.modelVersionRevisionPath); !reflect.DeepEqual(got, tt.want) { + t.Errorf("VirtualService.createHttpRoutes() =\n%v\n, want\n%v", got, tt.want) + } + }) + } +} diff --git a/api/cmd/api/main.go b/api/cmd/api/main.go index c11cc0157..531e08eaf 100644 --- a/api/cmd/api/main.go +++ b/api/cmd/api/main.go @@ -295,6 +295,7 @@ func buildDependencies(ctx context.Context, cfg *config.Config, db *gorm.DB, dis versionsService := service.NewVersionsService(db, mlpAPIClient) environmentService := initEnvironmentService(cfg, db) secretService := service.NewSecretService(mlpAPIClient) + deploymentService := service.NewDeploymentService(storage.NewDeploymentStorage(db)) gitlabConfig := cfg.FeatureToggleConfig.AlertConfig.GitlabConfig gitlabClient, err := gitlab.NewClient(gitlabConfig.BaseURL, gitlabConfig.Token) @@ -327,6 +328,7 @@ func buildDependencies(ctx context.Context, cfg *config.Config, db *gorm.DB, dis DB: db, Enforcer: authEnforcer, + DeploymentService: deploymentService, EnvironmentService: environmentService, ProjectsService: projectsService, ModelsService: modelsService, diff --git a/api/cmd/inference-logger/main.go b/api/cmd/inference-logger/main.go index cd5d2231b..67bfd7a33 100644 --- a/api/cmd/inference-logger/main.go +++ b/api/cmd/inference-logger/main.go @@ -9,6 +9,7 @@ import ( "net/http/httputil" "net/url" "os" + "regexp" "strings" "time" @@ -269,9 +270,22 @@ func getModelNameAndVersion(inferenceServiceName string) (modelName string, mode return inferenceServiceName, "1" } - idx := strings.LastIndex(inferenceServiceName, "-") - modelName = inferenceServiceName[:idx] - modelVersion = inferenceServiceName[idx+1:] + 
// regex to match string that contains revision number at the end + // e.g. my-model-1-r1 + re := regexp.MustCompile(`-r\d+$`) + + // for backward compatibility + if !re.MatchString(inferenceServiceName) { + idx := strings.LastIndex(inferenceServiceName, "-") + modelName = inferenceServiceName[:idx] + modelVersion = inferenceServiceName[idx+1:] + return + } + + withoutRevision := re.ReplaceAllString(inferenceServiceName, "") + idx := strings.LastIndex(withoutRevision, "-") + modelName = withoutRevision[:idx] + modelVersion = withoutRevision[idx+1:] return } diff --git a/api/cmd/inference-logger/main_test.go b/api/cmd/inference-logger/main_test.go index b121bd264..620e4e622 100644 --- a/api/cmd/inference-logger/main_test.go +++ b/api/cmd/inference-logger/main_test.go @@ -6,13 +6,6 @@ import ( "github.com/stretchr/testify/assert" ) -func TestGetModelVersion(t *testing.T) { - modelName, modelVersion := getModelNameAndVersion("my-model-1") - - assert.Equal(t, "my-model", modelName) - assert.Equal(t, "1", modelVersion) -} - func TestGetTopicName(t *testing.T) { assert.Equal(t, "merlin-my-project-my-model-inference-log", getTopicName(getServiceName("my-project", "my-model"))) } @@ -57,3 +50,59 @@ func Test_getNewRelicAPIKey(t *testing.T) { }) } } + +func Test_getModelNameAndVersion(t *testing.T) { + type args struct { + inferenceServiceName string + } + tests := []struct { + name string + args args + wantModelName string + wantModelVersion string + }{ + { + name: "without revision", + args: args{ + inferenceServiceName: "my-model-1", + }, + wantModelName: "my-model", + wantModelVersion: "1", + }, + { + name: "with revision", + args: args{ + inferenceServiceName: "my-model-1-r1", + }, + wantModelName: "my-model", + wantModelVersion: "1", + }, + { + name: "without revision and model name contain number", + args: args{ + inferenceServiceName: "my-model-0-1-2-10", + }, + wantModelName: "my-model-0-1-2", + wantModelVersion: "10", + }, + { + name: "with revision and model name 
contain number", + args: args{ + inferenceServiceName: "my-model-0-1-2-10-r11", + }, + wantModelName: "my-model-0-1-2", + wantModelVersion: "10", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotModelName, gotModelVersion := getModelNameAndVersion(tt.args.inferenceServiceName) + if gotModelName != tt.wantModelName { + t.Errorf("getModelNameAndVersion() gotModelName = %v, want %v", gotModelName, tt.wantModelName) + } + if gotModelVersion != tt.wantModelVersion { + t.Errorf("getModelNameAndVersion() gotModelVersion = %v, want %v", gotModelVersion, tt.wantModelVersion) + } + }) + } +} diff --git a/api/models/container.go b/api/models/container.go index e8df9bd4e..19777c2c8 100644 --- a/api/models/container.go +++ b/api/models/container.go @@ -30,6 +30,7 @@ const ( PredictorComponentType = "predictor" TransformerComponentType = "transformer" PDBComponentType = "pdb" // Pod disruption budget + VirtualServiceComponentType = "vs" BatchJobDriverComponentType = "batch_job_driver" BatchJobExecutorComponentType = "batch_job_executor" ) @@ -76,8 +77,8 @@ func componentType(containerName, podName string) string { return componentType } -func OnlineInferencePodLabelSelector(modelName string, versionID string) string { - serviceName := CreateInferenceServiceName(modelName, versionID) +func OnlineInferencePodLabelSelector(modelName, versionID, revisionID string) string { + serviceName := CreateInferenceServiceName(modelName, versionID, revisionID) return fmt.Sprintf(onlineInferenceLabelTemplate, serviceName) } diff --git a/api/models/container_test.go b/api/models/container_test.go index 7982bc6b5..fd4fe6908 100644 --- a/api/models/container_test.go +++ b/api/models/container_test.go @@ -24,8 +24,9 @@ import ( func TestOnlineInferencePodLabelSelector(t *testing.T) { modelName := "my-model" versionID := "1" - result := OnlineInferencePodLabelSelector(modelName, versionID) - assert.Equal(t, "serving.kserve.io/inferenceservice=my-model-1", result) + 
revisionID := "1" + result := OnlineInferencePodLabelSelector(modelName, versionID, revisionID) + assert.Equal(t, "serving.kserve.io/inferenceservice=my-model-1-r1", result) } func TestBatchInferencePodLabelSelector(t *testing.T) { @@ -51,14 +52,14 @@ func TestNewContainer(t *testing.T) { "model", args{ name: "kfserving-container", - podName: "test-1-predictor-12345-deployment", + podName: "test-1-1-predictor-12345-deployment", namespace: "sample", cluster: "test", gcpProject: "test-project", }, &Container{ Name: "kfserving-container", - PodName: "test-1-predictor-12345-deployment", + PodName: "test-1-1-predictor-12345-deployment", ComponentType: "model", Namespace: "sample", Cluster: "test", @@ -69,14 +70,14 @@ func TestNewContainer(t *testing.T) { "transformer", args{ name: "transformer", - podName: "test-1-transformer-12345-deployment", + podName: "test-1-1-transformer-12345-deployment", namespace: "sample", cluster: "test", gcpProject: "test-project", }, &Container{ Name: "transformer", - PodName: "test-1-transformer-12345-deployment", + PodName: "test-1-1-transformer-12345-deployment", ComponentType: "transformer", Namespace: "sample", Cluster: "test", diff --git a/api/models/deployment.go b/api/models/deployment.go index 8f8a189e1..c9e998206 100644 --- a/api/models/deployment.go +++ b/api/models/deployment.go @@ -20,8 +20,8 @@ import "github.com/google/uuid" type Deployment struct { ID ID `json:"id"` ProjectID ID `json:"project_id"` - VersionID ID `json:"version_id"` VersionModelID ID `json:"model_id"` + VersionID ID `json:"version_id"` VersionEndpointID uuid.UUID `json:"version_endpoint_id"` Status EndpointStatus `json:"status"` Error string `json:"error"` diff --git a/api/models/service.go b/api/models/service.go index f33bd6c03..868e15590 100644 --- a/api/models/service.go +++ b/api/models/service.go @@ -27,10 +27,15 @@ import ( "knative.dev/pkg/apis" ) +const ( + revisionPrefix = "r" +) + type Service struct { Name string ModelName string ModelVersion 
string + RevisionID ID Namespace string ServiceName string URL string @@ -45,13 +50,16 @@ type Service struct { DeploymentMode deployment.Mode AutoscalingPolicy *autoscaling.AutoscalingPolicy Protocol protocol.Protocol + // CurrentIsvcName is the name of the current running/serving InferenceService's revision + CurrentIsvcName string } func NewService(model *Model, version *Version, modelOpt *ModelOption, endpoint *VersionEndpoint) *Service { return &Service{ - Name: CreateInferenceServiceName(model.Name, version.ID.String()), + Name: CreateInferenceServiceName(model.Name, version.ID.String(), endpoint.RevisionID.String()), ModelName: model.Name, ModelVersion: version.ID.String(), + RevisionID: endpoint.RevisionID, Namespace: model.Project.Name, ArtifactURI: version.ArtifactURI, Type: model.Type, @@ -70,6 +78,7 @@ func NewService(model *Model, version *Version, modelOpt *ModelOption, endpoint DeploymentMode: endpoint.DeploymentMode, AutoscalingPolicy: endpoint.AutoscalingPolicy, Protocol: endpoint.Protocol, + CurrentIsvcName: endpoint.InferenceServiceName, } } @@ -99,8 +108,12 @@ func MergeProjectVersionLabels(projectLabels mlp.Labels, versionLabels KV) mlp.L return projectLabels } -func CreateInferenceServiceName(modelName string, versionID string) string { - return fmt.Sprintf("%s-%s", modelName, versionID) +func CreateInferenceServiceName(modelName, versionID, revisionID string) string { + if revisionID == "" || revisionID == "0" { + // This is for backward compatibility, when the endpoint / isvc name didn't include the revision number + return fmt.Sprintf("%s-%s", modelName, versionID) + } + return fmt.Sprintf("%s-%s-%s%s", modelName, versionID, revisionPrefix, revisionID) } func GetInferenceURL(url *apis.URL, inferenceServiceName string, protocolValue protocol.Protocol) string { diff --git a/api/models/service_test.go b/api/models/service_test.go index f6d611bdb..62dffb03b 100644 --- a/api/models/service_test.go +++ b/api/models/service_test.go @@ -1,6 +1,7 @@ 
package models import ( + "fmt" "reflect" "testing" @@ -179,7 +180,10 @@ func TestNewService(t *testing.T) { project := mlp.Project{Name: "project", Labels: mlpLabels} model := &Model{Name: "model", Project: project} version := &Version{ID: 1, Labels: versionLabels} - endpoint := &VersionEndpoint{} + revisionID := ID(1) + endpoint := &VersionEndpoint{ + RevisionID: revisionID, + } type args struct { model *Model @@ -201,9 +205,10 @@ func TestNewService(t *testing.T) { endpoint: endpoint, }, want: &Service{ - Name: CreateInferenceServiceName(model.Name, version.ID.String()), + Name: fmt.Sprintf("%s-%s-r%s", model.Name, version.ID.String(), revisionID), ModelName: model.Name, ModelVersion: version.ID.String(), + RevisionID: revisionID, Namespace: model.Project.Name, ArtifactURI: version.ArtifactURI, Type: model.Type, diff --git a/api/models/version_endpoint.go b/api/models/version_endpoint.go index 8eabda047..8acd6e770 100644 --- a/api/models/version_endpoint.go +++ b/api/models/version_endpoint.go @@ -15,7 +15,6 @@ package models import ( - "fmt" "net/url" "github.com/caraml-dev/merlin/pkg/autoscaling" @@ -31,12 +30,14 @@ import ( type VersionEndpoint struct { // ID unique id of the version endpoint ID uuid.UUID `json:"id" gorm:"type:uuid;primary_key;"` + // VersionModelID model id from which the version endpoint is created + VersionModelID ID `json:"model_id"` // VersionID model version id from which the version endpoint is created // The field name has to be prefixed with the related struct name // in order for gorm Preload to work with references VersionID ID `json:"version_id"` - // VersionModelID model id from which the version endpoint is created - VersionModelID ID `json:"model_id"` + // RevisionID defines the revision of the current model version + RevisionID ID `json:"revision_id"` // Status status of the version endpoint Status EndpointStatus `json:"status"` // URL url of the version endpoint @@ -88,19 +89,19 @@ func NewVersionEndpoint(env *Environment, 
project mlp.Project, model *Model, ver } ve := &VersionEndpoint{ - ID: id, - VersionID: version.ID, - VersionModelID: version.ModelID, - Namespace: project.Name, - InferenceServiceName: fmt.Sprintf("%s-%s", model.Name, version.ID.String()), - Status: EndpointPending, - EnvironmentName: env.Name, - Environment: env, - ResourceRequest: env.DefaultResourceRequest, - DeploymentMode: deploymentMode, - AutoscalingPolicy: autoscalingPolicy, - EnvVars: envVars, - Protocol: protocol.HttpJson, + ID: id, + VersionModelID: version.ModelID, + VersionID: version.ID, + RevisionID: ID(0), + Namespace: project.Name, + Status: EndpointPending, + EnvironmentName: env.Name, + Environment: env, + ResourceRequest: env.DefaultResourceRequest, + DeploymentMode: deploymentMode, + AutoscalingPolicy: autoscalingPolicy, + EnvVars: envVars, + Protocol: protocol.HttpJson, } if monitoringConfig.MonitoringEnabled { diff --git a/api/queue/work/model_service_deployment.go b/api/queue/work/model_service_deployment.go index 4dd73a567..ce8f4ace4 100644 --- a/api/queue/work/model_service_deployment.go +++ b/api/queue/work/model_service_deployment.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "time" "github.com/caraml-dev/merlin/cluster" "github.com/caraml-dev/merlin/log" @@ -23,7 +24,7 @@ var deploymentCounter = prometheus.NewCounterVec( Namespace: "merlin_api", Help: "Number of deployment", }, - []string{"project", "model", "status"}, + []string{"project", "model", "status", "redeploy"}, ) var dataArgKey = "data" @@ -49,6 +50,7 @@ type EndpointJob struct { func (depl *ModelServiceDeployment) Deploy(job *queue.Job) error { ctx := context.Background() + data := job.Arguments[dataArgKey] byte, _ := json.Marshal(data) var jobArgs EndpointJob @@ -71,32 +73,51 @@ func (depl *ModelServiceDeployment) Deploy(job *queue.Job) error { version := jobArgs.Version project := jobArgs.Project model := jobArgs.Model + model.Project = project + + isRedeployment := false // Need to reassign destionationURL 
cause it is ignored when marshalled and unmarshalled if endpoint.Logger != nil { endpoint.Logger.DestinationURL = depl.LoggerDestinationURL } - model.Project = project - log.Infof("creating deployment for model %s version %s with endpoint id: %s", model.Name, endpoint.VersionID, endpoint.ID) - - // copy endpoint to avoid race condition + endpoint.RevisionID++ endpoint.Status = models.EndpointFailed + + // for backward compatibility, if inference service name is not empty, it means we are redeploying the "legacy" endpoint that created prior to model version revision introduction + // for future compatibility, if endpoint.RevisionID > 1, it means we are redeploying the endpoint that created after model version revision introduction + if endpoint.InferenceServiceName != "" || endpoint.RevisionID > 1 { + isRedeployment = true + endpoint.Status = endpointArg.Status + } + + log.Infof("creating deployment for model %s version %s revision %s with endpoint id: %s", model.Name, endpoint.VersionID, endpoint.RevisionID, endpoint.ID) + + // record the deployment process + deployment, err := depl.DeploymentStorage.Save(&models.Deployment{ + ProjectID: model.ProjectID, + VersionModelID: model.ID, + VersionID: endpoint.VersionID, + VersionEndpointID: endpoint.ID, + Status: models.EndpointPending, + }) + if err != nil { + log.Warnf("unable to create deployment history", err) + } + defer func() { - deploymentCounter.WithLabelValues(model.Project.Name, model.Name, string(endpoint.Status)).Inc() + deploymentCounter.WithLabelValues(model.Project.Name, model.Name, fmt.Sprint(endpoint.Status), fmt.Sprint(isRedeployment)).Inc() // record the deployment result - if _, err := depl.DeploymentStorage.Save(&models.Deployment{ - ProjectID: model.ProjectID, - VersionModelID: model.ID, - VersionID: endpoint.VersionID, - VersionEndpointID: endpoint.ID, - Status: endpoint.Status, - Error: endpoint.Message, - }); err != nil { - log.Warnf("unable to insert deployment history", err) + 
deployment.Status = endpoint.Status + deployment.Error = endpoint.Message + deployment.UpdatedAt = time.Now() + if _, err := depl.DeploymentStorage.Save(deployment); err != nil { + log.Warnf("unable to update deployment history", err) } + // record the version endpoint result if err := depl.Storage.Save(endpoint); err != nil { log.Errorf("unable to update endpoint status for model: %s, version: %s, reason: %v", model.Name, version.ID, err) } @@ -113,6 +134,7 @@ func (depl *ModelServiceDeployment) Deploy(job *queue.Job) error { if !ok { return fmt.Errorf("unable to find cluster controller for environment %s", endpoint.EnvironmentName) } + svc, err := ctl.Deploy(ctx, modelService) if err != nil { log.Errorf("unable to deploy version endpoint for model: %s, version: %s, reason: %v", model.Name, version.ID, err) @@ -120,6 +142,7 @@ func (depl *ModelServiceDeployment) Deploy(job *queue.Job) error { return err } + // By reaching this point, the deployment is successful endpoint.URL = svc.URL previousStatus := endpointArg.Status if previousStatus == models.EndpointServing { @@ -128,6 +151,9 @@ func (depl *ModelServiceDeployment) Deploy(job *queue.Job) error { endpoint.Status = models.EndpointRunning } endpoint.ServiceName = svc.ServiceName + endpoint.InferenceServiceName = svc.CurrentIsvcName + endpoint.Message = "" // reset message + return nil } diff --git a/api/queue/work/model_service_deployment_test.go b/api/queue/work/model_service_deployment_test.go index 5f2d05244..df816cd18 100644 --- a/api/queue/work/model_service_deployment_test.go +++ b/api/queue/work/model_service_deployment_test.go @@ -8,6 +8,7 @@ import ( "github.com/caraml-dev/merlin/cluster" clusterMock "github.com/caraml-dev/merlin/cluster/mocks" + "github.com/caraml-dev/merlin/log" "github.com/caraml-dev/merlin/mlp" "github.com/caraml-dev/merlin/models" imageBuilderMock "github.com/caraml-dev/merlin/pkg/imagebuilder/mocks" @@ -55,9 +56,9 @@ func TestExecuteDeployment(t *testing.T) { project := 
mlp.Project{Name: "project", Labels: mlpLabels} model := &models.Model{Name: "model", Project: project} version := &models.Version{ID: 1, Labels: versionLabels} - iSvcName := fmt.Sprintf("%s-%d", model.Name, version.ID) - svcName := fmt.Sprintf("%s-%d.project.svc.cluster.local", model.Name, version.ID) - url := fmt.Sprintf("%s-%d.example.com", model.Name, version.ID) + iSvcName := fmt.Sprintf("%s-%d-1", model.Name, version.ID) + svcName := fmt.Sprintf("%s-%d-1.project.svc.cluster.local", model.Name, version.ID) + url := fmt.Sprintf("%s-%d-1.example.com", model.Name, version.ID) tests := []struct { name string @@ -81,6 +82,7 @@ func TestExecuteDeployment(t *testing.T) { }, deploymentStorage: func() *mocks.DeploymentStorage { mockStorage := &mocks.DeploymentStorage{} + mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil) mockStorage.On("Save", mock.Anything).Return(nil, nil) return mockStorage }, @@ -88,12 +90,11 @@ func TestExecuteDeployment(t *testing.T) { mockStorage := &mocks.VersionEndpointStorage{} mockStorage.On("Save", mock.Anything).Return(nil) mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{ - Environment: env, - EnvironmentName: env.Name, - ResourceRequest: env.DefaultResourceRequest, - VersionID: version.ID, - Namespace: project.Name, - InferenceServiceName: iSvcName, + Environment: env, + EnvironmentName: env.Name, + ResourceRequest: env.DefaultResourceRequest, + VersionID: version.ID, + Namespace: project.Name, }, nil) return mockStorage }, @@ -125,6 +126,7 @@ func TestExecuteDeployment(t *testing.T) { }, deploymentStorage: func() *mocks.DeploymentStorage { mockStorage := &mocks.DeploymentStorage{} + mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil) mockStorage.On("Save", mock.Anything).Return(nil, nil) return mockStorage }, @@ -132,12 +134,11 @@ func TestExecuteDeployment(t *testing.T) { mockStorage := &mocks.VersionEndpointStorage{} mockStorage.On("Save", mock.Anything).Return(nil) 
mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{ - Environment: env, - EnvironmentName: env.Name, - ResourceRequest: env.DefaultResourceRequest, - VersionID: version.ID, - Namespace: project.Name, - InferenceServiceName: iSvcName, + Environment: env, + EnvironmentName: env.Name, + ResourceRequest: env.DefaultResourceRequest, + VersionID: version.ID, + Namespace: project.Name, }, nil) return mockStorage }, @@ -169,6 +170,7 @@ func TestExecuteDeployment(t *testing.T) { }, deploymentStorage: func() *mocks.DeploymentStorage { mockStorage := &mocks.DeploymentStorage{} + mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil) mockStorage.On("Save", mock.Anything).Return(nil, nil) return mockStorage }, @@ -176,12 +178,11 @@ func TestExecuteDeployment(t *testing.T) { mockStorage := &mocks.VersionEndpointStorage{} mockStorage.On("Save", mock.Anything).Return(nil) mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{ - Environment: env, - EnvironmentName: env.Name, - ResourceRequest: env.DefaultResourceRequest, - VersionID: version.ID, - Namespace: project.Name, - InferenceServiceName: iSvcName, + Environment: env, + EnvironmentName: env.Name, + ResourceRequest: env.DefaultResourceRequest, + VersionID: version.ID, + Namespace: project.Name, }, nil) return mockStorage }, @@ -215,6 +216,7 @@ func TestExecuteDeployment(t *testing.T) { }, deploymentStorage: func() *mocks.DeploymentStorage { mockStorage := &mocks.DeploymentStorage{} + mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil) mockStorage.On("Save", mock.Anything).Return(nil, nil) return mockStorage }, @@ -222,12 +224,11 @@ func TestExecuteDeployment(t *testing.T) { mockStorage := &mocks.VersionEndpointStorage{} mockStorage.On("Save", mock.Anything).Return(nil) mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{ - Environment: env, - EnvironmentName: env.Name, - ResourceRequest: env.DefaultResourceRequest, - VersionID: version.ID, - 
Namespace: project.Name, - InferenceServiceName: iSvcName, + Environment: env, + EnvironmentName: env.Name, + ResourceRequest: env.DefaultResourceRequest, + VersionID: version.ID, + Namespace: project.Name, }, nil) return mockStorage }, @@ -262,6 +263,7 @@ func TestExecuteDeployment(t *testing.T) { }, deploymentStorage: func() *mocks.DeploymentStorage { mockStorage := &mocks.DeploymentStorage{} + mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil) mockStorage.On("Save", mock.Anything).Return(nil, nil) return mockStorage }, @@ -269,12 +271,11 @@ func TestExecuteDeployment(t *testing.T) { mockStorage := &mocks.VersionEndpointStorage{} mockStorage.On("Save", mock.Anything).Return(nil) mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{ - Environment: env, - EnvironmentName: env.Name, - ResourceRequest: env.DefaultResourceRequest, - VersionID: version.ID, - Namespace: project.Name, - InferenceServiceName: iSvcName, + Environment: env, + EnvironmentName: env.Name, + ResourceRequest: env.DefaultResourceRequest, + VersionID: version.ID, + Namespace: project.Name, }, nil) return mockStorage }, @@ -301,29 +302,334 @@ func TestExecuteDeployment(t *testing.T) { }, deploymentStorage: func() *mocks.DeploymentStorage { mockStorage := &mocks.DeploymentStorage{} + mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil) mockStorage.On("Save", mock.Anything).Return(nil, nil) return mockStorage }, storage: func() *mocks.VersionEndpointStorage { mockStorage := &mocks.VersionEndpointStorage{} mockStorage.On("Save", mock.Anything).Return(nil) + mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{ + Environment: env, + EnvironmentName: env.Name, + ResourceRequest: env.DefaultResourceRequest, + VersionID: version.ID, + Namespace: project.Name, + }, nil) + return mockStorage + }, + controller: func() *clusterMock.Controller { + ctrl := &clusterMock.Controller{} + return ctrl + }, + imageBuilder: func() 
*imageBuilderMock.ImageBuilder { + mockImgBuilder := &imageBuilderMock.ImageBuilder{} + mockImgBuilder.On("BuildImage", context.Background(), mock.Anything, mock.Anything, mock.Anything).Return("", errors.New("Failed to build image")) + return mockImgBuilder + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctrl := tt.controller() + controllers := map[string]cluster.Controller{env.Name: ctrl} + imgBuilder := tt.imageBuilder() + mockStorage := tt.storage() + mockDeploymentStorage := tt.deploymentStorage() + job := &queue.Job{ + Name: "job", + Arguments: queue.Arguments{ + dataArgKey: EndpointJob{ + Endpoint: tt.endpoint, + Version: tt.version, + Model: tt.model, + Project: tt.model.Project, + }, + }, + } + svc := &ModelServiceDeployment{ + ClusterControllers: controllers, + ImageBuilder: imgBuilder, + Storage: mockStorage, + DeploymentStorage: mockDeploymentStorage, + LoggerDestinationURL: loggerDestinationURL, + } + + err := svc.Deploy(job) + assert.Equal(t, tt.deployErr, err) + + if len(ctrl.ExpectedCalls) > 0 && ctrl.ExpectedCalls[0].ReturnArguments[0] != nil { + deployedSvc := ctrl.ExpectedCalls[0].ReturnArguments[0].(*models.Service) + assert.Equal(t, svcMetadata, deployedSvc.Metadata) + assert.Equal(t, iSvcName, deployedSvc.Name) + } + + mockStorage.AssertNumberOfCalls(t, "Save", 1) + mockDeploymentStorage.AssertNumberOfCalls(t, "Save", 2) + + savedEndpoint := mockStorage.Calls[1].Arguments[0].(*models.VersionEndpoint) + assert.Equal(t, tt.model.ID, savedEndpoint.VersionModelID) + assert.Equal(t, tt.version.ID, savedEndpoint.VersionID) + assert.Equal(t, tt.model.Project.Name, savedEndpoint.Namespace) + assert.Equal(t, env.Name, savedEndpoint.EnvironmentName) + + if tt.endpoint.ResourceRequest != nil { + assert.Equal(t, tt.endpoint.ResourceRequest, savedEndpoint.ResourceRequest) + } else { + assert.Equal(t, env.DefaultResourceRequest, savedEndpoint.ResourceRequest) + } + + if tt.deployErr != nil { + assert.Equal(t, 
models.EndpointFailed, savedEndpoint.Status) + } else { + assert.Equal(t, models.EndpointRunning, savedEndpoint.Status) + assert.Equal(t, url, savedEndpoint.URL) + assert.Equal(t, "", savedEndpoint.InferenceServiceName) + } + }) + } +} + +func TestExecuteRedeployment(t *testing.T) { + isDefaultTrue := true + loggerDestinationURL := "http://logger.default" + + env := &models.Environment{ + Name: "env1", + Cluster: "cluster1", + IsDefault: &isDefaultTrue, + Region: "id", + GcpProject: "project", + DefaultResourceRequest: &models.ResourceRequest{ + MinReplica: 0, + MaxReplica: 1, + CPURequest: resource.MustParse("1"), + MemoryRequest: resource.MustParse("1Gi"), + }, + } + + mlpLabels := mlp.Labels{ + {Key: "key-1", Value: "value-1"}, + } + + versionLabels := models.KV{ + "key-1": "value-11", + "key-2": "value-2", + } + + svcMetadata := models.Metadata{ + Labels: mlp.Labels{ + {Key: "key-1", Value: "value-11"}, + {Key: "key-2", Value: "value-2"}, + }, + } + + project := mlp.Project{Name: "project", Labels: mlpLabels} + model := &models.Model{Name: "model", Project: project} + version := &models.Version{ID: 1, Labels: versionLabels} + + // currentIsvcName := fmt.Sprintf("%s-%d-1", model.Name, version.ID) + // currentSvcName := fmt.Sprintf("%s-%d-1.project.svc.cluster.local", model.Name, version.ID) + // currentUrl := fmt.Sprintf("%s-%d-1.example.com", model.Name, version.ID) + modelSvcName := fmt.Sprintf("%s-%d-2", model.Name, version.ID) + svcName := fmt.Sprintf("%s-%d-2.project.svc.cluster.local", model.Name, version.ID) + url := fmt.Sprintf("%s-%d-2.example.com", model.Name, version.ID) + + tests := []struct { + name string + endpoint *models.VersionEndpoint + model *models.Model + version *models.Version + expectedEndpointStatus models.EndpointStatus + deployErr error + deploymentStorage func() *mocks.DeploymentStorage + storage func() *mocks.VersionEndpointStorage + controller func() *clusterMock.Controller + imageBuilder func() *imageBuilderMock.ImageBuilder + }{ 
+ { + name: "Success: Redeploy running endpoint", + model: model, + version: version, + endpoint: &models.VersionEndpoint{ + EnvironmentName: env.Name, + ResourceRequest: env.DefaultResourceRequest, + VersionID: version.ID, + Status: models.EndpointRunning, + }, + expectedEndpointStatus: models.EndpointRunning, + deploymentStorage: func() *mocks.DeploymentStorage { + mockStorage := &mocks.DeploymentStorage{} + mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil) + mockStorage.On("Save", mock.Anything).Return(nil, nil) + return mockStorage + }, + storage: func() *mocks.VersionEndpointStorage { + mockStorage := &mocks.VersionEndpointStorage{} mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{ Environment: env, EnvironmentName: env.Name, ResourceRequest: env.DefaultResourceRequest, VersionID: version.ID, Namespace: project.Name, - InferenceServiceName: iSvcName, + RevisionID: models.ID(1), + InferenceServiceName: fmt.Sprintf("%s-%d-1", model.Name, version.ID), + Status: models.EndpointRunning, }, nil) + mockStorage.On("Save", &models.VersionEndpoint{ + Environment: env, + EnvironmentName: env.Name, + ResourceRequest: env.DefaultResourceRequest, + VersionID: version.ID, + Namespace: project.Name, + RevisionID: models.ID(2), + InferenceServiceName: modelSvcName, + Status: models.EndpointRunning, + URL: url, + ServiceName: svcName, + }).Return(nil) return mockStorage }, controller: func() *clusterMock.Controller { ctrl := &clusterMock.Controller{} + ctrl.On("Deploy", mock.Anything, mock.Anything). 
+ Return(&models.Service{ + Name: fmt.Sprintf("%s-%d-2", model.Name, version.ID), + CurrentIsvcName: fmt.Sprintf("%s-%d-2", model.Name, version.ID), + RevisionID: models.ID(2), + Namespace: project.Name, + ServiceName: fmt.Sprintf("%s-%d-2.project.svc.cluster.local", model.Name, version.ID), + URL: fmt.Sprintf("%s-%d-2.example.com", model.Name, version.ID), + Metadata: svcMetadata, + }, nil) + return ctrl + }, + imageBuilder: func() *imageBuilderMock.ImageBuilder { + mockImgBuilder := &imageBuilderMock.ImageBuilder{} + return mockImgBuilder + }, + }, + { + name: "Success: Redeploy serving endpoint", + model: model, + version: version, + endpoint: &models.VersionEndpoint{ + EnvironmentName: env.Name, + ResourceRequest: env.DefaultResourceRequest, + VersionID: version.ID, + Status: models.EndpointServing, + }, + expectedEndpointStatus: models.EndpointServing, + deploymentStorage: func() *mocks.DeploymentStorage { + mockStorage := &mocks.DeploymentStorage{} + mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil) + mockStorage.On("Save", mock.Anything).Return(nil, nil) + return mockStorage + }, + storage: func() *mocks.VersionEndpointStorage { + mockStorage := &mocks.VersionEndpointStorage{} + mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{ + Environment: env, + EnvironmentName: env.Name, + ResourceRequest: env.DefaultResourceRequest, + VersionID: version.ID, + Namespace: project.Name, + RevisionID: models.ID(1), + InferenceServiceName: fmt.Sprintf("%s-%d-1", model.Name, version.ID), + Status: models.EndpointServing, + }, nil) + mockStorage.On("Save", &models.VersionEndpoint{ + Environment: env, + EnvironmentName: env.Name, + ResourceRequest: env.DefaultResourceRequest, + VersionID: version.ID, + Namespace: project.Name, + RevisionID: models.ID(2), + InferenceServiceName: modelSvcName, + Status: models.EndpointServing, + URL: url, + ServiceName: svcName, + }).Return(nil) + return mockStorage + }, + controller: func() 
*clusterMock.Controller { + ctrl := &clusterMock.Controller{} + ctrl.On("Deploy", mock.Anything, mock.Anything). + Return(&models.Service{ + Name: fmt.Sprintf("%s-%d-2", model.Name, version.ID), + CurrentIsvcName: fmt.Sprintf("%s-%d-2", model.Name, version.ID), + RevisionID: models.ID(2), + Namespace: project.Name, + ServiceName: fmt.Sprintf("%s-%d-2.project.svc.cluster.local", model.Name, version.ID), + URL: fmt.Sprintf("%s-%d-2.example.com", model.Name, version.ID), + Metadata: svcMetadata, + }, nil) + return ctrl + }, + imageBuilder: func() *imageBuilderMock.ImageBuilder { + mockImgBuilder := &imageBuilderMock.ImageBuilder{} + return mockImgBuilder + }, + }, + { + name: "Success: Redeploy failed endpoint", + model: model, + version: version, + endpoint: &models.VersionEndpoint{ + EnvironmentName: env.Name, + ResourceRequest: env.DefaultResourceRequest, + VersionID: version.ID, + Status: models.EndpointFailed, + }, + expectedEndpointStatus: models.EndpointRunning, + deploymentStorage: func() *mocks.DeploymentStorage { + mockStorage := &mocks.DeploymentStorage{} + mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil) + mockStorage.On("Save", mock.Anything).Return(nil, nil) + return mockStorage + }, + storage: func() *mocks.VersionEndpointStorage { + mockStorage := &mocks.VersionEndpointStorage{} + mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{ + Environment: env, + EnvironmentName: env.Name, + ResourceRequest: env.DefaultResourceRequest, + VersionID: version.ID, + Namespace: project.Name, + RevisionID: models.ID(1), + InferenceServiceName: fmt.Sprintf("%s-%d-1", model.Name, version.ID), + Status: models.EndpointFailed, + }, nil) + mockStorage.On("Save", &models.VersionEndpoint{ + Environment: env, + EnvironmentName: env.Name, + ResourceRequest: env.DefaultResourceRequest, + VersionID: version.ID, + Namespace: project.Name, + RevisionID: models.ID(2), + InferenceServiceName: modelSvcName, + Status: models.EndpointRunning, + 
URL: url, + ServiceName: svcName, + }).Return(nil) + return mockStorage + }, + controller: func() *clusterMock.Controller { + ctrl := &clusterMock.Controller{} + ctrl.On("Deploy", mock.Anything, mock.Anything). + Return(&models.Service{ + Name: fmt.Sprintf("%s-%d-2", model.Name, version.ID), + CurrentIsvcName: fmt.Sprintf("%s-%d-2", model.Name, version.ID), + RevisionID: models.ID(2), + Namespace: project.Name, + ServiceName: fmt.Sprintf("%s-%d-2.project.svc.cluster.local", model.Name, version.ID), + URL: fmt.Sprintf("%s-%d-2.example.com", model.Name, version.ID), + Metadata: svcMetadata, + }, nil) return ctrl }, imageBuilder: func() *imageBuilderMock.ImageBuilder { mockImgBuilder := &imageBuilderMock.ImageBuilder{} - mockImgBuilder.On("BuildImage", context.Background(), mock.Anything, mock.Anything, mock.Anything).Return("", errors.New("Failed to build image")) return mockImgBuilder }, }, @@ -360,10 +666,14 @@ func TestExecuteDeployment(t *testing.T) { if len(ctrl.ExpectedCalls) > 0 && ctrl.ExpectedCalls[0].ReturnArguments[0] != nil { deployedSvc := ctrl.ExpectedCalls[0].ReturnArguments[0].(*models.Service) assert.Equal(t, svcMetadata, deployedSvc.Metadata) + assert.Equal(t, modelSvcName, deployedSvc.Name) } mockStorage.AssertNumberOfCalls(t, "Save", 1) + mockDeploymentStorage.AssertNumberOfCalls(t, "Save", 2) + savedEndpoint := mockStorage.Calls[1].Arguments[0].(*models.VersionEndpoint) + log.Infof("savedEndpoint: %+v", savedEndpoint) assert.Equal(t, tt.model.ID, savedEndpoint.VersionModelID) assert.Equal(t, tt.version.ID, savedEndpoint.VersionID) assert.Equal(t, tt.model.Project.Name, savedEndpoint.Namespace) @@ -374,12 +684,13 @@ func TestExecuteDeployment(t *testing.T) { } else { assert.Equal(t, env.DefaultResourceRequest, savedEndpoint.ResourceRequest) } + if tt.deployErr != nil { assert.Equal(t, models.EndpointFailed, savedEndpoint.Status) } else { - assert.Equal(t, models.EndpointRunning, savedEndpoint.Status) + assert.Equal(t, tt.expectedEndpointStatus, 
savedEndpoint.Status) assert.Equal(t, url, savedEndpoint.URL) - assert.Equal(t, iSvcName, savedEndpoint.InferenceServiceName) + assert.Equal(t, modelSvcName, savedEndpoint.InferenceServiceName) } }) } diff --git a/api/service/deployment_service.go b/api/service/deployment_service.go new file mode 100644 index 000000000..1c1391870 --- /dev/null +++ b/api/service/deployment_service.go @@ -0,0 +1,25 @@ +package service + +import ( + "github.com/caraml-dev/merlin/models" + "github.com/caraml-dev/merlin/storage" +) + +type DeploymentService interface { + ListDeployments(modelID, versionID, endpointUUID string) ([]*models.Deployment, error) +} + +func NewDeploymentService(storage storage.DeploymentStorage) DeploymentService { + return &deploymentService{ + storage: storage, + } +} + +type deploymentService struct { + storage storage.DeploymentStorage +} + +func (service *deploymentService) ListDeployments(modelID, versionID, endpointUUID string) ([]*models.Deployment, error) { + // TODO: Add pagination + return service.storage.ListInModelVersion(modelID, versionID, endpointUUID) +} diff --git a/api/service/deployment_service_test.go b/api/service/deployment_service_test.go new file mode 100644 index 000000000..b90944fc0 --- /dev/null +++ b/api/service/deployment_service_test.go @@ -0,0 +1,88 @@ +package service + +import ( + "fmt" + "reflect" + "testing" + "time" + + "github.com/caraml-dev/merlin/models" + "github.com/caraml-dev/merlin/storage/mocks" + "github.com/google/uuid" +) + +func Test_deploymentService_ListDeployments(t *testing.T) { + endpointUUID := uuid.New() + endpointUUIDString := fmt.Sprint(endpointUUID) + + createdUpdated := models.CreatedUpdated{ + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + } + + type args struct { + modelID string + versionID string + endpointUUID string + } + tests := []struct { + name string + args args + mockDeploymentStorage func() *mocks.DeploymentStorage + want []*models.Deployment + wantErr bool + }{ + { + name: "success", 
+ args: args{ + modelID: "model", + versionID: "1", + endpointUUID: endpointUUIDString, + }, + mockDeploymentStorage: func() *mocks.DeploymentStorage { + mockStorage := &mocks.DeploymentStorage{} + mockStorage.On("ListInModelVersion", "model", "1", endpointUUIDString).Return([]*models.Deployment{ + { + ID: models.ID(1), + ProjectID: models.ID(1), + VersionModelID: models.ID(1), + VersionID: models.ID(1), + VersionEndpointID: endpointUUID, + Status: models.EndpointRunning, + Error: "", + CreatedUpdated: createdUpdated, + }, + }, nil) + return mockStorage + }, + want: []*models.Deployment{{ + ID: models.ID(1), + ProjectID: models.ID(1), + VersionModelID: models.ID(1), + VersionID: models.ID(1), + VersionEndpointID: endpointUUID, + Status: models.EndpointRunning, + Error: "", + CreatedUpdated: createdUpdated, + }}, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mockDeploymentStorage := tt.mockDeploymentStorage() + + service := &deploymentService{ + storage: mockDeploymentStorage, + } + got, err := service.ListDeployments(tt.args.modelID, tt.args.versionID, tt.args.endpointUUID) + if (err != nil) != tt.wantErr { + t.Errorf("deploymentService.ListDeployments() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("deploymentService.ListDeployments() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/api/service/mocks/deployment_service.go b/api/service/mocks/deployment_service.go new file mode 100644 index 000000000..70b00c99e --- /dev/null +++ b/api/service/mocks/deployment_service.go @@ -0,0 +1,54 @@ +// Code generated by mockery v2.20.0. DO NOT EDIT. 
+ +package mocks + +import ( + models "github.com/caraml-dev/merlin/models" + mock "github.com/stretchr/testify/mock" +) + +// DeploymentService is an autogenerated mock type for the DeploymentService type +type DeploymentService struct { + mock.Mock +} + +// ListDeployments provides a mock function with given fields: modelID, versionID, endpointUUID +func (_m *DeploymentService) ListDeployments(modelID string, versionID string, endpointUUID string) ([]*models.Deployment, error) { + ret := _m.Called(modelID, versionID, endpointUUID) + + var r0 []*models.Deployment + var r1 error + if rf, ok := ret.Get(0).(func(string, string, string) ([]*models.Deployment, error)); ok { + return rf(modelID, versionID, endpointUUID) + } + if rf, ok := ret.Get(0).(func(string, string, string) []*models.Deployment); ok { + r0 = rf(modelID, versionID, endpointUUID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]*models.Deployment) + } + } + + if rf, ok := ret.Get(1).(func(string, string, string) error); ok { + r1 = rf(modelID, versionID, endpointUUID) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +type mockConstructorTestingTNewDeploymentService interface { + mock.TestingT + Cleanup(func()) +} + +// NewDeploymentService creates a new instance of DeploymentService. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewDeploymentService(t mockConstructorTestingTNewDeploymentService) *DeploymentService { + mock := &DeploymentService{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/api/service/mocks/endpoints_service.go b/api/service/mocks/endpoints_service.go index c4faea6e5..d5a2e3b33 100644 --- a/api/service/mocks/endpoints_service.go +++ b/api/service/mocks/endpoints_service.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.22.1. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. 
package mocks @@ -106,25 +106,25 @@ func (_m *EndpointsService) FindByID(ctx context.Context, endpointUuid uuid.UUID return r0, r1 } -// ListContainers provides a mock function with given fields: ctx, model, version, endpointUuid -func (_m *EndpointsService) ListContainers(ctx context.Context, model *models.Model, version *models.Version, endpointUuid uuid.UUID) ([]*models.Container, error) { - ret := _m.Called(ctx, model, version, endpointUuid) +// ListContainers provides a mock function with given fields: ctx, model, version, endpoint +func (_m *EndpointsService) ListContainers(ctx context.Context, model *models.Model, version *models.Version, endpoint *models.VersionEndpoint) ([]*models.Container, error) { + ret := _m.Called(ctx, model, version, endpoint) var r0 []*models.Container var r1 error - if rf, ok := ret.Get(0).(func(context.Context, *models.Model, *models.Version, uuid.UUID) ([]*models.Container, error)); ok { - return rf(ctx, model, version, endpointUuid) + if rf, ok := ret.Get(0).(func(context.Context, *models.Model, *models.Version, *models.VersionEndpoint) ([]*models.Container, error)); ok { + return rf(ctx, model, version, endpoint) } - if rf, ok := ret.Get(0).(func(context.Context, *models.Model, *models.Version, uuid.UUID) []*models.Container); ok { - r0 = rf(ctx, model, version, endpointUuid) + if rf, ok := ret.Get(0).(func(context.Context, *models.Model, *models.Version, *models.VersionEndpoint) []*models.Container); ok { + r0 = rf(ctx, model, version, endpoint) } else { if ret.Get(0) != nil { r0 = ret.Get(0).([]*models.Container) } } - if rf, ok := ret.Get(1).(func(context.Context, *models.Model, *models.Version, uuid.UUID) error); ok { - r1 = rf(ctx, model, version, endpointUuid) + if rf, ok := ret.Get(1).(func(context.Context, *models.Model, *models.Version, *models.VersionEndpoint) error); ok { + r1 = rf(ctx, model, version, endpoint) } else { r1 = ret.Error(1) } diff --git a/api/service/mocks/environment_service.go 
b/api/service/mocks/environment_service.go index d353cb986..ca6897597 100644 --- a/api/service/mocks/environment_service.go +++ b/api/service/mocks/environment_service.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. package mocks @@ -17,6 +17,10 @@ func (_m *EnvironmentService) GetDefaultEnvironment() (*models.Environment, erro ret := _m.Called() var r0 *models.Environment + var r1 error + if rf, ok := ret.Get(0).(func() (*models.Environment, error)); ok { + return rf() + } if rf, ok := ret.Get(0).(func() *models.Environment); ok { r0 = rf() } else { @@ -25,7 +29,6 @@ func (_m *EnvironmentService) GetDefaultEnvironment() (*models.Environment, erro } } - var r1 error if rf, ok := ret.Get(1).(func() error); ok { r1 = rf() } else { @@ -40,6 +43,10 @@ func (_m *EnvironmentService) GetDefaultPredictionJobEnvironment() (*models.Envi ret := _m.Called() var r0 *models.Environment + var r1 error + if rf, ok := ret.Get(0).(func() (*models.Environment, error)); ok { + return rf() + } if rf, ok := ret.Get(0).(func() *models.Environment); ok { r0 = rf() } else { @@ -48,7 +55,6 @@ func (_m *EnvironmentService) GetDefaultPredictionJobEnvironment() (*models.Envi } } - var r1 error if rf, ok := ret.Get(1).(func() error); ok { r1 = rf() } else { @@ -63,6 +69,10 @@ func (_m *EnvironmentService) GetEnvironment(name string) (*models.Environment, ret := _m.Called(name) var r0 *models.Environment + var r1 error + if rf, ok := ret.Get(0).(func(string) (*models.Environment, error)); ok { + return rf(name) + } if rf, ok := ret.Get(0).(func(string) *models.Environment); ok { r0 = rf(name) } else { @@ -71,7 +81,6 @@ func (_m *EnvironmentService) GetEnvironment(name string) (*models.Environment, } } - var r1 error if rf, ok := ret.Get(1).(func(string) error); ok { r1 = rf(name) } else { @@ -86,6 +95,10 @@ func (_m *EnvironmentService) ListEnvironments(name string) ([]*models.Environme ret := _m.Called(name) var r0 
[]*models.Environment + var r1 error + if rf, ok := ret.Get(0).(func(string) ([]*models.Environment, error)); ok { + return rf(name) + } if rf, ok := ret.Get(0).(func(string) []*models.Environment); ok { r0 = rf(name) } else { @@ -94,7 +107,6 @@ func (_m *EnvironmentService) ListEnvironments(name string) ([]*models.Environme } } - var r1 error if rf, ok := ret.Get(1).(func(string) error); ok { r1 = rf(name) } else { @@ -109,6 +121,10 @@ func (_m *EnvironmentService) Save(env *models.Environment) (*models.Environment ret := _m.Called(env) var r0 *models.Environment + var r1 error + if rf, ok := ret.Get(0).(func(*models.Environment) (*models.Environment, error)); ok { + return rf(env) + } if rf, ok := ret.Get(0).(func(*models.Environment) *models.Environment); ok { r0 = rf(env) } else { @@ -117,7 +133,6 @@ func (_m *EnvironmentService) Save(env *models.Environment) (*models.Environment } } - var r1 error if rf, ok := ret.Get(1).(func(*models.Environment) error); ok { r1 = rf(env) } else { diff --git a/api/service/mocks/list_options.go b/api/service/mocks/list_options.go index 0a3f31ec1..81c446b3d 100644 --- a/api/service/mocks/list_options.go +++ b/api/service/mocks/list_options.go @@ -1,4 +1,4 @@ -// Code generated by mockery v1.0.1. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. package mocks @@ -12,24 +12,6 @@ type ListOptions struct { mock.Mock } -type ListOptions_apply struct { - *mock.Call -} - -func (_m ListOptions_apply) Return(_a0 *gorm.DB) *ListOptions_apply { - return &ListOptions_apply{Call: _m.Call.Return(_a0)} -} - -func (_m *ListOptions) Onapply(q *gorm.DB) *ListOptions_apply { - c := _m.On("apply", q) - return &ListOptions_apply{Call: c} -} - -func (_m *ListOptions) OnapplyMatch(matchers ...interface{}) *ListOptions_apply { - c := _m.On("apply", matchers...) 
- return &ListOptions_apply{Call: c} -} - // apply provides a mock function with given fields: q func (_m *ListOptions) apply(q *gorm.DB) *gorm.DB { ret := _m.Called(q) @@ -45,3 +27,18 @@ func (_m *ListOptions) apply(q *gorm.DB) *gorm.DB { return r0 } + +type mockConstructorTestingTNewListOptions interface { + mock.TestingT + Cleanup(func()) +} + +// NewListOptions creates a new instance of ListOptions. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewListOptions(t mockConstructorTestingTNewListOptions) *ListOptions { + mock := &ListOptions{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/api/service/mocks/log_service.go b/api/service/mocks/log_service.go index 7006089c3..9c2815a23 100644 --- a/api/service/mocks/log_service.go +++ b/api/service/mocks/log_service.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. package mocks diff --git a/api/service/mocks/model_endpoint_alert_service.go b/api/service/mocks/model_endpoint_alert_service.go index f41440a83..642233439 100644 --- a/api/service/mocks/model_endpoint_alert_service.go +++ b/api/service/mocks/model_endpoint_alert_service.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. 
package mocks @@ -17,6 +17,10 @@ func (_m *ModelEndpointAlertService) CreateModelEndpointAlert(user string, alert ret := _m.Called(user, alert) var r0 *models.ModelEndpointAlert + var r1 error + if rf, ok := ret.Get(0).(func(string, *models.ModelEndpointAlert) (*models.ModelEndpointAlert, error)); ok { + return rf(user, alert) + } if rf, ok := ret.Get(0).(func(string, *models.ModelEndpointAlert) *models.ModelEndpointAlert); ok { r0 = rf(user, alert) } else { @@ -25,7 +29,6 @@ func (_m *ModelEndpointAlertService) CreateModelEndpointAlert(user string, alert } } - var r1 error if rf, ok := ret.Get(1).(func(string, *models.ModelEndpointAlert) error); ok { r1 = rf(user, alert) } else { @@ -40,6 +43,10 @@ func (_m *ModelEndpointAlertService) GetModelEndpointAlert(modelID models.ID, mo ret := _m.Called(modelID, modelEndpointID) var r0 *models.ModelEndpointAlert + var r1 error + if rf, ok := ret.Get(0).(func(models.ID, models.ID) (*models.ModelEndpointAlert, error)); ok { + return rf(modelID, modelEndpointID) + } if rf, ok := ret.Get(0).(func(models.ID, models.ID) *models.ModelEndpointAlert); ok { r0 = rf(modelID, modelEndpointID) } else { @@ -48,7 +55,6 @@ func (_m *ModelEndpointAlertService) GetModelEndpointAlert(modelID models.ID, mo } } - var r1 error if rf, ok := ret.Get(1).(func(models.ID, models.ID) error); ok { r1 = rf(modelID, modelEndpointID) } else { @@ -63,6 +69,10 @@ func (_m *ModelEndpointAlertService) ListModelAlerts(modelID models.ID) ([]*mode ret := _m.Called(modelID) var r0 []*models.ModelEndpointAlert + var r1 error + if rf, ok := ret.Get(0).(func(models.ID) ([]*models.ModelEndpointAlert, error)); ok { + return rf(modelID) + } if rf, ok := ret.Get(0).(func(models.ID) []*models.ModelEndpointAlert); ok { r0 = rf(modelID) } else { @@ -71,7 +81,6 @@ func (_m *ModelEndpointAlertService) ListModelAlerts(modelID models.ID) ([]*mode } } - var r1 error if rf, ok := ret.Get(1).(func(models.ID) error); ok { r1 = rf(modelID) } else { @@ -86,6 +95,10 @@ func (_m 
*ModelEndpointAlertService) ListTeams() ([]string, error) { ret := _m.Called() var r0 []string + var r1 error + if rf, ok := ret.Get(0).(func() ([]string, error)); ok { + return rf() + } if rf, ok := ret.Get(0).(func() []string); ok { r0 = rf() } else { @@ -94,7 +107,6 @@ func (_m *ModelEndpointAlertService) ListTeams() ([]string, error) { } } - var r1 error if rf, ok := ret.Get(1).(func() error); ok { r1 = rf() } else { @@ -109,6 +121,10 @@ func (_m *ModelEndpointAlertService) UpdateModelEndpointAlert(user string, alert ret := _m.Called(user, alert) var r0 *models.ModelEndpointAlert + var r1 error + if rf, ok := ret.Get(0).(func(string, *models.ModelEndpointAlert) (*models.ModelEndpointAlert, error)); ok { + return rf(user, alert) + } if rf, ok := ret.Get(0).(func(string, *models.ModelEndpointAlert) *models.ModelEndpointAlert); ok { r0 = rf(user, alert) } else { @@ -117,7 +133,6 @@ func (_m *ModelEndpointAlertService) UpdateModelEndpointAlert(user string, alert } } - var r1 error if rf, ok := ret.Get(1).(func(string, *models.ModelEndpointAlert) error); ok { r1 = rf(user, alert) } else { diff --git a/api/service/mocks/model_endpoints_service.go b/api/service/mocks/model_endpoints_service.go index a44d52970..53893e16e 100644 --- a/api/service/mocks/model_endpoints_service.go +++ b/api/service/mocks/model_endpoints_service.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.22.1. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. package mocks diff --git a/api/service/mocks/models_service.go b/api/service/mocks/models_service.go index 27a96bd77..dd4bd8f64 100644 --- a/api/service/mocks/models_service.go +++ b/api/service/mocks/models_service.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.22.1. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. 
package mocks diff --git a/api/service/mocks/prediction_job_service.go b/api/service/mocks/prediction_job_service.go index 4a63dcbe3..9a8a8ace7 100644 --- a/api/service/mocks/prediction_job_service.go +++ b/api/service/mocks/prediction_job_service.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. package mocks @@ -23,6 +23,10 @@ func (_m *PredictionJobService) CreatePredictionJob(ctx context.Context, env *mo ret := _m.Called(ctx, env, model, version, predictionJob) var r0 *models.PredictionJob + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, *models.PredictionJob) (*models.PredictionJob, error)); ok { + return rf(ctx, env, model, version, predictionJob) + } if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, *models.PredictionJob) *models.PredictionJob); ok { r0 = rf(ctx, env, model, version, predictionJob) } else { @@ -31,7 +35,6 @@ func (_m *PredictionJobService) CreatePredictionJob(ctx context.Context, env *mo } } - var r1 error if rf, ok := ret.Get(1).(func(context.Context, *models.Environment, *models.Model, *models.Version, *models.PredictionJob) error); ok { r1 = rf(ctx, env, model, version, predictionJob) } else { @@ -46,6 +49,10 @@ func (_m *PredictionJobService) GetPredictionJob(ctx context.Context, env *model ret := _m.Called(ctx, env, model, version, id) var r0 *models.PredictionJob + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, models.ID) (*models.PredictionJob, error)); ok { + return rf(ctx, env, model, version, id) + } if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, models.ID) *models.PredictionJob); ok { r0 = rf(ctx, env, model, version, id) } else { @@ -54,7 +61,6 @@ func (_m *PredictionJobService) GetPredictionJob(ctx context.Context, env 
*model } } - var r1 error if rf, ok := ret.Get(1).(func(context.Context, *models.Environment, *models.Model, *models.Version, models.ID) error); ok { r1 = rf(ctx, env, model, version, id) } else { @@ -69,6 +75,10 @@ func (_m *PredictionJobService) ListContainers(ctx context.Context, env *models. ret := _m.Called(ctx, env, model, version, predictionJob) var r0 []*models.Container + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, *models.PredictionJob) ([]*models.Container, error)); ok { + return rf(ctx, env, model, version, predictionJob) + } if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, *models.PredictionJob) []*models.Container); ok { r0 = rf(ctx, env, model, version, predictionJob) } else { @@ -77,7 +87,6 @@ func (_m *PredictionJobService) ListContainers(ctx context.Context, env *models. } } - var r1 error if rf, ok := ret.Get(1).(func(context.Context, *models.Environment, *models.Model, *models.Version, *models.PredictionJob) error); ok { r1 = rf(ctx, env, model, version, predictionJob) } else { @@ -92,6 +101,10 @@ func (_m *PredictionJobService) ListPredictionJobs(ctx context.Context, project ret := _m.Called(ctx, project, query) var r0 []*models.PredictionJob + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, mlp.Project, *service.ListPredictionJobQuery) ([]*models.PredictionJob, error)); ok { + return rf(ctx, project, query) + } if rf, ok := ret.Get(0).(func(context.Context, mlp.Project, *service.ListPredictionJobQuery) []*models.PredictionJob); ok { r0 = rf(ctx, project, query) } else { @@ -100,7 +113,6 @@ func (_m *PredictionJobService) ListPredictionJobs(ctx context.Context, project } } - var r1 error if rf, ok := ret.Get(1).(func(context.Context, mlp.Project, *service.ListPredictionJobQuery) error); ok { r1 = rf(ctx, project, query) } else { @@ -115,6 +127,10 @@ func (_m *PredictionJobService) StopPredictionJob(ctx 
context.Context, env *mode ret := _m.Called(ctx, env, model, version, id) var r0 *models.PredictionJob + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, models.ID) (*models.PredictionJob, error)); ok { + return rf(ctx, env, model, version, id) + } if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, models.ID) *models.PredictionJob); ok { r0 = rf(ctx, env, model, version, id) } else { @@ -123,7 +139,6 @@ func (_m *PredictionJobService) StopPredictionJob(ctx context.Context, env *mode } } - var r1 error if rf, ok := ret.Get(1).(func(context.Context, *models.Environment, *models.Model, *models.Version, models.ID) error); ok { r1 = rf(ctx, env, model, version, id) } else { diff --git a/api/service/mocks/projects_service.go b/api/service/mocks/projects_service.go index ca52c581f..c543546de 100644 --- a/api/service/mocks/projects_service.go +++ b/api/service/mocks/projects_service.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.28.2. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. package mocks diff --git a/api/service/mocks/queue_producer.go b/api/service/mocks/queue_producer.go deleted file mode 100644 index 18a5f29ac..000000000 --- a/api/service/mocks/queue_producer.go +++ /dev/null @@ -1,27 +0,0 @@ -// Code generated by mockery v2.6.0. DO NOT EDIT. 
- -package mocks - -import ( - queue "github.com/caraml-dev/merlin/queue" - mock "github.com/stretchr/testify/mock" -) - -// QueueProducer is an autogenerated mock type for the QueueProducer type -type QueueProducer struct { - mock.Mock -} - -// EnqueueJob provides a mock function with given fields: job -func (_m *QueueProducer) EnqueueJob(job *queue.Job) error { - ret := _m.Called(job) - - var r0 error - if rf, ok := ret.Get(0).(func(*queue.Job) error); ok { - r0 = rf(job) - } else { - r0 = ret.Error(0) - } - - return r0 -} diff --git a/api/service/mocks/secret_service.go b/api/service/mocks/secret_service.go index 6ea89b9db..481040635 100644 --- a/api/service/mocks/secret_service.go +++ b/api/service/mocks/secret_service.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. package mocks @@ -19,13 +19,16 @@ func (_m *SecretService) Create(ctx context.Context, projectID int32, secret mlp ret := _m.Called(ctx, projectID, secret) var r0 mlp.Secret + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, int32, mlp.Secret) (mlp.Secret, error)); ok { + return rf(ctx, projectID, secret) + } if rf, ok := ret.Get(0).(func(context.Context, int32, mlp.Secret) mlp.Secret); ok { r0 = rf(ctx, projectID, secret) } else { r0 = ret.Get(0).(mlp.Secret) } - var r1 error if rf, ok := ret.Get(1).(func(context.Context, int32, mlp.Secret) error); ok { r1 = rf(ctx, projectID, secret) } else { @@ -54,13 +57,16 @@ func (_m *SecretService) GetByIDandProjectID(ctx context.Context, secretID int32 ret := _m.Called(ctx, secretID, projectID) var r0 mlp.Secret + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, int32, int32) (mlp.Secret, error)); ok { + return rf(ctx, secretID, projectID) + } if rf, ok := ret.Get(0).(func(context.Context, int32, int32) mlp.Secret); ok { r0 = rf(ctx, secretID, projectID) } else { r0 = ret.Get(0).(mlp.Secret) } - var r1 error if rf, ok := ret.Get(1).(func(context.Context, 
int32, int32) error); ok { r1 = rf(ctx, secretID, projectID) } else { @@ -75,6 +81,10 @@ func (_m *SecretService) List(ctx context.Context, projectID int32) (mlp.Secrets ret := _m.Called(ctx, projectID) var r0 mlp.Secrets + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, int32) (mlp.Secrets, error)); ok { + return rf(ctx, projectID) + } if rf, ok := ret.Get(0).(func(context.Context, int32) mlp.Secrets); ok { r0 = rf(ctx, projectID) } else { @@ -83,7 +93,6 @@ func (_m *SecretService) List(ctx context.Context, projectID int32) (mlp.Secrets } } - var r1 error if rf, ok := ret.Get(1).(func(context.Context, int32) error); ok { r1 = rf(ctx, projectID) } else { @@ -98,13 +107,16 @@ func (_m *SecretService) Update(ctx context.Context, projectID int32, secret mlp ret := _m.Called(ctx, projectID, secret) var r0 mlp.Secret + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, int32, mlp.Secret) (mlp.Secret, error)); ok { + return rf(ctx, projectID, secret) + } if rf, ok := ret.Get(0).(func(context.Context, int32, mlp.Secret) mlp.Secret); ok { r0 = rf(ctx, projectID, secret) } else { r0 = ret.Get(0).(mlp.Secret) } - var r1 error if rf, ok := ret.Get(1).(func(context.Context, int32, mlp.Secret) error); ok { r1 = rf(ctx, projectID, secret) } else { diff --git a/api/service/mocks/transformer_service.go b/api/service/mocks/transformer_service.go index 0d0d5b335..ef73d2ffa 100644 --- a/api/service/mocks/transformer_service.go +++ b/api/service/mocks/transformer_service.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. 
package mocks @@ -21,6 +21,10 @@ func (_m *TransformerService) SimulateTransformer(ctx context.Context, simulatio ret := _m.Called(ctx, simulationPayload) var r0 *types.PredictResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *models.TransformerSimulation) (*types.PredictResponse, error)); ok { + return rf(ctx, simulationPayload) + } if rf, ok := ret.Get(0).(func(context.Context, *models.TransformerSimulation) *types.PredictResponse); ok { r0 = rf(ctx, simulationPayload) } else { @@ -29,7 +33,6 @@ func (_m *TransformerService) SimulateTransformer(ctx context.Context, simulatio } } - var r1 error if rf, ok := ret.Get(1).(func(context.Context, *models.TransformerSimulation) error); ok { r1 = rf(ctx, simulationPayload) } else { diff --git a/api/service/mocks/versions_service.go b/api/service/mocks/versions_service.go index 1547474e7..ef1f5625c 100644 --- a/api/service/mocks/versions_service.go +++ b/api/service/mocks/versions_service.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.22.1. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. 
package mocks diff --git a/api/service/version_endpoint_service.go b/api/service/version_endpoint_service.go index fd69335fc..f87d64f4c 100644 --- a/api/service/version_endpoint_service.go +++ b/api/service/version_endpoint_service.go @@ -21,7 +21,6 @@ import ( "github.com/caraml-dev/merlin/cluster" "github.com/caraml-dev/merlin/config" - "github.com/caraml-dev/merlin/log" "github.com/caraml-dev/merlin/models" "github.com/caraml-dev/merlin/pkg/autoscaling" "github.com/caraml-dev/merlin/pkg/deployment" @@ -50,7 +49,7 @@ type EndpointsService interface { // CountEndpoints count number of endpoint created from a model in an environment CountEndpoints(ctx context.Context, environment *models.Environment, model *models.Model) (int, error) // ListContainers list all container associated with an endpoint - ListContainers(ctx context.Context, model *models.Model, version *models.Version, endpointUuid uuid.UUID) ([]*models.Container, error) + ListContainers(ctx context.Context, model *models.Model, version *models.Version, endpoint *models.VersionEndpoint) ([]*models.Container, error) // DeleteEndpoint hard delete endpoint data, including the relation from deployment DeleteEndpoint(version *models.Version, endpoint *models.VersionEndpoint) error } @@ -142,12 +141,7 @@ func (k *endpointService) DeployEndpoint(ctx context.Context, environment *model }, }, }); err != nil { - // if error enqueue job, mark endpoint status to failed - endpoint.Status = models.EndpointFailed - if err := k.storage.Save(endpoint); err != nil { - log.Errorf("error to update endpoint %s status to failed: %v", endpoint.ID, err) - } - return nil, err + return nil, fmt.Errorf("failed to enqueue model service deployment job: %w", err) } return endpoint, nil @@ -251,7 +245,10 @@ func (k *endpointService) UndeployEndpoint(ctx context.Context, environment *mod } modelService := &models.Service{ - Name: models.CreateInferenceServiceName(model.Name, version.ID.String()), + Name: 
models.CreateInferenceServiceName(model.Name, version.ID.String(), endpoint.RevisionID.String()), + ModelName: model.Name, + ModelVersion: version.ID.String(), + RevisionID: endpoint.RevisionID, Namespace: model.Project.Name, ResourceRequest: endpoint.ResourceRequest, Transformer: endpoint.Transformer, @@ -277,8 +274,8 @@ func (k *endpointService) CountEndpoints(ctx context.Context, environment *model } // ListContainers list all containers belong to the given version endpoint -func (k *endpointService) ListContainers(ctx context.Context, model *models.Model, version *models.Version, id uuid.UUID) ([]*models.Container, error) { - ve, err := k.storage.Get(id) +func (k *endpointService) ListContainers(ctx context.Context, model *models.Model, version *models.Version, endpoint *models.VersionEndpoint) ([]*models.Container, error) { + ve, err := k.storage.Get(endpoint.ID) if err != nil { return nil, err } @@ -298,14 +295,16 @@ func (k *endpointService) ListContainers(ctx context.Context, model *models.Mode containers = append(containers, imgBuilderContainers...) } - modelContainers, err := ctl.GetContainers(ctx, model.Project.Name, models.OnlineInferencePodLabelSelector(model.Name, version.ID.String())) + labelSelector := models.OnlineInferencePodLabelSelector(model.Name, version.ID.String(), endpoint.RevisionID.String()) + + modelContainers, err := ctl.GetContainers(ctx, model.Project.Name, labelSelector) if err != nil { return nil, err } containers = append(containers, modelContainers...) 
for _, container := range containers { - container.VersionEndpointID = id + container.VersionEndpointID = endpoint.ID } return containers, nil diff --git a/api/service/version_endpoint_service_test.go b/api/service/version_endpoint_service_test.go index a59941a17..9f45a3050 100644 --- a/api/service/version_endpoint_service_test.go +++ b/api/service/version_endpoint_service_test.go @@ -22,9 +22,6 @@ import ( "testing" "time" - "github.com/caraml-dev/merlin/pkg/autoscaling" - "github.com/caraml-dev/merlin/pkg/deployment" - "github.com/caraml-dev/merlin/pkg/protocol" "github.com/feast-dev/feast/sdk/go/protos/feast/core" "github.com/feast-dev/feast/sdk/go/protos/feast/types" "github.com/google/uuid" @@ -41,7 +38,10 @@ import ( "github.com/caraml-dev/merlin/config" "github.com/caraml-dev/merlin/mlp" "github.com/caraml-dev/merlin/models" + "github.com/caraml-dev/merlin/pkg/autoscaling" + "github.com/caraml-dev/merlin/pkg/deployment" imageBuilderMock "github.com/caraml-dev/merlin/pkg/imagebuilder/mocks" + "github.com/caraml-dev/merlin/pkg/protocol" "github.com/caraml-dev/merlin/pkg/transformer" feastmocks "github.com/caraml-dev/merlin/pkg/transformer/feast/mocks" "github.com/caraml-dev/merlin/pkg/transformer/spec" @@ -79,7 +79,7 @@ func TestDeployEndpoint(t *testing.T) { model := &models.Model{Name: "model", Project: project} version := &models.Version{ID: 1} - iSvcName := fmt.Sprintf("%s-%d", model.Name, version.ID) + // iSvcName := fmt.Sprintf("%s-%d-0", model.Name, version.ID) tests := []struct { name string @@ -96,14 +96,13 @@ func TestDeployEndpoint(t *testing.T) { &models.VersionEndpoint{}, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - DeploymentMode: deployment.ServerlessDeploymentMode, - AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, - ResourceRequest: env.DefaultResourceRequest, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, - Protocol: protocol.HttpJson, + DeploymentMode: 
deployment.ServerlessDeploymentMode, + AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, + ResourceRequest: env.DefaultResourceRequest, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, + Protocol: protocol.HttpJson, }, wantDeployError: false, }, @@ -123,12 +122,11 @@ func TestDeployEndpoint(t *testing.T) { }, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - DeploymentMode: deployment.ServerlessDeploymentMode, - AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, + DeploymentMode: deployment.ServerlessDeploymentMode, + AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, ResourceRequest: &models.ResourceRequest{ MinReplica: 2, MaxReplica: 4, @@ -148,14 +146,13 @@ func TestDeployEndpoint(t *testing.T) { &models.VersionEndpoint{}, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - DeploymentMode: deployment.ServerlessDeploymentMode, - AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, - ResourceRequest: env.DefaultResourceRequest, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, - Protocol: protocol.HttpJson, + DeploymentMode: deployment.ServerlessDeploymentMode, + AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, + ResourceRequest: env.DefaultResourceRequest, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, + Protocol: protocol.HttpJson, }, wantDeployError: false, }, @@ -170,14 +167,13 @@ func TestDeployEndpoint(t *testing.T) { }, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - DeploymentMode: deployment.ServerlessDeploymentMode, - AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, - ResourceRequest: env.DefaultResourceRequest, - Namespace: project.Name, - URL: "", - 
Status: models.EndpointPending, - Protocol: protocol.HttpJson, + DeploymentMode: deployment.ServerlessDeploymentMode, + AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, + ResourceRequest: env.DefaultResourceRequest, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, + Protocol: protocol.HttpJson, }, wantDeployError: false, }, @@ -192,14 +188,13 @@ func TestDeployEndpoint(t *testing.T) { }, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - DeploymentMode: deployment.ServerlessDeploymentMode, - AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, - ResourceRequest: env.DefaultResourceRequest, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, - Protocol: protocol.HttpJson, + DeploymentMode: deployment.ServerlessDeploymentMode, + AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, + ResourceRequest: env.DefaultResourceRequest, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, + Protocol: protocol.HttpJson, }, wantDeployError: false, }, @@ -210,13 +205,12 @@ func TestDeployEndpoint(t *testing.T) { &models.Model{Name: "model", Project: project, Type: models.ModelTypeCustom}, &models.Version{ID: 1}, &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - DeploymentMode: deployment.ServerlessDeploymentMode, - AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, - ResourceRequest: env.DefaultResourceRequest, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, + DeploymentMode: deployment.ServerlessDeploymentMode, + AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, + ResourceRequest: env.DefaultResourceRequest, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, EnvVars: models.EnvVars{ { Name: "TF_MODEL_NAME", @@ -231,13 +225,12 @@ func TestDeployEndpoint(t *testing.T) { }, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - 
DeploymentMode: deployment.ServerlessDeploymentMode, - AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, - ResourceRequest: env.DefaultResourceRequest, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, + DeploymentMode: deployment.ServerlessDeploymentMode, + AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, + ResourceRequest: env.DefaultResourceRequest, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, EnvVars: models.EnvVars{ { Name: "TF_MODEL_NAME", @@ -278,13 +271,12 @@ func TestDeployEndpoint(t *testing.T) { }, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - DeploymentMode: deployment.ServerlessDeploymentMode, - AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, - ResourceRequest: env.DefaultResourceRequest, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, + DeploymentMode: deployment.ServerlessDeploymentMode, + AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, + ResourceRequest: env.DefaultResourceRequest, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, Transformer: &models.Transformer{ Enabled: true, Image: "ghcr.io/caraml-dev/merlin-transformer-test", @@ -321,12 +313,11 @@ func TestDeployEndpoint(t *testing.T) { }, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - DeploymentMode: deployment.ServerlessDeploymentMode, - AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, + DeploymentMode: deployment.ServerlessDeploymentMode, + AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, ResourceRequest: &models.ResourceRequest{ MinReplica: 2, MaxReplica: 4, @@ -370,12 +361,11 @@ func TestDeployEndpoint(t *testing.T) { }, }, expectedEndpoint: &models.VersionEndpoint{ - 
InferenceServiceName: iSvcName, - DeploymentMode: deployment.ServerlessDeploymentMode, - AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, + DeploymentMode: deployment.ServerlessDeploymentMode, + AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, ResourceRequest: &models.ResourceRequest{ MinReplica: 2, MaxReplica: 4, @@ -415,12 +405,11 @@ func TestDeployEndpoint(t *testing.T) { }, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - DeploymentMode: deployment.ServerlessDeploymentMode, - AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, + DeploymentMode: deployment.ServerlessDeploymentMode, + AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, ResourceRequest: &models.ResourceRequest{ MinReplica: 2, MaxReplica: 4, @@ -464,12 +453,11 @@ func TestDeployEndpoint(t *testing.T) { }, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - DeploymentMode: deployment.ServerlessDeploymentMode, - AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, + DeploymentMode: deployment.ServerlessDeploymentMode, + AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, ResourceRequest: &models.ResourceRequest{ MinReplica: 2, MaxReplica: 4, @@ -518,10 +506,9 @@ func TestDeployEndpoint(t *testing.T) { }, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, + Namespace: project.Name, + URL: "", + Status: 
models.EndpointPending, ResourceRequest: &models.ResourceRequest{ MinReplica: 2, MaxReplica: 4, @@ -576,10 +563,9 @@ func TestDeployEndpoint(t *testing.T) { }, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, ResourceRequest: &models.ResourceRequest{ MinReplica: 2, MaxReplica: 4, @@ -637,10 +623,9 @@ func TestDeployEndpoint(t *testing.T) { }, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, ResourceRequest: &models.ResourceRequest{ MinReplica: 2, MaxReplica: 4, @@ -699,10 +684,9 @@ func TestDeployEndpoint(t *testing.T) { MetricsType: autoscaling.CPUUtilization, TargetValue: 50, }, - EnvironmentName: env.Name, - Namespace: project.Name, - InferenceServiceName: iSvcName, - Protocol: protocol.HttpJson, + EnvironmentName: env.Name, + Namespace: project.Name, + Protocol: protocol.HttpJson, }, }, }, @@ -731,10 +715,9 @@ func TestDeployEndpoint(t *testing.T) { }, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, ResourceRequest: &models.ResourceRequest{ MinReplica: 2, MaxReplica: 4, @@ -773,14 +756,13 @@ func TestDeployEndpoint(t *testing.T) { }, }, expectedEndpoint: &models.VersionEndpoint{ - InferenceServiceName: iSvcName, - DeploymentMode: deployment.ServerlessDeploymentMode, - AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, - ResourceRequest: env.DefaultResourceRequest, - Namespace: project.Name, - URL: "", - Status: models.EndpointPending, - Protocol: protocol.UpiV1, + DeploymentMode: deployment.ServerlessDeploymentMode, + 
AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy, + ResourceRequest: env.DefaultResourceRequest, + Namespace: project.Name, + URL: "", + Status: models.EndpointPending, + Protocol: protocol.UpiV1, }, wantDeployError: false, }, @@ -2171,13 +2153,22 @@ func TestDeployEndpoint_StandardTransformer(t *testing.T) { } func TestListContainers(t *testing.T) { + id := uuid.New() + project := mlp.Project{ID: 1, Name: "my-project"} model := &models.Model{ID: 1, Name: "model", Type: models.ModelTypeXgboost, Project: project, ProjectID: models.ID(project.ID)} version := &models.Version{ID: 1} - id := uuid.New() + revisionID := models.ID(1) env := &models.Environment{Name: "my-env", Cluster: "my-cluster", IsDefault: &isDefaultTrue} + endpoint := &models.VersionEndpoint{ + ID: id, + VersionID: version.ID, + VersionModelID: model.ID, + RevisionID: revisionID, + EnvironmentName: env.Name, + } cfg := &config.Config{ - Environment: "dev", + Environment: env.Name, FeatureToggleConfig: config.FeatureToggleConfig{ MonitoringConfig: config.MonitoringConfig{ MonitoringEnabled: false, @@ -2186,9 +2177,9 @@ func TestListContainers(t *testing.T) { } type args struct { - model *models.Model - version *models.Version - id uuid.UUID + model *models.Model + version *models.Version + endpoint *models.VersionEndpoint } type componentMock struct { @@ -2206,20 +2197,21 @@ func TestListContainers(t *testing.T) { { "success: non-pyfunc model", args{ - model, version, id, + model, version, endpoint, }, componentMock{ &models.VersionEndpoint{ ID: id, VersionID: version.ID, VersionModelID: model.ID, + RevisionID: revisionID, EnvironmentName: env.Name, }, nil, []*models.Container{ { Name: "user-container", - PodName: "mymodel-2-predictor-hlqgv-deployment-6f478cbc67-mp7zf", + PodName: "mymodel-2-r1-predictor-hlqgv-deployment-6f478cbc67-mp7zf", Namespace: project.Name, Cluster: env.Cluster, GcpProject: env.GcpProject, @@ -2231,13 +2223,14 @@ func TestListContainers(t *testing.T) { { 
"success: pyfunc model", args{ - model, version, id, + model, version, endpoint, }, componentMock{ &models.VersionEndpoint{ ID: id, VersionID: version.ID, VersionModelID: model.ID, + RevisionID: revisionID, EnvironmentName: env.Name, }, &models.Container{ @@ -2250,7 +2243,7 @@ func TestListContainers(t *testing.T) { []*models.Container{ { Name: "user-container", - PodName: "mymodel-2-predictor-hlqgv-deployment-6f478cbc67-mp7zf", + PodName: "mymodel-2-r1-predictor-hlqgv-deployment-6f478cbc67-mp7zf", Namespace: project.Name, Cluster: env.Cluster, GcpProject: env.GcpProject, @@ -2267,7 +2260,7 @@ func TestListContainers(t *testing.T) { Return(tt.mock.imageBuilderContainer, nil) envController := &clusterMock.Controller{} - envController.On("GetContainers", context.Background(), "my-project", "serving.kserve.io/inferenceservice=model-1"). + envController.On("GetContainers", context.Background(), "my-project", "serving.kserve.io/inferenceservice=model-1-r1"). Return(tt.mock.modelContainers, nil) controllers := map[string]cluster.Controller{env.Name: envController} @@ -2277,7 +2270,6 @@ func TestListContainers(t *testing.T) { mockStorage.On("Get", mock.Anything).Return(tt.mock.versionEndpoint, nil) mockDeploymentStorage.On("Save", mock.Anything).Return(nil, nil) - // endpointSvc := NewEndpointService(controllers, imgBuilder, mockStorage, mockDeploymentStorage, cfg.Environment, cfg.FeatureToggleConfig.MonitoringConfig, loggerDestinationURL) endpointSvc := NewEndpointService(EndpointServiceParams{ ClusterControllers: controllers, ImageBuilder: imgBuilder, @@ -2287,7 +2279,8 @@ func TestListContainers(t *testing.T) { MonitoringConfig: cfg.FeatureToggleConfig.MonitoringConfig, LoggerDestinationURL: loggerDestinationURL, }) - containers, err := endpointSvc.ListContainers(context.Background(), tt.args.model, tt.args.version, tt.args.id) + + containers, err := endpointSvc.ListContainers(context.Background(), tt.args.model, tt.args.version, tt.args.endpoint) if !tt.wantError { 
assert.Nil(t, err, "unwanted error %v", err) } else { diff --git a/api/storage/deployment_storage.go b/api/storage/deployment_storage.go index 33687a4ee..9e6ad7791 100644 --- a/api/storage/deployment_storage.go +++ b/api/storage/deployment_storage.go @@ -23,6 +23,8 @@ import ( type DeploymentStorage interface { // ListInModel return all deployment within a model ListInModel(model *models.Model) ([]*models.Deployment, error) + // ListInModelVersion returns all deployments of the given version endpoint within a model version + ListInModelVersion(modelID, versionID, endpointUUID string) ([]*models.Deployment, error) // Save save the deployment to underlying storage Save(deployment *models.Deployment) (*models.Deployment, error) // GetFirstSuccessModelVersionPerModel Return mapping of model id and the first model version with a successful model version @@ -44,6 +46,12 @@ func (d *deploymentStorage) ListInModel(model *models.Model) ([]*models.Deployme return deployments, err } +func (d *deploymentStorage) ListInModelVersion(modelID, versionID, endpointUUID string) ([]*models.Deployment, error) { + var deployments []*models.Deployment + err := d.db.Where("version_model_id = ? AND version_id = ? AND version_endpoint_id = ?", modelID, versionID, endpointUUID).Find(&deployments).Error + return deployments, err +} + func (d *deploymentStorage) Save(deployment *models.Deployment) (*models.Deployment, error) { err := d.db.Save(deployment).Error return deployment, err diff --git a/api/storage/mocks/alert_storage.go b/api/storage/mocks/alert_storage.go index f21032904..e8a583ebd 100644 --- a/api/storage/mocks/alert_storage.go +++ b/api/storage/mocks/alert_storage.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. 
package mocks @@ -45,6 +45,10 @@ func (_m *AlertStorage) GetModelEndpointAlert(modelID models.ID, modelEndpointID ret := _m.Called(modelID, modelEndpointID) var r0 *models.ModelEndpointAlert + var r1 error + if rf, ok := ret.Get(0).(func(models.ID, models.ID) (*models.ModelEndpointAlert, error)); ok { + return rf(modelID, modelEndpointID) + } if rf, ok := ret.Get(0).(func(models.ID, models.ID) *models.ModelEndpointAlert); ok { r0 = rf(modelID, modelEndpointID) } else { @@ -53,7 +57,6 @@ func (_m *AlertStorage) GetModelEndpointAlert(modelID models.ID, modelEndpointID } } - var r1 error if rf, ok := ret.Get(1).(func(models.ID, models.ID) error); ok { r1 = rf(modelID, modelEndpointID) } else { @@ -68,6 +71,10 @@ func (_m *AlertStorage) ListModelEndpointAlerts(modelID models.ID) ([]*models.Mo ret := _m.Called(modelID) var r0 []*models.ModelEndpointAlert + var r1 error + if rf, ok := ret.Get(0).(func(models.ID) ([]*models.ModelEndpointAlert, error)); ok { + return rf(modelID) + } if rf, ok := ret.Get(0).(func(models.ID) []*models.ModelEndpointAlert); ok { r0 = rf(modelID) } else { @@ -76,7 +83,6 @@ func (_m *AlertStorage) ListModelEndpointAlerts(modelID models.ID) ([]*models.Mo } } - var r1 error if rf, ok := ret.Get(1).(func(models.ID) error); ok { r1 = rf(modelID) } else { diff --git a/api/storage/mocks/deployment_storage.go b/api/storage/mocks/deployment_storage.go index 7535732ab..c9a231353 100644 --- a/api/storage/mocks/deployment_storage.go +++ b/api/storage/mocks/deployment_storage.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.22.1. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. 
package mocks @@ -78,6 +78,32 @@ func (_m *DeploymentStorage) ListInModel(model *models.Model) ([]*models.Deploym return r0, r1 } +// ListInModelVersion provides a mock function with given fields: modelID, versionID, endpointUUID +func (_m *DeploymentStorage) ListInModelVersion(modelID string, versionID string, endpointUUID string) ([]*models.Deployment, error) { + ret := _m.Called(modelID, versionID, endpointUUID) + + var r0 []*models.Deployment + var r1 error + if rf, ok := ret.Get(0).(func(string, string, string) ([]*models.Deployment, error)); ok { + return rf(modelID, versionID, endpointUUID) + } + if rf, ok := ret.Get(0).(func(string, string, string) []*models.Deployment); ok { + r0 = rf(modelID, versionID, endpointUUID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]*models.Deployment) + } + } + + if rf, ok := ret.Get(1).(func(string, string, string) error); ok { + r1 = rf(modelID, versionID, endpointUUID) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // Save provides a mock function with given fields: deployment func (_m *DeploymentStorage) Save(deployment *models.Deployment) (*models.Deployment, error) { ret := _m.Called(deployment) diff --git a/api/storage/mocks/model_endpoint_storage.go b/api/storage/mocks/model_endpoint_storage.go index 40e190969..26423ff77 100644 --- a/api/storage/mocks/model_endpoint_storage.go +++ b/api/storage/mocks/model_endpoint_storage.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.22.1. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. package mocks diff --git a/api/storage/mocks/prediction_job_storage.go b/api/storage/mocks/prediction_job_storage.go index d856c7a24..642b3366b 100644 --- a/api/storage/mocks/prediction_job_storage.go +++ b/api/storage/mocks/prediction_job_storage.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.22.1. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. 
package mocks diff --git a/api/storage/mocks/version_endpoint_storage.go b/api/storage/mocks/version_endpoint_storage.go index b92d5fe4b..2707677dd 100644 --- a/api/storage/mocks/version_endpoint_storage.go +++ b/api/storage/mocks/version_endpoint_storage.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.22.1. DO NOT EDIT. +// Code generated by mockery v2.20.0. DO NOT EDIT. package mocks @@ -38,13 +38,13 @@ func (_m *VersionEndpointStorage) CountEndpoints(environment *models.Environment return r0, r1 } -// Delete provides a mock function with given fields: version -func (_m *VersionEndpointStorage) Delete(version *models.VersionEndpoint) error { - ret := _m.Called(version) +// Delete provides a mock function with given fields: endpoint +func (_m *VersionEndpointStorage) Delete(endpoint *models.VersionEndpoint) error { + ret := _m.Called(endpoint) var r0 error if rf, ok := ret.Get(0).(func(*models.VersionEndpoint) error); ok { - r0 = rf(version) + r0 = rf(endpoint) } else { r0 = ret.Error(0) } diff --git a/db-migrations/32_revision_id.down.sql b/db-migrations/32_revision_id.down.sql new file mode 100644 index 000000000..ecc8bd04d --- /dev/null +++ b/db-migrations/32_revision_id.down.sql @@ -0,0 +1 @@ +ALTER TABLE version_endpoints DROP COLUMN revision_id; diff --git a/db-migrations/32_revision_id.up.sql b/db-migrations/32_revision_id.up.sql new file mode 100644 index 000000000..4fee07746 --- /dev/null +++ b/db-migrations/32_revision_id.up.sql @@ -0,0 +1,2 @@ +ALTER TABLE version_endpoints +ADD COLUMN revision_id VARCHAR(32); diff --git a/python/sdk/test/integration_test.py b/python/sdk/test/integration_test.py index c77feee65..8c3a14756 100644 --- a/python/sdk/test/integration_test.py +++ b/python/sdk/test/integration_test.py @@ -98,10 +98,7 @@ def test_sklearn( with merlin.new_model_version() as v: merlin.log_model(model_dir=model_dir) - resource_request = ResourceRequest(1, 1, "100m", "200Mi") - endpoint = merlin.deploy( - v, deployment_mode=deployment_mode, 
resource_request=resource_request - ) + endpoint = merlin.deploy(v, deployment_mode=deployment_mode) resp = requests.post(f"{endpoint.url}", json=request_json) assert resp.status_code == 200 @@ -133,10 +130,7 @@ def test_xgboost( # Upload the serialized model to MLP merlin.log_model(model_dir=model_dir) - resource_request = ResourceRequest(1, 1, "100m", "200Mi") - endpoint = merlin.deploy( - v, deployment_mode=deployment_mode, resource_request=resource_request - ) + endpoint = merlin.deploy(v, deployment_mode=deployment_mode) resp = requests.post(f"{endpoint.url}", json=request_json) assert resp.status_code == 200 @@ -240,8 +234,7 @@ def test_pytorch(integration_test_url, project_name, use_google_oauth, requests) with merlin.new_model_version() as v: merlin.log_model(model_dir=model_dir) - resource_request = ResourceRequest(1, 1, "100m", "200Mi") - endpoint = merlin.deploy(v, resource_request=resource_request) + endpoint = merlin.deploy(v) resp = requests.post(f"{endpoint.url}", json=request_json) @@ -265,9 +258,8 @@ def test_set_traffic(integration_test_url, project_name, use_google_oauth, reque with merlin.new_model_version() as v: # Upload the serialized model to MLP - resource_request = ResourceRequest(1, 1, "100m", "200Mi") merlin.log_model(model_dir=model_dir) - endpoint = merlin.deploy(v, resource_request=resource_request) + endpoint = merlin.deploy(v) resp = requests.post(f"{endpoint.url}", json=request_json) @@ -307,9 +299,8 @@ def test_serve_traffic(integration_test_url, project_name, use_google_oauth, req with merlin.new_model_version() as v: # Upload the serialized model to MLP - resource_request = ResourceRequest(1, 1, "100m", "200Mi") merlin.log_model(model_dir=model_dir) - endpoint = merlin.deploy(v, resource_request=resource_request) + endpoint = merlin.deploy(v) resp = requests.post(f"{endpoint.url}", json=request_json) @@ -355,10 +346,7 @@ def test_multi_env(integration_test_url, project_name, use_google_oauth, request with 
merlin.new_model_version() as v: # Upload the serialized model to MLP merlin.log_model(model_dir=model_dir) - resource_request = ResourceRequest(1, 1, "100m", "200Mi") - endpoint = merlin.deploy( - v, environment_name=default_env.name, resource_request=resource_request - ) + endpoint = merlin.deploy(v, environment_name=default_env.name) resp = requests.post(f"{endpoint.url}", json=request_json) @@ -395,7 +383,7 @@ def test_resource_request( # Upload the serialized model to MLP merlin.log_model(model_dir=model_dir) - resource_request = ResourceRequest(1, 1, "100m", "200Mi") + resource_request = ResourceRequest(1, 1, "100m", "256Mi") endpoint = merlin.deploy( v, environment_name=default_env.name, @@ -444,7 +432,7 @@ def test_resource_request_with_gpu( # Upload the serialized model to MLP merlin.log_model(model_dir=model_dir) - resource_request = ResourceRequest(1, 1, "100m", "200Mi", **gpu_config) + resource_request = ResourceRequest(1, 1, "100m", "256Mi", **gpu_config) endpoint = merlin.deploy( v, environment_name=default_env.name, diff --git a/python/sdk/test/pytorch-model/pytorch-sample/config/config.properties b/python/sdk/test/pytorch-model/pytorch-sample/config/config.properties index bd7e62955..876c0e1a9 100644 --- a/python/sdk/test/pytorch-model/pytorch-sample/config/config.properties +++ b/python/sdk/test/pytorch-model/pytorch-sample/config/config.properties @@ -10,4 +10,4 @@ job_queue_size=10 enable_envvars_config=true install_py_dep_per_model=true model_store=/mnt/models/model-store -model_snapshot={"name":"startup.cfg","modelCount":1,"models":{"pytorch-sample-1":{"1.0":{"defaultVersion":true,"marName":"pytorch-sample.mar","minWorkers":1,"maxWorkers":5,"batchSize":1,"maxBatchDelay":10,"responseTimeout":120}}}} +model_snapshot={"name":"startup.cfg","modelCount":1,"models":{"pytorch-sample-1-r1":{"1.0":{"defaultVersion":true,"marName":"pytorch-sample.mar","minWorkers":1,"maxWorkers":5,"batchSize":1,"maxBatchDelay":10,"responseTimeout":120}}}} diff --git 
a/scripts/e2e/run-e2e.sh b/scripts/e2e/run-e2e.sh index 6d9f7f207..3d1f9c1fa 100755 --- a/scripts/e2e/run-e2e.sh +++ b/scripts/e2e/run-e2e.sh @@ -33,4 +33,4 @@ kubectl create namespace ${E2E_PROJECT_NAME} --dry-run=client -o yaml | kubectl cd ../../python/sdk pip install pipenv==2023.7.23 pipenv install --dev --skip-lock --python ${PYTHON_VERSION} -pipenv run pytest -n=8 -W=ignore --cov=merlin -m "not (gpu or feast or batch or pyfunc or local_server_test or cli or customtransformer)" --durations=0 +pipenv run pytest -n=4 -W=ignore --cov=merlin -m "not (gpu or feast or batch or pyfunc or local_server_test or cli or customtransformer)" --durations=0 diff --git a/scripts/e2e/values-e2e.yaml b/scripts/e2e/values-e2e.yaml index d59c5f2bb..9d18f8527 100644 --- a/scripts/e2e/values-e2e.yaml +++ b/scripts/e2e/values-e2e.yaml @@ -80,8 +80,8 @@ environmentConfigs: default_deployment_config: min_replica: 0 max_replica: 1 - cpu_request: "50m" - memory_request: "128Mi" + cpu_request: "100m" + memory_request: "256Mi" default_transformer_config: min_replica: 0 max_replica: 1 diff --git a/ui/src/pages/version/HistoryDetails.js b/ui/src/pages/version/HistoryDetails.js new file mode 100644 index 000000000..26726ee67 --- /dev/null +++ b/ui/src/pages/version/HistoryDetails.js @@ -0,0 +1,182 @@ +import { DateFromNow } from "@caraml-dev/ui-lib"; +import { + EuiBadge, + EuiButtonIcon, + EuiCodeBlock, + EuiFlexGroup, + EuiFlexItem, + EuiHealth, + EuiInMemoryTable, + EuiScreenReaderOnly, + EuiText, +} from "@elastic/eui"; +import { useState } from "react"; +import { ConfigSection, ConfigSectionPanel } from "../../components/section"; +import { useMerlinApi } from "../../hooks/useMerlinApi"; + +const defaultTextSize = "s"; + +const DeploymentStatus = ({ + status, + deployment, + deployedRevision, + endpointStatus, +}) => { + if (deployment.error !== "") { + return Failed; + } + + if (status === "running" || status === "serving") { + if ( + deployment.id === deployedRevision.id && + 
(endpointStatus === "running" || endpointStatus === "serving") + ) { + return Deployed; + } + return Not Deployed; + } else if (status === "pending") { + return Pending; + } +}; + +const RevisionPanel = ({ deployments, deploymentsLoaded, endpoint }) => { + const orderedDeployments = deployments.sort((a, b) => b.id - a.id); + + const deployedRevision = orderedDeployments.find( + (deployment) => + deployment.status === "running" || deployment.status === "serving" + ) || { id: null }; + + const canBeExpanded = (deployment) => { + return deployment.error !== ""; + }; + + const [itemIdToExpandedRowMap, setItemIdToExpandedRowMap] = useState({}); + + const toggleDetails = (deployment) => { + const itemIdToExpandedRowMapValues = { ...itemIdToExpandedRowMap }; + + if (itemIdToExpandedRowMapValues[deployment.id]) { + delete itemIdToExpandedRowMapValues[deployment.id]; + } else { + itemIdToExpandedRowMapValues[deployment.id] = ( + <> + + Error message + + {deployment.error} + + ); + } + setItemIdToExpandedRowMap(itemIdToExpandedRowMapValues); + }; + + const cellProps = (item, column) => { + if (column.field !== "actions" && canBeExpanded(item)) { + return { + style: { cursor: "pointer" }, + onClick: () => toggleDetails(item), + }; + } + return undefined; + }; + + const columns = [ + { + field: "updated_at", + name: "Deployment Time", + render: (date, deployment) => ( + <> + +    + {deployment.id === deployedRevision.id && ( + Current + )} + {/* {JSON.stringify(deployment.id)} */} + + ), + }, + { + field: "status", + name: "Deployment Status", + render: (status, deployment) => ( + + ), + }, + { + align: "right", + width: "40px", + isExpander: true, + name: ( + + Expand rows + + ), + render: (deployment) => { + const itemIdToExpandedRowMapValues = { ...itemIdToExpandedRowMap }; + + return ( + canBeExpanded(deployment) && ( + toggleDetails(deployment)} + aria-label={ + itemIdToExpandedRowMapValues[deployment.id] + ? 
"Collapse" + : "Expand" + } + iconType={ + itemIdToExpandedRowMapValues[deployment.id] + ? "arrowUp" + : "arrowDown" + } + /> + ) + ); + }, + }, + ]; + + return ( + + + + + + ); +}; + +export const HistoryDetails = ({ model, version, endpoint }) => { + const [{ data: deployments, isLoaded: deploymentsLoaded }] = useMerlinApi( + `/models/${model.id}/versions/${version.id}/endpoints/${endpoint.id}/deployments`, + {}, + [] + ); + + return ( + + + + + + + ); +}; diff --git a/ui/src/pages/version/VersionDetails.js b/ui/src/pages/version/VersionDetails.js index 037a0be38..4b65f7faa 100644 --- a/ui/src/pages/version/VersionDetails.js +++ b/ui/src/pages/version/VersionDetails.js @@ -14,8 +14,7 @@ * limitations under the License. */ -import React, { Fragment, useEffect, useState } from "react"; -import { Link, Route, Routes, useParams } from "react-router-dom"; +import { replaceBreadcrumbs } from "@caraml-dev/ui-lib"; import { EuiButton, EuiEmptyPrompt, @@ -24,16 +23,18 @@ import { EuiLoadingContent, EuiPageTemplate, EuiSpacer, - EuiText + EuiText, } from "@elastic/eui"; -import { replaceBreadcrumbs } from "@caraml-dev/ui-lib"; +import React, { Fragment, useEffect, useState } from "react"; +import { Link, Route, Routes, useParams } from "react-router-dom"; +import { ContainerLogsView } from "../../components/logs/ContainerLogsView"; import config from "../../config"; -import mocks from "../../mocks"; import { useMerlinApi } from "../../hooks/useMerlinApi"; -import { ContainerLogsView } from "../../components/logs/ContainerLogsView"; +import mocks from "../../mocks"; import { DeploymentPanelHeader } from "./DeploymentPanelHeader"; -import { ModelVersionPanelHeader } from "./ModelVersionPanelHeader"; import { EndpointDetails } from "./EndpointDetails"; +import { HistoryDetails } from "./HistoryDetails"; +import { ModelVersionPanelHeader } from "./ModelVersionPanelHeader"; import { VersionTabNavigation } from "./VersionTabNavigation"; /** @@ -41,7 +42,13 @@ import { 
VersionTabNavigation } from "./VersionTabNavigation"; * In this page users can also manage all deployed endpoint created from the model version. */ const VersionDetails = () => { - const { projectId, modelId, versionId, endpointId, "*": section } = useParams(); + const { + projectId, + modelId, + versionId, + endpointId, + "*": section, + } = useParams(); const [{ data: model, isLoaded: modelLoaded }] = useMerlinApi( `/projects/${projectId}/models/${modelId}`, { mock: mocks.model }, @@ -63,12 +70,12 @@ const VersionDetails = () => { if (version.endpoints && version.endpoints.length > 0) { setIsDeployed(true); setEnvironments( - version.endpoints.map(endpoint => endpoint.environment) + version.endpoints.map((endpoint) => endpoint.environment) ); if (endpointId) { setEndpoint( - version.endpoints.find(endpoint => endpoint.id === endpointId) + version.endpoints.find((endpoint) => endpoint.id === endpointId) ); } } @@ -82,22 +89,22 @@ const VersionDetails = () => { breadCrumbs.push( { text: "Models", - href: `/merlin/projects/${model.project_id}/models` + href: `/merlin/projects/${model.project_id}/models`, }, { text: model.name || "", - href: `/merlin/projects/${model.project_id}/models/${model.id}` + href: `/merlin/projects/${model.project_id}/models/${model.id}`, }, { text: `Model Version ${version.id}`, - href: `/merlin/projects/${model.project_id}/models/${model.id}/versions/${version.id}` + href: `/merlin/projects/${model.project_id}/models/${model.id}/versions/${version.id}`, } ); } if (endpoint) { breadCrumbs.push({ - text: endpoint.environment_name + text: endpoint.environment_name, }); } @@ -108,61 +115,64 @@ const VersionDetails = () => { {!modelLoaded && !versionLoaded ? 
( - - - - - - ) : ( - - + + + + + ) : ( + + + {model.name} + {" version "} + {version.id} + + } + /> + + + + {!(section === "deploy" || section === "redeploy") && + model && + modelLoaded && + version && + versionLoaded && ( - {model.name} - {" version "} - {version.id} + + - } - /> - - - - {!(section === "deploy" || section === "redeploy") && - model && - modelLoaded && - version && - versionLoaded && ( - - - - )} - {!(section === "deploy" || section === "redeploy") && - model && - modelLoaded && - version && - versionLoaded && - environments && - isDeployed && ( - - - - + {!(section === "deploy" || section === "redeploy") && + model && + modelLoaded && + version && + versionLoaded && + environments && + isDeployed && ( + + + + )} {!(section === "deploy" || section === "redeploy") && endpoint && isDeployed && ( - + )} @@ -172,7 +182,8 @@ const VersionDetails = () => { modelLoaded && version && versionLoaded && - !isDeployed && model.type !== "pyfunc_v2" && ( + !isDeployed && + model.type !== "pyfunc_v2" && ( Model version is not deployed} body={ @@ -183,11 +194,10 @@ const VersionDetails = () => {

+ state={{ model: model, version: version }} + > - - Deploy - + Deploy
@@ -195,36 +205,44 @@ const VersionDetails = () => { /> )} - {model && modelLoaded && version && versionLoaded && endpoint && ( - - - } - /> - - } - /> - - )} - - - )} - + {model && modelLoaded && version && versionLoaded && endpoint && ( + + + } + /> + + } + /> + + } + /> + + )} + + + )}
); }; diff --git a/ui/src/pages/version/VersionTabNavigation.js b/ui/src/pages/version/VersionTabNavigation.js index 700165888..10e36a979 100644 --- a/ui/src/pages/version/VersionTabNavigation.js +++ b/ui/src/pages/version/VersionTabNavigation.js @@ -14,9 +14,9 @@ * limitations under the License. */ +import { EuiIcon } from "@elastic/eui"; import React from "react"; import { useNavigate } from "react-router-dom"; -import { EuiIcon } from "@elastic/eui"; import { TabNavigation } from "../../components/TabNavigation"; export const VersionTabNavigation = ({ endpoint, actions, selectedTab }) => { @@ -24,11 +24,15 @@ export const VersionTabNavigation = ({ endpoint, actions, selectedTab }) => { const tabs = [ { id: "details", - name: "Configuration" + name: "Configuration", + }, + { + id: "history", + name: "History", }, { id: "logs", - name: "Logs" + name: "Logs", }, { id: "monitoring_dashboard_link", @@ -39,8 +43,8 @@ export const VersionTabNavigation = ({ endpoint, actions, selectedTab }) => { ), href: endpoint.monitoring_url, - target: "_blank" - } + target: "_blank", + }, ]; return ( diff --git a/ui/src/version/VersionListTable.js b/ui/src/version/VersionListTable.js index b87aac5e3..201d328ea 100644 --- a/ui/src/version/VersionListTable.js +++ b/ui/src/version/VersionListTable.js @@ -426,7 +426,7 @@ const VersionListTable = ({ navigate( - `/merlin/projects/${activeModel.project_id}/models/${activeModel.id}/versions/${endpoint.version_id}/endpoints/${endpoint.id}` + `/merlin/projects/${activeModel.project_id}/models/${activeModel.id}/versions/${endpoint.version_id}/endpoints/${endpoint.id}/details` ) } >