From a041b7e688910cbd025a138c14d6fc7453dc48a5 Mon Sep 17 00:00:00 2001
From: Arief Rahmansyah
Date: Thu, 19 Oct 2023 08:26:20 +0700
Subject: [PATCH] Introducing model version revision (#471)
**What this PR does / why we need it**:
The current implementation of model version redeployment patches the
existing inference service however, if the patch fails, it will delete
the existing inference service
([ref](https://github.com/caraml-dev/merlin/blob/v0.33.0/api/cluster/controller.go#L243-L250)).
This PR reworks how we do the redeployment by introducing model version
revision which itself is a new inference service that deployed for every
redeployment. Then we will have a new Istio VirtualService that routes
the traffic to the latest revision (latest inference service). Note that
the new VirtualService adds a `Content-Type` header to the request
because we need it for graceful migration to Kserve 0.11 where it
requires the `Content-Type` header to be set, but our clients might not
be using this header.
It also changes the naming convention for the deployed inference
service:
From: `{model_name}-{model_version}` to
`{model_name}-{model_version}-{model_version_revision}`
### Serverless Deployment Before
### Serverless Deployment After
### Raw Deployment Before
### Raw Deployment After
### Model Version Deployment History UI
Changes this PR introduces:
1. Add `revision_id` column to `version_endpoints` table
2. Add revision number to model service name, prefixed with `r`
3. Add model version virtual service that routes to deployed model
service with revision number to maintain backward compatibility
a. New model service with revision number's URL is something like:
my-model-name-1-r1.domain.com
b. To maintain backward compatibility, the new model version virtual
service has this URL: my-model-name-1.domain.com
4. Add Deployment History tab in the UI
5. Add Deployments API
6. Add redeploy label to deploymentCounter Prometheus metrics
7. Update e2e-test to use environment's default_deployment_config;
Increase github e2e-test cluster environment's default_deployment_config
**Which issue(s) this PR fixes**:
Fixes an issue where the existing model version deployment got deleted if the redeployment fails.
**Does this PR introduce a user-facing change?**:
```release-note
Add model version deployment history UI
```
**Checklist**
- [ ] Added unit test, integration, and/or e2e tests
- [ ] Tested locally
- [ ] Updated documentation
- [ ] Update Swagger spec if the PR introduces API changes
- [ ] Regenerated Golang and Python client if the PR introduces API
changes
---
api/api/deployment_api.go | 19 +
api/api/deployment_api_test.go | 100 +
api/api/router.go | 5 +
api/api/version_endpoints_api.go | 8 +-
api/api/version_endpoints_api_test.go | 16 +-
api/cluster/container_test.go | 2 +-
api/cluster/controller.go | 119 +-
api/cluster/controller_test.go | 407 ++--
api/cluster/errors.go | 26 +-
api/cluster/resource/templater.go | 171 +-
api/cluster/resource/templater_gpu_test.go | 44 +-
api/cluster/resource/templater_test.go | 2080 +----------------
api/cluster/virtual_service.go | 227 ++
api/cluster/virtual_service_test.go | 253 ++
api/cmd/api/main.go | 2 +
api/cmd/inference-logger/main.go | 20 +-
api/cmd/inference-logger/main_test.go | 63 +-
api/models/container.go | 5 +-
api/models/container_test.go | 13 +-
api/models/deployment.go | 2 +-
api/models/service.go | 19 +-
api/models/service_test.go | 9 +-
api/models/version_endpoint.go | 33 +-
api/queue/work/model_service_deployment.go | 56 +-
.../work/model_service_deployment_test.go | 385 ++-
api/service/deployment_service.go | 25 +
api/service/deployment_service_test.go | 88 +
api/service/mocks/deployment_service.go | 54 +
api/service/mocks/endpoints_service.go | 20 +-
api/service/mocks/environment_service.go | 27 +-
api/service/mocks/list_options.go | 35 +-
api/service/mocks/log_service.go | 2 +-
.../mocks/model_endpoint_alert_service.go | 27 +-
api/service/mocks/model_endpoints_service.go | 2 +-
api/service/mocks/models_service.go | 2 +-
api/service/mocks/prediction_job_service.go | 27 +-
api/service/mocks/projects_service.go | 2 +-
api/service/mocks/queue_producer.go | 27 -
api/service/mocks/secret_service.go | 22 +-
api/service/mocks/transformer_service.go | 7 +-
api/service/mocks/versions_service.go | 2 +-
api/service/version_endpoint_service.go | 25 +-
api/service/version_endpoint_service_test.go | 247 +-
api/storage/deployment_storage.go | 8 +
api/storage/mocks/alert_storage.go | 12 +-
api/storage/mocks/deployment_storage.go | 28 +-
api/storage/mocks/model_endpoint_storage.go | 2 +-
api/storage/mocks/prediction_job_storage.go | 2 +-
api/storage/mocks/version_endpoint_storage.go | 10 +-
db-migrations/32_revision_id.down.sql | 1 +
db-migrations/32_revision_id.up.sql | 2 +
python/sdk/test/integration_test.py | 28 +-
.../pytorch-sample/config/config.properties | 2 +-
scripts/e2e/run-e2e.sh | 2 +-
scripts/e2e/values-e2e.yaml | 4 +-
ui/src/pages/version/HistoryDetails.js | 182 ++
ui/src/pages/version/VersionDetails.js | 206 +-
ui/src/pages/version/VersionTabNavigation.js | 14 +-
ui/src/version/VersionListTable.js | 2 +-
59 files changed, 2373 insertions(+), 2857 deletions(-)
create mode 100644 api/api/deployment_api.go
create mode 100644 api/api/deployment_api_test.go
create mode 100644 api/cluster/virtual_service.go
create mode 100644 api/cluster/virtual_service_test.go
create mode 100644 api/service/deployment_service.go
create mode 100644 api/service/deployment_service_test.go
create mode 100644 api/service/mocks/deployment_service.go
delete mode 100644 api/service/mocks/queue_producer.go
create mode 100644 db-migrations/32_revision_id.down.sql
create mode 100644 db-migrations/32_revision_id.up.sql
create mode 100644 ui/src/pages/version/HistoryDetails.js
diff --git a/api/api/deployment_api.go b/api/api/deployment_api.go
new file mode 100644
index 000000000..3ca79edb0
--- /dev/null
+++ b/api/api/deployment_api.go
@@ -0,0 +1,19 @@
+package api
+
+import (
+ "fmt"
+ "net/http"
+)
+
+type DeploymentController struct {
+ *AppContext
+}
+
+func (c *DeploymentController) ListDeployments(r *http.Request, vars map[string]string, _ interface{}) *Response {
+ deployments, err := c.DeploymentService.ListDeployments(vars["model_id"], vars["version_id"], vars["endpoint_id"])
+ if err != nil {
+ return InternalServerError(fmt.Sprintf("Error listing deployments: %v", err))
+ }
+
+ return Ok(deployments)
+}
diff --git a/api/api/deployment_api_test.go b/api/api/deployment_api_test.go
new file mode 100644
index 000000000..69ab53ed8
--- /dev/null
+++ b/api/api/deployment_api_test.go
@@ -0,0 +1,100 @@
+package api
+
+import (
+ "fmt"
+ "net/http"
+ "testing"
+ "time"
+
+ "github.com/caraml-dev/merlin/models"
+ "github.com/caraml-dev/merlin/service/mocks"
+ "github.com/google/uuid"
+)
+
+func TestDeploymentController_ListDeployments(t *testing.T) {
+ endpointUUID := uuid.New()
+ endpointUUIDString := fmt.Sprint(endpointUUID)
+
+ createdUpdated := models.CreatedUpdated{
+ CreatedAt: time.Now(),
+ UpdatedAt: time.Now(),
+ }
+
+ testCases := []struct {
+ desc string
+ vars map[string]string
+ deploymentService func() *mocks.DeploymentService
+ expected *Response
+ }{
+ {
+ desc: "Should success list deployments",
+ vars: map[string]string{
+ "model_id": "model",
+ "version_id": "1",
+ "endpoint_id": endpointUUIDString,
+ },
+ deploymentService: func() *mocks.DeploymentService {
+ mockSvc := &mocks.DeploymentService{}
+ mockSvc.On("ListDeployments", "model", "1", endpointUUIDString).Return([]*models.Deployment{
+ {
+ ID: models.ID(1),
+ ProjectID: models.ID(1),
+ VersionModelID: models.ID(1),
+ VersionID: models.ID(1),
+ VersionEndpointID: endpointUUID,
+ Status: models.EndpointRunning,
+ Error: "",
+ CreatedUpdated: createdUpdated,
+ },
+ }, nil)
+ return mockSvc
+ },
+ expected: &Response{
+ code: http.StatusOK,
+ data: []*models.Deployment{
+ {
+ ID: models.ID(1),
+ ProjectID: models.ID(1),
+ VersionModelID: models.ID(1),
+ VersionID: models.ID(1),
+ VersionEndpointID: endpointUUID,
+ Status: models.EndpointRunning,
+ Error: "",
+ CreatedUpdated: createdUpdated,
+ },
+ },
+ },
+ },
+ {
+ desc: "Should return 500 when failed fetching list of deployments",
+ vars: map[string]string{
+ "model_id": "model",
+ "version_id": "1",
+ "endpoint_id": endpointUUIDString,
+ },
+ deploymentService: func() *mocks.DeploymentService {
+ mockSvc := &mocks.DeploymentService{}
+ mockSvc.On("ListDeployments", "model", "1", endpointUUIDString).Return(nil, fmt.Errorf("Database is down"))
+ return mockSvc
+ },
+ expected: &Response{
+ code: http.StatusInternalServerError,
+ data: Error{
+ Message: "Error listing deployments: Database is down",
+ },
+ },
+ },
+ }
+ for _, tC := range testCases {
+ t.Run(tC.desc, func(t *testing.T) {
+ mockSvc := tC.deploymentService()
+ ctl := &DeploymentController{
+ AppContext: &AppContext{
+ DeploymentService: mockSvc,
+ },
+ }
+ resp := ctl.ListDeployments(&http.Request{}, tC.vars, nil)
+ assertEqualResponses(t, tC.expected, resp)
+ })
+ }
+}
diff --git a/api/api/router.go b/api/api/router.go
index 6ce13d324..45aad0f9d 100644
--- a/api/api/router.go
+++ b/api/api/router.go
@@ -53,6 +53,7 @@ type AppContext struct {
DB *gorm.DB
Enforcer enforcer.Enforcer
+ DeploymentService service.DeploymentService
EnvironmentService service.EnvironmentService
ProjectsService service.ProjectsService
ModelsService service.ModelsService
@@ -154,6 +155,7 @@ func NewRouter(appCtx AppContext) (*mux.Router, error) {
if err != nil {
return nil, err
}
+ deploymentController := DeploymentController{&appCtx}
environmentController := EnvironmentController{&appCtx}
projectsController := ProjectsController{&appCtx}
modelEndpointsController := ModelEndpointsController{&appCtx}
@@ -206,6 +208,9 @@ func NewRouter(appCtx AppContext) (*mux.Router, error) {
// To maintain backward compatibility with SDK v0.1.0
{http.MethodDelete, "/models/{model_id:[0-9]+}/versions/{version_id:[0-9]+}/endpoint", nil, endpointsController.DeleteEndpoint, "DeleteDefaultEndpoint"},
+ // Deployments API
+ {http.MethodGet, "/models/{model_id:[0-9]+}/versions/{version_id:[0-9]+}/endpoints/{endpoint_id}/deployments", nil, deploymentController.ListDeployments, "ListDeployments"},
+
{http.MethodGet, "/models/{model_id:[0-9]+}/versions/{version_id:[0-9]+}/endpoint/{endpoint_id}", nil, endpointsController.GetEndpoint, "GetEndpoint"},
{http.MethodPut, "/models/{model_id:[0-9]+}/versions/{version_id:[0-9]+}/endpoint/{endpoint_id}", models.VersionEndpoint{}, endpointsController.UpdateEndpoint, "UpdateEndpoint"},
{http.MethodDelete, "/models/{model_id:[0-9]+}/versions/{version_id:[0-9]+}/endpoint/{endpoint_id}", nil, endpointsController.DeleteEndpoint, "DeleteEndpoint"},
diff --git a/api/api/version_endpoints_api.go b/api/api/version_endpoints_api.go
index 98022671b..f6fe5ef31 100644
--- a/api/api/version_endpoints_api.go
+++ b/api/api/version_endpoints_api.go
@@ -379,12 +379,16 @@ func (c *EndpointsController) ListContainers(r *http.Request, vars map[string]st
if err != nil {
return NotFound(fmt.Sprintf("Version not found: %v", err))
}
+ endpoint, err := c.EndpointsService.FindByID(ctx, endpointID)
+ if err != nil {
+ return NotFound(fmt.Sprintf("Endpoint not found: %v", err))
+ }
- endpoint, err := c.EndpointsService.ListContainers(ctx, model, version, endpointID)
+ containers, err := c.EndpointsService.ListContainers(ctx, model, version, endpoint)
if err != nil {
return InternalServerError(fmt.Sprintf("Error while getting container for endpoint: %v", err))
}
- return Ok(endpoint)
+ return Ok(containers)
}
func validateUpdateRequest(prev *models.VersionEndpoint, new *models.VersionEndpoint) error {
diff --git a/api/api/version_endpoints_api_test.go b/api/api/version_endpoints_api_test.go
index e61dffade..ab124e8a7 100644
--- a/api/api/version_endpoints_api_test.go
+++ b/api/api/version_endpoints_api_test.go
@@ -536,7 +536,13 @@ func TestListContainers(t *testing.T) {
},
endpointService: func() *mocks.EndpointsService {
svc := &mocks.EndpointsService{}
- svc.On("ListContainers", context.Background(), mock.Anything, mock.Anything, uuid).Return([]*models.Container{
+ svc.On("FindByID", context.Background(), uuid).Return(&models.VersionEndpoint{
+ ID: uuid,
+ VersionModelID: models.ID(1),
+ VersionID: models.ID(1),
+ RevisionID: models.ID(1),
+ }, nil)
+ svc.On("ListContainers", context.Background(), mock.Anything, mock.Anything, mock.Anything).Return([]*models.Container{
{
Name: "pod-1",
PodName: "pod-1-1",
@@ -656,7 +662,13 @@ func TestListContainers(t *testing.T) {
},
endpointService: func() *mocks.EndpointsService {
svc := &mocks.EndpointsService{}
- svc.On("ListContainers", context.Background(), mock.Anything, mock.Anything, uuid).Return(nil, fmt.Errorf("Error creating secret: db is down"))
+ svc.On("FindByID", context.Background(), uuid).Return(&models.VersionEndpoint{
+ ID: uuid,
+ VersionModelID: models.ID(1),
+ VersionID: models.ID(1),
+ RevisionID: models.ID(1),
+ }, nil)
+ svc.On("ListContainers", context.Background(), mock.Anything, mock.Anything, mock.Anything).Return(nil, fmt.Errorf("Error creating secret: db is down"))
return svc
},
expected: &Response{
diff --git a/api/cluster/container_test.go b/api/cluster/container_test.go
index d98e65610..110688585 100644
--- a/api/cluster/container_test.go
+++ b/api/cluster/container_test.go
@@ -88,7 +88,7 @@ func TestContainer_GetContainers(t *testing.T) {
clusterMetadata := Metadata{GcpProject: "my-gcp", ClusterName: "my-cluster"}
containerFetcher := NewContainerFetcher(v1Client, clusterMetadata)
- ctl, _ := newController(knClient, kfClient, v1Client, nil, policyV1Client, config.DeploymentConfig{}, containerFetcher, nil)
+ ctl, _ := newController(knClient, kfClient, v1Client, nil, policyV1Client, nil, config.DeploymentConfig{}, containerFetcher, nil)
containers, err := ctl.GetContainers(context.Background(), tt.args.namespace, tt.args.labelSelector)
if !tt.wantError {
assert.NoErrorf(t, err, "expected no error got %v", err)
diff --git a/api/cluster/controller.go b/api/cluster/controller.go
index efcc054f1..960c78fef 100644
--- a/api/cluster/controller.go
+++ b/api/cluster/controller.go
@@ -16,12 +16,14 @@ package cluster
import (
"context"
+ "fmt"
"io"
"time"
kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1"
kservev1beta1client "github.com/kserve/kserve/pkg/client/clientset/versioned/typed/serving/v1beta1"
"github.com/pkg/errors"
+ networkingv1beta1 "istio.io/client-go/pkg/clientset/versioned/typed/networking/v1beta1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
@@ -83,6 +85,7 @@ type controller struct {
clusterClient corev1client.CoreV1Interface
batchClient batchv1client.BatchV1Interface
policyClient policyv1client.PolicyV1Interface
+ istioClient networkingv1beta1.NetworkingV1beta1Interface
namespaceCreator NamespaceCreator
deploymentConfig *config.DeploymentConfig
kfServingResourceTemplater *resource.InferenceServiceTemplater
@@ -126,6 +129,11 @@ func NewController(clusterConfig Config, deployConfig config.DeploymentConfig, s
return nil, err
}
+ istioClient, err := networkingv1beta1.NewForConfig(cfg)
+ if err != nil {
+ return nil, err
+ }
+
containerFetcher := NewContainerFetcher(coreV1Client, Metadata{
ClusterName: clusterConfig.ClusterName,
GcpProject: clusterConfig.GcpProject,
@@ -138,6 +146,7 @@ func NewController(clusterConfig Config, deployConfig config.DeploymentConfig, s
coreV1Client,
batchV1Client,
policyV1Client,
+ istioClient,
deployConfig,
containerFetcher,
kfServingResourceTemplater,
@@ -150,6 +159,7 @@ func newController(
coreV1Client corev1client.CoreV1Interface,
batchV1Client batchv1client.BatchV1Interface,
policyV1Client policyv1client.PolicyV1Interface,
+ istioClient networkingv1beta1.NetworkingV1beta1Interface,
deploymentConfig config.DeploymentConfig,
containerFetcher ContainerFetcher,
templater *resource.InferenceServiceTemplater,
@@ -160,6 +170,7 @@ func newController(
clusterClient: coreV1Client,
batchClient: batchV1Client,
policyClient: policyV1Client,
+ istioClient: istioClient,
namespaceCreator: NewNamespaceCreator(coreV1Client, deploymentConfig.NamespaceTimeout),
deploymentConfig: &deploymentConfig,
ContainerFetcher: containerFetcher,
@@ -186,56 +197,43 @@ func (c *controller) Deploy(ctx context.Context, modelService *models.Service) (
_, err := c.namespaceCreator.CreateNamespace(ctx, modelService.Namespace)
if err != nil {
log.Errorf("unable to create namespace %s %v", modelService.Namespace, err)
- return nil, ErrUnableToCreateNamespace
+ return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToCreateNamespace, modelService.Namespace))
}
isvcName := modelService.Name
- s, err := c.kserveClient.InferenceServices(modelService.Namespace).Get(isvcName, metav1.GetOptions{})
- if err != nil {
- if !kerrors.IsNotFound(err) {
- log.Errorf("unable to check inference service %s %v", isvcName, err)
- return nil, ErrUnableToGetInferenceServiceStatus
- }
-
- // create new resource
- spec, err := c.kfServingResourceTemplater.CreateInferenceServiceSpec(modelService, c.deploymentConfig)
- if err != nil {
- log.Errorf("unable to create inference service spec %s %v", isvcName, err)
- return nil, ErrUnableToCreateInferenceService
- }
- s, err = c.kserveClient.InferenceServices(modelService.Namespace).Create(spec)
- if err != nil {
- log.Errorf("unable to create inference service %s %v", isvcName, err)
- return nil, ErrUnableToCreateInferenceService
- }
- } else {
- // Get current scale of the existing deployment
- deploymentScale := resource.DeploymentScale{}
+ // Get current scale of the existing deployment
+ deploymentScale := resource.DeploymentScale{}
+ if modelService.CurrentIsvcName != "" {
if modelService.DeploymentMode == deployment.ServerlessDeploymentMode ||
modelService.DeploymentMode == deployment.EmptyDeploymentMode {
- deploymentScale = c.GetCurrentDeploymentScale(ctx, modelService.Namespace, s.Status.Components)
- }
+ currentIsvc, err := c.kserveClient.InferenceServices(modelService.Namespace).Get(modelService.CurrentIsvcName, metav1.GetOptions{})
+ if err != nil && !kerrors.IsNotFound(err) {
+ return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToGetInferenceServiceStatus, isvcName))
+ }
- patchedSpec, err := c.kfServingResourceTemplater.PatchInferenceServiceSpec(s, modelService, c.deploymentConfig, deploymentScale)
- if err != nil {
- log.Errorf("unable to update inference service %s %v", isvcName, err)
- return nil, ErrUnableToUpdateInferenceService
+ deploymentScale = c.GetCurrentDeploymentScale(ctx, modelService.Namespace, currentIsvc.Status.Components)
}
+ }
- // existing resource found, do update
- s, err = c.kserveClient.InferenceServices(modelService.Namespace).Update(patchedSpec)
- if err != nil {
- log.Errorf("unable to update inference service %s %v", isvcName, err)
- return nil, ErrUnableToUpdateInferenceService
- }
+ // create new resource
+ spec, err := c.kfServingResourceTemplater.CreateInferenceServiceSpec(modelService, c.deploymentConfig, deploymentScale)
+ if err != nil {
+ log.Errorf("unable to create inference service spec %s: %v", isvcName, err)
+ return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToCreateInferenceService, isvcName))
+ }
+
+ s, err := c.kserveClient.InferenceServices(modelService.Namespace).Create(spec)
+ if err != nil {
+ log.Errorf("unable to create inference service %s: %v", isvcName, err)
+ return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToCreateInferenceService, isvcName))
}
if c.deploymentConfig.PodDisruptionBudget.Enabled {
pdbs := createPodDisruptionBudgets(modelService, c.deploymentConfig.PodDisruptionBudget)
if err := c.deployPodDisruptionBudgets(ctx, pdbs); err != nil {
- log.Errorf("unable to create pdb %v", err)
- return nil, ErrUnableToCreatePDB
+ log.Errorf("unable to create pdb: %v", err)
+ return nil, errors.Wrapf(err, fmt.Sprintf("%v", ErrUnableToCreatePDB))
}
}
@@ -243,19 +241,46 @@ func (c *controller) Deploy(ctx context.Context, modelService *models.Service) (
if err != nil {
// remove created inferenceservice when got error
if err := c.deleteInferenceService(isvcName, modelService.Namespace); err != nil {
- log.Warnf("unable to delete inference service %s with error %v", isvcName, err)
+ log.Errorf("unable to delete inference service %s with error %v", isvcName, err)
}
- return nil, err
+ return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToGetInferenceServiceStatus, isvcName))
}
inferenceURL := models.GetInferenceURL(s.Status.URL, isvcName, modelService.Protocol)
+
+ // Create / update virtual service
+ vsCfg, err := NewVirtualService(modelService, inferenceURL)
+ if err != nil {
+ log.Errorf("unable to initialize virtual service builder: %v", err)
+ return nil, errors.Wrapf(err, fmt.Sprintf("%v", ErrUnableToCreateVirtualService))
+ }
+
+ vs, err := c.deployVirtualService(ctx, vsCfg)
+ if err != nil {
+ log.Errorf("unable to create virtual service: %v", err)
+ return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToCreateVirtualService, vsCfg.Name))
+ }
+
+ if vs != nil && len(vs.Spec.Hosts) > 0 {
+ inferenceURL = vsCfg.getInferenceURL(vs)
+ }
+
+ // Delete previous inference service
+ if modelService.CurrentIsvcName != "" {
+ if err := c.deleteInferenceService(modelService.CurrentIsvcName, modelService.Namespace); err != nil {
+ log.Errorf("unable to delete prevision revision %s with error %v", modelService.CurrentIsvcName, err)
+ return nil, errors.Wrapf(err, fmt.Sprintf("%v (%s)", ErrUnableToDeletePreviousInferenceService, modelService.CurrentIsvcName))
+ }
+ }
+
return &models.Service{
- Name: s.Name,
- Namespace: s.Namespace,
- ServiceName: s.Status.URL.Host,
- URL: inferenceURL,
- Metadata: modelService.Metadata,
+ Name: s.Name,
+ Namespace: s.Namespace,
+ ServiceName: s.Status.URL.Host,
+ URL: inferenceURL,
+ Metadata: modelService.Metadata,
+ CurrentIsvcName: s.Name,
}, nil
}
@@ -280,6 +305,14 @@ func (c *controller) Delete(ctx context.Context, modelService *models.Service) (
}
}
+ if modelService.RevisionID > 1 {
+ vsName := fmt.Sprintf("%s-%s-%s", modelService.ModelName, modelService.ModelVersion, models.VirtualServiceComponentType)
+ if err := c.deleteVirtualService(ctx, vsName, modelService.Namespace); err != nil {
+ log.Errorf("unable to delete virtual service %v", err)
+ return nil, ErrUnableToDeleteVirtualService
+ }
+ }
+
return modelService, nil
}
diff --git a/api/cluster/controller_test.go b/api/cluster/controller_test.go
index c6e107eb6..fadbb3a34 100644
--- a/api/cluster/controller_test.go
+++ b/api/cluster/controller_test.go
@@ -25,6 +25,9 @@ import (
fakekserve "github.com/kserve/kserve/pkg/client/clientset/versioned/fake"
fakekservev1beta1 "github.com/kserve/kserve/pkg/client/clientset/versioned/typed/serving/v1beta1/fake"
"github.com/stretchr/testify/assert"
+ istiov1beta1 "istio.io/client-go/pkg/apis/networking/v1beta1"
+ fakeistio "istio.io/client-go/pkg/clientset/versioned/fake"
+ fakeistionetworking "istio.io/client-go/pkg/clientset/versioned/typed/networking/v1beta1/fake"
corev1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
@@ -54,6 +57,7 @@ const (
listMethod = "list"
getMethod = "get"
createMethod = "create"
+ patchMethod = "patch"
updateMethod = "update"
deleteMethod = "delete"
deleteCollectionMethod = "delete-collection"
@@ -63,6 +67,7 @@ const (
knativeVersion = "v1"
inferenceServiceResource = "inferenceservices"
revisionResource = "revisions"
+ virtualServiceResource = "virtualservices"
coreGroup = ""
namespaceResource = "namespaces"
@@ -83,8 +88,13 @@ type inferenceServiceReactor struct {
err error
}
-type knativeRevisionReactor struct {
- rev *knservingv1.Revision
+type pdbReactor struct {
+ pdb *policyv1.PodDisruptionBudget
+ err error
+}
+
+type vsReactor struct {
+ vs *istiov1beta1.VirtualService
err error
}
@@ -93,6 +103,7 @@ var clusterMetadata = Metadata{GcpProject: "my-gcp", ClusterName: "my-cluster"}
// TestDeployInferenceServiceNamespaceCreation test namespaceResource creation when deploying inference service
func TestController_DeployInferenceService_NamespaceCreation(t *testing.T) {
nsTimeout := 2 * tickDurationSecond * time.Second
+
model := &models.Model{
Name: "my-model",
}
@@ -102,13 +113,19 @@ func TestController_DeployInferenceService_NamespaceCreation(t *testing.T) {
version := &models.Version{
ID: 1,
}
+ revisionID := models.ID(1)
modelOpt := &models.ModelOption{}
- isvc := fakeInferenceService(model.Name, version.ID.String(), project.Name)
+
+ isvc := fakeInferenceService(model.Name, version.ID.String(), revisionID.String(), project.Name)
+ vs := fakeVirtualService(model.Name, version.ID.String())
modelSvc := &models.Service{
- Name: isvc.Name,
- Namespace: project.Name,
- Options: modelOpt,
+ Name: isvc.Name,
+ ModelName: model.Name,
+ ModelVersion: version.ID.String(),
+ RevisionID: revisionID,
+ Namespace: project.Name,
+ Options: modelOpt,
}
tests := []struct {
@@ -257,18 +274,17 @@ func TestController_DeployInferenceService_NamespaceCreation(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
knClient := knservingfake.NewSimpleClientset().ServingV1()
+
kfClient := fakekserve.NewSimpleClientset().ServingV1beta1().(*fakekservev1beta1.FakeServingV1beta1)
kfClient.PrependReactor(getMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
- kfClient.PrependReactor(getMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
- return true, isvc, nil
- })
- return true, nil, kerrors.NewNotFound(schema.GroupResource{Group: kfservingGroup, Resource: inferenceServiceResource}, isvc.Name)
+ return true, isvc, nil
})
kfClient.PrependReactor(createMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
return true, isvc, nil
})
v1Client := fake.NewSimpleClientset().CoreV1()
+
nsClient := v1Client.Namespaces().(*fakecorev1.FakeNamespaces)
nsClient.Fake.PrependReactor(getMethod, namespaceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
nsClient.Fake.PrependReactor(getMethod, namespaceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
@@ -282,6 +298,11 @@ func TestController_DeployInferenceService_NamespaceCreation(t *testing.T) {
policyV1Client := fake.NewSimpleClientset().PolicyV1()
+ istioClient := fakeistio.NewSimpleClientset().NetworkingV1beta1().(*fakeistionetworking.FakeNetworkingV1beta1)
+ istioClient.PrependReactor(patchMethod, virtualServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
+ return true, vs, nil
+ })
+
deployConfig := config.DeploymentConfig{
NamespaceTimeout: tt.nsTimeout,
DeploymentTimeout: 2 * tickDurationSecond * time.Second,
@@ -289,7 +310,8 @@ func TestController_DeployInferenceService_NamespaceCreation(t *testing.T) {
}
containerFetcher := NewContainerFetcher(v1Client, clusterMetadata)
- ctl, _ := newController(knClient, kfClient, v1Client, nil, policyV1Client, deployConfig, containerFetcher, nil)
+
+ ctl, _ := newController(knClient, kfClient, v1Client, nil, policyV1Client, istioClient, deployConfig, containerFetcher, nil)
iSvc, err := ctl.Deploy(context.Background(), modelSvc)
if tt.wantError {
@@ -297,6 +319,7 @@ func TestController_DeployInferenceService_NamespaceCreation(t *testing.T) {
assert.Nil(t, iSvc)
return
}
+
assert.NoError(t, err)
assert.NotNil(t, iSvc)
})
@@ -305,8 +328,8 @@ func TestController_DeployInferenceService_NamespaceCreation(t *testing.T) {
func TestController_DeployInferenceService(t *testing.T) {
defaultMaxUnavailablePDB := 20
-
deployTimeout := 2 * tickDurationSecond * time.Second
+
model := &models.Model{
Name: "my-model",
}
@@ -316,81 +339,59 @@ func TestController_DeployInferenceService(t *testing.T) {
version := &models.Version{
ID: 1,
}
+ revisionID := models.ID(1)
modelOpt := &models.ModelOption{}
- isvcName := models.CreateInferenceServiceName(model.Name, version.ID.String())
+
+ isvcName := models.CreateInferenceServiceName(model.Name, version.ID.String(), revisionID.String())
statusReady := createServiceReadyStatus(isvcName, project.Name, baseUrl)
namespace := &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{Name: project.Name},
Status: corev1.NamespaceStatus{Phase: corev1.NamespaceActive},
}
pdb := &policyv1.PodDisruptionBudget{}
+ vs := fakeVirtualService(model.Name, version.ID.String())
modelSvc := &models.Service{
- Name: isvcName,
- Namespace: project.Name,
- Options: modelOpt,
+ Name: isvcName,
+ ModelName: model.Name,
+ ModelVersion: version.ID.String(),
+ RevisionID: revisionID,
+ Namespace: project.Name,
+ Options: modelOpt,
}
tests := []struct {
- name string
- modelService *models.Service
- getRevResult *knativeRevisionReactor
- getResult *inferenceServiceReactor
- createResult *inferenceServiceReactor
- updateResult *inferenceServiceReactor
- checkResult *inferenceServiceReactor
- deployTimeout time.Duration
- wantError bool
+ name string
+ modelService *models.Service
+ createResult *inferenceServiceReactor
+ checkResult *inferenceServiceReactor
+ createPdbResult *pdbReactor
+ createVsResult *vsReactor
+ deployTimeout time.Duration
+ wantError bool
}{
{
- "success: create inference service",
- modelSvc,
- &knativeRevisionReactor{},
- &inferenceServiceReactor{
- nil,
- kerrors.NewNotFound(schema.GroupResource{Group: kfservingGroup, Resource: inferenceServiceResource}, isvcName),
- },
- &inferenceServiceReactor{
+ name: "success: create inference service",
+ modelService: modelSvc,
+ createResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}},
nil,
},
- nil,
- &inferenceServiceReactor{
+ checkResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
Status: statusReady,
},
nil,
},
- deployTimeout,
- false,
+ deployTimeout: deployTimeout,
+ createPdbResult: &pdbReactor{pdb, nil},
+ createVsResult: &vsReactor{vs, nil},
+ wantError: false,
},
{
- "success: update inference service",
- modelSvc,
- &knativeRevisionReactor{err: kerrors.NewNotFound(schema.GroupResource{}, "test service")},
- &inferenceServiceReactor{
- &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}},
- nil,
- },
- nil,
- &inferenceServiceReactor{
- &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}},
- nil,
- },
- &inferenceServiceReactor{
- &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
- Status: statusReady,
- },
- nil,
- },
- deployTimeout,
- false,
- },
- {
- "success: deploying service",
- &models.Service{
+ name: "success: deploying service",
+ modelService: &models.Service{
Name: isvcName,
Namespace: project.Name,
Options: modelOpt,
@@ -401,29 +402,25 @@ func TestController_DeployInferenceService(t *testing.T) {
MemoryRequest: resource.MustParse("1Gi"),
},
},
- &knativeRevisionReactor{},
- &inferenceServiceReactor{
- nil,
- kerrors.NewNotFound(schema.GroupResource{Group: kfservingGroup, Resource: inferenceServiceResource}, isvcName),
- },
- &inferenceServiceReactor{
+ createResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}},
nil,
},
- nil,
- &inferenceServiceReactor{
+ checkResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
Status: statusReady,
},
nil,
},
- deployTimeout,
- false,
+ deployTimeout: deployTimeout,
+ createPdbResult: &pdbReactor{pdb, nil},
+ createVsResult: &vsReactor{vs, nil},
+ wantError: false,
},
{
- "success: create inference service with transformer",
- &models.Service{
+ name: "success: create inference service with transformer",
+ modelService: &models.Service{
Name: isvcName,
Namespace: project.Name,
Options: modelOpt,
@@ -433,187 +430,147 @@ func TestController_DeployInferenceService(t *testing.T) {
Image: "ghcr.io/caraml-dev/merlin-transformer-test",
},
},
- &knativeRevisionReactor{},
- &inferenceServiceReactor{
- nil,
- kerrors.NewNotFound(schema.GroupResource{Group: kfservingGroup, Resource: inferenceServiceResource}, isvcName),
- },
- &inferenceServiceReactor{
+ createResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}},
nil,
},
- nil,
- &inferenceServiceReactor{
+ checkResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
Status: statusReady,
},
nil,
},
- deployTimeout,
- false,
+ deployTimeout: deployTimeout,
+ createPdbResult: &pdbReactor{pdb, nil},
+ createVsResult: &vsReactor{vs, nil},
+ wantError: false,
},
{
- "error: failed get",
- modelSvc,
- &knativeRevisionReactor{},
- &inferenceServiceReactor{
- nil,
- errors.New("error"),
- },
- &inferenceServiceReactor{
- &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}},
- nil,
- },
- nil,
- &inferenceServiceReactor{
- &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
- Status: statusReady,
- },
+ name: "error: failed create",
+ modelService: modelSvc,
+ createResult: &inferenceServiceReactor{
nil,
+ errors.New("error creating inference service"),
},
- deployTimeout,
- true,
+ checkResult: nil,
+ deployTimeout: deployTimeout,
+ createPdbResult: &pdbReactor{pdb, nil},
+ createVsResult: &vsReactor{vs, nil},
+ wantError: true,
},
{
- "error: failed create",
- modelSvc,
- &knativeRevisionReactor{},
- &inferenceServiceReactor{
- nil,
- kerrors.NewNotFound(schema.GroupResource{Group: "kubeflow.com/kfserving", Resource: "inferenceservices"}, isvcName),
- },
- &inferenceServiceReactor{
+ name: "error: failed check",
+ modelService: modelSvc,
+ createResult: &inferenceServiceReactor{
+ &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName}},
nil,
- errors.New("error creating inference service"),
},
- nil,
- &inferenceServiceReactor{
- &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
- Status: statusReady,
- },
+ checkResult: &inferenceServiceReactor{
nil,
+ errors.New("error check"),
},
- deployTimeout,
- true,
+ deployTimeout: deployTimeout,
+ createPdbResult: &pdbReactor{pdb, nil},
+ createVsResult: &vsReactor{vs, nil},
+ wantError: true,
},
{
- "error: failed update",
- modelSvc,
- &knativeRevisionReactor{err: kerrors.NewNotFound(schema.GroupResource{}, "test service")},
- &inferenceServiceReactor{
- &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}},
- nil,
- },
- nil,
- &inferenceServiceReactor{
+ name: "error: predictor error",
+ modelService: modelSvc,
+ createResult: &inferenceServiceReactor{
+ &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName}},
nil,
- errors.New("error updating inference service"),
},
- &inferenceServiceReactor{
+ checkResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
- Status: statusReady,
+ Status: createPredErrorCond(),
},
nil,
},
- deployTimeout,
- true,
+ deployTimeout: deployTimeout,
+ createPdbResult: &pdbReactor{pdb, nil},
+ createVsResult: &vsReactor{vs, nil},
+ wantError: true,
},
{
- "error: failed check",
- modelSvc,
- &knativeRevisionReactor{},
- &inferenceServiceReactor{
- nil,
- kerrors.NewNotFound(schema.GroupResource{Group: "kubeflow.com/kfserving", Resource: "inferenceservices"}, isvcName),
- },
- &inferenceServiceReactor{
+ name: "error: routes error",
+ modelService: modelSvc,
+ createResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName}},
nil,
},
- nil,
- &inferenceServiceReactor{
+ checkResult: &inferenceServiceReactor{
+ &kservev1beta1.InferenceService{
+ ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
+ Status: createRoutesErrorCond(),
+ },
nil,
- errors.New("error check"),
},
- deployTimeout,
- true,
+ deployTimeout: deployTimeout,
+ createPdbResult: &pdbReactor{pdb, nil},
+ createVsResult: &vsReactor{vs, nil},
+ wantError: true,
},
{
- "error: predictor error",
- modelSvc,
- &knativeRevisionReactor{},
- &inferenceServiceReactor{
- nil,
- kerrors.NewNotFound(schema.GroupResource{Group: "kubeflow.com/kfserving", Resource: "inferenceservices"}, isvcName),
- },
- &inferenceServiceReactor{
+ name: "error: pdb error",
+ modelService: modelSvc,
+ createResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName}},
nil,
},
- nil,
- &inferenceServiceReactor{
+ checkResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
- Status: createPredErrorCond(),
+ Status: createRoutesErrorCond(),
},
nil,
},
- deployTimeout,
- true,
+ deployTimeout: deployTimeout,
+ createPdbResult: &pdbReactor{nil, ErrUnableToCreatePDB},
+ createVsResult: &vsReactor{vs, nil},
+ wantError: true,
},
{
- "error: routes error",
- modelSvc,
- &knativeRevisionReactor{},
- &inferenceServiceReactor{
- nil,
- kerrors.NewNotFound(schema.GroupResource{Group: "kubeflow.com/kfserving", Resource: "inferenceservices"}, isvcName),
- },
- &inferenceServiceReactor{
+ name: "error: vs error",
+ modelService: modelSvc,
+ createResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName}},
nil,
},
- nil,
- &inferenceServiceReactor{
+ checkResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
Status: createRoutesErrorCond(),
},
nil,
},
- deployTimeout,
- true,
+ deployTimeout: deployTimeout,
+ createPdbResult: &pdbReactor{pdb, nil},
+ createVsResult: &vsReactor{nil, ErrUnableToCreateVirtualService},
+ wantError: true,
},
{
- "error: timeout",
- modelSvc,
- &knativeRevisionReactor{},
- &inferenceServiceReactor{
- nil,
- kerrors.NewNotFound(schema.GroupResource{Group: kfservingGroup, Resource: inferenceServiceResource}, isvcName),
- },
- &inferenceServiceReactor{
+ name: "error: timeout",
+ modelService: modelSvc,
+ createResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}},
nil,
},
- nil,
- &inferenceServiceReactor{
+ checkResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
Status: statusReady,
},
nil,
},
- 1 * time.Millisecond,
- true,
+ deployTimeout: 1 * time.Millisecond,
+ wantError: true,
},
{
- "error: deploying service due to insufficient CPU",
- &models.Service{
+ name: "error: deploying service due to insufficient CPU",
+ modelService: &models.Service{
Name: isvcName,
Namespace: project.Name,
Options: modelOpt,
@@ -624,29 +581,25 @@ func TestController_DeployInferenceService(t *testing.T) {
MemoryRequest: resource.MustParse("1Gi"),
},
},
- &knativeRevisionReactor{},
- &inferenceServiceReactor{
- nil,
- kerrors.NewNotFound(schema.GroupResource{Group: kfservingGroup, Resource: inferenceServiceResource}, isvcName),
- },
- &inferenceServiceReactor{
+ createResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}},
nil,
},
- nil,
- &inferenceServiceReactor{
+ checkResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
Status: statusReady,
},
nil,
},
- deployTimeout,
- true,
+ deployTimeout: deployTimeout,
+ createPdbResult: &pdbReactor{pdb, nil},
+ createVsResult: &vsReactor{vs, nil},
+ wantError: true,
},
{
- "error: deploying service due to insufficient memory",
- &models.Service{
+ name: "error: deploying service due to insufficient memory",
+ modelService: &models.Service{
Name: isvcName,
Namespace: project.Name,
Options: modelOpt,
@@ -657,48 +610,35 @@ func TestController_DeployInferenceService(t *testing.T) {
MemoryRequest: resource.MustParse("10Gi"),
},
},
- &knativeRevisionReactor{},
- &inferenceServiceReactor{
- nil,
- kerrors.NewNotFound(schema.GroupResource{Group: kfservingGroup, Resource: inferenceServiceResource}, isvcName),
- },
- &inferenceServiceReactor{
+ createResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}},
nil,
},
- nil,
- &inferenceServiceReactor{
+ checkResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
Status: statusReady,
},
nil,
},
- deployTimeout,
- true,
+ deployTimeout: deployTimeout,
+ createPdbResult: &pdbReactor{pdb, nil},
+ createVsResult: &vsReactor{vs, nil},
+ wantError: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
knClient := knservingfake.NewSimpleClientset()
- knClient.PrependReactor(getMethod, revisionResource, func(action k8stesting.Action) (bool, runtime.Object, error) {
- return true, tt.getRevResult.rev, tt.getRevResult.err
- })
kfClient := fakekserve.NewSimpleClientset().ServingV1beta1().(*fakekservev1beta1.FakeServingV1beta1)
kfClient.PrependReactor(getMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
- kfClient.PrependReactor(getMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
- return true, tt.checkResult.isvc, tt.checkResult.err
- })
- return true, tt.getResult.isvc, tt.getResult.err
+ return true, tt.checkResult.isvc, tt.checkResult.err
})
kfClient.PrependReactor(createMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
return true, tt.createResult.isvc, tt.createResult.err
})
- kfClient.PrependReactor(updateMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
- return true, tt.updateResult.isvc, tt.updateResult.err
- })
kfClient.PrependReactor(deleteMethod, inferenceServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
return true, nil, nil
@@ -711,8 +651,13 @@ func TestController_DeployInferenceService(t *testing.T) {
})
policyV1Client := fake.NewSimpleClientset().PolicyV1().(*fakepolicyv1.FakePolicyV1)
- policyV1Client.Fake.PrependReactor("patch", pdbResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
- return true, pdb, nil
+ policyV1Client.Fake.PrependReactor(patchMethod, pdbResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
+ return true, tt.createPdbResult.pdb, tt.createPdbResult.err
+ })
+
+ istioClient := fakeistio.NewSimpleClientset().NetworkingV1beta1().(*fakeistionetworking.FakeNetworkingV1beta1)
+ istioClient.PrependReactor(patchMethod, virtualServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
+ return true, tt.createVsResult.vs, tt.createVsResult.err
})
deployConfig := config.DeploymentConfig{
@@ -734,7 +679,7 @@ func TestController_DeployInferenceService(t *testing.T) {
FeastServingKeepAlive: &config.FeastServingKeepAliveConfig{},
})
- ctl, _ := newController(knClient.ServingV1(), kfClient, v1Client, nil, policyV1Client, deployConfig, containerFetcher, templater)
+ ctl, _ := newController(knClient.ServingV1(), kfClient, v1Client, nil, policyV1Client, istioClient, deployConfig, containerFetcher, templater)
iSvc, err := ctl.Deploy(context.Background(), tt.modelService)
if tt.wantError {
@@ -867,7 +812,7 @@ func TestGetCurrentDeploymentScale(t *testing.T) {
})
// Create test controller
- ctl, _ := newController(knClient.ServingV1(), kfClient, v1Client, nil, policyV1Client, deployConfig, containerFetcher, templater)
+ ctl, _ := newController(knClient.ServingV1(), kfClient, v1Client, nil, policyV1Client, nil, deployConfig, containerFetcher, templater)
desiredReplicas := ctl.GetCurrentDeploymentScale(context.TODO(), testNamespace, tt.components)
assert.Equal(t, tt.expectedScale, desiredReplicas)
@@ -875,12 +820,16 @@ func TestGetCurrentDeploymentScale(t *testing.T) {
}
}
-func fakeInferenceService(model, version, project string) *kservev1beta1.InferenceService {
- svcName := models.CreateInferenceServiceName(model, version)
+func fakeInferenceService(model, version, revisionID, project string) *kservev1beta1.InferenceService {
+ svcName := models.CreateInferenceServiceName(model, version, revisionID)
status := createServiceReadyStatus(svcName, project, baseUrl)
return &kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: svcName, Namespace: project}, Status: status}
}
+func fakeVirtualService(model, version string) *istiov1beta1.VirtualService {
+ return &istiov1beta1.VirtualService{ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%s-%s", model, version)}}
+}
+
func createServiceReadyStatus(iSvcName, namespace, baseUrl string) kservev1beta1.InferenceServiceStatus {
status := kservev1beta1.InferenceServiceStatus{}
status.InitializeConditions()
@@ -1016,9 +965,10 @@ func Test_controller_ListPods(t *testing.T) {
}
func TestController_Delete(t *testing.T) {
- isvcName := models.CreateInferenceServiceName("my-model", "1")
+ isvcName := models.CreateInferenceServiceName("my-model", "1", "1")
projectName := "my-project"
pdb := &policyv1.PodDisruptionBudget{}
+ vs := fakeVirtualService("my-model", "1")
tests := []struct {
name string
@@ -1187,11 +1137,16 @@ func TestController_Delete(t *testing.T) {
return true, pdb, nil
})
+ istioClient := fakeistio.NewSimpleClientset().NetworkingV1beta1().(*fakeistionetworking.FakeNetworkingV1beta1)
+ istioClient.PrependReactor(deleteMethod, virtualServiceResource, func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
+ return true, vs, nil
+ })
+
containerFetcher := NewContainerFetcher(v1Client, clusterMetadata)
templater := clusterresource.NewInferenceServiceTemplater(config.StandardTransformerConfig{})
- ctl, _ := newController(knClient, kfClient, v1Client, nil, policyV1Client, tt.deployConfig, containerFetcher, templater)
+ ctl, _ := newController(knClient, kfClient, v1Client, nil, policyV1Client, istioClient, tt.deployConfig, containerFetcher, templater)
mSvc, err := ctl.Delete(context.Background(), tt.modelService)
if tt.wantError {
diff --git a/api/cluster/errors.go b/api/cluster/errors.go
index 67f1cbe28..506f111ba 100644
--- a/api/cluster/errors.go
+++ b/api/cluster/errors.go
@@ -17,15 +17,19 @@ package cluster
import "errors"
var (
- ErrInsufficientCPU = errors.New("CPU request is too large")
- ErrInsufficientMem = errors.New("memory request too large")
- ErrTimeoutNamespace = errors.New("timeout creating namespace")
- ErrUnableToCreateNamespace = errors.New("error creating namespace")
- ErrUnableToGetNamespaceStatus = errors.New("error retrieving namespace status")
- ErrUnableToGetInferenceServiceStatus = errors.New("error retrieving inference service status")
- ErrUnableToCreateInferenceService = errors.New("error creating inference service")
- ErrUnableToUpdateInferenceService = errors.New("error updating inference service")
- ErrTimeoutCreateInferenceService = errors.New("timeout creating inference service")
- ErrUnableToCreatePDB = errors.New("error creating pod disruption budget")
- ErrUnableToDeletePDB = errors.New("error deleting pod disruption budget")
+ ErrInsufficientCPU = errors.New("CPU request is too large")
+ ErrInsufficientMem = errors.New("memory request too large")
+ ErrTimeoutNamespace = errors.New("timeout creating namespace")
+ ErrUnableToCreateNamespace = errors.New("error creating namespace")
+ ErrUnableToGetNamespaceStatus = errors.New("error retrieving namespace status")
+ ErrUnableToGetInferenceServiceStatus = errors.New("error retrieving inference service status")
+ ErrUnableToCreateInferenceService = errors.New("error creating inference service")
+ ErrUnableToUpdateInferenceService = errors.New("error updating inference service")
+ ErrUnableToDeleteInferenceService = errors.New("error deleting inference service")
+ ErrUnableToDeletePreviousInferenceService = errors.New("error deleting previous inference service")
+ ErrTimeoutCreateInferenceService = errors.New("timeout creating inference service")
+ ErrUnableToCreatePDB = errors.New("error creating pod disruption budget")
+ ErrUnableToDeletePDB = errors.New("error deleting pod disruption budget")
+ ErrUnableToCreateVirtualService = errors.New("error creating virtual service")
+ ErrUnableToDeleteVirtualService = errors.New("error deleting virtual service")
)
diff --git a/api/cluster/resource/templater.go b/api/cluster/resource/templater.go
index 41f995240..225830d62 100644
--- a/api/cluster/resource/templater.go
+++ b/api/cluster/resource/templater.go
@@ -82,28 +82,13 @@ const (
grpcHealthProbeCommand = "grpc_health_probe"
)
-var (
- // list of configuration stored as annotations
- configAnnotationKeys = []string{
- annotationPrometheusScrapeFlag,
- annotationPrometheusScrapePort,
- knserving.QueueSidecarResourcePercentageAnnotationKey,
- kserveconstant.AutoscalerClass,
- kserveconstant.AutoscalerMetrics,
- kserveconstant.TargetUtilizationPercentage,
- knautoscaling.ClassAnnotationKey,
- knautoscaling.MetricAnnotationKey,
- knautoscaling.TargetAnnotationKey,
- }
-
- grpcContainerPorts = []corev1.ContainerPort{
- {
- ContainerPort: defaultGRPCPort,
- Name: "h2c",
- Protocol: corev1.ProtocolTCP,
- },
- }
-)
+var grpcContainerPorts = []corev1.ContainerPort{
+ {
+ ContainerPort: defaultGRPCPort,
+ Name: "h2c",
+ Protocol: corev1.ProtocolTCP,
+ },
+}
type DeploymentScale struct {
Predictor *int
@@ -118,10 +103,26 @@ func NewInferenceServiceTemplater(standardTransformerConfig config.StandardTrans
return &InferenceServiceTemplater{standardTransformerConfig: standardTransformerConfig}
}
-func (t *InferenceServiceTemplater) CreateInferenceServiceSpec(modelService *models.Service, config *config.DeploymentConfig) (*kservev1beta1.InferenceService, error) {
+func (t *InferenceServiceTemplater) CreateInferenceServiceSpec(modelService *models.Service, config *config.DeploymentConfig, currentReplicas DeploymentScale) (*kservev1beta1.InferenceService, error) {
applyDefaults(modelService, config)
- annotations, err := createAnnotations(modelService, config, nil)
+ // Identify the desired initial scale of the new deployment
+ var initialScale *int
+ if currentReplicas.Predictor != nil {
+ // The desired scale of the new deployment is a single value, applicable to both the predictor and the transformer.
+ // Set the desired scale of the new deployment by taking the max of the 2 values.
+ // Consider the transformer's scale only if it is also enabled in the new spec.
+ if modelService.Transformer != nil &&
+ modelService.Transformer.Enabled &&
+ currentReplicas.Transformer != nil &&
+ *currentReplicas.Transformer > *currentReplicas.Predictor {
+ initialScale = currentReplicas.Transformer
+ } else {
+ initialScale = currentReplicas.Predictor
+ }
+ }
+
+ annotations, err := createAnnotations(modelService, config, initialScale)
if err != nil {
return nil, fmt.Errorf("unable to create inference service spec: %w", err)
}
@@ -168,73 +169,6 @@ func (t *InferenceServiceTemplater) CreateInferenceServiceSpec(modelService *mod
return inferenceService, nil
}
-func (t *InferenceServiceTemplater) PatchInferenceServiceSpec(
- orig *kservev1beta1.InferenceService,
- modelService *models.Service,
- config *config.DeploymentConfig,
- currentReplicas DeploymentScale,
-) (*kservev1beta1.InferenceService, error) {
- // Identify the desired initial scale of the new deployment
- var initialScale *int
- if currentReplicas.Predictor != nil {
- // The desired scale of the new deployment is a single value, applicable to both the predictor and the transformer.
- // Set the desired scale of the new deployment by taking the max of the 2 values.
- // Consider the transformer's scale only if it is also enabled in the new spec.
- if modelService.Transformer != nil &&
- modelService.Transformer.Enabled &&
- currentReplicas.Transformer != nil &&
- *currentReplicas.Transformer > *currentReplicas.Predictor {
- initialScale = currentReplicas.Transformer
- } else {
- initialScale = currentReplicas.Predictor
- }
- }
-
- applyDefaults(modelService, config)
-
- orig.ObjectMeta.Labels = modelService.Metadata.ToLabel()
- annotations, err := createAnnotations(modelService, config, initialScale)
- if err != nil {
- return nil, fmt.Errorf("unable to patch inference service spec: %w", err)
- }
- orig.ObjectMeta.Annotations = utils.MergeMaps(utils.ExcludeKeys(orig.ObjectMeta.Annotations, configAnnotationKeys), annotations)
-
- orig.Spec.Predictor = createPredictorSpec(modelService, config)
- orig.Spec.Predictor.TopologySpreadConstraints, err = updateExistingInferenceServiceTopologySpreadConstraints(
- orig,
- modelService,
- config,
- kservev1beta1.PredictorComponent,
- )
- if err != nil {
- return nil, fmt.Errorf("unable to create predictor topology spread constraints: %w", err)
- }
-
- orig.Spec.Transformer = nil
- if modelService.Transformer != nil && modelService.Transformer.Enabled {
- orig.Spec.Transformer = t.createTransformerSpec(modelService, modelService.Transformer)
- if _, ok := orig.Status.Components[kservev1beta1.TransformerComponent]; !ok ||
- orig.Status.Components[kservev1beta1.TransformerComponent].LatestCreatedRevision == "" {
- orig.Spec.Transformer.TopologySpreadConstraints, err = createNewInferenceServiceTopologySpreadConstraints(
- modelService,
- config,
- kservev1beta1.TransformerComponent,
- )
- } else {
- orig.Spec.Transformer.TopologySpreadConstraints, err = updateExistingInferenceServiceTopologySpreadConstraints(
- orig,
- modelService,
- config,
- kservev1beta1.TransformerComponent,
- )
- }
- if err != nil {
- return nil, fmt.Errorf("unable to create transformer topology spread constraints: %w", err)
- }
- }
- return orig, nil
-}
-
func createPredictorSpec(modelService *models.Service, config *config.DeploymentConfig) kservev1beta1.PredictorSpec {
envVars := modelService.EnvVars
@@ -684,39 +618,6 @@ func createNewInferenceServiceTopologySpreadConstraints(
)
}
-// updateExistingInferenceServiceTopologySpreadConstraints creates topology spread constraints for a component of a new
-// inference service
-func updateExistingInferenceServiceTopologySpreadConstraints(
- orig *kservev1beta1.InferenceService,
- modelService *models.Service,
- config *config.DeploymentConfig,
- component kservev1beta1.ComponentType,
-) ([]corev1.TopologySpreadConstraint, error) {
- if len(config.TopologySpreadConstraints) == 0 {
- var topologySpreadConstraints []corev1.TopologySpreadConstraint
- return topologySpreadConstraints, nil
- }
- var newRevisionName string
- if modelService.DeploymentMode == deployment.RawDeploymentMode {
- newRevisionName = fmt.Sprintf("isvc.%s-%s", modelService.Name, component)
- } else if modelService.DeploymentMode == deployment.ServerlessDeploymentMode ||
- modelService.DeploymentMode == deployment.EmptyDeploymentMode {
- var err error
- newRevisionName, err = getNewRevisionNameForExistingServerlessDeployment(
- orig.Status.Components[component].LatestCreatedRevision,
- )
- if err != nil {
- return nil, fmt.Errorf("unable to generate new revision name: %w", err)
- }
- } else {
- return nil, fmt.Errorf("invalid deployment mode: %s", modelService.DeploymentMode)
- }
- return appendPodSpreadingLabelSelectorsToTopologySpreadConstraints(
- config.TopologySpreadConstraints,
- newRevisionName,
- )
-}
-
// appendPodSpreadingLabelSelectorsToTopologySpreadConstraints makes a deep copy of the config topology spread
// constraints and then adds the given revisionName as a label to the match labels of each topology spread constraint
// to spread out all the pods across the specified topologyKey
@@ -758,24 +659,6 @@ func copyTopologySpreadConstraints(
return topologySpreadConstraints, nil
}
-// getNewRevisionNameForExistingServerlessDeployment examines the current revision name of an inference service (
-// serverless deployment) app name that is given to it and increments the last value of the revision number by 1, e.g.
-// sklearn-sample-predictor-00001 -> sklearn-sample-predictor-00002
-func getNewRevisionNameForExistingServerlessDeployment(currentRevisionName string) (string, error) {
- revisionNameElements := strings.Split(currentRevisionName, "-")
- if len(revisionNameElements) < 4 {
- return "", fmt.Errorf("unexpected revision name format that is not in at least 3 parts: %s",
- currentRevisionName)
- }
- currentRevisionNumber, err := strconv.Atoi(revisionNameElements[len(revisionNameElements)-1])
- if err != nil {
- return "", err
- }
-
- revisionNameElements[len(revisionNameElements)-1] = fmt.Sprintf("%05d", currentRevisionNumber+1)
- return strings.Join(revisionNameElements, "-"), nil
-}
-
func createDefaultTransformerEnvVars(modelService *models.Service) models.EnvVars {
defaultEnvVars := models.EnvVars{}
@@ -869,7 +752,7 @@ func createPyFuncDefaultEnvVars(svc *models.Service) models.EnvVars {
envVars := models.EnvVars{
models.EnvVar{
Name: envPyFuncModelName,
- Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion),
+ Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion, svc.RevisionID.String()),
},
models.EnvVar{
Name: envModelName,
@@ -881,7 +764,7 @@ func createPyFuncDefaultEnvVars(svc *models.Service) models.EnvVars {
},
models.EnvVar{
Name: envModelFullName,
- Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion),
+ Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion, svc.RevisionID.String()),
},
models.EnvVar{
Name: envHTTPPort,
diff --git a/api/cluster/resource/templater_gpu_test.go b/api/cluster/resource/templater_gpu_test.go
index ed6c05b2f..5af4e17aa 100644
--- a/api/cluster/resource/templater_gpu_test.go
+++ b/api/cluster/resource/templater_gpu_test.go
@@ -116,6 +116,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
name string
modelSvc *models.Service
resourcePercentage string
+ deploymentScale DeploymentScale
exp *kservev1beta1.InferenceService
wantErr bool
}{
@@ -134,6 +135,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -141,6 +143,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -200,6 +203,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -207,6 +211,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -266,6 +271,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -325,6 +331,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -332,6 +339,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -439,6 +447,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -446,6 +455,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -497,6 +507,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -504,6 +515,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -554,6 +566,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -561,6 +574,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -616,6 +630,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -625,6 +640,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
"prometheus.io/scrape": "true",
"prometheus.io/port": "8080",
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -677,6 +693,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -686,6 +703,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
"prometheus.io/scrape": "true",
"prometheus.io/port": "8080",
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -747,6 +765,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -754,6 +773,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -808,6 +828,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -815,6 +836,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -875,6 +897,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -941,6 +964,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
wantErr: true,
},
{
@@ -963,6 +987,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -970,6 +995,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
knautoscaling.ClassAnnotationKey: knautoscaling.HPA,
knautoscaling.MetricAnnotationKey: knautoscaling.CPU,
knautoscaling.TargetAnnotationKey: "30",
@@ -1029,6 +1055,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1036,6 +1063,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
knautoscaling.ClassAnnotationKey: knautoscaling.HPA,
knautoscaling.MetricAnnotationKey: knautoscaling.Memory,
knautoscaling.TargetAnnotationKey: "150",
@@ -1095,6 +1123,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1102,6 +1131,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
knautoscaling.ClassAnnotationKey: knautoscaling.HPA,
knautoscaling.MetricAnnotationKey: knautoscaling.Memory,
knautoscaling.TargetAnnotationKey: "100",
@@ -1161,6 +1191,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1168,6 +1199,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
knautoscaling.ClassAnnotationKey: knautoscaling.KPA,
knautoscaling.MetricAnnotationKey: knautoscaling.Concurrency,
knautoscaling.TargetAnnotationKey: "2",
@@ -1227,6 +1259,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1234,6 +1267,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
knautoscaling.ClassAnnotationKey: knautoscaling.KPA,
knautoscaling.MetricAnnotationKey: knautoscaling.RPS,
knautoscaling.TargetAnnotationKey: "10",
@@ -1288,6 +1322,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1295,6 +1330,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1349,6 +1385,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1358,6 +1395,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
"prometheus.io/scrape": "true",
"prometheus.io/port": "8080",
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1409,6 +1447,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1416,6 +1455,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1479,6 +1519,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
ResourceRequest: modelSvc.ResourceRequest,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1486,6 +1527,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1533,7 +1575,7 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
}
tpl := NewInferenceServiceTemplater(standardTransformerConfig)
- infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig)
+ infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig, tt.deploymentScale)
if tt.wantErr {
assert.Error(t, err)
return
diff --git a/api/cluster/resource/templater_test.go b/api/cluster/resource/templater_test.go
index 929fc0462..96851fdd1 100644
--- a/api/cluster/resource/templater_test.go
+++ b/api/cluster/resource/templater_test.go
@@ -97,6 +97,13 @@ var (
},
}
+ testPredictorScale, testTransformerScale = 3, 5
+
+ defaultDeploymentScale = DeploymentScale{
+ Predictor: &testPredictorScale,
+ Transformer: &testTransformerScale,
+ }
+
oneMinuteDuration = time.Minute * 1
twoMinuteDuration = time.Minute * 2
standardTransformerConfig = config.StandardTransformerConfig{
@@ -158,7 +165,6 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
project := mlp.Project{
Name: "project",
}
-
modelSvc := &models.Service{
Name: "my-model-1",
ModelName: "my-model",
@@ -190,6 +196,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
name string
modelSvc *models.Service
resourcePercentage string
+ deploymentScale DeploymentScale
exp *kservev1beta1.InferenceService
wantErr bool
}{
@@ -207,6 +214,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -214,6 +222,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -268,6 +277,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
},
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -275,6 +285,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -329,6 +340,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -383,6 +395,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -390,6 +403,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -435,12 +449,14 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Metadata: modelSvc.Metadata,
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
Namespace: project.Name,
Annotations: map[string]string{
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -487,6 +503,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -494,6 +511,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -540,6 +558,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -547,6 +566,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -592,6 +612,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -599,6 +620,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -649,6 +671,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -658,6 +681,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
"prometheus.io/scrape": "true",
"prometheus.io/port": "8080",
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -707,6 +731,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -716,6 +741,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
"prometheus.io/scrape": "true",
"prometheus.io/port": "8080",
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -761,6 +787,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -768,6 +795,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -825,6 +853,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -832,6 +861,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -884,6 +914,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -891,6 +922,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -948,6 +980,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1030,6 +1063,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1040,6 +1074,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
knautoscaling.ClassAnnotationKey: knautoscaling.HPA,
knautoscaling.MetricAnnotationKey: knautoscaling.CPU,
knautoscaling.TargetAnnotationKey: "30",
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1091,6 +1126,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1101,6 +1137,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
knautoscaling.ClassAnnotationKey: knautoscaling.HPA,
knautoscaling.MetricAnnotationKey: knautoscaling.Memory,
knautoscaling.TargetAnnotationKey: "150", // 30% * default memory request (500Mi) = 150Mi
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1153,6 +1190,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
ResourceRequest: userResourceRequests,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1163,6 +1201,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
knautoscaling.ClassAnnotationKey: knautoscaling.HPA,
knautoscaling.MetricAnnotationKey: knautoscaling.Memory,
knautoscaling.TargetAnnotationKey: "205", // 20% * (1Gi) ~= 205Mi
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1214,6 +1253,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1224,6 +1264,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
knautoscaling.ClassAnnotationKey: knautoscaling.KPA,
knautoscaling.MetricAnnotationKey: knautoscaling.Concurrency,
knautoscaling.TargetAnnotationKey: "2",
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1275,6 +1316,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.HttpJson,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1285,6 +1327,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
knautoscaling.ClassAnnotationKey: knautoscaling.KPA,
knautoscaling.MetricAnnotationKey: knautoscaling.RPS,
knautoscaling.TargetAnnotationKey: "10",
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1331,6 +1374,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.UpiV1,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1338,6 +1382,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1387,6 +1432,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.UpiV1,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1396,6 +1442,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
"prometheus.io/scrape": "true",
"prometheus.io/port": "8080",
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1444,6 +1491,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.UpiV1,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1451,6 +1499,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1509,6 +1558,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Protocol: protocol.UpiV1,
},
resourcePercentage: queueResourcePercentage,
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1516,6 +1566,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1560,7 +1611,7 @@ func TestCreateInferenceServiceSpec(t *testing.T) {
}
tpl := NewInferenceServiceTemplater(standardTransformerConfig)
- infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig)
+ infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig, tt.deploymentScale)
if tt.wantErr {
assert.Error(t, err)
return
@@ -1638,10 +1689,11 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) {
transformerProbeConfig := createLivenessProbeSpec(protocol.HttpJson, "/")
transformerProbeConfigUPI := createLivenessProbeSpec(protocol.UpiV1, "/")
tests := []struct {
- name string
- modelSvc *models.Service
- exp *kservev1beta1.InferenceService
- wantErr bool
+ name string
+ modelSvc *models.Service
+ deploymentScale DeploymentScale
+ exp *kservev1beta1.InferenceService
+ wantErr bool
}{
{
name: "custom transformer with default resource request",
@@ -1667,6 +1719,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) {
},
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1674,6 +1727,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1752,6 +1806,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) {
},
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1759,6 +1814,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1834,6 +1890,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) {
},
Protocol: protocol.UpiV1,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1841,6 +1898,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -1924,6 +1982,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) {
},
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -1931,6 +1990,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -2027,6 +2087,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) {
},
Protocol: protocol.UpiV1,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -2034,6 +2095,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -2140,6 +2202,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) {
},
Protocol: protocol.UpiV1,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -2149,6 +2212,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) {
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
annotationPrometheusScrapeFlag: "true",
annotationPrometheusScrapePort: "8080",
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -2239,7 +2303,7 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) {
}
tpl := NewInferenceServiceTemplater(standardTransformerConfig)
- infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig)
+ infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig, tt.deploymentScale)
if tt.wantErr {
assert.Error(t, err)
return
@@ -2295,10 +2359,11 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) {
transformerProbeConfig := createLivenessProbeSpec(protocol.HttpJson, "/")
tests := []struct {
- name string
- modelSvc *models.Service
- exp *kservev1beta1.InferenceService
- wantErr bool
+ name string
+ modelSvc *models.Service
+ deploymentScale DeploymentScale
+ exp *kservev1beta1.InferenceService
+ wantErr bool
}{
{
name: "model logger enabled",
@@ -2320,6 +2385,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) {
},
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -2327,6 +2393,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -2389,6 +2456,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) {
},
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -2396,6 +2464,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -2477,6 +2546,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) {
},
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -2484,6 +2554,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -2561,6 +2632,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) {
},
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -2568,6 +2640,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -2649,6 +2722,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) {
},
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -2656,6 +2730,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -2718,7 +2793,7 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) {
}
tpl := NewInferenceServiceTemplater(standardTransformerConfig)
- infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig)
+ infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig, tt.deploymentScale)
if tt.wantErr {
assert.Error(t, err)
return
@@ -2772,10 +2847,11 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) {
transformerProbeConfig := createLivenessProbeSpec(protocol.HttpJson, "/")
tests := []struct {
- name string
- modelSvc *models.Service
- exp *kservev1beta1.InferenceService
- wantErr bool
+ name string
+ modelSvc *models.Service
+ deploymentScale DeploymentScale
+ exp *kservev1beta1.InferenceService
+ wantErr bool
}{
{
name: "predictor with unspecified deployment mode (serverless)",
@@ -2790,6 +2866,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) {
Metadata: modelSvc.Metadata,
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -2797,6 +2874,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -2892,6 +2970,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) {
DeploymentMode: deployment.ServerlessDeploymentMode,
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -2899,6 +2978,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testPredictorScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -2994,6 +3074,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) {
DeploymentMode: deployment.RawDeploymentMode,
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -3101,6 +3182,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) {
},
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -3108,6 +3190,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -3275,6 +3358,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) {
DeploymentMode: deployment.ServerlessDeploymentMode,
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -3282,6 +3366,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) {
Annotations: map[string]string{
knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
+ knautoscaling.InitialScaleAnnotationKey: fmt.Sprint(testTransformerScale),
},
Labels: map[string]string{
"gojek.com/app": modelSvc.Metadata.App,
@@ -3449,6 +3534,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) {
DeploymentMode: deployment.RawDeploymentMode,
Protocol: protocol.HttpJson,
},
+ deploymentScale: defaultDeploymentScale,
exp: &kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: modelSvc.Name,
@@ -3652,1961 +3738,7 @@ func TestCreateInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) {
}
tpl := NewInferenceServiceTemplater(standardTransformerConfig)
- infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig)
- if tt.wantErr {
- assert.Error(t, err)
- return
- }
- assert.NoError(t, err)
- assert.Equal(t, tt.exp, infSvcSpec)
- })
- }
-}
-
-func TestPatchInferenceServiceSpec(t *testing.T) {
- err := models.InitKubernetesLabeller("gojek.com/", testEnvironmentName)
- assert.NoError(t, err)
-
- defer func() {
- _ = models.InitKubernetesLabeller("", "")
- }()
-
- project := mlp.Project{
- Name: "project",
- }
-
- modelSvc := &models.Service{
- Name: "model-1",
- ModelName: "model",
- ModelVersion: "1",
- Namespace: project.Name,
- ArtifactURI: "gs://my-artifacet",
- Metadata: models.Metadata{
- App: "model",
- Component: models.ComponentModelVersion,
- Stream: "dsp",
- Team: "dsp",
- Labels: mlp.Labels{
- {
- Key: "sample",
- Value: "true",
- },
- },
- },
- Protocol: protocol.HttpJson,
- }
-
- storageUri := fmt.Sprintf("%s/model", modelSvc.ArtifactURI)
-
- // Liveness probe config for the model containers
- probeConfig := createLivenessProbeSpec(protocol.HttpJson, fmt.Sprintf("/v1/models/%s", modelSvc.Name))
-
- // Liveness probe config for the transformers
- transformerProbeConfig := createLivenessProbeSpec(protocol.HttpJson, "/")
-
- one := 1
- minReplica := 1
- maxReplica := 10
- cpuRequest := resource.MustParse("1")
- memoryRequest := resource.MustParse("1Gi")
- cpuLimit := cpuRequest.DeepCopy()
- cpuLimit.Add(cpuRequest)
- memoryLimit := memoryRequest.DeepCopy()
- memoryLimit.Add(memoryRequest)
- queueResourcePercentage := "2"
-
- resourceRequests := corev1.ResourceRequirements{
- Requests: corev1.ResourceList{
- corev1.ResourceCPU: cpuRequest,
- corev1.ResourceMemory: memoryRequest,
- },
- Limits: corev1.ResourceList{
- corev1.ResourceCPU: cpuLimit,
- corev1.ResourceMemory: memoryLimit,
- },
- }
-
- testPredictorScale, testTransformerScale := 3, 5
-
- tests := []struct {
- name string
- modelSvc *models.Service
- deploymentScale DeploymentScale
- original *kservev1beta1.InferenceService
- exp *kservev1beta1.InferenceService
- wantErr bool
- }{
- {
- name: "tensorflow spec",
- modelSvc: &models.Service{
- Name: modelSvc.Name,
- ModelName: modelSvc.ModelName,
- ModelVersion: modelSvc.ModelVersion,
- Namespace: project.Name,
- ArtifactURI: modelSvc.ArtifactURI,
- Type: models.ModelTypeTensorflow,
- Options: &models.ModelOption{},
- Metadata: modelSvc.Metadata,
- Protocol: protocol.HttpJson,
- },
- original: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- },
- },
- exp: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- },
- Labels: map[string]string{
- "gojek.com/app": modelSvc.Metadata.App,
- "gojek.com/component": models.ComponentModelVersion,
- "gojek.com/environment": testEnvironmentName,
- "gojek.com/orchestrator": testOrchestratorName,
- "gojek.com/stream": modelSvc.Metadata.Stream,
- "gojek.com/team": modelSvc.Metadata.Team,
- "sample": "true",
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- },
- },
- },
- {
- name: "tensorflow + transformer spec",
- modelSvc: &models.Service{
- Name: modelSvc.Name,
- ModelName: modelSvc.ModelName,
- ModelVersion: modelSvc.ModelVersion,
- Namespace: project.Name,
- ArtifactURI: modelSvc.ArtifactURI,
- Type: models.ModelTypeTensorflow,
- Options: &models.ModelOption{},
- Metadata: modelSvc.Metadata,
- Transformer: &models.Transformer{
- Enabled: true,
- Image: "ghcr.io/gojek/merlin-transformer-test",
- Command: "python",
- Args: "main.py",
- ResourceRequest: &models.ResourceRequest{
- MinReplica: 1,
- MaxReplica: 1,
- CPURequest: cpuRequest,
- MemoryRequest: memoryRequest,
- },
- },
- Protocol: protocol.HttpJson,
- },
- original: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- },
- },
- exp: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- },
- Labels: map[string]string{
- "gojek.com/app": modelSvc.Metadata.App,
- "gojek.com/component": models.ComponentModelVersion,
- "gojek.com/environment": testEnvironmentName,
- "gojek.com/orchestrator": testOrchestratorName,
- "gojek.com/stream": modelSvc.Metadata.Stream,
- "gojek.com/team": modelSvc.Metadata.Team,
- "sample": "true",
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- Transformer: &kservev1beta1.TransformerSpec{
- PodSpec: kservev1beta1.PodSpec{
- Containers: []corev1.Container{
- {
- Name: "transformer",
- Image: "ghcr.io/gojek/merlin-transformer-test",
- Command: []string{"python"},
- Args: []string{"main.py"},
- Env: createDefaultTransformerEnvVars(modelSvc).ToKubernetesEnvVars(),
- Resources: corev1.ResourceRequirements{
- Requests: corev1.ResourceList{
- corev1.ResourceCPU: cpuRequest,
- corev1.ResourceMemory: memoryRequest,
- },
- Limits: corev1.ResourceList{
- corev1.ResourceCPU: cpuLimit,
- corev1.ResourceMemory: memoryLimit,
- },
- },
- LivenessProbe: transformerProbeConfig,
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &one,
- MaxReplicas: one,
- },
- },
- },
- },
- },
- {
- name: "tensorflow + transformer spec to tensorflow spec only",
- modelSvc: &models.Service{
- Name: modelSvc.Name,
- Namespace: project.Name,
- ArtifactURI: modelSvc.ArtifactURI,
- Type: models.ModelTypeTensorflow,
- Options: &models.ModelOption{},
- Metadata: modelSvc.Metadata,
- Transformer: &models.Transformer{
- Enabled: false,
- },
- Protocol: protocol.HttpJson,
- },
- deploymentScale: DeploymentScale{
- Predictor: &testPredictorScale,
- Transformer: &testTransformerScale,
- },
- original: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- Transformer: &kservev1beta1.TransformerSpec{
- PodSpec: kservev1beta1.PodSpec{
- Containers: []corev1.Container{
- {
- Name: "transformer",
- Image: "ghcr.io/gojek/merlin-transformer-test",
- Command: []string{"python"},
- Args: []string{"main.py"},
- Env: []corev1.EnvVar{
- {Name: envTransformerPort, Value: fmt.Sprint(defaultHTTPPort)},
- {Name: envTransformerModelName, Value: "model-1"},
- {Name: envTransformerPredictURL, Value: "model-1-predictor.project"},
- },
- Resources: corev1.ResourceRequirements{
- Requests: corev1.ResourceList{
- corev1.ResourceCPU: cpuRequest,
- corev1.ResourceMemory: memoryRequest,
- },
- Limits: corev1.ResourceList{
- corev1.ResourceCPU: cpuLimit,
- corev1.ResourceMemory: memoryLimit,
- },
- },
- LivenessProbe: transformerProbeConfig,
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &one,
- MaxReplicas: one,
- },
- },
- },
- },
- exp: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- knautoscaling.InitialScaleAnnotationKey: "3",
- },
- Labels: map[string]string{
- "gojek.com/app": modelSvc.Metadata.App,
- "gojek.com/component": models.ComponentModelVersion,
- "gojek.com/environment": testEnvironmentName,
- "gojek.com/orchestrator": testOrchestratorName,
- "gojek.com/stream": modelSvc.Metadata.Stream,
- "gojek.com/team": modelSvc.Metadata.Team,
- "sample": "true",
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- Transformer: nil,
- },
- },
- },
- {
- name: "custom spec",
- modelSvc: &models.Service{
- Name: modelSvc.Name,
- ModelName: modelSvc.ModelName,
- ModelVersion: modelSvc.ModelVersion,
- Namespace: project.Name,
- ArtifactURI: modelSvc.ArtifactURI,
- Type: models.ModelTypeCustom,
- Options: &models.ModelOption{
- CustomPredictor: &models.CustomPredictor{
- Image: "gcr.io/custom-model:v0.2",
- Command: "./run-1.sh",
- Args: "firstArg secondArg",
- },
- },
- Metadata: modelSvc.Metadata,
- ResourceRequest: userResourceRequests,
- Protocol: protocol.HttpJson,
- },
- original: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- },
- Labels: map[string]string{
- "gojek.com/app": modelSvc.Metadata.App,
- "gojek.com/component": models.ComponentModelVersion,
- "gojek.com/environment": testEnvironmentName,
- "gojek.com/orchestrator": testOrchestratorName,
- "gojek.com/stream": modelSvc.Metadata.Stream,
- "gojek.com/team": modelSvc.Metadata.Team,
- "sample": "true",
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- PodSpec: kservev1beta1.PodSpec{
- Containers: []corev1.Container{
- {
- Name: kserveconstant.InferenceServiceContainerName,
- Image: "gcr.io/custom-model:v0.1",
- Env: createDefaultPredictorEnvVars(modelSvc).ToKubernetesEnvVars(),
- Resources: expUserResourceRequests,
- Command: []string{
- "./run.sh",
- },
- Args: []string{
- "firstArg",
- "secondArg",
- },
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &userResourceRequests.MinReplica,
- MaxReplicas: userResourceRequests.MaxReplica,
- },
- },
- },
- },
- exp: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- },
- Labels: map[string]string{
- "gojek.com/app": modelSvc.Metadata.App,
- "gojek.com/component": models.ComponentModelVersion,
- "gojek.com/environment": testEnvironmentName,
- "gojek.com/orchestrator": testOrchestratorName,
- "gojek.com/stream": modelSvc.Metadata.Stream,
- "gojek.com/team": modelSvc.Metadata.Team,
- "sample": "true",
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- PodSpec: kservev1beta1.PodSpec{
- Containers: []corev1.Container{
- {
- Name: kserveconstant.InferenceServiceContainerName,
- Image: "gcr.io/custom-model:v0.2",
- Env: createDefaultPredictorEnvVars(modelSvc).ToKubernetesEnvVars(),
- Resources: expUserResourceRequests,
- Command: []string{
- "./run-1.sh",
- },
- Args: []string{
- "firstArg",
- "secondArg",
- },
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &userResourceRequests.MinReplica,
- MaxReplicas: userResourceRequests.MaxReplica,
- },
- },
- },
- },
- },
- {
- name: "patch deployment mode from serverless to raw_deployment",
- modelSvc: &models.Service{
- Name: modelSvc.Name,
- Namespace: project.Name,
- ArtifactURI: modelSvc.ArtifactURI,
- Type: models.ModelTypeTensorflow,
- Options: &models.ModelOption{},
- Metadata: modelSvc.Metadata,
- DeploymentMode: deployment.RawDeploymentMode,
- AutoscalingPolicy: &autoscaling.AutoscalingPolicy{
- MetricsType: autoscaling.CPUUtilization,
- TargetValue: 30,
- },
- Protocol: protocol.HttpJson,
- },
- original: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- },
- },
- exp: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.RawDeployment),
- kserveconstant.AutoscalerClass: string(kserveconstant.AutoscalerClassHPA),
- kserveconstant.AutoscalerMetrics: string(kserveconstant.AutoScalerMetricsCPU),
- kserveconstant.TargetUtilizationPercentage: "30",
- },
- Labels: map[string]string{
- "gojek.com/app": modelSvc.Metadata.App,
- "gojek.com/component": models.ComponentModelVersion,
- "gojek.com/environment": testEnvironmentName,
- "gojek.com/orchestrator": testOrchestratorName,
- "gojek.com/stream": modelSvc.Metadata.Stream,
- "gojek.com/team": modelSvc.Metadata.Team,
- "sample": "true",
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- },
- },
- },
- {
- name: "patch deployment mode from raw_deployment to serverless_deployment",
- modelSvc: &models.Service{
- Name: modelSvc.Name,
- Namespace: project.Name,
- ArtifactURI: modelSvc.ArtifactURI,
- Type: models.ModelTypeTensorflow,
- Options: &models.ModelOption{},
- Metadata: modelSvc.Metadata,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- AutoscalingPolicy: &autoscaling.AutoscalingPolicy{
- MetricsType: autoscaling.Concurrency,
- TargetValue: 2,
- },
- Protocol: protocol.HttpJson,
- },
- original: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.RawDeployment),
- kserveconstant.AutoscalerClass: string(kserveconstant.AutoscalerClassHPA),
- kserveconstant.AutoscalerMetrics: string(kserveconstant.AutoScalerMetricsCPU),
- kserveconstant.TargetUtilizationPercentage: "30",
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- },
- },
- exp: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- knautoscaling.ClassAnnotationKey: knautoscaling.KPA,
- knautoscaling.MetricAnnotationKey: knautoscaling.Concurrency,
- knautoscaling.TargetAnnotationKey: "2",
- },
- Labels: map[string]string{
- "gojek.com/app": modelSvc.Metadata.App,
- "gojek.com/component": models.ComponentModelVersion,
- "gojek.com/environment": testEnvironmentName,
- "gojek.com/orchestrator": testOrchestratorName,
- "gojek.com/stream": modelSvc.Metadata.Stream,
- "gojek.com/team": modelSvc.Metadata.Team,
- "sample": "true",
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- },
- },
- },
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- deployConfig := &config.DeploymentConfig{
- DefaultModelResourceRequests: &config.ResourceRequests{
- MinReplica: minReplica,
- MaxReplica: maxReplica,
- CPURequest: cpuRequest,
- MemoryRequest: memoryRequest,
- },
- QueueResourcePercentage: queueResourcePercentage,
- }
-
- tpl := NewInferenceServiceTemplater(standardTransformerConfig)
- infSvcSpec, err := tpl.PatchInferenceServiceSpec(tt.original, tt.modelSvc, deployConfig, tt.deploymentScale)
- if tt.wantErr {
- assert.Error(t, err)
- return
- }
- assert.NoError(t, err)
- assert.Equal(t, tt.exp, infSvcSpec)
- })
- }
-}
-
-func TestPatchInferenceServiceSpecWithTopologySpreadConstraints(t *testing.T) {
- err := models.InitKubernetesLabeller("gojek.com/", testEnvironmentName)
- assert.NoError(t, err)
-
- defer func() {
- _ = models.InitKubernetesLabeller("", "")
- }()
-
- project := mlp.Project{
- Name: "project",
- }
-
- modelSvc := &models.Service{
- Name: "model-1",
- ModelName: "model",
- ModelVersion: "1",
- Namespace: project.Name,
- ArtifactURI: "gs://my-artifacet",
- Metadata: models.Metadata{
- App: "model",
- Component: models.ComponentModelVersion,
- Stream: "dsp",
- Team: "dsp",
- Labels: mlp.Labels{
- {
- Key: "sample",
- Value: "true",
- },
- },
- },
- Protocol: protocol.HttpJson,
- }
-
- storageUri := fmt.Sprintf("%s/model", modelSvc.ArtifactURI)
-
- // Liveness probe config for the model containers
- probeConfig := createLivenessProbeSpec(protocol.HttpJson, fmt.Sprintf("/v1/models/%s", modelSvc.Name))
-
- // Liveness probe config for the transformers
- transformerProbeConfig := createLivenessProbeSpec(protocol.HttpJson, "/")
-
- one := 1
- minReplica := 1
- maxReplica := 10
- cpuRequest := resource.MustParse("1")
- memoryRequest := resource.MustParse("1Gi")
- cpuLimit := cpuRequest.DeepCopy()
- cpuLimit.Add(cpuRequest)
- memoryLimit := memoryRequest.DeepCopy()
- memoryLimit.Add(memoryRequest)
- queueResourcePercentage := "2"
-
- resourceRequests := corev1.ResourceRequirements{
- Requests: corev1.ResourceList{
- corev1.ResourceCPU: cpuRequest,
- corev1.ResourceMemory: memoryRequest,
- },
- Limits: corev1.ResourceList{
- corev1.ResourceCPU: cpuLimit,
- corev1.ResourceMemory: memoryLimit,
- },
- }
-
- testPredictorScale, testTransformerScale := 3, 5
-
- tests := []struct {
- name string
- modelSvc *models.Service
- deploymentScale DeploymentScale
- original *kservev1beta1.InferenceService
- exp *kservev1beta1.InferenceService
- wantErr bool
- }{
- {
- name: "predictor with unspecified deployment mode (serverless)",
- modelSvc: &models.Service{
- Name: modelSvc.Name,
- ModelName: modelSvc.ModelName,
- ModelVersion: modelSvc.ModelVersion,
- Namespace: project.Name,
- ArtifactURI: modelSvc.ArtifactURI,
- Type: models.ModelTypeTensorflow,
- Options: &models.ModelOption{},
- Metadata: modelSvc.Metadata,
- Protocol: protocol.HttpJson,
- },
- original: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- },
- Status: kservev1beta1.InferenceServiceStatus{
- Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{
- kservev1beta1.PredictorComponent: {
- LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name),
- },
- },
- },
- },
- exp: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- },
- Labels: map[string]string{
- "gojek.com/app": modelSvc.Metadata.App,
- "gojek.com/component": models.ComponentModelVersion,
- "gojek.com/environment": testEnvironmentName,
- "gojek.com/orchestrator": testOrchestratorName,
- "gojek.com/stream": modelSvc.Metadata.Stream,
- "gojek.com/team": modelSvc.Metadata.Team,
- "sample": "true",
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- PodSpec: kservev1beta1.PodSpec{
- TopologySpreadConstraints: []corev1.TopologySpreadConstraint{
- {
- MaxSkew: 1,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.ScheduleAnyway,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "model-1-predictor-00002",
- },
- },
- },
- {
- MaxSkew: 2,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "model-1-predictor-00002",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- {
- MaxSkew: 3,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app-label": "spread",
- "app": "model-1-predictor-00002",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- },
- },
- },
- },
- Status: kservev1beta1.InferenceServiceStatus{
- Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{
- kservev1beta1.PredictorComponent: {
- LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name),
- },
- },
- },
- },
- },
- {
- name: "predictor with serverless deployment mode",
- modelSvc: &models.Service{
- Name: modelSvc.Name,
- ModelName: modelSvc.ModelName,
- ModelVersion: modelSvc.ModelVersion,
- Namespace: project.Name,
- ArtifactURI: modelSvc.ArtifactURI,
- Type: models.ModelTypeTensorflow,
- Options: &models.ModelOption{},
- Metadata: modelSvc.Metadata,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- Protocol: protocol.HttpJson,
- },
- deploymentScale: DeploymentScale{
- Predictor: &testPredictorScale,
- },
- original: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- },
- Status: kservev1beta1.InferenceServiceStatus{
- Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{
- kservev1beta1.PredictorComponent: {
- LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name),
- },
- },
- },
- },
- exp: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- knautoscaling.InitialScaleAnnotationKey: "3",
- },
- Labels: map[string]string{
- "gojek.com/app": modelSvc.Metadata.App,
- "gojek.com/component": models.ComponentModelVersion,
- "gojek.com/environment": testEnvironmentName,
- "gojek.com/orchestrator": testOrchestratorName,
- "gojek.com/stream": modelSvc.Metadata.Stream,
- "gojek.com/team": modelSvc.Metadata.Team,
- "sample": "true",
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- PodSpec: kservev1beta1.PodSpec{
- TopologySpreadConstraints: []corev1.TopologySpreadConstraint{
- {
- MaxSkew: 1,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.ScheduleAnyway,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "model-1-predictor-00002",
- },
- },
- },
- {
- MaxSkew: 2,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "model-1-predictor-00002",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- {
- MaxSkew: 3,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app-label": "spread",
- "app": "model-1-predictor-00002",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- },
- },
- },
- },
- Status: kservev1beta1.InferenceServiceStatus{
- Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{
- kservev1beta1.PredictorComponent: {
- LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name),
- },
- },
- },
- },
- },
- {
- name: "predictor with raw deployment mode",
- modelSvc: &models.Service{
- Name: modelSvc.Name,
- ModelName: modelSvc.ModelName,
- ModelVersion: modelSvc.ModelVersion,
- Namespace: project.Name,
- ArtifactURI: modelSvc.ArtifactURI,
- Type: models.ModelTypeTensorflow,
- Options: &models.ModelOption{},
- Metadata: modelSvc.Metadata,
- DeploymentMode: deployment.RawDeploymentMode,
- Protocol: protocol.HttpJson,
- },
- original: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.RawDeployment),
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- },
- },
- exp: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.RawDeployment),
- },
- Labels: map[string]string{
- "gojek.com/app": modelSvc.Metadata.App,
- "gojek.com/component": models.ComponentModelVersion,
- "gojek.com/environment": testEnvironmentName,
- "gojek.com/orchestrator": testOrchestratorName,
- "gojek.com/stream": modelSvc.Metadata.Stream,
- "gojek.com/team": modelSvc.Metadata.Team,
- "sample": "true",
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- PodSpec: kservev1beta1.PodSpec{
- TopologySpreadConstraints: []corev1.TopologySpreadConstraint{
- {
- MaxSkew: 1,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.ScheduleAnyway,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "isvc.model-1-predictor",
- },
- },
- },
- {
- MaxSkew: 2,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "isvc.model-1-predictor",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- {
- MaxSkew: 3,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app-label": "spread",
- "app": "isvc.model-1-predictor",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- },
- },
- },
- },
- },
- },
- {
- name: "predictor and transformer with unspecified deployment mode (serverless)",
- modelSvc: &models.Service{
- Name: modelSvc.Name,
- ModelName: modelSvc.ModelName,
- ModelVersion: modelSvc.ModelVersion,
- Namespace: project.Name,
- ArtifactURI: modelSvc.ArtifactURI,
- Type: models.ModelTypeTensorflow,
- Options: &models.ModelOption{},
- Metadata: modelSvc.Metadata,
- Transformer: &models.Transformer{
- Enabled: true,
- Image: "ghcr.io/gojek/merlin-transformer-test",
- Command: "python",
- Args: "main.py",
- ResourceRequest: &models.ResourceRequest{
- MinReplica: 1,
- MaxReplica: 1,
- CPURequest: cpuRequest,
- MemoryRequest: memoryRequest,
- },
- },
- Protocol: protocol.HttpJson,
- },
- original: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- },
- Status: kservev1beta1.InferenceServiceStatus{
- Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{
- kservev1beta1.PredictorComponent: {
- LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name),
- },
- },
- },
- },
- exp: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- },
- Labels: map[string]string{
- "gojek.com/app": modelSvc.Metadata.App,
- "gojek.com/component": models.ComponentModelVersion,
- "gojek.com/environment": testEnvironmentName,
- "gojek.com/orchestrator": testOrchestratorName,
- "gojek.com/stream": modelSvc.Metadata.Stream,
- "gojek.com/team": modelSvc.Metadata.Team,
- "sample": "true",
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- PodSpec: kservev1beta1.PodSpec{
- TopologySpreadConstraints: []corev1.TopologySpreadConstraint{
- {
- MaxSkew: 1,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.ScheduleAnyway,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "model-1-predictor-00002",
- },
- },
- },
- {
- MaxSkew: 2,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "model-1-predictor-00002",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- {
- MaxSkew: 3,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app-label": "spread",
- "app": "model-1-predictor-00002",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- },
- },
- },
- Transformer: &kservev1beta1.TransformerSpec{
- PodSpec: kservev1beta1.PodSpec{
- Containers: []corev1.Container{
- {
- Name: "transformer",
- Image: "ghcr.io/gojek/merlin-transformer-test",
- Command: []string{"python"},
- Args: []string{"main.py"},
- Env: createDefaultTransformerEnvVars(modelSvc).ToKubernetesEnvVars(),
- Resources: corev1.ResourceRequirements{
- Requests: corev1.ResourceList{
- corev1.ResourceCPU: cpuRequest,
- corev1.ResourceMemory: memoryRequest,
- },
- Limits: corev1.ResourceList{
- corev1.ResourceCPU: cpuLimit,
- corev1.ResourceMemory: memoryLimit,
- },
- },
- LivenessProbe: transformerProbeConfig,
- },
- },
- TopologySpreadConstraints: []corev1.TopologySpreadConstraint{
- {
- MaxSkew: 1,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.ScheduleAnyway,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "model-1-transformer-00001",
- },
- },
- },
- {
- MaxSkew: 2,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "model-1-transformer-00001",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- {
- MaxSkew: 3,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app-label": "spread",
- "app": "model-1-transformer-00001",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &one,
- MaxReplicas: one,
- },
- },
- },
- Status: kservev1beta1.InferenceServiceStatus{
- Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{
- kservev1beta1.PredictorComponent: {
- LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name),
- },
- },
- },
- },
- },
- {
- name: "predictor and transformer with serverless deployment mode",
- modelSvc: &models.Service{
- Name: modelSvc.Name,
- ModelName: modelSvc.ModelName,
- ModelVersion: modelSvc.ModelVersion,
- Namespace: project.Name,
- ArtifactURI: modelSvc.ArtifactURI,
- Type: models.ModelTypeTensorflow,
- Options: &models.ModelOption{},
- Metadata: modelSvc.Metadata,
- Transformer: &models.Transformer{
- Enabled: true,
- Image: "ghcr.io/gojek/merlin-transformer-test",
- Command: "python",
- Args: "main.py",
- ResourceRequest: &models.ResourceRequest{
- MinReplica: 1,
- MaxReplica: 1,
- CPURequest: cpuRequest,
- MemoryRequest: memoryRequest,
- },
- },
- DeploymentMode: deployment.ServerlessDeploymentMode,
- Protocol: protocol.HttpJson,
- },
- deploymentScale: DeploymentScale{
- Predictor: &testPredictorScale,
- Transformer: &testTransformerScale,
- },
- original: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- },
- Status: kservev1beta1.InferenceServiceStatus{
- Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{
- kservev1beta1.PredictorComponent: {
- LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name),
- },
- },
- },
- },
- exp: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.Serverless),
- knautoscaling.InitialScaleAnnotationKey: "5",
- },
- Labels: map[string]string{
- "gojek.com/app": modelSvc.Metadata.App,
- "gojek.com/component": models.ComponentModelVersion,
- "gojek.com/environment": testEnvironmentName,
- "gojek.com/orchestrator": testOrchestratorName,
- "gojek.com/stream": modelSvc.Metadata.Stream,
- "gojek.com/team": modelSvc.Metadata.Team,
- "sample": "true",
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- PodSpec: kservev1beta1.PodSpec{
- TopologySpreadConstraints: []corev1.TopologySpreadConstraint{
- {
- MaxSkew: 1,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.ScheduleAnyway,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "model-1-predictor-00002",
- },
- },
- },
- {
- MaxSkew: 2,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "model-1-predictor-00002",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- {
- MaxSkew: 3,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app-label": "spread",
- "app": "model-1-predictor-00002",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- },
- },
- },
- Transformer: &kservev1beta1.TransformerSpec{
- PodSpec: kservev1beta1.PodSpec{
- Containers: []corev1.Container{
- {
- Name: "transformer",
- Image: "ghcr.io/gojek/merlin-transformer-test",
- Command: []string{"python"},
- Args: []string{"main.py"},
- Env: createDefaultTransformerEnvVars(modelSvc).ToKubernetesEnvVars(),
- Resources: corev1.ResourceRequirements{
- Requests: corev1.ResourceList{
- corev1.ResourceCPU: cpuRequest,
- corev1.ResourceMemory: memoryRequest,
- },
- Limits: corev1.ResourceList{
- corev1.ResourceCPU: cpuLimit,
- corev1.ResourceMemory: memoryLimit,
- },
- },
- LivenessProbe: transformerProbeConfig,
- },
- },
- TopologySpreadConstraints: []corev1.TopologySpreadConstraint{
- {
- MaxSkew: 1,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.ScheduleAnyway,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "model-1-transformer-00001",
- },
- },
- },
- {
- MaxSkew: 2,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "model-1-transformer-00001",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- {
- MaxSkew: 3,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app-label": "spread",
- "app": "model-1-transformer-00001",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &one,
- MaxReplicas: one,
- },
- },
- },
- Status: kservev1beta1.InferenceServiceStatus{
- Components: map[kservev1beta1.ComponentType]kservev1beta1.ComponentStatusSpec{
- kservev1beta1.PredictorComponent: {
- LatestCreatedRevision: fmt.Sprintf("%s-predictor-00001", modelSvc.Name),
- },
- },
- },
- },
- },
- {
- name: "predictor and transformer with raw deployment mode",
- modelSvc: &models.Service{
- Name: modelSvc.Name,
- ModelName: modelSvc.ModelName,
- ModelVersion: modelSvc.ModelVersion,
- Namespace: project.Name,
- ArtifactURI: modelSvc.ArtifactURI,
- Type: models.ModelTypeTensorflow,
- Options: &models.ModelOption{},
- Metadata: modelSvc.Metadata,
- Transformer: &models.Transformer{
- Enabled: true,
- Image: "ghcr.io/gojek/merlin-transformer-test",
- Command: "python",
- Args: "main.py",
- ResourceRequest: &models.ResourceRequest{
- MinReplica: 1,
- MaxReplica: 1,
- CPURequest: cpuRequest,
- MemoryRequest: memoryRequest,
- },
- },
- DeploymentMode: deployment.RawDeploymentMode,
- Protocol: protocol.HttpJson,
- },
- original: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.RawDeployment),
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- },
- },
- },
- exp: &kservev1beta1.InferenceService{
- ObjectMeta: metav1.ObjectMeta{
- Name: modelSvc.Name,
- Namespace: project.Name,
- Annotations: map[string]string{
- knserving.QueueSidecarResourcePercentageAnnotationKey: queueResourcePercentage,
- kserveconstant.DeploymentMode: string(kserveconstant.RawDeployment),
- },
- Labels: map[string]string{
- "gojek.com/app": modelSvc.Metadata.App,
- "gojek.com/component": models.ComponentModelVersion,
- "gojek.com/environment": testEnvironmentName,
- "gojek.com/orchestrator": testOrchestratorName,
- "gojek.com/stream": modelSvc.Metadata.Stream,
- "gojek.com/team": modelSvc.Metadata.Team,
- "sample": "true",
- },
- },
- Spec: kservev1beta1.InferenceServiceSpec{
- Predictor: kservev1beta1.PredictorSpec{
- Tensorflow: &kservev1beta1.TFServingSpec{
- PredictorExtensionSpec: kservev1beta1.PredictorExtensionSpec{
- StorageURI: &storageUri,
- Container: corev1.Container{
- Name: kserveconstant.InferenceServiceContainerName,
- Resources: resourceRequests,
- LivenessProbe: probeConfig,
- Env: []corev1.EnvVar{},
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &minReplica,
- MaxReplicas: maxReplica,
- },
- PodSpec: kservev1beta1.PodSpec{
- TopologySpreadConstraints: []corev1.TopologySpreadConstraint{
- {
- MaxSkew: 1,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.ScheduleAnyway,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "isvc.model-1-predictor",
- },
- },
- },
- {
- MaxSkew: 2,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "isvc.model-1-predictor",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- {
- MaxSkew: 3,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app-label": "spread",
- "app": "isvc.model-1-predictor",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- },
- },
- },
- Transformer: &kservev1beta1.TransformerSpec{
- PodSpec: kservev1beta1.PodSpec{
- Containers: []corev1.Container{
- {
- Name: "transformer",
- Image: "ghcr.io/gojek/merlin-transformer-test",
- Command: []string{"python"},
- Args: []string{"main.py"},
- Env: createDefaultTransformerEnvVars(modelSvc).ToKubernetesEnvVars(),
- Resources: corev1.ResourceRequirements{
- Requests: corev1.ResourceList{
- corev1.ResourceCPU: cpuRequest,
- corev1.ResourceMemory: memoryRequest,
- },
- Limits: corev1.ResourceList{
- corev1.ResourceCPU: cpuLimit,
- corev1.ResourceMemory: memoryLimit,
- },
- },
- LivenessProbe: transformerProbeConfig,
- },
- },
- TopologySpreadConstraints: []corev1.TopologySpreadConstraint{
- {
- MaxSkew: 1,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.ScheduleAnyway,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "isvc.model-1-transformer",
- },
- },
- },
- {
- MaxSkew: 2,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app": "isvc.model-1-transformer",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- {
- MaxSkew: 3,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app-label": "spread",
- "app": "isvc.model-1-transformer",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- },
- },
- ComponentExtensionSpec: kservev1beta1.ComponentExtensionSpec{
- MinReplicas: &one,
- MaxReplicas: one,
- },
- },
- },
- },
- },
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- deployConfig := &config.DeploymentConfig{
- DefaultModelResourceRequests: &config.ResourceRequests{
- MinReplica: minReplica,
- MaxReplica: maxReplica,
- CPURequest: cpuRequest,
- MemoryRequest: memoryRequest,
- },
- QueueResourcePercentage: queueResourcePercentage,
- TopologySpreadConstraints: []corev1.TopologySpreadConstraint{
- {
- MaxSkew: 1,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.ScheduleAnyway,
- },
- {
- MaxSkew: 2,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- {
- MaxSkew: 3,
- TopologyKey: "kubernetes.io/hostname",
- WhenUnsatisfiable: corev1.DoNotSchedule,
- LabelSelector: &metav1.LabelSelector{
- MatchLabels: map[string]string{
- "app-label": "spread",
- },
- MatchExpressions: []metav1.LabelSelectorRequirement{
- {
- Key: "app-expression",
- Operator: metav1.LabelSelectorOpIn,
- Values: []string{"1"},
- },
- },
- },
- },
- },
- }
-
- tpl := NewInferenceServiceTemplater(standardTransformerConfig)
- infSvcSpec, err := tpl.PatchInferenceServiceSpec(tt.original, tt.modelSvc, deployConfig, tt.deploymentScale)
+ infSvcSpec, err := tpl.CreateInferenceServiceSpec(tt.modelSvc, deployConfig, tt.deploymentScale)
if tt.wantErr {
assert.Error(t, err)
return
@@ -5801,7 +3933,7 @@ func createPyFuncDefaultEnvVarsWithProtocol(svc *models.Service, protocolValue p
envVars := models.EnvVars{
models.EnvVar{
Name: envPyFuncModelName,
- Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion),
+ Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion, svc.RevisionID.String()),
},
models.EnvVar{
Name: envModelName,
@@ -5813,7 +3945,7 @@ func createPyFuncDefaultEnvVarsWithProtocol(svc *models.Service, protocolValue p
},
models.EnvVar{
Name: envModelFullName,
- Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion),
+ Value: models.CreateInferenceServiceName(svc.ModelName, svc.ModelVersion, svc.RevisionID.String()),
},
models.EnvVar{
Name: envHTTPPort,
diff --git a/api/cluster/virtual_service.go b/api/cluster/virtual_service.go
new file mode 100644
index 000000000..d7e07a84b
--- /dev/null
+++ b/api/cluster/virtual_service.go
@@ -0,0 +1,227 @@
+package cluster
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "net/url"
+ "strings"
+
+ istiov1beta1 "istio.io/api/networking/v1beta1"
+ v1beta1 "istio.io/client-go/pkg/apis/networking/v1beta1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/types"
+
+ "github.com/caraml-dev/merlin/log"
+ "github.com/caraml-dev/merlin/models"
+ "github.com/caraml-dev/merlin/pkg/protocol"
+ "github.com/mitchellh/copystructure"
+)
+
+const (
+ // TODO: Make these configurable
+ knativeIngressGateway = "knative-serving/knative-ingress-gateway"
+ defaultIstioIngressGatewayHost = "istio-ingressgateway.istio-system.svc.cluster.local"
+)
+
+type VirtualService struct {
+ Name string
+ Namespace string
+ ModelName string
+ VersionID string
+ RevisionID models.ID
+ Labels map[string]string
+ Protocol protocol.Protocol
+ ModelVersionRevisionURL *url.URL
+}
+
+func NewVirtualService(modelService *models.Service, isvcURL string) (*VirtualService, error) {
+ modelVersionRevisionURL, err := url.Parse(isvcURL)
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse model version revision url: %s", isvcURL)
+ }
+
+ if modelVersionRevisionURL.Scheme == "" {
+ veURL := "//" + isvcURL
+ modelVersionRevisionURL, err = url.Parse(veURL)
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse model version revision url: %s", isvcURL)
+ }
+ }
+
+ return &VirtualService{
+ Name: fmt.Sprintf("%s-%s-%s", modelService.ModelName, modelService.ModelVersion, models.VirtualServiceComponentType),
+ Namespace: modelService.Namespace,
+ ModelName: modelService.ModelName,
+ VersionID: modelService.ModelVersion,
+ RevisionID: modelService.RevisionID,
+ Labels: modelService.Metadata.ToLabel(),
+ Protocol: modelService.Protocol,
+ ModelVersionRevisionURL: modelVersionRevisionURL,
+ }, nil
+}
+
+func (cfg VirtualService) BuildVirtualServiceSpec() (*v1beta1.VirtualService, error) {
+ modelVersionHost, err := cfg.getModelVersionHost()
+ if err != nil {
+ return nil, err
+ }
+
+ modelVersionRevisionHost := cfg.ModelVersionRevisionURL.Hostname()
+ modelVersionRevisionPath := cfg.ModelVersionRevisionURL.Path
+
+ vs := &v1beta1.VirtualService{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: "networking.istio.io/v1beta1",
+ Kind: "VirtualService",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: cfg.Name,
+ Namespace: cfg.Namespace,
+ Labels: cfg.Labels,
+ },
+ Spec: istiov1beta1.VirtualService{
+ Gateways: []string{knativeIngressGateway},
+ Hosts: []string{modelVersionHost},
+ Http: cfg.createHttpRoutes(modelVersionRevisionHost, modelVersionRevisionPath),
+ },
+ }
+
+ return vs, nil
+}
+
+// getModelVersionHost creates model version endpoint host based on version endpoint's url
+func (cfg *VirtualService) getModelVersionHost() (string, error) {
+ host := strings.Split(cfg.ModelVersionRevisionURL.Hostname(), fmt.Sprintf(".%s.", cfg.Namespace))
+ if len(host) != 2 {
+ return "", fmt.Errorf("invalid version endpoint url: %s. failed to split domain: %+v", cfg.ModelVersionRevisionURL, host)
+ }
+
+ domain := host[1]
+ return fmt.Sprintf("%s-%s.%s.%s", cfg.ModelName, cfg.VersionID, cfg.Namespace, domain), nil
+}
+
+func (cfg *VirtualService) createHttpRoutes(modelVersionRevisionHost, modelVersionRevisionPath string) []*istiov1beta1.HTTPRoute {
+ routeDestinations := []*istiov1beta1.HTTPRouteDestination{
+ {
+ Destination: &istiov1beta1.Destination{
+ Host: defaultIstioIngressGatewayHost,
+ },
+ Headers: &istiov1beta1.Headers{
+ Request: &istiov1beta1.Headers_HeaderOperations{
+ Set: map[string]string{
+ "Host": modelVersionRevisionHost,
+ },
+ },
+ },
+ Weight: 100,
+ },
+ }
+
+ switch cfg.Protocol {
+ case protocol.UpiV1:
+ return []*istiov1beta1.HTTPRoute{
+ {
+ Route: routeDestinations,
+ },
+ }
+
+ default:
+ routeDestinationsWithContentType, err := copyRouteDestinations(routeDestinations)
+ if err != nil {
+ log.Errorf("failed to copy routeDestinations: %+v", err)
+ return nil
+ }
+ routeDestinationsWithContentType[0].Headers.Request.Set["Content-Type"] = "application/json"
+
+ uri := &istiov1beta1.StringMatch{
+ MatchType: &istiov1beta1.StringMatch_Exact{
+ Exact: fmt.Sprintf("/v1/models/%s-%s:predict", cfg.ModelName, cfg.VersionID),
+ },
+ }
+ rewrite := &istiov1beta1.HTTPRewrite{
+ Uri: fmt.Sprintf("%s:predict", modelVersionRevisionPath),
+ }
+
+ return []*istiov1beta1.HTTPRoute{
+		// For requests to the Predict API without a Content-Type header, set the header to application/json
+ {
+ Match: []*istiov1beta1.HTTPMatchRequest{
+ {
+ Uri: uri,
+ Headers: map[string]*istiov1beta1.StringMatch{
+ "content-type": {},
+ },
+ },
+ },
+ Route: routeDestinationsWithContentType,
+ Rewrite: rewrite,
+ },
+		// For requests to the Predict API that already carry a Content-Type header, forward them to the model version revision unchanged
+ {
+ Match: []*istiov1beta1.HTTPMatchRequest{
+ {
+ Uri: uri,
+ },
+ },
+ Route: routeDestinations,
+ Rewrite: rewrite,
+ },
+		// For any other request (e.g. the List Models API), forward it unchanged.
+		// Note: we currently use the KServe V1 inference protocol (https://kserve.github.io/website/0.11/modelserving/data_plane/v1_protocol/)
+		// and only call the Predict API, so this route serves as a fallback.
+ {
+ Route: routeDestinations,
+ },
+ }
+ }
+}
+
+func (cfg *VirtualService) getInferenceURL(vs *v1beta1.VirtualService) string {
+ modelVersionHost := vs.Spec.Hosts[0]
+
+ switch cfg.Protocol {
+ case protocol.UpiV1:
+ // return only host name
+ return modelVersionHost
+ default:
+ return fmt.Sprintf("http://%s/v1/models/%s-%s:predict", modelVersionHost, cfg.ModelName, cfg.VersionID)
+ }
+}
+
+// copyRouteDestinations returns a deep copy of the given route destinations so the copy can be mutated independently of the original slice
+func copyRouteDestinations(src []*istiov1beta1.HTTPRouteDestination) ([]*istiov1beta1.HTTPRouteDestination, error) {
+	destRaw, err := copystructure.Copy(src)
+	if err != nil {
+		return nil, fmt.Errorf("error copying []*HTTPRouteDestination: %w", err)
+	}
+
+	dest, ok := destRaw.([]*istiov1beta1.HTTPRouteDestination)
+	if !ok {
+		// err is nil here (the copy succeeded); report the unexpected dynamic type instead of wrapping a nil error
+		return nil, fmt.Errorf("error in type assertion of copied []*HTTPRouteDestination: got %T", destRaw)
+	}
+
+	return dest, nil
+}
+
+func (c *controller) deployVirtualService(ctx context.Context, vsCfg *VirtualService) (*v1beta1.VirtualService, error) {
+ vsSpec, err := vsCfg.BuildVirtualServiceSpec()
+ if err != nil {
+ return nil, err
+ }
+
+ vsJSON, err := json.Marshal(vsSpec)
+ if err != nil {
+ return nil, err
+ }
+
+ forceEnabled := true
+
+ return c.istioClient.
+ VirtualServices(vsSpec.Namespace).
+ Patch(ctx, vsCfg.Name, types.ApplyPatchType, vsJSON, metav1.PatchOptions{FieldManager: "application/apply-patch", Force: &forceEnabled})
+}
+
+func (c *controller) deleteVirtualService(ctx context.Context, name, namespace string) error {
+ return c.istioClient.VirtualServices(namespace).Delete(ctx, name, metav1.DeleteOptions{})
+}
diff --git a/api/cluster/virtual_service_test.go b/api/cluster/virtual_service_test.go
new file mode 100644
index 000000000..4acebf6cd
--- /dev/null
+++ b/api/cluster/virtual_service_test.go
@@ -0,0 +1,253 @@
+package cluster
+
+import (
+ "fmt"
+ "net/url"
+ "reflect"
+ "testing"
+
+ "github.com/caraml-dev/merlin/models"
+ "github.com/caraml-dev/merlin/pkg/protocol"
+ istiov1beta1 "istio.io/api/networking/v1beta1"
+)
+
+func TestVirtualService_getModelVersionHost(t *testing.T) {
+ defaultModelVersionRevisionURL, _ := url.Parse("http://test-model-1-1.test-namespace.caraml.dev")
+
+ type fields struct {
+ Name string
+ Namespace string
+ ModelName string
+ VersionID string
+ RevisionID models.ID
+ Labels map[string]string
+ Protocol protocol.Protocol
+ ModelVersionRevisionURL *url.URL
+ }
+ tests := []struct {
+ name string
+ fields fields
+ want string
+ wantErr bool
+ }{
+ {
+ name: "1",
+ fields: fields{
+ Name: "test-model-1",
+ Namespace: "test-namespace",
+ ModelName: "test-model",
+ VersionID: "1",
+ RevisionID: models.ID(1),
+ Labels: map[string]string{},
+ Protocol: protocol.HttpJson,
+ ModelVersionRevisionURL: defaultModelVersionRevisionURL,
+ },
+ want: "test-model-1.test-namespace.caraml.dev",
+ wantErr: false,
+ },
+ {
+ name: "2",
+ fields: fields{
+ Name: "test-model-1",
+ Namespace: "test-namespace",
+ ModelName: "test-model",
+ VersionID: "1",
+ RevisionID: models.ID(1),
+ Labels: map[string]string{},
+ Protocol: protocol.HttpJson,
+ ModelVersionRevisionURL: defaultModelVersionRevisionURL,
+ },
+ want: "test-model-1.test-namespace.caraml.dev",
+ wantErr: false,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ cfg := &VirtualService{
+ Name: tt.fields.Name,
+ Namespace: tt.fields.Namespace,
+ ModelName: tt.fields.ModelName,
+ VersionID: tt.fields.VersionID,
+ RevisionID: tt.fields.RevisionID,
+ Labels: tt.fields.Labels,
+ Protocol: tt.fields.Protocol,
+ ModelVersionRevisionURL: tt.fields.ModelVersionRevisionURL,
+ }
+ got, err := cfg.getModelVersionHost()
+ if (err != nil) != tt.wantErr {
+ t.Errorf("VirtualService.getModelVersionHost() error = %v, wantErr %v", err, tt.wantErr)
+ return
+ }
+ if got != tt.want {
+ t.Errorf("VirtualService.getModelVersionHost() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestVirtualService_createHttpRoutes(t *testing.T) {
+ type fields struct {
+ Name string
+ Namespace string
+ ModelName string
+ VersionID string
+ RevisionID models.ID
+ Labels map[string]string
+ Protocol protocol.Protocol
+ ModelVersionRevisionURL *url.URL
+ }
+ type args struct {
+ modelVersionRevisionHost string
+ modelVersionRevisionPath string
+ }
+ tests := []struct {
+ name string
+ fields fields
+ args args
+ want []*istiov1beta1.HTTPRoute
+ }{
+ {
+ name: "http",
+ fields: fields{
+ Name: "test-model-1",
+ ModelName: "test-model",
+ VersionID: "1",
+ Protocol: protocol.HttpJson,
+ },
+ args: args{
+ modelVersionRevisionHost: "test-model-1-1.test-namespace.caraml.dev",
+ modelVersionRevisionPath: "/v1/models/test-model-1-1",
+ },
+ want: []*istiov1beta1.HTTPRoute{
+ {
+ Match: []*istiov1beta1.HTTPMatchRequest{
+ {
+ Uri: &istiov1beta1.StringMatch{
+ MatchType: &istiov1beta1.StringMatch_Exact{
+ Exact: "/v1/models/test-model-1:predict",
+ },
+ },
+ Headers: map[string]*istiov1beta1.StringMatch{
+ "content-type": {},
+ },
+ },
+ },
+ Route: []*istiov1beta1.HTTPRouteDestination{
+ {
+ Destination: &istiov1beta1.Destination{
+ Host: defaultIstioIngressGatewayHost,
+ },
+ Headers: &istiov1beta1.Headers{
+ Request: &istiov1beta1.Headers_HeaderOperations{
+ Set: map[string]string{
+ "Content-Type": "application/json",
+ "Host": "test-model-1-1.test-namespace.caraml.dev",
+ },
+ },
+ },
+ Weight: 100,
+ },
+ },
+ Rewrite: &istiov1beta1.HTTPRewrite{
+ Uri: fmt.Sprintf("%s:predict", "/v1/models/test-model-1-1"),
+ },
+ },
+ {
+ Match: []*istiov1beta1.HTTPMatchRequest{
+ {
+ Uri: &istiov1beta1.StringMatch{
+ MatchType: &istiov1beta1.StringMatch_Exact{
+ Exact: "/v1/models/test-model-1:predict",
+ },
+ },
+ },
+ },
+ Route: []*istiov1beta1.HTTPRouteDestination{
+ {
+ Destination: &istiov1beta1.Destination{
+ Host: defaultIstioIngressGatewayHost,
+ },
+ Headers: &istiov1beta1.Headers{
+ Request: &istiov1beta1.Headers_HeaderOperations{
+ Set: map[string]string{
+ "Host": "test-model-1-1.test-namespace.caraml.dev",
+ },
+ },
+ },
+ Weight: 100,
+ },
+ },
+ Rewrite: &istiov1beta1.HTTPRewrite{
+ Uri: fmt.Sprintf("%s:predict", "/v1/models/test-model-1-1"),
+ },
+ },
+ {
+ Route: []*istiov1beta1.HTTPRouteDestination{
+ {
+ Destination: &istiov1beta1.Destination{
+ Host: defaultIstioIngressGatewayHost,
+ },
+ Headers: &istiov1beta1.Headers{
+ Request: &istiov1beta1.Headers_HeaderOperations{
+ Set: map[string]string{
+ "Host": "test-model-1-1.test-namespace.caraml.dev",
+ },
+ },
+ },
+ Weight: 100,
+ },
+ },
+ },
+ },
+ },
+ {
+ name: "upi",
+ fields: fields{
+ Name: "test-model-1",
+ ModelName: "test-model",
+ VersionID: "1",
+ Protocol: protocol.UpiV1,
+ },
+ args: args{
+ modelVersionRevisionHost: "test-model-1-1.test-namespace.caraml.dev",
+ modelVersionRevisionPath: "/v1/models/test-model-1-1",
+ },
+ want: []*istiov1beta1.HTTPRoute{
+ {
+ Route: []*istiov1beta1.HTTPRouteDestination{
+ {
+ Destination: &istiov1beta1.Destination{
+ Host: defaultIstioIngressGatewayHost,
+ },
+ Headers: &istiov1beta1.Headers{
+ Request: &istiov1beta1.Headers_HeaderOperations{
+ Set: map[string]string{
+ "Host": "test-model-1-1.test-namespace.caraml.dev",
+ },
+ },
+ },
+ Weight: 100,
+ },
+ },
+ },
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ cfg := &VirtualService{
+ Name: tt.fields.Name,
+ Namespace: tt.fields.Namespace,
+ ModelName: tt.fields.ModelName,
+ VersionID: tt.fields.VersionID,
+ RevisionID: tt.fields.RevisionID,
+ Labels: tt.fields.Labels,
+ Protocol: tt.fields.Protocol,
+ ModelVersionRevisionURL: tt.fields.ModelVersionRevisionURL,
+ }
+ if got := cfg.createHttpRoutes(tt.args.modelVersionRevisionHost, tt.args.modelVersionRevisionPath); !reflect.DeepEqual(got, tt.want) {
+ t.Errorf("VirtualService.createHttpRoutes() =\n%v\n, want\n%v", got, tt.want)
+ }
+ })
+ }
+}
diff --git a/api/cmd/api/main.go b/api/cmd/api/main.go
index c11cc0157..531e08eaf 100644
--- a/api/cmd/api/main.go
+++ b/api/cmd/api/main.go
@@ -295,6 +295,7 @@ func buildDependencies(ctx context.Context, cfg *config.Config, db *gorm.DB, dis
versionsService := service.NewVersionsService(db, mlpAPIClient)
environmentService := initEnvironmentService(cfg, db)
secretService := service.NewSecretService(mlpAPIClient)
+ deploymentService := service.NewDeploymentService(storage.NewDeploymentStorage(db))
gitlabConfig := cfg.FeatureToggleConfig.AlertConfig.GitlabConfig
gitlabClient, err := gitlab.NewClient(gitlabConfig.BaseURL, gitlabConfig.Token)
@@ -327,6 +328,7 @@ func buildDependencies(ctx context.Context, cfg *config.Config, db *gorm.DB, dis
DB: db,
Enforcer: authEnforcer,
+ DeploymentService: deploymentService,
EnvironmentService: environmentService,
ProjectsService: projectsService,
ModelsService: modelsService,
diff --git a/api/cmd/inference-logger/main.go b/api/cmd/inference-logger/main.go
index cd5d2231b..67bfd7a33 100644
--- a/api/cmd/inference-logger/main.go
+++ b/api/cmd/inference-logger/main.go
@@ -9,6 +9,7 @@ import (
"net/http/httputil"
"net/url"
"os"
+ "regexp"
"strings"
"time"
@@ -269,9 +270,22 @@ func getModelNameAndVersion(inferenceServiceName string) (modelName string, mode
return inferenceServiceName, "1"
}
- idx := strings.LastIndex(inferenceServiceName, "-")
- modelName = inferenceServiceName[:idx]
- modelVersion = inferenceServiceName[idx+1:]
+	// regex matching inference service names that end with a revision suffix,
+	// e.g. "my-model-1-r1"
+ re := regexp.MustCompile(`-r\d+$`)
+
+ // for backward compatibility
+ if !re.MatchString(inferenceServiceName) {
+ idx := strings.LastIndex(inferenceServiceName, "-")
+ modelName = inferenceServiceName[:idx]
+ modelVersion = inferenceServiceName[idx+1:]
+ return
+ }
+
+ withoutRevision := re.ReplaceAllString(inferenceServiceName, "")
+ idx := strings.LastIndex(withoutRevision, "-")
+ modelName = withoutRevision[:idx]
+ modelVersion = withoutRevision[idx+1:]
return
}
diff --git a/api/cmd/inference-logger/main_test.go b/api/cmd/inference-logger/main_test.go
index b121bd264..620e4e622 100644
--- a/api/cmd/inference-logger/main_test.go
+++ b/api/cmd/inference-logger/main_test.go
@@ -6,13 +6,6 @@ import (
"github.com/stretchr/testify/assert"
)
-func TestGetModelVersion(t *testing.T) {
- modelName, modelVersion := getModelNameAndVersion("my-model-1")
-
- assert.Equal(t, "my-model", modelName)
- assert.Equal(t, "1", modelVersion)
-}
-
func TestGetTopicName(t *testing.T) {
assert.Equal(t, "merlin-my-project-my-model-inference-log", getTopicName(getServiceName("my-project", "my-model")))
}
@@ -57,3 +50,59 @@ func Test_getNewRelicAPIKey(t *testing.T) {
})
}
}
+
+func Test_getModelNameAndVersion(t *testing.T) {
+ type args struct {
+ inferenceServiceName string
+ }
+ tests := []struct {
+ name string
+ args args
+ wantModelName string
+ wantModelVersion string
+ }{
+ {
+ name: "without revision",
+ args: args{
+ inferenceServiceName: "my-model-1",
+ },
+ wantModelName: "my-model",
+ wantModelVersion: "1",
+ },
+ {
+ name: "with revision",
+ args: args{
+ inferenceServiceName: "my-model-1-r1",
+ },
+ wantModelName: "my-model",
+ wantModelVersion: "1",
+ },
+ {
+ name: "without revision and model name contain number",
+ args: args{
+ inferenceServiceName: "my-model-0-1-2-10",
+ },
+ wantModelName: "my-model-0-1-2",
+ wantModelVersion: "10",
+ },
+ {
+ name: "with revision and model name contain number",
+ args: args{
+ inferenceServiceName: "my-model-0-1-2-10-r11",
+ },
+ wantModelName: "my-model-0-1-2",
+ wantModelVersion: "10",
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ gotModelName, gotModelVersion := getModelNameAndVersion(tt.args.inferenceServiceName)
+ if gotModelName != tt.wantModelName {
+ t.Errorf("getModelNameAndVersion() gotModelName = %v, want %v", gotModelName, tt.wantModelName)
+ }
+ if gotModelVersion != tt.wantModelVersion {
+ t.Errorf("getModelNameAndVersion() gotModelVersion = %v, want %v", gotModelVersion, tt.wantModelVersion)
+ }
+ })
+ }
+}
diff --git a/api/models/container.go b/api/models/container.go
index e8df9bd4e..19777c2c8 100644
--- a/api/models/container.go
+++ b/api/models/container.go
@@ -30,6 +30,7 @@ const (
PredictorComponentType = "predictor"
TransformerComponentType = "transformer"
PDBComponentType = "pdb" // Pod disruption budget
+ VirtualServiceComponentType = "vs"
BatchJobDriverComponentType = "batch_job_driver"
BatchJobExecutorComponentType = "batch_job_executor"
)
@@ -76,8 +77,8 @@ func componentType(containerName, podName string) string {
return componentType
}
-func OnlineInferencePodLabelSelector(modelName string, versionID string) string {
- serviceName := CreateInferenceServiceName(modelName, versionID)
+func OnlineInferencePodLabelSelector(modelName, versionID, revisionID string) string {
+ serviceName := CreateInferenceServiceName(modelName, versionID, revisionID)
return fmt.Sprintf(onlineInferenceLabelTemplate, serviceName)
}
diff --git a/api/models/container_test.go b/api/models/container_test.go
index 7982bc6b5..fd4fe6908 100644
--- a/api/models/container_test.go
+++ b/api/models/container_test.go
@@ -24,8 +24,9 @@ import (
func TestOnlineInferencePodLabelSelector(t *testing.T) {
modelName := "my-model"
versionID := "1"
- result := OnlineInferencePodLabelSelector(modelName, versionID)
- assert.Equal(t, "serving.kserve.io/inferenceservice=my-model-1", result)
+ revisionID := "1"
+ result := OnlineInferencePodLabelSelector(modelName, versionID, revisionID)
+ assert.Equal(t, "serving.kserve.io/inferenceservice=my-model-1-r1", result)
}
func TestBatchInferencePodLabelSelector(t *testing.T) {
@@ -51,14 +52,14 @@ func TestNewContainer(t *testing.T) {
"model",
args{
name: "kfserving-container",
- podName: "test-1-predictor-12345-deployment",
+ podName: "test-1-1-predictor-12345-deployment",
namespace: "sample",
cluster: "test",
gcpProject: "test-project",
},
&Container{
Name: "kfserving-container",
- PodName: "test-1-predictor-12345-deployment",
+ PodName: "test-1-1-predictor-12345-deployment",
ComponentType: "model",
Namespace: "sample",
Cluster: "test",
@@ -69,14 +70,14 @@ func TestNewContainer(t *testing.T) {
"transformer",
args{
name: "transformer",
- podName: "test-1-transformer-12345-deployment",
+ podName: "test-1-1-transformer-12345-deployment",
namespace: "sample",
cluster: "test",
gcpProject: "test-project",
},
&Container{
Name: "transformer",
- PodName: "test-1-transformer-12345-deployment",
+ PodName: "test-1-1-transformer-12345-deployment",
ComponentType: "transformer",
Namespace: "sample",
Cluster: "test",
diff --git a/api/models/deployment.go b/api/models/deployment.go
index 8f8a189e1..c9e998206 100644
--- a/api/models/deployment.go
+++ b/api/models/deployment.go
@@ -20,8 +20,8 @@ import "github.com/google/uuid"
type Deployment struct {
ID ID `json:"id"`
ProjectID ID `json:"project_id"`
- VersionID ID `json:"version_id"`
VersionModelID ID `json:"model_id"`
+ VersionID ID `json:"version_id"`
VersionEndpointID uuid.UUID `json:"version_endpoint_id"`
Status EndpointStatus `json:"status"`
Error string `json:"error"`
diff --git a/api/models/service.go b/api/models/service.go
index f33bd6c03..868e15590 100644
--- a/api/models/service.go
+++ b/api/models/service.go
@@ -27,10 +27,15 @@ import (
"knative.dev/pkg/apis"
)
+const (
+ revisionPrefix = "r"
+)
+
type Service struct {
Name string
ModelName string
ModelVersion string
+ RevisionID ID
Namespace string
ServiceName string
URL string
@@ -45,13 +50,16 @@ type Service struct {
DeploymentMode deployment.Mode
AutoscalingPolicy *autoscaling.AutoscalingPolicy
Protocol protocol.Protocol
+ // CurrentIsvcName is the name of the current running/serving InferenceService's revision
+ CurrentIsvcName string
}
func NewService(model *Model, version *Version, modelOpt *ModelOption, endpoint *VersionEndpoint) *Service {
return &Service{
- Name: CreateInferenceServiceName(model.Name, version.ID.String()),
+ Name: CreateInferenceServiceName(model.Name, version.ID.String(), endpoint.RevisionID.String()),
ModelName: model.Name,
ModelVersion: version.ID.String(),
+ RevisionID: endpoint.RevisionID,
Namespace: model.Project.Name,
ArtifactURI: version.ArtifactURI,
Type: model.Type,
@@ -70,6 +78,7 @@ func NewService(model *Model, version *Version, modelOpt *ModelOption, endpoint
DeploymentMode: endpoint.DeploymentMode,
AutoscalingPolicy: endpoint.AutoscalingPolicy,
Protocol: endpoint.Protocol,
+ CurrentIsvcName: endpoint.InferenceServiceName,
}
}
@@ -99,8 +108,12 @@ func MergeProjectVersionLabels(projectLabels mlp.Labels, versionLabels KV) mlp.L
return projectLabels
}
-func CreateInferenceServiceName(modelName string, versionID string) string {
- return fmt.Sprintf("%s-%s", modelName, versionID)
+func CreateInferenceServiceName(modelName, versionID, revisionID string) string {
+ if revisionID == "" || revisionID == "0" {
+ // This is for backward compatibility, when the endpoint / isvc name didn't include the revision number
+ return fmt.Sprintf("%s-%s", modelName, versionID)
+ }
+ return fmt.Sprintf("%s-%s-%s%s", modelName, versionID, revisionPrefix, revisionID)
}
func GetInferenceURL(url *apis.URL, inferenceServiceName string, protocolValue protocol.Protocol) string {
diff --git a/api/models/service_test.go b/api/models/service_test.go
index f6d611bdb..62dffb03b 100644
--- a/api/models/service_test.go
+++ b/api/models/service_test.go
@@ -1,6 +1,7 @@
package models
import (
+ "fmt"
"reflect"
"testing"
@@ -179,7 +180,10 @@ func TestNewService(t *testing.T) {
project := mlp.Project{Name: "project", Labels: mlpLabels}
model := &Model{Name: "model", Project: project}
version := &Version{ID: 1, Labels: versionLabels}
- endpoint := &VersionEndpoint{}
+ revisionID := ID(1)
+ endpoint := &VersionEndpoint{
+ RevisionID: revisionID,
+ }
type args struct {
model *Model
@@ -201,9 +205,10 @@ func TestNewService(t *testing.T) {
endpoint: endpoint,
},
want: &Service{
- Name: CreateInferenceServiceName(model.Name, version.ID.String()),
+ Name: fmt.Sprintf("%s-%s-r%s", model.Name, version.ID.String(), revisionID),
ModelName: model.Name,
ModelVersion: version.ID.String(),
+ RevisionID: revisionID,
Namespace: model.Project.Name,
ArtifactURI: version.ArtifactURI,
Type: model.Type,
diff --git a/api/models/version_endpoint.go b/api/models/version_endpoint.go
index 8eabda047..8acd6e770 100644
--- a/api/models/version_endpoint.go
+++ b/api/models/version_endpoint.go
@@ -15,7 +15,6 @@
package models
import (
- "fmt"
"net/url"
"github.com/caraml-dev/merlin/pkg/autoscaling"
@@ -31,12 +30,14 @@ import (
type VersionEndpoint struct {
// ID unique id of the version endpoint
ID uuid.UUID `json:"id" gorm:"type:uuid;primary_key;"`
+ // VersionModelID model id from which the version endpoint is created
+ VersionModelID ID `json:"model_id"`
// VersionID model version id from which the version endpoint is created
// The field name has to be prefixed with the related struct name
// in order for gorm Preload to work with references
VersionID ID `json:"version_id"`
- // VersionModelID model id from which the version endpoint is created
- VersionModelID ID `json:"model_id"`
+ // RevisionID defines the revision of the current model version
+ RevisionID ID `json:"revision_id"`
// Status status of the version endpoint
Status EndpointStatus `json:"status"`
// URL url of the version endpoint
@@ -88,19 +89,19 @@ func NewVersionEndpoint(env *Environment, project mlp.Project, model *Model, ver
}
ve := &VersionEndpoint{
- ID: id,
- VersionID: version.ID,
- VersionModelID: version.ModelID,
- Namespace: project.Name,
- InferenceServiceName: fmt.Sprintf("%s-%s", model.Name, version.ID.String()),
- Status: EndpointPending,
- EnvironmentName: env.Name,
- Environment: env,
- ResourceRequest: env.DefaultResourceRequest,
- DeploymentMode: deploymentMode,
- AutoscalingPolicy: autoscalingPolicy,
- EnvVars: envVars,
- Protocol: protocol.HttpJson,
+ ID: id,
+ VersionModelID: version.ModelID,
+ VersionID: version.ID,
+ RevisionID: ID(0),
+ Namespace: project.Name,
+ Status: EndpointPending,
+ EnvironmentName: env.Name,
+ Environment: env,
+ ResourceRequest: env.DefaultResourceRequest,
+ DeploymentMode: deploymentMode,
+ AutoscalingPolicy: autoscalingPolicy,
+ EnvVars: envVars,
+ Protocol: protocol.HttpJson,
}
if monitoringConfig.MonitoringEnabled {
diff --git a/api/queue/work/model_service_deployment.go b/api/queue/work/model_service_deployment.go
index 4dd73a567..ce8f4ace4 100644
--- a/api/queue/work/model_service_deployment.go
+++ b/api/queue/work/model_service_deployment.go
@@ -5,6 +5,7 @@ import (
"encoding/json"
"errors"
"fmt"
+ "time"
"github.com/caraml-dev/merlin/cluster"
"github.com/caraml-dev/merlin/log"
@@ -23,7 +24,7 @@ var deploymentCounter = prometheus.NewCounterVec(
Namespace: "merlin_api",
Help: "Number of deployment",
},
- []string{"project", "model", "status"},
+ []string{"project", "model", "status", "redeploy"},
)
var dataArgKey = "data"
@@ -49,6 +50,7 @@ type EndpointJob struct {
func (depl *ModelServiceDeployment) Deploy(job *queue.Job) error {
ctx := context.Background()
+
data := job.Arguments[dataArgKey]
byte, _ := json.Marshal(data)
var jobArgs EndpointJob
@@ -71,32 +73,51 @@ func (depl *ModelServiceDeployment) Deploy(job *queue.Job) error {
version := jobArgs.Version
project := jobArgs.Project
model := jobArgs.Model
+ model.Project = project
+
+ isRedeployment := false
// Need to reassign destionationURL cause it is ignored when marshalled and unmarshalled
if endpoint.Logger != nil {
endpoint.Logger.DestinationURL = depl.LoggerDestinationURL
}
- model.Project = project
- log.Infof("creating deployment for model %s version %s with endpoint id: %s", model.Name, endpoint.VersionID, endpoint.ID)
-
- // copy endpoint to avoid race condition
+ endpoint.RevisionID++
endpoint.Status = models.EndpointFailed
+
+	// For backward compatibility: a non-empty inference service name means we are redeploying a "legacy" endpoint that was created before model version revisions were introduced.
+	// For forward compatibility: endpoint.RevisionID > 1 means we are redeploying an endpoint that was created after model version revisions were introduced.
+ if endpoint.InferenceServiceName != "" || endpoint.RevisionID > 1 {
+ isRedeployment = true
+ endpoint.Status = endpointArg.Status
+ }
+
+ log.Infof("creating deployment for model %s version %s revision %s with endpoint id: %s", model.Name, endpoint.VersionID, endpoint.RevisionID, endpoint.ID)
+
+	// record the deployment process up-front so the attempt is visible in history even if it later fails
+	deployment, err := depl.DeploymentStorage.Save(&models.Deployment{
+		ProjectID:         model.ProjectID,
+		VersionModelID:    model.ID,
+		VersionID:         endpoint.VersionID,
+		VersionEndpointID: endpoint.ID,
+		Status:            models.EndpointPending,
+	})
+	if err != nil {
+		log.Warnf("unable to create deployment history: %v", err)
+	}
+
defer func() {
- deploymentCounter.WithLabelValues(model.Project.Name, model.Name, string(endpoint.Status)).Inc()
+ deploymentCounter.WithLabelValues(model.Project.Name, model.Name, fmt.Sprint(endpoint.Status), fmt.Sprint(isRedeployment)).Inc()
// record the deployment result
- if _, err := depl.DeploymentStorage.Save(&models.Deployment{
- ProjectID: model.ProjectID,
- VersionModelID: model.ID,
- VersionID: endpoint.VersionID,
- VersionEndpointID: endpoint.ID,
- Status: endpoint.Status,
- Error: endpoint.Message,
- }); err != nil {
- log.Warnf("unable to insert deployment history", err)
+		deployment.Status = endpoint.Status
+		deployment.Error = endpoint.Message
+		deployment.UpdatedAt = time.Now()
+		if _, err := depl.DeploymentStorage.Save(deployment); err != nil {
+			log.Warnf("unable to update deployment history: %v", err)
}
+ // record the version endpoint result
if err := depl.Storage.Save(endpoint); err != nil {
log.Errorf("unable to update endpoint status for model: %s, version: %s, reason: %v", model.Name, version.ID, err)
}
@@ -113,6 +134,7 @@ func (depl *ModelServiceDeployment) Deploy(job *queue.Job) error {
if !ok {
return fmt.Errorf("unable to find cluster controller for environment %s", endpoint.EnvironmentName)
}
+
svc, err := ctl.Deploy(ctx, modelService)
if err != nil {
log.Errorf("unable to deploy version endpoint for model: %s, version: %s, reason: %v", model.Name, version.ID, err)
@@ -120,6 +142,7 @@ func (depl *ModelServiceDeployment) Deploy(job *queue.Job) error {
return err
}
+ // By reaching this point, the deployment is successful
endpoint.URL = svc.URL
previousStatus := endpointArg.Status
if previousStatus == models.EndpointServing {
@@ -128,6 +151,9 @@ func (depl *ModelServiceDeployment) Deploy(job *queue.Job) error {
endpoint.Status = models.EndpointRunning
}
endpoint.ServiceName = svc.ServiceName
+ endpoint.InferenceServiceName = svc.CurrentIsvcName
+ endpoint.Message = "" // reset message
+
return nil
}
diff --git a/api/queue/work/model_service_deployment_test.go b/api/queue/work/model_service_deployment_test.go
index 5f2d05244..df816cd18 100644
--- a/api/queue/work/model_service_deployment_test.go
+++ b/api/queue/work/model_service_deployment_test.go
@@ -8,6 +8,7 @@ import (
"github.com/caraml-dev/merlin/cluster"
clusterMock "github.com/caraml-dev/merlin/cluster/mocks"
+ "github.com/caraml-dev/merlin/log"
"github.com/caraml-dev/merlin/mlp"
"github.com/caraml-dev/merlin/models"
imageBuilderMock "github.com/caraml-dev/merlin/pkg/imagebuilder/mocks"
@@ -55,9 +56,9 @@ func TestExecuteDeployment(t *testing.T) {
project := mlp.Project{Name: "project", Labels: mlpLabels}
model := &models.Model{Name: "model", Project: project}
version := &models.Version{ID: 1, Labels: versionLabels}
- iSvcName := fmt.Sprintf("%s-%d", model.Name, version.ID)
- svcName := fmt.Sprintf("%s-%d.project.svc.cluster.local", model.Name, version.ID)
- url := fmt.Sprintf("%s-%d.example.com", model.Name, version.ID)
+ iSvcName := fmt.Sprintf("%s-%d-1", model.Name, version.ID)
+ svcName := fmt.Sprintf("%s-%d-1.project.svc.cluster.local", model.Name, version.ID)
+ url := fmt.Sprintf("%s-%d-1.example.com", model.Name, version.ID)
tests := []struct {
name string
@@ -81,6 +82,7 @@ func TestExecuteDeployment(t *testing.T) {
},
deploymentStorage: func() *mocks.DeploymentStorage {
mockStorage := &mocks.DeploymentStorage{}
+ mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil)
mockStorage.On("Save", mock.Anything).Return(nil, nil)
return mockStorage
},
@@ -88,12 +90,11 @@ func TestExecuteDeployment(t *testing.T) {
mockStorage := &mocks.VersionEndpointStorage{}
mockStorage.On("Save", mock.Anything).Return(nil)
mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{
- Environment: env,
- EnvironmentName: env.Name,
- ResourceRequest: env.DefaultResourceRequest,
- VersionID: version.ID,
- Namespace: project.Name,
- InferenceServiceName: iSvcName,
+ Environment: env,
+ EnvironmentName: env.Name,
+ ResourceRequest: env.DefaultResourceRequest,
+ VersionID: version.ID,
+ Namespace: project.Name,
}, nil)
return mockStorage
},
@@ -125,6 +126,7 @@ func TestExecuteDeployment(t *testing.T) {
},
deploymentStorage: func() *mocks.DeploymentStorage {
mockStorage := &mocks.DeploymentStorage{}
+ mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil)
mockStorage.On("Save", mock.Anything).Return(nil, nil)
return mockStorage
},
@@ -132,12 +134,11 @@ func TestExecuteDeployment(t *testing.T) {
mockStorage := &mocks.VersionEndpointStorage{}
mockStorage.On("Save", mock.Anything).Return(nil)
mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{
- Environment: env,
- EnvironmentName: env.Name,
- ResourceRequest: env.DefaultResourceRequest,
- VersionID: version.ID,
- Namespace: project.Name,
- InferenceServiceName: iSvcName,
+ Environment: env,
+ EnvironmentName: env.Name,
+ ResourceRequest: env.DefaultResourceRequest,
+ VersionID: version.ID,
+ Namespace: project.Name,
}, nil)
return mockStorage
},
@@ -169,6 +170,7 @@ func TestExecuteDeployment(t *testing.T) {
},
deploymentStorage: func() *mocks.DeploymentStorage {
mockStorage := &mocks.DeploymentStorage{}
+ mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil)
mockStorage.On("Save", mock.Anything).Return(nil, nil)
return mockStorage
},
@@ -176,12 +178,11 @@ func TestExecuteDeployment(t *testing.T) {
mockStorage := &mocks.VersionEndpointStorage{}
mockStorage.On("Save", mock.Anything).Return(nil)
mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{
- Environment: env,
- EnvironmentName: env.Name,
- ResourceRequest: env.DefaultResourceRequest,
- VersionID: version.ID,
- Namespace: project.Name,
- InferenceServiceName: iSvcName,
+ Environment: env,
+ EnvironmentName: env.Name,
+ ResourceRequest: env.DefaultResourceRequest,
+ VersionID: version.ID,
+ Namespace: project.Name,
}, nil)
return mockStorage
},
@@ -215,6 +216,7 @@ func TestExecuteDeployment(t *testing.T) {
},
deploymentStorage: func() *mocks.DeploymentStorage {
mockStorage := &mocks.DeploymentStorage{}
+ mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil)
mockStorage.On("Save", mock.Anything).Return(nil, nil)
return mockStorage
},
@@ -222,12 +224,11 @@ func TestExecuteDeployment(t *testing.T) {
mockStorage := &mocks.VersionEndpointStorage{}
mockStorage.On("Save", mock.Anything).Return(nil)
mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{
- Environment: env,
- EnvironmentName: env.Name,
- ResourceRequest: env.DefaultResourceRequest,
- VersionID: version.ID,
- Namespace: project.Name,
- InferenceServiceName: iSvcName,
+ Environment: env,
+ EnvironmentName: env.Name,
+ ResourceRequest: env.DefaultResourceRequest,
+ VersionID: version.ID,
+ Namespace: project.Name,
}, nil)
return mockStorage
},
@@ -262,6 +263,7 @@ func TestExecuteDeployment(t *testing.T) {
},
deploymentStorage: func() *mocks.DeploymentStorage {
mockStorage := &mocks.DeploymentStorage{}
+ mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil)
mockStorage.On("Save", mock.Anything).Return(nil, nil)
return mockStorage
},
@@ -269,12 +271,11 @@ func TestExecuteDeployment(t *testing.T) {
mockStorage := &mocks.VersionEndpointStorage{}
mockStorage.On("Save", mock.Anything).Return(nil)
mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{
- Environment: env,
- EnvironmentName: env.Name,
- ResourceRequest: env.DefaultResourceRequest,
- VersionID: version.ID,
- Namespace: project.Name,
- InferenceServiceName: iSvcName,
+ Environment: env,
+ EnvironmentName: env.Name,
+ ResourceRequest: env.DefaultResourceRequest,
+ VersionID: version.ID,
+ Namespace: project.Name,
}, nil)
return mockStorage
},
@@ -301,29 +302,334 @@ func TestExecuteDeployment(t *testing.T) {
},
deploymentStorage: func() *mocks.DeploymentStorage {
mockStorage := &mocks.DeploymentStorage{}
+ mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil)
mockStorage.On("Save", mock.Anything).Return(nil, nil)
return mockStorage
},
storage: func() *mocks.VersionEndpointStorage {
mockStorage := &mocks.VersionEndpointStorage{}
mockStorage.On("Save", mock.Anything).Return(nil)
+ mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{
+ Environment: env,
+ EnvironmentName: env.Name,
+ ResourceRequest: env.DefaultResourceRequest,
+ VersionID: version.ID,
+ Namespace: project.Name,
+ }, nil)
+ return mockStorage
+ },
+ controller: func() *clusterMock.Controller {
+ ctrl := &clusterMock.Controller{}
+ return ctrl
+ },
+ imageBuilder: func() *imageBuilderMock.ImageBuilder {
+ mockImgBuilder := &imageBuilderMock.ImageBuilder{}
+ mockImgBuilder.On("BuildImage", context.Background(), mock.Anything, mock.Anything, mock.Anything).Return("", errors.New("Failed to build image"))
+ return mockImgBuilder
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ctrl := tt.controller()
+ controllers := map[string]cluster.Controller{env.Name: ctrl}
+ imgBuilder := tt.imageBuilder()
+ mockStorage := tt.storage()
+ mockDeploymentStorage := tt.deploymentStorage()
+ job := &queue.Job{
+ Name: "job",
+ Arguments: queue.Arguments{
+ dataArgKey: EndpointJob{
+ Endpoint: tt.endpoint,
+ Version: tt.version,
+ Model: tt.model,
+ Project: tt.model.Project,
+ },
+ },
+ }
+ svc := &ModelServiceDeployment{
+ ClusterControllers: controllers,
+ ImageBuilder: imgBuilder,
+ Storage: mockStorage,
+ DeploymentStorage: mockDeploymentStorage,
+ LoggerDestinationURL: loggerDestinationURL,
+ }
+
+ err := svc.Deploy(job)
+ assert.Equal(t, tt.deployErr, err)
+
+ if len(ctrl.ExpectedCalls) > 0 && ctrl.ExpectedCalls[0].ReturnArguments[0] != nil {
+ deployedSvc := ctrl.ExpectedCalls[0].ReturnArguments[0].(*models.Service)
+ assert.Equal(t, svcMetadata, deployedSvc.Metadata)
+ assert.Equal(t, iSvcName, deployedSvc.Name)
+ }
+
+ mockStorage.AssertNumberOfCalls(t, "Save", 1)
+ mockDeploymentStorage.AssertNumberOfCalls(t, "Save", 2)
+
+ savedEndpoint := mockStorage.Calls[1].Arguments[0].(*models.VersionEndpoint)
+ assert.Equal(t, tt.model.ID, savedEndpoint.VersionModelID)
+ assert.Equal(t, tt.version.ID, savedEndpoint.VersionID)
+ assert.Equal(t, tt.model.Project.Name, savedEndpoint.Namespace)
+ assert.Equal(t, env.Name, savedEndpoint.EnvironmentName)
+
+ if tt.endpoint.ResourceRequest != nil {
+ assert.Equal(t, tt.endpoint.ResourceRequest, savedEndpoint.ResourceRequest)
+ } else {
+ assert.Equal(t, env.DefaultResourceRequest, savedEndpoint.ResourceRequest)
+ }
+
+ if tt.deployErr != nil {
+ assert.Equal(t, models.EndpointFailed, savedEndpoint.Status)
+ } else {
+ assert.Equal(t, models.EndpointRunning, savedEndpoint.Status)
+ assert.Equal(t, url, savedEndpoint.URL)
+ assert.Equal(t, "", savedEndpoint.InferenceServiceName)
+ }
+ })
+ }
+}
+
+func TestExecuteRedeployment(t *testing.T) {
+ isDefaultTrue := true
+ loggerDestinationURL := "http://logger.default"
+
+ env := &models.Environment{
+ Name: "env1",
+ Cluster: "cluster1",
+ IsDefault: &isDefaultTrue,
+ Region: "id",
+ GcpProject: "project",
+ DefaultResourceRequest: &models.ResourceRequest{
+ MinReplica: 0,
+ MaxReplica: 1,
+ CPURequest: resource.MustParse("1"),
+ MemoryRequest: resource.MustParse("1Gi"),
+ },
+ }
+
+ mlpLabels := mlp.Labels{
+ {Key: "key-1", Value: "value-1"},
+ }
+
+ versionLabels := models.KV{
+ "key-1": "value-11",
+ "key-2": "value-2",
+ }
+
+ svcMetadata := models.Metadata{
+ Labels: mlp.Labels{
+ {Key: "key-1", Value: "value-11"},
+ {Key: "key-2", Value: "value-2"},
+ },
+ }
+
+ project := mlp.Project{Name: "project", Labels: mlpLabels}
+ model := &models.Model{Name: "model", Project: project}
+ version := &models.Version{ID: 1, Labels: versionLabels}
+
+	// The endpoint being redeployed is at revision 1 (isvc name suffix "-1");
+	// redeployment creates revision 2, so the expected service name, cluster
+	// DNS name, and URL below all carry the new "-2" revision suffix.
+ modelSvcName := fmt.Sprintf("%s-%d-2", model.Name, version.ID)
+ svcName := fmt.Sprintf("%s-%d-2.project.svc.cluster.local", model.Name, version.ID)
+ url := fmt.Sprintf("%s-%d-2.example.com", model.Name, version.ID)
+
+ tests := []struct {
+ name string
+ endpoint *models.VersionEndpoint
+ model *models.Model
+ version *models.Version
+ expectedEndpointStatus models.EndpointStatus
+ deployErr error
+ deploymentStorage func() *mocks.DeploymentStorage
+ storage func() *mocks.VersionEndpointStorage
+ controller func() *clusterMock.Controller
+ imageBuilder func() *imageBuilderMock.ImageBuilder
+ }{
+ {
+ name: "Success: Redeploy running endpoint",
+ model: model,
+ version: version,
+ endpoint: &models.VersionEndpoint{
+ EnvironmentName: env.Name,
+ ResourceRequest: env.DefaultResourceRequest,
+ VersionID: version.ID,
+ Status: models.EndpointRunning,
+ },
+ expectedEndpointStatus: models.EndpointRunning,
+ deploymentStorage: func() *mocks.DeploymentStorage {
+ mockStorage := &mocks.DeploymentStorage{}
+ mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil)
+ mockStorage.On("Save", mock.Anything).Return(nil, nil)
+ return mockStorage
+ },
+ storage: func() *mocks.VersionEndpointStorage {
+ mockStorage := &mocks.VersionEndpointStorage{}
mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{
Environment: env,
EnvironmentName: env.Name,
ResourceRequest: env.DefaultResourceRequest,
VersionID: version.ID,
Namespace: project.Name,
- InferenceServiceName: iSvcName,
+ RevisionID: models.ID(1),
+ InferenceServiceName: fmt.Sprintf("%s-%d-1", model.Name, version.ID),
+ Status: models.EndpointRunning,
}, nil)
+ mockStorage.On("Save", &models.VersionEndpoint{
+ Environment: env,
+ EnvironmentName: env.Name,
+ ResourceRequest: env.DefaultResourceRequest,
+ VersionID: version.ID,
+ Namespace: project.Name,
+ RevisionID: models.ID(2),
+ InferenceServiceName: modelSvcName,
+ Status: models.EndpointRunning,
+ URL: url,
+ ServiceName: svcName,
+ }).Return(nil)
return mockStorage
},
controller: func() *clusterMock.Controller {
ctrl := &clusterMock.Controller{}
+ ctrl.On("Deploy", mock.Anything, mock.Anything).
+ Return(&models.Service{
+ Name: fmt.Sprintf("%s-%d-2", model.Name, version.ID),
+ CurrentIsvcName: fmt.Sprintf("%s-%d-2", model.Name, version.ID),
+ RevisionID: models.ID(2),
+ Namespace: project.Name,
+ ServiceName: fmt.Sprintf("%s-%d-2.project.svc.cluster.local", model.Name, version.ID),
+ URL: fmt.Sprintf("%s-%d-2.example.com", model.Name, version.ID),
+ Metadata: svcMetadata,
+ }, nil)
+ return ctrl
+ },
+ imageBuilder: func() *imageBuilderMock.ImageBuilder {
+ mockImgBuilder := &imageBuilderMock.ImageBuilder{}
+ return mockImgBuilder
+ },
+ },
+ {
+ name: "Success: Redeploy serving endpoint",
+ model: model,
+ version: version,
+ endpoint: &models.VersionEndpoint{
+ EnvironmentName: env.Name,
+ ResourceRequest: env.DefaultResourceRequest,
+ VersionID: version.ID,
+ Status: models.EndpointServing,
+ },
+ expectedEndpointStatus: models.EndpointServing,
+ deploymentStorage: func() *mocks.DeploymentStorage {
+ mockStorage := &mocks.DeploymentStorage{}
+ mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil)
+ mockStorage.On("Save", mock.Anything).Return(nil, nil)
+ return mockStorage
+ },
+ storage: func() *mocks.VersionEndpointStorage {
+ mockStorage := &mocks.VersionEndpointStorage{}
+ mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{
+ Environment: env,
+ EnvironmentName: env.Name,
+ ResourceRequest: env.DefaultResourceRequest,
+ VersionID: version.ID,
+ Namespace: project.Name,
+ RevisionID: models.ID(1),
+ InferenceServiceName: fmt.Sprintf("%s-%d-1", model.Name, version.ID),
+ Status: models.EndpointServing,
+ }, nil)
+ mockStorage.On("Save", &models.VersionEndpoint{
+ Environment: env,
+ EnvironmentName: env.Name,
+ ResourceRequest: env.DefaultResourceRequest,
+ VersionID: version.ID,
+ Namespace: project.Name,
+ RevisionID: models.ID(2),
+ InferenceServiceName: modelSvcName,
+ Status: models.EndpointServing,
+ URL: url,
+ ServiceName: svcName,
+ }).Return(nil)
+ return mockStorage
+ },
+ controller: func() *clusterMock.Controller {
+ ctrl := &clusterMock.Controller{}
+ ctrl.On("Deploy", mock.Anything, mock.Anything).
+ Return(&models.Service{
+ Name: fmt.Sprintf("%s-%d-2", model.Name, version.ID),
+ CurrentIsvcName: fmt.Sprintf("%s-%d-2", model.Name, version.ID),
+ RevisionID: models.ID(2),
+ Namespace: project.Name,
+ ServiceName: fmt.Sprintf("%s-%d-2.project.svc.cluster.local", model.Name, version.ID),
+ URL: fmt.Sprintf("%s-%d-2.example.com", model.Name, version.ID),
+ Metadata: svcMetadata,
+ }, nil)
+ return ctrl
+ },
+ imageBuilder: func() *imageBuilderMock.ImageBuilder {
+ mockImgBuilder := &imageBuilderMock.ImageBuilder{}
+ return mockImgBuilder
+ },
+ },
+ {
+ name: "Success: Redeploy failed endpoint",
+ model: model,
+ version: version,
+ endpoint: &models.VersionEndpoint{
+ EnvironmentName: env.Name,
+ ResourceRequest: env.DefaultResourceRequest,
+ VersionID: version.ID,
+ Status: models.EndpointFailed,
+ },
+ expectedEndpointStatus: models.EndpointRunning,
+ deploymentStorage: func() *mocks.DeploymentStorage {
+ mockStorage := &mocks.DeploymentStorage{}
+ mockStorage.On("Save", mock.Anything).Return(&models.Deployment{}, nil)
+ mockStorage.On("Save", mock.Anything).Return(nil, nil)
+ return mockStorage
+ },
+ storage: func() *mocks.VersionEndpointStorage {
+ mockStorage := &mocks.VersionEndpointStorage{}
+ mockStorage.On("Get", mock.Anything).Return(&models.VersionEndpoint{
+ Environment: env,
+ EnvironmentName: env.Name,
+ ResourceRequest: env.DefaultResourceRequest,
+ VersionID: version.ID,
+ Namespace: project.Name,
+ RevisionID: models.ID(1),
+ InferenceServiceName: fmt.Sprintf("%s-%d-1", model.Name, version.ID),
+ Status: models.EndpointFailed,
+ }, nil)
+ mockStorage.On("Save", &models.VersionEndpoint{
+ Environment: env,
+ EnvironmentName: env.Name,
+ ResourceRequest: env.DefaultResourceRequest,
+ VersionID: version.ID,
+ Namespace: project.Name,
+ RevisionID: models.ID(2),
+ InferenceServiceName: modelSvcName,
+ Status: models.EndpointRunning,
+ URL: url,
+ ServiceName: svcName,
+ }).Return(nil)
+ return mockStorage
+ },
+ controller: func() *clusterMock.Controller {
+ ctrl := &clusterMock.Controller{}
+ ctrl.On("Deploy", mock.Anything, mock.Anything).
+ Return(&models.Service{
+ Name: fmt.Sprintf("%s-%d-2", model.Name, version.ID),
+ CurrentIsvcName: fmt.Sprintf("%s-%d-2", model.Name, version.ID),
+ RevisionID: models.ID(2),
+ Namespace: project.Name,
+ ServiceName: fmt.Sprintf("%s-%d-2.project.svc.cluster.local", model.Name, version.ID),
+ URL: fmt.Sprintf("%s-%d-2.example.com", model.Name, version.ID),
+ Metadata: svcMetadata,
+ }, nil)
return ctrl
},
imageBuilder: func() *imageBuilderMock.ImageBuilder {
mockImgBuilder := &imageBuilderMock.ImageBuilder{}
- mockImgBuilder.On("BuildImage", context.Background(), mock.Anything, mock.Anything, mock.Anything).Return("", errors.New("Failed to build image"))
return mockImgBuilder
},
},
@@ -360,10 +666,14 @@ func TestExecuteDeployment(t *testing.T) {
if len(ctrl.ExpectedCalls) > 0 && ctrl.ExpectedCalls[0].ReturnArguments[0] != nil {
deployedSvc := ctrl.ExpectedCalls[0].ReturnArguments[0].(*models.Service)
assert.Equal(t, svcMetadata, deployedSvc.Metadata)
+ assert.Equal(t, modelSvcName, deployedSvc.Name)
}
mockStorage.AssertNumberOfCalls(t, "Save", 1)
+ mockDeploymentStorage.AssertNumberOfCalls(t, "Save", 2)
+
savedEndpoint := mockStorage.Calls[1].Arguments[0].(*models.VersionEndpoint)
+ log.Infof("savedEndpoint: %+v", savedEndpoint)
assert.Equal(t, tt.model.ID, savedEndpoint.VersionModelID)
assert.Equal(t, tt.version.ID, savedEndpoint.VersionID)
assert.Equal(t, tt.model.Project.Name, savedEndpoint.Namespace)
@@ -374,12 +684,13 @@ func TestExecuteDeployment(t *testing.T) {
} else {
assert.Equal(t, env.DefaultResourceRequest, savedEndpoint.ResourceRequest)
}
+
if tt.deployErr != nil {
assert.Equal(t, models.EndpointFailed, savedEndpoint.Status)
} else {
- assert.Equal(t, models.EndpointRunning, savedEndpoint.Status)
+ assert.Equal(t, tt.expectedEndpointStatus, savedEndpoint.Status)
assert.Equal(t, url, savedEndpoint.URL)
- assert.Equal(t, iSvcName, savedEndpoint.InferenceServiceName)
+ assert.Equal(t, modelSvcName, savedEndpoint.InferenceServiceName)
}
})
}
diff --git a/api/service/deployment_service.go b/api/service/deployment_service.go
new file mode 100644
index 000000000..1c1391870
--- /dev/null
+++ b/api/service/deployment_service.go
@@ -0,0 +1,25 @@
+package service
+
+import (
+ "github.com/caraml-dev/merlin/models"
+ "github.com/caraml-dev/merlin/storage"
+)
+
+type DeploymentService interface {
+ ListDeployments(modelID, versionID, endpointUUID string) ([]*models.Deployment, error)
+}
+
+func NewDeploymentService(storage storage.DeploymentStorage) DeploymentService {
+ return &deploymentService{
+ storage: storage,
+ }
+}
+
+type deploymentService struct {
+ storage storage.DeploymentStorage
+}
+
+func (service *deploymentService) ListDeployments(modelID, versionID, endpointUUID string) ([]*models.Deployment, error) {
+ // TODO: Add pagination
+ return service.storage.ListInModelVersion(modelID, versionID, endpointUUID)
+}
diff --git a/api/service/deployment_service_test.go b/api/service/deployment_service_test.go
new file mode 100644
index 000000000..b90944fc0
--- /dev/null
+++ b/api/service/deployment_service_test.go
@@ -0,0 +1,88 @@
+package service
+
+import (
+ "fmt"
+ "reflect"
+ "testing"
+ "time"
+
+ "github.com/caraml-dev/merlin/models"
+ "github.com/caraml-dev/merlin/storage/mocks"
+ "github.com/google/uuid"
+)
+
+func Test_deploymentService_ListDeployments(t *testing.T) {
+ endpointUUID := uuid.New()
+ endpointUUIDString := fmt.Sprint(endpointUUID)
+
+ createdUpdated := models.CreatedUpdated{
+ CreatedAt: time.Now(),
+ UpdatedAt: time.Now(),
+ }
+
+ type args struct {
+ modelID string
+ versionID string
+ endpointUUID string
+ }
+ tests := []struct {
+ name string
+ args args
+ mockDeploymentStorage func() *mocks.DeploymentStorage
+ want []*models.Deployment
+ wantErr bool
+ }{
+ {
+ name: "success",
+ args: args{
+ modelID: "model",
+ versionID: "1",
+ endpointUUID: endpointUUIDString,
+ },
+ mockDeploymentStorage: func() *mocks.DeploymentStorage {
+ mockStorage := &mocks.DeploymentStorage{}
+ mockStorage.On("ListInModelVersion", "model", "1", endpointUUIDString).Return([]*models.Deployment{
+ {
+ ID: models.ID(1),
+ ProjectID: models.ID(1),
+ VersionModelID: models.ID(1),
+ VersionID: models.ID(1),
+ VersionEndpointID: endpointUUID,
+ Status: models.EndpointRunning,
+ Error: "",
+ CreatedUpdated: createdUpdated,
+ },
+ }, nil)
+ return mockStorage
+ },
+ want: []*models.Deployment{{
+ ID: models.ID(1),
+ ProjectID: models.ID(1),
+ VersionModelID: models.ID(1),
+ VersionID: models.ID(1),
+ VersionEndpointID: endpointUUID,
+ Status: models.EndpointRunning,
+ Error: "",
+ CreatedUpdated: createdUpdated,
+ }},
+ wantErr: false,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ mockDeploymentStorage := tt.mockDeploymentStorage()
+
+ service := &deploymentService{
+ storage: mockDeploymentStorage,
+ }
+ got, err := service.ListDeployments(tt.args.modelID, tt.args.versionID, tt.args.endpointUUID)
+ if (err != nil) != tt.wantErr {
+ t.Errorf("deploymentService.ListDeployments() error = %v, wantErr %v", err, tt.wantErr)
+ return
+ }
+ if !reflect.DeepEqual(got, tt.want) {
+ t.Errorf("deploymentService.ListDeployments() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
diff --git a/api/service/mocks/deployment_service.go b/api/service/mocks/deployment_service.go
new file mode 100644
index 000000000..70b00c99e
--- /dev/null
+++ b/api/service/mocks/deployment_service.go
@@ -0,0 +1,54 @@
+// Code generated by mockery v2.20.0. DO NOT EDIT.
+
+package mocks
+
+import (
+ models "github.com/caraml-dev/merlin/models"
+ mock "github.com/stretchr/testify/mock"
+)
+
+// DeploymentService is an autogenerated mock type for the DeploymentService type
+type DeploymentService struct {
+ mock.Mock
+}
+
+// ListDeployments provides a mock function with given fields: modelID, versionID, endpointUUID
+func (_m *DeploymentService) ListDeployments(modelID string, versionID string, endpointUUID string) ([]*models.Deployment, error) {
+ ret := _m.Called(modelID, versionID, endpointUUID)
+
+ var r0 []*models.Deployment
+ var r1 error
+ if rf, ok := ret.Get(0).(func(string, string, string) ([]*models.Deployment, error)); ok {
+ return rf(modelID, versionID, endpointUUID)
+ }
+ if rf, ok := ret.Get(0).(func(string, string, string) []*models.Deployment); ok {
+ r0 = rf(modelID, versionID, endpointUUID)
+ } else {
+ if ret.Get(0) != nil {
+ r0 = ret.Get(0).([]*models.Deployment)
+ }
+ }
+
+ if rf, ok := ret.Get(1).(func(string, string, string) error); ok {
+ r1 = rf(modelID, versionID, endpointUUID)
+ } else {
+ r1 = ret.Error(1)
+ }
+
+ return r0, r1
+}
+
+type mockConstructorTestingTNewDeploymentService interface {
+ mock.TestingT
+ Cleanup(func())
+}
+
+// NewDeploymentService creates a new instance of DeploymentService. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
+func NewDeploymentService(t mockConstructorTestingTNewDeploymentService) *DeploymentService {
+ mock := &DeploymentService{}
+ mock.Mock.Test(t)
+
+ t.Cleanup(func() { mock.AssertExpectations(t) })
+
+ return mock
+}
diff --git a/api/service/mocks/endpoints_service.go b/api/service/mocks/endpoints_service.go
index c4faea6e5..d5a2e3b33 100644
--- a/api/service/mocks/endpoints_service.go
+++ b/api/service/mocks/endpoints_service.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.22.1. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
@@ -106,25 +106,25 @@ func (_m *EndpointsService) FindByID(ctx context.Context, endpointUuid uuid.UUID
return r0, r1
}
-// ListContainers provides a mock function with given fields: ctx, model, version, endpointUuid
-func (_m *EndpointsService) ListContainers(ctx context.Context, model *models.Model, version *models.Version, endpointUuid uuid.UUID) ([]*models.Container, error) {
- ret := _m.Called(ctx, model, version, endpointUuid)
+// ListContainers provides a mock function with given fields: ctx, model, version, endpoint
+func (_m *EndpointsService) ListContainers(ctx context.Context, model *models.Model, version *models.Version, endpoint *models.VersionEndpoint) ([]*models.Container, error) {
+ ret := _m.Called(ctx, model, version, endpoint)
var r0 []*models.Container
var r1 error
- if rf, ok := ret.Get(0).(func(context.Context, *models.Model, *models.Version, uuid.UUID) ([]*models.Container, error)); ok {
- return rf(ctx, model, version, endpointUuid)
+ if rf, ok := ret.Get(0).(func(context.Context, *models.Model, *models.Version, *models.VersionEndpoint) ([]*models.Container, error)); ok {
+ return rf(ctx, model, version, endpoint)
}
- if rf, ok := ret.Get(0).(func(context.Context, *models.Model, *models.Version, uuid.UUID) []*models.Container); ok {
- r0 = rf(ctx, model, version, endpointUuid)
+ if rf, ok := ret.Get(0).(func(context.Context, *models.Model, *models.Version, *models.VersionEndpoint) []*models.Container); ok {
+ r0 = rf(ctx, model, version, endpoint)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]*models.Container)
}
}
- if rf, ok := ret.Get(1).(func(context.Context, *models.Model, *models.Version, uuid.UUID) error); ok {
- r1 = rf(ctx, model, version, endpointUuid)
+ if rf, ok := ret.Get(1).(func(context.Context, *models.Model, *models.Version, *models.VersionEndpoint) error); ok {
+ r1 = rf(ctx, model, version, endpoint)
} else {
r1 = ret.Error(1)
}
diff --git a/api/service/mocks/environment_service.go b/api/service/mocks/environment_service.go
index d353cb986..ca6897597 100644
--- a/api/service/mocks/environment_service.go
+++ b/api/service/mocks/environment_service.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.14.0. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
@@ -17,6 +17,10 @@ func (_m *EnvironmentService) GetDefaultEnvironment() (*models.Environment, erro
ret := _m.Called()
var r0 *models.Environment
+ var r1 error
+ if rf, ok := ret.Get(0).(func() (*models.Environment, error)); ok {
+ return rf()
+ }
if rf, ok := ret.Get(0).(func() *models.Environment); ok {
r0 = rf()
} else {
@@ -25,7 +29,6 @@ func (_m *EnvironmentService) GetDefaultEnvironment() (*models.Environment, erro
}
}
- var r1 error
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
@@ -40,6 +43,10 @@ func (_m *EnvironmentService) GetDefaultPredictionJobEnvironment() (*models.Envi
ret := _m.Called()
var r0 *models.Environment
+ var r1 error
+ if rf, ok := ret.Get(0).(func() (*models.Environment, error)); ok {
+ return rf()
+ }
if rf, ok := ret.Get(0).(func() *models.Environment); ok {
r0 = rf()
} else {
@@ -48,7 +55,6 @@ func (_m *EnvironmentService) GetDefaultPredictionJobEnvironment() (*models.Envi
}
}
- var r1 error
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
@@ -63,6 +69,10 @@ func (_m *EnvironmentService) GetEnvironment(name string) (*models.Environment,
ret := _m.Called(name)
var r0 *models.Environment
+ var r1 error
+ if rf, ok := ret.Get(0).(func(string) (*models.Environment, error)); ok {
+ return rf(name)
+ }
if rf, ok := ret.Get(0).(func(string) *models.Environment); ok {
r0 = rf(name)
} else {
@@ -71,7 +81,6 @@ func (_m *EnvironmentService) GetEnvironment(name string) (*models.Environment,
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(name)
} else {
@@ -86,6 +95,10 @@ func (_m *EnvironmentService) ListEnvironments(name string) ([]*models.Environme
ret := _m.Called(name)
var r0 []*models.Environment
+ var r1 error
+ if rf, ok := ret.Get(0).(func(string) ([]*models.Environment, error)); ok {
+ return rf(name)
+ }
if rf, ok := ret.Get(0).(func(string) []*models.Environment); ok {
r0 = rf(name)
} else {
@@ -94,7 +107,6 @@ func (_m *EnvironmentService) ListEnvironments(name string) ([]*models.Environme
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(name)
} else {
@@ -109,6 +121,10 @@ func (_m *EnvironmentService) Save(env *models.Environment) (*models.Environment
ret := _m.Called(env)
var r0 *models.Environment
+ var r1 error
+ if rf, ok := ret.Get(0).(func(*models.Environment) (*models.Environment, error)); ok {
+ return rf(env)
+ }
if rf, ok := ret.Get(0).(func(*models.Environment) *models.Environment); ok {
r0 = rf(env)
} else {
@@ -117,7 +133,6 @@ func (_m *EnvironmentService) Save(env *models.Environment) (*models.Environment
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(*models.Environment) error); ok {
r1 = rf(env)
} else {
diff --git a/api/service/mocks/list_options.go b/api/service/mocks/list_options.go
index 0a3f31ec1..81c446b3d 100644
--- a/api/service/mocks/list_options.go
+++ b/api/service/mocks/list_options.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v1.0.1. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
@@ -12,24 +12,6 @@ type ListOptions struct {
mock.Mock
}
-type ListOptions_apply struct {
- *mock.Call
-}
-
-func (_m ListOptions_apply) Return(_a0 *gorm.DB) *ListOptions_apply {
- return &ListOptions_apply{Call: _m.Call.Return(_a0)}
-}
-
-func (_m *ListOptions) Onapply(q *gorm.DB) *ListOptions_apply {
- c := _m.On("apply", q)
- return &ListOptions_apply{Call: c}
-}
-
-func (_m *ListOptions) OnapplyMatch(matchers ...interface{}) *ListOptions_apply {
- c := _m.On("apply", matchers...)
- return &ListOptions_apply{Call: c}
-}
-
// apply provides a mock function with given fields: q
func (_m *ListOptions) apply(q *gorm.DB) *gorm.DB {
ret := _m.Called(q)
@@ -45,3 +27,18 @@ func (_m *ListOptions) apply(q *gorm.DB) *gorm.DB {
return r0
}
+
+type mockConstructorTestingTNewListOptions interface {
+ mock.TestingT
+ Cleanup(func())
+}
+
+// NewListOptions creates a new instance of ListOptions. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
+func NewListOptions(t mockConstructorTestingTNewListOptions) *ListOptions {
+ mock := &ListOptions{}
+ mock.Mock.Test(t)
+
+ t.Cleanup(func() { mock.AssertExpectations(t) })
+
+ return mock
+}
diff --git a/api/service/mocks/log_service.go b/api/service/mocks/log_service.go
index 7006089c3..9c2815a23 100644
--- a/api/service/mocks/log_service.go
+++ b/api/service/mocks/log_service.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.14.0. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
diff --git a/api/service/mocks/model_endpoint_alert_service.go b/api/service/mocks/model_endpoint_alert_service.go
index f41440a83..642233439 100644
--- a/api/service/mocks/model_endpoint_alert_service.go
+++ b/api/service/mocks/model_endpoint_alert_service.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.14.0. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
@@ -17,6 +17,10 @@ func (_m *ModelEndpointAlertService) CreateModelEndpointAlert(user string, alert
ret := _m.Called(user, alert)
var r0 *models.ModelEndpointAlert
+ var r1 error
+ if rf, ok := ret.Get(0).(func(string, *models.ModelEndpointAlert) (*models.ModelEndpointAlert, error)); ok {
+ return rf(user, alert)
+ }
if rf, ok := ret.Get(0).(func(string, *models.ModelEndpointAlert) *models.ModelEndpointAlert); ok {
r0 = rf(user, alert)
} else {
@@ -25,7 +29,6 @@ func (_m *ModelEndpointAlertService) CreateModelEndpointAlert(user string, alert
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(string, *models.ModelEndpointAlert) error); ok {
r1 = rf(user, alert)
} else {
@@ -40,6 +43,10 @@ func (_m *ModelEndpointAlertService) GetModelEndpointAlert(modelID models.ID, mo
ret := _m.Called(modelID, modelEndpointID)
var r0 *models.ModelEndpointAlert
+ var r1 error
+ if rf, ok := ret.Get(0).(func(models.ID, models.ID) (*models.ModelEndpointAlert, error)); ok {
+ return rf(modelID, modelEndpointID)
+ }
if rf, ok := ret.Get(0).(func(models.ID, models.ID) *models.ModelEndpointAlert); ok {
r0 = rf(modelID, modelEndpointID)
} else {
@@ -48,7 +55,6 @@ func (_m *ModelEndpointAlertService) GetModelEndpointAlert(modelID models.ID, mo
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(models.ID, models.ID) error); ok {
r1 = rf(modelID, modelEndpointID)
} else {
@@ -63,6 +69,10 @@ func (_m *ModelEndpointAlertService) ListModelAlerts(modelID models.ID) ([]*mode
ret := _m.Called(modelID)
var r0 []*models.ModelEndpointAlert
+ var r1 error
+ if rf, ok := ret.Get(0).(func(models.ID) ([]*models.ModelEndpointAlert, error)); ok {
+ return rf(modelID)
+ }
if rf, ok := ret.Get(0).(func(models.ID) []*models.ModelEndpointAlert); ok {
r0 = rf(modelID)
} else {
@@ -71,7 +81,6 @@ func (_m *ModelEndpointAlertService) ListModelAlerts(modelID models.ID) ([]*mode
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(models.ID) error); ok {
r1 = rf(modelID)
} else {
@@ -86,6 +95,10 @@ func (_m *ModelEndpointAlertService) ListTeams() ([]string, error) {
ret := _m.Called()
var r0 []string
+ var r1 error
+ if rf, ok := ret.Get(0).(func() ([]string, error)); ok {
+ return rf()
+ }
if rf, ok := ret.Get(0).(func() []string); ok {
r0 = rf()
} else {
@@ -94,7 +107,6 @@ func (_m *ModelEndpointAlertService) ListTeams() ([]string, error) {
}
}
- var r1 error
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
@@ -109,6 +121,10 @@ func (_m *ModelEndpointAlertService) UpdateModelEndpointAlert(user string, alert
ret := _m.Called(user, alert)
var r0 *models.ModelEndpointAlert
+ var r1 error
+ if rf, ok := ret.Get(0).(func(string, *models.ModelEndpointAlert) (*models.ModelEndpointAlert, error)); ok {
+ return rf(user, alert)
+ }
if rf, ok := ret.Get(0).(func(string, *models.ModelEndpointAlert) *models.ModelEndpointAlert); ok {
r0 = rf(user, alert)
} else {
@@ -117,7 +133,6 @@ func (_m *ModelEndpointAlertService) UpdateModelEndpointAlert(user string, alert
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(string, *models.ModelEndpointAlert) error); ok {
r1 = rf(user, alert)
} else {
diff --git a/api/service/mocks/model_endpoints_service.go b/api/service/mocks/model_endpoints_service.go
index a44d52970..53893e16e 100644
--- a/api/service/mocks/model_endpoints_service.go
+++ b/api/service/mocks/model_endpoints_service.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.22.1. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
diff --git a/api/service/mocks/models_service.go b/api/service/mocks/models_service.go
index 27a96bd77..dd4bd8f64 100644
--- a/api/service/mocks/models_service.go
+++ b/api/service/mocks/models_service.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.22.1. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
diff --git a/api/service/mocks/prediction_job_service.go b/api/service/mocks/prediction_job_service.go
index 4a63dcbe3..9a8a8ace7 100644
--- a/api/service/mocks/prediction_job_service.go
+++ b/api/service/mocks/prediction_job_service.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.14.0. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
@@ -23,6 +23,10 @@ func (_m *PredictionJobService) CreatePredictionJob(ctx context.Context, env *mo
ret := _m.Called(ctx, env, model, version, predictionJob)
var r0 *models.PredictionJob
+ var r1 error
+ if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, *models.PredictionJob) (*models.PredictionJob, error)); ok {
+ return rf(ctx, env, model, version, predictionJob)
+ }
if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, *models.PredictionJob) *models.PredictionJob); ok {
r0 = rf(ctx, env, model, version, predictionJob)
} else {
@@ -31,7 +35,6 @@ func (_m *PredictionJobService) CreatePredictionJob(ctx context.Context, env *mo
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(context.Context, *models.Environment, *models.Model, *models.Version, *models.PredictionJob) error); ok {
r1 = rf(ctx, env, model, version, predictionJob)
} else {
@@ -46,6 +49,10 @@ func (_m *PredictionJobService) GetPredictionJob(ctx context.Context, env *model
ret := _m.Called(ctx, env, model, version, id)
var r0 *models.PredictionJob
+ var r1 error
+ if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, models.ID) (*models.PredictionJob, error)); ok {
+ return rf(ctx, env, model, version, id)
+ }
if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, models.ID) *models.PredictionJob); ok {
r0 = rf(ctx, env, model, version, id)
} else {
@@ -54,7 +61,6 @@ func (_m *PredictionJobService) GetPredictionJob(ctx context.Context, env *model
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(context.Context, *models.Environment, *models.Model, *models.Version, models.ID) error); ok {
r1 = rf(ctx, env, model, version, id)
} else {
@@ -69,6 +75,10 @@ func (_m *PredictionJobService) ListContainers(ctx context.Context, env *models.
ret := _m.Called(ctx, env, model, version, predictionJob)
var r0 []*models.Container
+ var r1 error
+ if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, *models.PredictionJob) ([]*models.Container, error)); ok {
+ return rf(ctx, env, model, version, predictionJob)
+ }
if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, *models.PredictionJob) []*models.Container); ok {
r0 = rf(ctx, env, model, version, predictionJob)
} else {
@@ -77,7 +87,6 @@ func (_m *PredictionJobService) ListContainers(ctx context.Context, env *models.
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(context.Context, *models.Environment, *models.Model, *models.Version, *models.PredictionJob) error); ok {
r1 = rf(ctx, env, model, version, predictionJob)
} else {
@@ -92,6 +101,10 @@ func (_m *PredictionJobService) ListPredictionJobs(ctx context.Context, project
ret := _m.Called(ctx, project, query)
var r0 []*models.PredictionJob
+ var r1 error
+ if rf, ok := ret.Get(0).(func(context.Context, mlp.Project, *service.ListPredictionJobQuery) ([]*models.PredictionJob, error)); ok {
+ return rf(ctx, project, query)
+ }
if rf, ok := ret.Get(0).(func(context.Context, mlp.Project, *service.ListPredictionJobQuery) []*models.PredictionJob); ok {
r0 = rf(ctx, project, query)
} else {
@@ -100,7 +113,6 @@ func (_m *PredictionJobService) ListPredictionJobs(ctx context.Context, project
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(context.Context, mlp.Project, *service.ListPredictionJobQuery) error); ok {
r1 = rf(ctx, project, query)
} else {
@@ -115,6 +127,10 @@ func (_m *PredictionJobService) StopPredictionJob(ctx context.Context, env *mode
ret := _m.Called(ctx, env, model, version, id)
var r0 *models.PredictionJob
+ var r1 error
+ if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, models.ID) (*models.PredictionJob, error)); ok {
+ return rf(ctx, env, model, version, id)
+ }
if rf, ok := ret.Get(0).(func(context.Context, *models.Environment, *models.Model, *models.Version, models.ID) *models.PredictionJob); ok {
r0 = rf(ctx, env, model, version, id)
} else {
@@ -123,7 +139,6 @@ func (_m *PredictionJobService) StopPredictionJob(ctx context.Context, env *mode
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(context.Context, *models.Environment, *models.Model, *models.Version, models.ID) error); ok {
r1 = rf(ctx, env, model, version, id)
} else {
diff --git a/api/service/mocks/projects_service.go b/api/service/mocks/projects_service.go
index ca52c581f..c543546de 100644
--- a/api/service/mocks/projects_service.go
+++ b/api/service/mocks/projects_service.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.28.2. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
diff --git a/api/service/mocks/queue_producer.go b/api/service/mocks/queue_producer.go
deleted file mode 100644
index 18a5f29ac..000000000
--- a/api/service/mocks/queue_producer.go
+++ /dev/null
@@ -1,27 +0,0 @@
-// Code generated by mockery v2.6.0. DO NOT EDIT.
-
-package mocks
-
-import (
- queue "github.com/caraml-dev/merlin/queue"
- mock "github.com/stretchr/testify/mock"
-)
-
-// QueueProducer is an autogenerated mock type for the QueueProducer type
-type QueueProducer struct {
- mock.Mock
-}
-
-// EnqueueJob provides a mock function with given fields: job
-func (_m *QueueProducer) EnqueueJob(job *queue.Job) error {
- ret := _m.Called(job)
-
- var r0 error
- if rf, ok := ret.Get(0).(func(*queue.Job) error); ok {
- r0 = rf(job)
- } else {
- r0 = ret.Error(0)
- }
-
- return r0
-}
diff --git a/api/service/mocks/secret_service.go b/api/service/mocks/secret_service.go
index 6ea89b9db..481040635 100644
--- a/api/service/mocks/secret_service.go
+++ b/api/service/mocks/secret_service.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.14.0. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
@@ -19,13 +19,16 @@ func (_m *SecretService) Create(ctx context.Context, projectID int32, secret mlp
ret := _m.Called(ctx, projectID, secret)
var r0 mlp.Secret
+ var r1 error
+ if rf, ok := ret.Get(0).(func(context.Context, int32, mlp.Secret) (mlp.Secret, error)); ok {
+ return rf(ctx, projectID, secret)
+ }
if rf, ok := ret.Get(0).(func(context.Context, int32, mlp.Secret) mlp.Secret); ok {
r0 = rf(ctx, projectID, secret)
} else {
r0 = ret.Get(0).(mlp.Secret)
}
- var r1 error
if rf, ok := ret.Get(1).(func(context.Context, int32, mlp.Secret) error); ok {
r1 = rf(ctx, projectID, secret)
} else {
@@ -54,13 +57,16 @@ func (_m *SecretService) GetByIDandProjectID(ctx context.Context, secretID int32
ret := _m.Called(ctx, secretID, projectID)
var r0 mlp.Secret
+ var r1 error
+ if rf, ok := ret.Get(0).(func(context.Context, int32, int32) (mlp.Secret, error)); ok {
+ return rf(ctx, secretID, projectID)
+ }
if rf, ok := ret.Get(0).(func(context.Context, int32, int32) mlp.Secret); ok {
r0 = rf(ctx, secretID, projectID)
} else {
r0 = ret.Get(0).(mlp.Secret)
}
- var r1 error
if rf, ok := ret.Get(1).(func(context.Context, int32, int32) error); ok {
r1 = rf(ctx, secretID, projectID)
} else {
@@ -75,6 +81,10 @@ func (_m *SecretService) List(ctx context.Context, projectID int32) (mlp.Secrets
ret := _m.Called(ctx, projectID)
var r0 mlp.Secrets
+ var r1 error
+ if rf, ok := ret.Get(0).(func(context.Context, int32) (mlp.Secrets, error)); ok {
+ return rf(ctx, projectID)
+ }
if rf, ok := ret.Get(0).(func(context.Context, int32) mlp.Secrets); ok {
r0 = rf(ctx, projectID)
} else {
@@ -83,7 +93,6 @@ func (_m *SecretService) List(ctx context.Context, projectID int32) (mlp.Secrets
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(context.Context, int32) error); ok {
r1 = rf(ctx, projectID)
} else {
@@ -98,13 +107,16 @@ func (_m *SecretService) Update(ctx context.Context, projectID int32, secret mlp
ret := _m.Called(ctx, projectID, secret)
var r0 mlp.Secret
+ var r1 error
+ if rf, ok := ret.Get(0).(func(context.Context, int32, mlp.Secret) (mlp.Secret, error)); ok {
+ return rf(ctx, projectID, secret)
+ }
if rf, ok := ret.Get(0).(func(context.Context, int32, mlp.Secret) mlp.Secret); ok {
r0 = rf(ctx, projectID, secret)
} else {
r0 = ret.Get(0).(mlp.Secret)
}
- var r1 error
if rf, ok := ret.Get(1).(func(context.Context, int32, mlp.Secret) error); ok {
r1 = rf(ctx, projectID, secret)
} else {
diff --git a/api/service/mocks/transformer_service.go b/api/service/mocks/transformer_service.go
index 0d0d5b335..ef73d2ffa 100644
--- a/api/service/mocks/transformer_service.go
+++ b/api/service/mocks/transformer_service.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.14.0. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
@@ -21,6 +21,10 @@ func (_m *TransformerService) SimulateTransformer(ctx context.Context, simulatio
ret := _m.Called(ctx, simulationPayload)
var r0 *types.PredictResponse
+ var r1 error
+ if rf, ok := ret.Get(0).(func(context.Context, *models.TransformerSimulation) (*types.PredictResponse, error)); ok {
+ return rf(ctx, simulationPayload)
+ }
if rf, ok := ret.Get(0).(func(context.Context, *models.TransformerSimulation) *types.PredictResponse); ok {
r0 = rf(ctx, simulationPayload)
} else {
@@ -29,7 +33,6 @@ func (_m *TransformerService) SimulateTransformer(ctx context.Context, simulatio
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(context.Context, *models.TransformerSimulation) error); ok {
r1 = rf(ctx, simulationPayload)
} else {
diff --git a/api/service/mocks/versions_service.go b/api/service/mocks/versions_service.go
index 1547474e7..ef1f5625c 100644
--- a/api/service/mocks/versions_service.go
+++ b/api/service/mocks/versions_service.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.22.1. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
diff --git a/api/service/version_endpoint_service.go b/api/service/version_endpoint_service.go
index fd69335fc..f87d64f4c 100644
--- a/api/service/version_endpoint_service.go
+++ b/api/service/version_endpoint_service.go
@@ -21,7 +21,6 @@ import (
"github.com/caraml-dev/merlin/cluster"
"github.com/caraml-dev/merlin/config"
- "github.com/caraml-dev/merlin/log"
"github.com/caraml-dev/merlin/models"
"github.com/caraml-dev/merlin/pkg/autoscaling"
"github.com/caraml-dev/merlin/pkg/deployment"
@@ -50,7 +49,7 @@ type EndpointsService interface {
// CountEndpoints count number of endpoint created from a model in an environment
CountEndpoints(ctx context.Context, environment *models.Environment, model *models.Model) (int, error)
// ListContainers list all container associated with an endpoint
- ListContainers(ctx context.Context, model *models.Model, version *models.Version, endpointUuid uuid.UUID) ([]*models.Container, error)
+ ListContainers(ctx context.Context, model *models.Model, version *models.Version, endpoint *models.VersionEndpoint) ([]*models.Container, error)
// DeleteEndpoint hard delete endpoint data, including the relation from deployment
DeleteEndpoint(version *models.Version, endpoint *models.VersionEndpoint) error
}
@@ -142,12 +141,7 @@ func (k *endpointService) DeployEndpoint(ctx context.Context, environment *model
},
},
}); err != nil {
- // if error enqueue job, mark endpoint status to failed
- endpoint.Status = models.EndpointFailed
- if err := k.storage.Save(endpoint); err != nil {
- log.Errorf("error to update endpoint %s status to failed: %v", endpoint.ID, err)
- }
- return nil, err
+ return nil, fmt.Errorf("failed to enqueue model service deployment job: %w", err)
}
return endpoint, nil
@@ -251,7 +245,10 @@ func (k *endpointService) UndeployEndpoint(ctx context.Context, environment *mod
}
modelService := &models.Service{
- Name: models.CreateInferenceServiceName(model.Name, version.ID.String()),
+ Name: models.CreateInferenceServiceName(model.Name, version.ID.String(), endpoint.RevisionID.String()),
+ ModelName: model.Name,
+ ModelVersion: version.ID.String(),
+ RevisionID: endpoint.RevisionID,
Namespace: model.Project.Name,
ResourceRequest: endpoint.ResourceRequest,
Transformer: endpoint.Transformer,
@@ -277,8 +274,8 @@ func (k *endpointService) CountEndpoints(ctx context.Context, environment *model
}
// ListContainers list all containers belong to the given version endpoint
-func (k *endpointService) ListContainers(ctx context.Context, model *models.Model, version *models.Version, id uuid.UUID) ([]*models.Container, error) {
- ve, err := k.storage.Get(id)
+func (k *endpointService) ListContainers(ctx context.Context, model *models.Model, version *models.Version, endpoint *models.VersionEndpoint) ([]*models.Container, error) {
+ ve, err := k.storage.Get(endpoint.ID)
if err != nil {
return nil, err
}
@@ -298,14 +295,16 @@ func (k *endpointService) ListContainers(ctx context.Context, model *models.Mode
containers = append(containers, imgBuilderContainers...)
}
- modelContainers, err := ctl.GetContainers(ctx, model.Project.Name, models.OnlineInferencePodLabelSelector(model.Name, version.ID.String()))
+ labelSelector := models.OnlineInferencePodLabelSelector(model.Name, version.ID.String(), endpoint.RevisionID.String())
+
+ modelContainers, err := ctl.GetContainers(ctx, model.Project.Name, labelSelector)
if err != nil {
return nil, err
}
containers = append(containers, modelContainers...)
for _, container := range containers {
- container.VersionEndpointID = id
+ container.VersionEndpointID = endpoint.ID
}
return containers, nil
diff --git a/api/service/version_endpoint_service_test.go b/api/service/version_endpoint_service_test.go
index a59941a17..9f45a3050 100644
--- a/api/service/version_endpoint_service_test.go
+++ b/api/service/version_endpoint_service_test.go
@@ -22,9 +22,6 @@ import (
"testing"
"time"
- "github.com/caraml-dev/merlin/pkg/autoscaling"
- "github.com/caraml-dev/merlin/pkg/deployment"
- "github.com/caraml-dev/merlin/pkg/protocol"
"github.com/feast-dev/feast/sdk/go/protos/feast/core"
"github.com/feast-dev/feast/sdk/go/protos/feast/types"
"github.com/google/uuid"
@@ -41,7 +38,10 @@ import (
"github.com/caraml-dev/merlin/config"
"github.com/caraml-dev/merlin/mlp"
"github.com/caraml-dev/merlin/models"
+ "github.com/caraml-dev/merlin/pkg/autoscaling"
+ "github.com/caraml-dev/merlin/pkg/deployment"
imageBuilderMock "github.com/caraml-dev/merlin/pkg/imagebuilder/mocks"
+ "github.com/caraml-dev/merlin/pkg/protocol"
"github.com/caraml-dev/merlin/pkg/transformer"
feastmocks "github.com/caraml-dev/merlin/pkg/transformer/feast/mocks"
"github.com/caraml-dev/merlin/pkg/transformer/spec"
@@ -79,7 +79,7 @@ func TestDeployEndpoint(t *testing.T) {
model := &models.Model{Name: "model", Project: project}
version := &models.Version{ID: 1}
- iSvcName := fmt.Sprintf("%s-%d", model.Name, version.ID)
+ // iSvcName := fmt.Sprintf("%s-%d-0", model.Name, version.ID)
tests := []struct {
name string
@@ -96,14 +96,13 @@ func TestDeployEndpoint(t *testing.T) {
&models.VersionEndpoint{},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
- ResourceRequest: env.DefaultResourceRequest,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
- Protocol: protocol.HttpJson,
+ DeploymentMode: deployment.ServerlessDeploymentMode,
+ AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
+ ResourceRequest: env.DefaultResourceRequest,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
+ Protocol: protocol.HttpJson,
},
wantDeployError: false,
},
@@ -123,12 +122,11 @@ func TestDeployEndpoint(t *testing.T) {
},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
+ DeploymentMode: deployment.ServerlessDeploymentMode,
+ AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
ResourceRequest: &models.ResourceRequest{
MinReplica: 2,
MaxReplica: 4,
@@ -148,14 +146,13 @@ func TestDeployEndpoint(t *testing.T) {
&models.VersionEndpoint{},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
- ResourceRequest: env.DefaultResourceRequest,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
- Protocol: protocol.HttpJson,
+ DeploymentMode: deployment.ServerlessDeploymentMode,
+ AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
+ ResourceRequest: env.DefaultResourceRequest,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
+ Protocol: protocol.HttpJson,
},
wantDeployError: false,
},
@@ -170,14 +167,13 @@ func TestDeployEndpoint(t *testing.T) {
},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
- ResourceRequest: env.DefaultResourceRequest,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
- Protocol: protocol.HttpJson,
+ DeploymentMode: deployment.ServerlessDeploymentMode,
+ AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
+ ResourceRequest: env.DefaultResourceRequest,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
+ Protocol: protocol.HttpJson,
},
wantDeployError: false,
},
@@ -192,14 +188,13 @@ func TestDeployEndpoint(t *testing.T) {
},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
- ResourceRequest: env.DefaultResourceRequest,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
- Protocol: protocol.HttpJson,
+ DeploymentMode: deployment.ServerlessDeploymentMode,
+ AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
+ ResourceRequest: env.DefaultResourceRequest,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
+ Protocol: protocol.HttpJson,
},
wantDeployError: false,
},
@@ -210,13 +205,12 @@ func TestDeployEndpoint(t *testing.T) {
&models.Model{Name: "model", Project: project, Type: models.ModelTypeCustom},
&models.Version{ID: 1},
&models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
- ResourceRequest: env.DefaultResourceRequest,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
+ DeploymentMode: deployment.ServerlessDeploymentMode,
+ AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
+ ResourceRequest: env.DefaultResourceRequest,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
EnvVars: models.EnvVars{
{
Name: "TF_MODEL_NAME",
@@ -231,13 +225,12 @@ func TestDeployEndpoint(t *testing.T) {
},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
- ResourceRequest: env.DefaultResourceRequest,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
+ DeploymentMode: deployment.ServerlessDeploymentMode,
+ AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
+ ResourceRequest: env.DefaultResourceRequest,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
EnvVars: models.EnvVars{
{
Name: "TF_MODEL_NAME",
@@ -278,13 +271,12 @@ func TestDeployEndpoint(t *testing.T) {
},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
- ResourceRequest: env.DefaultResourceRequest,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
+ DeploymentMode: deployment.ServerlessDeploymentMode,
+ AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
+ ResourceRequest: env.DefaultResourceRequest,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
Transformer: &models.Transformer{
Enabled: true,
Image: "ghcr.io/caraml-dev/merlin-transformer-test",
@@ -321,12 +313,11 @@ func TestDeployEndpoint(t *testing.T) {
},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
+ DeploymentMode: deployment.ServerlessDeploymentMode,
+ AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
ResourceRequest: &models.ResourceRequest{
MinReplica: 2,
MaxReplica: 4,
@@ -370,12 +361,11 @@ func TestDeployEndpoint(t *testing.T) {
},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
+ DeploymentMode: deployment.ServerlessDeploymentMode,
+ AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
ResourceRequest: &models.ResourceRequest{
MinReplica: 2,
MaxReplica: 4,
@@ -415,12 +405,11 @@ func TestDeployEndpoint(t *testing.T) {
},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
+ DeploymentMode: deployment.ServerlessDeploymentMode,
+ AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
ResourceRequest: &models.ResourceRequest{
MinReplica: 2,
MaxReplica: 4,
@@ -464,12 +453,11 @@ func TestDeployEndpoint(t *testing.T) {
},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
+ DeploymentMode: deployment.ServerlessDeploymentMode,
+ AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
ResourceRequest: &models.ResourceRequest{
MinReplica: 2,
MaxReplica: 4,
@@ -518,10 +506,9 @@ func TestDeployEndpoint(t *testing.T) {
},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
ResourceRequest: &models.ResourceRequest{
MinReplica: 2,
MaxReplica: 4,
@@ -576,10 +563,9 @@ func TestDeployEndpoint(t *testing.T) {
},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
ResourceRequest: &models.ResourceRequest{
MinReplica: 2,
MaxReplica: 4,
@@ -637,10 +623,9 @@ func TestDeployEndpoint(t *testing.T) {
},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
ResourceRequest: &models.ResourceRequest{
MinReplica: 2,
MaxReplica: 4,
@@ -699,10 +684,9 @@ func TestDeployEndpoint(t *testing.T) {
MetricsType: autoscaling.CPUUtilization,
TargetValue: 50,
},
- EnvironmentName: env.Name,
- Namespace: project.Name,
- InferenceServiceName: iSvcName,
- Protocol: protocol.HttpJson,
+ EnvironmentName: env.Name,
+ Namespace: project.Name,
+ Protocol: protocol.HttpJson,
},
},
},
@@ -731,10 +715,9 @@ func TestDeployEndpoint(t *testing.T) {
},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
ResourceRequest: &models.ResourceRequest{
MinReplica: 2,
MaxReplica: 4,
@@ -773,14 +756,13 @@ func TestDeployEndpoint(t *testing.T) {
},
},
expectedEndpoint: &models.VersionEndpoint{
- InferenceServiceName: iSvcName,
- DeploymentMode: deployment.ServerlessDeploymentMode,
- AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
- ResourceRequest: env.DefaultResourceRequest,
- Namespace: project.Name,
- URL: "",
- Status: models.EndpointPending,
- Protocol: protocol.UpiV1,
+ DeploymentMode: deployment.ServerlessDeploymentMode,
+ AutoscalingPolicy: autoscaling.DefaultServerlessAutoscalingPolicy,
+ ResourceRequest: env.DefaultResourceRequest,
+ Namespace: project.Name,
+ URL: "",
+ Status: models.EndpointPending,
+ Protocol: protocol.UpiV1,
},
wantDeployError: false,
},
@@ -2171,13 +2153,22 @@ func TestDeployEndpoint_StandardTransformer(t *testing.T) {
}
func TestListContainers(t *testing.T) {
+ id := uuid.New()
+
project := mlp.Project{ID: 1, Name: "my-project"}
model := &models.Model{ID: 1, Name: "model", Type: models.ModelTypeXgboost, Project: project, ProjectID: models.ID(project.ID)}
version := &models.Version{ID: 1}
- id := uuid.New()
+ revisionID := models.ID(1)
env := &models.Environment{Name: "my-env", Cluster: "my-cluster", IsDefault: &isDefaultTrue}
+ endpoint := &models.VersionEndpoint{
+ ID: id,
+ VersionID: version.ID,
+ VersionModelID: model.ID,
+ RevisionID: revisionID,
+ EnvironmentName: env.Name,
+ }
cfg := &config.Config{
- Environment: "dev",
+ Environment: env.Name,
FeatureToggleConfig: config.FeatureToggleConfig{
MonitoringConfig: config.MonitoringConfig{
MonitoringEnabled: false,
@@ -2186,9 +2177,9 @@ func TestListContainers(t *testing.T) {
}
type args struct {
- model *models.Model
- version *models.Version
- id uuid.UUID
+ model *models.Model
+ version *models.Version
+ endpoint *models.VersionEndpoint
}
type componentMock struct {
@@ -2206,20 +2197,21 @@ func TestListContainers(t *testing.T) {
{
"success: non-pyfunc model",
args{
- model, version, id,
+ model, version, endpoint,
},
componentMock{
&models.VersionEndpoint{
ID: id,
VersionID: version.ID,
VersionModelID: model.ID,
+ RevisionID: revisionID,
EnvironmentName: env.Name,
},
nil,
[]*models.Container{
{
Name: "user-container",
- PodName: "mymodel-2-predictor-hlqgv-deployment-6f478cbc67-mp7zf",
+ PodName: "mymodel-2-r1-predictor-hlqgv-deployment-6f478cbc67-mp7zf",
Namespace: project.Name,
Cluster: env.Cluster,
GcpProject: env.GcpProject,
@@ -2231,13 +2223,14 @@ func TestListContainers(t *testing.T) {
{
"success: pyfunc model",
args{
- model, version, id,
+ model, version, endpoint,
},
componentMock{
&models.VersionEndpoint{
ID: id,
VersionID: version.ID,
VersionModelID: model.ID,
+ RevisionID: revisionID,
EnvironmentName: env.Name,
},
&models.Container{
@@ -2250,7 +2243,7 @@ func TestListContainers(t *testing.T) {
[]*models.Container{
{
Name: "user-container",
- PodName: "mymodel-2-predictor-hlqgv-deployment-6f478cbc67-mp7zf",
+ PodName: "mymodel-2-r1-predictor-hlqgv-deployment-6f478cbc67-mp7zf",
Namespace: project.Name,
Cluster: env.Cluster,
GcpProject: env.GcpProject,
@@ -2267,7 +2260,7 @@ func TestListContainers(t *testing.T) {
Return(tt.mock.imageBuilderContainer, nil)
envController := &clusterMock.Controller{}
- envController.On("GetContainers", context.Background(), "my-project", "serving.kserve.io/inferenceservice=model-1").
+ envController.On("GetContainers", context.Background(), "my-project", "serving.kserve.io/inferenceservice=model-1-r1").
Return(tt.mock.modelContainers, nil)
controllers := map[string]cluster.Controller{env.Name: envController}
@@ -2277,7 +2270,6 @@ func TestListContainers(t *testing.T) {
mockStorage.On("Get", mock.Anything).Return(tt.mock.versionEndpoint, nil)
mockDeploymentStorage.On("Save", mock.Anything).Return(nil, nil)
- // endpointSvc := NewEndpointService(controllers, imgBuilder, mockStorage, mockDeploymentStorage, cfg.Environment, cfg.FeatureToggleConfig.MonitoringConfig, loggerDestinationURL)
endpointSvc := NewEndpointService(EndpointServiceParams{
ClusterControllers: controllers,
ImageBuilder: imgBuilder,
@@ -2287,7 +2279,8 @@ func TestListContainers(t *testing.T) {
MonitoringConfig: cfg.FeatureToggleConfig.MonitoringConfig,
LoggerDestinationURL: loggerDestinationURL,
})
- containers, err := endpointSvc.ListContainers(context.Background(), tt.args.model, tt.args.version, tt.args.id)
+
+ containers, err := endpointSvc.ListContainers(context.Background(), tt.args.model, tt.args.version, tt.args.endpoint)
if !tt.wantError {
assert.Nil(t, err, "unwanted error %v", err)
} else {
diff --git a/api/storage/deployment_storage.go b/api/storage/deployment_storage.go
index 33687a4ee..9e6ad7791 100644
--- a/api/storage/deployment_storage.go
+++ b/api/storage/deployment_storage.go
@@ -23,6 +23,8 @@ import (
type DeploymentStorage interface {
// ListInModel return all deployment within a model
ListInModel(model *models.Model) ([]*models.Deployment, error)
+ // ListInModelVersion return all deployment within a model
+ ListInModelVersion(modelID, versionID, endpointUUID string) ([]*models.Deployment, error)
// Save save the deployment to underlying storage
Save(deployment *models.Deployment) (*models.Deployment, error)
// GetFirstSuccessModelVersionPerModel Return mapping of model id and the first model version with a successful model version
@@ -44,6 +46,12 @@ func (d *deploymentStorage) ListInModel(model *models.Model) ([]*models.Deployme
return deployments, err
}
+func (d *deploymentStorage) ListInModelVersion(modelID, versionID, endpointUUID string) ([]*models.Deployment, error) {
+ var deployments []*models.Deployment
+ err := d.db.Where("version_model_id = ? AND version_id = ? AND version_endpoint_id = ?", modelID, versionID, endpointUUID).Find(&deployments).Error
+ return deployments, err
+}
+
func (d *deploymentStorage) Save(deployment *models.Deployment) (*models.Deployment, error) {
err := d.db.Save(deployment).Error
return deployment, err
diff --git a/api/storage/mocks/alert_storage.go b/api/storage/mocks/alert_storage.go
index f21032904..e8a583ebd 100644
--- a/api/storage/mocks/alert_storage.go
+++ b/api/storage/mocks/alert_storage.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.14.0. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
@@ -45,6 +45,10 @@ func (_m *AlertStorage) GetModelEndpointAlert(modelID models.ID, modelEndpointID
ret := _m.Called(modelID, modelEndpointID)
var r0 *models.ModelEndpointAlert
+ var r1 error
+ if rf, ok := ret.Get(0).(func(models.ID, models.ID) (*models.ModelEndpointAlert, error)); ok {
+ return rf(modelID, modelEndpointID)
+ }
if rf, ok := ret.Get(0).(func(models.ID, models.ID) *models.ModelEndpointAlert); ok {
r0 = rf(modelID, modelEndpointID)
} else {
@@ -53,7 +57,6 @@ func (_m *AlertStorage) GetModelEndpointAlert(modelID models.ID, modelEndpointID
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(models.ID, models.ID) error); ok {
r1 = rf(modelID, modelEndpointID)
} else {
@@ -68,6 +71,10 @@ func (_m *AlertStorage) ListModelEndpointAlerts(modelID models.ID) ([]*models.Mo
ret := _m.Called(modelID)
var r0 []*models.ModelEndpointAlert
+ var r1 error
+ if rf, ok := ret.Get(0).(func(models.ID) ([]*models.ModelEndpointAlert, error)); ok {
+ return rf(modelID)
+ }
if rf, ok := ret.Get(0).(func(models.ID) []*models.ModelEndpointAlert); ok {
r0 = rf(modelID)
} else {
@@ -76,7 +83,6 @@ func (_m *AlertStorage) ListModelEndpointAlerts(modelID models.ID) ([]*models.Mo
}
}
- var r1 error
if rf, ok := ret.Get(1).(func(models.ID) error); ok {
r1 = rf(modelID)
} else {
diff --git a/api/storage/mocks/deployment_storage.go b/api/storage/mocks/deployment_storage.go
index 7535732ab..c9a231353 100644
--- a/api/storage/mocks/deployment_storage.go
+++ b/api/storage/mocks/deployment_storage.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.22.1. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
@@ -78,6 +78,32 @@ func (_m *DeploymentStorage) ListInModel(model *models.Model) ([]*models.Deploym
return r0, r1
}
+// ListInModelVersion provides a mock function with given fields: modelID, versionID, endpointUUID
+func (_m *DeploymentStorage) ListInModelVersion(modelID string, versionID string, endpointUUID string) ([]*models.Deployment, error) {
+ ret := _m.Called(modelID, versionID, endpointUUID)
+
+ var r0 []*models.Deployment
+ var r1 error
+ if rf, ok := ret.Get(0).(func(string, string, string) ([]*models.Deployment, error)); ok {
+ return rf(modelID, versionID, endpointUUID)
+ }
+ if rf, ok := ret.Get(0).(func(string, string, string) []*models.Deployment); ok {
+ r0 = rf(modelID, versionID, endpointUUID)
+ } else {
+ if ret.Get(0) != nil {
+ r0 = ret.Get(0).([]*models.Deployment)
+ }
+ }
+
+ if rf, ok := ret.Get(1).(func(string, string, string) error); ok {
+ r1 = rf(modelID, versionID, endpointUUID)
+ } else {
+ r1 = ret.Error(1)
+ }
+
+ return r0, r1
+}
+
// Save provides a mock function with given fields: deployment
func (_m *DeploymentStorage) Save(deployment *models.Deployment) (*models.Deployment, error) {
ret := _m.Called(deployment)
diff --git a/api/storage/mocks/model_endpoint_storage.go b/api/storage/mocks/model_endpoint_storage.go
index 40e190969..26423ff77 100644
--- a/api/storage/mocks/model_endpoint_storage.go
+++ b/api/storage/mocks/model_endpoint_storage.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.22.1. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
diff --git a/api/storage/mocks/prediction_job_storage.go b/api/storage/mocks/prediction_job_storage.go
index d856c7a24..642b3366b 100644
--- a/api/storage/mocks/prediction_job_storage.go
+++ b/api/storage/mocks/prediction_job_storage.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.22.1. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
diff --git a/api/storage/mocks/version_endpoint_storage.go b/api/storage/mocks/version_endpoint_storage.go
index b92d5fe4b..2707677dd 100644
--- a/api/storage/mocks/version_endpoint_storage.go
+++ b/api/storage/mocks/version_endpoint_storage.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.22.1. DO NOT EDIT.
+// Code generated by mockery v2.20.0. DO NOT EDIT.
package mocks
@@ -38,13 +38,13 @@ func (_m *VersionEndpointStorage) CountEndpoints(environment *models.Environment
return r0, r1
}
-// Delete provides a mock function with given fields: version
-func (_m *VersionEndpointStorage) Delete(version *models.VersionEndpoint) error {
- ret := _m.Called(version)
+// Delete provides a mock function with given fields: endpoint
+func (_m *VersionEndpointStorage) Delete(endpoint *models.VersionEndpoint) error {
+ ret := _m.Called(endpoint)
var r0 error
if rf, ok := ret.Get(0).(func(*models.VersionEndpoint) error); ok {
- r0 = rf(version)
+ r0 = rf(endpoint)
} else {
r0 = ret.Error(0)
}
diff --git a/db-migrations/32_revision_id.down.sql b/db-migrations/32_revision_id.down.sql
new file mode 100644
index 000000000..ecc8bd04d
--- /dev/null
+++ b/db-migrations/32_revision_id.down.sql
@@ -0,0 +1 @@
+ALTER TABLE version_endpoints DROP COLUMN revision_id;
diff --git a/db-migrations/32_revision_id.up.sql b/db-migrations/32_revision_id.up.sql
new file mode 100644
index 000000000..4fee07746
--- /dev/null
+++ b/db-migrations/32_revision_id.up.sql
@@ -0,0 +1,2 @@
+ALTER TABLE version_endpoints
+ADD COLUMN revision_id VARCHAR(32);
diff --git a/python/sdk/test/integration_test.py b/python/sdk/test/integration_test.py
index c77feee65..8c3a14756 100644
--- a/python/sdk/test/integration_test.py
+++ b/python/sdk/test/integration_test.py
@@ -98,10 +98,7 @@ def test_sklearn(
with merlin.new_model_version() as v:
merlin.log_model(model_dir=model_dir)
- resource_request = ResourceRequest(1, 1, "100m", "200Mi")
- endpoint = merlin.deploy(
- v, deployment_mode=deployment_mode, resource_request=resource_request
- )
+ endpoint = merlin.deploy(v, deployment_mode=deployment_mode)
resp = requests.post(f"{endpoint.url}", json=request_json)
assert resp.status_code == 200
@@ -133,10 +130,7 @@ def test_xgboost(
# Upload the serialized model to MLP
merlin.log_model(model_dir=model_dir)
- resource_request = ResourceRequest(1, 1, "100m", "200Mi")
- endpoint = merlin.deploy(
- v, deployment_mode=deployment_mode, resource_request=resource_request
- )
+ endpoint = merlin.deploy(v, deployment_mode=deployment_mode)
resp = requests.post(f"{endpoint.url}", json=request_json)
assert resp.status_code == 200
@@ -240,8 +234,7 @@ def test_pytorch(integration_test_url, project_name, use_google_oauth, requests)
with merlin.new_model_version() as v:
merlin.log_model(model_dir=model_dir)
- resource_request = ResourceRequest(1, 1, "100m", "200Mi")
- endpoint = merlin.deploy(v, resource_request=resource_request)
+ endpoint = merlin.deploy(v)
resp = requests.post(f"{endpoint.url}", json=request_json)
@@ -265,9 +258,8 @@ def test_set_traffic(integration_test_url, project_name, use_google_oauth, reque
with merlin.new_model_version() as v:
# Upload the serialized model to MLP
- resource_request = ResourceRequest(1, 1, "100m", "200Mi")
merlin.log_model(model_dir=model_dir)
- endpoint = merlin.deploy(v, resource_request=resource_request)
+ endpoint = merlin.deploy(v)
resp = requests.post(f"{endpoint.url}", json=request_json)
@@ -307,9 +299,8 @@ def test_serve_traffic(integration_test_url, project_name, use_google_oauth, req
with merlin.new_model_version() as v:
# Upload the serialized model to MLP
- resource_request = ResourceRequest(1, 1, "100m", "200Mi")
merlin.log_model(model_dir=model_dir)
- endpoint = merlin.deploy(v, resource_request=resource_request)
+ endpoint = merlin.deploy(v)
resp = requests.post(f"{endpoint.url}", json=request_json)
@@ -355,10 +346,7 @@ def test_multi_env(integration_test_url, project_name, use_google_oauth, request
with merlin.new_model_version() as v:
# Upload the serialized model to MLP
merlin.log_model(model_dir=model_dir)
- resource_request = ResourceRequest(1, 1, "100m", "200Mi")
- endpoint = merlin.deploy(
- v, environment_name=default_env.name, resource_request=resource_request
- )
+ endpoint = merlin.deploy(v, environment_name=default_env.name)
resp = requests.post(f"{endpoint.url}", json=request_json)
@@ -395,7 +383,7 @@ def test_resource_request(
# Upload the serialized model to MLP
merlin.log_model(model_dir=model_dir)
- resource_request = ResourceRequest(1, 1, "100m", "200Mi")
+ resource_request = ResourceRequest(1, 1, "100m", "256Mi")
endpoint = merlin.deploy(
v,
environment_name=default_env.name,
@@ -444,7 +432,7 @@ def test_resource_request_with_gpu(
# Upload the serialized model to MLP
merlin.log_model(model_dir=model_dir)
- resource_request = ResourceRequest(1, 1, "100m", "200Mi", **gpu_config)
+ resource_request = ResourceRequest(1, 1, "100m", "256Mi", **gpu_config)
endpoint = merlin.deploy(
v,
environment_name=default_env.name,
diff --git a/python/sdk/test/pytorch-model/pytorch-sample/config/config.properties b/python/sdk/test/pytorch-model/pytorch-sample/config/config.properties
index bd7e62955..876c0e1a9 100644
--- a/python/sdk/test/pytorch-model/pytorch-sample/config/config.properties
+++ b/python/sdk/test/pytorch-model/pytorch-sample/config/config.properties
@@ -10,4 +10,4 @@ job_queue_size=10
enable_envvars_config=true
install_py_dep_per_model=true
model_store=/mnt/models/model-store
-model_snapshot={"name":"startup.cfg","modelCount":1,"models":{"pytorch-sample-1":{"1.0":{"defaultVersion":true,"marName":"pytorch-sample.mar","minWorkers":1,"maxWorkers":5,"batchSize":1,"maxBatchDelay":10,"responseTimeout":120}}}}
+model_snapshot={"name":"startup.cfg","modelCount":1,"models":{"pytorch-sample-1-r1":{"1.0":{"defaultVersion":true,"marName":"pytorch-sample.mar","minWorkers":1,"maxWorkers":5,"batchSize":1,"maxBatchDelay":10,"responseTimeout":120}}}}
diff --git a/scripts/e2e/run-e2e.sh b/scripts/e2e/run-e2e.sh
index 6d9f7f207..3d1f9c1fa 100755
--- a/scripts/e2e/run-e2e.sh
+++ b/scripts/e2e/run-e2e.sh
@@ -33,4 +33,4 @@ kubectl create namespace ${E2E_PROJECT_NAME} --dry-run=client -o yaml | kubectl
cd ../../python/sdk
pip install pipenv==2023.7.23
pipenv install --dev --skip-lock --python ${PYTHON_VERSION}
-pipenv run pytest -n=8 -W=ignore --cov=merlin -m "not (gpu or feast or batch or pyfunc or local_server_test or cli or customtransformer)" --durations=0
+pipenv run pytest -n=4 -W=ignore --cov=merlin -m "not (gpu or feast or batch or pyfunc or local_server_test or cli or customtransformer)" --durations=0
diff --git a/scripts/e2e/values-e2e.yaml b/scripts/e2e/values-e2e.yaml
index d59c5f2bb..9d18f8527 100644
--- a/scripts/e2e/values-e2e.yaml
+++ b/scripts/e2e/values-e2e.yaml
@@ -80,8 +80,8 @@ environmentConfigs:
default_deployment_config:
min_replica: 0
max_replica: 1
- cpu_request: "50m"
- memory_request: "128Mi"
+ cpu_request: "100m"
+ memory_request: "256Mi"
default_transformer_config:
min_replica: 0
max_replica: 1
diff --git a/ui/src/pages/version/HistoryDetails.js b/ui/src/pages/version/HistoryDetails.js
new file mode 100644
index 000000000..26726ee67
--- /dev/null
+++ b/ui/src/pages/version/HistoryDetails.js
@@ -0,0 +1,182 @@
+import { DateFromNow } from "@caraml-dev/ui-lib";
+import {
+ EuiBadge,
+ EuiButtonIcon,
+ EuiCodeBlock,
+ EuiFlexGroup,
+ EuiFlexItem,
+ EuiHealth,
+ EuiInMemoryTable,
+ EuiScreenReaderOnly,
+ EuiText,
+} from "@elastic/eui";
+import { useState } from "react";
+import { ConfigSection, ConfigSectionPanel } from "../../components/section";
+import { useMerlinApi } from "../../hooks/useMerlinApi";
+
+const defaultTextSize = "s";
+
+const DeploymentStatus = ({
+ status,
+ deployment,
+ deployedRevision,
+ endpointStatus,
+}) => {
+ if (deployment.error !== "") {
+ return Failed;
+ }
+
+ if (status === "running" || status === "serving") {
+ if (
+ deployment.id === deployedRevision.id &&
+ (endpointStatus === "running" || endpointStatus === "serving")
+ ) {
+ return Deployed;
+ }
+ return Not Deployed;
+ } else if (status === "pending") {
+ return Pending;
+ }
+};
+
+const RevisionPanel = ({ deployments, deploymentsLoaded, endpoint }) => {
+ const orderedDeployments = deployments.sort((a, b) => b.id - a.id);
+
+ const deployedRevision = orderedDeployments.find(
+ (deployment) =>
+ deployment.status === "running" || deployment.status === "serving"
+ ) || { id: null };
+
+ const canBeExpanded = (deployment) => {
+ return deployment.error !== "";
+ };
+
+ const [itemIdToExpandedRowMap, setItemIdToExpandedRowMap] = useState({});
+
+ const toggleDetails = (deployment) => {
+ const itemIdToExpandedRowMapValues = { ...itemIdToExpandedRowMap };
+
+ if (itemIdToExpandedRowMapValues[deployment.id]) {
+ delete itemIdToExpandedRowMapValues[deployment.id];
+ } else {
+ itemIdToExpandedRowMapValues[deployment.id] = (
+ <>
+
+ Error message
+
+ {deployment.error}
+ >
+ );
+ }
+ setItemIdToExpandedRowMap(itemIdToExpandedRowMapValues);
+ };
+
+ const cellProps = (item, column) => {
+ if (column.field !== "actions" && canBeExpanded(item)) {
+ return {
+ style: { cursor: "pointer" },
+ onClick: () => toggleDetails(item),
+ };
+ }
+ return undefined;
+ };
+
+ const columns = [
+ {
+ field: "updated_at",
+ name: "Deployment Time",
+ render: (date, deployment) => (
+ <>
+
+
+ {deployment.id === deployedRevision.id && (
+ Current
+ )}
+ {/* {JSON.stringify(deployment.id)} */}
+ >
+ ),
+ },
+ {
+ field: "status",
+ name: "Deployment Status",
+ render: (status, deployment) => (
+
+ ),
+ },
+ {
+ align: "right",
+ width: "40px",
+ isExpander: true,
+ name: (
+
+ Expand rows
+
+ ),
+ render: (deployment) => {
+ const itemIdToExpandedRowMapValues = { ...itemIdToExpandedRowMap };
+
+ return (
+ canBeExpanded(deployment) && (
+ toggleDetails(deployment)}
+ aria-label={
+ itemIdToExpandedRowMapValues[deployment.id]
+ ? "Collapse"
+ : "Expand"
+ }
+ iconType={
+ itemIdToExpandedRowMapValues[deployment.id]
+ ? "arrowUp"
+ : "arrowDown"
+ }
+ />
+ )
+ );
+ },
+ },
+ ];
+
+ return (
+
+
+
+
+
+ );
+};
+
+export const HistoryDetails = ({ model, version, endpoint }) => {
+ const [{ data: deployments, isLoaded: deploymentsLoaded }] = useMerlinApi(
+ `/models/${model.id}/versions/${version.id}/endpoints/${endpoint.id}/deployments`,
+ {},
+ []
+ );
+
+ return (
+
+
+
+
+
+
+ );
+};
diff --git a/ui/src/pages/version/VersionDetails.js b/ui/src/pages/version/VersionDetails.js
index 037a0be38..4b65f7faa 100644
--- a/ui/src/pages/version/VersionDetails.js
+++ b/ui/src/pages/version/VersionDetails.js
@@ -14,8 +14,7 @@
* limitations under the License.
*/
-import React, { Fragment, useEffect, useState } from "react";
-import { Link, Route, Routes, useParams } from "react-router-dom";
+import { replaceBreadcrumbs } from "@caraml-dev/ui-lib";
import {
EuiButton,
EuiEmptyPrompt,
@@ -24,16 +23,18 @@ import {
EuiLoadingContent,
EuiPageTemplate,
EuiSpacer,
- EuiText
+ EuiText,
} from "@elastic/eui";
-import { replaceBreadcrumbs } from "@caraml-dev/ui-lib";
+import React, { Fragment, useEffect, useState } from "react";
+import { Link, Route, Routes, useParams } from "react-router-dom";
+import { ContainerLogsView } from "../../components/logs/ContainerLogsView";
import config from "../../config";
-import mocks from "../../mocks";
import { useMerlinApi } from "../../hooks/useMerlinApi";
-import { ContainerLogsView } from "../../components/logs/ContainerLogsView";
+import mocks from "../../mocks";
import { DeploymentPanelHeader } from "./DeploymentPanelHeader";
-import { ModelVersionPanelHeader } from "./ModelVersionPanelHeader";
import { EndpointDetails } from "./EndpointDetails";
+import { HistoryDetails } from "./HistoryDetails";
+import { ModelVersionPanelHeader } from "./ModelVersionPanelHeader";
import { VersionTabNavigation } from "./VersionTabNavigation";
/**
@@ -41,7 +42,13 @@ import { VersionTabNavigation } from "./VersionTabNavigation";
* In this page users can also manage all deployed endpoint created from the model version.
*/
const VersionDetails = () => {
- const { projectId, modelId, versionId, endpointId, "*": section } = useParams();
+ const {
+ projectId,
+ modelId,
+ versionId,
+ endpointId,
+ "*": section,
+ } = useParams();
const [{ data: model, isLoaded: modelLoaded }] = useMerlinApi(
`/projects/${projectId}/models/${modelId}`,
{ mock: mocks.model },
@@ -63,12 +70,12 @@ const VersionDetails = () => {
if (version.endpoints && version.endpoints.length > 0) {
setIsDeployed(true);
setEnvironments(
- version.endpoints.map(endpoint => endpoint.environment)
+ version.endpoints.map((endpoint) => endpoint.environment)
);
if (endpointId) {
setEndpoint(
- version.endpoints.find(endpoint => endpoint.id === endpointId)
+ version.endpoints.find((endpoint) => endpoint.id === endpointId)
);
}
}
@@ -82,22 +89,22 @@ const VersionDetails = () => {
breadCrumbs.push(
{
text: "Models",
- href: `/merlin/projects/${model.project_id}/models`
+ href: `/merlin/projects/${model.project_id}/models`,
},
{
text: model.name || "",
- href: `/merlin/projects/${model.project_id}/models/${model.id}`
+ href: `/merlin/projects/${model.project_id}/models/${model.id}`,
},
{
text: `Model Version ${version.id}`,
- href: `/merlin/projects/${model.project_id}/models/${model.id}/versions/${version.id}`
+ href: `/merlin/projects/${model.project_id}/models/${model.id}/versions/${version.id}`,
}
);
}
if (endpoint) {
breadCrumbs.push({
- text: endpoint.environment_name
+ text: endpoint.environment_name,
});
}
@@ -108,61 +115,64 @@ const VersionDetails = () => {
{!modelLoaded && !versionLoaded ? (
-
-
-
-
-
- ) : (
-
-
+
+
+
+
+ ) : (
+
+
+ {model.name}
+ {" version "}
+ {version.id}
+
+ }
+ />
+
+
+
+ {!(section === "deploy" || section === "redeploy") &&
+ model &&
+ modelLoaded &&
+ version &&
+ versionLoaded && (
- {model.name}
- {" version "}
- {version.id}
+
+
- }
- />
-
-
-
- {!(section === "deploy" || section === "redeploy") &&
- model &&
- modelLoaded &&
- version &&
- versionLoaded && (
-
-
-
-
)}
- {!(section === "deploy" || section === "redeploy") &&
- model &&
- modelLoaded &&
- version &&
- versionLoaded &&
- environments &&
- isDeployed && (
-
-
-
-
+ {!(section === "deploy" || section === "redeploy") &&
+ model &&
+ modelLoaded &&
+ version &&
+ versionLoaded &&
+ environments &&
+ isDeployed && (
+
+
+
+
)}
{!(section === "deploy" || section === "redeploy") &&
endpoint &&
isDeployed && (
-
+
)}
@@ -172,7 +182,8 @@ const VersionDetails = () => {
modelLoaded &&
version &&
versionLoaded &&
- !isDeployed && model.type !== "pyfunc_v2" && (
+ !isDeployed &&
+ model.type !== "pyfunc_v2" && (
Model version is not deployed}
body={
@@ -183,11 +194,10 @@ const VersionDetails = () => {
+ state={{ model: model, version: version }}
+ >
-
- Deploy
-
+ Deploy
@@ -195,36 +205,44 @@ const VersionDetails = () => {
/>
)}
- {model && modelLoaded && version && versionLoaded && endpoint && (
-
-
- }
- />
-
- }
- />
-
- )}
-
-
- )}
-
+ {model && modelLoaded && version && versionLoaded && endpoint && (
+
+
+ }
+ />
+
+ }
+ />
+
+ }
+ />
+
+ )}
+
+
+ )}
);
};
diff --git a/ui/src/pages/version/VersionTabNavigation.js b/ui/src/pages/version/VersionTabNavigation.js
index 700165888..10e36a979 100644
--- a/ui/src/pages/version/VersionTabNavigation.js
+++ b/ui/src/pages/version/VersionTabNavigation.js
@@ -14,9 +14,9 @@
* limitations under the License.
*/
+import { EuiIcon } from "@elastic/eui";
import React from "react";
import { useNavigate } from "react-router-dom";
-import { EuiIcon } from "@elastic/eui";
import { TabNavigation } from "../../components/TabNavigation";
export const VersionTabNavigation = ({ endpoint, actions, selectedTab }) => {
@@ -24,11 +24,15 @@ export const VersionTabNavigation = ({ endpoint, actions, selectedTab }) => {
const tabs = [
{
id: "details",
- name: "Configuration"
+ name: "Configuration",
+ },
+ {
+ id: "history",
+ name: "History",
},
{
id: "logs",
- name: "Logs"
+ name: "Logs",
},
{
id: "monitoring_dashboard_link",
@@ -39,8 +43,8 @@ export const VersionTabNavigation = ({ endpoint, actions, selectedTab }) => {
),
href: endpoint.monitoring_url,
- target: "_blank"
- }
+ target: "_blank",
+ },
];
return (
diff --git a/ui/src/version/VersionListTable.js b/ui/src/version/VersionListTable.js
index b87aac5e3..201d328ea 100644
--- a/ui/src/version/VersionListTable.js
+++ b/ui/src/version/VersionListTable.js
@@ -426,7 +426,7 @@ const VersionListTable = ({
navigate(
- `/merlin/projects/${activeModel.project_id}/models/${activeModel.id}/versions/${endpoint.version_id}/endpoints/${endpoint.id}`
+ `/merlin/projects/${activeModel.project_id}/models/${activeModel.id}/versions/${endpoint.version_id}/endpoints/${endpoint.id}/details`
)
}
>