feat(api,ui): Add new max allowed replicas field to env configs and r…

…elevant checks (#546) # Description Previously the Merlin API server does not have any constraints on the max number of replicas users are able to set for their deployments. This constraint however, appears on the UI via the variable `REACT_APP_MAX_ALLOWED_REPLICA` that can be set in the configs. In order to make the max allowed replica constraint consistent across the API server (as well as the SDK) and the UI, this PR introduces a new configuration value under the `Environment` struct. This struct currently already contains other environment (cluster) related configs such as the max allowable CPU and memory a user is allowed to set. Doing so thus requires a very simple DB update in order to introduce a new column corresponding to the max allowed replica field, and also allows the UI to retrieve this max allowed replica value directly from the environments API endpoint (just as it does for other environment-related configs such as GPU information) instead of via a React app configuration value. https://github.com/caraml-dev/merlin/assets/36802364/7c37be61-d540-495e-b12c-4efa92ebae0f ## Additional bug fixes Removal of the requirement for the `endpoint` and `version` props to be set in the `DeploymentPanelHeader` component, which causes a warning to occur show when no endpoint is selected for a specific version: <img width="1638" alt="Screenshot 2024-03-02 at 12 58 48 PM" src="https://github.com/caraml-dev/merlin/assets/36802364/ed7a1e4a-3978-4f09-8928-486cc31f9e27"> Resetting of the `resource_request` schema after the `None` transformer type is selected AFTER any other transformer type is selected and configured with invalid `resource_request` values rejected by yup validation (this happens because the invalid resource request values do not get removed even after the transformer has been 'removed'; note that a `None` type transformer still gets certain default transformer configs saved though they are technically redundant - it might be worth changing this in the future): https://github.com/caraml-dev/merlin/assets/36802364/062694a1-cca6-4fed-a869-d01ab0958ce8 # Modifications - `api/cluster/controller.go` - Addition of new max replica check on the API server for **Merlin models** during deployment - `api/config/config.go` - Removal of `MaxAllowedReplica` from the `ReactAppConfig` struct - `api/config/environment.go` - Addition of `MaxAllowedReplica` to the `EnvironmentConfig` struct - `ui/src/config.js` - Removal of `REACT_APP_MAX_ALLOWED_REPLICA` from React configs - `ui/src/pages/version/components/forms/DeployModelVersionForm.js` - Addition of a new state and state setter to set the max allowed replica value based on the environment selected in the form context - `ui/src/pages/version/components/forms/components/DeploymentConfigPanel.js` - Addition of a new effect update to update the max allowed replica value when the environment in a deployment form is updated - `ui/src/pages/version/components/forms/components/ResourcesPanel.js` - Removal of ticks and the addition of min/max labels to indicate variable min/max replica values users may set for a deployment - `ui/src/pages/version/components/forms/validation/schema.js` - Update validation schema to use a variable max allowed replica value to determine the struct validation outcomes # Tests  # Checklist - [x] Added PR label - [x] Added unit test, integration, and/or e2e tests - [x] Tested locally - [ ] Updated documentation - [ ] Update Swagger spec if the PR introduce API changes - [ ] Regenerated Golang and Python client if the PR introduces API changes # Release Notes  ```release-note This update potentially allows users to set a larger max replica value than usual for their deployments and may also inadvertently constrain the number of max replicas they can set, depending on the max allowed replica value set by the Merlin API server operator. ```
caraml-dev · Mar 4, 2024 · 09a8e71 · 09a8e71
1 parent 650bc5e
commit 09a8e71
Show file tree

Hide file tree

Showing 20 changed files with 127 additions and 53 deletions.
diff --git a/api/cluster/controller.go b/api/cluster/controller.go
@@ -195,6 +195,13 @@ func (c *controller) Deploy(ctx context.Context, modelService *models.Service) (
 			log.Errorf("insufficient available memory resource to fulfil user request of %d", memRequest)
 			return nil, ErrInsufficientMem
 		}
+		if modelService.ResourceRequest.MaxReplica > c.deploymentConfig.MaxAllowedReplica {
+			log.Errorf("Requested Max Replica (%d) is more than max permissible (%d)",
+				modelService.ResourceRequest.MaxReplica,
+				c.deploymentConfig.MaxAllowedReplica,
+			)
+			return nil, ErrRequestedMaxReplicasNotAllowed
+		}
 	}
 
 	_, err := c.namespaceCreator.CreateNamespace(ctx, modelService.Namespace)

diff --git a/api/cluster/controller_test.go b/api/cluster/controller_test.go
@@ -655,6 +655,35 @@ func TestController_DeployInferenceService(t *testing.T) {
 			createVsResult:  &vsReactor{vs, nil},
 			wantError:       true,
 		},
+		{
+			name: "error: deploying service due to max replica requests greater than max value allowed",
+			modelService: &models.Service{
+				Name:      isvcName,
+				Namespace: project.Name,
+				Options:   modelOpt,
+				ResourceRequest: &models.ResourceRequest{
+					MinReplica:    2,
+					MaxReplica:    5,
+					CPURequest:    resource.MustParse("1000m"),
+					MemoryRequest: resource.MustParse("1Gi"),
+				},
+			},
+			createResult: &inferenceServiceReactor{
+				&kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}},
+				nil,
+			},
+			checkResult: &inferenceServiceReactor{
+				&kservev1beta1.InferenceService{
+					ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
+					Status:     statusReady,
+				},
+				nil,
+			},
+			deployTimeout:   deployTimeout,
+			createPdbResult: &pdbReactor{pdb, nil},
+			createVsResult:  &vsReactor{vs, nil},
+			wantError:       true,
+		},
 	}
 
 	for _, tt := range tests {
@@ -696,6 +725,7 @@ func TestController_DeployInferenceService(t *testing.T) {
 				NamespaceTimeout:                   2 * tickDurationSecond * time.Second,
 				MaxCPU:                             resource.MustParse("8"),
 				MaxMemory:                          resource.MustParse("8Gi"),
+				MaxAllowedReplica:                  4,
 				DefaultModelResourceRequests:       &config.ResourceRequests{},
 				DefaultTransformerResourceRequests: &config.ResourceRequests{},
 				PodDisruptionBudget: config.PodDisruptionBudgetConfig{

diff --git a/api/cluster/errors.go b/api/cluster/errors.go
@@ -19,6 +19,7 @@ import "errors"
 var (
 	ErrInsufficientCPU                        = errors.New("CPU request is too large")
 	ErrInsufficientMem                        = errors.New("memory request too large")
+	ErrRequestedMaxReplicasNotAllowed         = errors.New("requested max replicas is more than max permissible")
 	ErrTimeoutNamespace                       = errors.New("timeout creating namespace")
 	ErrUnableToCreateNamespace                = errors.New("error creating namespace")
 	ErrUnableToGetNamespaceStatus             = errors.New("error retrieving namespace status")

diff --git a/api/cmd/api/setup.go b/api/cmd/api/setup.go
@@ -209,14 +209,15 @@ func initEnvironmentService(cfg *config.Config, db *gorm.DB) service.Environment
 
 			log.Infof("adding environment %s: cluster: %s, is_default: %v", envCfg.Name, envCfg.Cluster, envCfg.IsDefault)
 			env = &models.Environment{
-				Name:       envCfg.Name,
-				Cluster:    envCfg.Cluster,
-				IsDefault:  isDefault,
-				Region:     envCfg.Region,
-				GcpProject: envCfg.GcpProject,
-				MaxCPU:     envCfg.MaxCPU,
-				MaxMemory:  envCfg.MaxMemory,
-				GPUs:       models.ParseGPUsConfig(envCfg.GPUs),
+				Name:              envCfg.Name,
+				Cluster:           envCfg.Cluster,
+				IsDefault:         isDefault,
+				Region:            envCfg.Region,
+				GcpProject:        envCfg.GcpProject,
+				MaxCPU:            envCfg.MaxCPU,
+				MaxMemory:         envCfg.MaxMemory,
+				MaxAllowedReplica: envCfg.MaxAllowedReplica,
+				GPUs:              models.ParseGPUsConfig(envCfg.GPUs),
 				DefaultResourceRequest: &models.ResourceRequest{
 					MinReplica:    deploymentCfg.DefaultModelResourceRequests.MinReplica,
 					MaxReplica:    deploymentCfg.DefaultModelResourceRequests.MaxReplica,
@@ -253,6 +254,7 @@ func initEnvironmentService(cfg *config.Config, db *gorm.DB) service.Environment
 			env.GcpProject = envCfg.GcpProject
 			env.MaxCPU = envCfg.MaxCPU
 			env.MaxMemory = envCfg.MaxMemory
+			env.MaxAllowedReplica = envCfg.MaxAllowedReplica
 			env.GPUs = models.ParseGPUsConfig(envCfg.GPUs)
 			env.DefaultResourceRequest = &models.ResourceRequest{
 				MinReplica:    deploymentCfg.DefaultModelResourceRequests.MinReplica,

diff --git a/api/config/config.go b/api/config/config.go
@@ -77,19 +77,18 @@ type UIConfig struct {
 }
 
 type ReactAppConfig struct {
-	DocURL            Documentations `json:"REACT_APP_MERLIN_DOCS_URL,omitempty"`
-	DockerRegistries  string         `json:"REACT_APP_DOCKER_REGISTRIES,omitempty"`
-	Environment       string         `json:"REACT_APP_ENVIRONMENT,omitempty"`
-	FeastCoreURL      string         `json:"REACT_APP_FEAST_CORE_API,omitempty"`
-	HomePage          string         `json:"REACT_APP_HOMEPAGE,omitempty"`
-	MaxAllowedReplica int            `default:"20" json:"REACT_APP_MAX_ALLOWED_REPLICA,omitempty"`
-	MerlinURL         string         `json:"REACT_APP_MERLIN_API,omitempty"`
-	MlpURL            string         `json:"REACT_APP_MLP_API,omitempty"`
-	OauthClientID     string         `json:"REACT_APP_OAUTH_CLIENT_ID,omitempty"`
-	SentryDSN         string         `json:"REACT_APP_SENTRY_DSN,omitempty"`
-	UPIDocumentation  string         `json:"REACT_APP_UPI_DOC_URL,omitempty"`
-	CPUCost           string         `json:"REACT_APP_CPU_COST,omitempty"`
-	MemoryCost        string         `json:"REACT_APP_MEMORY_COST,omitempty"`
+	DocURL           Documentations `json:"REACT_APP_MERLIN_DOCS_URL,omitempty"`
+	DockerRegistries string         `json:"REACT_APP_DOCKER_REGISTRIES,omitempty"`
+	Environment      string         `json:"REACT_APP_ENVIRONMENT,omitempty"`
+	FeastCoreURL     string         `json:"REACT_APP_FEAST_CORE_API,omitempty"`
+	HomePage         string         `json:"REACT_APP_HOMEPAGE,omitempty"`
+	MerlinURL        string         `json:"REACT_APP_MERLIN_API,omitempty"`
+	MlpURL           string         `json:"REACT_APP_MLP_API,omitempty"`
+	OauthClientID    string         `json:"REACT_APP_OAUTH_CLIENT_ID,omitempty"`
+	SentryDSN        string         `json:"REACT_APP_SENTRY_DSN,omitempty"`
+	UPIDocumentation string         `json:"REACT_APP_UPI_DOC_URL,omitempty"`
+	CPUCost          string         `json:"REACT_APP_CPU_COST,omitempty"`
+	MemoryCost       string         `json:"REACT_APP_MEMORY_COST,omitempty"`
 }
 
 type BaseImageConfigs map[string]BaseImageConfig

diff --git a/api/config/deployment.go b/api/config/deployment.go
@@ -31,10 +31,12 @@ type DeploymentConfig struct {
 	DefaultModelResourceRequests *ResourceRequests
 	// Default resource request for transformer deployment
 	DefaultTransformerResourceRequests *ResourceRequests
-	// Max CPU of machine
+	// Max allowed CPU of each pod
 	MaxCPU resource.Quantity
-	// Max Memory of machine
+	// Max allowed Memory of each pod
 	MaxMemory resource.Quantity
+	// Max allowed MaxReplica value of each deployment
+	MaxAllowedReplica int
 	// TopologySpreadConstraints to be applied on the pods of each model deployment
 	TopologySpreadConstraints []corev1.TopologySpreadConstraint
 	// Percentage of knative's queue proxy resource request from the inference service resource request

diff --git a/api/config/environment.go b/api/config/environment.go
@@ -44,6 +44,7 @@ type EnvironmentConfig struct {
 
 	MaxCPU                    string                    `yaml:"max_cpu"`
 	MaxMemory                 string                    `yaml:"max_memory"`
+	MaxAllowedReplica         int                       `yaml:"max_allowed_replica"`
 	TopologySpreadConstraints TopologySpreadConstraints `yaml:"topology_spread_constraints"`
 	PodDisruptionBudget       PodDisruptionBudgetConfig `yaml:"pod_disruption_budget"`
 
@@ -177,6 +178,7 @@ func ParseDeploymentConfig(envCfg *EnvironmentConfig, cfg *Config) DeploymentCon
 		},
 		MaxCPU:                                resource.MustParse(envCfg.MaxCPU),
 		MaxMemory:                             resource.MustParse(envCfg.MaxMemory),
+		MaxAllowedReplica:                     envCfg.MaxAllowedReplica,
 		TopologySpreadConstraints:             envCfg.TopologySpreadConstraints,
 		QueueResourcePercentage:               envCfg.QueueResourcePercentage,
 		PyfuncGRPCOptions:                     cfg.PyfuncGRPCOptions,

diff --git a/api/models/environment.go b/api/models/environment.go
@@ -23,6 +23,7 @@ type Environment struct {
 	GcpProject                        string           `json:"gcp_project"`
 	MaxCPU                            string           `json:"max_cpu"`
 	MaxMemory                         string           `json:"max_memory"`
+	MaxAllowedReplica                 int              `json:"max_allowed_replica"`
 	GPUs                              GPUs             `json:"gpus" gorm:"column:gpus"`
 	DefaultResourceRequest            *ResourceRequest `json:"default_resource_request"`
 	DefaultTransformerResourceRequest *ResourceRequest `json:"default_transformer_resource_request"`

diff --git a/db-migrations/37_environments_add_max_allowed_replicas.down.sql b/db-migrations/37_environments_add_max_allowed_replicas.down.sql
@@ -0,0 +1 @@
+ALTER TABLE environments DROP COLUMN max_allowed_replica;
diff --git a/db-migrations/37_environments_add_max_allowed_replicas.up.sql b/db-migrations/37_environments_add_max_allowed_replicas.up.sql
@@ -0,0 +1,2 @@
+ALTER TABLE environments
+ADD COLUMN max_allowed_replica int;
diff --git a/scripts/e2e/values-e2e.yaml b/scripts/e2e/values-e2e.yaml
@@ -74,6 +74,7 @@ environmentConfigs:
     namespace_timeout: 2m
     max_cpu: 250m
     max_memory: 512Mi
+    max_allowed_replica: 30
     queue_resource_percentage: 20
     is_prediction_job_enabled: true
     is_default_prediction_job: true

diff --git a/ui/src/config.js b/ui/src/config.js
@@ -55,11 +55,6 @@ export const appConfig = {
       source_type: "BIGTABLE",
     },
   ],
-  scaling: {
-    maxAllowedReplica: getEnv("REACT_APP_MAX_ALLOWED_REPLICA")
-      ? parseInt(getEnv("REACT_APP_MAX_ALLOWED_REPLICA"))
-      : 20,
-  },
   imagebuilder:{
     cluster:  getEnv("REACT_APP_IMAGE_BUILDER_CLUSTER"),
     gcp_project: getEnv("REACT_APP_IMAGE_BUILDER_GCP_PROJECT"),

diff --git a/ui/src/pages/version/DeploymentPanelHeader.js b/ui/src/pages/version/DeploymentPanelHeader.js
@@ -130,7 +130,5 @@ export const DeploymentPanelHeader = ({
 
 DeploymentPanelHeader.propTypes = {
   model: PropTypes.object.isRequired,
-  version: PropTypes.object.isRequired,
-  endpoint: PropTypes.object.isRequired,
   environments: PropTypes.array.isRequired,
 };
diff --git a/ui/src/pages/version/components/forms/DeployModelVersionForm.js b/ui/src/pages/version/components/forms/DeployModelVersionForm.js
@@ -20,6 +20,7 @@ import {
   versionEndpointSchema,
 } from "./validation/schema";
 import { PROTOCOL } from "../../../../services/version_endpoint/VersionEndpoint";
+import EnvironmentsContext from "../../../../providers/environments/context";
 
 const _ = require('lodash');
 const targetRequestStatus = (currentStatus) => {
@@ -72,21 +73,35 @@ export const DeployModelVersionForm = ({
     });
   }
 
+  const { data } = useContext(FormContext);
+  const environments = useContext(EnvironmentsContext);
+
+  const [maxAllowedReplica, setMaxAllowedReplica] = useState(() => {
+    if (data.environment_name !== "") {
+      return environments.find((env) => env.name === data.environment_name).max_allowed_replica;
+    }
+    return 0;
+  });
+
   const mainSteps = [
     {
       title: "Model",
       children: (
         <ModelStep
           version={version}
           isEnvironmentDisabled={isEnvironmentDisabled}
+          maxAllowedReplica={maxAllowedReplica}
+          setMaxAllowedReplica={setMaxAllowedReplica}
         />
       ),
-      validationSchema: versionEndpointSchema,
+      validationSchema: versionEndpointSchema(maxAllowedReplica),
     },
     {
       title: "Transformer",
-      children: <TransformerStep />,
-      validationSchema: transformerConfigSchema,
+      children: <TransformerStep
+        maxAllowedReplica={maxAllowedReplica}
+      />,
+      validationSchema: transformerConfigSchema(maxAllowedReplica),
     },
   ];
 
@@ -100,7 +115,7 @@ export const DeployModelVersionForm = ({
   const predictionLoggerStep = {
     title: "Logging",
     children: <PredictionLoggerStep />,
-    validationSchema: versionEndpointSchema,
+    validationSchema: versionEndpointSchema(maxAllowedReplica),
   };
 
   const customTransformerStep = {

diff --git a/ui/src/pages/version/components/forms/components/DeploymentConfigPanel.js b/ui/src/pages/version/components/forms/components/DeploymentConfigPanel.js
@@ -45,6 +45,7 @@ const DEFAULT_AUTOSCALING_POLICY = {
  * @param {*} onChange. Callback to be made when configuration is changed
  * @param {*} errors. Why pass errors?
  * @param {*} isEnvironmentDisabled. Disable deployment to the environment if the flag is true.
+ * @param {*} setMaxAllowedReplica State setter to update MaxAllowedReplica constraint
  */
 export const DeploymentConfigPanel = ({
   environment,
@@ -53,6 +54,7 @@ export const DeploymentConfigPanel = ({
   onChange,
   errors = {},
   isEnvironmentDisabled = false,
+  setMaxAllowedReplica,
 }) => {
   const environments = useContext(EnvironmentsContext);
 
@@ -85,6 +87,12 @@ export const DeploymentConfigPanel = ({
         onChange(key)(versionEndpoint[key]);
       });
     }
+
+    environments.forEach((env) => {
+      if (env.name === value) {
+        setMaxAllowedReplica(env.max_allowed_replica);
+      }
+    });
   };
 
   const onDeploymentModeChange = (deploymentMode) => {

diff --git a/ui/src/pages/version/components/forms/components/ResourcesPanel.js b/ui/src/pages/version/components/forms/components/ResourcesPanel.js
@@ -21,7 +21,6 @@ import EnvironmentsContext from "../../../../../providers/environments/context";
 import { calculateCost } from "../../../../../utils/costEstimation";
 import { Panel } from "./Panel";
 
-const maxTicks = 20;
 
 export const ResourcesPanel = ({
   environment: initEnvironment,
@@ -69,7 +68,16 @@ export const ResourcesPanel = ({
     } else {
       setGpuValueOptions([{ value: "None", inputDisplay: "None" }]);
     }
-  }, [resourcesConfig, resourcesConfig.gpu_name, gpus]);
+
+    if (resourcesConfig.min_replica > maxAllowedReplica) {
+      onChange("min_replica")(maxAllowedReplica - 1)
+    }
+
+    if (resourcesConfig.max_replica > maxAllowedReplica) {
+      onChange("max_replica")(maxAllowedReplica)
+    }
+
+  }, [resourcesConfig, resourcesConfig.gpu_name, gpus, maxAllowedReplica, onChange]);
 
   const replicasError = useMemo(
     () => [...(errors.min_replica || []), ...(errors.max_replica || [])],
@@ -235,10 +243,8 @@ export const ResourcesPanel = ({
             fullWidth
             min={0}
             max={maxAllowedReplica}
-            // This component only allows up to 20 ticks to be displayed at the slider
-            tickInterval={Math.ceil((maxAllowedReplica + 1) / maxTicks)}
             showInput
-            showTicks
+            showLabels
             value={[
               resourcesConfig.min_replica || 0,
               resourcesConfig.max_replica || 0,

diff --git a/ui/src/pages/version/components/forms/components/SelectTransformerPanel.js b/ui/src/pages/version/components/forms/components/SelectTransformerPanel.js
@@ -25,6 +25,12 @@ export const SelectTransformerPanel = ({
       onChange("enabled")(false);
       onChange("transformer_type")(undefined);
       onChange("type_on_ui")("");
+      onChange("resource_request")({
+        min_replica: process.env.REACT_APP_ENVIRONMENT === "production" ? 2 : 0,
+        max_replica: process.env.REACT_APP_ENVIRONMENT === "production" ? 4 : 2,
+        cpu_request: "500m",
+        memory_request: "512Mi"
+      })
     } else {
       onChange("enabled")(true);
       onChange("transformer_type")(value !== "feast" ? value : "standard");

diff --git a/ui/src/pages/version/components/forms/steps/ModelStep.js b/ui/src/pages/version/components/forms/steps/ModelStep.js
@@ -6,15 +6,14 @@ import {
 } from "@caraml-dev/ui-lib";
 import { EuiAccordion, EuiFlexGroup, EuiFlexItem, EuiSpacer } from "@elastic/eui";
 import React, { useContext } from "react";
-import { appConfig } from "../../../../../config";
 import { PROTOCOL } from "../../../../../services/version_endpoint/VersionEndpoint";
 import { DeploymentConfigPanel } from "../components/DeploymentConfigPanel";
 import { EnvVariablesPanel } from "../components/EnvVariablesPanel";
 import { LoggerPanel } from "../components/LoggerPanel";
 import { ResourcesPanel } from "../components/ResourcesPanel";
 import { ImageBuilderSection } from "../components/ImageBuilderSection";
 
-export const ModelStep = ({ version, isEnvironmentDisabled = false }) => {
+export const ModelStep = ({ version, isEnvironmentDisabled = false, maxAllowedReplica, setMaxAllowedReplica }) => {
   const { data, onChangeHandler } = useContext(FormContext);
   const { onChange } = useOnChangeHandler(onChangeHandler);
   const { errors } = useContext(FormValidationContext);
@@ -29,6 +28,7 @@ export const ModelStep = ({ version, isEnvironmentDisabled = false }) => {
           onChange={onChange}
           errors={errors}
           isEnvironmentDisabled={isEnvironmentDisabled}
+          setMaxAllowedReplica={setMaxAllowedReplica}
         />
       </EuiFlexItem>
 
@@ -38,7 +38,7 @@ export const ModelStep = ({ version, isEnvironmentDisabled = false }) => {
           isGPUEnabled={true}
           resourcesConfig={data.resource_request}
           onChangeHandler={onChange("resource_request")}
-          maxAllowedReplica={appConfig.scaling.maxAllowedReplica}
+          maxAllowedReplica={maxAllowedReplica}
           errors={get(errors, "resource_request")}
           child={
           <EuiAccordion
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		ALTER TABLE environments DROP COLUMN max_allowed_replica;
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		ALTER TABLE environments
		ADD COLUMN max_allowed_replica int;