Skip to content

Commit

Permalink
Add new max allowed replicas field to env configs and relevant checks
Browse files Browse the repository at this point in the history
  • Loading branch information
deadlycoconuts committed Mar 1, 2024
1 parent a810ebf commit 07b5021
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 10 deletions.
7 changes: 7 additions & 0 deletions api/cluster/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,13 @@ func (c *controller) Deploy(ctx context.Context, modelService *models.Service) (
log.Errorf("insufficient available memory resource to fulfil user request of %d", memRequest)
return nil, ErrInsufficientMem
}
if modelService.ResourceRequest.MaxReplica > c.deploymentConfig.MaxAllowedReplica {
log.Errorf("Requested Max Replica (%d) is more than max permissible (%d)",
modelService.ResourceRequest.MaxReplica,
c.deploymentConfig.MaxAllowedReplica,
)
return nil, ErrRequestedMaxReplicasNotAllowed
}
}

_, err := c.namespaceCreator.CreateNamespace(ctx, modelService.Namespace)
Expand Down
30 changes: 30 additions & 0 deletions api/cluster/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,35 @@ func TestController_DeployInferenceService(t *testing.T) {
createVsResult: &vsReactor{vs, nil},
wantError: true,
},
{
name: "error: deploying service due to max replica requests greater than max value allowed",
modelService: &models.Service{
Name: isvcName,
Namespace: project.Name,
Options: modelOpt,
ResourceRequest: &models.ResourceRequest{
MinReplica: 2,
MaxReplica: 5,
CPURequest: resource.MustParse("1000m"),
MemoryRequest: resource.MustParse("1Gi"),
},
},
createResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name}},
nil,
},
checkResult: &inferenceServiceReactor{
&kservev1beta1.InferenceService{
ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: project.Name},
Status: statusReady,
},
nil,
},
deployTimeout: deployTimeout,
createPdbResult: &pdbReactor{pdb, nil},
createVsResult: &vsReactor{vs, nil},
wantError: true,
},
}

for _, tt := range tests {
Expand Down Expand Up @@ -696,6 +725,7 @@ func TestController_DeployInferenceService(t *testing.T) {
NamespaceTimeout: 2 * tickDurationSecond * time.Second,
MaxCPU: resource.MustParse("8"),
MaxMemory: resource.MustParse("8Gi"),
MaxAllowedReplica: 4,
DefaultModelResourceRequests: &config.ResourceRequests{},
DefaultTransformerResourceRequests: &config.ResourceRequests{},
PodDisruptionBudget: config.PodDisruptionBudgetConfig{
Expand Down
1 change: 1 addition & 0 deletions api/cluster/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import "errors"
var (
ErrInsufficientCPU = errors.New("CPU request is too large")
ErrInsufficientMem = errors.New("memory request too large")
ErrRequestedMaxReplicasNotAllowed = errors.New("requested max replicas is more than max permissible")
ErrTimeoutNamespace = errors.New("timeout creating namespace")
ErrUnableToCreateNamespace = errors.New("error creating namespace")
ErrUnableToGetNamespaceStatus = errors.New("error retrieving namespace status")
Expand Down
18 changes: 10 additions & 8 deletions api/cmd/api/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,14 +209,15 @@ func initEnvironmentService(cfg *config.Config, db *gorm.DB) service.Environment

log.Infof("adding environment %s: cluster: %s, is_default: %v", envCfg.Name, envCfg.Cluster, envCfg.IsDefault)
env = &models.Environment{
Name: envCfg.Name,
Cluster: envCfg.Cluster,
IsDefault: isDefault,
Region: envCfg.Region,
GcpProject: envCfg.GcpProject,
MaxCPU: envCfg.MaxCPU,
MaxMemory: envCfg.MaxMemory,
GPUs: models.ParseGPUsConfig(envCfg.GPUs),
Name: envCfg.Name,
Cluster: envCfg.Cluster,
IsDefault: isDefault,
Region: envCfg.Region,
GcpProject: envCfg.GcpProject,
MaxCPU: envCfg.MaxCPU,
MaxMemory: envCfg.MaxMemory,
MaxAllowedReplica: envCfg.MaxAllowedReplica,
GPUs: models.ParseGPUsConfig(envCfg.GPUs),
DefaultResourceRequest: &models.ResourceRequest{
MinReplica: deploymentCfg.DefaultModelResourceRequests.MinReplica,
MaxReplica: deploymentCfg.DefaultModelResourceRequests.MaxReplica,
Expand Down Expand Up @@ -253,6 +254,7 @@ func initEnvironmentService(cfg *config.Config, db *gorm.DB) service.Environment
env.GcpProject = envCfg.GcpProject
env.MaxCPU = envCfg.MaxCPU
env.MaxMemory = envCfg.MaxMemory
env.MaxAllowedReplica = envCfg.MaxAllowedReplica
env.GPUs = models.ParseGPUsConfig(envCfg.GPUs)
env.DefaultResourceRequest = &models.ResourceRequest{
MinReplica: deploymentCfg.DefaultModelResourceRequests.MinReplica,
Expand Down
6 changes: 4 additions & 2 deletions api/config/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@ type DeploymentConfig struct {
DefaultModelResourceRequests *ResourceRequests
// Default resource request for transformer deployment
DefaultTransformerResourceRequests *ResourceRequests
// Max CPU of machine
// Max allowed CPU of each pod
MaxCPU resource.Quantity
// Max Memory of machine
// Max allowed Memory of each pod
MaxMemory resource.Quantity
// Upper bound on the MaxReplica value that a deployment's resource request may specify
MaxAllowedReplica int
// TopologySpreadConstraints to be applied on the pods of each model deployment
TopologySpreadConstraints []corev1.TopologySpreadConstraint
// Percentage of knative's queue proxy resource request from the inference service resource request
Expand Down
2 changes: 2 additions & 0 deletions api/config/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ type EnvironmentConfig struct {

MaxCPU string `yaml:"max_cpu"`
MaxMemory string `yaml:"max_memory"`
MaxAllowedReplica int `yaml:"max_allowed_replica"`
TopologySpreadConstraints TopologySpreadConstraints `yaml:"topology_spread_constraints"`
PodDisruptionBudget PodDisruptionBudgetConfig `yaml:"pod_disruption_budget"`

Expand Down Expand Up @@ -177,6 +178,7 @@ func ParseDeploymentConfig(envCfg *EnvironmentConfig, cfg *Config) DeploymentCon
},
MaxCPU: resource.MustParse(envCfg.MaxCPU),
MaxMemory: resource.MustParse(envCfg.MaxMemory),
MaxAllowedReplica: envCfg.MaxAllowedReplica,
TopologySpreadConstraints: envCfg.TopologySpreadConstraints,
QueueResourcePercentage: envCfg.QueueResourcePercentage,
PyfuncGRPCOptions: cfg.PyfuncGRPCOptions,
Expand Down
1 change: 1 addition & 0 deletions api/models/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ type Environment struct {
GcpProject string `json:"gcp_project"`
MaxCPU string `json:"max_cpu"`
MaxMemory string `json:"max_memory"`
// MaxAllowedReplica is populated from the environment config's
// max_allowed_replica value. It uses a json tag, like the sibling fields,
// so it is serialized as "max_allowed_replica" in API payloads.
MaxAllowedReplica int `json:"max_allowed_replica"`
GPUs GPUs `json:"gpus" gorm:"column:gpus"`
DefaultResourceRequest *ResourceRequest `json:"default_resource_request"`
DefaultTransformerResourceRequest *ResourceRequest `json:"default_transformer_resource_request"`
Expand Down

0 comments on commit 07b5021

Please sign in to comment.