diff --git a/api/cluster/resource/templater.go b/api/cluster/resource/templater.go
index b4cd26c9c..95ffd54fc 100644
--- a/api/cluster/resource/templater.go
+++ b/api/cluster/resource/templater.go
@@ -222,20 +222,25 @@ func (t *InferenceServiceTemplater) createPredictorSpec(modelService *models.Ser
 	nodeSelector := map[string]string{}
 	tolerations := []corev1.Toleration{}
-	if modelService.ResourceRequest.GPUName != "" && !modelService.ResourceRequest.GPURequest.IsZero() {
-		// Look up to the GPU resource type and quantity from DeploymentConfig
-		for _, gpuConfig := range t.deploymentConfig.GPUs {
-			if gpuConfig.Name == modelService.ResourceRequest.GPUName {
-				// Declare and initialize resourceType and resourceQuantity variables
-				resourceType := corev1.ResourceName(gpuConfig.ResourceType)
-				resourceQuantity := modelService.ResourceRequest.GPURequest
-
-				// Set the resourceType as the key in the maps, with resourceQuantity as the value
-				resources.Requests[resourceType] = resourceQuantity
-				resources.Limits[resourceType] = resourceQuantity
-
-				nodeSelector = gpuConfig.NodeSelector
-				tolerations = gpuConfig.Tolerations
+
+	if modelService.ResourceRequest.GPUName != "" {
+		if modelService.ResourceRequest.GPURequest.IsZero() {
+			// A zero GPU request is invalid once a GPU name has been specified.
+			return kservev1beta1.PredictorSpec{}, fmt.Errorf("GPU request cannot be zero when a GPU name is specified")
+		}
+		// Look up the GPU resource type and quantity from DeploymentConfig
+		for _, gpuConfig := range t.deploymentConfig.GPUs {
+			if gpuConfig.Name == modelService.ResourceRequest.GPUName {
+				// Declare and initialize resourceType and resourceQuantity variables
+				resourceType := corev1.ResourceName(gpuConfig.ResourceType)
+				resourceQuantity := modelService.ResourceRequest.GPURequest
+
+				// Set the resourceType as the key in the maps, with resourceQuantity as the value
+				resources.Requests[resourceType] = resourceQuantity
+				resources.Limits[resourceType] = resourceQuantity
+
+				nodeSelector = gpuConfig.NodeSelector
+				tolerations = gpuConfig.Tolerations
 			}
 		}
 	}
 
diff --git a/api/cluster/resource/templater_gpu_test.go b/api/cluster/resource/templater_gpu_test.go
index 7067f682d..2faf48bee 100644
--- a/api/cluster/resource/templater_gpu_test.go
+++ b/api/cluster/resource/templater_gpu_test.go
@@ -106,6 +106,14 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
 		},
 	}
 
+	invalidResourceRequest := &models.ResourceRequest{
+		MinReplica:    1,
+		MaxReplica:    2,
+		CPURequest:    resource.MustParse("500m"),
+		MemoryRequest: resource.MustParse("500Mi"),
+		GPUName:       "NVIDIA P4",
+	}
+
 	queueResourcePercentage := "2"
 
 	storageUri := fmt.Sprintf("%s/model", modelSvc.ArtifactURI)
@@ -1563,6 +1571,25 @@ func TestCreateInferenceServiceSpecWithGPU(t *testing.T) {
 				},
 			},
 		},
+		{
+			name: "invalid resource request with 0 GPU requested",
+			modelSvc: &models.Service{
+				Name:            modelSvc.Name,
+				ModelName:       modelSvc.ModelName,
+				ModelVersion:    modelSvc.ModelVersion,
+				Namespace:       project.Name,
+				ArtifactURI:     modelSvc.ArtifactURI,
+				Type:            models.ModelTypeTensorflow,
+				Options:         &models.ModelOption{},
+				Metadata:        modelSvc.Metadata,
+				Protocol:        protocol.HttpJson,
+				ResourceRequest: invalidResourceRequest,
+			},
+			resourcePercentage: queueResourcePercentage,
+			deploymentScale:    defaultDeploymentScale,
+			exp:                &kservev1beta1.InferenceService{},
+			wantErr:            true,
+		},
 	}
 
 	for _, tt := range tests {
diff --git a/ui/src/pages/version/components/forms/components/ResourcesPanel.js b/ui/src/pages/version/components/forms/components/ResourcesPanel.js
index 0f4465951..34bca68da 100644
--- a/ui/src/pages/version/components/forms/components/ResourcesPanel.js
+++ b/ui/src/pages/version/components/forms/components/ResourcesPanel.js
@@ -82,7 +82,7 @@ export const ResourcesPanel = ({
       return;
     }
     onChange("gpu_name")(gpu_name);
-    onChange("gpu_request")(undefined);
+    onChange("gpu_request")(gpus[gpu_name].values[0]);
     onChange("min_monthly_cost_per_gpu")(
       gpus[gpu_name].min_monthly_cost_per_gpu
     );