diff --git a/charts/kserve/templates/inferenceservice.yaml b/charts/kserve/templates/inferenceservice.yaml index 338feb580..0c2e4a30c 100644 --- a/charts/kserve/templates/inferenceservice.yaml +++ b/charts/kserve/templates/inferenceservice.yaml @@ -103,6 +103,12 @@ spec: aliyun.com/gpu-core.percentage: {{ .Values.gpuCore }} {{- end }} requests: + {{- if .Values.cpu }} + cpu: {{ .Values.cpu }} + {{- end }} + {{- if .Values.memory }} + memory: {{ .Values.memory }} + {{- end }} {{- if gt (int $gpuCount) 0}} nvidia.com/gpu: {{ .Values.gpuCount }} {{- end }} @@ -171,6 +177,12 @@ spec: {{- end }} resources: requests: + {{- if .Values.cpu }} + cpu: {{ .Values.cpu }} + {{- end }} + {{- if .Values.memory }} + memory: {{ .Values.memory }} + {{- end }} {{- if gt (int $gpuCount) 0}} nvidia.com/gpu: {{ .Values.gpuCount }} {{- end }} diff --git a/pkg/serving/update.go b/pkg/serving/update.go index 0f2b36f83..f8e7ac9a6 100644 --- a/pkg/serving/update.go +++ b/pkg/serving/update.go @@ -563,6 +563,19 @@ func setInferenceServiceForCustomModel(args *types.UpdateKServeArgs, inferenceSe inferenceService.Spec.Predictor.Containers[0].Image = args.Image } + // set resources requests + resourceRequests := inferenceService.Spec.Predictor.Containers[0].Resources.Requests + if resourceRequests == nil { + resourceRequests = make(map[v1.ResourceName]resource.Quantity) + } + if args.Cpu != "" { + resourceRequests[v1.ResourceCPU] = resource.MustParse(args.Cpu) + } + if args.Memory != "" { + resourceRequests[v1.ResourceMemory] = resource.MustParse(args.Memory) + } + inferenceService.Spec.Predictor.Containers[0].Resources.Requests = resourceRequests + // set resources limits resourceLimits := inferenceService.Spec.Predictor.Containers[0].Resources.Limits if resourceLimits == nil {