diff --git a/cmd/plugins/balloons/policy/balloons-policy.go b/cmd/plugins/balloons/policy/balloons-policy.go index f86ea69e3..ca9950c30 100644 --- a/cmd/plugins/balloons/policy/balloons-policy.go +++ b/cmd/plugins/balloons/policy/balloons-policy.go @@ -1411,7 +1411,7 @@ func (p *balloons) updatePinning(blns ...*Balloon) { } else { allowedCpus = pinnableCpus } - p.pinCpuMem(c, allowedCpus, bln.Mems, bln.memTypeMask) + p.pinCpuMem(c, allowedCpus, bln.Mems, bln.memTypeMask, bln.Def.PinMemory) } } } @@ -1517,7 +1517,7 @@ func (p *balloons) dismissContainer(c cache.Container, bln *Balloon) { } // pinCpuMem pins container to CPUs and memory nodes if flagged -func (p *balloons) pinCpuMem(c cache.Container, cpus cpuset.CPUSet, mems idset.IDSet, memTypeMask libmem.TypeMask) { +func (p *balloons) pinCpuMem(c cache.Container, cpus cpuset.CPUSet, mems idset.IDSet, memTypeMask libmem.TypeMask, blnDefPinMemory *bool) { if p.bpoptions.PinCPU == nil || *p.bpoptions.PinCPU { log.Debug(" - pinning %s to cpuset: %s", c.PrettyName(), cpus) c.SetCpusetCpus(cpus.String()) @@ -1526,7 +1526,13 @@ func (p *balloons) pinCpuMem(c cache.Container, cpus cpuset.CPUSet, mems idset.I c.SetCPUShares(int64(cache.MilliCPUToShares(int64(mCpu)))) } } - if p.bpoptions.PinMemory == nil || *p.bpoptions.PinMemory { + // Start from policy-level PinMemory... 
+ pinMemory := p.bpoptions.PinMemory == nil || *p.bpoptions.PinMemory + // ...and allow override in balloon-type-level PinMemory + if blnDefPinMemory != nil { + pinMemory = *blnDefPinMemory + } + if pinMemory { if c.PreserveMemoryResources() { log.Debug(" - preserving %s pinning to memory %q", c.PrettyName, c.GetCpusetMems()) preserveMems, err := parseIDSet(c.GetCpusetMems()) diff --git a/config/crd/bases/config.nri_balloonspolicies.yaml b/config/crd/bases/config.nri_balloonspolicies.yaml index 2ce11773a..869f22cbb 100644 --- a/config/crd/bases/config.nri_balloonspolicies.yaml +++ b/config/crd/bases/config.nri_balloonspolicies.yaml @@ -217,6 +217,11 @@ spec: items: type: string type: array + pinMemory: + description: |- + PinMemory controls pinning containers to memory nodes. + Overrides the policy level PinMemory setting in this balloon type. + type: boolean preferCloseToDevices: description: |- PreferCloseToDevices: prefer creating new balloons of this diff --git a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml index 2ce11773a..869f22cbb 100644 --- a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml +++ b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml @@ -217,6 +217,11 @@ spec: items: type: string type: array + pinMemory: + description: |- + PinMemory controls pinning containers to memory nodes. + Overrides the policy level PinMemory setting in this balloon type. + type: boolean preferCloseToDevices: description: |- PreferCloseToDevices: prefer creating new balloons of this diff --git a/docs/resource-policy/policy/balloons.md b/docs/resource-policy/policy/balloons.md index 7b9f0a564..f3a7776eb 100644 --- a/docs/resource-policy/policy/balloons.md +++ b/docs/resource-policy/policy/balloons.md @@ -78,10 +78,11 @@ Balloons policy parameters: default is `true`: the container cannot use other CPUs. 
- `pinMemory` controls pinning a container to the memories that are closest to the CPUs of its balloon. The default is `true`: allow - using memory only from the closest NUMA nodes. Warning: this may - cause kernel to kill containers due to out-of-memory error when - closest NUMA nodes do not have enough memory. In this situation - consider switching this option `false`. + using memory only from the closest NUMA nodes. Can be overridden by + `pinMemory` in balloon types. Warning: pinning memory may cause kernel + to kill containers due to out-of-memory error when allowed NUMA + nodes do not have enough memory. In this situation consider + switching this option to `false`. - `preserve` specifies containers whose resource pinning must not be modified by the policy. - `matchExpressions` if a container matches an expression in this @@ -174,10 +175,13 @@ Balloons policy parameters: - `cpuClass` specifies the name of the CPU class according to which CPUs of balloons are configured. Class properties are defined in separate `cpu.classes` objects, see below. + - `pinMemory` overrides policy-level `pinMemory` in balloons of this + type. - `memoryTypes` is a list of allowed memory types for containers in a balloon. Supported types are "HBM", "DRAM" and "PMEM". This setting can be overridden by a pod/container specific - `memory-type` annotation. + `memory-type` annotation. Memory types have no effect when not pinning + memory (see `pinMemory`). - `preferCloseToDevices`: prefer creating new balloons close to listed devices. 
List of strings - `preferCoreType`: specifies preferences of the core type which diff --git a/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go b/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go index 28679ef2d..4f4f42762 100644 --- a/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go +++ b/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go @@ -157,6 +157,9 @@ type BalloonDef struct { // +listType=set // +kubebuilder:validation:items:XValidation:rule="self == 'DRAM' || self == 'HBM' || self == 'PMEM'",messageExpression="\"invalid memory type: \" + self + \", expected DRAM, HBM, or PMEM\"" MemoryTypes []string `json:"memoryTypes,omitempty"` + // PinMemory controls pinning containers to memory nodes. + // Overrides the policy level PinMemory setting in this balloon type. + PinMemory *bool `json:"pinMemory,omitempty"` // AllocatorPriority (High, Normal, Low, None) // This parameter is passed to CPU allocator when creating or // resizing a balloon. At init, balloons with highest priority diff --git a/pkg/apis/config/v1alpha1/resmgr/policy/balloons/zz_generated.deepcopy.go b/pkg/apis/config/v1alpha1/resmgr/policy/balloons/zz_generated.deepcopy.go index d3a740065..61915644b 100644 --- a/pkg/apis/config/v1alpha1/resmgr/policy/balloons/zz_generated.deepcopy.go +++ b/pkg/apis/config/v1alpha1/resmgr/policy/balloons/zz_generated.deepcopy.go @@ -43,6 +43,11 @@ func (in *BalloonDef) DeepCopyInto(out *BalloonDef) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.PinMemory != nil { + in, out := &in.PinMemory, &out.PinMemory + *out = new(bool) + **out = **in + } if in.PreferSpreadOnPhysicalCores != nil { in, out := &in.PreferSpreadOnPhysicalCores, &out.PreferSpreadOnPhysicalCores *out = new(bool) diff --git a/test/e2e/policies.test-suite/balloons/n6-hbm-cxl/test01-memory-types/balloons-memory-types.cfg b/test/e2e/policies.test-suite/balloons/n6-hbm-cxl/test01-memory-types/balloons-memory-types.cfg index 203ab2855..ed1f5d528 100644 --- 
a/test/e2e/policies.test-suite/balloons/n6-hbm-cxl/test01-memory-types/balloons-memory-types.cfg +++ b/test/e2e/policies.test-suite/balloons/n6-hbm-cxl/test01-memory-types/balloons-memory-types.cfg @@ -19,6 +19,12 @@ config: preferSpreadingPods: true preferNewBalloons: true + - name: no-pin-mem + minCPUs: 1 + maxCPUs: 1 + preferNewBalloons: true + pinMemory: false + instrumentation: httpEndpoint: ":8891" prometheusExport: true diff --git a/test/e2e/policies.test-suite/balloons/n6-hbm-cxl/test01-memory-types/code.var.sh b/test/e2e/policies.test-suite/balloons/n6-hbm-cxl/test01-memory-types/code.var.sh index 68da623e4..3459e8687 100644 --- a/test/e2e/policies.test-suite/balloons/n6-hbm-cxl/test01-memory-types/code.var.sh +++ b/test/e2e/policies.test-suite/balloons/n6-hbm-cxl/test01-memory-types/code.var.sh @@ -23,7 +23,8 @@ POD_ANNOTATION[5]="memory-type.resource-policy.nri.io/container.pod0c5: hbm,dram POD_ANNOTATION[10]="balloon.balloons.resource-policy.nri.io/container.pod0c0: mem-types" POD_ANNOTATION[16]="balloon.balloons.resource-policy.nri.io/container.pod0c6: mem-types" POD_ANNOTATION[17]="balloon.balloons.resource-policy.nri.io/container.pod0c7: no-mem-types" -CPUREQ="200m" MEMREQ="300M" CPULIM="" MEMLIM="300M" CONTCOUNT=8 create balloons-busybox +POD_ANNOTATION[18]="balloon.balloons.resource-policy.nri.io/container.pod0c8: no-pin-mem" +CPUREQ="200m" MEMREQ="300M" CPULIM="" MEMLIM="300M" CONTCOUNT=9 create balloons-busybox report allowed verify 'mems["pod0c0"] == {hbm0} if packages["pod0c0"] == {"package0"} else mems["pod0c0"] == {hbm1}' \ 'mems["pod0c1"] == {dram0} if packages["pod0c1"] == {"package0"} else mems["pod0c1"] == {dram1}' \ @@ -32,7 +33,8 @@ verify 'mems["pod0c0"] == {hbm0} if packages["pod0c0"] == {"package0 'mems["pod0c4"] == {dram0,pmem0} if packages["pod0c4"] == {"package0"} else mems["pod0c4"] == {dram1,pmem1}' \ 'mems["pod0c5"] == {hbm0,dram0,pmem0} if packages["pod0c5"] == {"package0"} else mems["pod0c5"] == {hbm1,dram1,pmem1}' \ 
'mems["pod0c6"] == {hbm0,pmem0} if packages["pod0c6"] == {"package0"} else mems["pod0c6"] == {hbm1,pmem1}' \ - 'mems["pod0c7"] == {dram0} if packages["pod0c7"] == {"package0"} else mems["pod0c7"] == {dram1}' + 'mems["pod0c7"] == {dram0} if packages["pod0c7"] == {"package0"} else mems["pod0c7"] == {dram1}' \ + 'mems["pod0c8"] == {dram0,dram1,hbm0,hbm1,pmem0,pmem1}' cleanup