diff --git a/cmd/plugins/topology-aware/policy/pod-preferences.go b/cmd/plugins/topology-aware/policy/pod-preferences.go
index 8f702f618..af1bb1b90 100644
--- a/cmd/plugins/topology-aware/policy/pod-preferences.go
+++ b/cmd/plugins/topology-aware/policy/pod-preferences.go
@@ -62,11 +62,13 @@ type cpuClass int
 var cpuClassNames = map[cpuClass]string{
 	cpuNormal:   "normal",
 	cpuReserved: "reserved",
+	cpuPreserve: "preserve",
 }
 
 const (
 	cpuNormal cpuClass = iota
 	cpuReserved
+	cpuPreserve
 )
 
 // types by memory type name
@@ -93,6 +95,7 @@ const (
 	memoryDRAM
 	memoryPMEM
 	memoryHBM
+	memoryPreserve
 
 	memoryFirstUnusedBit
 	memoryAll = memoryFirstUnusedBit - 1
@@ -155,6 +158,9 @@ func sharedCPUsPreference(pod cache.Pod, container cache.Container) (bool, bool)
 // If the effective annotations are not found, this function falls back to
 // looking for the deprecated syntax by calling podMemoryTypePreference.
 func memoryTypePreference(pod cache.Pod, container cache.Container) memoryType {
+	if container.PreserveMemoryResources() {
+		return memoryPreserve
+	}
 	key := preferMemoryTypeKey
 	value, ok := pod.GetEffectiveAnnotation(key, container.GetName())
 	if !ok {
@@ -437,6 +443,8 @@ func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, int, bool, cpuClass)
 	// easy cases: kube-system namespace, Burstable or BestEffort QoS class containers
 	preferReserved, explicitReservation := checkReservedCPUsAnnotations(container)
 	switch {
+	case container.PreserveCpuResources():
+		return 0, fraction, false, cpuPreserve
 	case preferReserved == true:
 		return 0, fraction, false, cpuReserved
 	case checkReservedPoolNamespaces(namespace) && !explicitReservation:
diff --git a/cmd/plugins/topology-aware/policy/pools.go b/cmd/plugins/topology-aware/policy/pools.go
index c0d8e47e0..e3975e522 100644
--- a/cmd/plugins/topology-aware/policy/pools.go
+++ b/cmd/plugins/topology-aware/policy/pools.go
@@ -347,7 +347,7 @@ func (p *policy) allocatePool(container cache.Container, poolHint string) (Grant, error)
 	// the same pool. This assumption can be relaxed later, requires separate
 	// (but connected) scoring of memory and CPU.
 
-	if request.CPUType() == cpuReserved {
+	if request.CPUType() == cpuReserved || request.CPUType() == cpuPreserve {
 		pool = p.root
 	} else {
 		affinity, err := p.calculatePoolAffinities(request.GetContainer())
@@ -601,7 +601,8 @@ func (p *policy) applyGrant(grant Grant) {
 
 	cpus := ""
 	kind := ""
-	if cpuType == cpuNormal {
+	switch cpuType {
+	case cpuNormal:
 		if exclusive.IsEmpty() {
 			cpus = shared.String()
 			kind = "shared"
@@ -614,11 +615,13 @@ func (p *policy) applyGrant(grant Grant) {
 				cpus = exclusive.String()
 			}
 		}
-	} else if cpuType == cpuReserved {
+	case cpuReserved:
 		kind = "reserved"
 		cpus = reserved.String()
 		cpuPortion = grant.ReservedPortion()
-	} else {
+	case cpuPreserve:
+		// Will skip CPU pinning, may still pin memory.
+	default:
 		log.Debug("unsupported granted cpuType %s", cpuType)
 		return
 	}
@@ -629,13 +632,16 @@ func (p *policy) applyGrant(grant Grant) {
 	}
 
 	if opt.PinCPU {
-		if cpus != "" {
-			log.Info("  => pinning %s to (%s) cpuset %s", container.PrettyName(), kind, cpus)
+		if cpuType == cpuPreserve {
+			log.Info("  => preserving %s cpuset %s", container.PrettyName(), container.GetCpusetCpus())
 		} else {
-			log.Info("  => not pinning %s CPUs, cpuset is empty...",
-				container.PrettyName())
+			if cpus != "" {
+				log.Info("  => pinning %s to (%s) cpuset %s", container.PrettyName(), kind, cpus)
+			} else {
+				log.Info("  => not pinning %s CPUs, cpuset is empty...", container.PrettyName())
+			}
+			container.SetCpusetCpus(cpus)
 		}
-		container.SetCpusetCpus(cpus)
 
 		// Notes:
 		//   It is extremely important to ensure that the exclusive subset of mixed
@@ -664,11 +670,15 @@ func (p *policy) applyGrant(grant Grant) {
 		container.SetCPUShares(int64(cache.MilliCPUToShares(int64(milliCPU))))
 	}
 
-	if mems != "" {
-		log.Debug("  => pinning %s to memory %s", container.PrettyName(), mems)
-		container.SetCpusetMems(mems)
+	if grant.MemoryType() == memoryPreserve {
+		log.Debug("  => preserving %s memory pinning %s", container.PrettyName(), container.GetCpusetMems())
 	} else {
-		log.Debug("  => not pinning %s memory, memory set is empty...", container.PrettyName())
+		if mems != "" {
+			log.Debug("  => pinning %s to memory %s", container.PrettyName(), mems)
+		} else {
+			log.Debug("  => not pinning %s memory, memory set is empty...", container.PrettyName())
+		}
+		container.SetCpusetMems(mems)
 	}
 }
 
@@ -717,6 +727,11 @@ func (p *policy) updateSharedAllocations(grant *Grant) {
 			continue
 		}
 
+		if other.CPUType() == cpuPreserve {
+			log.Info("  => %s not affected (preserving CPU pinning)", other)
+			continue
+		}
+
 		if other.SharedPortion() == 0 && !other.ExclusiveCPUs().IsEmpty() {
 			log.Info("  => %s not affected (only exclusive CPUs)...", other)
 			continue
@@ -750,7 +765,7 @@ func (p *policy) filterInsufficientResources(req Request, originals []Node) []Node
 		supply := node.FreeSupply()
 
 		reqMemType := req.MemoryType()
-		if reqMemType == memoryUnspec {
+		if reqMemType == memoryUnspec || reqMemType == memoryPreserve {
			// The algorithm for handling unspecified memory allocations is the same as for handling a request
			// with memory type all.
			reqMemType = memoryAll
@@ -883,7 +898,7 @@ func (p *policy) compareScores(request Request, pools []Node, scores map[int]Score
 	log.Debug("  - affinity is a TIE")
 
 	// 3) matching memory type wins
-	if reqType := request.MemoryType(); reqType != memoryUnspec {
+	if reqType := request.MemoryType(); reqType != memoryUnspec && reqType != memoryPreserve {
 		if node1.HasMemoryType(reqType) && !node2.HasMemoryType(reqType) {
 			log.Debug("  => %s WINS on memory type", node1.Name())
 			return true
diff --git a/cmd/plugins/topology-aware/policy/resources.go b/cmd/plugins/topology-aware/policy/resources.go
index 497f8c1e7..b2e4ef210 100644
--- a/cmd/plugins/topology-aware/policy/resources.go
+++ b/cmd/plugins/topology-aware/policy/resources.go
@@ -456,7 +456,7 @@ func (cs *supply) AccountReleaseCPU(g Grant) {
 
 // allocateMemory tries to fulfill the memory allocation part of a request.
 func (cs *supply) allocateMemory(r Request) (memoryMap, error) {
 	reqType := r.MemoryType()
-	if reqType == memoryUnspec {
+	if reqType == memoryUnspec || reqType == memoryPreserve {
 		reqType = memoryAll
 	}
diff --git a/docs/resource-policy/policy/topology-aware.md b/docs/resource-policy/policy/topology-aware.md
index dc6fdff1b..ec84a52d3 100644
--- a/docs/resource-policy/policy/topology-aware.md
+++ b/docs/resource-policy/policy/topology-aware.md
@@ -310,6 +310,22 @@
 defined affinities with implicit co-location requires both careful
 consideration and a thorough understanding of affinity evaluation, or
 it should be avoided altogether.
 
+## Disabling CPU or Memory Pinning of a Container
+
+Some containers may need to run on all CPUs or access all memory
+without restrictions. Annotate these pods and containers to prevent
+the resource policy from touching their CPU or memory pinning.
+
+```yaml
+cpu.preserve.resource-policy.nri.io/container.CONTAINER_NAME: "true"
+cpu.preserve.resource-policy.nri.io/pod: "true"
+cpu.preserve.resource-policy.nri.io: "true"
+
+memory.preserve.resource-policy.nri.io/container.CONTAINER_NAME: "true"
+memory.preserve.resource-policy.nri.io/pod: "true"
+memory.preserve.resource-policy.nri.io: "true"
+```
+
 ## Cold Start
 
 The `topology-aware` policy supports "cold start" functionality. When cold start
diff --git a/test/e2e/policies.test-suite/topology-aware/n4c16/test11-reserved-cpu-annotations/code.var.sh b/test/e2e/policies.test-suite/topology-aware/n4c16/test11-reserved-cpu-annotations/code.var.sh
index b2768782c..b9c20ef79 100644
--- a/test/e2e/policies.test-suite/topology-aware/n4c16/test11-reserved-cpu-annotations/code.var.sh
+++ b/test/e2e/policies.test-suite/topology-aware/n4c16/test11-reserved-cpu-annotations/code.var.sh
@@ -1,9 +1,12 @@
-# Test that
-# - containers marked in Annotations pinned on Reserved CPUs.
+# Test annotations:
+# - prefer-reserved-cpus
+# - cpu.preserve
+# - memory.preserve
 
 cleanup-test-pods() {
     ( vm-command "kubectl delete pods pod0 --now" ) || true
     ( vm-command "kubectl delete pods pod1 --now" ) || true
+    ( vm-command "kubectl delete pods pod2 --now" ) || true
 }
 cleanup-test-pods
 
@@ -24,6 +27,26 @@ report allowed
 verify 'cpus["pod0c0"] == {"cpu10", "cpu11"}'
 verify 'cpus["pod1c0"] == {"cpu08"}'
 
+ANNOTATIONS=(
+    'cpu.preserve.resource-policy.nri.io: "true"'
+    'memory.preserve.resource-policy.nri.io/container.pod2c1: "true"'
+    'memory.preserve.resource-policy.nri.io/container.pod2c2: "true"'
+    'cpu.preserve.resource-policy.nri.io/container.pod2c2: "false"'
+    'cpu.preserve.resource-policy.nri.io/container.pod2c3: "false"'
+    'memory.preserve.resource-policy.nri.io/container.pod2c3: "false"'
+)
+CONTCOUNT=4 CPU=100m MEM=100M create reserved-annotated
+report allowed
+
+verify 'len(cpus["pod2c0"]) == 16' \
+       'len(mems["pod2c0"]) == 4' \
+       'len(cpus["pod2c1"]) == 16' \
+       'len(mems["pod2c1"]) == 4' \
+       'len(cpus["pod2c2"]) == 1' \
+       'len(mems["pod2c2"]) == 4' \
+       'len(cpus["pod2c3"]) == 1' \
+       'len(mems["pod2c3"]) == 1'
+
 cleanup-test-pods
 
 helm-terminate
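
For reviewers, a plain-Kubernetes sketch of how the new annotations compose may be useful alongside the e2e test above. The pod name `preserve-demo`, containers `c0`/`c1`, and the `busybox` image below are hypothetical; only the annotation keys, their pod/container scoping, and the override behavior come from the docs and test in this patch:

```yaml
apiVersion: v1
kind: Pod
metadata:
  # Hypothetical pod, mirroring the pod2c0/pod2c2 cases of the e2e test.
  name: preserve-demo
  annotations:
    # Pod-scoped default: leave CPU pinning of all containers untouched.
    cpu.preserve.resource-policy.nri.io: "true"
    # Container-scoped override: c1 opts back in to normal CPU pinning.
    cpu.preserve.resource-policy.nri.io/container.c1: "false"
    # Preserve memory pinning for c0 only.
    memory.preserve.resource-policy.nri.io/container.c0: "true"
spec:
  containers:
    - name: c0
      image: busybox
      command: ["sleep", "3600"]
      resources:
        requests:
          cpu: 100m
          memory: 100M
    - name: c1
      image: busybox
      command: ["sleep", "3600"]
      resources:
        requests:
          cpu: 100m
          memory: 100M
```

Per the e2e expectations, `c0` should keep an unrestricted cpuset (all 16 CPUs on the n4c16 test topology) because the pod-scoped annotation applies to it, while `c1` gets pinned as usual because its container-scoped `"false"` value takes precedence over the pod-scoped `"true"`, exactly as `pod2c2` does in the test.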