topology-aware: implement CPU and memory pinning preservation
Signed-off-by: Antti Kervinen <[email protected]>
askervin authored and klihub committed Feb 14, 2024
1 parent 5085371 commit 42457cf
Showing 5 changed files with 80 additions and 18 deletions.
8 changes: 8 additions & 0 deletions cmd/plugins/topology-aware/policy/pod-preferences.go
@@ -62,11 +62,13 @@ type cpuClass int
var cpuClassNames = map[cpuClass]string{
cpuNormal: "normal",
cpuReserved: "reserved",
cpuPreserve: "preserve",
}

const (
cpuNormal cpuClass = iota
cpuReserved
cpuPreserve
)

// types by memory type name
@@ -93,6 +95,7 @@ const (
memoryDRAM
memoryPMEM
memoryHBM
memoryPreserve
memoryFirstUnusedBit
memoryAll = memoryFirstUnusedBit - 1

@@ -155,6 +158,9 @@ func sharedCPUsPreference(pod cache.Pod, container cache.Container) (bool, bool)
// If the effective annotations are not found, this function falls back to
// looking for the deprecated syntax by calling podMemoryTypePreference.
func memoryTypePreference(pod cache.Pod, container cache.Container) memoryType {
if container.PreserveMemoryResources() {
return memoryPreserve
}
key := preferMemoryTypeKey
value, ok := pod.GetEffectiveAnnotation(key, container.GetName())
if !ok {
@@ -437,6 +443,8 @@ func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, in
// easy cases: kube-system namespace, Burstable or BestEffort QoS class containers
preferReserved, explicitReservation := checkReservedCPUsAnnotations(container)
switch {
case container.PreserveCpuResources():
return 0, fraction, false, cpuPreserve
case preferReserved == true:
return 0, fraction, false, cpuReserved
case checkReservedPoolNamespaces(namespace) && !explicitReservation:
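The checks above rely on two container-level predicates, `PreserveCpuResources()` and `PreserveMemoryResources()`. For orientation, the standalone sketch below shows how such a preference could be resolved from the pod annotations documented later in this commit; the helper name, the precedence order, and the example annotation values are assumptions for illustration, not the plugin's actual lookup code.

```go
package main

import "fmt"

// preserveRequested is a hypothetical, self-contained stand-in for the kind of
// effective-annotation lookup that presumably backs PreserveCpuResources() and
// PreserveMemoryResources(). The key names come from the documentation added
// in this commit; the precedence (container-specific key first, then pod-wide
// keys) is an assumption for illustration.
func preserveRequested(annotations map[string]string, resource, containerName string) bool {
	keys := []string{
		// e.g. cpu.preserve.resource-policy.nri.io/container.CONTAINER_NAME
		resource + ".preserve.resource-policy.nri.io/container." + containerName,
		// e.g. cpu.preserve.resource-policy.nri.io/pod
		resource + ".preserve.resource-policy.nri.io/pod",
		// e.g. cpu.preserve.resource-policy.nri.io
		resource + ".preserve.resource-policy.nri.io",
	}
	for _, key := range keys {
		if value, ok := annotations[key]; ok {
			return value == "true"
		}
	}
	return false
}

func main() {
	annotations := map[string]string{
		"cpu.preserve.resource-policy.nri.io":                 "true",
		"memory.preserve.resource-policy.nri.io/container.c1": "true",
		"cpu.preserve.resource-policy.nri.io/container.c2":    "false",
	}
	fmt.Println(preserveRequested(annotations, "cpu", "c1"))    // true (pod-wide default)
	fmt.Println(preserveRequested(annotations, "memory", "c1")) // true (container-specific)
	fmt.Println(preserveRequested(annotations, "cpu", "c2"))    // false (container-specific override)
	fmt.Println(preserveRequested(annotations, "memory", "c2")) // false (no matching key)
}
```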
45 changes: 30 additions & 15 deletions cmd/plugins/topology-aware/policy/pools.go
@@ -347,7 +347,7 @@ func (p *policy) allocatePool(container cache.Container, poolHint string) (Grant
// the same pool. This assumption can be relaxed later, requires separate
// (but connected) scoring of memory and CPU.

-if request.CPUType() == cpuReserved {
+if request.CPUType() == cpuReserved || request.CPUType() == cpuPreserve {
pool = p.root
} else {
affinity, err := p.calculatePoolAffinities(request.GetContainer())
@@ -601,7 +601,8 @@ func (p *policy) applyGrant(grant Grant) {

cpus := ""
kind := ""
-if cpuType == cpuNormal {
+switch cpuType {
+case cpuNormal:
if exclusive.IsEmpty() {
cpus = shared.String()
kind = "shared"
@@ -614,11 +615,13 @@ func (p *policy) applyGrant(grant Grant) {
cpus = exclusive.String()
}
}
-} else if cpuType == cpuReserved {
+case cpuReserved:
kind = "reserved"
cpus = reserved.String()
cpuPortion = grant.ReservedPortion()
-} else {
+case cpuPreserve:
+// Will skip CPU pinning, may still pin memory.
+default:
log.Debug("unsupported granted cpuType %s", cpuType)
return
}
@@ -629,13 +632,16 @@ func (p *policy) applyGrant(grant Grant) {
}

if opt.PinCPU {
if cpus != "" {
log.Info(" => pinning %s to (%s) cpuset %s", container.PrettyName(), kind, cpus)
if cpuType == cpuPreserve {
log.Info(" => preserving %s cpuset %s", container.PrettyName(), container.GetCpusetCpus())
} else {
-log.Info(" => not pinning %s CPUs, cpuset is empty...",
-container.PrettyName())
+if cpus != "" {
+log.Info(" => pinning %s to (%s) cpuset %s", container.PrettyName(), kind, cpus)
+} else {
+log.Info(" => not pinning %s CPUs, cpuset is empty...", container.PrettyName())
+}
+container.SetCpusetCpus(cpus)
}
-container.SetCpusetCpus(cpus)

// Notes:
// It is extremely important to ensure that the exclusive subset of mixed
@@ -664,11 +670,15 @@ func (p *policy) applyGrant(grant Grant) {
container.SetCPUShares(int64(cache.MilliCPUToShares(int64(milliCPU))))
}

if mems != "" {
log.Debug(" => pinning %s to memory %s", container.PrettyName(), mems)
container.SetCpusetMems(mems)
if grant.MemoryType() == memoryPreserve {
log.Debug(" => preserving %s memory pinning %s", container.PrettyName(), container.GetCpusetMems())
} else {
-log.Debug(" => not pinning %s memory, memory set is empty...", container.PrettyName())
+if mems != "" {
+log.Debug(" => pinning %s to memory %s", container.PrettyName(), mems)
+} else {
+log.Debug(" => not pinning %s memory, memory set is empty...", container.PrettyName())
+}
+container.SetCpusetMems(mems)
}
}

@@ -717,6 +727,11 @@ func (p *policy) updateSharedAllocations(grant *Grant) {
continue
}

if other.CPUType() == cpuPreserve {
log.Info(" => %s not affected (preserving CPU pinning)", other)
continue
}

if other.SharedPortion() == 0 && !other.ExclusiveCPUs().IsEmpty() {
log.Info(" => %s not affected (only exclusive CPUs)...", other)
continue
@@ -750,7 +765,7 @@ func (p *policy) filterInsufficientResources(req Request, originals []Node) []No
supply := node.FreeSupply()
reqMemType := req.MemoryType()

-if reqMemType == memoryUnspec {
+if reqMemType == memoryUnspec || reqMemType == memoryPreserve {
// The algorithm for handling unspecified memory allocations is the same as for handling a request
// with memory type all.
reqMemType = memoryAll
@@ -883,7 +898,7 @@ func (p *policy) compareScores(request Request, pools []Node, scores map[int]Sco
log.Debug(" - affinity is a TIE")

// 3) matching memory type wins
-if reqType := request.MemoryType(); reqType != memoryUnspec {
+if reqType := request.MemoryType(); reqType != memoryUnspec && reqType != memoryPreserve {
if node1.HasMemoryType(reqType) && !node2.HasMemoryType(reqType) {
log.Debug(" => %s WINS on memory type", node1.Name())
return true
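Taken together, the pools.go changes route CPU-preserving containers to the root pool, leave their existing cpuset and memory pinning untouched in applyGrant, and skip them when shared allocations are rebalanced. The standalone sketch below summarizes that flow with invented, simplified names; it glosses over details such as exclusive-CPU handling and is not the plugin's actual API.

```go
package main

import "fmt"

// request captures the two preservation preferences of a container.
type request struct {
	cpuPreserve bool // what container.PreserveCpuResources() reports
	memPreserve bool // what container.PreserveMemoryResources() reports
}

// decision summarizes, in simplified terms, what the policy does with the grant.
type decision struct {
	pool            string // which pool the grant is taken from
	writeCpusetCpus bool   // does applyGrant overwrite cpuset.cpus?
	writeCpusetMems bool   // does applyGrant overwrite cpuset.mems?
	rebalance       bool   // may updateSharedAllocations touch it later?
}

func decide(r request) decision {
	d := decision{
		pool:            "scored by topology/affinity",
		writeCpusetCpus: !r.cpuPreserve,
		writeCpusetMems: !r.memPreserve,
		rebalance:       !r.cpuPreserve,
	}
	if r.cpuPreserve {
		// Like reserved-CPU containers, CPU-preserving containers are granted
		// from the root pool and skipped when shared allocations are updated.
		d.pool = "root"
	}
	return d
}

func main() {
	fmt.Printf("%+v\n", decide(request{cpuPreserve: true, memPreserve: true}))
	fmt.Printf("%+v\n", decide(request{cpuPreserve: false, memPreserve: true}))
}
```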
2 changes: 1 addition & 1 deletion cmd/plugins/topology-aware/policy/resources.go
@@ -456,7 +456,7 @@ func (cs *supply) AccountReleaseCPU(g Grant) {
// allocateMemory tries to fulfill the memory allocation part of a request.
func (cs *supply) allocateMemory(r Request) (memoryMap, error) {
reqType := r.MemoryType()
-if reqType == memoryUnspec {
+if reqType == memoryUnspec || reqType == memoryPreserve {
reqType = memoryAll
}

16 changes: 16 additions & 0 deletions docs/resource-policy/policy/topology-aware.md
@@ -310,6 +310,22 @@ defined affinities with implicit co-location requires both careful consideration
and a thorough understanding of affinity evaluation, or it should be avoided
altogether.

## Disabling CPU or Memory Pinning of a Container

Some containers may need to run on all CPUs or access all memory nodes
without restrictions. Annotate such pods and containers to prevent
the resource policy from touching their CPU or memory pinning.

```yaml
cpu.preserve.resource-policy.nri.io/container.CONTAINER_NAME: "true"
cpu.preserve.resource-policy.nri.io/pod: "true"
cpu.preserve.resource-policy.nri.io: "true"
memory.preserve.resource-policy.nri.io/container.CONTAINER_NAME: "true"
memory.preserve.resource-policy.nri.io/pod: "true"
memory.preserve.resource-policy.nri.io: "true"
```
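As an illustration, the pod below preserves the existing CPU pinning of a single container while leaving memory pinning under policy control; the pod, container, and image names are placeholders.

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: preserve-example            # placeholder name
  annotations:
    # keep the policy from touching the CPU pinning of container "metrics" only
    cpu.preserve.resource-policy.nri.io/container.metrics: "true"
spec:
  containers:
  - name: metrics
    image: registry.example.com/metrics:latest
    resources:
      requests:
        cpu: 100m
        memory: 100M
```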

## Cold Start

The `topology-aware` policy supports "cold start" functionality. When cold start
@@ -1,9 +1,12 @@
-# Test that
-# - containers marked in Annotations pinned on Reserved CPUs.
+# Test annotations:
+# - prefer-reserved-cpus
+# - cpu.preserve
+# - memory.preserve

cleanup-test-pods() {
( vm-command "kubectl delete pods pod0 --now" ) || true
( vm-command "kubectl delete pods pod1 --now" ) || true
( vm-command "kubectl delete pods pod2 --now" ) || true
}
cleanup-test-pods

@@ -24,6 +27,26 @@ report allowed
verify 'cpus["pod0c0"] == {"cpu10", "cpu11"}'
verify 'cpus["pod1c0"] == {"cpu08"}'

ANNOTATIONS=(
'cpu.preserve.resource-policy.nri.io: "true"'
'memory.preserve.resource-policy.nri.io/container.pod2c1: "true"'
'memory.preserve.resource-policy.nri.io/container.pod2c2: "true"'
'cpu.preserve.resource-policy.nri.io/container.pod2c2: "false"'
'cpu.preserve.resource-policy.nri.io/container.pod2c3: "false"'
'memory.preserve.resource-policy.nri.io/container.pod2c3: "false"'
)
CONTCOUNT=4 CPU=100m MEM=100M create reserved-annotated
report allowed

verify 'len(cpus["pod2c0"]) == 16' \
'len(mems["pod2c0"]) == 4' \
'len(cpus["pod2c1"]) == 16' \
'len(mems["pod2c1"]) == 4' \
'len(cpus["pod2c2"]) == 1' \
'len(mems["pod2c2"]) == 4' \
'len(cpus["pod2c3"]) == 1' \
'len(mems["pod2c3"]) == 1'

cleanup-test-pods

helm-terminate
