Skip to content

Commit

Permalink
WiP: topology-aware: support for CPU allocator priorities.
Browse files Browse the repository at this point in the history
Add support for configurable default and annotated per-container
CPU priority preferences. These determine the preferred priority
for CPUs when doing fully or partially exclusive CPU allocation.
Priorities are calculated for such allocations and passed on to
the CPU allocator which then tries to fulfill these preferences.

It should now be possible to configure the policy to allocate
(exclusive) E-cores by default and P-cores to containers which
are annotated so, or to do it the other way around.

Signed-off-by: Krisztian Litkey <[email protected]>
  • Loading branch information
klihub committed Mar 13, 2024
1 parent c6a9e24 commit 148dfac
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 21 deletions.
66 changes: 51 additions & 15 deletions cmd/plugins/topology-aware/policy/pod-preferences.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ const (
keyColdStartPreference = "cold-start"
// annotation key for reserved pools
keyReservedCPUsPreference = "prefer-reserved-cpus"
// annotation key for CPU Priority preference
keyCpuPriorityPreference = "prefer-cpu-priority"

// effective annotation key for isolated CPU preference
preferIsolatedCPUsKey = keyIsolationPreference + "." + kubernetes.ResmgrKeyNamespace
Expand All @@ -53,6 +55,8 @@ const (
preferColdStartKey = keyColdStartPreference + "." + kubernetes.ResmgrKeyNamespace
// annotation key for reserved pools
preferReservedCPUsKey = keyReservedCPUsPreference + "." + kubernetes.ResmgrKeyNamespace
// effective annotation key for CPU priority preference
preferCpuPriorityKey = keyCpuPriorityPreference + "." + kubernetes.ResmgrKeyNamespace
)

// cpuClass is a type of CPU to allocate
Expand Down Expand Up @@ -153,6 +157,36 @@ func sharedCPUsPreference(pod cache.Pod, container cache.Container) (bool, bool)
return preference, true
}

// cpuPrioPreference resolves the CPU priority preference of a container.
//
// The effective CPU priority annotation of the pod is looked up for the
// container. The second return value tells whether the returned priority
// came from a valid explicit annotation:
//   - no annotation: fallback is returned, false
//   - annotation "default": the current defaultPrio is returned, true
//   - annotation naming a known priority: that priority is returned, true
//   - any other annotation value: an error is logged, fallback is returned, false
func cpuPrioPreference(pod cache.Pod, container cache.Container, fallback cpuPrio) (cpuPrio, bool) {
	value, annotated := pod.GetEffectiveAnnotation(preferCpuPriorityKey, container.GetName())

	switch {
	case !annotated:
		log.Debug("%s: implicit CPU priority preference %q", container.PrettyName(), fallback)
		return fallback, false
	case value == "default":
		log.Debug("%s: explicit CPU priority preference %q", container.PrettyName(), defaultPrio)
		return defaultPrio, true
	}

	if named, known := cpuPrioByName[value]; known {
		log.Debug("%s: explicit CPU priority preference %q", container.PrettyName(), named)
		return named, true
	}

	// Unknown priority name: report it and behave as if not annotated.
	log.Error("%s: invalid CPU priority preference %q", container.PrettyName(), value)
	log.Debug("%s: implicit CPU priority preference %q", container.PrettyName(), fallback)
	return fallback, false
}

// memoryTypePreference returns what type of memory should be allocated for the container.
//
// If the effective annotations are not found, this function falls back to
Expand Down Expand Up @@ -370,7 +404,7 @@ func checkReservedCPUsAnnotations(c cache.Container) (bool, bool) {
// 2. fraction: amount of fractional CPU in milli-CPU
// 3. isolate: (bool) whether to prefer isolated full CPUs
// 4. cpuType: (cpuClass) class of CPU to allocate (reserved vs. normal)
// 5. prio: (cpuPrio) preferred CPU priority, ignored for fractional allocations
func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, int, bool, cpuClass) {
func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, int, bool, cpuClass, cpuPrio) {
//
// CPU allocation preferences for a container consist of
//
Expand Down Expand Up @@ -439,60 +473,62 @@ func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, in
request := reqs.Requests[corev1.ResourceCPU]
qosClass := pod.GetQOSClass()
fraction := int(request.MilliValue())
prio := defaultPrio // ignored for fractional allocations

// easy cases: kube-system namespace, Burstable or BestEffort QoS class containers
preferReserved, explicitReservation := checkReservedCPUsAnnotations(container)
switch {
case container.PreserveCpuResources():
return 0, fraction, false, cpuPreserve
return 0, fraction, false, cpuPreserve, prio
case preferReserved == true:
return 0, fraction, false, cpuReserved
return 0, fraction, false, cpuReserved, prio
case checkReservedPoolNamespaces(namespace) && !explicitReservation:
return 0, fraction, false, cpuReserved
return 0, fraction, false, cpuReserved, prio
case qosClass == corev1.PodQOSBurstable:
return 0, fraction, false, cpuNormal
return 0, fraction, false, cpuNormal, prio
case qosClass == corev1.PodQOSBestEffort:
return 0, 0, false, cpuNormal
return 0, 0, false, cpuNormal, prio
}

// complex case: Guaranteed QoS class containers
cores := fraction / 1000
fraction = fraction % 1000
preferIsolated, explicitIsolated := isolatedCPUsPreference(pod, container)
preferShared, explicitShared := sharedCPUsPreference(pod, container)
prio, _ = cpuPrioPreference(pod, container, defaultPrio) // ignored for fractional allocations

switch {
// sub-core CPU request
case cores == 0:
return 0, fraction, false, cpuNormal
return 0, fraction, false, cpuNormal, prio
// 1 <= CPU request < 2
case cores < 2:
// fractional allocation, potentially mixed
if fraction > 0 {
if preferShared {
return 0, 1000*cores + fraction, false, cpuNormal
return 0, 1000*cores + fraction, false, cpuNormal, prio
}
return cores, fraction, preferIsolated, cpuNormal
return cores, fraction, preferIsolated, cpuNormal, prio
}
// non-fractional allocation
if preferShared && explicitShared {
return 0, 1000*cores + fraction, false, cpuNormal
return 0, 1000*cores + fraction, false, cpuNormal, prio
}
return cores, fraction, preferIsolated, cpuNormal
return cores, fraction, preferIsolated, cpuNormal, prio
// CPU request >= 2
default:
// fractional allocation, only mixed if explicitly annotated as unshared
if fraction > 0 {
if !preferShared && explicitShared {
return cores, fraction, preferIsolated && explicitIsolated, cpuNormal
return cores, fraction, preferIsolated && explicitIsolated, cpuNormal, prio
}
return 0, 1000*cores + fraction, false, cpuNormal
return 0, 1000*cores + fraction, false, cpuNormal, prio
}
// non-fractional allocation
if preferShared && explicitShared {
return 0, 1000 * cores, false, cpuNormal
return 0, 1000 * cores, false, cpuNormal, prio
}
return cores, fraction, preferIsolated && explicitIsolated, cpuNormal
return cores, fraction, preferIsolated && explicitIsolated, cpuNormal, prio
}
}

Expand Down
40 changes: 35 additions & 5 deletions cmd/plugins/topology-aware/policy/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,28 @@ import (
idset "github.com/intel/goresctrl/pkg/utils"
)

// cpuPrio is a package-local alias for the CPU allocator's priority type.
type (
cpuPrio = cpuallocator.CPUPriority
)

// Package-local shorthands for the CPU allocator's priority constants.
const (
highPrio = cpuallocator.PriorityHigh
normalPrio = cpuallocator.PriorityNormal
lowPrio = cpuallocator.PriorityLow
nonePrio = cpuallocator.PriorityNone
)

var (
// defaultPrio is the CPU priority used for containers without an explicit
// priority annotation. It starts out as nonePrio and is overwritten from
// the policy configuration (DefaultCPUPriority) on reconfiguration.
defaultPrio = nonePrio

// cpuPrioByName maps the priority names accepted in the CPU priority
// annotation to their corresponding priorities.
cpuPrioByName = map[string]cpuPrio{
"high": highPrio,
"normal": normalPrio,
"low": lowPrio,
"none": nonePrio,
}
)

// Supply represents available CPU and memory capacity of a node.
type Supply interface {
// GetNode returns the node supplying this capacity.
Expand Down Expand Up @@ -95,6 +117,8 @@ type Request interface {
String() string
// CPUType returns the type of requested CPU.
CPUType() cpuClass
// CPUPrio returns the preferred priority of requested CPU.
CPUPrio() cpuPrio
// SetCPUType sets the type of requested CPU.
SetCPUType(cpuType cpuClass)
// FullCPUs returns the number of full CPUs requested.
Expand Down Expand Up @@ -223,6 +247,7 @@ type request struct {
fraction int // amount of fractional CPU requested
isolate bool // prefer isolated exclusive CPUs
cpuType cpuClass // preferred CPU type (normal, reserved)
prio cpuPrio // CPU priority preference, ignored for fraction requests

memReq uint64 // memory request
memLim uint64 // memory limit
Expand Down Expand Up @@ -575,15 +600,15 @@ func (cs *supply) AllocateCPU(r Request) (Grant, error) {
// allocate isolated exclusive CPUs or slice them off the sharable set
switch {
case full > 0 && cs.isolated.Size() >= full && cr.isolate:
exclusive, err = cs.takeCPUs(&cs.isolated, nil, full)
exclusive, err = cs.takeCPUs(&cs.isolated, nil, full, cr.CPUPrio())
if err != nil {
return nil, policyError("internal error: "+
"%s: can't take %d exclusive isolated CPUs from %s: %v",
cs.node.Name(), full, cs.isolated, err)
}

case full > 0 && cs.AllocatableSharedCPU() > 1000*full:
exclusive, err = cs.takeCPUs(&cs.sharable, nil, full)
exclusive, err = cs.takeCPUs(&cs.sharable, nil, full, cr.CPUPrio())
if err != nil {
return nil, policyError("internal error: "+
"%s: can't take %d exclusive CPUs from %s: %v",
Expand Down Expand Up @@ -764,8 +789,8 @@ func (cs *supply) ReserveMemory(g Grant) error {
}

// takeCPUs takes up to cnt CPUs from a given CPU set to another.
func (cs *supply) takeCPUs(from, to *cpuset.CPUSet, cnt int) (cpuset.CPUSet, error) {
cset, err := cs.node.Policy().cpuAllocator.AllocateCpus(from, cnt, cpuallocator.PriorityHigh)
func (cs *supply) takeCPUs(from, to *cpuset.CPUSet, cnt int, prio cpuPrio) (cpuset.CPUSet, error) {
cset, err := cs.node.Policy().cpuAllocator.AllocateCpus(from, cnt, prio)
if err != nil {
return cset, err
}
Expand Down Expand Up @@ -942,7 +967,7 @@ func (cs *supply) DumpMemoryState(prefix string) {
// newRequest creates a new request for the given container.
func newRequest(container cache.Container) Request {
pod, _ := container.GetPod()
full, fraction, isolate, cpuType := cpuAllocationPreferences(pod, container)
full, fraction, isolate, cpuType, prio := cpuAllocationPreferences(pod, container)
req, lim, mtype := memoryAllocationPreference(pod, container)
coldStart := time.Duration(0)

Expand Down Expand Up @@ -984,6 +1009,7 @@ func newRequest(container cache.Container) Request {
memLim: lim,
memType: mtype,
coldStart: coldStart,
prio: prio,
}
}

Expand Down Expand Up @@ -1019,6 +1045,10 @@ func (cr *request) CPUType() cpuClass {
return cr.cpuType
}

// CPUPrio returns the preferred CPU priority stored in the request.
func (cr *request) CPUPrio() cpuPrio {
return cr.prio
}

// SetCPUType sets the requested type of CPU for the grant.
func (cr *request) SetCPUType(cpuType cpuClass) {
cr.cpuType = cpuType
Expand Down
4 changes: 3 additions & 1 deletion cmd/plugins/topology-aware/policy/topology-aware-policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ func (p *policy) Reconfigure(newCfg interface{}) error {

opt = cfg
p.cfg = cfg
defaultPrio = cfg.DefaultCPUPriority.Value()

if err := p.initialize(); err != nil {
*p = savedPolicy
Expand All @@ -435,6 +436,7 @@ func (p *policy) Reconfigure(newCfg interface{}) error {
if err := grant.RefetchNodes(); err != nil {
*p = savedPolicy
opt = p.cfg
defaultPrio = p.cfg.DefaultCPUPriority.Value()
return policyError("failed to reconfigure: %v", err)
}
}
Expand Down Expand Up @@ -523,7 +525,7 @@ func (p *policy) checkConstraints() error {
// Use CpuAllocator to pick reserved CPUs among
// allowed ones. Because using those CPUs is allowed,
// they remain (they are put back) in the allowed set.
cset, err := p.cpuAllocator.AllocateCpus(&p.allowed, p.reserveCnt, cpuallocator.PriorityNormal)
cset, err := p.cpuAllocator.AllocateCpus(&p.allowed, p.reserveCnt, normalPrio)
p.allowed = p.allowed.Union(cset)
if err != nil {
log.Fatal("cannot reserve %dm CPUs for ReservedResources from AvailableResources: %s", qty.MilliValue(), err)
Expand Down
12 changes: 12 additions & 0 deletions config/crd/bases/config.nri_topologyawarepolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,18 @@ spec:
- classes
type: object
type: object
defaultCPUPriority:
default: none
description: |-
DefaultCPUPriority (high, normal, low, none) if set, is the default CPU
priority passed to the CPU allocator when allocating exclusive CPUs and
the container is not annotated otherwise.
enum:
- high
- normal
- low
- none
type: string
instrumentation:
description: Config provides runtime configuration for instrumentation.
properties:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,18 @@ spec:
- classes
type: object
type: object
defaultCPUPriority:
default: none
description: |-
DefaultCPUPriority (high, normal, low, none) if set, is the default CPU
priority passed to the CPU allocator when allocating exclusive CPUs and
the container is not annotated otherwise.
enum:
- high
- normal
- low
- none
type: string
instrumentation:
description: Config provides runtime configuration for instrumentation.
properties:
Expand Down
31 changes: 31 additions & 0 deletions pkg/apis/config/v1alpha1/resmgr/policy/topologyaware/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
package topologyaware

import (
"strings"

policy "github.com/containers/nri-plugins/pkg/apis/config/v1alpha1/resmgr/policy"
"github.com/containers/nri-plugins/pkg/cpuallocator"
)

type (
Expand All @@ -33,6 +36,27 @@ const (
AmountCPUSet = policy.AmountCPUSet
)

// CPUPriority is the configuration-level name of a CPU allocator priority.
type CPUPriority string

// Names of the CPU priorities accepted in the configuration.
const (
	// PriorityHigh names the high CPU priority.
	PriorityHigh CPUPriority = "high"
	// PriorityNormal names the normal CPU priority.
	PriorityNormal CPUPriority = "normal"
	// PriorityLow names the low CPU priority.
	PriorityLow CPUPriority = "low"
	// PriorityNone names the absence of a CPU priority preference.
	PriorityNone CPUPriority = "none"
)

// Value translates the configured priority name into the corresponding
// CPU allocator priority. The name is matched case-insensitively; any
// name other than high, normal or low yields cpuallocator.PriorityNone.
func (p CPUPriority) Value() cpuallocator.CPUPriority {
	normalized := CPUPriority(strings.ToLower(string(p)))

	switch normalized {
	case PriorityHigh:
		return cpuallocator.PriorityHigh
	case PriorityNormal:
		return cpuallocator.PriorityNormal
	case PriorityLow:
		return cpuallocator.PriorityLow
	default:
		return cpuallocator.PriorityNone
	}
}

// +k8s:deepcopy-gen=true
// +optional
type Config struct {
Expand Down Expand Up @@ -77,4 +101,11 @@ type Config struct {
// of it.
// +kubebuilder:validation:Required
ReservedResources Constraints `json:"reservedResources"`
// DefaultCPUPriority (high, normal, low, none) if set, is the default CPU
// priority passed to the CPU allocator when allocating exclusive CPUs and
// the container is not annotated otherwise.
// +kubebuilder:validation:Enum=high;normal;low;none
// +kubebuilder:default=none
// +kubebuilder:validation:Format:string
DefaultCPUPriority CPUPriority `json:"defaultCPUPriority,omitempty"`
}

0 comments on commit 148dfac

Please sign in to comment.