diff --git a/cmd/plugins/topology-aware/policy/cache_test.go b/cmd/plugins/topology-aware/policy/cache_test.go
index a3696d0cd..7ce1a7b80 100644
--- a/cmd/plugins/topology-aware/policy/cache_test.go
+++ b/cmd/plugins/topology-aware/policy/cache_test.go
@@ -88,11 +88,11 @@ func TestAllocationMarshalling(t *testing.T) {
 	}{
 		{
 			name: "non-zero Exclusive",
-			data: []byte(`{"key1":{"Exclusive":"1","Part":1,"CPUType":0,"Container":"1","Pool":"testnode","MemoryPool":0,"MemType":"DRAM,PMEM,HBM","MemSize":0,"ColdStart":0}}`),
+			data: []byte(`{"key1":{"PrettyName":"","Exclusive":"1","Part":1,"CPUType":0,"Container":"1","Pool":"testnode","MemoryPool":0,"MemType":"DRAM,PMEM,HBM","MemSize":0,"ColdStart":0}}`),
 		},
 		{
 			name: "zero Exclusive",
-			data: []byte(`{"key1":{"Exclusive":"","Part":1,"CPUType":0,"Container":"1","Pool":"testnode","MemoryPool":0,"MemType":"DRAM,PMEM,HBM","MemSize":0,"ColdStart":0}}`),
+			data: []byte(`{"key1":{"PrettyName":"","Exclusive":"","Part":1,"CPUType":0,"Container":"1","Pool":"testnode","MemoryPool":0,"MemType":"DRAM,PMEM,HBM","MemSize":0,"ColdStart":0}}`),
 		},
 	}
 	for _, tc := range tcases {
diff --git a/cmd/plugins/topology-aware/policy/mocks_test.go b/cmd/plugins/topology-aware/policy/mocks_test.go
index d9e7c6c21..f9b2da507 100644
--- a/cmd/plugins/topology-aware/policy/mocks_test.go
+++ b/cmd/plugins/topology-aware/policy/mocks_test.go
@@ -19,6 +19,7 @@ import (
 	"time"
 
 	nri "github.com/containerd/nri/pkg/api"
+	"github.com/containers/nri-plugins/pkg/agent/podresapi"
 	resmgr "github.com/containers/nri-plugins/pkg/apis/resmgr/v1alpha1"
 	"github.com/containers/nri-plugins/pkg/cpuallocator"
 	"github.com/containers/nri-plugins/pkg/resmgr/cache"
@@ -324,6 +325,9 @@ func (fake *mockSystem) SetCpusOnline(online bool, cpus idset.IDSet) (idset.IDSe
 func (fake *mockSystem) NodeDistance(idset.ID, idset.ID) int {
 	return 10
 }
+func (fake *mockSystem) NodeHintToCPUs(string) string {
+	return ""
+}
 
 type mockContainer struct {
 	name string
@@ -538,6 +542,9 @@ func (m *mockContainer) PreserveMemoryResources() bool {
 func (m *mockContainer) MemoryTypes() (libmem.TypeMask, error) {
 	return libmem.TypeMaskDRAM, nil
 }
+func (m *mockContainer) GetPodResources() *podresapi.ContainerResources {
+	return nil
+}
 
 type mockPod struct {
 	name string
@@ -625,6 +632,9 @@ func (m *mockPod) GetTasks(bool) ([]string, error) {
 func (m *mockPod) GetCtime() time.Time {
 	panic("unimplemented")
 }
+func (m *mockPod) GetPodResources() *podresapi.PodResources {
+	return nil
+}
 
 type mockCache struct {
 	returnValueForGetPolicyEntry bool
@@ -632,7 +642,7 @@ type mockCache struct {
 	returnValue2ForLookupContainer bool
 }
 
-func (m *mockCache) InsertPod(*nri.PodSandbox) (cache.Pod, error) {
+func (m *mockCache) InsertPod(*nri.PodSandbox, <-chan *podresapi.PodResources) cache.Pod {
 	panic("unimplemented")
 }
 func (m *mockCache) DeletePod(string) cache.Pod {
@@ -695,7 +705,7 @@ func (m *mockCache) GetPolicyEntry(string, interface{}) bool {
 func (m *mockCache) Save() error {
 	return nil
 }
-func (m *mockCache) RefreshPods([]*nri.PodSandbox) ([]cache.Pod, []cache.Pod, []cache.Container) {
+func (m *mockCache) RefreshPods([]*nri.PodSandbox, <-chan podresapi.PodResourcesList) ([]cache.Pod, []cache.Pod, []cache.Container) {
 	panic("unimplemented")
 }
 func (m *mockCache) RefreshContainers([]*nri.Container) ([]cache.Container, []cache.Container) {
diff --git a/cmd/plugins/topology-aware/policy/resources.go b/cmd/plugins/topology-aware/policy/resources.go
index f6d25a7d7..52c739605 100644
--- a/cmd/plugins/topology-aware/policy/resources.go
+++ b/cmd/plugins/topology-aware/policy/resources.go
@@ -831,6 +831,7 @@ func (cs *supply) GetScore(req Request) Score {
 
 	// calculate real hint scores
 	hints := cr.container.GetTopologyHints()
+	hints.ResolvePartialHints(cs.GetNode().System().NodeHintToCPUs) // fill in CPUs for NUMA-only hints before scoring
 	score.hints = make(map[string]float64, len(hints))
 
 	for provider, hint := range cr.container.GetTopologyHints() {
diff --git a/docs/resource-policy/policy/topology-aware.md b/docs/resource-policy/policy/topology-aware.md
index 103dc94bb..368b104a4 100644
--- a/docs/resource-policy/policy/topology-aware.md
+++ b/docs/resource-policy/policy/topology-aware.md
@@ -443,7 +443,7 @@ metadata:
     prefer-reserved-cpus.resource-policy.nri.io/container.special: "false"
 ```
 
-## Allowing or denying mount/device paths via annotations
+## Controlling Topology Hints Via Annotations
 
 User is able mark certain pods and containers to have allowed or denied
 paths for mounts or devices. What this means is that when the system
@@ -486,6 +486,16 @@ metadata:
       - /xy-zy/another-blacklisted-path5
 ```
 
+## Using Pod Resource API for Extra Topology Hints
+
+If access to the `kubelet`'s `Pod Resource API` is enabled in the
+[Node Agent's](../developers-guide/architecture.md#node-agent) configuration,
+it is automatically used to generate per-container topology hints when a
+device with locality to a NUMA node is advertised by the API. Annotated allow
+and deny lists can be used to selectively disable or enable per-resource hints,
+using `podresapi:$RESOURCE_NAME` as the path.
+
+
 ## Container Affinity and Anti-Affinity
 
 ### Introduction
diff --git a/pkg/sysfs/system.go b/pkg/sysfs/system.go
index 7dfe23181..7381a6460 100644
--- a/pkg/sysfs/system.go
+++ b/pkg/sysfs/system.go
@@ -123,6 +123,8 @@ type System interface {
 
 	Offlined() cpuset.CPUSet
 	Isolated() cpuset.CPUSet
+
+	NodeHintToCPUs(string) string
 }
 
 // System devices
@@ -742,6 +744,24 @@ func (sys *system) Isolated() cpuset.CPUSet {
 	return sys.IsolatedCPUs()
 }
 
+// NodeHintToCPUs resolves a list of node IDs, given as a string, to the online CPUs of those nodes, or "" if parsing fails.
+func (sys *system) NodeHintToCPUs(nodes string) string {
+	mset, err := cpuset.Parse(nodes)
+	if err != nil {
+		log.Error("failed to resolve nodes %q to CPUs: %v", nodes, err)
+		return ""
+	}
+
+	cset := cpuset.New()
+	for _, id := range mset.List() {
+		if n, ok := sys.nodes[id]; ok { // IDs with no entry in sys.nodes are skipped
+			cset = cset.Union(n.CPUSet())
+		}
+	}
+
+	return cset.Intersection(sys.OnlineCPUs()).String() // report only CPUs that are online
+}
+
 // Discover Cpus present in the system.
 func (sys *system) discoverCPUs() error {
 	if sys.cpus != nil {
diff --git a/pkg/topology/topology.go b/pkg/topology/topology.go
index 6eeb116f4..d3957a1c8 100644
--- a/pkg/topology/topology.go
+++ b/pkg/topology/topology.go
@@ -227,6 +227,17 @@ func MergeTopologyHints(org, hints Hints) (res Hints) {
 	return
 }
 
+// ResolvePartialHints updates hints in place: each hint with NUMAs but no CPUs gets its CPUs set to resolve(NUMAs).
+func (hints Hints) ResolvePartialHints(resolve func(NUMAs string) string) {
+	for k, h := range hints {
+		if h.CPUs == "" && h.NUMAs != "" { // partial hint: NUMAs set, CPUs empty
+			h.CPUs = resolve(h.NUMAs)
+			log.Debugf("partial NUMA hint %q resolved to CPUs %q", h.NUMAs, h.CPUs)
+			hints[k] = h // write the updated hint back into the map
+		}
+	}
+}
+
 // String returns the hints as a string.
 func (h *Hint) String() string {
 	cpus, nodes, sockets, sep := "", "", "", ""