diff --git a/cmd/plugins/topology-aware/policy/cache.go b/cmd/plugins/topology-aware/policy/cache.go
index d12159014..454cb8bd6 100644
--- a/cmd/plugins/topology-aware/policy/cache.go
+++ b/cmd/plugins/topology-aware/policy/cache.go
@@ -31,7 +31,9 @@ const (
 
 func (p *policy) saveAllocations() {
 	p.cache.SetPolicyEntry(keyAllocations, cache.Cacheable(&p.allocations))
-	p.cache.Save()
+	if err := p.cache.Save(); err != nil {
+		log.Warnf("failed to save allocations to cache: %v", err)
+	}
 }
 
 func (p *policy) restoreAllocations(allocations *allocations) error {
@@ -231,12 +233,11 @@ func (a *allocations) Get() interface{} {
 func (a *allocations) Set(value interface{}) {
	var from *allocations
 
-	switch value.(type) {
+	switch val := value.(type) {
 	case allocations:
-		v := value.(allocations)
-		from = &v
+		from = &val
 	case *allocations:
-		from = value.(*allocations)
+		from = val
 	}
 
 	a.grants = make(map[string]Grant, 32)
diff --git a/cmd/plugins/topology-aware/policy/coldstart_test.go b/cmd/plugins/topology-aware/policy/coldstart_test.go
index 2ce630c47..643ff67ad 100644
--- a/cmd/plugins/topology-aware/policy/coldstart_test.go
+++ b/cmd/plugins/topology-aware/policy/coldstart_test.go
@@ -38,7 +38,9 @@ func sendEvent(param interface{}) error {
 	fmt.Printf("Event received: %v", param)
 
 	event := param.(*events.Policy)
-	globalPolicy.HandleEvent(event)
+	if _, err := globalPolicy.HandleEvent(event); err != nil {
+		log.Warnf("failed to handle test event: %v", err)
+	}
 	return nil
 }
 
@@ -126,18 +128,20 @@ func TestColdStart(t *testing.T) {
 			t.Errorf("Expected one memory controller %v, got: %v", tc.expectedPMEMSystemNodeID, mems)
 		}
 
-		if grant.MemoryType()&memoryDRAM != 0 {
-			// FIXME: should we report only the limited memory types or the granted types
-			// while the cold start is going on?
-			// t.Errorf("No DRAM was expected before coldstart timer: %v", grant.MemoryType())
-		}
+		// FIXME: should we report only the limited memory types or the granted types
+		// while the cold start is going on?
+		//if grant.MemoryType()&memoryDRAM != 0 {
+		//	t.Errorf("No DRAM was expected before coldstart timer: %v", grant.MemoryType())
+		//}
 
 		globalPolicy = policy
 
-		policy.options.SendEvent(&events.Policy{
+		if err := policy.options.SendEvent(&events.Policy{
 			Type: events.ContainerStarted,
 			Data: tc.container,
-		})
+		}); err != nil {
+			log.Warnf("failed to send test event: %v", err)
+		}
 
 		time.Sleep(tc.expectedColdStartTimeout * 2)
 
diff --git a/cmd/plugins/topology-aware/policy/libmem.go b/cmd/plugins/topology-aware/policy/libmem.go
index 588b146d5..697393580 100644
--- a/cmd/plugins/topology-aware/policy/libmem.go
+++ b/cmd/plugins/topology-aware/policy/libmem.go
@@ -18,9 +18,9 @@ import libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory"
 
 func (p *policy) getMemOffer(pool Node, req Request) (*libmem.Offer, error) {
 	var (
+		zone libmem.NodeMask
+		mtyp libmem.TypeMask
 		ctr = req.GetContainer()
-		zone = libmem.NodeMask(0)
-		mtyp = libmem.TypeMask(0)
 	)
 
 	if memType := req.MemoryType(); memType == memoryPreserve {
@@ -80,18 +80,10 @@ func (p *policy) releaseMem(id string) error {
 	return p.memAllocator.Release(id)
 }
 
-func (p *policy) poolZoneType(pool Node, memType memoryType) libmem.TypeMask {
-	return p.memAllocator.ZoneType(libmem.NewNodeMask(pool.GetMemset(memType).Members()...))
-}
-
 func (p *policy) memZoneType(zone libmem.NodeMask) libmem.TypeMask {
 	return p.memAllocator.ZoneType(zone)
 }
 
-func (p *policy) poolZone(pool Node, memType memoryType) libmem.NodeMask {
-	return libmem.NewNodeMask(pool.GetMemset(memType).Members()...)
-}
-
 func (p *policy) poolZoneCapacity(pool Node, memType memoryType) int64 {
 	return p.memAllocator.ZoneCapacity(libmem.NewNodeMask(pool.GetMemset(memType).Members()...))
 }
diff --git a/cmd/plugins/topology-aware/policy/mocks_test.go b/cmd/plugins/topology-aware/policy/mocks_test.go
index f9b2da507..ab79c973d 100644
--- a/cmd/plugins/topology-aware/policy/mocks_test.go
+++ b/cmd/plugins/topology-aware/policy/mocks_test.go
@@ -128,11 +128,9 @@ func (p *mockCPUPackage) SstInfo() *sst.SstPackageInfo {
 }
 
 type mockCPU struct {
-	isolated cpuset.CPUSet
-	online cpuset.CPUSet
-	id idset.ID
-	node mockSystemNode
-	pkg mockCPUPackage
+	id idset.ID
+	node mockSystemNode
+	pkg mockCPUPackage
 }
 
 func (c *mockCPU) BaseFrequency() uint64 {
@@ -334,8 +332,6 @@ type mockContainer struct {
 	namespace string
 	returnValueForGetResourceRequirements v1.ResourceRequirements
 	returnValueForGetID string
-	memoryLimit int64
-	cpuset cpuset.CPUSet
 	returnValueForQOSClass v1.PodQOSClass
 	pod cache.Pod
 }
diff --git a/cmd/plugins/topology-aware/policy/node.go b/cmd/plugins/topology-aware/policy/node.go
index 28498763e..21ae09bbc 100644
--- a/cmd/plugins/topology-aware/policy/node.go
+++ b/cmd/plugins/topology-aware/policy/node.go
@@ -105,9 +105,9 @@ type Node interface {
 	// AssignNUMANodes assigns the given set of NUMA nodes to this one.
 	AssignNUMANodes(ids []idset.ID)
 	// DepthFirst traverse the tree@node calling the function at each node.
-	DepthFirst(func(Node) error) error
+	DepthFirst(func(Node))
 	// BreadthFirst traverse the tree@node calling the function at each node.
-	BreadthFirst(func(Node) error) error
+	BreadthFirst(func(Node))
 	// Dump state of the node.
 	Dump(string, ...int)
 	// Dump type-specific state of the node.
@@ -329,29 +329,19 @@ func (n *node) dump(prefix string, level ...int) {
 }
 
 // Do a depth-first traversal starting at node calling the given function at each node.
-func (n *node) DepthFirst(fn func(Node) error) error {
+func (n *node) DepthFirst(fn func(Node)) {
 	for _, c := range n.children {
-		if err := c.DepthFirst(fn); err != nil {
-			return err
-		}
+		c.DepthFirst(fn)
 	}
-
-	return fn(n)
+	fn(n)
 }
 
 // Do a breadth-first traversal starting at node calling the given function at each node.
-func (n *node) BreadthFirst(fn func(Node) error) error {
-	if err := fn(n); err != nil {
-		return err
-	}
-
+func (n *node) BreadthFirst(fn func(Node)) {
+	fn(n)
 	for _, c := range n.children {
-		if err := c.BreadthFirst(fn); err != nil {
-			return err
-		}
+		c.BreadthFirst(fn)
 	}
-
-	return nil
 }
 
 // System returns the policy System instance.
@@ -791,7 +781,7 @@ func (n *socketnode) HintScore(hint topology.Hint) float64 {
 func (p *policy) NewVirtualNode(name string, parent Node) Node {
 	n := &virtualnode{}
 	n.self.node = n
-	n.node.init(p, fmt.Sprintf("%s", name), VirtualNode, parent)
+	n.node.init(p, name, VirtualNode, parent)
 	return n
 }
 
diff --git a/cmd/plugins/topology-aware/policy/pod-preferences.go b/cmd/plugins/topology-aware/policy/pod-preferences.go
index 9a0d1a32c..23b53a9fa 100644
--- a/cmd/plugins/topology-aware/policy/pod-preferences.go
+++ b/cmd/plugins/topology-aware/policy/pod-preferences.go
@@ -492,7 +492,7 @@ func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, in
 	switch {
 	case container.PreserveCpuResources():
 		return 0, fraction, false, cpuPreserve, prio
-	case preferReserved == true:
+	case preferReserved:
 		return 0, fraction, false, cpuReserved, prio
 	case checkReservedPoolNamespaces(namespace) && !explicitReservation:
 		return 0, fraction, false, cpuReserved, prio
diff --git a/cmd/plugins/topology-aware/policy/pools.go b/cmd/plugins/topology-aware/policy/pools.go
index 9ad290489..33b7b767b 100644
--- a/cmd/plugins/topology-aware/policy/pools.go
+++ b/cmd/plugins/topology-aware/policy/pools.go
@@ -184,7 +184,7 @@ func (p *policy) buildPoolsByTopology() error {
 
 	// enumerate pools, calculate depth, discover resource capacity, assign NUMA nodes
 	p.pools = make([]Node, 0)
-	p.root.DepthFirst(func(n Node) error {
+	p.root.DepthFirst(func(n Node) {
 		p.pools = append(p.pools, n)
 		n.(*node).id = p.nodeCnt
 		p.nodeCnt++
@@ -195,8 +195,6 @@ func (p *policy) buildPoolsByTopology() error {
 
 		n.DiscoverSupply(assigned[n.(*node).self.node])
 		delete(assigned, n.(*node).self.node)
-
-		return nil
 	})
 
 	// make sure all PMEM, HBM nodes got assigned
@@ -634,75 +632,18 @@ func (p *policy) updateSharedAllocations(grant *Grant) {
 	}
 }
 
-func (p *policy) filterInsufficientResources(req Request, pools []Node) []Node {
-	filtered := make([]Node, 0)
-
-	memNeed := req.MemAmountToAllocate()
-	isolate := req.Isolate()
-	full, fraction := req.FullCPUs(), req.CPUFraction()
-
-	for _, node := range pools {
-		// check pool memory availability
-		memType := req.MemoryType()
-		if memType == memoryUnspec || memType == memoryPreserve {
-			memType = memoryAll
-		}
-
-		memAvail := p.poolZoneFree(node, memType)
-		if memAvail < memNeed {
-			log.Debug("%s has insufficient available %s memory (%s < %s)", node.Name(),
-				memType, prettyMem(memAvail), prettyMem(memNeed))
-			continue
-		}
-
-		log.Debug("%s has enough available %s memory (%s >= %s)", node.Name(),
-			memType, prettyMem(memAvail), prettyMem(memNeed))
-
-		cs := node.FreeSupply()
-
-		// check pool cpu availability
-		isolated := cs.IsolatedCPUs().Size()
-		slicable := cs.AllocatableSharedCPU()
-
-		if isolate {
-			if isolated < full && slicable < 1000*full {
-				log.Debug("%s has insufficient slicable capacity (%dm) for %d isolated CPUs",
-					node.Name(), slicable, full)
-				continue
-			}
-
-			log.Debug("%s has enough slicable capacity (%dm) for %d isolated CPUs",
-				node.Name(), slicable, full)
-		}
-
-		if slicable < 1000*full+fraction {
-			log.Debug("%s has insufficient slicable capacity (%dm) for %d+%dm full+fractional CPU",
-				node.Name(), slicable, full, fraction)
-			continue
-		}
-
-		log.Debug("%s has enough slicable capacity (%dm) for %d+%dm full+fractional CPU",
-			node.Name(), slicable, full, fraction)
-
-		filtered = append(filtered, node)
-	}
-
-	return filtered
-}
-
 // Score pools against the request and sort them by score.
 func (p *policy) sortPoolsByScore(req Request, aff map[int]int32) (map[int]Score, []Node) {
 	scores := make(map[int]Score, p.nodeCnt)
 
-	p.root.DepthFirst(func(n Node) error {
+	p.root.DepthFirst(func(n Node) {
 		scores[n.NodeID()] = n.GetScore(req)
-		return nil
 	})
 
 	// Filter out pools which don't have enough uncompressible resources
 	// (memory) to satisfy the request.
 	//filteredPools := p.filterInsufficientResources(req, p.pools)
-	filteredPools := make([]Node, len(p.pools), len(p.pools))
+	filteredPools := make([]Node, len(p.pools))
 	copy(filteredPools, p.pools)
 
 	sort.Slice(filteredPools, func(i, j int) bool {
@@ -1048,12 +989,11 @@ func affinityScore(affinities map[int]int32, node Node) float64 {
 		a := affinities[n.NodeID()]
 		score += q * float64(a)
 	}
-	node.BreadthFirst(func(n Node) error {
+	node.BreadthFirst(func(n Node) {
 		diff := float64(n.RootDistance() - node.RootDistance())
 		q := math.Pow(Q, diff)
 		a := affinities[n.NodeID()]
 		score += q * float64(a)
-		return nil
 	})
 	return score
 }
diff --git a/cmd/plugins/topology-aware/policy/pools_test.go b/cmd/plugins/topology-aware/policy/pools_test.go
index 00a530950..d3de02b52 100644
--- a/cmd/plugins/topology-aware/policy/pools_test.go
+++ b/cmd/plugins/topology-aware/policy/pools_test.go
@@ -16,7 +16,6 @@ package topologyaware
 
 import (
 	"fmt"
-	"io/ioutil"
 	"os"
 	"path"
 	"testing"
@@ -28,15 +27,6 @@ import (
 	"github.com/containers/nri-plugins/pkg/utils"
 )
 
-func findNodeWithID(id int, nodes []Node) Node {
-	for _, node := range nodes {
-		if node.NodeID() == id {
-			return node
-		}
-	}
-	panic("No node found with id " + fmt.Sprintf("%d", id))
-}
-
 func findNodeWithName(name string, nodes []Node) Node {
 	for _, node := range nodes {
 		if node.Name() == name {
@@ -46,35 +36,12 @@ func findNodeWithName(name string, nodes []Node) Node {
 	panic("No node found with name " + name)
 }
 
-func setLinks(nodes []Node, tree map[int][]int) {
-	hasParent := map[int]struct{}{}
-	for parent, children := range tree {
-		parentNode := findNodeWithID(parent, nodes)
-		for _, child := range children {
-			childNode := findNodeWithID(child, nodes)
-			childNode.LinkParent(parentNode)
-			hasParent[child] = struct{}{}
-		}
-	}
-	orphans := []int{}
-	for id := range tree {
-		if _, ok := hasParent[id]; !ok {
-			node := findNodeWithID(id, nodes)
-			node.LinkParent(nilnode)
-			orphans = append(orphans, id)
-		}
-	}
-	if len(orphans) != 1 {
-		panic(fmt.Sprintf("expected one root node, got %d with IDs %v", len(orphans), orphans))
-	}
-}
-
 func TestPoolCreation(t *testing.T) {
 	// Test pool creation with "real" sysfs data.
 
 	// Create a temporary directory for the test data.
-	dir, err := ioutil.TempDir("", "nri-resource-policy-test-sysfs-")
+	dir, err := os.MkdirTemp("", "nri-resource-policy-test-sysfs-")
 	if err != nil {
 		panic(err)
 	}
 
@@ -173,7 +140,9 @@ func TestPoolCreation(t *testing.T) {
 
 		log.EnableDebug(true)
 		policy := New().(*policy)
-		policy.Setup(policyOptions)
+		if err := policy.Setup(policyOptions); err != nil {
+			log.Warnf("failed to setup test policy: %v", err)
+		}
 		log.EnableDebug(false)
 
 		if policy.root.GetSupply().SharableCPUs().Size()+policy.root.GetSupply().IsolatedCPUs().Size()+policy.root.GetSupply().ReservedCPUs().Size() != tc.expectedRootNodeCPUs {
@@ -227,7 +196,7 @@ func TestWorkloadPlacement(t *testing.T) {
 	// server system.
 
 	// Create a temporary directory for the test data.
-	dir, err := ioutil.TempDir("", "nri-resource-policy-test-sysfs-")
+	dir, err := os.MkdirTemp("", "nri-resource-policy-test-sysfs-")
 	if err != nil {
 		panic(err)
 	}
@@ -296,7 +265,9 @@ func TestWorkloadPlacement(t *testing.T) {
 
 		log.EnableDebug(true)
 		policy := New().(*policy)
-		policy.Setup(policyOptions)
+		if err := policy.Setup(policyOptions); err != nil {
+			log.Warnf("failed to setup test policy: %v", err)
+		}
 		log.EnableDebug(false)
 
 		scores, filteredPools := policy.sortPoolsByScore(tc.req, tc.affinities)
@@ -331,7 +302,7 @@ func TestAffinities(t *testing.T) {
 
 	//
 	// Create a temporary directory for the test data.
-	dir, err := ioutil.TempDir("", "nri-resource-policy-test-sysfs-")
+	dir, err := os.MkdirTemp("", "nri-resource-policy-test-sysfs-")
 	if err != nil {
 		panic(err)
 	}
@@ -554,7 +525,9 @@ func TestAffinities(t *testing.T) {
 
 		log.EnableDebug(true)
 		policy := New().(*policy)
-		policy.Setup(policyOptions)
+		if err := policy.Setup(policyOptions); err != nil {
+			log.Warnf("failed to setup test policy: %v", err)
+		}
 		log.EnableDebug(false)
 
 		affinities := map[int]int32{}
diff --git a/cmd/plugins/topology-aware/policy/resources.go b/cmd/plugins/topology-aware/policy/resources.go
index 52c739605..c5e9f333a 100644
--- a/cmd/plugins/topology-aware/policy/resources.go
+++ b/cmd/plugins/topology-aware/policy/resources.go
@@ -584,7 +584,7 @@ func (cs *supply) DumpAllocatable() string {
 	cpu += sep + fmt.Sprintf("sharable:%s (", kubernetes.ShortCPUSet(cs.sharable))
 	sep = ""
 	if local_grantedShared > 0 || total_grantedShared > 0 {
-		cpu += fmt.Sprintf("grantedShared:")
+		cpu += "grantedShared:"
 		kind := ""
 		if local_grantedShared > 0 {
 			cpu += fmt.Sprintf("%dm", local_grantedShared)
@@ -1108,9 +1108,8 @@ func (cg *grant) String() string {
 }
 
 func (cg *grant) AccountAllocateCPU() {
-	cg.node.DepthFirst(func(n Node) error {
+	cg.node.DepthFirst(func(n Node) {
 		n.FreeSupply().AccountAllocateCPU(cg)
-		return nil
 	})
 	for node := cg.node.Parent(); !node.IsNil(); node = node.Parent() {
 		node.FreeSupply().AccountAllocateCPU(cg)
@@ -1154,9 +1153,8 @@ func (cg *grant) ReallocMemory(types libmem.TypeMask) error {
 }
 
 func (cg *grant) AccountReleaseCPU() {
-	cg.node.DepthFirst(func(n Node) error {
+	cg.node.DepthFirst(func(n Node) {
 		n.FreeSupply().AccountReleaseCPU(cg)
-		return nil
 	})
 	for node := cg.node.Parent(); !node.IsNil(); node = node.Parent() {
 		node.FreeSupply().AccountReleaseCPU(cg)
diff --git a/cmd/plugins/topology-aware/policy/topology-aware-policy.go b/cmd/plugins/topology-aware/policy/topology-aware-policy.go
index 48b751e42..e9858f05a 100644
--- a/cmd/plugins/topology-aware/policy/topology-aware-policy.go
+++ b/cmd/plugins/topology-aware/policy/topology-aware-policy.go
@@ -65,7 +65,6 @@ type policy struct {
 	allocations allocations // container pool assignments
 	cpuAllocator cpuallocator.CPUAllocator // CPU allocator used by the policy
 	memAllocator *libmem.Allocator
-	coldstartOff bool // coldstart forced off (have movable PMEM zones)
 	metrics *TopologyAwareMetrics
 }
 
@@ -153,10 +152,14 @@ func (p *policy) Start() error {
 func (p *policy) Sync(add []cache.Container, del []cache.Container) error {
 	log.Debug("synchronizing state...")
 	for _, c := range del {
-		p.ReleaseResources(c)
+		if err := p.ReleaseResources(c); err != nil {
+			log.Warnf("failed to release resources for %s: %v", c.PrettyName(), err)
+		}
 	}
 	for _, c := range add {
-		p.AllocateResources(c)
+		if err := p.AllocateResources(c); err != nil {
+			log.Warnf("failed to allocate resources for %s: %v", c.PrettyName(), err)
+		}
 	}
 
 	p.checkAllocations(" ")