From 348e84f7119fb18dd21b32958baff2801b455853 Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Mon, 3 Jun 2024 16:44:22 +0300 Subject: [PATCH] topology-aware: initial libmem conversion. Cut out the original memory accounting and allocation code. Plug in a libmem-based memory allocator instead. Signed-off-by: Krisztian Litkey --- cmd/plugins/topology-aware/policy/cache.go | 70 +- .../topology-aware/policy/coldstart.go | 10 +- cmd/plugins/topology-aware/policy/libmem.go | 95 +++ cmd/plugins/topology-aware/policy/node.go | 35 +- .../topology-aware/policy/pod-preferences.go | 49 +- cmd/plugins/topology-aware/policy/pools.go | 324 ++------ .../topology-aware/policy/resources.go | 776 +++++------------- .../policy/topology-aware-policy.go | 38 +- 8 files changed, 474 insertions(+), 923 deletions(-) create mode 100644 cmd/plugins/topology-aware/policy/libmem.go diff --git a/cmd/plugins/topology-aware/policy/cache.go b/cmd/plugins/topology-aware/policy/cache.go index 95a1596f9..af690bb38 100644 --- a/cmd/plugins/topology-aware/policy/cache.go +++ b/cmd/plugins/topology-aware/policy/cache.go @@ -19,8 +19,8 @@ import ( "time" "github.com/containers/nri-plugins/pkg/resmgr/cache" + libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory" "github.com/containers/nri-plugins/pkg/utils/cpuset" - idset "github.com/intel/goresctrl/pkg/utils" ) const ( @@ -66,23 +66,33 @@ func (p *policy) reinstateGrants(grants map[string]Grant) error { pool := grant.GetCPUNode() supply := pool.FreeSupply() - if err := supply.Reserve(grant); err != nil { - return policyError("failed to update pool %q with CPU grant of %q: %v", + o, err := p.restoreMemOffer(grant) + if err != nil { + return policyError("failed to get libmem offer for pool %q, grant of %s: %w", pool.Name(), c.PrettyName(), err) } - log.Info("updated pool %q with reinstated CPU grant of %q", - pool.Name(), c.PrettyName()) - - pool = grant.GetMemoryNode() - if err := supply.ReserveMemory(grant); err != nil { - grant.GetCPUNode().FreeSupply().ReleaseCPU(grant) - return policyError("failed to update pool %q with extra memory of %q: %v", + updates, err := supply.Reserve(grant, o) + if err != nil { + return policyError("failed to update pool %q with CPU grant of %q: %v", pool.Name(), c.PrettyName(), err) } - log.Info("updated pool %q with reinstanted memory reservation of %q", - pool.Name(), c.PrettyName()) + for uID, uZone := range updates { + if ug, ok := p.allocations.grants[uID]; !ok { + log.Error("failed to update grant %s to memory zone to %s, grant not found", + uID, uZone) + } else { + ug.SetMemoryZone(uZone) + if opt.PinMemory { + ug.GetContainer().SetCpusetMems(uZone.MemsetString()) + } + log.Info("updated grant %s to memory zone %s", uID, uZone) + } + } + + log.Info("updated pool %q with reinstated CPU grant of %q, memory zone %s", + pool.Name(), c.PrettyName(), grant.GetMemoryZone()) p.allocations.grants[id] = grant p.applyGrant(grant) @@ -94,16 +104,15 @@ func (p *policy) reinstateGrants(grants map[string]Grant) error { } type cachedGrant struct { - Exclusive string - Part int - CPUType cpuClass - Container string - Pool string - MemoryPool string - MemType memoryType - Memset idset.IDSet - MemoryLimit memoryMap - ColdStart time.Duration + Exclusive string + Part int + CPUType cpuClass + Container string + Pool string + MemoryPool libmem.NodeMask + MemType memoryType + MemSize int64 + ColdStart time.Duration } func newCachedGrant(cg Grant) *cachedGrant { @@ -113,15 +122,9 @@ func newCachedGrant(cg Grant) *cachedGrant { ccg.CPUType = 
cg.CPUType() ccg.Container = cg.GetContainer().GetID() ccg.Pool = cg.GetCPUNode().Name() - ccg.MemoryPool = cg.GetMemoryNode().Name() + ccg.MemoryPool = cg.GetMemoryZone() ccg.MemType = cg.MemoryType() - ccg.Memset = cg.Memset().Clone() - - ccg.MemoryLimit = make(memoryMap) - for key, value := range cg.MemLimit() { - ccg.MemoryLimit[key] = value - } - + ccg.MemSize = cg.GetMemorySize() ccg.ColdStart = cg.ColdStart() return ccg @@ -144,14 +147,11 @@ func (ccg *cachedGrant) ToGrant(policy *policy) (Grant, error) { cpuset.MustParse(ccg.Exclusive), ccg.Part, ccg.MemType, - ccg.MemoryLimit, ccg.ColdStart, ) - if g.Memset().String() != ccg.Memset.String() { - log.Error("cache error: mismatch in stored/recalculated memset: %s != %s", - ccg.Memset, g.Memset()) - } + g.SetMemoryZone(ccg.MemoryPool) + g.SetMemorySize(ccg.MemSize) return g, nil } diff --git a/cmd/plugins/topology-aware/policy/coldstart.go b/cmd/plugins/topology-aware/policy/coldstart.go index 7468bc250..f369ab14e 100644 --- a/cmd/plugins/topology-aware/policy/coldstart.go +++ b/cmd/plugins/topology-aware/policy/coldstart.go @@ -19,6 +19,7 @@ import ( "github.com/containers/nri-plugins/pkg/resmgr/cache" "github.com/containers/nri-plugins/pkg/resmgr/events" + libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory" ) // trigger cold start for the container if necessary. @@ -63,8 +64,13 @@ func (p *policy) finishColdStart(c cache.Container) (bool, error) { return false, policyError("coldstart: no grant found for %s", c.PrettyName()) } - log.Info("restoring memset to grant %v", g) - g.RestoreMemset() + log.Info("reallocating %s after coldstart", g) + err := g.ReallocMemory(p.memZoneType(g.GetMemoryZone()) | libmem.TypeMaskDRAM) + if err != nil { + log.Error("failed to reallocate %s after coldstart: %v", g, err) + } else { + log.Info("reallocated %s", g) + } g.ClearTimer() return true, nil diff --git a/cmd/plugins/topology-aware/policy/libmem.go b/cmd/plugins/topology-aware/policy/libmem.go new file mode 100644 index 000000000..2bc353baf --- /dev/null +++ b/cmd/plugins/topology-aware/policy/libmem.go @@ -0,0 +1,95 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package topologyaware + +import libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory" + +func (p *policy) getMemOffer(pool Node, req Request) (*libmem.Offer, error) { + var ( + ctr = req.GetContainer() + zone = libmem.NodeMask(0) + mtyp = libmem.TypeMask(0) + ) + + if memType := req.MemoryType(); memType == memoryPreserve { + zone = libmem.NewNodeMask(pool.GetMemset(memoryAll).Members()...) + mtyp = p.memAllocator.ZoneType(zone) + } else { + zone = libmem.NewNodeMask(pool.GetMemset(memType).Members()...) 
+ mtyp = libmem.TypeMask(memType) + } + + o, err := p.memAllocator.GetOffer( + libmem.Container( + ctr.GetID(), + ctr.PrettyName(), + req.MemAmountToAllocate(), + string(ctr.GetQOSClass()), + zone, + mtyp, + ), + ) + + return o, err +} + +func (p *policy) restoreMemOffer(g Grant) (*libmem.Offer, error) { + var ( + ctr = g.GetContainer() + zone = g.GetMemoryZone() + mtyp = p.memAllocator.ZoneType(zone) + ) + + o, err := p.memAllocator.GetOffer( + libmem.Container( + ctr.GetID(), + ctr.PrettyName(), + g.GetMemorySize(), + string(ctr.GetQOSClass()), + zone, + mtyp, + ), + ) + + return o, err +} + +func (p *policy) reallocMem(id string, nodes libmem.NodeMask, types libmem.TypeMask) (libmem.NodeMask, map[string]libmem.NodeMask, error) { + return p.memAllocator.Realloc(id, nodes, types) +} + +func (p *policy) releaseMem(id string) error { + return p.memAllocator.Release(id) +} + +func (p *policy) poolZoneType(pool Node, memType memoryType) libmem.TypeMask { + return p.memAllocator.ZoneType(libmem.NewNodeMask(pool.GetMemset(memType).Members()...)) +} + +func (p *policy) memZoneType(zone libmem.NodeMask) libmem.TypeMask { + return p.memAllocator.ZoneType(zone) +} + +func (p *policy) poolZone(pool Node, memType memoryType) libmem.NodeMask { + return libmem.NewNodeMask(pool.GetMemset(memType).Members()...) +} + +func (p *policy) poolZoneCapacity(pool Node, memType memoryType) int64 { + return p.memAllocator.ZoneCapacity(libmem.NewNodeMask(pool.GetMemset(memType).Members()...)) +} + +func (p *policy) poolZoneFree(pool Node, memType memoryType) int64 { + return p.memAllocator.ZoneFree(libmem.NewNodeMask(pool.GetMemset(memType).Members()...)) +} diff --git a/cmd/plugins/topology-aware/policy/node.go b/cmd/plugins/topology-aware/policy/node.go index 30dad107f..28498763e 100644 --- a/cmd/plugins/topology-aware/policy/node.go +++ b/cmd/plugins/topology-aware/policy/node.go @@ -298,7 +298,6 @@ func (n *node) Dump(prefix string, level ...int) { n.self.node.dump(prefix, lvl) log.Debug("%s - %s", idt, n.noderes.DumpCapacity()) log.Debug("%s - %s", idt, n.freeres.DumpAllocatable()) - n.freeres.DumpMemoryState(idt + " ") if n.mem.Size() > 0 { log.Debug("%s - normal memory: %v", idt, n.mem) } @@ -309,15 +308,8 @@ func (n *node) Dump(prefix string, level ...int) { log.Debug("%s - PMEM memory: %v", idt, n.pMem) } for _, grant := range n.policy.allocations.grants { - cpuNodeID := grant.GetCPUNode().NodeID() - memNodeID := grant.GetMemoryNode().NodeID() - switch { - case cpuNodeID == n.id && memNodeID == n.id: - log.Debug("%s + cpu+mem %s", idt, grant) - case cpuNodeID == n.id: - log.Debug("%s + cpuonly %s", idt, grant) - case memNodeID == n.id: - log.Debug("%s + memonly %s", idt, grant) + if grant.GetCPUNode().NodeID() == n.id { + log.Debug("%s + %s", idt, grant) } } if !n.Parent().IsNil() { @@ -396,7 +388,7 @@ func (n *node) discoverSupply(assignedNUMANodes []idset.ID) Supply { n.Name()) } - n.noderes = newSupply(n, cpuset.New(), cpuset.New(), cpuset.New(), 0, 0, nil, nil) + n.noderes = newSupply(n, cpuset.New(), cpuset.New(), cpuset.New(), 0, 0) for _, c := range n.children { supply := c.GetSupply() n.noderes.Cumulate(supply) @@ -409,7 +401,6 @@ func (n *node) discoverSupply(assignedNUMANodes []idset.ID) Supply { } else { log.Debug("%s: discovering attached/assigned resources...", n.Name()) - mmap := createMemoryMap(0, 0, 0) cpus := cpuset.New() for _, nodeID := range assignedNUMANodes { @@ -424,18 +415,15 @@ func (n *node) discoverSupply(assignedNUMANodes []idset.ID) Supply { switch node.GetMemoryType() { case 
system.MemoryTypeDRAM: n.mem.Add(nodeID) - mmap.AddDRAM(meminfo.MemTotal) shortCPUs := kubernetes.ShortCPUSet(nodeCPUs) log.Debug(" + assigned DRAM NUMA node #%d (cpuset: %s, DRAM %.2fM)", nodeID, shortCPUs, float64(meminfo.MemTotal)/float64(1024*1024)) case system.MemoryTypePMEM: n.pMem.Add(nodeID) - mmap.AddPMEM(meminfo.MemTotal) log.Debug(" + assigned PMEM NUMA node #%d (DRAM %.2fM)", nodeID, float64(meminfo.MemTotal)/float64(1024*1024)) case system.MemoryTypeHBM: n.hbm.Add(nodeID) - mmap.AddHBM(meminfo.MemTotal) log.Debug(" + assigned HBMEM NUMA node #%d (DRAM %.2fM)", nodeID, float64(meminfo.MemTotal)/float64(1024*1024)) default: @@ -463,7 +451,7 @@ func (n *node) discoverSupply(assignedNUMANodes []idset.ID) Supply { isolated := cpus.Intersection(n.policy.isolated) reserved := cpus.Intersection(n.policy.reserved).Difference(isolated) sharable := cpus.Difference(isolated).Difference(reserved) - n.noderes = newSupply(n, isolated, reserved, sharable, 0, 0, mmap, nil) + n.noderes = newSupply(n, isolated, reserved, sharable, 0, 0) log.Debug(" = %s", n.noderes.DumpCapacity()) } @@ -491,8 +479,6 @@ func (n *node) AssignNUMANodes(ids []idset.ID) { // assignNUMANodes assigns the given set of NUMA nodes to this one. func (n *node) assignNUMANodes(ids []idset.ID) { - mem := createMemoryMap(0, 0, 0) - for _, numaNodeID := range ids { if n.mem.Has(numaNodeID) || n.pMem.Has(numaNodeID) || n.hbm.Has(numaNodeID) { log.Warn("*** NUMA node #%d already discovered by or assigned to %s", @@ -500,27 +486,17 @@ func (n *node) assignNUMANodes(ids []idset.ID) { continue } numaNode := n.policy.sys.Node(numaNodeID) - memTotal := uint64(0) - if meminfo, err := numaNode.MemoryInfo(); err != nil { - log.Error("%s: failed to get memory info for NUMA node #%d", - n.Name(), numaNodeID) - } else { - memTotal = meminfo.MemTotal - } switch numaNode.GetMemoryType() { case system.MemoryTypeDRAM: - mem.Add(memTotal, 0, 0) n.mem.Add(numaNodeID) log.Info("*** DRAM NUMA node #%d assigned to pool node %q", numaNodeID, n.Name()) case system.MemoryTypePMEM: n.pMem.Add(numaNodeID) - mem.Add(0, memTotal, 0) log.Info("*** PMEM NUMA node #%d assigned to pool node %q", numaNodeID, n.Name()) case system.MemoryTypeHBM: n.hbm.Add(numaNodeID) - mem.Add(0, 0, memTotal) log.Info("*** HBM NUMA node #%d assigned to pool node %q", numaNodeID, n.Name()) default: @@ -528,9 +504,6 @@ func (n *node) assignNUMANodes(ids []idset.ID) { numaNodeID, numaNode.GetMemoryType()) } } - - n.noderes.AssignMemory(mem) - n.freeres.AssignMemory(mem) } // Discover the set of memory attached to this node. 
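The helpers in the new libmem.go wrap p.memAllocator so the rest of the policy can keep reasoning in terms of pool nodes and memoryType masks, while the per-node memoryMap bookkeeping above disappears from discovery. Roughly, memory allocation becomes a two-step offer/commit cycle. The sketch below condenses that cycle into one hypothetical helper; it is illustrative only and assumes the libmem API exactly as it is used elsewhere in this patch (NewNodeMask, GetOffer, ZoneType, Commit), and it skips the memoryPreserve special case that the real getMemOffer() handles.

// Illustrative only: how the libmem helpers combine into an allocation.
// The helper name is hypothetical; the libmem calls are the ones used in this patch.
func (p *policy) exampleAllocate(pool Node, req Request) (libmem.NodeMask, map[string]libmem.NodeMask, error) {
	// Turn the pool's NUMA nodes for the requested memory type into a libmem zone.
	zone := libmem.NewNodeMask(pool.GetMemset(req.MemoryType()).Members()...)

	// Ask the allocator for an offer against that zone; nothing is committed yet,
	// so scoring can hold one pending offer per candidate pool.
	offer, err := p.memAllocator.GetOffer(
		libmem.Container(
			req.GetContainer().GetID(),
			req.GetContainer().PrettyName(),
			req.MemAmountToAllocate(),
			string(req.GetContainer().GetQOSClass()),
			zone,
			p.memAllocator.ZoneType(zone),
		),
	)
	if err != nil {
		return libmem.NodeMask(0), nil, err
	}

	// Committing the winning offer finalizes the allocation: it returns the zone
	// assigned to this container and the adjusted zones of any other containers
	// the allocator had to expand to make room.
	return offer.Commit()
}

Because GetOffer() commits nothing, each candidate pool can be scored with its own pending offer, which is why Score gains an Offer() accessor below and compareScores() prefers pools whose offers succeeded and are tighter.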
diff --git a/cmd/plugins/topology-aware/policy/pod-preferences.go b/cmd/plugins/topology-aware/policy/pod-preferences.go index 37bc7bf4d..c672bf085 100644 --- a/cmd/plugins/topology-aware/policy/pod-preferences.go +++ b/cmd/plugins/topology-aware/policy/pod-preferences.go @@ -29,6 +29,7 @@ import ( "github.com/containers/nri-plugins/pkg/kubernetes" "github.com/containers/nri-plugins/pkg/resmgr/cache" + libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory" ) const ( @@ -83,25 +84,17 @@ var memoryNamedTypes = map[string]memoryType{ "mixed": memoryAll, } -// names by memory type -var memoryTypeNames = map[memoryType]string{ - memoryDRAM: "DRAM", - memoryPMEM: "PMEM", - memoryHBM: "HBM", -} - // memoryType is bitmask of types of memory to allocate -type memoryType int +type memoryType libmem.TypeMask // memoryType bits const ( - memoryUnspec memoryType = (0x1 << iota) >> 1 - memoryDRAM - memoryPMEM - memoryHBM - memoryPreserve - memoryFirstUnusedBit - memoryAll = memoryFirstUnusedBit - 1 + memoryUnspec = memoryType(libmem.TypeMask(0)) + memoryDRAM = memoryType(libmem.TypeMaskDRAM) + memoryPMEM = memoryType(libmem.TypeMaskPMEM) + memoryHBM = memoryType(libmem.TypeMaskHBM) + memoryPreserve = memoryType(libmem.TypeMaskHBM << 1) + memoryAll = memoryType(memoryDRAM | memoryPMEM | memoryHBM) // type of memory to use if none specified defaultMemoryType = memoryAll @@ -573,19 +566,23 @@ func podMemoryTypePreference(pod cache.Pod, c cache.Container) memoryType { } // memoryAllocationPreference returns the amount and kind of memory to allocate. -func memoryAllocationPreference(pod cache.Pod, c cache.Container) (uint64, uint64, memoryType) { +func memoryAllocationPreference(pod cache.Pod, c cache.Container) (int64, int64, memoryType) { + var ( + req int64 + lim int64 + ) + resources, ok := c.GetResourceUpdates() if !ok { resources = c.GetResourceRequirements() } mtype := memoryTypePreference(pod, c) - req, lim := uint64(0), uint64(0) if memReq, ok := resources.Requests[corev1.ResourceMemory]; ok { - req = uint64(memReq.Value()) + req = memReq.Value() } if memLim, ok := resources.Limits[corev1.ResourceMemory]; ok { - lim = uint64(memLim.Value()) + lim = memLim.Value() } return req, lim, mtype @@ -601,15 +598,7 @@ func (t cpuClass) String() string { // String stringifies a memoryType. func (t memoryType) String() string { - str := "" - sep := "" - for _, bit := range []memoryType{memoryDRAM, memoryPMEM, memoryHBM} { - if int(t)&int(bit) != 0 { - str += sep + memoryTypeNames[bit] - sep = "," - } - } - return str + return libmem.TypeMask(t).String() } // parseMemoryType parses a memory type string, ideally produced by String() @@ -656,3 +645,7 @@ func (t *memoryType) UnmarshalJSON(data []byte) error { *t = mtype return nil } + +func (t memoryType) TypeMask() libmem.TypeMask { + return libmem.TypeMask(t) +} diff --git a/cmd/plugins/topology-aware/policy/pools.go b/cmd/plugins/topology-aware/policy/pools.go index 2716f6c53..a2cab5959 100644 --- a/cmd/plugins/topology-aware/policy/pools.go +++ b/cmd/plugins/topology-aware/policy/pools.go @@ -21,6 +21,7 @@ import ( "github.com/containers/nri-plugins/pkg/utils/cpuset" "github.com/containers/nri-plugins/pkg/resmgr/cache" + libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory" system "github.com/containers/nri-plugins/pkg/sysfs" idset "github.com/intel/goresctrl/pkg/utils" ) @@ -349,9 +350,12 @@ func (p *policy) checkHWTopology() error { // Pick a pool and allocate resource from it to the container. 
func (p *policy) allocatePool(container cache.Container, poolHint string) (Grant, error) { - var pool Node + var ( + pool Node + offer *libmem.Offer + ) - request := newRequest(container) + request := newRequest(container, p.memAllocator.Masks().AvailableTypes()) if p.root.FreeSupply().ReservedCPUs().IsEmpty() && request.CPUType() == cpuReserved { // Fallback to allocating reserved CPUs from the shared pool @@ -359,12 +363,13 @@ func (p *policy) allocatePool(container cache.Container, poolHint string) (Grant request.SetCPUType(cpuNormal) } - // Assumption: in the beginning the CPUs and memory will be allocated from - // the same pool. This assumption can be relaxed later, requires separate - // (but connected) scoring of memory and CPU. - if request.CPUType() == cpuReserved || request.CPUType() == cpuPreserve { pool = p.root + o, err := p.getMemOffer(pool, request) + if err != nil { + return nil, policyError("failed to get offer for request %s: %v", request, err) + } + offer = o } else { affinity, err := p.calculatePoolAffinities(request.GetContainer()) @@ -404,199 +409,36 @@ func (p *policy) allocatePool(container cache.Container, poolHint string) (Grant if pool == nil { pool = pools[0] } + + offer = scores[pool.NodeID()].Offer() + if offer == nil { + return nil, policyError("failed to get offer for request %s", request) + } } supply := pool.FreeSupply() - grant, err := supply.Allocate(request) + grant, updates, err := supply.Allocate(request, offer) if err != nil { return nil, policyError("failed to allocate %s from %s: %v", request, supply.DumpAllocatable(), err) } - log.Debug("allocated req '%s' to memory node '%s' (memset %s,%s,%s)", - container.PrettyName(), grant.GetMemoryNode().Name(), - grant.GetMemoryNode().GetMemset(memoryDRAM), - grant.GetMemoryNode().GetMemset(memoryPMEM), - grant.GetMemoryNode().GetMemset(memoryHBM)) - - // In case the workload is assigned to a memory node with multiple - // child nodes, there is no guarantee that the workload will - // allocate memory "nicely". Instead we'll have to make the - // conservative assumption that the memory will all be allocated - // from one single node, and that node can be any of the child - // nodes in the system. Thus, we'll need to reserve the memory - // from all child nodes, and move the containers already - // assigned to the child nodes upwards in the topology tree, if - // they no longer fit to the child node that they are in. In - // other words, they'll need to have a wider range of memory - // node options in order to fit to memory. - // - // - // Example: - // - // Workload 1 and Workload 2 are running on the leaf nodes: - // - // +----------------+ - // |Total mem: 4G | - // |Total CPUs: 4 | Workload 1: - // |Reserved: | - // | 1.5G | 1G mem - // | | - // | | Workload 2: - // | | - // +----------------+ 0.5G mem - // / \ - // / \ - // / \ - // / \ - // / \ - // / \ - // / \ - // / \ - // +----------------+ +----------------+ - // |Total mem: 2G | |Total mem: 2G | - // |Total CPUs: 2 | |Total CPUs: 2 | - // |Reserved: | |Reserved: | - // | 1G | | 0.5G | - // | | | | - // | | | | - // | * WL 1 | | * WL 2 | - // +----------------+ +----------------+ - // - // - // Then Workload 3 comes in and is assigned to the root node. 
Memory - // reservations are done on the leaf nodes: - // - // +----------------+ - // |Total mem: 4G | - // |Total CPUs: 4 | Workload 1: - // |Reserved: | - // | 3G | 1G mem - // | | - // | | Workload 2: - // | * WL 3 | - // +----------------+ 0.5G mem - // / \ - // / \ Workload 3: - // / \ - // / \ 1.5G mem - // / \ - // / \ - // / \ - // / \ - // +----------------+ +----------------+ - // |Total mem: 2G | |Total mem: 2G | - // |Total CPUs: 2 | |Total CPUs: 2 | - // |Reserved: | |Reserved: | - // | 2.5G | | 2G | - // | | | | - // | | | | - // | * WL 1 | | * WL 2 | - // +----------------+ +----------------+ - // - // - // Workload 1 no longer fits to the leaf node, because the total - // reservation from the leaf node is over the memory maximum. - // Thus, it's moved upwards in the tree to the root node. Memory - // resevations are again updated accordingly: - // - // +----------------+ - // |Total mem: 4G | - // |Total CPUs: 4 | Workload 1: - // |Reserved: | - // | 3G | 1G mem - // | | - // | * WL 1 | Workload 2: - // | * WL 3 | - // +----------------+ 0.5G mem - // / \ - // / \ Workload 3: - // / \ - // / \ 1.5G mem - // / \ - // / \ - // / \ - // / \ - // +----------------+ +----------------+ - // |Total mem: 2G | |Total mem: 2G | - // |Total CPUs: 2 | |Total CPUs: 2 | - // |Reserved: | |Reserved: | - // | 2.5G | | 3G | - // | | | | - // | | | | - // | | | * WL 2 | - // +----------------+ +----------------+ - // - // - // Now Workload 2 doesn't fit to the leaf node either. It's also moved - // to the root node: - // - // +----------------+ - // |Total mem: 4G | - // |Total CPUs: 4 | Workload 1: - // |Reserved: | - // | 3G | 1G mem - // | * WL 2 | - // | * WL 1 | Workload 2: - // | * WL 3 | - // +----------------+ 0.5G mem - // / \ - // / \ Workload 3: - // / \ - // / \ 1.5G mem - // / \ - // / \ - // / \ - // / \ - // +----------------+ +----------------+ - // |Total mem: 2G | |Total mem: 2G | - // |Total CPUs: 2 | |Total CPUs: 2 | - // |Reserved: | |Reserved: | - // | 3G | | 3G | - // | | | | - // | | | | - // | | | | - // +----------------+ +----------------+ - // - - // We need to analyze all existing containers which are a subset of current grant. - memset := grant.GetMemoryNode().GetMemset(grant.MemoryType()) - - // Add an extra memory reservation to all subnodes. - // TODO: no need to do any of this if no memory request - grant.UpdateExtraMemoryReservation() - - // See how much memory reservations the workloads on the - // nodes up from this one cause to the node. We only need to - // analyze the workloads up until this node, because it's - // guaranteed that the subtree can hold the workloads. - - // If it turns out that the current workloads no longer fit - // to the node with the reservations from nodes from above - // in the tree, move all nodes upward. Note that this - // creates a reservation of the same size to the node, so in - // effect the node has to be empty of its "own" workloads. - // In this case move all the workloads one level up in the tree. - - changed := true - for changed { - changed = false - for _, oldGrant := range p.allocations.grants { - oldMemset := oldGrant.GetMemoryNode().GetMemset(grant.MemoryType()) - if oldMemset.Size() < memset.Size() && memset.Has(oldMemset.Members()...) 
{ - changed, err = oldGrant.ExpandMemset() - if err != nil { - return nil, err - } - if changed { - log.Debug("* moved container %s upward to node %s to guarantee memory", - oldGrant.GetContainer().PrettyName(), oldGrant.GetMemoryNode().Name()) - break - } + for id, z := range updates { + g, ok := p.allocations.grants[id] + if !ok { + log.Error("offer commit returned zone update %s for unknown container %s", z, id) + } else { + log.Info("updating memory allocation for %s to %s", g.GetContainer().PrettyName(), z) + g.SetMemoryZone(z) + if opt.PinMemory { + g.GetContainer().SetCpusetMems(z.MemsetString()) } } } + log.Debug("allocated req '%s' to memory zone %s", container.PrettyName(), + grant.GetMemoryZone()) + p.allocations.grants[container.GetID()] = grant p.saveAllocations() @@ -642,10 +484,7 @@ func (p *policy) applyGrant(grant Grant) { return } - mems := "" - if opt.PinMemory { - mems = grant.Memset().String() - } + mems := grant.GetMemoryZone() if opt.PinCPU { if cpuType == cpuPreserve { @@ -689,12 +528,12 @@ func (p *policy) applyGrant(grant Grant) { if grant.MemoryType() == memoryPreserve { log.Debug(" => preserving %s memory pinning %s", container.PrettyName(), container.GetCpusetMems()) } else { - if mems != "" { + if mems != libmem.NodeMask(0) { log.Debug(" => pinning %s to memory %s", container.PrettyName(), mems) } else { log.Debug(" => not pinning %s memory, memory set is empty...", container.PrettyName()) } - container.SetCpusetMems(mems) + container.SetCpusetMems(mems.MemsetString()) } } @@ -769,53 +608,27 @@ func (p *policy) updateSharedAllocations(grant *Grant) { } } -func (p *policy) filterInsufficientResources(req Request, originals []Node) []Node { - sufficient := make([]Node, 0) - - for _, node := range originals { - // TODO: Need to filter based on the memory demotion scheme here. For example, if the request is - // of memory type memoryAll, the memory used might be PMEM until it's full and after that DRAM. If - // it's DRAM, amount of PMEM should not be considered and so on. How to find this out in a live - // system? - - supply := node.FreeSupply() - reqMemType := req.MemoryType() +func (p *policy) filterInsufficientResources(req Request, pools []Node) []Node { + filtered := make([]Node, 0) - if reqMemType == memoryUnspec || reqMemType == memoryPreserve { - // The algorithm for handling unspecified memory allocations is the same as for handling a request - // with memory type all. - reqMemType = memoryAll + required := req.MemAmountToAllocate() + for _, node := range pools { + memType := req.MemoryType() + if memType == memoryUnspec || memType == memoryPreserve { + memType = memoryAll } - required := req.MemAmountToAllocate() - - for _, memType := range []memoryType{memoryPMEM, memoryDRAM, memoryHBM} { - if reqMemType&memType != 0 { - extra := supply.ExtraMemoryReservation(memType) - free := supply.MemoryLimit()[memType] - if extra > free { - continue - } - if required+extra <= free { - sufficient = append(sufficient, node) - required = 0 - break - } - if req.ColdStart() > 0 { - // For a "cold start" request, the memory request must fit completely in the PMEM. So reject the node. - break - } - // Subtracting unsigned integers. - // Here free >= extra, that is, (free - extra) is non-negative, - // and required > free - extra, that is, required stays positive. 
- required -= (free - extra) - } - } - if required > 0 { - log.Debug("%s: filtered out %s with insufficient memory", req.GetContainer().PrettyName(), node.Name()) + available := p.poolZoneFree(node, memType) + if available < required { + log.Debug("%s has insufficient available memory (%s < %s)", node.Name(), + prettyMem(available), prettyMem(required)) + } else { + log.Debug("%s has enough available memory", node.Name()) + filtered = append(filtered, node) } } - return sufficient + + return filtered } // Score pools against the request and sort them by score. @@ -850,6 +663,7 @@ func (p *policy) compareScores(request Request, pools []Node, scores map[int]Sco isolated2, reserved2, shared2 := score2.IsolatedCapacity(), score2.ReservedCapacity(), score2.SharedCapacity() a1 := affinityScore(affinity, node1) a2 := affinityScore(affinity, node2) + o1, o2 := score1.Offer(), score2.Offer() log.Debug("comparing scores for %s and %s", node1.Name(), node2.Name()) log.Debug(" %s: %s, affinity score %f", node1.Name(), score1.String(), a1) @@ -914,6 +728,46 @@ func (p *policy) compareScores(request Request, pools []Node, scores map[int]Sco log.Debug(" - affinity is a TIE") + // better matching or tighter memory offer wins + switch { + case o1 != nil && o2 == nil: + log.Debug(" => %s loses on memory offer (failed offer)", node2.Name()) + return true + case o1 == nil && o2 != nil: + log.Debug(" => %s loses on memory offer (failed offer)", node1.Name()) + return false + case o1 == nil && o2 == nil: + log.Debug(" - memory offer is a TIE (both failed)") + default: + m1, m2 := o1.NodeMask(), o2.NodeMask() + t1, t2 := p.memZoneType(m1), p.memZoneType(m2) + memType := request.MemoryType() + + if t1 == memType.TypeMask() && t2 != memType.TypeMask() { + log.Debug(" - %s loses on mis-matching type (%s != %s)", node2.Name(), t2, memType) + return true + } + if t1 != memType.TypeMask() && t2 == memType.TypeMask() { + log.Debug(" - %s loses on mis-matching type (%s != %s)", node1.Name(), t1, memType) + return false + } + log.Debug(" - offer memory types are a tie (%s vs %s)", t1, t2) + + if m1.Size() < m2.Size() { + log.Debug(" - %s loses on memory offer (%s less tight than %s)", + node2.Name(), m2, m1) + return true + } + if m2.Size() < m1.Size() { + log.Debug(" - %s loses on memory offer (%s less tight than %s)", + node1.Name(), m1, m2) + return false + } + if m2.Size() == m1.Size() { + log.Debug(" - memory offers are a TIE (%s vs. %s)", m1, m2) + } + } + // matching memory type wins if reqType := request.MemoryType(); reqType != memoryUnspec && reqType != memoryPreserve { if node1.HasMemoryType(reqType) && !node2.HasMemoryType(reqType) { diff --git a/cmd/plugins/topology-aware/policy/resources.go b/cmd/plugins/topology-aware/policy/resources.go index 0b20f4d7e..43174897d 100644 --- a/cmd/plugins/topology-aware/policy/resources.go +++ b/cmd/plugins/topology-aware/policy/resources.go @@ -21,12 +21,11 @@ import ( "github.com/containers/nri-plugins/pkg/sysfs" "github.com/containers/nri-plugins/pkg/utils/cpuset" - v1 "k8s.io/api/core/v1" "github.com/containers/nri-plugins/pkg/cpuallocator" "github.com/containers/nri-plugins/pkg/kubernetes" "github.com/containers/nri-plugins/pkg/resmgr/cache" - idset "github.com/intel/goresctrl/pkg/utils" + libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory" ) type ( @@ -67,12 +66,8 @@ type Supply interface { GrantedReserved() int // GrantedShared returns the locally granted shared CPU capacity in this supply. 
GrantedShared() int - // GrantedMemory returns the locally granted memory capacity in this supply. - GrantedMemory(memoryType) uint64 // Cumulate cumulates the given supply into this one. Cumulate(Supply) - // AssignMemory adds extra memory to this supply (for extra NUMA nodes assigned to a pool). - AssignMemory(mem memoryMap) // AccountAllocateCPU accounts for (removes) allocated exclusive capacity from the supply. AccountAllocateCPU(Grant) // AccountReleaseCPU accounts for (reinserts) released exclusive capacity into the supply. @@ -81,33 +76,17 @@ type Supply interface { GetScore(Request) Score // AllocatableSharedCPU calculates the allocatable amount of shared CPU of this supply. AllocatableSharedCPU(...bool) int - // Allocate allocates CPU capacity from this supply and returns it as a grant. - Allocate(Request) (Grant, error) + // Allocate allocates a grant from the supply. + Allocate(Request, *libmem.Offer) (Grant, map[string]libmem.NodeMask, error) // ReleaseCPU releases a previously allocated CPU grant from this supply. ReleaseCPU(Grant) - // ReleaseMemory releases a previously allocated memory grant from this supply. - ReleaseMemory(Grant) - // ReallocateMemory updates the Grant to allocate memory from this supply. - ReallocateMemory(Grant) error - // ExtraMemoryReservation returns the memory reservation. - ExtraMemoryReservation(memoryType) uint64 - // SetExtraMemroyReservation sets the extra memory reservation based on the granted memory. - SetExtraMemoryReservation(Grant) - // ReleaseExtraMemoryReservation removes the extra memory reservations based on the granted memory. - ReleaseExtraMemoryReservation(Grant) - // MemoryLimit returns the amount of various memory types belonging to this grant. - MemoryLimit() memoryMap // Reserve accounts for CPU grants after reloading cached allocations. - Reserve(Grant) error - // ReserveMemory accounts for memory grants after reloading cached allocations. - ReserveMemory(Grant) error + Reserve(Grant, *libmem.Offer) (map[string]libmem.NodeMask, error) // DumpCapacity returns a printable representation of the supply's resource capacity. DumpCapacity() string // DumpAllocatable returns a printable representation of the supply's alloctable resources. DumpAllocatable() string - // DumpMemoryState dumps the state of the available and allocated memory. - DumpMemoryState(string) } // Request represents CPU and memory resources requested by a container. @@ -131,7 +110,7 @@ type Request interface { // MemoryType returns the type(s) of requested memory. MemoryType() memoryType // MemAmountToAllocate retuns how much memory we need to reserve for a request. - MemAmountToAllocate() uint64 + MemAmountToAllocate() int64 // ColdStart returns the cold start timeout. ColdStart() time.Duration } @@ -140,8 +119,6 @@ type Request interface { type Grant interface { // SetCPUPortion sets the fraction CPU portion for the grant. SetCPUPortion(fraction int) - // SetMemoryAllocation sets the memory allocation for the grant. - SetMemoryAllocation(memoryType, memoryMap, time.Duration) // Clone creates a copy of this grant. Clone() Grant // RefetchNodes updates the stored cpu and memory nodes of this grant by name. @@ -150,9 +127,10 @@ type Grant interface { GetContainer() cache.Container // GetCPUNode returns the node that granted CPU capacity to the container. GetCPUNode() Node - // GetMemoryNode returns the node which granted memory capacity to - // the container. - GetMemoryNode() Node + // GetMemorySize returns the amount of memory allocated to this grant. 
+ GetMemorySize() int64 + // GetMemoryZone returns the memory zone allocated granted to the container. + GetMemoryZone() libmem.NodeMask // CPUType returns the type of granted CPUs CPUType() cpuClass // CPUPortion returns granted milli-CPUs of non-full CPUs of CPUType(). @@ -172,30 +150,25 @@ type Grant interface { IsolatedCPUs() cpuset.CPUSet // MemoryType returns the type(s) of granted memory. MemoryType() memoryType - // SetMemoryNode updates the grant memory controllers. - SetMemoryNode(Node) - // Memset returns the granted memory controllers as a string. - Memset() idset.IDSet - // ExpandMemset() makes the memory controller set larger as the grant - // is moved up in the node hierarchy. - ExpandMemset() (bool, error) - // MemLimit returns the amount of memory that the container is - // allowed to use. - MemLimit() memoryMap + // SetMemoryType sets the memory type for this grant. + SetMemoryType(memoryType) + // SetMemoryZone sets the memory zone for this grant. + SetMemoryZone(libmem.NodeMask) + // SetMemorySize sets the amount of memory to allocate. + SetMemorySize(int64) + // SetColdstart sets coldstart period for the grant. + SetColdstart(time.Duration) + // String returns a printable representation of this grant. String() string // Release releases the grant from all the Supplys it uses. Release() + // Reallocate memory with the given types. + ReallocMemory(types libmem.TypeMask) error // AccountAllocateCPU accounts for (removes) allocated exclusive capacity for this grant. AccountAllocateCPU() // AccountReleaseCPU accounts for (reinserts) released exclusive capacity for this grant. AccountReleaseCPU() - // UpdateExtraMemoryReservation() updates the reservations in the subtree - // of nodes under the node from which the memory was granted. - UpdateExtraMemoryReservation() - // RestoreMemset restores the granted memory set to node maximum - // and reapplies the grant. - RestoreMemset() // ColdStart returns the cold start timeout. ColdStart() time.Duration // AddTimer adds a cold start timer. @@ -221,22 +194,20 @@ type Score interface { Colocated() int HintScores() map[string]float64 PrioCapacity(cpuPrio) int + + Offer() *libmem.Offer + String() string } -type memoryMap map[memoryType]uint64 - // supply implements our Supply interface. 
type supply struct { - node Node // node supplying CPUs and memory - isolated cpuset.CPUSet // isolated CPUs at this node - reserved cpuset.CPUSet // reserved CPUs at this node - sharable cpuset.CPUSet // sharable CPUs at this node - grantedReserved int // amount of reserved CPUs allocated - grantedShared int // amount of shareable CPUs allocated - mem memoryMap // available memory for this node - grantedMem memoryMap // total memory granted - extraMemReservations map[Grant]memoryMap // how much memory each workload above has requested + node Node // node supplying CPUs and memory + isolated cpuset.CPUSet // isolated CPUs at this node + reserved cpuset.CPUSet // reserved CPUs at this node + sharable cpuset.CPUSet // sharable CPUs at this node + grantedReserved int // amount of reserved CPUs allocated + grantedShared int // amount of shareable CPUs allocated } var _ Supply = &supply{} @@ -249,10 +220,9 @@ type request struct { isolate bool // prefer isolated exclusive CPUs cpuType cpuClass // preferred CPU type (normal, reserved) prio cpuPrio // CPU priority preference, ignored for fraction requests - - memReq uint64 // memory request - memLim uint64 // memory limit - memType memoryType // requested types of memory + memReq int64 + memLim int64 + memType memoryType // requested types of memory // coldStart tells the timeout (in milliseconds) how long to wait until // a DRAM memory controller should be added to a container asking for a @@ -268,15 +238,14 @@ var _ Request = &request{} type grant struct { container cache.Container // container CPU is granted to node Node // node CPU is supplied from - memoryNode Node // node memory is supplied from exclusive cpuset.CPUSet // exclusive CPUs cpuType cpuClass // type of CPUs (normal, reserved, ...) cpuPortion int // milliCPUs granted from CPUs of cpuType memType memoryType // requested types of memory - memset idset.IDSet // assigned memory nodes - allocatedMem memoryMap // memory limit coldStart time.Duration // how long until cold start is done coldStartTimer *time.Timer // timer to trigger cold start timeout + memSize int64 // amount of memory to allocate + memZone libmem.NodeMask // allocated memory zone } var _ Grant = &grant{} @@ -285,6 +254,7 @@ var _ Grant = &grant{} type score struct { supply Supply // CPU supply (node) req Request // CPU request (container) + offer *libmem.Offer // possible memory allocation isolated int // remaining isolated CPUs reserved int // remaining reserved CPUs shared int // remaining shared capacity @@ -297,85 +267,17 @@ var _ Score = &score{} // newSupply creates CPU supply for the given node, cpusets and existing grant. 
-func newSupply(n Node, isolated, reserved, sharable cpuset.CPUSet, grantedReserved int, grantedShared int, mem, grantedMem memoryMap) Supply { - if mem == nil { - mem = createMemoryMap(0, 0, 0) - } - if grantedMem == nil { - grantedMem = createMemoryMap(0, 0, 0) - } +func newSupply(n Node, isolated, reserved, sharable cpuset.CPUSet, grantedReserved int, grantedShared int) Supply { return &supply{ - node: n, - isolated: isolated.Clone(), - reserved: reserved.Clone(), - sharable: sharable.Clone(), - grantedReserved: grantedReserved, - grantedShared: grantedShared, - mem: mem, - grantedMem: grantedMem, - extraMemReservations: make(map[Grant]memoryMap), - } -} - -func createMemoryMap(dram, pmem, hbm uint64) memoryMap { - return memoryMap{ - memoryDRAM: dram, - memoryPMEM: pmem, - memoryHBM: hbm, - memoryAll: dram + pmem + hbm, - memoryUnspec: 0, + node: n, + isolated: isolated.Clone(), + reserved: reserved.Clone(), + sharable: sharable.Clone(), + grantedReserved: grantedReserved, + grantedShared: grantedShared, } } -func (m memoryMap) Add(dram, pmem, hbm uint64) { - m[memoryDRAM] += dram - m[memoryPMEM] += pmem - m[memoryPMEM] += hbm - m[memoryAll] += dram + pmem + hbm -} - -func (m memoryMap) AddDRAM(dram uint64) { - m[memoryDRAM] += dram - m[memoryAll] += dram -} - -func (m memoryMap) AddPMEM(pmem uint64) { - m[memoryPMEM] += pmem - m[memoryAll] += pmem -} - -func (m memoryMap) AddHBM(hbm uint64) { - m[memoryHBM] += hbm - m[memoryAll] += hbm -} - -func (m memoryMap) String() string { - mem, sep := "", "" - - dram, pmem, hbm, types := m[memoryDRAM], m[memoryPMEM], m[memoryHBM], 0 - if dram > 0 || pmem > 0 || hbm > 0 { - if dram > 0 { - mem += "DRAM " + prettyMem(dram) - sep = ", " - types++ - } - if pmem > 0 { - mem += sep + "PMEM " + prettyMem(pmem) - sep = ", " - types++ - } - if hbm > 0 { - mem += sep + "HBM " + prettyMem(hbm) - types++ - } - if types > 1 { - mem += sep + "total " + prettyMem(pmem+dram+hbm) - } - } - - return mem -} - // GetNode returns the node supplying CPU and memory. func (cs *supply) GetNode() Node { return cs.node @@ -383,16 +285,7 @@ func (cs *supply) GetNode() Node { // Clone clones the given CPU supply. func (cs *supply) Clone() Supply { - // Copy the maps. - mem := make(memoryMap) - for key, value := range cs.mem { - mem[key] = value - } - grantedMem := make(memoryMap) - for key, value := range cs.grantedMem { - grantedMem[key] = value - } - return newSupply(cs.node, cs.isolated, cs.reserved, cs.sharable, cs.grantedReserved, cs.grantedShared, mem, grantedMem) + return newSupply(cs.node, cs.isolated, cs.reserved, cs.sharable, cs.grantedReserved, cs.grantedShared) } // IsolatedCpus returns the isolated CPUSet of this supply. @@ -420,15 +313,6 @@ func (cs *supply) GrantedShared() int { return cs.grantedShared } -func (cs *supply) GrantedMemory(memType memoryType) uint64 { - // Return only granted memory of correct type - return cs.grantedMem[memType] -} - -func (cs *supply) MemoryLimit() memoryMap { - return cs.mem -} - // Cumulate more CPU to supply. func (cs *supply) Cumulate(more Supply) { mcs := more.(*supply) @@ -438,20 +322,6 @@ func (cs *supply) Cumulate(more Supply) { cs.sharable = cs.sharable.Union(mcs.sharable) cs.grantedReserved += mcs.grantedReserved cs.grantedShared += mcs.grantedShared - - for key, value := range mcs.mem { - cs.mem[key] += value - } - for key, value := range mcs.grantedMem { - cs.grantedMem[key] += value - } -} - -// AssignMemory adds memory (for extra NUMA nodes assigned to a pool node). 
-func (cs *supply) AssignMemory(mem memoryMap) { - for key, value := range mem { - cs.mem[key] += value - } } // AccountAllocateCPU accounts for (removes) allocated exclusive capacity from the supply. @@ -480,96 +350,29 @@ func (cs *supply) AccountReleaseCPU(g Grant) { cs.sharable = cs.sharable.Union(sharable) } -// allocateMemory tries to fulfill the memory allocation part of a request. -func (cs *supply) allocateMemory(r Request) (memoryMap, error) { - reqType := r.MemoryType() - if reqType == memoryUnspec || reqType == memoryPreserve { - reqType = memoryAll - } - - allocated := createMemoryMap(0, 0, 0) - requested := r.MemAmountToAllocate() - remaining := requested - - // - // Notes: - // We try to allocate PMEM, then DRAM, and finally HBM, honoring - // the types allowed by the request. We don't need to care about - // extra memory reservations for this node as all the nodes with - // insufficient memory have been filtered out before allocation. - // - // However, for cold started containers we do check if there is - // enough PMEM free to accomodate the full request and bail out - // if that check fails. - // - - for _, memType := range []memoryType{memoryPMEM, memoryDRAM, memoryHBM} { - if remaining > 0 && (reqType&memType) != 0 { - available := cs.mem[memType] - - log.Debug("%s: trying %s %s of %s available", - r.GetContainer().PrettyName(), - prettyMem(remaining), memType.String(), prettyMem(available)) - - if remaining <= available { - allocated[memType] = remaining - } else { - allocated[memType] = available - } - - cs.grantedMem[memType] += allocated[memType] - cs.mem[memType] -= allocated[memType] - remaining -= allocated[memType] - } - - if remaining > 0 { - if r.ColdStart() > 0 && memType == memoryPMEM { - return nil, policyError("internal error: "+ - "not enough PMEM for cold start at %s", cs.GetNode().Name()) - } - } else { - break - } - } - - if remaining > 0 { - log.Debug("%s: %s allocation from %s fell short %s", - r.GetContainer().PrettyName(), - reqType.String(), cs.GetNode().Name(), prettyMem(remaining)) - - for memType, amount := range allocated { - if amount > 0 { - cs.grantedMem[memType] -= amount - cs.mem[memType] += amount - } - } - - return nil, policyError("internal error: "+ - "not enough memory at %s", cs.node.Name()) +// Allocate allocates a grant from the supply. +func (cs *supply) Allocate(r Request, o *libmem.Offer) (Grant, map[string]libmem.NodeMask, error) { + if o == nil { + return nil, nil, fmt.Errorf("nil libmem offer") } - cs.grantedMem[memoryAll] += requested - cs.mem[memoryAll] -= requested - - return allocated, nil -} - -// Allocate allocates a grant from the supply. -func (cs *supply) Allocate(r Request) (Grant, error) { grant, err := cs.AllocateCPU(r) if err != nil { - return nil, err + return nil, nil, err } - memory, err := cs.allocateMemory(r) + zone, updates, err := o.Commit() if err != nil { cs.ReleaseCPU(grant) - return nil, err + return nil, nil, fmt.Errorf("failed to commit memory offer: %v", err) } - grant.SetMemoryAllocation(r.MemoryType(), memory, r.ColdStart()) + grant.SetMemorySize(r.MemAmountToAllocate()) + grant.SetMemoryType(r.MemoryType()) + grant.SetMemoryZone(zone) + grant.SetColdstart(r.ColdStart()) - return grant, nil + return grant, updates, nil } // AllocateCPU allocates CPU for a grant from the supply. 
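Committing one container's offer can force the allocator to widen the memory zones of containers placed earlier, which is why Allocate() above (and Reserve() further down) now return a map from container ID to new zone in addition to the grant. The caller is expected to propagate those updates. A minimal sketch of that propagation, matching what allocatePool() and reinstateGrants() do elsewhere in this patch; the helper name is hypothetical:

// Minimal sketch: apply the zone updates returned by an offer commit to existing grants.
func (p *policy) applyZoneUpdates(updates map[string]libmem.NodeMask) {
	for id, zone := range updates {
		g, ok := p.allocations.grants[id]
		if !ok {
			log.Error("zone update %s for unknown container %s", zone, id)
			continue
		}
		g.SetMemoryZone(zone)
		if opt.PinMemory {
			// Re-pin cpuset.mems so the container can actually use the widened zone.
			g.GetContainer().SetCpusetMems(zone.MemsetString())
		}
	}
}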
@@ -623,7 +426,7 @@ func (cs *supply) AllocateCPU(r Request) (Grant, error) { cs.node.Name(), full, cs.sharable, cs.AllocatableSharedCPU()) } - grant := newGrant(cs.node, cr.GetContainer(), cpuType, exclusive, 0, 0, nil, 0) + grant := newGrant(cs.node, cr.GetContainer(), cpuType, exclusive, 0, 0, 0) grant.AccountAllocateCPU() if fraction > 0 { @@ -652,31 +455,6 @@ func (cs *supply) AllocateCPU(r Request) (Grant, error) { return grant, nil } -func (cs *supply) ReallocateMemory(g Grant) error { - log.Debug("%s: reallocating memory (%s) from %s to %s", - g.GetContainer().PrettyName(), - g.MemLimit().String(), - g.GetMemoryNode().Name(), - cs.GetNode().Name()) - - // The grant has been previously allocated from another supply. Reallocate it here. - g.GetMemoryNode().FreeSupply().ReleaseMemory(g) - - mem := uint64(0) - allocatedMemory := g.MemLimit() - for key, value := range allocatedMemory { - if cs.mem[key] < value { - return policyError("internal error: not enough memory for reallocation at %s (released from %s)", cs.GetNode().Name(), g.GetMemoryNode().Name()) - } - cs.mem[key] -= value - cs.grantedMem[key] += value - mem += value - } - cs.grantedMem[memoryAll] += mem - cs.mem[memoryAll] -= mem - return nil -} - func (cs *supply) ReleaseCPU(g Grant) { isolated := g.ExclusiveCPUs().Intersection(cs.node.GetSupply().IsolatedCPUs()) sharable := g.ExclusiveCPUs().Difference(isolated) @@ -689,71 +467,21 @@ func (cs *supply) ReleaseCPU(g Grant) { g.AccountReleaseCPU() } -// ReleaseMemory returns memory from the given grant to the supply. -func (cs *supply) ReleaseMemory(g Grant) { - releasedMemory := uint64(0) - - log.Debug("%s: releasing granted memory (%s) from %s", - g.GetContainer().PrettyName(), - g.MemLimit().String(), cs.GetNode().Name()) - - for key, value := range g.MemLimit() { - cs.grantedMem[key] -= value - cs.mem[key] += value - releasedMemory += value - } - cs.grantedMem[memoryAll] -= releasedMemory - cs.mem[memoryAll] += releasedMemory - - cs.node.DepthFirst(func(n Node) error { - n.FreeSupply().ReleaseExtraMemoryReservation(g) - return nil - }) -} - -func (cs *supply) ExtraMemoryReservation(memType memoryType) uint64 { - extra := uint64(0) - for _, res := range cs.extraMemReservations { - extra += res[memType] - } - return extra -} - -func (cs *supply) ReleaseExtraMemoryReservation(g Grant) { - if mems, ok := cs.extraMemReservations[g]; ok { - log.Debug("%s: releasing extra memory reservation (%s) from %s", - g.GetContainer().PrettyName(), mems.String(), - cs.GetNode().Name()) - delete(cs.extraMemReservations, g) - } -} - -func (cs *supply) SetExtraMemoryReservation(g Grant) { - res := make(memoryMap) - extraMemory := uint64(0) - for key, value := range g.MemLimit() { - res[key] = value - extraMemory += value - } - res[memoryAll] = extraMemory - cs.extraMemReservations[g] = res -} - -func (cs *supply) Reserve(g Grant) error { +func (cs *supply) Reserve(g Grant, o *libmem.Offer) (map[string]libmem.NodeMask, error) { if g.CPUType() == cpuNormal { isolated := g.IsolatedCPUs() exclusive := g.ExclusiveCPUs().Difference(isolated) sharedPortion := g.SharedPortion() if !cs.isolated.Intersection(isolated).Equals(isolated) { - return policyError("can't reserve isolated CPUs (%s) of %s from %s", + return nil, policyError("can't reserve isolated CPUs (%s) of %s from %s", isolated.String(), g.String(), cs.DumpAllocatable()) } if !cs.sharable.Intersection(exclusive).Equals(exclusive) { - return policyError("can't reserve exclusive CPUs (%s) of %s from %s", + return nil, policyError("can't 
reserve exclusive CPUs (%s) of %s from %s", exclusive.String(), g.String(), cs.DumpAllocatable()) } if cs.AllocatableSharedCPU() < 1000*exclusive.Size()+sharedPortion { - return policyError("can't reserve %d shared CPUs of %s from %s", + return nil, policyError("can't reserve %d shared CPUs of %s from %s", sharedPortion, g.String(), cs.DumpAllocatable()) } cs.isolated = cs.isolated.Difference(isolated) @@ -762,7 +490,7 @@ func (cs *supply) Reserve(g Grant) error { } else if g.CPUType() == cpuReserved { sharedPortion := 1000*g.ExclusiveCPUs().Size() + g.SharedPortion() if sharedPortion > 0 && cs.AllocatableReservedCPU() < sharedPortion { - return policyError("can't reserve %d reserved CPUs of %s from %s", + return nil, policyError("can't reserve %d reserved CPUs of %s from %s", sharedPortion, g.String(), cs.DumpAllocatable()) } cs.grantedReserved += sharedPortion @@ -770,24 +498,14 @@ func (cs *supply) Reserve(g Grant) error { g.AccountAllocateCPU() - return nil -} - -func (cs *supply) ReserveMemory(g Grant) error { - mem := uint64(0) - allocatedMemory := g.MemLimit() - for key, value := range allocatedMemory { - if cs.mem[key] < value { - return policyError("internal error: not enough memory for allocation at %s", g.GetMemoryNode().Name()) - } - cs.mem[key] -= value - cs.grantedMem[key] += value - mem += value + zone, updates, err := o.Commit() + if err != nil { + g.Release() + return nil, policyError("failed to commit offer: %v", err) } - cs.grantedMem[memoryAll] += mem - cs.mem[memoryAll] -= mem - g.UpdateExtraMemoryReservation() - return nil + + g.SetMemoryZone(zone) + return updates, nil } // takeCPUs takes up to cnt CPUs from a given CPU set to another. @@ -806,7 +524,7 @@ func (cs *supply) takeCPUs(from, to *cpuset.CPUSet, cnt int, prio cpuPrio) (cpus // DumpCapacity returns a printable representation of the supply's resource capacity. func (cs *supply) DumpCapacity() string { - cpu, mem, sep := "", cs.mem.String(), "" + cpu, mem, sep := "", "", "" if !cs.isolated.IsEmpty() { cpu = fmt.Sprintf("isolated:%s", kubernetes.ShortCPUSet(cs.isolated)) @@ -822,6 +540,10 @@ func (cs *supply) DumpCapacity() string { 1000*cs.sharable.Size()) } + if amount := cs.node.Policy().poolZoneCapacity(cs.node, memoryAll); amount > 0 { + mem = prettyMem(amount) + } + capacity := "<" + cs.node.Name() + " capacity: " if cpu == "" && mem == "" { @@ -843,7 +565,7 @@ func (cs *supply) DumpCapacity() string { // DumpAllocatable returns a printable representation of the supply's resource capacity. func (cs *supply) DumpAllocatable() string { - cpu, mem, sep := "", cs.mem.String(), "" + cpu, mem, sep := "", "", "" if !cs.isolated.IsEmpty() { cpu = fmt.Sprintf("isolated:%s", kubernetes.ShortCPUSet(cs.isolated)) @@ -881,6 +603,10 @@ func (cs *supply) DumpAllocatable() string { allocatable := "<" + cs.node.Name() + " allocatable: " + if amount := cs.node.Policy().poolZoneFree(cs.node, memoryAll); amount > 0 { + mem = prettyMem(amount) + } + if cpu == "" && mem == "" { allocatable += "-" } else { @@ -899,11 +625,11 @@ func (cs *supply) DumpAllocatable() string { } // prettyMem formats the given amount as k, M, G, or T units. 
-func prettyMem(value uint64) string { +func prettyMem(value int64) string { units := []string{"k", "M", "G", "T"} - coeffs := []uint64{1 << 10, 1 << 20, 1 << 30, 1 << 40} + coeffs := []int64{1 << 10, 1 << 20, 1 << 30, 1 << 40} - c, u := uint64(1), "" + c, u := int64(1), "" for i := 0; i < len(units); i++ { if coeffs[i] > value { break @@ -915,59 +641,8 @@ func prettyMem(value uint64) string { return strconv.FormatFloat(v, 'f', 2, 64) + u } -// DumpMemoryState dumps the state of the available and allocated memory. -func (cs *supply) DumpMemoryState(prefix string) { - memTypes := []memoryType{memoryDRAM, memoryPMEM, memoryHBM} - totalFree := uint64(0) - totalGranted := uint64(0) - for _, kind := range memTypes { - free := cs.mem[kind] - granted := cs.grantedMem[kind] - if free != 0 || granted != 0 { - log.Debug(prefix+"- %s: free: %s, granted %s", - kind, prettyMem(free), prettyMem(granted)) - } - totalFree += free - totalGranted += granted - } - log.Debug(prefix+"- total free: %s, total granted %s", - prettyMem(totalFree), prettyMem(totalGranted)) - - printHdr := true - if len(cs.extraMemReservations) > 0 { - for g, memMap := range cs.extraMemReservations { - split := "" - sep := "" - total := uint64(0) - if mem := memMap[memoryDRAM]; mem > 0 { - split = "DRAM " + prettyMem(mem) - sep = ", " - total += mem - } - if mem := memMap[memoryPMEM]; mem > 0 { - split += sep + "PMEM " + prettyMem(mem) - sep = ", " - total += mem - } - if mem := memMap[memoryHBM]; mem > 0 { - split += sep + "HBMEM " + prettyMem(mem) - sep = ", " - total += mem - } - if total > 0 { - if printHdr { - log.Debug(prefix + "- extra reservations:") - printHdr = false - } - log.Debug(prefix+" - %s: %s (%s)", - g.GetContainer().PrettyName(), prettyMem(total), split) - } - } - } -} - // newRequest creates a new request for the given container. 
-func newRequest(container cache.Container) Request { +func newRequest(container cache.Container, types libmem.TypeMask) Request { pod, _ := container.GetPod() full, fraction, isolate, cpuType, prio := cpuAllocationPreferences(pod, container) req, lim, mtype := memoryAllocationPreference(pod, container) @@ -980,25 +655,49 @@ func newRequest(container cache.Container) Request { mtype = defaultMemoryType &^ memoryHBM } - if mtype&memoryPMEM != 0 && mtype&memoryDRAM != 0 { - parsedColdStart, err := coldStartPreference(pod, container) - if err != nil { - log.Error("Failed to parse cold start preference") + if mtype != memoryPreserve { + mtype = memoryType(mtype.TypeMask().And(types)) + + if coldStartOff { + if mtype == memoryPMEM { + mtype |= memoryDRAM + log.Error("%s: coldstart disabled (movable non-DRAM memory zones present)", + container.PrettyName()) + } } else { - if parsedColdStart.Duration.Duration > 0 { - if coldStartOff { - log.Error("coldstart disabled (movable non-DRAM memory zones present)") - } else { - coldStart = time.Duration(parsedColdStart.Duration.Duration) + pref, err := coldStartPreference(pod, container) + if err != nil { + log.Error("failed to parse coldstart preference") + } else { + coldStart = time.Duration(pref.Duration.Duration) + if coldStart > 0 { + mtype &^= memoryDRAM } } } - } else if mtype == memoryPMEM { - if coldStartOff { - mtype = mtype | memoryDRAM - log.Error("%s: forced also DRAM usage (movable non-DRAM memory zones present)", - container.PrettyName()) - } + + /* + if mtype&memoryPMEM != 0 && mtype&memoryDRAM != 0 { + parsedColdStart, err := coldStartPreference(pod, container) + if err != nil { + log.Error("Failed to parse cold start preference") + } else { + if parsedColdStart.Duration.Duration > 0 { + if coldStartOff { + log.Error("coldstart disabled (movable non-DRAM memory zones present)") + } else { + coldStart = time.Duration(parsedColdStart.Duration.Duration) + mtype &^= memoryDRAM + } + } + } + } else if mtype == memoryPMEM { + if coldStartOff { + mtype = mtype | memoryDRAM + log.Error("%s: forced also DRAM usage (movable non-DRAM memory zones present)", + container.PrettyName()) + } + }*/ } return &request{ @@ -1073,27 +772,11 @@ func (cr *request) Isolate() bool { } // MemAmountToAllocate retuns how much memory we need to reserve for a request. -func (cr *request) MemAmountToAllocate() uint64 { - var amount uint64 = 0 - switch cr.GetContainer().GetQOSClass() { - case v1.PodQOSBurstable: - // May be a request and/or limit. We focus on the limit because we - // need to prepare for the case when all containers are using all - // the memory they are allowed to. If limit is not set then we'll - // allocate the request (which the container will get). - if cr.memLim > 0 { - amount = cr.memLim - } else { - amount = cr.memReq - } - case v1.PodQOSGuaranteed: - // Limit and request are the same. - amount = cr.memLim - case v1.PodQOSBestEffort: - // No requests or limits. - amount = 0 +func (cr *request) MemAmountToAllocate() int64 { + if cr.memLim == 0 && cr.memReq != 0 { + return cr.memReq } - return amount + return cr.memLim } // MemoryType returns the requested type of memory for the grant. 
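Coldstart handling also changes shape here: instead of restoring a precomputed memset when the coldstart timer fires, newRequest() masks DRAM out of the requested type up front, and finishColdStart() in coldstart.go later reallocates the grant with DRAM permitted again. A compressed, illustrative walkthrough of the two halves, using only operations that appear in this patch; the function names are hypothetical:

// Illustrative only: the two halves of coldstart handling under libmem.
// applyColdStartMask mirrors what newRequest() does to the requested type mask.
func applyColdStartMask(mtype memoryType, coldStart time.Duration) memoryType {
	if coldStart > 0 {
		// Keep DRAM out of the initial allocation so the container starts on PMEM.
		mtype &^= memoryDRAM
	}
	return mtype
}

// endColdStart mirrors what finishColdStart() does once the coldstart timer fires:
// the grant is reallocated with DRAM allowed in addition to its current zone's types.
func (p *policy) endColdStart(g Grant) error {
	return g.ReallocMemory(p.memZoneType(g.GetMemoryZone()) | libmem.TypeMaskDRAM)
}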
@@ -1178,6 +861,18 @@ func (cs *supply) GetScore(req Request) Score { score.hints[provider] = cs.node.HintScore(hint) } + node := cs.node + if req.MemoryType() == memoryPreserve { + node = cs.node.Policy().root + } + + o, err := cs.node.Policy().getMemOffer(node, cr) + if err != nil { + log.Error("failed to get offer for request %s: %v", req, err) + } else { + score.offer = o + } + return score } @@ -1263,23 +958,25 @@ func (score *score) PrioCapacity(prio cpuPrio) int { return score.prio[prio] } +func (score *score) Offer() *libmem.Offer { + return score.offer +} + func (score *score) String() string { return fmt.Sprintf("", score.supply.GetNode().Name(), score.isolated, score.reserved, score.shared, score.colocated, score.hints) } // newGrant creates a CPU grant from the given node for the container. -func newGrant(n Node, c cache.Container, cpuType cpuClass, exclusive cpuset.CPUSet, cpuPortion int, mt memoryType, allocated memoryMap, coldstart time.Duration) Grant { +func newGrant(n Node, c cache.Container, cpuType cpuClass, exclusive cpuset.CPUSet, cpuPortion int, mt memoryType, coldstart time.Duration) Grant { grant := &grant{ node: n, - memoryNode: n, container: c, cpuType: cpuType, exclusive: exclusive, cpuPortion: cpuPortion, - } - if allocated != nil { - grant.SetMemoryAllocation(mt, allocated, coldstart) + memType: mt, + coldStart: coldstart, } return grant } @@ -1289,40 +986,36 @@ func (cg *grant) SetCPUPortion(fraction int) { cg.cpuPortion = fraction } -// SetMemoryAllocation sets the memory allocation for the grant. -func (cg *grant) SetMemoryAllocation(mt memoryType, allocated memoryMap, coldstart time.Duration) { - initial := memoryPMEM - if coldstart <= 0 { - initial = mt - } - mems := cg.node.GetMemset(initial) - if mems.Size() == 0 { - mems = cg.node.GetMemset(memoryDRAM) - if mems.Size() == 0 { - mems = cg.node.GetMemset(memoryAll) - } - } - mems = mems.Clone() +// SetMemoryType sets the memory type for the grant. +func (cg *grant) SetMemoryType(memType memoryType) { + cg.memType = memType +} + +// SetMemoryZone sets the memory zone for the grant. +func (cg *grant) SetMemoryZone(zone libmem.NodeMask) { + cg.memZone = zone +} + +// SetMemorySize sets the amount of memory to allocate. +func (cg *grant) SetMemorySize(size int64) { + cg.memSize = size +} - cg.memType = mt - cg.memset = mems - cg.allocatedMem = allocated - cg.coldStart = coldstart +// SetColdstart sets coldstart period for the grant. +func (cg *grant) SetColdstart(period time.Duration) { + cg.coldStart = period } // Clone creates a copy of this grant. 
 func (cg *grant) Clone() Grant {
 	return &grant{
-		node:         cg.GetCPUNode(),
-		memoryNode:   cg.GetMemoryNode(),
-		container:    cg.GetContainer(),
-		exclusive:    cg.ExclusiveCPUs(),
-		cpuType:      cg.CPUType(),
-		cpuPortion:   cg.SharedPortion(),
-		memType:      cg.MemoryType(),
-		memset:       cg.Memset().Clone(),
-		allocatedMem: cg.MemLimit(),
-		coldStart:    cg.ColdStart(),
+		node:       cg.GetCPUNode(),
+		container:  cg.GetContainer(),
+		exclusive:  cg.ExclusiveCPUs(),
+		cpuType:    cg.CPUType(),
+		cpuPortion: cg.SharedPortion(),
+		memType:    cg.MemoryType(),
+		coldStart:  cg.ColdStart(),
 	}
 }
 
@@ -1332,12 +1025,7 @@ func (cg *grant) RefetchNodes() error {
 	if !ok {
 		return policyError("failed to refetch grant cpu node %s", cg.node.Name())
 	}
-	memoryNode, ok := cg.memoryNode.Policy().nodes[cg.memoryNode.Name()]
-	if !ok {
-		return policyError("failed to refetch grant memory node %s", cg.memoryNode.Name())
-	}
 	cg.node = node
-	cg.memoryNode = memoryNode
 	return nil
 }
 
@@ -1351,14 +1039,14 @@ func (cg *grant) GetCPUNode() Node {
 	return cg.node
 }
 
-// GetNode returns the Node this grant gets its memory allocation from.
-func (cg *grant) GetMemoryNode() Node {
-	return cg.memoryNode
+// GetMemorySize returns the amount of memory allocated to this grant.
+func (cg *grant) GetMemorySize() int64 {
+	return cg.memSize
 }
 
-func (cg *grant) SetMemoryNode(n Node) {
-	cg.memoryNode = n
-	cg.memset = n.GetMemset(cg.MemoryType())
+// GetMemoryZone returns the memory zone this grant is allocated to.
+func (cg *grant) GetMemoryZone() libmem.NodeMask {
+	return cg.memZone
 }
 
 // CPUType returns the requested type of CPU for the grant.
@@ -1412,16 +1100,6 @@ func (cg *grant) MemoryType() memoryType {
 	return cg.memType
 }
 
-// Memset returns the granted memory controllers as an IDSet.
-func (cg *grant) Memset() idset.IDSet {
-	return cg.memset
-}
-
-// MemLimit returns the granted memory.
-func (cg *grant) MemLimit() memoryMap {
-	return cg.allocatedMem
-}
-
 // String returns a printable representation of the CPU grant.
 func (cg *grant) String() string {
 	var cpuType, isolated, exclusive, reserved, shared string
@@ -1442,10 +1120,10 @@ func (cg *grant) String() string {
 			cg.node.FreeSupply().SharableCPUs(), cg.SharedPortion())
 	}
 
-	memset := ", MemPin: " + cg.memset.String()
+	mem := fmt.Sprintf(", memory: %s (%s)", cg.memZone, prettyMem(cg.memSize))
 
 	return fmt.Sprintf("",
-		cg.container.PrettyName(), cg.node.Name(), cpuType, isolated, exclusive, reserved, shared, memset)
+		cg.container.PrettyName(), cg.node.Name(), cpuType, isolated, exclusive, reserved, shared, mem)
 }
 
 func (cg *grant) AccountAllocateCPU() {
@@ -1460,92 +1138,48 @@ func (cg *grant) AccountAllocateCPU() {
 
 func (cg *grant) Release() {
 	cg.GetCPUNode().FreeSupply().ReleaseCPU(cg)
-	cg.GetMemoryNode().FreeSupply().ReleaseMemory(cg)
+	err := cg.node.Policy().releaseMem(cg.container.GetID())
+	if err != nil {
+		log.Error("releasing memory for %s failed: %v", cg.container.PrettyName(), err)
+	}
 	cg.StopTimer()
 }
 
-func (cg *grant) AccountReleaseCPU() {
-	cg.node.DepthFirst(func(n Node) error {
-		n.FreeSupply().AccountReleaseCPU(cg)
-		return nil
-	})
-	for node := cg.node.Parent(); !node.IsNil(); node = node.Parent() {
-		node.FreeSupply().AccountReleaseCPU(cg)
+func (cg *grant) ReallocMemory(types libmem.TypeMask) error {
+	zone, updates, err := cg.node.Policy().reallocMem(cg.container.GetID(), 0, types)
+	if err != nil {
+		return err
 	}
-}
-
-func (cg *grant) RestoreMemset() {
-	mems := cg.GetMemoryNode().GetMemset(cg.memType)
-	cg.memset = mems
-	cg.GetMemoryNode().Policy().applyGrant(cg)
-}
 
-func (cg *grant) ExpandMemset() (bool, error) {
-	supply := cg.GetMemoryNode().FreeSupply()
-	node := cg.GetMemoryNode()
-	parent := node.Parent()
-
-	// We have to assume that the memory has been allocated how we granted it (if PMEM ran out
-	// the allocations have been made from DRAM and so on).
-
-	// Figure out if there is enough memory now to have grant as-is.
-	extra := supply.ExtraMemoryReservation(memoryAll)
-	free := supply.MemoryLimit()[memoryAll]
-	if extra <= free {
-		// The grant fits in the node even with extra reservations
-		return false, nil
-	}
-	// Else it doesn't fit, so move the grant up in the memory tree.
-	required := uint64(0)
-	for _, memType := range []memoryType{memoryPMEM, memoryDRAM, memoryHBM} {
-		required += cg.MemLimit()[memType]
-	}
-	log.Debug("out-of-memory risk in %s: extra reservations %s > free %s -> moving up %s total memory grant from %s",
-		cg, prettyMem(extra), prettyMem(free), prettyMem(required), node.Name())
-
-	// Find an ancestor where the grant fits. As reservations in
-	// child nodes do not show up in free + extra in parent nodes,
-	// releasing the grant is not necessary before searching.
-	for ; !parent.IsNil(); parent = parent.Parent() {
-		pSupply := parent.FreeSupply()
-		parentFree := pSupply.MemoryLimit()[memoryAll]
-		parentExtra := pSupply.ExtraMemoryReservation(memoryAll)
-		if parentExtra+required <= parentFree {
-			required = 0
-			break
-		}
-		log.Debug("- %s has %s free but %s extra reservations, moving further up",
-			parent.Name(), prettyMem(parentFree), prettyMem(parentExtra))
-	}
-	if required > 0 {
-		return false, fmt.Errorf("internal error: cannot find enough memory (%s) for %s from ancestors of %s", prettyMem(required), cg, node.Name())
+	cg.SetMemoryZone(zone)
+	if opt.PinMemory {
+		cg.container.SetCpusetMems(zone.MemsetString())
 	}
 
-	// Release granted memory from the node and allocate it from the parent node.
-	err := parent.FreeSupply().ReallocateMemory(cg)
-	if err != nil {
-		return false, err
+	for id, z := range updates {
+		g, ok := cg.node.Policy().allocations.grants[id]
+		if !ok {
+			log.Error("offer commit returned zone update %s for unknown container %s", z, id)
+		} else {
+			log.Info("updating memory allocation for %s to %s", g.GetContainer().PrettyName(), z)
+			g.SetMemoryZone(z)
+			if opt.PinMemory {
+				g.GetContainer().SetCpusetMems(z.MemsetString())
+			}
+		}
 	}
 
-	cg.SetMemoryNode(parent)
-	cg.UpdateExtraMemoryReservation()
-
-	// Make the container to use the new memory set.
-	// FIXME: this could be done in a second pass to avoid doing this many times
-	cg.GetMemoryNode().Policy().applyGrant(cg)
-	return true, nil
+	return nil
 }
 
-func (cg *grant) UpdateExtraMemoryReservation() {
-	// For every subnode, make sure that this grant is added to the extra memory allocation.
-	cg.GetMemoryNode().DepthFirst(func(n Node) error {
-		// No extra allocation should be done to the node itself.
-		if !n.IsSameNode(cg.GetMemoryNode()) {
-			supply := n.FreeSupply()
-			supply.SetExtraMemoryReservation(cg)
-		}
+func (cg *grant) AccountReleaseCPU() {
+	cg.node.DepthFirst(func(n Node) error {
+		n.FreeSupply().AccountReleaseCPU(cg)
 		return nil
 	})
+	for node := cg.node.Parent(); !node.IsNil(); node = node.Parent() {
+		node.FreeSupply().AccountReleaseCPU(cg)
+	}
 }
 
 func (cg *grant) ColdStart() time.Duration {
diff --git a/cmd/plugins/topology-aware/policy/topology-aware-policy.go b/cmd/plugins/topology-aware/policy/topology-aware-policy.go
index efc829cc3..3e04fb649 100644
--- a/cmd/plugins/topology-aware/policy/topology-aware-policy.go
+++ b/cmd/plugins/topology-aware/policy/topology-aware-policy.go
@@ -27,10 +27,10 @@ import (
 	"github.com/containers/nri-plugins/pkg/cpuallocator"
 	"github.com/containers/nri-plugins/pkg/resmgr/cache"
 	"github.com/containers/nri-plugins/pkg/resmgr/events"
+	libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory"
 	policyapi "github.com/containers/nri-plugins/pkg/resmgr/policy"
 	system "github.com/containers/nri-plugins/pkg/sysfs"
 
-	idset "github.com/intel/goresctrl/pkg/utils"
 )
 
 const (
@@ -66,7 +66,8 @@ type policy struct {
 	depth        int                       // tree depth
 	allocations  allocations               // container pool assignments
 	cpuAllocator cpuallocator.CPUAllocator // CPU allocator used by the policy
-	coldstartOff bool                      // coldstart forced off (have movable PMEM zones)
+	memAllocator *libmem.Allocator
+	coldstartOff bool // coldstart forced off (have movable PMEM zones)
 }
 
 var opt = &cfgapi.Config{}
@@ -84,6 +85,8 @@ func New() policyapi.Backend {
 
 // Setup initializes the topology-aware policy instance.
 func (p *policy) Setup(opts *policyapi.BackendOptions) error {
+	var err error
+
 	cfg, ok := opts.Config.(*cfgapi.Config)
 	if !ok {
 		return policyError("failed initialize %s policy: config of wrong type %T",
@@ -96,6 +99,10 @@ func (p *policy) Setup(opts *policyapi.BackendOptions) error {
 	p.sys = opts.System
 	p.options = opts
 	p.cpuAllocator = cpuallocator.NewCPUAllocator(opts.System)
+	p.memAllocator, err = libmem.NewAllocator(libmem.WithSystemNodes(opts.System))
+	if err != nil {
+		return policyError("failed to initialize %s policy: %w", PolicyName, err)
+	}
 
 	opt = cfg
 	defaultPrio = cfg.DefaultCPUPriority.Value()
@@ -273,8 +280,10 @@ func (p *policy) GetTopologyZones() []*policyapi.TopologyZone {
 
 		total := pool.GetSupply().(*supply)
 		free := pool.FreeSupply().(*supply)
-		capacity := int64(total.mem[memoryAll])
-		available := int64(free.mem[memoryAll] - free.ExtraMemoryReservation(memoryAll))
+
+		memZone := libmem.NewNodeMask(pool.GetMemset(memoryAll).Members()...)
+		capacity := p.memAllocator.ZoneCapacity(memZone)
+		available := p.memAllocator.ZoneFree(memZone)
 
 		memory := &policyapi.ZoneResource{
 			Name:        policyapi.MemoryResource,
@@ -351,23 +360,10 @@ func (p *policy) ExportResourceData(c cache.Container) map[string]string {
 		data[policyapi.ExportExclusiveCPUs] = exclusive
 	}
 
-	mems := grant.Memset()
-	dram := idset.NewIDSet()
-	pmem := idset.NewIDSet()
-	hbm := idset.NewIDSet()
-	for _, id := range mems.SortedMembers() {
-		node := p.sys.Node(id)
-		switch node.GetMemoryType() {
-		case system.MemoryTypeDRAM:
-			dram.Add(id)
-		case system.MemoryTypePMEM:
-			pmem.Add(id)
-			/*
-				case system.MemoryTypeHBM:
-					hbm.Add(id)
-			*/
-		}
-	}
+	mems := grant.GetMemoryZone()
+	dram := mems.And(p.memAllocator.Masks().NodesByTypes(libmem.TypeMaskDRAM))
+	pmem := mems.And(p.memAllocator.Masks().NodesByTypes(libmem.TypeMaskPMEM))
+	hbm := mems.And(p.memAllocator.Masks().NodesByTypes(libmem.TypeMaskHBM))
 	data["ALL_MEMS"] = mems.String()
 	if dram.Size() > 0 {
 		data["DRAM_MEMS"] = dram.String()