topology-aware: initial libmem conversion.
Cut out the original memory accounting and allocation code.
Plug in a libmem-based memory allocator instead.

Signed-off-by: Krisztian Litkey <[email protected]>
klihub committed Jul 4, 2024
1 parent cbe8a2d commit 48980b0
Showing 8 changed files with 452 additions and 921 deletions.
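
Before the per-file diffs, the shape of the change in one place: the policy used to track pool memory in its own memoryMap and reserve from it; it now asks the libmem allocator for an offer and commits that. Below is a minimal sketch of the offer cycle, assuming the *libmem.Allocator type name and an Offer.Commit method (this diff only shows offers being handed to supply.Reserve, which presumably commits them internally); GetOffer, ContainerWithTypes, and ZoneType are taken from the diff itself.

import libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory"

// allocate sketches the offer cycle: ask for a tentative placement of size
// bytes in zone, then commit it. NOTE: *libmem.Allocator and Offer.Commit()
// are assumptions, not shown in this diff.
func allocate(alloc *libmem.Allocator, id, name, qos string, size int64, zone libmem.NodeMask) (libmem.NodeMask, map[string]libmem.NodeMask, error) {
	o, err := alloc.GetOffer(
		libmem.ContainerWithTypes(id, name, qos, size, zone, alloc.ZoneType(zone)),
	)
	if err != nil {
		return libmem.NodeMask(0), nil, err
	}
	// Committing turns the offer into a real allocation; peer containers may
	// be moved to make room, reported back as container ID -> new zone.
	return o.Commit()
}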
70 changes: 35 additions & 35 deletions cmd/plugins/topology-aware/policy/cache.go
@@ -19,8 +19,8 @@ import (
"time"

"github.com/containers/nri-plugins/pkg/resmgr/cache"
libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory"
"github.com/containers/nri-plugins/pkg/utils/cpuset"
idset "github.com/intel/goresctrl/pkg/utils"
)

const (
@@ -66,23 +66,33 @@ func (p *policy) reinstateGrants(grants map[string]Grant) error {
pool := grant.GetCPUNode()
supply := pool.FreeSupply()

if err := supply.Reserve(grant); err != nil {
return policyError("failed to update pool %q with CPU grant of %q: %v",
o, err := p.restoreMemOffer(grant)
if err != nil {
return policyError("failed to get libmem offer for pool %q, grant of %s: %w",
pool.Name(), c.PrettyName(), err)
}

log.Info("updated pool %q with reinstated CPU grant of %q",
pool.Name(), c.PrettyName())

pool = grant.GetMemoryNode()
if err := supply.ReserveMemory(grant); err != nil {
grant.GetCPUNode().FreeSupply().ReleaseCPU(grant)
return policyError("failed to update pool %q with extra memory of %q: %v",
updates, err := supply.Reserve(grant, o)
if err != nil {
return policyError("failed to update pool %q with CPU grant of %q: %v",
pool.Name(), c.PrettyName(), err)
}

log.Info("updated pool %q with reinstanted memory reservation of %q",
pool.Name(), c.PrettyName())
for uID, uZone := range updates {
if ug, ok := p.allocations.grants[uID]; !ok {
log.Error("failed to update grant %s to memory zone to %s, grant not found",
uID, uZone)
} else {
ug.SetMemoryZone(uZone)
if opt.PinMemory {
ug.GetContainer().SetCpusetMems(uZone.MemsetString())
}
log.Info("updated grant %s to memory zone %s", uID, uZone)
}
}

log.Info("updated pool %q with reinstated CPU grant of %q, memory zone %s",
pool.Name(), c.PrettyName(), grant.GetMemoryZone())

p.allocations.grants[id] = grant
p.applyGrant(grant)
@@ -94,16 +104,15 @@ func (p *policy) reinstateGrants(grants map[string]Grant) error {
}

type cachedGrant struct {
Exclusive string
Part int
CPUType cpuClass
Container string
Pool string
MemoryPool string
MemType memoryType
Memset idset.IDSet
MemoryLimit memoryMap
ColdStart time.Duration
Exclusive string
Part int
CPUType cpuClass
Container string
Pool string
MemoryPool libmem.NodeMask
MemType memoryType
MemSize int64
ColdStart time.Duration
}

func newCachedGrant(cg Grant) *cachedGrant {
@@ -113,15 +122,9 @@ func newCachedGrant(cg Grant) *cachedGrant {
ccg.CPUType = cg.CPUType()
ccg.Container = cg.GetContainer().GetID()
ccg.Pool = cg.GetCPUNode().Name()
ccg.MemoryPool = cg.GetMemoryNode().Name()
ccg.MemoryPool = cg.GetMemoryZone()
ccg.MemType = cg.MemoryType()
ccg.Memset = cg.Memset().Clone()

ccg.MemoryLimit = make(memoryMap)
for key, value := range cg.MemLimit() {
ccg.MemoryLimit[key] = value
}

ccg.MemSize = cg.GetMemorySize()
ccg.ColdStart = cg.ColdStart()

return ccg
@@ -144,14 +147,11 @@ func (ccg *cachedGrant) ToGrant(policy *policy) (Grant, error) {
cpuset.MustParse(ccg.Exclusive),
ccg.Part,
ccg.MemType,
ccg.MemoryLimit,
ccg.ColdStart,
)

if g.Memset().String() != ccg.Memset.String() {
log.Error("cache error: mismatch in stored/recalculated memset: %s != %s",
ccg.Memset, g.Memset())
}
g.SetMemoryZone(ccg.MemoryPool)
g.SetMemorySize(ccg.MemSize)

return g, nil
}
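
The cached grant now round-trips memory as a libmem.NodeMask plus a byte count, restored verbatim with SetMemoryZone and SetMemorySize, instead of storing a per-type limit map and re-verifying a recalculated memset. A small illustration of the mask type (NewNodeMask and MemsetString both appear in this diff; the exact printed form is an assumption):

package main

import (
	"fmt"

	libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory"
)

func main() {
	// A memory zone is a bitmask of NUMA node IDs.
	zone := libmem.NewNodeMask(0, 2, 3)

	// MemsetString renders the mask in cpuset list syntax; this is what
	// SetCpusetMems feeds to the container's cpuset.mems control above.
	fmt.Println(zone.MemsetString()) // assumed to print "0,2-3"
}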
10 changes: 8 additions & 2 deletions cmd/plugins/topology-aware/policy/coldstart.go
@@ -19,6 +19,7 @@ import (

"github.com/containers/nri-plugins/pkg/resmgr/cache"
"github.com/containers/nri-plugins/pkg/resmgr/events"
libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory"
)

// trigger cold start for the container if necessary.
@@ -63,8 +64,13 @@ func (p *policy) finishColdStart(c cache.Container) (bool, error) {
return false, policyError("coldstart: no grant found for %s", c.PrettyName())
}

log.Info("restoring memset to grant %v", g)
g.RestoreMemset()
log.Info("reallocating %s after coldstart", g)
err := g.ReallocMemory(p.memZoneType(g.GetMemoryZone()) | libmem.TypeMaskDRAM)
if err != nil {
log.Error("failed to reallocate %s after coldstart: %v", g, err)
} else {
log.Info("reallocated %s", g)
}
g.ClearTimer()

return true, nil
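
Instead of restoring a saved memset, cold start now ends by reallocating the grant with a widened type mask: the types of its current zone (PMEM-only during cold start) OR'd with DRAM. A condensed sketch of the same move through the policy helpers added below in libmem.go; widenToDRAM itself is hypothetical, while reallocMem, memZoneType, and libmem.TypeMaskDRAM are from this diff:

// widenToDRAM re-places an allocation so it may also use DRAM, keeping
// whatever memory types its current zone already provides.
func widenToDRAM(p *policy, id string, zone libmem.NodeMask) error {
	newZone, updates, err := p.reallocMem(id, zone, p.memZoneType(zone)|libmem.TypeMaskDRAM)
	if err != nil {
		return err
	}
	log.Info("%s now allocated in zone %s (%d peers moved)", id, newZone, len(updates))
	return nil
}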
95 changes: 95 additions & 0 deletions cmd/plugins/topology-aware/policy/libmem.go
@@ -0,0 +1,95 @@
// Copyright The NRI Plugins Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package topologyaware

import libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory"

func (p *policy) getMemOffer(pool Node, req Request) (*libmem.Offer, error) {
var (
ctr = req.GetContainer()
zone = libmem.NodeMask(0)
mtyp = libmem.TypeMask(0)
)

if memType := req.MemoryType(); memType == memoryPreserve {
zone = libmem.NewNodeMask(pool.GetMemset(memoryAll).Members()...)
mtyp = p.memAllocator.ZoneType(zone)
} else {
zone = libmem.NewNodeMask(pool.GetMemset(memType).Members()...)
mtyp = libmem.TypeMask(memType)
}

o, err := p.memAllocator.GetOffer(
libmem.ContainerWithTypes(
ctr.GetID(),
ctr.PrettyName(),
string(ctr.GetQOSClass()),
req.MemAmountToAllocate(),
zone,
mtyp,
),
)

return o, err
}

func (p *policy) restoreMemOffer(g Grant) (*libmem.Offer, error) {
var (
ctr = g.GetContainer()
zone = g.GetMemoryZone()
mtyp = p.memAllocator.ZoneType(zone)
)

o, err := p.memAllocator.GetOffer(
libmem.ContainerWithTypes(
ctr.GetID(),
ctr.PrettyName(),
string(ctr.GetQOSClass()),
g.GetMemorySize(),
zone,
mtyp,
),
)

return o, err
}

func (p *policy) reallocMem(id string, nodes libmem.NodeMask, types libmem.TypeMask) (libmem.NodeMask, map[string]libmem.NodeMask, error) {
return p.memAllocator.Realloc(id, nodes, types)
}

func (p *policy) releaseMem(id string) error {
return p.memAllocator.Release(id)
}

func (p *policy) poolZoneType(pool Node, memType memoryType) libmem.TypeMask {
return p.memAllocator.ZoneType(libmem.NewNodeMask(pool.GetMemset(memType).Members()...))
}

func (p *policy) memZoneType(zone libmem.NodeMask) libmem.TypeMask {
return p.memAllocator.ZoneType(zone)
}

func (p *policy) poolZone(pool Node, memType memoryType) libmem.NodeMask {
return libmem.NewNodeMask(pool.GetMemset(memType).Members()...)
}

func (p *policy) poolZoneCapacity(pool Node, memType memoryType) int64 {
return p.memAllocator.ZoneCapacity(libmem.NewNodeMask(pool.GetMemset(memType).Members()...))
}

func (p *policy) poolZoneFree(pool Node, memType memoryType) int64 {
return p.memAllocator.ZoneFree(libmem.NewNodeMask(pool.GetMemset(memType).Members()...))
}
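
These helpers translate between the policy's pool/memoryType vocabulary and libmem's node and type masks. A hedged example of how a fit check might use them (fitsInPool is hypothetical, not part of this commit; the helpers it calls are defined above):

// fitsInPool asks libmem whether a pool still has room for amount bytes
// of the given memory type, using only the helpers defined above.
func (p *policy) fitsInPool(pool Node, memType memoryType, amount int64) bool {
	free := p.poolZoneFree(pool, memType)
	total := p.poolZoneCapacity(pool, memType)
	log.Debug("pool %s/%v: %d of %d bytes free", pool.Name(), memType, free, total)
	return amount <= free
}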
35 changes: 4 additions & 31 deletions cmd/plugins/topology-aware/policy/node.go
@@ -298,7 +298,6 @@ func (n *node) Dump(prefix string, level ...int) {
n.self.node.dump(prefix, lvl)
log.Debug("%s - %s", idt, n.noderes.DumpCapacity())
log.Debug("%s - %s", idt, n.freeres.DumpAllocatable())
n.freeres.DumpMemoryState(idt + " ")
if n.mem.Size() > 0 {
log.Debug("%s - normal memory: %v", idt, n.mem)
}
@@ -309,15 +308,8 @@
log.Debug("%s - PMEM memory: %v", idt, n.pMem)
}
for _, grant := range n.policy.allocations.grants {
cpuNodeID := grant.GetCPUNode().NodeID()
memNodeID := grant.GetMemoryNode().NodeID()
switch {
case cpuNodeID == n.id && memNodeID == n.id:
log.Debug("%s + cpu+mem %s", idt, grant)
case cpuNodeID == n.id:
log.Debug("%s + cpuonly %s", idt, grant)
case memNodeID == n.id:
log.Debug("%s + memonly %s", idt, grant)
if grant.GetCPUNode().NodeID() == n.id {
log.Debug("%s + %s", idt, grant)
}
}
if !n.Parent().IsNil() {
@@ -396,7 +388,7 @@ func (n *node) discoverSupply(assignedNUMANodes []idset.ID) Supply {
n.Name())
}

n.noderes = newSupply(n, cpuset.New(), cpuset.New(), cpuset.New(), 0, 0, nil, nil)
n.noderes = newSupply(n, cpuset.New(), cpuset.New(), cpuset.New(), 0, 0)
for _, c := range n.children {
supply := c.GetSupply()
n.noderes.Cumulate(supply)
@@ -409,7 +401,6 @@
} else {
log.Debug("%s: discovering attached/assigned resources...", n.Name())

mmap := createMemoryMap(0, 0, 0)
cpus := cpuset.New()

for _, nodeID := range assignedNUMANodes {
@@ -424,18 +415,15 @@
switch node.GetMemoryType() {
case system.MemoryTypeDRAM:
n.mem.Add(nodeID)
mmap.AddDRAM(meminfo.MemTotal)
shortCPUs := kubernetes.ShortCPUSet(nodeCPUs)
log.Debug(" + assigned DRAM NUMA node #%d (cpuset: %s, DRAM %.2fM)",
nodeID, shortCPUs, float64(meminfo.MemTotal)/float64(1024*1024))
case system.MemoryTypePMEM:
n.pMem.Add(nodeID)
mmap.AddPMEM(meminfo.MemTotal)
log.Debug(" + assigned PMEM NUMA node #%d (DRAM %.2fM)", nodeID,
float64(meminfo.MemTotal)/float64(1024*1024))
case system.MemoryTypeHBM:
n.hbm.Add(nodeID)
mmap.AddHBM(meminfo.MemTotal)
log.Debug(" + assigned HBMEM NUMA node #%d (DRAM %.2fM)",
nodeID, float64(meminfo.MemTotal)/float64(1024*1024))
default:
@@ -463,7 +451,7 @@ func (n *node) discoverSupply(assignedNUMANodes []idset.ID) Supply {
isolated := cpus.Intersection(n.policy.isolated)
reserved := cpus.Intersection(n.policy.reserved).Difference(isolated)
sharable := cpus.Difference(isolated).Difference(reserved)
n.noderes = newSupply(n, isolated, reserved, sharable, 0, 0, mmap, nil)
n.noderes = newSupply(n, isolated, reserved, sharable, 0, 0)
log.Debug(" = %s", n.noderes.DumpCapacity())
}

@@ -491,46 +479,31 @@ func (n *node) AssignNUMANodes(ids []idset.ID) {

// assignNUMANodes assigns the given set of NUMA nodes to this one.
func (n *node) assignNUMANodes(ids []idset.ID) {
mem := createMemoryMap(0, 0, 0)

for _, numaNodeID := range ids {
if n.mem.Has(numaNodeID) || n.pMem.Has(numaNodeID) || n.hbm.Has(numaNodeID) {
log.Warn("*** NUMA node #%d already discovered by or assigned to %s",
numaNodeID, n.Name())
continue
}
numaNode := n.policy.sys.Node(numaNodeID)
memTotal := uint64(0)
if meminfo, err := numaNode.MemoryInfo(); err != nil {
log.Error("%s: failed to get memory info for NUMA node #%d",
n.Name(), numaNodeID)
} else {
memTotal = meminfo.MemTotal
}
switch numaNode.GetMemoryType() {
case system.MemoryTypeDRAM:
mem.Add(memTotal, 0, 0)
n.mem.Add(numaNodeID)
log.Info("*** DRAM NUMA node #%d assigned to pool node %q",
numaNodeID, n.Name())
case system.MemoryTypePMEM:
n.pMem.Add(numaNodeID)
mem.Add(0, memTotal, 0)
log.Info("*** PMEM NUMA node #%d assigned to pool node %q",
numaNodeID, n.Name())
case system.MemoryTypeHBM:
n.hbm.Add(numaNodeID)
mem.Add(0, 0, memTotal)
log.Info("*** HBM NUMA node #%d assigned to pool node %q",
numaNodeID, n.Name())
default:
log.Fatal("can't assign NUMA node #%d of type %v to pool node %q",
numaNodeID, numaNode.GetMemoryType())
}
}

n.noderes.AssignMemory(mem)
n.freeres.AssignMemory(mem)
}

// Discover the set of memory attached to this node.
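
With the memoryMap plumbing cut out, a pool node only records which NUMA node IDs it owns (n.mem, n.pMem, n.hbm); all byte accounting is delegated to libmem. A hypothetical dump helper in the spirit of the removed DumpMemoryState, built purely on the helpers from libmem.go above (the memoryDRAM/memoryPMEM/memoryHBM constants are assumed from the policy's memoryType enum):

// dumpPoolMemory logs per-type capacity and free figures for a pool,
// querying libmem instead of the removed local accounting.
func (p *policy) dumpPoolMemory(pool Node) {
	for _, memType := range []memoryType{memoryDRAM, memoryPMEM, memoryHBM} {
		zone := p.poolZone(pool, memType)
		if zone == libmem.NodeMask(0) {
			continue // pool has no NUMA nodes of this type
		}
		log.Debug("%s %v: %d of %d bytes free", pool.Name(), memType,
			p.poolZoneFree(pool, memType), p.poolZoneCapacity(pool, memType))
	}
}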