From 348e84f7119fb18dd21b32958baff2801b455853 Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Mon, 3 Jun 2024 16:44:22 +0300 Subject: [PATCH] topology-aware: initial libmem conversion. Cut out the original memory accounting and allocation code. Plug in a libmem-based memory allocator instead. Signed-off-by: Krisztian Litkey --- cmd/plugins/topology-aware/policy/cache.go | 70 +- .../topology-aware/policy/coldstart.go | 10 +- cmd/plugins/topology-aware/policy/libmem.go | 95 +++ cmd/plugins/topology-aware/policy/node.go | 35 +- .../topology-aware/policy/pod-preferences.go | 49 +- cmd/plugins/topology-aware/policy/pools.go | 324 ++------ .../topology-aware/policy/resources.go | 776 +++++------------- .../policy/topology-aware-policy.go | 38 +- 8 files changed, 474 insertions(+), 923 deletions(-) create mode 100644 cmd/plugins/topology-aware/policy/libmem.go diff --git a/cmd/plugins/topology-aware/policy/cache.go b/cmd/plugins/topology-aware/policy/cache.go index 95a1596f9..af690bb38 100644 --- a/cmd/plugins/topology-aware/policy/cache.go +++ b/cmd/plugins/topology-aware/policy/cache.go @@ -19,8 +19,8 @@ import ( "time" "github.com/containers/nri-plugins/pkg/resmgr/cache" + libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory" "github.com/containers/nri-plugins/pkg/utils/cpuset" - idset "github.com/intel/goresctrl/pkg/utils" ) const ( @@ -66,23 +66,33 @@ func (p *policy) reinstateGrants(grants map[string]Grant) error { pool := grant.GetCPUNode() supply := pool.FreeSupply() - if err := supply.Reserve(grant); err != nil { - return policyError("failed to update pool %q with CPU grant of %q: %v", + o, err := p.restoreMemOffer(grant) + if err != nil { + return policyError("failed to get libmem offer for pool %q, grant of %s: %w", pool.Name(), c.PrettyName(), err) } - log.Info("updated pool %q with reinstated CPU grant of %q", - pool.Name(), c.PrettyName()) - - pool = grant.GetMemoryNode() - if err := supply.ReserveMemory(grant); err != nil { - grant.GetCPUNode().FreeSupply().ReleaseCPU(grant) - return policyError("failed to update pool %q with extra memory of %q: %v", + updates, err := supply.Reserve(grant, o) + if err != nil { + return policyError("failed to update pool %q with CPU grant of %q: %v", pool.Name(), c.PrettyName(), err) } - log.Info("updated pool %q with reinstanted memory reservation of %q", - pool.Name(), c.PrettyName()) + for uID, uZone := range updates { + if ug, ok := p.allocations.grants[uID]; !ok { + log.Error("failed to update grant %s to memory zone to %s, grant not found", + uID, uZone) + } else { + ug.SetMemoryZone(uZone) + if opt.PinMemory { + ug.GetContainer().SetCpusetMems(uZone.MemsetString()) + } + log.Info("updated grant %s to memory zone %s", uID, uZone) + } + } + + log.Info("updated pool %q with reinstated CPU grant of %q, memory zone %s", + pool.Name(), c.PrettyName(), grant.GetMemoryZone()) p.allocations.grants[id] = grant p.applyGrant(grant) @@ -94,16 +104,15 @@ func (p *policy) reinstateGrants(grants map[string]Grant) error { } type cachedGrant struct { - Exclusive string - Part int - CPUType cpuClass - Container string - Pool string - MemoryPool string - MemType memoryType - Memset idset.IDSet - MemoryLimit memoryMap - ColdStart time.Duration + Exclusive string + Part int + CPUType cpuClass + Container string + Pool string + MemoryPool libmem.NodeMask + MemType memoryType + MemSize int64 + ColdStart time.Duration } func newCachedGrant(cg Grant) *cachedGrant { @@ -113,15 +122,9 @@ func newCachedGrant(cg Grant) *cachedGrant { ccg.CPUType = 
cg.CPUType() ccg.Container = cg.GetContainer().GetID() ccg.Pool = cg.GetCPUNode().Name() - ccg.MemoryPool = cg.GetMemoryNode().Name() + ccg.MemoryPool = cg.GetMemoryZone() ccg.MemType = cg.MemoryType() - ccg.Memset = cg.Memset().Clone() - - ccg.MemoryLimit = make(memoryMap) - for key, value := range cg.MemLimit() { - ccg.MemoryLimit[key] = value - } - + ccg.MemSize = cg.GetMemorySize() ccg.ColdStart = cg.ColdStart() return ccg @@ -144,14 +147,11 @@ func (ccg *cachedGrant) ToGrant(policy *policy) (Grant, error) { cpuset.MustParse(ccg.Exclusive), ccg.Part, ccg.MemType, - ccg.MemoryLimit, ccg.ColdStart, ) - if g.Memset().String() != ccg.Memset.String() { - log.Error("cache error: mismatch in stored/recalculated memset: %s != %s", - ccg.Memset, g.Memset()) - } + g.SetMemoryZone(ccg.MemoryPool) + g.SetMemorySize(ccg.MemSize) return g, nil } diff --git a/cmd/plugins/topology-aware/policy/coldstart.go b/cmd/plugins/topology-aware/policy/coldstart.go index 7468bc250..f369ab14e 100644 --- a/cmd/plugins/topology-aware/policy/coldstart.go +++ b/cmd/plugins/topology-aware/policy/coldstart.go @@ -19,6 +19,7 @@ import ( "github.com/containers/nri-plugins/pkg/resmgr/cache" "github.com/containers/nri-plugins/pkg/resmgr/events" + libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory" ) // trigger cold start for the container if necessary. @@ -63,8 +64,13 @@ func (p *policy) finishColdStart(c cache.Container) (bool, error) { return false, policyError("coldstart: no grant found for %s", c.PrettyName()) } - log.Info("restoring memset to grant %v", g) - g.RestoreMemset() + log.Info("reallocating %s after coldstart", g) + err := g.ReallocMemory(p.memZoneType(g.GetMemoryZone()) | libmem.TypeMaskDRAM) + if err != nil { + log.Error("failed to reallocate %s after coldstart: %v", g, err) + } else { + log.Info("reallocated %s", g) + } g.ClearTimer() return true, nil diff --git a/cmd/plugins/topology-aware/policy/libmem.go b/cmd/plugins/topology-aware/policy/libmem.go new file mode 100644 index 000000000..2bc353baf --- /dev/null +++ b/cmd/plugins/topology-aware/policy/libmem.go @@ -0,0 +1,95 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package topologyaware + +import libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory" + +func (p *policy) getMemOffer(pool Node, req Request) (*libmem.Offer, error) { + var ( + ctr = req.GetContainer() + zone = libmem.NodeMask(0) + mtyp = libmem.TypeMask(0) + ) + + if memType := req.MemoryType(); memType == memoryPreserve { + zone = libmem.NewNodeMask(pool.GetMemset(memoryAll).Members()...) + mtyp = p.memAllocator.ZoneType(zone) + } else { + zone = libmem.NewNodeMask(pool.GetMemset(memType).Members()...) 
+ mtyp = libmem.TypeMask(memType) + } + + o, err := p.memAllocator.GetOffer( + libmem.Container( + ctr.GetID(), + ctr.PrettyName(), + req.MemAmountToAllocate(), + string(ctr.GetQOSClass()), + zone, + mtyp, + ), + ) + + return o, err +} + +func (p *policy) restoreMemOffer(g Grant) (*libmem.Offer, error) { + var ( + ctr = g.GetContainer() + zone = g.GetMemoryZone() + mtyp = p.memAllocator.ZoneType(zone) + ) + + o, err := p.memAllocator.GetOffer( + libmem.Container( + ctr.GetID(), + ctr.PrettyName(), + g.GetMemorySize(), + string(ctr.GetQOSClass()), + zone, + mtyp, + ), + ) + + return o, err +} + +func (p *policy) reallocMem(id string, nodes libmem.NodeMask, types libmem.TypeMask) (libmem.NodeMask, map[string]libmem.NodeMask, error) { + return p.memAllocator.Realloc(id, nodes, types) +} + +func (p *policy) releaseMem(id string) error { + return p.memAllocator.Release(id) +} + +func (p *policy) poolZoneType(pool Node, memType memoryType) libmem.TypeMask { + return p.memAllocator.ZoneType(libmem.NewNodeMask(pool.GetMemset(memType).Members()...)) +} + +func (p *policy) memZoneType(zone libmem.NodeMask) libmem.TypeMask { + return p.memAllocator.ZoneType(zone) +} + +func (p *policy) poolZone(pool Node, memType memoryType) libmem.NodeMask { + return libmem.NewNodeMask(pool.GetMemset(memType).Members()...) +} + +func (p *policy) poolZoneCapacity(pool Node, memType memoryType) int64 { + return p.memAllocator.ZoneCapacity(libmem.NewNodeMask(pool.GetMemset(memType).Members()...)) +} + +func (p *policy) poolZoneFree(pool Node, memType memoryType) int64 { + return p.memAllocator.ZoneFree(libmem.NewNodeMask(pool.GetMemset(memType).Members()...)) +} diff --git a/cmd/plugins/topology-aware/policy/node.go b/cmd/plugins/topology-aware/policy/node.go index 30dad107f..28498763e 100644 --- a/cmd/plugins/topology-aware/policy/node.go +++ b/cmd/plugins/topology-aware/policy/node.go @@ -298,7 +298,6 @@ func (n *node) Dump(prefix string, level ...int) { n.self.node.dump(prefix, lvl) log.Debug("%s - %s", idt, n.noderes.DumpCapacity()) log.Debug("%s - %s", idt, n.freeres.DumpAllocatable()) - n.freeres.DumpMemoryState(idt + " ") if n.mem.Size() > 0 { log.Debug("%s - normal memory: %v", idt, n.mem) } @@ -309,15 +308,8 @@ func (n *node) Dump(prefix string, level ...int) { log.Debug("%s - PMEM memory: %v", idt, n.pMem) } for _, grant := range n.policy.allocations.grants { - cpuNodeID := grant.GetCPUNode().NodeID() - memNodeID := grant.GetMemoryNode().NodeID() - switch { - case cpuNodeID == n.id && memNodeID == n.id: - log.Debug("%s + cpu+mem %s", idt, grant) - case cpuNodeID == n.id: - log.Debug("%s + cpuonly %s", idt, grant) - case memNodeID == n.id: - log.Debug("%s + memonly %s", idt, grant) + if grant.GetCPUNode().NodeID() == n.id { + log.Debug("%s + %s", idt, grant) } } if !n.Parent().IsNil() { @@ -396,7 +388,7 @@ func (n *node) discoverSupply(assignedNUMANodes []idset.ID) Supply { n.Name()) } - n.noderes = newSupply(n, cpuset.New(), cpuset.New(), cpuset.New(), 0, 0, nil, nil) + n.noderes = newSupply(n, cpuset.New(), cpuset.New(), cpuset.New(), 0, 0) for _, c := range n.children { supply := c.GetSupply() n.noderes.Cumulate(supply) @@ -409,7 +401,6 @@ func (n *node) discoverSupply(assignedNUMANodes []idset.ID) Supply { } else { log.Debug("%s: discovering attached/assigned resources...", n.Name()) - mmap := createMemoryMap(0, 0, 0) cpus := cpuset.New() for _, nodeID := range assignedNUMANodes { @@ -424,18 +415,15 @@ func (n *node) discoverSupply(assignedNUMANodes []idset.ID) Supply { switch node.GetMemoryType() { case 
system.MemoryTypeDRAM: n.mem.Add(nodeID) - mmap.AddDRAM(meminfo.MemTotal) shortCPUs := kubernetes.ShortCPUSet(nodeCPUs) log.Debug(" + assigned DRAM NUMA node #%d (cpuset: %s, DRAM %.2fM)", nodeID, shortCPUs, float64(meminfo.MemTotal)/float64(1024*1024)) case system.MemoryTypePMEM: n.pMem.Add(nodeID) - mmap.AddPMEM(meminfo.MemTotal) log.Debug(" + assigned PMEM NUMA node #%d (DRAM %.2fM)", nodeID, float64(meminfo.MemTotal)/float64(1024*1024)) case system.MemoryTypeHBM: n.hbm.Add(nodeID) - mmap.AddHBM(meminfo.MemTotal) log.Debug(" + assigned HBMEM NUMA node #%d (DRAM %.2fM)", nodeID, float64(meminfo.MemTotal)/float64(1024*1024)) default: @@ -463,7 +451,7 @@ func (n *node) discoverSupply(assignedNUMANodes []idset.ID) Supply { isolated := cpus.Intersection(n.policy.isolated) reserved := cpus.Intersection(n.policy.reserved).Difference(isolated) sharable := cpus.Difference(isolated).Difference(reserved) - n.noderes = newSupply(n, isolated, reserved, sharable, 0, 0, mmap, nil) + n.noderes = newSupply(n, isolated, reserved, sharable, 0, 0) log.Debug(" = %s", n.noderes.DumpCapacity()) } @@ -491,8 +479,6 @@ func (n *node) AssignNUMANodes(ids []idset.ID) { // assignNUMANodes assigns the given set of NUMA nodes to this one. func (n *node) assignNUMANodes(ids []idset.ID) { - mem := createMemoryMap(0, 0, 0) - for _, numaNodeID := range ids { if n.mem.Has(numaNodeID) || n.pMem.Has(numaNodeID) || n.hbm.Has(numaNodeID) { log.Warn("*** NUMA node #%d already discovered by or assigned to %s", @@ -500,27 +486,17 @@ func (n *node) assignNUMANodes(ids []idset.ID) { continue } numaNode := n.policy.sys.Node(numaNodeID) - memTotal := uint64(0) - if meminfo, err := numaNode.MemoryInfo(); err != nil { - log.Error("%s: failed to get memory info for NUMA node #%d", - n.Name(), numaNodeID) - } else { - memTotal = meminfo.MemTotal - } switch numaNode.GetMemoryType() { case system.MemoryTypeDRAM: - mem.Add(memTotal, 0, 0) n.mem.Add(numaNodeID) log.Info("*** DRAM NUMA node #%d assigned to pool node %q", numaNodeID, n.Name()) case system.MemoryTypePMEM: n.pMem.Add(numaNodeID) - mem.Add(0, memTotal, 0) log.Info("*** PMEM NUMA node #%d assigned to pool node %q", numaNodeID, n.Name()) case system.MemoryTypeHBM: n.hbm.Add(numaNodeID) - mem.Add(0, 0, memTotal) log.Info("*** HBM NUMA node #%d assigned to pool node %q", numaNodeID, n.Name()) default: @@ -528,9 +504,6 @@ func (n *node) assignNUMANodes(ids []idset.ID) { numaNodeID, numaNode.GetMemoryType()) } } - - n.noderes.AssignMemory(mem) - n.freeres.AssignMemory(mem) } // Discover the set of memory attached to this node. 
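The helpers in the new libmem.go wrap p.memAllocator so the rest of the policy can keep reasoning in terms of pool nodes and memoryType masks, while the per-node memoryMap bookkeeping above disappears from discovery. Roughly, memory allocation becomes a two-step offer/commit cycle. The sketch below condenses that cycle into one hypothetical helper; it is illustrative only and assumes the libmem API exactly as it is used elsewhere in this patch (NewNodeMask, GetOffer, ZoneType, Commit), and it skips the memoryPreserve special case that the real getMemOffer() handles.

// Illustrative only: how the libmem helpers combine into an allocation.
// The helper name is hypothetical; the libmem calls are the ones used in this patch.
func (p *policy) exampleAllocate(pool Node, req Request) (libmem.NodeMask, map[string]libmem.NodeMask, error) {
	// Turn the pool's NUMA nodes for the requested memory type into a libmem zone.
	zone := libmem.NewNodeMask(pool.GetMemset(req.MemoryType()).Members()...)

	// Ask the allocator for an offer against that zone; nothing is committed yet,
	// so scoring can hold one pending offer per candidate pool.
	offer, err := p.memAllocator.GetOffer(
		libmem.Container(
			req.GetContainer().GetID(),
			req.GetContainer().PrettyName(),
			req.MemAmountToAllocate(),
			string(req.GetContainer().GetQOSClass()),
			zone,
			p.memAllocator.ZoneType(zone),
		),
	)
	if err != nil {
		return libmem.NodeMask(0), nil, err
	}

	// Committing the winning offer finalizes the allocation: it returns the zone
	// assigned to this container and the adjusted zones of any other containers
	// the allocator had to expand to make room.
	return offer.Commit()
}

Because GetOffer() commits nothing, each candidate pool can be scored with its own pending offer, which is why Score gains an Offer() accessor below and compareScores() prefers pools whose offers succeeded and are tighter.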
diff --git a/cmd/plugins/topology-aware/policy/pod-preferences.go b/cmd/plugins/topology-aware/policy/pod-preferences.go index 37bc7bf4d..c672bf085 100644 --- a/cmd/plugins/topology-aware/policy/pod-preferences.go +++ b/cmd/plugins/topology-aware/policy/pod-preferences.go @@ -29,6 +29,7 @@ import ( "github.com/containers/nri-plugins/pkg/kubernetes" "github.com/containers/nri-plugins/pkg/resmgr/cache" + libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory" ) const ( @@ -83,25 +84,17 @@ var memoryNamedTypes = map[string]memoryType{ "mixed": memoryAll, } -// names by memory type -var memoryTypeNames = map[memoryType]string{ - memoryDRAM: "DRAM", - memoryPMEM: "PMEM", - memoryHBM: "HBM", -} - // memoryType is bitmask of types of memory to allocate -type memoryType int +type memoryType libmem.TypeMask // memoryType bits const ( - memoryUnspec memoryType = (0x1 << iota) >> 1 - memoryDRAM - memoryPMEM - memoryHBM - memoryPreserve - memoryFirstUnusedBit - memoryAll = memoryFirstUnusedBit - 1 + memoryUnspec = memoryType(libmem.TypeMask(0)) + memoryDRAM = memoryType(libmem.TypeMaskDRAM) + memoryPMEM = memoryType(libmem.TypeMaskPMEM) + memoryHBM = memoryType(libmem.TypeMaskHBM) + memoryPreserve = memoryType(libmem.TypeMaskHBM << 1) + memoryAll = memoryType(memoryDRAM | memoryPMEM | memoryHBM) // type of memory to use if none specified defaultMemoryType = memoryAll @@ -573,19 +566,23 @@ func podMemoryTypePreference(pod cache.Pod, c cache.Container) memoryType { } // memoryAllocationPreference returns the amount and kind of memory to allocate. -func memoryAllocationPreference(pod cache.Pod, c cache.Container) (uint64, uint64, memoryType) { +func memoryAllocationPreference(pod cache.Pod, c cache.Container) (int64, int64, memoryType) { + var ( + req int64 + lim int64 + ) + resources, ok := c.GetResourceUpdates() if !ok { resources = c.GetResourceRequirements() } mtype := memoryTypePreference(pod, c) - req, lim := uint64(0), uint64(0) if memReq, ok := resources.Requests[corev1.ResourceMemory]; ok { - req = uint64(memReq.Value()) + req = memReq.Value() } if memLim, ok := resources.Limits[corev1.ResourceMemory]; ok { - lim = uint64(memLim.Value()) + lim = memLim.Value() } return req, lim, mtype @@ -601,15 +598,7 @@ func (t cpuClass) String() string { // String stringifies a memoryType. func (t memoryType) String() string { - str := "" - sep := "" - for _, bit := range []memoryType{memoryDRAM, memoryPMEM, memoryHBM} { - if int(t)&int(bit) != 0 { - str += sep + memoryTypeNames[bit] - sep = "," - } - } - return str + return libmem.TypeMask(t).String() } // parseMemoryType parses a memory type string, ideally produced by String() @@ -656,3 +645,7 @@ func (t *memoryType) UnmarshalJSON(data []byte) error { *t = mtype return nil } + +func (t memoryType) TypeMask() libmem.TypeMask { + return libmem.TypeMask(t) +} diff --git a/cmd/plugins/topology-aware/policy/pools.go b/cmd/plugins/topology-aware/policy/pools.go index 2716f6c53..a2cab5959 100644 --- a/cmd/plugins/topology-aware/policy/pools.go +++ b/cmd/plugins/topology-aware/policy/pools.go @@ -21,6 +21,7 @@ import ( "github.com/containers/nri-plugins/pkg/utils/cpuset" "github.com/containers/nri-plugins/pkg/resmgr/cache" + libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory" system "github.com/containers/nri-plugins/pkg/sysfs" idset "github.com/intel/goresctrl/pkg/utils" ) @@ -349,9 +350,12 @@ func (p *policy) checkHWTopology() error { // Pick a pool and allocate resource from it to the container. 
func (p *policy) allocatePool(container cache.Container, poolHint string) (Grant, error) { - var pool Node + var ( + pool Node + offer *libmem.Offer + ) - request := newRequest(container) + request := newRequest(container, p.memAllocator.Masks().AvailableTypes()) if p.root.FreeSupply().ReservedCPUs().IsEmpty() && request.CPUType() == cpuReserved { // Fallback to allocating reserved CPUs from the shared pool @@ -359,12 +363,13 @@ func (p *policy) allocatePool(container cache.Container, poolHint string) (Grant request.SetCPUType(cpuNormal) } - // Assumption: in the beginning the CPUs and memory will be allocated from - // the same pool. This assumption can be relaxed later, requires separate - // (but connected) scoring of memory and CPU. - if request.CPUType() == cpuReserved || request.CPUType() == cpuPreserve { pool = p.root + o, err := p.getMemOffer(pool, request) + if err != nil { + return nil, policyError("failed to get offer for request %s: %v", request, err) + } + offer = o } else { affinity, err := p.calculatePoolAffinities(request.GetContainer()) @@ -404,199 +409,36 @@ func (p *policy) allocatePool(container cache.Container, poolHint string) (Grant if pool == nil { pool = pools[0] } + + offer = scores[pool.NodeID()].Offer() + if offer == nil { + return nil, policyError("failed to get offer for request %s", request) + } } supply := pool.FreeSupply() - grant, err := supply.Allocate(request) + grant, updates, err := supply.Allocate(request, offer) if err != nil { return nil, policyError("failed to allocate %s from %s: %v", request, supply.DumpAllocatable(), err) } - log.Debug("allocated req '%s' to memory node '%s' (memset %s,%s,%s)", - container.PrettyName(), grant.GetMemoryNode().Name(), - grant.GetMemoryNode().GetMemset(memoryDRAM), - grant.GetMemoryNode().GetMemset(memoryPMEM), - grant.GetMemoryNode().GetMemset(memoryHBM)) - - // In case the workload is assigned to a memory node with multiple - // child nodes, there is no guarantee that the workload will - // allocate memory "nicely". Instead we'll have to make the - // conservative assumption that the memory will all be allocated - // from one single node, and that node can be any of the child - // nodes in the system. Thus, we'll need to reserve the memory - // from all child nodes, and move the containers already - // assigned to the child nodes upwards in the topology tree, if - // they no longer fit to the child node that they are in. In - // other words, they'll need to have a wider range of memory - // node options in order to fit to memory. - // - // - // Example: - // - // Workload 1 and Workload 2 are running on the leaf nodes: - // - // +----------------+ - // |Total mem: 4G | - // |Total CPUs: 4 | Workload 1: - // |Reserved: | - // | 1.5G | 1G mem - // | | - // | | Workload 2: - // | | - // +----------------+ 0.5G mem - // / \ - // / \ - // / \ - // / \ - // / \ - // / \ - // / \ - // / \ - // +----------------+ +----------------+ - // |Total mem: 2G | |Total mem: 2G | - // |Total CPUs: 2 | |Total CPUs: 2 | - // |Reserved: | |Reserved: | - // | 1G | | 0.5G | - // | | | | - // | | | | - // | * WL 1 | | * WL 2 | - // +----------------+ +----------------+ - // - // - // Then Workload 3 comes in and is assigned to the root node. 
Memory - // reservations are done on the leaf nodes: - // - // +----------------+ - // |Total mem: 4G | - // |Total CPUs: 4 | Workload 1: - // |Reserved: | - // | 3G | 1G mem - // | | - // | | Workload 2: - // | * WL 3 | - // +----------------+ 0.5G mem - // / \ - // / \ Workload 3: - // / \ - // / \ 1.5G mem - // / \ - // / \ - // / \ - // / \ - // +----------------+ +----------------+ - // |Total mem: 2G | |Total mem: 2G | - // |Total CPUs: 2 | |Total CPUs: 2 | - // |Reserved: | |Reserved: | - // | 2.5G | | 2G | - // | | | | - // | | | | - // | * WL 1 | | * WL 2 | - // +----------------+ +----------------+ - // - // - // Workload 1 no longer fits to the leaf node, because the total - // reservation from the leaf node is over the memory maximum. - // Thus, it's moved upwards in the tree to the root node. Memory - // resevations are again updated accordingly: - // - // +----------------+ - // |Total mem: 4G | - // |Total CPUs: 4 | Workload 1: - // |Reserved: | - // | 3G | 1G mem - // | | - // | * WL 1 | Workload 2: - // | * WL 3 | - // +----------------+ 0.5G mem - // / \ - // / \ Workload 3: - // / \ - // / \ 1.5G mem - // / \ - // / \ - // / \ - // / \ - // +----------------+ +----------------+ - // |Total mem: 2G | |Total mem: 2G | - // |Total CPUs: 2 | |Total CPUs: 2 | - // |Reserved: | |Reserved: | - // | 2.5G | | 3G | - // | | | | - // | | | | - // | | | * WL 2 | - // +----------------+ +----------------+ - // - // - // Now Workload 2 doesn't fit to the leaf node either. It's also moved - // to the root node: - // - // +----------------+ - // |Total mem: 4G | - // |Total CPUs: 4 | Workload 1: - // |Reserved: | - // | 3G | 1G mem - // | * WL 2 | - // | * WL 1 | Workload 2: - // | * WL 3 | - // +----------------+ 0.5G mem - // / \ - // / \ Workload 3: - // / \ - // / \ 1.5G mem - // / \ - // / \ - // / \ - // / \ - // +----------------+ +----------------+ - // |Total mem: 2G | |Total mem: 2G | - // |Total CPUs: 2 | |Total CPUs: 2 | - // |Reserved: | |Reserved: | - // | 3G | | 3G | - // | | | | - // | | | | - // | | | | - // +----------------+ +----------------+ - // - - // We need to analyze all existing containers which are a subset of current grant. - memset := grant.GetMemoryNode().GetMemset(grant.MemoryType()) - - // Add an extra memory reservation to all subnodes. - // TODO: no need to do any of this if no memory request - grant.UpdateExtraMemoryReservation() - - // See how much memory reservations the workloads on the - // nodes up from this one cause to the node. We only need to - // analyze the workloads up until this node, because it's - // guaranteed that the subtree can hold the workloads. - - // If it turns out that the current workloads no longer fit - // to the node with the reservations from nodes from above - // in the tree, move all nodes upward. Note that this - // creates a reservation of the same size to the node, so in - // effect the node has to be empty of its "own" workloads. - // In this case move all the workloads one level up in the tree. - - changed := true - for changed { - changed = false - for _, oldGrant := range p.allocations.grants { - oldMemset := oldGrant.GetMemoryNode().GetMemset(grant.MemoryType()) - if oldMemset.Size() < memset.Size() && memset.Has(oldMemset.Members()...) 
{ - changed, err = oldGrant.ExpandMemset() - if err != nil { - return nil, err - } - if changed { - log.Debug("* moved container %s upward to node %s to guarantee memory", - oldGrant.GetContainer().PrettyName(), oldGrant.GetMemoryNode().Name()) - break - } + for id, z := range updates { + g, ok := p.allocations.grants[id] + if !ok { + log.Error("offer commit returned zone update %s for unknown container %s", z, id) + } else { + log.Info("updating memory allocation for %s to %s", g.GetContainer().PrettyName(), z) + g.SetMemoryZone(z) + if opt.PinMemory { + g.GetContainer().SetCpusetMems(z.MemsetString()) } } } + log.Debug("allocated req '%s' to memory zone %s", container.PrettyName(), + grant.GetMemoryZone()) + p.allocations.grants[container.GetID()] = grant p.saveAllocations() @@ -642,10 +484,7 @@ func (p *policy) applyGrant(grant Grant) { return } - mems := "" - if opt.PinMemory { - mems = grant.Memset().String() - } + mems := grant.GetMemoryZone() if opt.PinCPU { if cpuType == cpuPreserve { @@ -689,12 +528,12 @@ func (p *policy) applyGrant(grant Grant) { if grant.MemoryType() == memoryPreserve { log.Debug(" => preserving %s memory pinning %s", container.PrettyName(), container.GetCpusetMems()) } else { - if mems != "" { + if mems != libmem.NodeMask(0) { log.Debug(" => pinning %s to memory %s", container.PrettyName(), mems) } else { log.Debug(" => not pinning %s memory, memory set is empty...", container.PrettyName()) } - container.SetCpusetMems(mems) + container.SetCpusetMems(mems.MemsetString()) } } @@ -769,53 +608,27 @@ func (p *policy) updateSharedAllocations(grant *Grant) { } } -func (p *policy) filterInsufficientResources(req Request, originals []Node) []Node { - sufficient := make([]Node, 0) - - for _, node := range originals { - // TODO: Need to filter based on the memory demotion scheme here. For example, if the request is - // of memory type memoryAll, the memory used might be PMEM until it's full and after that DRAM. If - // it's DRAM, amount of PMEM should not be considered and so on. How to find this out in a live - // system? - - supply := node.FreeSupply() - reqMemType := req.MemoryType() +func (p *policy) filterInsufficientResources(req Request, pools []Node) []Node { + filtered := make([]Node, 0) - if reqMemType == memoryUnspec || reqMemType == memoryPreserve { - // The algorithm for handling unspecified memory allocations is the same as for handling a request - // with memory type all. - reqMemType = memoryAll + required := req.MemAmountToAllocate() + for _, node := range pools { + memType := req.MemoryType() + if memType == memoryUnspec || memType == memoryPreserve { + memType = memoryAll } - required := req.MemAmountToAllocate() - - for _, memType := range []memoryType{memoryPMEM, memoryDRAM, memoryHBM} { - if reqMemType&memType != 0 { - extra := supply.ExtraMemoryReservation(memType) - free := supply.MemoryLimit()[memType] - if extra > free { - continue - } - if required+extra <= free { - sufficient = append(sufficient, node) - required = 0 - break - } - if req.ColdStart() > 0 { - // For a "cold start" request, the memory request must fit completely in the PMEM. So reject the node. - break - } - // Subtracting unsigned integers. - // Here free >= extra, that is, (free - extra) is non-negative, - // and required > free - extra, that is, required stays positive. 
- required -= (free - extra) - } - } - if required > 0 { - log.Debug("%s: filtered out %s with insufficient memory", req.GetContainer().PrettyName(), node.Name()) + available := p.poolZoneFree(node, memType) + if available < required { + log.Debug("%s has insufficient available memory (%s < %s)", node.Name(), + prettyMem(available), prettyMem(required)) + } else { + log.Debug("%s has enough available memory", node.Name()) + filtered = append(filtered, node) } } - return sufficient + + return filtered } // Score pools against the request and sort them by score. @@ -850,6 +663,7 @@ func (p *policy) compareScores(request Request, pools []Node, scores map[int]Sco isolated2, reserved2, shared2 := score2.IsolatedCapacity(), score2.ReservedCapacity(), score2.SharedCapacity() a1 := affinityScore(affinity, node1) a2 := affinityScore(affinity, node2) + o1, o2 := score1.Offer(), score2.Offer() log.Debug("comparing scores for %s and %s", node1.Name(), node2.Name()) log.Debug(" %s: %s, affinity score %f", node1.Name(), score1.String(), a1) @@ -914,6 +728,46 @@ func (p *policy) compareScores(request Request, pools []Node, scores map[int]Sco log.Debug(" - affinity is a TIE") + // better matching or tighter memory offer wins + switch { + case o1 != nil && o2 == nil: + log.Debug(" => %s loses on memory offer (failed offer)", node2.Name()) + return true + case o1 == nil && o2 != nil: + log.Debug(" => %s loses on memory offer (failed offer)", node1.Name()) + return false + case o1 == nil && o2 == nil: + log.Debug(" - memory offer is a TIE (both failed)") + default: + m1, m2 := o1.NodeMask(), o2.NodeMask() + t1, t2 := p.memZoneType(m1), p.memZoneType(m2) + memType := request.MemoryType() + + if t1 == memType.TypeMask() && t2 != memType.TypeMask() { + log.Debug(" - %s loses on mis-matching type (%s != %s)", node2.Name(), t2, memType) + return true + } + if t1 != memType.TypeMask() && t2 == memType.TypeMask() { + log.Debug(" - %s loses on mis-matching type (%s != %s)", node1.Name(), t1, memType) + return false + } + log.Debug(" - offer memory types are a tie (%s vs %s)", t1, t2) + + if m1.Size() < m2.Size() { + log.Debug(" - %s loses on memory offer (%s less tight than %s)", + node2.Name(), m2, m1) + return true + } + if m2.Size() < m1.Size() { + log.Debug(" - %s loses on memory offer (%s less tight than %s)", + node1.Name(), m1, m2) + return false + } + if m2.Size() == m1.Size() { + log.Debug(" - memory offers are a TIE (%s vs. %s)", m1, m2) + } + } + // matching memory type wins if reqType := request.MemoryType(); reqType != memoryUnspec && reqType != memoryPreserve { if node1.HasMemoryType(reqType) && !node2.HasMemoryType(reqType) { diff --git a/cmd/plugins/topology-aware/policy/resources.go b/cmd/plugins/topology-aware/policy/resources.go index 0b20f4d7e..43174897d 100644 --- a/cmd/plugins/topology-aware/policy/resources.go +++ b/cmd/plugins/topology-aware/policy/resources.go @@ -21,12 +21,11 @@ import ( "github.com/containers/nri-plugins/pkg/sysfs" "github.com/containers/nri-plugins/pkg/utils/cpuset" - v1 "k8s.io/api/core/v1" "github.com/containers/nri-plugins/pkg/cpuallocator" "github.com/containers/nri-plugins/pkg/kubernetes" "github.com/containers/nri-plugins/pkg/resmgr/cache" - idset "github.com/intel/goresctrl/pkg/utils" + libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory" ) type ( @@ -67,12 +66,8 @@ type Supply interface { GrantedReserved() int // GrantedShared returns the locally granted shared CPU capacity in this supply. 
GrantedShared() int - // GrantedMemory returns the locally granted memory capacity in this supply. - GrantedMemory(memoryType) uint64 // Cumulate cumulates the given supply into this one. Cumulate(Supply) - // AssignMemory adds extra memory to this supply (for extra NUMA nodes assigned to a pool). - AssignMemory(mem memoryMap) // AccountAllocateCPU accounts for (removes) allocated exclusive capacity from the supply. AccountAllocateCPU(Grant) // AccountReleaseCPU accounts for (reinserts) released exclusive capacity into the supply. @@ -81,33 +76,17 @@ type Supply interface { GetScore(Request) Score // AllocatableSharedCPU calculates the allocatable amount of shared CPU of this supply. AllocatableSharedCPU(...bool) int - // Allocate allocates CPU capacity from this supply and returns it as a grant. - Allocate(Request) (Grant, error) + // Allocate allocates a grant from the supply. + Allocate(Request, *libmem.Offer) (Grant, map[string]libmem.NodeMask, error) // ReleaseCPU releases a previously allocated CPU grant from this supply. ReleaseCPU(Grant) - // ReleaseMemory releases a previously allocated memory grant from this supply. - ReleaseMemory(Grant) - // ReallocateMemory updates the Grant to allocate memory from this supply. - ReallocateMemory(Grant) error - // ExtraMemoryReservation returns the memory reservation. - ExtraMemoryReservation(memoryType) uint64 - // SetExtraMemroyReservation sets the extra memory reservation based on the granted memory. - SetExtraMemoryReservation(Grant) - // ReleaseExtraMemoryReservation removes the extra memory reservations based on the granted memory. - ReleaseExtraMemoryReservation(Grant) - // MemoryLimit returns the amount of various memory types belonging to this grant. - MemoryLimit() memoryMap // Reserve accounts for CPU grants after reloading cached allocations. - Reserve(Grant) error - // ReserveMemory accounts for memory grants after reloading cached allocations. - ReserveMemory(Grant) error + Reserve(Grant, *libmem.Offer) (map[string]libmem.NodeMask, error) // DumpCapacity returns a printable representation of the supply's resource capacity. DumpCapacity() string // DumpAllocatable returns a printable representation of the supply's alloctable resources. DumpAllocatable() string - // DumpMemoryState dumps the state of the available and allocated memory. - DumpMemoryState(string) } // Request represents CPU and memory resources requested by a container. @@ -131,7 +110,7 @@ type Request interface { // MemoryType returns the type(s) of requested memory. MemoryType() memoryType // MemAmountToAllocate retuns how much memory we need to reserve for a request. - MemAmountToAllocate() uint64 + MemAmountToAllocate() int64 // ColdStart returns the cold start timeout. ColdStart() time.Duration } @@ -140,8 +119,6 @@ type Request interface { type Grant interface { // SetCPUPortion sets the fraction CPU portion for the grant. SetCPUPortion(fraction int) - // SetMemoryAllocation sets the memory allocation for the grant. - SetMemoryAllocation(memoryType, memoryMap, time.Duration) // Clone creates a copy of this grant. Clone() Grant // RefetchNodes updates the stored cpu and memory nodes of this grant by name. @@ -150,9 +127,10 @@ type Grant interface { GetContainer() cache.Container // GetCPUNode returns the node that granted CPU capacity to the container. GetCPUNode() Node - // GetMemoryNode returns the node which granted memory capacity to - // the container. - GetMemoryNode() Node + // GetMemorySize returns the amount of memory allocated to this grant. 
+ GetMemorySize() int64 + // GetMemoryZone returns the memory zone allocated granted to the container. + GetMemoryZone() libmem.NodeMask // CPUType returns the type of granted CPUs CPUType() cpuClass // CPUPortion returns granted milli-CPUs of non-full CPUs of CPUType(). @@ -172,30 +150,25 @@ type Grant interface { IsolatedCPUs() cpuset.CPUSet // MemoryType returns the type(s) of granted memory. MemoryType() memoryType - // SetMemoryNode updates the grant memory controllers. - SetMemoryNode(Node) - // Memset returns the granted memory controllers as a string. - Memset() idset.IDSet - // ExpandMemset() makes the memory controller set larger as the grant - // is moved up in the node hierarchy. - ExpandMemset() (bool, error) - // MemLimit returns the amount of memory that the container is - // allowed to use. - MemLimit() memoryMap + // SetMemoryType sets the memory type for this grant. + SetMemoryType(memoryType) + // SetMemoryZone sets the memory zone for this grant. + SetMemoryZone(libmem.NodeMask) + // SetMemorySize sets the amount of memory to allocate. + SetMemorySize(int64) + // SetColdstart sets coldstart period for the grant. + SetColdstart(time.Duration) + // String returns a printable representation of this grant. String() string // Release releases the grant from all the Supplys it uses. Release() + // Reallocate memory with the given types. + ReallocMemory(types libmem.TypeMask) error // AccountAllocateCPU accounts for (removes) allocated exclusive capacity for this grant. AccountAllocateCPU() // AccountReleaseCPU accounts for (reinserts) released exclusive capacity for this grant. AccountReleaseCPU() - // UpdateExtraMemoryReservation() updates the reservations in the subtree - // of nodes under the node from which the memory was granted. - UpdateExtraMemoryReservation() - // RestoreMemset restores the granted memory set to node maximum - // and reapplies the grant. - RestoreMemset() // ColdStart returns the cold start timeout. ColdStart() time.Duration // AddTimer adds a cold start timer. @@ -221,22 +194,20 @@ type Score interface { Colocated() int HintScores() map[string]float64 PrioCapacity(cpuPrio) int + + Offer() *libmem.Offer + String() string } -type memoryMap map[memoryType]uint64 - // supply implements our Supply interface. 
type supply struct { - node Node // node supplying CPUs and memory - isolated cpuset.CPUSet // isolated CPUs at this node - reserved cpuset.CPUSet // reserved CPUs at this node - sharable cpuset.CPUSet // sharable CPUs at this node - grantedReserved int // amount of reserved CPUs allocated - grantedShared int // amount of shareable CPUs allocated - mem memoryMap // available memory for this node - grantedMem memoryMap // total memory granted - extraMemReservations map[Grant]memoryMap // how much memory each workload above has requested + node Node // node supplying CPUs and memory + isolated cpuset.CPUSet // isolated CPUs at this node + reserved cpuset.CPUSet // reserved CPUs at this node + sharable cpuset.CPUSet // sharable CPUs at this node + grantedReserved int // amount of reserved CPUs allocated + grantedShared int // amount of shareable CPUs allocated } var _ Supply = &supply{} @@ -249,10 +220,9 @@ type request struct { isolate bool // prefer isolated exclusive CPUs cpuType cpuClass // preferred CPU type (normal, reserved) prio cpuPrio // CPU priority preference, ignored for fraction requests - - memReq uint64 // memory request - memLim uint64 // memory limit - memType memoryType // requested types of memory + memReq int64 + memLim int64 + memType memoryType // requested types of memory // coldStart tells the timeout (in milliseconds) how long to wait until // a DRAM memory controller should be added to a container asking for a @@ -268,15 +238,14 @@ var _ Request = &request{} type grant struct { container cache.Container // container CPU is granted to node Node // node CPU is supplied from - memoryNode Node // node memory is supplied from exclusive cpuset.CPUSet // exclusive CPUs cpuType cpuClass // type of CPUs (normal, reserved, ...) cpuPortion int // milliCPUs granted from CPUs of cpuType memType memoryType // requested types of memory - memset idset.IDSet // assigned memory nodes - allocatedMem memoryMap // memory limit coldStart time.Duration // how long until cold start is done coldStartTimer *time.Timer // timer to trigger cold start timeout + memSize int64 // amount of memory to allocate + memZone libmem.NodeMask // allocated memory zone } var _ Grant = &grant{} @@ -285,6 +254,7 @@ var _ Grant = &grant{} type score struct { supply Supply // CPU supply (node) req Request // CPU request (container) + offer *libmem.Offer // possible memory allocation isolated int // remaining isolated CPUs reserved int // remaining reserved CPUs shared int // remaining shared capacity @@ -297,85 +267,17 @@ var _ Score = &score{} // newSupply creates CPU supply for the given node, cpusets and existing grant. 
-func newSupply(n Node, isolated, reserved, sharable cpuset.CPUSet, grantedReserved int, grantedShared int, mem, grantedMem memoryMap) Supply { - if mem == nil { - mem = createMemoryMap(0, 0, 0) - } - if grantedMem == nil { - grantedMem = createMemoryMap(0, 0, 0) - } +func newSupply(n Node, isolated, reserved, sharable cpuset.CPUSet, grantedReserved int, grantedShared int) Supply { return &supply{ - node: n, - isolated: isolated.Clone(), - reserved: reserved.Clone(), - sharable: sharable.Clone(), - grantedReserved: grantedReserved, - grantedShared: grantedShared, - mem: mem, - grantedMem: grantedMem, - extraMemReservations: make(map[Grant]memoryMap), - } -} - -func createMemoryMap(dram, pmem, hbm uint64) memoryMap { - return memoryMap{ - memoryDRAM: dram, - memoryPMEM: pmem, - memoryHBM: hbm, - memoryAll: dram + pmem + hbm, - memoryUnspec: 0, + node: n, + isolated: isolated.Clone(), + reserved: reserved.Clone(), + sharable: sharable.Clone(), + grantedReserved: grantedReserved, + grantedShared: grantedShared, } } -func (m memoryMap) Add(dram, pmem, hbm uint64) { - m[memoryDRAM] += dram - m[memoryPMEM] += pmem - m[memoryPMEM] += hbm - m[memoryAll] += dram + pmem + hbm -} - -func (m memoryMap) AddDRAM(dram uint64) { - m[memoryDRAM] += dram - m[memoryAll] += dram -} - -func (m memoryMap) AddPMEM(pmem uint64) { - m[memoryPMEM] += pmem - m[memoryAll] += pmem -} - -func (m memoryMap) AddHBM(hbm uint64) { - m[memoryHBM] += hbm - m[memoryAll] += hbm -} - -func (m memoryMap) String() string { - mem, sep := "", "" - - dram, pmem, hbm, types := m[memoryDRAM], m[memoryPMEM], m[memoryHBM], 0 - if dram > 0 || pmem > 0 || hbm > 0 { - if dram > 0 { - mem += "DRAM " + prettyMem(dram) - sep = ", " - types++ - } - if pmem > 0 { - mem += sep + "PMEM " + prettyMem(pmem) - sep = ", " - types++ - } - if hbm > 0 { - mem += sep + "HBM " + prettyMem(hbm) - types++ - } - if types > 1 { - mem += sep + "total " + prettyMem(pmem+dram+hbm) - } - } - - return mem -} - // GetNode returns the node supplying CPU and memory. func (cs *supply) GetNode() Node { return cs.node @@ -383,16 +285,7 @@ func (cs *supply) GetNode() Node { // Clone clones the given CPU supply. func (cs *supply) Clone() Supply { - // Copy the maps. - mem := make(memoryMap) - for key, value := range cs.mem { - mem[key] = value - } - grantedMem := make(memoryMap) - for key, value := range cs.grantedMem { - grantedMem[key] = value - } - return newSupply(cs.node, cs.isolated, cs.reserved, cs.sharable, cs.grantedReserved, cs.grantedShared, mem, grantedMem) + return newSupply(cs.node, cs.isolated, cs.reserved, cs.sharable, cs.grantedReserved, cs.grantedShared) } // IsolatedCpus returns the isolated CPUSet of this supply. @@ -420,15 +313,6 @@ func (cs *supply) GrantedShared() int { return cs.grantedShared } -func (cs *supply) GrantedMemory(memType memoryType) uint64 { - // Return only granted memory of correct type - return cs.grantedMem[memType] -} - -func (cs *supply) MemoryLimit() memoryMap { - return cs.mem -} - // Cumulate more CPU to supply. func (cs *supply) Cumulate(more Supply) { mcs := more.(*supply) @@ -438,20 +322,6 @@ func (cs *supply) Cumulate(more Supply) { cs.sharable = cs.sharable.Union(mcs.sharable) cs.grantedReserved += mcs.grantedReserved cs.grantedShared += mcs.grantedShared - - for key, value := range mcs.mem { - cs.mem[key] += value - } - for key, value := range mcs.grantedMem { - cs.grantedMem[key] += value - } -} - -// AssignMemory adds memory (for extra NUMA nodes assigned to a pool node). 
-func (cs *supply) AssignMemory(mem memoryMap) { - for key, value := range mem { - cs.mem[key] += value - } } // AccountAllocateCPU accounts for (removes) allocated exclusive capacity from the supply. @@ -480,96 +350,29 @@ func (cs *supply) AccountReleaseCPU(g Grant) { cs.sharable = cs.sharable.Union(sharable) } -// allocateMemory tries to fulfill the memory allocation part of a request. -func (cs *supply) allocateMemory(r Request) (memoryMap, error) { - reqType := r.MemoryType() - if reqType == memoryUnspec || reqType == memoryPreserve { - reqType = memoryAll - } - - allocated := createMemoryMap(0, 0, 0) - requested := r.MemAmountToAllocate() - remaining := requested - - // - // Notes: - // We try to allocate PMEM, then DRAM, and finally HBM, honoring - // the types allowed by the request. We don't need to care about - // extra memory reservations for this node as all the nodes with - // insufficient memory have been filtered out before allocation. - // - // However, for cold started containers we do check if there is - // enough PMEM free to accomodate the full request and bail out - // if that check fails. - // - - for _, memType := range []memoryType{memoryPMEM, memoryDRAM, memoryHBM} { - if remaining > 0 && (reqType&memType) != 0 { - available := cs.mem[memType] - - log.Debug("%s: trying %s %s of %s available", - r.GetContainer().PrettyName(), - prettyMem(remaining), memType.String(), prettyMem(available)) - - if remaining <= available { - allocated[memType] = remaining - } else { - allocated[memType] = available - } - - cs.grantedMem[memType] += allocated[memType] - cs.mem[memType] -= allocated[memType] - remaining -= allocated[memType] - } - - if remaining > 0 { - if r.ColdStart() > 0 && memType == memoryPMEM { - return nil, policyError("internal error: "+ - "not enough PMEM for cold start at %s", cs.GetNode().Name()) - } - } else { - break - } - } - - if remaining > 0 { - log.Debug("%s: %s allocation from %s fell short %s", - r.GetContainer().PrettyName(), - reqType.String(), cs.GetNode().Name(), prettyMem(remaining)) - - for memType, amount := range allocated { - if amount > 0 { - cs.grantedMem[memType] -= amount - cs.mem[memType] += amount - } - } - - return nil, policyError("internal error: "+ - "not enough memory at %s", cs.node.Name()) +// Allocate allocates a grant from the supply. +func (cs *supply) Allocate(r Request, o *libmem.Offer) (Grant, map[string]libmem.NodeMask, error) { + if o == nil { + return nil, nil, fmt.Errorf("nil libmem offer") } - cs.grantedMem[memoryAll] += requested - cs.mem[memoryAll] -= requested - - return allocated, nil -} - -// Allocate allocates a grant from the supply. -func (cs *supply) Allocate(r Request) (Grant, error) { grant, err := cs.AllocateCPU(r) if err != nil { - return nil, err + return nil, nil, err } - memory, err := cs.allocateMemory(r) + zone, updates, err := o.Commit() if err != nil { cs.ReleaseCPU(grant) - return nil, err + return nil, nil, fmt.Errorf("failed to commit memory offer: %v", err) } - grant.SetMemoryAllocation(r.MemoryType(), memory, r.ColdStart()) + grant.SetMemorySize(r.MemAmountToAllocate()) + grant.SetMemoryType(r.MemoryType()) + grant.SetMemoryZone(zone) + grant.SetColdstart(r.ColdStart()) - return grant, nil + return grant, updates, nil } // AllocateCPU allocates CPU for a grant from the supply. 
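Committing one container's offer can force the allocator to widen the memory zones of containers placed earlier, which is why Allocate() above (and Reserve() further down) now return a map from container ID to new zone in addition to the grant. The caller is expected to propagate those updates. A minimal sketch of that propagation, matching what allocatePool() and reinstateGrants() do elsewhere in this patch; the helper name is hypothetical:

// Minimal sketch: apply the zone updates returned by an offer commit to existing grants.
func (p *policy) applyZoneUpdates(updates map[string]libmem.NodeMask) {
	for id, zone := range updates {
		g, ok := p.allocations.grants[id]
		if !ok {
			log.Error("zone update %s for unknown container %s", zone, id)
			continue
		}
		g.SetMemoryZone(zone)
		if opt.PinMemory {
			// Re-pin cpuset.mems so the container can actually use the widened zone.
			g.GetContainer().SetCpusetMems(zone.MemsetString())
		}
	}
}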
@@ -623,7 +426,7 @@ func (cs *supply) AllocateCPU(r Request) (Grant, error) { cs.node.Name(), full, cs.sharable, cs.AllocatableSharedCPU()) } - grant := newGrant(cs.node, cr.GetContainer(), cpuType, exclusive, 0, 0, nil, 0) + grant := newGrant(cs.node, cr.GetContainer(), cpuType, exclusive, 0, 0, 0) grant.AccountAllocateCPU() if fraction > 0 { @@ -652,31 +455,6 @@ func (cs *supply) AllocateCPU(r Request) (Grant, error) { return grant, nil } -func (cs *supply) ReallocateMemory(g Grant) error { - log.Debug("%s: reallocating memory (%s) from %s to %s", - g.GetContainer().PrettyName(), - g.MemLimit().String(), - g.GetMemoryNode().Name(), - cs.GetNode().Name()) - - // The grant has been previously allocated from another supply. Reallocate it here. - g.GetMemoryNode().FreeSupply().ReleaseMemory(g) - - mem := uint64(0) - allocatedMemory := g.MemLimit() - for key, value := range allocatedMemory { - if cs.mem[key] < value { - return policyError("internal error: not enough memory for reallocation at %s (released from %s)", cs.GetNode().Name(), g.GetMemoryNode().Name()) - } - cs.mem[key] -= value - cs.grantedMem[key] += value - mem += value - } - cs.grantedMem[memoryAll] += mem - cs.mem[memoryAll] -= mem - return nil -} - func (cs *supply) ReleaseCPU(g Grant) { isolated := g.ExclusiveCPUs().Intersection(cs.node.GetSupply().IsolatedCPUs()) sharable := g.ExclusiveCPUs().Difference(isolated) @@ -689,71 +467,21 @@ func (cs *supply) ReleaseCPU(g Grant) { g.AccountReleaseCPU() } -// ReleaseMemory returns memory from the given grant to the supply. -func (cs *supply) ReleaseMemory(g Grant) { - releasedMemory := uint64(0) - - log.Debug("%s: releasing granted memory (%s) from %s", - g.GetContainer().PrettyName(), - g.MemLimit().String(), cs.GetNode().Name()) - - for key, value := range g.MemLimit() { - cs.grantedMem[key] -= value - cs.mem[key] += value - releasedMemory += value - } - cs.grantedMem[memoryAll] -= releasedMemory - cs.mem[memoryAll] += releasedMemory - - cs.node.DepthFirst(func(n Node) error { - n.FreeSupply().ReleaseExtraMemoryReservation(g) - return nil - }) -} - -func (cs *supply) ExtraMemoryReservation(memType memoryType) uint64 { - extra := uint64(0) - for _, res := range cs.extraMemReservations { - extra += res[memType] - } - return extra -} - -func (cs *supply) ReleaseExtraMemoryReservation(g Grant) { - if mems, ok := cs.extraMemReservations[g]; ok { - log.Debug("%s: releasing extra memory reservation (%s) from %s", - g.GetContainer().PrettyName(), mems.String(), - cs.GetNode().Name()) - delete(cs.extraMemReservations, g) - } -} - -func (cs *supply) SetExtraMemoryReservation(g Grant) { - res := make(memoryMap) - extraMemory := uint64(0) - for key, value := range g.MemLimit() { - res[key] = value - extraMemory += value - } - res[memoryAll] = extraMemory - cs.extraMemReservations[g] = res -} - -func (cs *supply) Reserve(g Grant) error { +func (cs *supply) Reserve(g Grant, o *libmem.Offer) (map[string]libmem.NodeMask, error) { if g.CPUType() == cpuNormal { isolated := g.IsolatedCPUs() exclusive := g.ExclusiveCPUs().Difference(isolated) sharedPortion := g.SharedPortion() if !cs.isolated.Intersection(isolated).Equals(isolated) { - return policyError("can't reserve isolated CPUs (%s) of %s from %s", + return nil, policyError("can't reserve isolated CPUs (%s) of %s from %s", isolated.String(), g.String(), cs.DumpAllocatable()) } if !cs.sharable.Intersection(exclusive).Equals(exclusive) { - return policyError("can't reserve exclusive CPUs (%s) of %s from %s", + return nil, policyError("can't 
reserve exclusive CPUs (%s) of %s from %s", exclusive.String(), g.String(), cs.DumpAllocatable()) } if cs.AllocatableSharedCPU() < 1000*exclusive.Size()+sharedPortion { - return policyError("can't reserve %d shared CPUs of %s from %s", + return nil, policyError("can't reserve %d shared CPUs of %s from %s", sharedPortion, g.String(), cs.DumpAllocatable()) } cs.isolated = cs.isolated.Difference(isolated) @@ -762,7 +490,7 @@ func (cs *supply) Reserve(g Grant) error { } else if g.CPUType() == cpuReserved { sharedPortion := 1000*g.ExclusiveCPUs().Size() + g.SharedPortion() if sharedPortion > 0 && cs.AllocatableReservedCPU() < sharedPortion { - return policyError("can't reserve %d reserved CPUs of %s from %s", + return nil, policyError("can't reserve %d reserved CPUs of %s from %s", sharedPortion, g.String(), cs.DumpAllocatable()) } cs.grantedReserved += sharedPortion @@ -770,24 +498,14 @@ func (cs *supply) Reserve(g Grant) error { g.AccountAllocateCPU() - return nil -} - -func (cs *supply) ReserveMemory(g Grant) error { - mem := uint64(0) - allocatedMemory := g.MemLimit() - for key, value := range allocatedMemory { - if cs.mem[key] < value { - return policyError("internal error: not enough memory for allocation at %s", g.GetMemoryNode().Name()) - } - cs.mem[key] -= value - cs.grantedMem[key] += value - mem += value + zone, updates, err := o.Commit() + if err != nil { + g.Release() + return nil, policyError("failed to commit offer: %v", err) } - cs.grantedMem[memoryAll] += mem - cs.mem[memoryAll] -= mem - g.UpdateExtraMemoryReservation() - return nil + + g.SetMemoryZone(zone) + return updates, nil } // takeCPUs takes up to cnt CPUs from a given CPU set to another. @@ -806,7 +524,7 @@ func (cs *supply) takeCPUs(from, to *cpuset.CPUSet, cnt int, prio cpuPrio) (cpus // DumpCapacity returns a printable representation of the supply's resource capacity. func (cs *supply) DumpCapacity() string { - cpu, mem, sep := "", cs.mem.String(), "" + cpu, mem, sep := "", "", "" if !cs.isolated.IsEmpty() { cpu = fmt.Sprintf("isolated:%s", kubernetes.ShortCPUSet(cs.isolated)) @@ -822,6 +540,10 @@ func (cs *supply) DumpCapacity() string { 1000*cs.sharable.Size()) } + if amount := cs.node.Policy().poolZoneCapacity(cs.node, memoryAll); amount > 0 { + mem = prettyMem(amount) + } + capacity := "<" + cs.node.Name() + " capacity: " if cpu == "" && mem == "" { @@ -843,7 +565,7 @@ func (cs *supply) DumpCapacity() string { // DumpAllocatable returns a printable representation of the supply's resource capacity. func (cs *supply) DumpAllocatable() string { - cpu, mem, sep := "", cs.mem.String(), "" + cpu, mem, sep := "", "", "" if !cs.isolated.IsEmpty() { cpu = fmt.Sprintf("isolated:%s", kubernetes.ShortCPUSet(cs.isolated)) @@ -881,6 +603,10 @@ func (cs *supply) DumpAllocatable() string { allocatable := "<" + cs.node.Name() + " allocatable: " + if amount := cs.node.Policy().poolZoneFree(cs.node, memoryAll); amount > 0 { + mem = prettyMem(amount) + } + if cpu == "" && mem == "" { allocatable += "-" } else { @@ -899,11 +625,11 @@ func (cs *supply) DumpAllocatable() string { } // prettyMem formats the given amount as k, M, G, or T units. 
-func prettyMem(value uint64) string { +func prettyMem(value int64) string { units := []string{"k", "M", "G", "T"} - coeffs := []uint64{1 << 10, 1 << 20, 1 << 30, 1 << 40} + coeffs := []int64{1 << 10, 1 << 20, 1 << 30, 1 << 40} - c, u := uint64(1), "" + c, u := int64(1), "" for i := 0; i < len(units); i++ { if coeffs[i] > value { break @@ -915,59 +641,8 @@ func prettyMem(value uint64) string { return strconv.FormatFloat(v, 'f', 2, 64) + u } -// DumpMemoryState dumps the state of the available and allocated memory. -func (cs *supply) DumpMemoryState(prefix string) { - memTypes := []memoryType{memoryDRAM, memoryPMEM, memoryHBM} - totalFree := uint64(0) - totalGranted := uint64(0) - for _, kind := range memTypes { - free := cs.mem[kind] - granted := cs.grantedMem[kind] - if free != 0 || granted != 0 { - log.Debug(prefix+"- %s: free: %s, granted %s", - kind, prettyMem(free), prettyMem(granted)) - } - totalFree += free - totalGranted += granted - } - log.Debug(prefix+"- total free: %s, total granted %s", - prettyMem(totalFree), prettyMem(totalGranted)) - - printHdr := true - if len(cs.extraMemReservations) > 0 { - for g, memMap := range cs.extraMemReservations { - split := "" - sep := "" - total := uint64(0) - if mem := memMap[memoryDRAM]; mem > 0 { - split = "DRAM " + prettyMem(mem) - sep = ", " - total += mem - } - if mem := memMap[memoryPMEM]; mem > 0 { - split += sep + "PMEM " + prettyMem(mem) - sep = ", " - total += mem - } - if mem := memMap[memoryHBM]; mem > 0 { - split += sep + "HBMEM " + prettyMem(mem) - sep = ", " - total += mem - } - if total > 0 { - if printHdr { - log.Debug(prefix + "- extra reservations:") - printHdr = false - } - log.Debug(prefix+" - %s: %s (%s)", - g.GetContainer().PrettyName(), prettyMem(total), split) - } - } - } -} - // newRequest creates a new request for the given container. 
-func newRequest(container cache.Container) Request { +func newRequest(container cache.Container, types libmem.TypeMask) Request { pod, _ := container.GetPod() full, fraction, isolate, cpuType, prio := cpuAllocationPreferences(pod, container) req, lim, mtype := memoryAllocationPreference(pod, container) @@ -980,25 +655,49 @@ func newRequest(container cache.Container) Request { mtype = defaultMemoryType &^ memoryHBM } - if mtype&memoryPMEM != 0 && mtype&memoryDRAM != 0 { - parsedColdStart, err := coldStartPreference(pod, container) - if err != nil { - log.Error("Failed to parse cold start preference") + if mtype != memoryPreserve { + mtype = memoryType(mtype.TypeMask().And(types)) + + if coldStartOff { + if mtype == memoryPMEM { + mtype |= memoryDRAM + log.Error("%s: coldstart disabled (movable non-DRAM memory zones present)", + container.PrettyName()) + } } else { - if parsedColdStart.Duration.Duration > 0 { - if coldStartOff { - log.Error("coldstart disabled (movable non-DRAM memory zones present)") - } else { - coldStart = time.Duration(parsedColdStart.Duration.Duration) + pref, err := coldStartPreference(pod, container) + if err != nil { + log.Error("failed to parse coldstart preference") + } else { + coldStart = time.Duration(pref.Duration.Duration) + if coldStart > 0 { + mtype &^= memoryDRAM } } } - } else if mtype == memoryPMEM { - if coldStartOff { - mtype = mtype | memoryDRAM - log.Error("%s: forced also DRAM usage (movable non-DRAM memory zones present)", - container.PrettyName()) - } + + /* + if mtype&memoryPMEM != 0 && mtype&memoryDRAM != 0 { + parsedColdStart, err := coldStartPreference(pod, container) + if err != nil { + log.Error("Failed to parse cold start preference") + } else { + if parsedColdStart.Duration.Duration > 0 { + if coldStartOff { + log.Error("coldstart disabled (movable non-DRAM memory zones present)") + } else { + coldStart = time.Duration(parsedColdStart.Duration.Duration) + mtype &^= memoryDRAM + } + } + } + } else if mtype == memoryPMEM { + if coldStartOff { + mtype = mtype | memoryDRAM + log.Error("%s: forced also DRAM usage (movable non-DRAM memory zones present)", + container.PrettyName()) + } + }*/ } return &request{ @@ -1073,27 +772,11 @@ func (cr *request) Isolate() bool { } // MemAmountToAllocate retuns how much memory we need to reserve for a request. -func (cr *request) MemAmountToAllocate() uint64 { - var amount uint64 = 0 - switch cr.GetContainer().GetQOSClass() { - case v1.PodQOSBurstable: - // May be a request and/or limit. We focus on the limit because we - // need to prepare for the case when all containers are using all - // the memory they are allowed to. If limit is not set then we'll - // allocate the request (which the container will get). - if cr.memLim > 0 { - amount = cr.memLim - } else { - amount = cr.memReq - } - case v1.PodQOSGuaranteed: - // Limit and request are the same. - amount = cr.memLim - case v1.PodQOSBestEffort: - // No requests or limits. - amount = 0 +func (cr *request) MemAmountToAllocate() int64 { + if cr.memLim == 0 && cr.memReq != 0 { + return cr.memReq } - return amount + return cr.memLim } // MemoryType returns the requested type of memory for the grant. 
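Coldstart handling also changes shape here: instead of restoring a precomputed memset when the coldstart timer fires, newRequest() masks DRAM out of the requested type up front, and finishColdStart() in coldstart.go later reallocates the grant with DRAM permitted again. A compressed, illustrative walkthrough of the two halves, using only operations that appear in this patch; the function names are hypothetical:

// Illustrative only: the two halves of coldstart handling under libmem.
// applyColdStartMask mirrors what newRequest() does to the requested type mask.
func applyColdStartMask(mtype memoryType, coldStart time.Duration) memoryType {
	if coldStart > 0 {
		// Keep DRAM out of the initial allocation so the container starts on PMEM.
		mtype &^= memoryDRAM
	}
	return mtype
}

// endColdStart mirrors what finishColdStart() does once the coldstart timer fires:
// the grant is reallocated with DRAM allowed in addition to its current zone's types.
func (p *policy) endColdStart(g Grant) error {
	return g.ReallocMemory(p.memZoneType(g.GetMemoryZone()) | libmem.TypeMaskDRAM)
}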
@@ -1178,6 +861,18 @@ func (cs *supply) GetScore(req Request) Score { score.hints[provider] = cs.node.HintScore(hint) } + node := cs.node + if req.MemoryType() == memoryPreserve { + node = cs.node.Policy().root + } + + o, err := cs.node.Policy().getMemOffer(node, cr) + if err != nil { + log.Error("failed to get offer for request %s: %v", req, err) + } else { + score.offer = o + } + return score } @@ -1263,23 +958,25 @@ func (score *score) PrioCapacity(prio cpuPrio) int { return score.prio[prio] } +func (score *score) Offer() *libmem.Offer { + return score.offer +} + func (score *score) String() string { return fmt.Sprintf("", score.supply.GetNode().Name(), score.isolated, score.reserved, score.shared, score.colocated, score.hints) } // newGrant creates a CPU grant from the given node for the container. -func newGrant(n Node, c cache.Container, cpuType cpuClass, exclusive cpuset.CPUSet, cpuPortion int, mt memoryType, allocated memoryMap, coldstart time.Duration) Grant { +func newGrant(n Node, c cache.Container, cpuType cpuClass, exclusive cpuset.CPUSet, cpuPortion int, mt memoryType, coldstart time.Duration) Grant { grant := &grant{ node: n, - memoryNode: n, container: c, cpuType: cpuType, exclusive: exclusive, cpuPortion: cpuPortion, - } - if allocated != nil { - grant.SetMemoryAllocation(mt, allocated, coldstart) + memType: mt, + coldStart: coldstart, } return grant } @@ -1289,40 +986,36 @@ func (cg *grant) SetCPUPortion(fraction int) { cg.cpuPortion = fraction } -// SetMemoryAllocation sets the memory allocation for the grant. -func (cg *grant) SetMemoryAllocation(mt memoryType, allocated memoryMap, coldstart time.Duration) { - initial := memoryPMEM - if coldstart <= 0 { - initial = mt - } - mems := cg.node.GetMemset(initial) - if mems.Size() == 0 { - mems = cg.node.GetMemset(memoryDRAM) - if mems.Size() == 0 { - mems = cg.node.GetMemset(memoryAll) - } - } - mems = mems.Clone() +// SetMemoryType sets the memory type for the grant. +func (cg *grant) SetMemoryType(memType memoryType) { + cg.memType = memType +} + +// SetMemoryZone sets the memory zone for the grant. +func (cg *grant) SetMemoryZone(zone libmem.NodeMask) { + cg.memZone = zone +} + +// SetMemorySize sets the amount of memory to allocate. +func (cg *grant) SetMemorySize(size int64) { + cg.memSize = size +} - cg.memType = mt - cg.memset = mems - cg.allocatedMem = allocated - cg.coldStart = coldstart +// SetColdstart sets coldstart period for the grant. +func (cg *grant) SetColdstart(period time.Duration) { + cg.coldStart = period } // Clone creates a copy of this grant. 
 func (cg *grant) Clone() Grant {
 	return &grant{
-		node:         cg.GetCPUNode(),
-		memoryNode:   cg.GetMemoryNode(),
-		container:    cg.GetContainer(),
-		exclusive:    cg.ExclusiveCPUs(),
-		cpuType:      cg.CPUType(),
-		cpuPortion:   cg.SharedPortion(),
-		memType:      cg.MemoryType(),
-		memset:       cg.Memset().Clone(),
-		allocatedMem: cg.MemLimit(),
-		coldStart:    cg.ColdStart(),
+		node:       cg.GetCPUNode(),
+		container:  cg.GetContainer(),
+		exclusive:  cg.ExclusiveCPUs(),
+		cpuType:    cg.CPUType(),
+		cpuPortion: cg.SharedPortion(),
+		memType:    cg.MemoryType(),
+		coldStart:  cg.ColdStart(),
 	}
 }
 
@@ -1332,12 +1025,7 @@ func (cg *grant) RefetchNodes() error {
 	if !ok {
 		return policyError("failed to refetch grant cpu node %s", cg.node.Name())
 	}
-	memoryNode, ok := cg.memoryNode.Policy().nodes[cg.memoryNode.Name()]
-	if !ok {
-		return policyError("failed to refetch grant memory node %s", cg.memoryNode.Name())
-	}
 	cg.node = node
-	cg.memoryNode = memoryNode
 	return nil
 }
 
@@ -1351,14 +1039,14 @@ func (cg *grant) GetCPUNode() Node {
 	return cg.node
 }
 
-// GetNode returns the Node this grant gets its memory allocation from.
-func (cg *grant) GetMemoryNode() Node {
-	return cg.memoryNode
+// GetMemorySize returns the amount of memory allocated to this grant.
+func (cg *grant) GetMemorySize() int64 {
+	return cg.memSize
 }
 
-func (cg *grant) SetMemoryNode(n Node) {
-	cg.memoryNode = n
-	cg.memset = n.GetMemset(cg.MemoryType())
+// GetMemoryZone returns the memory zone this grant is allocated to.
+func (cg *grant) GetMemoryZone() libmem.NodeMask {
+	return cg.memZone
 }
 
 // CPUType returns the requested type of CPU for the grant.
@@ -1412,16 +1100,6 @@ func (cg *grant) MemoryType() memoryType {
 	return cg.memType
 }
 
-// Memset returns the granted memory controllers as an IDSet.
-func (cg *grant) Memset() idset.IDSet {
-	return cg.memset
-}
-
-// MemLimit returns the granted memory.
-func (cg *grant) MemLimit() memoryMap {
-	return cg.allocatedMem
-}
-
 // String returns a printable representation of the CPU grant.
 func (cg *grant) String() string {
 	var cpuType, isolated, exclusive, reserved, shared string
@@ -1442,10 +1120,10 @@ func (cg *grant) String() string {
 			cg.node.FreeSupply().SharableCPUs(), cg.SharedPortion())
 	}
 
-	memset := ", MemPin: " + cg.memset.String()
+	mem := fmt.Sprintf(", memory: %s (%s)", cg.memZone, prettyMem(cg.memSize))
 
 	return fmt.Sprintf("",
-		cg.container.PrettyName(), cg.node.Name(), cpuType, isolated, exclusive, reserved, shared, memset)
+		cg.container.PrettyName(), cg.node.Name(), cpuType, isolated, exclusive, reserved, shared, mem)
 }
 
 func (cg *grant) AccountAllocateCPU() {
@@ -1460,92 +1138,48 @@ func (cg *grant) AccountAllocateCPU() {
 
 func (cg *grant) Release() {
 	cg.GetCPUNode().FreeSupply().ReleaseCPU(cg)
-	cg.GetMemoryNode().FreeSupply().ReleaseMemory(cg)
+	err := cg.node.Policy().releaseMem(cg.container.GetID())
+	if err != nil {
+		log.Error("releasing memory for %s failed: %v", cg.container.PrettyName(), err)
+	}
 	cg.StopTimer()
 }
 
-func (cg *grant) AccountReleaseCPU() {
-	cg.node.DepthFirst(func(n Node) error {
-		n.FreeSupply().AccountReleaseCPU(cg)
-		return nil
-	})
-	for node := cg.node.Parent(); !node.IsNil(); node = node.Parent() {
-		node.FreeSupply().AccountReleaseCPU(cg)
+func (cg *grant) ReallocMemory(types libmem.TypeMask) error {
+	zone, updates, err := cg.node.Policy().reallocMem(cg.container.GetID(), 0, types)
+	if err != nil {
+		return err
 	}
-}
-
-func (cg *grant) RestoreMemset() {
-	mems := cg.GetMemoryNode().GetMemset(cg.memType)
-	cg.memset = mems
-	cg.GetMemoryNode().Policy().applyGrant(cg)
-}
 
-func (cg *grant) ExpandMemset() (bool, error) {
-	supply := cg.GetMemoryNode().FreeSupply()
-	node := cg.GetMemoryNode()
-	parent := node.Parent()
-
-	// We have to assume that the memory has been allocated how we granted it (if PMEM ran out
-	// the allocations have been made from DRAM and so on).
-
-	// Figure out if there is enough memory now to have grant as-is.
-	extra := supply.ExtraMemoryReservation(memoryAll)
-	free := supply.MemoryLimit()[memoryAll]
-	if extra <= free {
-		// The grant fits in the node even with extra reservations
-		return false, nil
-	}
-	// Else it doesn't fit, so move the grant up in the memory tree.
-	required := uint64(0)
-	for _, memType := range []memoryType{memoryPMEM, memoryDRAM, memoryHBM} {
-		required += cg.MemLimit()[memType]
-	}
-	log.Debug("out-of-memory risk in %s: extra reservations %s > free %s -> moving up %s total memory grant from %s",
-		cg, prettyMem(extra), prettyMem(free), prettyMem(required), node.Name())
-
-	// Find an ancestor where the grant fits. As reservations in
-	// child nodes do not show up in free + extra in parent nodes,
-	// releasing the grant is not necessary before searching.
-	for ; !parent.IsNil(); parent = parent.Parent() {
-		pSupply := parent.FreeSupply()
-		parentFree := pSupply.MemoryLimit()[memoryAll]
-		parentExtra := pSupply.ExtraMemoryReservation(memoryAll)
-		if parentExtra+required <= parentFree {
-			required = 0
-			break
-		}
-		log.Debug("- %s has %s free but %s extra reservations, moving further up",
-			parent.Name(), prettyMem(parentFree), prettyMem(parentExtra))
-	}
-	if required > 0 {
-		return false, fmt.Errorf("internal error: cannot find enough memory (%s) for %s from ancestors of %s", prettyMem(required), cg, node.Name())
+	cg.SetMemoryZone(zone)
+	if opt.PinMemory {
+		cg.container.SetCpusetMems(zone.MemsetString())
 	}
 
-	// Release granted memory from the node and allocate it from the parent node.
-	err := parent.FreeSupply().ReallocateMemory(cg)
-	if err != nil {
-		return false, err
+	for id, z := range updates {
+		g, ok := cg.node.Policy().allocations.grants[id]
+		if !ok {
+			log.Error("offer commit returned zone update %s for unknown container %s", z, id)
+		} else {
+			log.Info("updating memory allocation for %s to %s", g.GetContainer().PrettyName(), z)
+			g.SetMemoryZone(z)
+			if opt.PinMemory {
+				g.GetContainer().SetCpusetMems(z.MemsetString())
+			}
+		}
 	}
 
-	cg.SetMemoryNode(parent)
-	cg.UpdateExtraMemoryReservation()
-
-	// Make the container to use the new memory set.
-	// FIXME: this could be done in a second pass to avoid doing this many times
-	cg.GetMemoryNode().Policy().applyGrant(cg)
-	return true, nil
+	return nil
 }
 
-func (cg *grant) UpdateExtraMemoryReservation() {
-	// For every subnode, make sure that this grant is added to the extra memory allocation.
-	cg.GetMemoryNode().DepthFirst(func(n Node) error {
-		// No extra allocation should be done to the node itself.
-		if !n.IsSameNode(cg.GetMemoryNode()) {
-			supply := n.FreeSupply()
-			supply.SetExtraMemoryReservation(cg)
-		}
+func (cg *grant) AccountReleaseCPU() {
+	cg.node.DepthFirst(func(n Node) error {
+		n.FreeSupply().AccountReleaseCPU(cg)
 		return nil
 	})
+	for node := cg.node.Parent(); !node.IsNil(); node = node.Parent() {
+		node.FreeSupply().AccountReleaseCPU(cg)
+	}
 }
 
 func (cg *grant) ColdStart() time.Duration {
diff --git a/cmd/plugins/topology-aware/policy/topology-aware-policy.go b/cmd/plugins/topology-aware/policy/topology-aware-policy.go
index efc829cc3..3e04fb649 100644
--- a/cmd/plugins/topology-aware/policy/topology-aware-policy.go
+++ b/cmd/plugins/topology-aware/policy/topology-aware-policy.go
@@ -27,10 +27,10 @@ import (
 	"github.com/containers/nri-plugins/pkg/cpuallocator"
 	"github.com/containers/nri-plugins/pkg/resmgr/cache"
 	"github.com/containers/nri-plugins/pkg/resmgr/events"
+	libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory"
 	policyapi "github.com/containers/nri-plugins/pkg/resmgr/policy"
 	system "github.com/containers/nri-plugins/pkg/sysfs"
 
-	idset "github.com/intel/goresctrl/pkg/utils"
 )
 
 const (
@@ -66,7 +66,8 @@ type policy struct {
 	depth        int                       // tree depth
 	allocations  allocations               // container pool assignments
 	cpuAllocator cpuallocator.CPUAllocator // CPU allocator used by the policy
-	coldstartOff bool                      // coldstart forced off (have movable PMEM zones)
+	memAllocator *libmem.Allocator
+	coldstartOff bool // coldstart forced off (have movable PMEM zones)
 }
 
 var opt = &cfgapi.Config{}
@@ -84,6 +85,8 @@ func New() policyapi.Backend {
 
 // Setup initializes the topology-aware policy instance.
 func (p *policy) Setup(opts *policyapi.BackendOptions) error {
+	var err error
+
 	cfg, ok := opts.Config.(*cfgapi.Config)
 	if !ok {
 		return policyError("failed initialize %s policy: config of wrong type %T",
@@ -96,6 +99,10 @@ func (p *policy) Setup(opts *policyapi.BackendOptions) error {
 	p.sys = opts.System
 	p.options = opts
 	p.cpuAllocator = cpuallocator.NewCPUAllocator(opts.System)
+	p.memAllocator, err = libmem.NewAllocator(libmem.WithSystemNodes(opts.System))
+	if err != nil {
+		return policyError("failed to initialize %s policy: %w", PolicyName, err)
+	}
 
 	opt = cfg
 	defaultPrio = cfg.DefaultCPUPriority.Value()
@@ -273,8 +280,10 @@ func (p *policy) GetTopologyZones() []*policyapi.TopologyZone {
 
 		total := pool.GetSupply().(*supply)
 		free := pool.FreeSupply().(*supply)
-		capacity := int64(total.mem[memoryAll])
-		available := int64(free.mem[memoryAll] - free.ExtraMemoryReservation(memoryAll))
+
+		memZone := libmem.NewNodeMask(pool.GetMemset(memoryAll).Members()...)
+		capacity := p.memAllocator.ZoneCapacity(memZone)
+		available := p.memAllocator.ZoneFree(memZone)
 
 		memory := &policyapi.ZoneResource{
 			Name:        policyapi.MemoryResource,
@@ -351,23 +360,10 @@ func (p *policy) ExportResourceData(c cache.Container) map[string]string {
 		data[policyapi.ExportExclusiveCPUs] = exclusive
 	}
 
-	mems := grant.Memset()
-	dram := idset.NewIDSet()
-	pmem := idset.NewIDSet()
-	hbm := idset.NewIDSet()
-	for _, id := range mems.SortedMembers() {
-		node := p.sys.Node(id)
-		switch node.GetMemoryType() {
-		case system.MemoryTypeDRAM:
-			dram.Add(id)
-		case system.MemoryTypePMEM:
-			pmem.Add(id)
-			/*
-				case system.MemoryTypeHBM:
-					hbm.Add(id)
-			*/
-		}
-	}
+	mems := grant.GetMemoryZone()
+	dram := mems.And(p.memAllocator.Masks().NodesByTypes(libmem.TypeMaskDRAM))
+	pmem := mems.And(p.memAllocator.Masks().NodesByTypes(libmem.TypeMaskPMEM))
+	hbm := mems.And(p.memAllocator.Masks().NodesByTypes(libmem.TypeMaskHBM))
 	data["ALL_MEMS"] = mems.String()
 	if dram.Size() > 0 {
 		data["DRAM_MEMS"] = dram.String()