From 883894b111b545f50a5876ed5796d5a27f054604 Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Tue, 25 Jun 2024 18:14:03 +0300 Subject: [PATCH] WiP: cpuallocator: use last useful cache for clustering. Try finding the last cache which provides non-trivial clustering of CPUs and use that for cache-group based allocation. Note that this current implementation is somewhat simplistic. It expects all CPUs to provide identical cache grouping and picks a single cache level to set up clusters. This might result in sub- optimal cache-based clustering on hybrid core architectures. We can address this shortcoming in the future. Signed-off-by: Krisztian Litkey --- pkg/cpuallocator/allocator.go | 153 +++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 58 deletions(-) diff --git a/pkg/cpuallocator/allocator.go b/pkg/cpuallocator/allocator.go index 89a81e071..730814dfb 100644 --- a/pkg/cpuallocator/allocator.go +++ b/pkg/cpuallocator/allocator.go @@ -38,13 +38,13 @@ const ( AllocIdleNodes // AllocIdleClusters requests allocation of full idle CPU clusters. AllocIdleClusters - // AllocLLCGroups requests allocation and splitting of idle and used LLC groups - AllocLLCGroups + // AllocCacheGroups requests allocation and splitting of idle and used cache groups + AllocCacheGroups // AllocIdleCores requests allocation of full idle cores (all threads in core). AllocIdleCores // AllocDefault is the default allocation preferences. - AllocDefault = AllocIdlePackages | AllocIdleClusters | AllocLLCGroups | AllocIdleCores + AllocDefault = AllocIdlePackages | AllocIdleClusters | AllocCacheGroups | AllocIdleCores logSource = "cpuallocator" ) @@ -96,7 +96,7 @@ type topologyCache struct { cpuPriorities cpuPriorities // CPU priority mapping clusters []*cpuCluster // CPU clusters - llcGroups []*llcGroup // CPU last-level cache groups + cacheGroups []*cacheGroup // CPU cache groups } type cpuPriorities [NumCPUPriorities]cpuset.CPUSet @@ -109,7 +109,7 @@ type cpuCluster struct { kind sysfs.CoreKind } -type llcGroup struct { +type cacheGroup struct { id int pkg idset.ID die idset.ID @@ -403,17 +403,17 @@ func (a *allocatorHelper) takeIdleClusters() { } // Allocate idle or partial CPU last-level cache groups. -func (a *allocatorHelper) takeLLCGroups() { - log.Debug("* takeLLCGroups()...") +func (a *allocatorHelper) takeCacheGroups() { + log.Debug("* takeCacheGroups()...") - if len(a.topology.llcGroups) <= 1 { + if len(a.topology.cacheGroups) <= 1 { return } if a.cnt < 2 { // XXX TODO(klihub): we could also decide based on some criteria, if it was better // to handle such containers here and, for instance, pack them tightly into shared - // LLC groups. + // cache groups. 
return } @@ -451,7 +451,7 @@ func (a *allocatorHelper) takeLLCGroups() { var ( offline = a.sys.OfflineCPUs() - pickGroups = func(g *llcGroup) (pickVerdict, cpuset.CPUSet) { + pickGroups = func(g *cacheGroup) (pickVerdict, cpuset.CPUSet) { // only take E-groups for low-prio requests if a.prefer != PriorityLow && g.kind == sysfs.EfficientCore { log.Debug(" - ignore %s (CPU preference is %s)", g, a.prefer) @@ -483,7 +483,7 @@ func (a *allocatorHelper) takeLLCGroups() { return pickUsable, free } - sortIdle = func(gA, gB *llcGroup, s *llcGroupSorter) (r int) { + sortIdle = func(gA, gB *cacheGroup, s *cacheGroupSorter) (r int) { defer func() { switch { case r < 0: @@ -594,7 +594,7 @@ func (a *allocatorHelper) takeLLCGroups() { return gA.id - gB.id } - sortUsed = func(gA, gB *llcGroup, s *llcGroupSorter) (r int) { + sortUsed = func(gA, gB *cacheGroup, s *cacheGroupSorter) (r int) { defer func() { switch { case r < 0: @@ -616,7 +616,7 @@ func (a *allocatorHelper) takeLLCGroups() { csetB = s.cpus[gB] full = s.full part = s.part - idle *llcGroup + idle *cacheGroup ) if len(s.prefer) > 0 { @@ -683,7 +683,7 @@ func (a *allocatorHelper) takeLLCGroups() { return 0 } - sorter = &llcGroupSorter{ + sorter = &cacheGroupSorter{ pick: pickGroups, sortPrefer: sortIdle, sortUsable: sortUsed, @@ -692,7 +692,7 @@ func (a *allocatorHelper) takeLLCGroups() { log.Debug("looking for %d CPUs (prio %s) from %s", a.cnt, a.prefer, a.from) - sorter.sortLLCGroups(a) + sorter.sortCacheGroups(a) var ( preferPkgCPUs int @@ -792,7 +792,7 @@ func (a *allocatorHelper) takeLLCGroups() { log.Debug("%d more CPUs needed", cnt) var ( - groupsBySize = map[int][]*llcGroup{} + groupsBySize = map[int][]*cacheGroup{} totalByIndex = make([]int, 0, len(sorter.usable)) totalCPUs = 0 ) @@ -875,7 +875,7 @@ func (a *allocatorHelper) takeLLCGroups() { } // use up smallest number of groups possible (start with the largest group) - log.Debug("=> taking LLC groups in decreasing size order for %d more CPUs...", cnt) + log.Debug("=> taking cache groups in decreasing size order for %d more CPUs...", cnt) var ( grpCnt = 0 @@ -1148,8 +1148,8 @@ func (a *allocatorHelper) allocate() cpuset.CPUSet { if a.cnt > 0 && (a.flags&AllocIdleClusters) != 0 { a.takeIdleClusters() } - if a.cnt > 0 && (a.flags&AllocLLCGroups) != 0 { - a.takeLLCGroups() + if a.cnt > 0 && (a.flags&AllocCacheGroups) != 0 { + a.takeCacheGroups() } if a.cnt > 0 && (a.flags&AllocIdleCores) != 0 { a.takeIdleCores() @@ -1254,59 +1254,59 @@ const ( pickIgnore ) -type llcGroupSorter struct { +type cacheGroupSorter struct { // function to pick preferred and usable cache groups - pick func(*llcGroup) (pickVerdict, cpuset.CPUSet) + pick func(*cacheGroup) (pickVerdict, cpuset.CPUSet) // functions for sorting picked cache groups - sortPrefer func(a, b *llcGroup, s *llcGroupSorter) int - sortUsable func(a, b *llcGroup, s *llcGroupSorter) int + sortPrefer func(a, b *cacheGroup, s *cacheGroupSorter) int + sortUsable func(a, b *cacheGroup, s *cacheGroupSorter) int // preferred groups, available CPU count per package and die - prefer []*llcGroup + prefer []*cacheGroup preferPkg map[idset.ID]int preferDie map[idset.ID]map[idset.ID]int // other usable groups, available CPU count per package and die - usable []*llcGroup + usable []*cacheGroup usablePkg map[idset.ID]int usableDie map[idset.ID]map[idset.ID]int // available CPUs per group - cpus map[*llcGroup]cpuset.CPUSet + cpus map[*cacheGroup]cpuset.CPUSet // full and partial groups worth of requested CPUs full int part int } -func (s *llcGroupSorter) 
preferPkgCPUCount(pkg idset.ID) int { +func (s *cacheGroupSorter) preferPkgCPUCount(pkg idset.ID) int { return s.preferPkg[pkg] } -func (s *llcGroupSorter) preferDieCPUCount(pkg, die idset.ID) int { +func (s *cacheGroupSorter) preferDieCPUCount(pkg, die idset.ID) int { return s.preferDie[pkg][die] } -func (s *llcGroupSorter) usablePkgCPUCount(pkg idset.ID) int { +func (s *cacheGroupSorter) usablePkgCPUCount(pkg idset.ID) int { return s.usablePkg[pkg] } -func (s *llcGroupSorter) usableDieCPUCount(pkg, die idset.ID) int { +func (s *cacheGroupSorter) usableDieCPUCount(pkg, die idset.ID) int { return s.usableDie[pkg][die] } -func (s *llcGroupSorter) CPUSet(g *llcGroup) cpuset.CPUSet { +func (s *cacheGroupSorter) CPUSet(g *cacheGroup) cpuset.CPUSet { return s.cpus[g] } -func (s *llcGroupSorter) sortLLCGroups(a *allocatorHelper) { - s.prefer = []*llcGroup{} +func (s *cacheGroupSorter) sortCacheGroups(a *allocatorHelper) { + s.prefer = []*cacheGroup{} s.preferPkg = map[idset.ID]int{} s.preferDie = map[idset.ID]map[idset.ID]int{} - s.usable = []*llcGroup{} + s.usable = []*cacheGroup{} s.usablePkg = map[idset.ID]int{} s.usableDie = map[idset.ID]map[idset.ID]int{} - s.cpus = map[*llcGroup]cpuset.CPUSet{} + s.cpus = map[*cacheGroup]cpuset.CPUSet{} log.Debug("picking suitable cache groups") @@ -1315,11 +1315,11 @@ func (s *llcGroupSorter) sortLLCGroups(a *allocatorHelper) { // the same size and use this assumption to split the request into // full cache size multiples and the remaining partial allocation. - s.part = a.cnt % a.topology.llcGroups[0].cpus.Size() + s.part = a.cnt % a.topology.cacheGroups[0].cpus.Size() s.full = a.cnt - s.part // collect preferred and usable groups, count their CPUs per package and die - for _, g := range a.topology.llcGroups { + for _, g := range a.topology.cacheGroups { verdict, cset := s.pick(g) switch verdict { case pickPrefer: @@ -1353,7 +1353,7 @@ func (s *llcGroupSorter) sortLLCGroups(a *allocatorHelper) { if log.DebugEnabled() { if len(s.preferPkg) > 0 { - log.Debug("number of preferred LLC group CPUs per package/die:") + log.Debug("number of preferred cache group CPUs per package/die:") for pkg, cnt := range s.preferPkg { log.Debug(" - package #%d: %d", pkg, cnt) } @@ -1363,11 +1363,11 @@ func (s *llcGroupSorter) sortLLCGroups(a *allocatorHelper) { } } } else { - log.Debug("no preferred LLC groups found") + log.Debug("no preferred cache groups found") } if len(s.usablePkg) > 0 { - log.Debug("number of non-preferred but usable LLC group CPUs per package/die:") + log.Debug("number of non-preferred but usable cache group CPUs per package/die:") for pkg, cnt := range s.usablePkg { log.Debug(" - package #%d: %d", pkg, cnt) } @@ -1377,22 +1377,22 @@ func (s *llcGroupSorter) sortLLCGroups(a *allocatorHelper) { } } } else { - log.Debug("no non-preferred but usable LLC groups found") + log.Debug("no non-preferred but usable cache groups found") } } // sort preferred groups if len(s.prefer) > 0 { - log.Debug("sorting preferred LLC groups") - slices.SortFunc(s.prefer, func(gA, gB *llcGroup) int { + log.Debug("sorting preferred cache groups") + slices.SortFunc(s.prefer, func(gA, gB *cacheGroup) int { return s.sortPrefer(gA, gB, s) }) } // sort other usable groups if len(s.usable) > 0 { - log.Debug("sorting non-preferred but usable LLC groups") - slices.SortFunc(s.usable, func(gA, gB *llcGroup) int { + log.Debug("sorting non-preferred but usable cache groups") + slices.SortFunc(s.usable, func(gA, gB *cacheGroup) int { return s.sortUsable(gA, gB, s) }) } @@ -1467,7 +1467,7 @@ 
func newTopologyCache(sys sysfs.System) topologyCache { } c.discoverCPUClusters(sys) - c.discoverLLCGroups(sys) + c.discoverCacheGroups(sys) c.discoverCPUPriorities(sys) return c @@ -1736,15 +1736,53 @@ func (c *topologyCache) discoverCPUClusters(sys sysfs.System) { } } -func (c *topologyCache) discoverLLCGroups(sys sysfs.System) { +func (c *topologyCache) pickCacheLevelForClustering(sys sysfs.System) int { if sys == nil { + return -1 + } + + online := sys.OnlineCPUs() + for _, id := range online.List() { + cpu := sys.CPU(id) + pkg := sys.Package(cpu.PackageID()) + for n := cpu.CacheCount() - 1; n > 0; n-- { + cpus := cpu.GetNthLevelCacheCPUSet(n) + + switch { + case cpus.Size() == 0 || cpus.Size() == 1: + continue + case cpus.Equals(cpu.ThreadCPUSet().Intersection(online)): + continue + case cpus.Equals(pkg.DieCPUSet(cpu.DieID()).Intersection(online)): + continue + case cpus.Equals(pkg.CPUSet().Intersection(online)): + continue + } + + return n + } + } + + return -1 +} + +func (c *topologyCache) discoverCacheGroups(sys sysfs.System) { + if sys == nil { + return + } + + n := c.pickCacheLevelForClustering(sys) + if n < 0 { + log.Info("no cache level provides extra clustering") return } + log.Info("picked cache level %d for extra clustering", n) + online := sys.OnlineCPUs() for _, id := range sys.PackageIDs() { pkg := sys.Package(id) - groups := []*llcGroup{} + groups := []*cacheGroup{} assigned := idset.NewIDSet() for _, cpuID := range pkg.CPUSet().Intersection(online).List() { @@ -1753,7 +1791,7 @@ func (c *topologyCache) discoverLLCGroups(sys sysfs.System) { } cpu := sys.CPU(cpuID) - cpus := cpu.GetLastLevelCacheCPUSet().Intersection(online) + cpus := cpu.GetNthLevelCacheCPUSet(n).Intersection(online) switch { case cpus.Size() == 0 || cpus.Size() == 1: @@ -1764,10 +1802,9 @@ func (c *topologyCache) discoverLLCGroups(sys sysfs.System) { continue case cpus.Equals(pkg.CPUSet().Intersection(online)): continue - } - groups = append(groups, &llcGroup{ + groups = append(groups, &cacheGroup{ pkg: cpu.PackageID(), die: cpu.DieID(), node: cpu.NodeID(), @@ -1778,12 +1815,12 @@ func (c *topologyCache) discoverLLCGroups(sys sysfs.System) { } if len(groups) > 1 { - c.llcGroups = append(c.llcGroups, groups...) + c.cacheGroups = append(c.cacheGroups, groups...) } } // sort groups by package, die, NUMA node, and lowest CPU ID. - slices.SortFunc(c.llcGroups, func(a, b *llcGroup) int { + slices.SortFunc(c.cacheGroups, func(a, b *cacheGroup) int { if diff := a.pkg - b.pkg; diff != 0 { return diff } @@ -1796,7 +1833,7 @@ func (c *topologyCache) discoverLLCGroups(sys sysfs.System) { return a.cpus.List()[0] - b.cpus.List()[0] }) - for idx, g := range c.llcGroups { + for idx, g := range c.cacheGroups { g.id = idx for _, cpuID := range g.cpus.UnsortedList() { @@ -1920,20 +1957,20 @@ func (c *cpuCluster) String() string { c.cpus.Size(), c.kind, c.cpus) } -func (c *llcGroup) PackageID() int { +func (c *cacheGroup) PackageID() int { return c.pkg } -func (c *llcGroup) DieID(sys sysfs.System) int { +func (c *cacheGroup) DieID(sys sysfs.System) int { cpu := sys.CPU(c.cpus.List()[0]) return cpu.DieID() } -func (c *llcGroup) SmallestCoreID(sys sysfs.System) int { +func (c *cacheGroup) SmallestCoreID(sys sysfs.System) int { return c.cpus.List()[0] } -func (c *llcGroup) String() string { +func (c *cacheGroup) String() string { return fmt.Sprintf("group #%d/%d, %d %s CPUs (%s)", c.pkg, c.id, c.cpus.Size(), c.kind, c.cpus) }
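
As a reading aid for the heuristic this patch introduces: pickCacheLevelForClustering walks each CPU's cache hierarchy from the deepest level towards L1 and settles on the first level whose shared-CPU set is non-trivial, i.e. covers more than one CPU but does not simply coincide with the core's thread siblings, the die, or the whole package. Only such a level splits the package into more than one group and is therefore useful for cache-group based allocation. Below is a minimal, self-contained sketch of that idea; the types and helpers (cacheLevel, topo, pickClusteringLevel, equal) are hypothetical stand-ins for illustration only, not the sysfs.System interface used in the patch, and the die-level check is omitted for brevity.

package main

import "fmt"

// cacheLevel is a hypothetical stand-in for per-CPU cache information:
// one cache level and the set of CPUs sharing that cache.
type cacheLevel struct {
	level int
	cpus  map[int]struct{} // CPUs sharing this cache
}

// topo is a toy view of one package as seen from a single CPU: the CPUs in
// the package, the thread siblings of that CPU, and its cache hierarchy
// ordered from L1 upwards.
type topo struct {
	packageCPUs map[int]struct{}
	threadCPUs  map[int]struct{}
	caches      []cacheLevel // index 0 = L1
}

// equal reports whether two CPU sets contain exactly the same CPUs.
func equal(a, b map[int]struct{}) bool {
	if len(a) != len(b) {
		return false
	}
	for c := range a {
		if _, ok := b[c]; !ok {
			return false
		}
	}
	return true
}

// pickClusteringLevel mirrors the idea behind pickCacheLevelForClustering in
// the patch: scan from the deepest cache level down towards (but excluding)
// L1 and return the first level whose CPU set is larger than one CPU, wider
// than the core's thread siblings, and narrower than the whole package.
// Returns -1 if no level provides extra clustering.
func pickClusteringLevel(t topo) int {
	for i := len(t.caches) - 1; i > 0; i-- {
		cpus := t.caches[i].cpus
		switch {
		case len(cpus) <= 1:
			continue // trivial: cache private to a single CPU
		case equal(cpus, t.threadCPUs):
			continue // no wider than the core's hyperthreads
		case equal(cpus, t.packageCPUs):
			continue // as wide as the package, no extra clustering
		}
		return t.caches[i].level
	}
	return -1
}

func main() {
	set := func(ids ...int) map[int]struct{} {
		s := map[int]struct{}{}
		for _, id := range ids {
			s[id] = struct{}{}
		}
		return s
	}

	// An imaginary 8-CPU package: L3 spans the whole package (useless for
	// clustering), L1 only covers the thread siblings, but L2 is shared by
	// groups of four CPUs, so L2 is the level the heuristic should pick.
	t := topo{
		packageCPUs: set(0, 1, 2, 3, 4, 5, 6, 7),
		threadCPUs:  set(0, 1),
		caches: []cacheLevel{
			{level: 1, cpus: set(0, 1)},
			{level: 2, cpus: set(0, 1, 2, 3)},
			{level: 3, cpus: set(0, 1, 2, 3, 4, 5, 6, 7)},
		},
	}

	fmt.Println("clustering cache level:", pickClusteringLevel(t)) // prints 2
}

Run against the toy topology in main(), the sketch picks L2, since L3 spans the whole package and L1 only covers the thread siblings; that mirrors why the patch prefers the "last useful" cache level over unconditionally taking the last-level cache for grouping.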