Skip to content

Commit

Permalink
fix: Scale perf counters based on time enabled and time running
Browse files Browse the repository at this point in the history
* We are almost certainly using more events than there are available hardware counters, so the events get multiplexed; hence the raw counter values must be scaled by the ratio of time enabled to time running (scale = time enabled / time running)

Signed-off-by: Mahendra Paipuri <[email protected]>
  • Loading branch information
mahendrapaipuri committed Oct 31, 2024
1 parent d8f1b76 commit e7dbb78
Showing 1 changed file with 39 additions and 24 deletions.
63 changes: 39 additions & 24 deletions pkg/collector/perf.go
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,7 @@ func (c *perfCollector) updateHardwareCounters(
for _, proc := range procs {
pid = proc.PID

var scale float64 = 1.0
if hwProfiler, ok := c.perfHwProfilers[pid]; ok {

Check failure on line 629 in pkg/collector/perf.go

View workflow job for this annotation

GitHub Actions / test-lint / lint

if statements should only be cuddled with assignments used in the if statement itself (wsl)
hwProfile := &perf.HardwareProfile{}
if err := (*hwProfiler).Profile(hwProfile); err != nil {
Expand All @@ -633,32 +634,36 @@ func (c *perfCollector) updateHardwareCounters(
continue
}

if hwProfile.TimeEnabled != nil && hwProfile.TimeRunning != nil {
scale = float64(*hwProfile.TimeEnabled) / float64(*hwProfile.TimeRunning)
}

if hwProfile.CPUCycles != nil {
cgroupHwPerfCounters["cpucycles_total"] += float64(*hwProfile.CPUCycles)
cgroupHwPerfCounters["cpucycles_total"] += scale * float64(*hwProfile.CPUCycles)
}

if hwProfile.Instructions != nil {
cgroupHwPerfCounters["instructions_total"] += float64(*hwProfile.Instructions)
cgroupHwPerfCounters["instructions_total"] += scale * float64(*hwProfile.Instructions)
}

if hwProfile.BranchInstr != nil {
cgroupHwPerfCounters["branch_instructions_total"] += float64(*hwProfile.BranchInstr)
cgroupHwPerfCounters["branch_instructions_total"] += scale * float64(*hwProfile.BranchInstr)
}

if hwProfile.BranchMisses != nil {
cgroupHwPerfCounters["branch_misses_total"] += float64(*hwProfile.BranchMisses)
cgroupHwPerfCounters["branch_misses_total"] += scale * float64(*hwProfile.BranchMisses)
}

if hwProfile.CacheRefs != nil {
cgroupHwPerfCounters["cache_refs_total"] += float64(*hwProfile.CacheRefs)
cgroupHwPerfCounters["cache_refs_total"] += scale * float64(*hwProfile.CacheRefs)
}

if hwProfile.CacheMisses != nil {
cgroupHwPerfCounters["cache_misses_total"] += float64(*hwProfile.CacheMisses)
cgroupHwPerfCounters["cache_misses_total"] += scale * float64(*hwProfile.CacheMisses)
}

if hwProfile.RefCPUCycles != nil {
cgroupHwPerfCounters["ref_cpucycles_total"] += float64(*hwProfile.RefCPUCycles)
cgroupHwPerfCounters["ref_cpucycles_total"] += scale * float64(*hwProfile.RefCPUCycles)
}
}
}
Expand Down Expand Up @@ -695,6 +700,7 @@ func (c *perfCollector) updateSoftwareCounters(
for _, proc := range procs {
pid = proc.PID

var scale float64 = 1.0
if swProfiler, ok := c.perfSwProfilers[pid]; ok {

Check failure on line 704 in pkg/collector/perf.go

View workflow job for this annotation

GitHub Actions / test-lint / lint

if statements should only be cuddled with assignments used in the if statement itself (wsl)
swProfile := &perf.SoftwareProfile{}
if err := (*swProfiler).Profile(swProfile); err != nil {
Expand All @@ -703,24 +709,28 @@ func (c *perfCollector) updateSoftwareCounters(
continue
}

if swProfile.TimeEnabled != nil && swProfile.TimeRunning != nil {
scale = float64(*swProfile.TimeEnabled) / float64(*swProfile.TimeRunning)
}

if swProfile.PageFaults != nil {
cgroupSwPerfCounters["page_faults_total"] += float64(*swProfile.PageFaults)
cgroupSwPerfCounters["page_faults_total"] += scale * float64(*swProfile.PageFaults)
}

if swProfile.ContextSwitches != nil {
cgroupSwPerfCounters["context_switches_total"] += float64(*swProfile.ContextSwitches)
cgroupSwPerfCounters["context_switches_total"] += scale * float64(*swProfile.ContextSwitches)
}

if swProfile.CPUMigrations != nil {
cgroupSwPerfCounters["cpu_migrations_total"] += float64(*swProfile.CPUMigrations)
cgroupSwPerfCounters["cpu_migrations_total"] += scale * float64(*swProfile.CPUMigrations)
}

if swProfile.MinorPageFaults != nil {
cgroupSwPerfCounters["minor_faults_total"] += float64(*swProfile.MinorPageFaults)
cgroupSwPerfCounters["minor_faults_total"] += scale * float64(*swProfile.MinorPageFaults)
}

if swProfile.MajorPageFaults != nil {
cgroupSwPerfCounters["major_faults_total"] += float64(*swProfile.MajorPageFaults)
cgroupSwPerfCounters["major_faults_total"] += scale * float64(*swProfile.MajorPageFaults)
}
}
}
Expand Down Expand Up @@ -753,6 +763,7 @@ func (c *perfCollector) updateCacheCounters(cgroupID string, procs []procfs.Proc
for _, proc := range procs {
pid = proc.PID

var scale float64 = 1.0
if cacheProfiler, ok := c.perfCacheProfilers[pid]; ok {

Check failure on line 767 in pkg/collector/perf.go

View workflow job for this annotation

GitHub Actions / test-lint / lint

if statements should only be cuddled with assignments used in the if statement itself (wsl)
cacheProfile := &perf.CacheProfile{}
if err := (*cacheProfiler).Profile(cacheProfile); err != nil {
Expand All @@ -761,52 +772,56 @@ func (c *perfCollector) updateCacheCounters(cgroupID string, procs []procfs.Proc
continue
}

if cacheProfile.TimeEnabled != nil && cacheProfile.TimeRunning != nil {
scale = float64(*cacheProfile.TimeEnabled) / float64(*cacheProfile.TimeRunning)
}

if cacheProfile.L1DataReadHit != nil {
cgroupCachePerfCounters["cache_l1d_read_hits_total"] += float64(*cacheProfile.L1DataReadHit)
cgroupCachePerfCounters["cache_l1d_read_hits_total"] += scale * float64(*cacheProfile.L1DataReadHit)
}

if cacheProfile.L1DataReadMiss != nil {
cgroupCachePerfCounters["cache_l1d_read_misses_total"] += float64(*cacheProfile.L1DataReadMiss)
cgroupCachePerfCounters["cache_l1d_read_misses_total"] += scale * float64(*cacheProfile.L1DataReadMiss)
}

if cacheProfile.L1DataWriteHit != nil {
cgroupCachePerfCounters["cache_l1d_write_hits_total"] += float64(*cacheProfile.L1DataWriteHit)
cgroupCachePerfCounters["cache_l1d_write_hits_total"] += scale * float64(*cacheProfile.L1DataWriteHit)
}

if cacheProfile.L1InstrReadMiss != nil {
cgroupCachePerfCounters["cache_l1_instr_read_misses_total"] += float64(*cacheProfile.L1InstrReadMiss)
cgroupCachePerfCounters["cache_l1_instr_read_misses_total"] += scale * float64(*cacheProfile.L1InstrReadMiss)
}

if cacheProfile.InstrTLBReadHit != nil {
cgroupCachePerfCounters["cache_tlb_instr_read_hits_total"] += float64(*cacheProfile.InstrTLBReadHit)
cgroupCachePerfCounters["cache_tlb_instr_read_hits_total"] += scale * float64(*cacheProfile.InstrTLBReadHit)
}

if cacheProfile.InstrTLBReadMiss != nil {
cgroupCachePerfCounters["cache_tlb_instr_read_misses_total"] += float64(*cacheProfile.InstrTLBReadMiss)
cgroupCachePerfCounters["cache_tlb_instr_read_misses_total"] += scale * float64(*cacheProfile.InstrTLBReadMiss)
}

if cacheProfile.LastLevelReadHit != nil {
cgroupCachePerfCounters["cache_ll_read_hits_total"] += float64(*cacheProfile.LastLevelReadHit)
cgroupCachePerfCounters["cache_ll_read_hits_total"] += scale * float64(*cacheProfile.LastLevelReadHit)
}

if cacheProfile.LastLevelReadMiss != nil {
cgroupCachePerfCounters["cache_ll_read_misses_total"] += float64(*cacheProfile.LastLevelReadMiss)
cgroupCachePerfCounters["cache_ll_read_misses_total"] += scale * float64(*cacheProfile.LastLevelReadMiss)
}

if cacheProfile.LastLevelWriteHit != nil {
cgroupCachePerfCounters["cache_ll_write_hits_total"] += float64(*cacheProfile.LastLevelWriteHit)
cgroupCachePerfCounters["cache_ll_write_hits_total"] += scale * float64(*cacheProfile.LastLevelWriteHit)
}

if cacheProfile.LastLevelWriteMiss != nil {
cgroupCachePerfCounters["cache_ll_write_misses_total"] += float64(*cacheProfile.LastLevelWriteMiss)
cgroupCachePerfCounters["cache_ll_write_misses_total"] += scale * float64(*cacheProfile.LastLevelWriteMiss)
}

if cacheProfile.BPUReadHit != nil {
cgroupCachePerfCounters["cache_bpu_read_hits_total"] += float64(*cacheProfile.BPUReadHit)
cgroupCachePerfCounters["cache_bpu_read_hits_total"] += scale * float64(*cacheProfile.BPUReadHit)
}

if cacheProfile.BPUReadMiss != nil {
cgroupCachePerfCounters["cache_bpu_read_misses_total"] += float64(*cacheProfile.BPUReadMiss)
cgroupCachePerfCounters["cache_bpu_read_misses_total"] += scale * float64(*cacheProfile.BPUReadMiss)
}
}
}
Expand Down

0 comments on commit e7dbb78

Please sign in to comment.