From cb3a6ccf7fa1b5f0fb9b3b49fa65b867f92463b4 Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Thu, 10 Oct 2024 08:56:00 -0700 Subject: [PATCH] support for Granite Rapids w/o TMA --- Makefile | 1 + _version.txt | 2 +- events/gnr.txt | 96 +++++++++++++++++++++ events/metric_gnr.json | 186 +++++++++++++++++++++++++++++++++++++++++ perf-collect.py | 31 +++++-- perf-collect.spec | 2 +- perf-postprocess.py | 2 + src/perf_helpers.py | 122 +++++++++++++-------------- 8 files changed, 367 insertions(+), 75 deletions(-) create mode 100644 events/gnr.txt create mode 100644 events/metric_gnr.json diff --git a/Makefile b/Makefile index 96c4337..d4abf54 100644 --- a/Makefile +++ b/Makefile @@ -49,6 +49,7 @@ build-public/postprocess: --add-data "./events/metric_spr_emr.json:." \ --add-data "./events/metric_spr_emr_nofixedtma.json:." \ --add-data "./events/metric_srf.json:." \ + --add-data "./events/metric_gnr.json:." \ --add-data "./src/base.html:." \ --runtime-tmpdir . \ --exclude-module readline diff --git a/_version.txt b/_version.txt index 9df886c..3e1ad72 100644 --- a/_version.txt +++ b/_version.txt @@ -1 +1 @@ -1.4.2 +1.5.0 \ No newline at end of file diff --git a/events/gnr.txt b/events/gnr.txt new file mode 100644 index 0000000..e485653 --- /dev/null +++ b/events/gnr.txt @@ -0,0 +1,96 @@ +########################################################################################################### +# Copyright (C) 2021-2023 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +########################################################################################################### + +# GraniteRapids event list + +cpu/event=0xd0,umask=0x21,cmask=0x00,name='MEM_INST_RETIRED.LOCK_LOADS'/, +cpu/event=0x51,umask=0x01,cmask=0x00,name='L1D.REPLACEMENT'/, +cpu/event=0xd1,umask=0x01,cmask=0x00,name='MEM_LOAD_RETIRED.L1_HIT'/, +cpu/event=0x24,umask=0xe4,cmask=0x00,name='L2_RQSTS.ALL_CODE_RD'/, +cpu-cycles, +ref-cycles, +instructions; + 
+cpu/event=0x79,umask=0x08,cmask=0x00,name='IDQ.DSB_UOPS'/, +cpu/event=0x79,umask=0x04,cmask=0x00,name='IDQ.MITE_UOPS'/, +cpu/event=0x79,umask=0x20,cmask=0x00,name='IDQ.MS_UOPS'/, +cpu/event=0xa8,umask=0x01,cmask=0x00,name='LSD.UOPS'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x11,umask=0x0e,cmask=0x00,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0x12,umask=0x0e,cmask=0x00,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu/event=0x13,umask=0x0e,cmask=0x00,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, +cpu/event=0x3c,umask=0x08,cmask=0x00,name='CPU_CLK_UNHALTED.REF_DISTRIBUTED'/, +cpu/event=0x3c,umask=0x02,cmask=0x00,name='CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xd1,umask=0x02,cmask=0x00,name='MEM_LOAD_RETIRED.L2_HIT'/, +cpu/event=0x25,umask=0x1f,cmask=0x00,name='L2_LINES_IN.ALL'/, +cpu/event=0xd1,umask=0x10,cmask=0x00,name='MEM_LOAD_RETIRED.L2_MISS'/, +cpu/event=0x24,umask=0x24,cmask=0x00,name='L2_RQSTS.CODE_RD_MISS'/, +cpu/event=0xad,umask=0x10,cmask=0x00,name='INT_MISC.UOP_DROPPING'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x00,umask=0x04,period=10000003,name='TOPDOWN.SLOTS'/, +cpu/event=0x00,umask=0x81,period=10000003,name='PERF_METRICS.BAD_SPECULATION'/, +cpu/event=0x00,umask=0x83,period=10000003,name='PERF_METRICS.BACKEND_BOUND'/, +cpu/event=0x00,umask=0x82,period=10000003,name='PERF_METRICS.FRONTEND_BOUND'/, +cpu/event=0x00,umask=0x80,period=10000003,name='PERF_METRICS.RETIRING'/, +cpu/event=0x00,umask=0x86,period=10000003,name='PERF_METRICS.FETCH_LATENCY'/, +cpu/event=0x00,umask=0x87,period=10000003,name='PERF_METRICS.MEMORY_BOUND'/, +cpu/event=0x00,umask=0x85,period=10000003,name='PERF_METRICS.BRANCH_MISPREDICTS'/, +cpu/event=0x00,umask=0x84,period=10000003,name='PERF_METRICS.HEAVY_OPERATIONS'/, +cpu-cycles, +ref-cycles, +instructions; + +# kernel +cpu-cycles:k, +ref-cycles:k, +instructions:k; + +# C6 +cstate_core/c6-residency/; +cstate_pkg/c6-residency/; + +# UPI 
+upi/event=0x02,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/; + +# CHA (Cache) +cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, +cha/event=0x35,umask=0xc8177e01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, +cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/; + +cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, +cha/event=0x36,umask=0xc816fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/, +cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/, +cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/; + +cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/, +cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/, +cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, +cha/event=0x36,umask=0xC817fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/; + +# CHA (IO Bandwidth) +cha/event=0x35,umask=0xc8f3ff04,name='UNC_CHA_TOR_INSERTS.IO_PCIRDCUR'/, +cha/event=0x35,umask=0xCC43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOM'/, +cha/event=0x35,umask=0xCD43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR'/, +cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; + +# IMC (memory read/writes) +imc/event=0x05,umask=0xCF,name='UNC_M_CAS_COUNT_SCH0.RD'/, +imc/event=0x05,umask=0xF0,name='UNC_M_CAS_COUNT_SCH0.WR'/, +imc/event=0x06,umask=0xCF,name='UNC_M_CAS_COUNT_SCH1.RD'/, +imc/event=0x06,umask=0xF0,name='UNC_M_CAS_COUNT_SCH1.WR'/; + +# power +power/energy-pkg/, +power/energy-ram/; diff --git a/events/metric_gnr.json b/events/metric_gnr.json new file mode 100644 index 0000000..512493a --- /dev/null +++ b/events/metric_gnr.json @@ -0,0 +1,186 @@ +[ + { + "name": "metric_CPU operating frequency (in GHz)", + "expression": "([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000" + }, + { + "name": "metric_CPU utilization %", + "expression": "100 * [ref-cycles] / [TSC]" + }, + { + 
"name": "metric_CPU utilization% in kernel mode", + "expression": "100 * [ref-cycles:k] / [TSC]" + }, + { + "name": "metric_CPI", + "name-txn": "metric_cycles per txn", + "expression": "[cpu-cycles] / [instructions]", + "expression-txn": "[cpu-cycles] / [TXN]" + }, + { + "name": "metric_kernel_CPI", + "name-txn": "metric_kernel_cycles per txn", + "expression": "[cpu-cycles:k] / [instructions:k]", + "expression-txn": "[cpu-cycles:k] / [TXN]" + }, + { + "name": "metric_IPC", + "name-txn": "metric_txn per cycle", + "expression": "[instructions] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]" + }, + { + "name": "metric_giga_instructions_per_sec", + "expression": "[instructions] / 1000000000" + }, + { + "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", + "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]" + }, + { + "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + }, + { + "name": "metric_L1D demand data read hits per instr", + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + }, + { + "name": "metric_L1-I code read misses (w/ prefetches) per instr", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" + }, + { + "name": "metric_L2 demand data read hits per instr", + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" + }, + { + "name": "metric_L2 MPI (includes code+data+rfo w/ 
prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]", + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" + }, + { + "name": "metric_L2 demand data read MPI", + "name-txn": "metric_L2 demand data read misses per txn", + "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + }, + { + "name": "metric_L2 demand code MPI", + "name-txn": "metric_L2 demand code misses per txn", + "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]", + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + }, + { + "name": "metric_Average LLC demand data read miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in 
ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_UPI Data transmit BW (MB/sec) (only data)", + "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1" + }, + { + "name": "metric_package power (watts)", + "expression": "[power/energy-pkg/]" + }, + { + "name": "metric_DRAM power (watts)", + "expression": "[power/energy-ram/]" + }, + { + "name": "metric_core c6 residency %", + "expression": "100 * [cstate_core/c6-residency/] / [TSC]" + }, + { + "name": "metric_package c6 residency %", + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]" + }, + { + "name": "metric_% Uops delivered from decoded Icache (DSB)", + "expression": "100 * ([IDQ.DSB_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_% Uops delivered from legacy decode pipeline (MITE)", + "expression": "100 * ([IDQ.MITE_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_memory bandwidth read (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT_SCH0.RD] + [UNC_M_CAS_COUNT_SCH1.RD]) * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth write (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT_SCH0.WR] + [UNC_M_CAS_COUNT_SCH1.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth total (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT_SCH0.RD] + [UNC_M_CAS_COUNT_SCH1.RD] + [UNC_M_CAS_COUNT_SCH0.WR] + [UNC_M_CAS_COUNT_SCH1.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_ITLB (2nd level) MPI", + "name-txn": "metric_ITLB (2nd level) misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) load MPI", + "name-txn": "metric_DTLB (2nd level) load misses per txn", 
+ "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) store MPI", + "name-txn": "metric_DTLB (2nd level) store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_NUMA %_Reads addressed to local DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_NUMA %_Reads addressed to remote DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_uncore frequency GHz", + "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)", + "expression": "([UNC_CHA_TOR_INSERTS.IO_PCIRDCUR] * 64 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)", + "expression": "(([UNC_CHA_TOR_INSERTS.IO_ITOM] + [UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR]) * 64 / 1000000) / 1" + } +] diff --git a/perf-collect.py b/perf-collect.py index 1c28a24..46158a6 100644 --- a/perf-collect.py +++ b/perf-collect.py @@ -28,6 +28,7 @@ "SapphireRapids", "EmeraldRapids", "SierraForest", + "GraniteRapids", ] @@ -58,10 +59,11 @@ def write_metadata( modified.write("CORES_PER_SOCKET," + str(perf_helpers.get_cpu_count()) + ",\n") modified.write("SOCKET_COUNT," + str(perf_helpers.get_socket_count()) 
+ ",\n") modified.write("HYPERTHREADING_ON," + str(perf_helpers.get_ht_status()) + ",\n") - imc, cha, upi = perf_helpers.get_imc_cha_upi_count() - modified.write("IMC count," + str(imc) + ",\n") - modified.write("CHAS_PER_SOCKET," + str(cha) + ",\n") - modified.write("UPI count," + str(upi) + ",\n") + counts = perf_helpers.get_unc_device_counts() + modified.write("IMC count," + str(counts["imc"]) + ",\n") + modified.write("CHAS_PER_SOCKET," + str(counts["cha"]) + ",\n") + modified.write("UPI count," + str(counts["upi"]) + ",\n") + modified.write("B2CMI count, " + str(counts["b2cmi"]) + ",\n") modified.write("Architecture," + str(arch) + ",\n") modified.write("Model," + str(cpuname) + ",\n") modified.write("kernel version," + perf_helpers.get_version() + "\n") @@ -190,12 +192,14 @@ def fixed_tma_supported(): return False try: if events["TOPDOWN.SLOTS"] == events["PERF_METRICS.BAD_SPECULATION"]: + logging.debug("TOPDOWN.SLOTS and PERF_METRICS.BAD_SPECULATION are equal") return False except KeyError: logging.debug("Failed to find required events in fixed_tma_supported()") return False if events["TOPDOWN.SLOTS"] == 0 or events["PERF_METRICS.BAD_SPECULATION"] == 0: + logging.debug("TOPDOWN.SLOTS or PERF_METRICS.BAD_SPECULATION count is 0") return False return True @@ -212,6 +216,7 @@ def fixed_event_supported(arch, event): or arch == "sapphirerapids" or arch == "emeraldrapids" or arch == "sierraforest" + or arch == "graniterapids" ): num_gp_counters = 8 else: @@ -303,6 +308,8 @@ def get_eventfile_path(arch, script_path, supports_tma_fixed_events): eventfile = "spr_emr_nofixedtma.txt" elif arch == "sierraforest": eventfile = "srf.txt" + elif arch == "graniterapids": + eventfile = "gnr.txt" if eventfile is None: return None @@ -454,7 +461,12 @@ def get_eventfile_path(arch, script_path, supports_tma_fixed_events): # The fixed-purpose PMU counters for TMA events are not supported on architectures older than Icelake # They are also not supported on some VMs, e.g., AWS ICX 
and SPR VMs supports_tma_fixed_events = False - if arch == "icelake" or arch == "sapphirerapids" or arch == "emeraldrapids": + if ( + arch == "icelake" + or arch == "sapphirerapids" + or arch == "emeraldrapids" + or arch == "graniterapids" + ): supports_tma_fixed_events = fixed_tma_supported() if not supports_tma_fixed_events: logging.warning( @@ -561,10 +573,11 @@ def get_eventfile_path(arch, script_path, supports_tma_fixed_events): logging.info("Cores per socket: " + str(perf_helpers.get_cpu_count())) logging.info("Socket: " + str(perf_helpers.get_socket_count())) logging.info("Hyperthreading on: " + str(perf_helpers.get_ht_status())) - imc, cha, upi = perf_helpers.get_imc_cha_upi_count() - logging.info("IMC count: " + str(imc)) - logging.info("CHA per socket: " + str(cha)) - logging.info("UPI count: " + str(upi)) + counts = perf_helpers.get_unc_device_counts() + logging.info("IMC count: " + str(counts["imc"])) + logging.info("CHA per socket: " + str(counts["cha"])) + logging.info("UPI count: " + str(counts["upi"])) + logging.info("B2CMI count: " + str(counts["b2cmi"])) logging.info("PerfSpect version: " + perf_helpers.get_tool_version()) if args.verbose: logging.info("/sys/devices/: " + str(sys_devs)) diff --git a/perf-collect.spec b/perf-collect.spec index 5a53fc8..ce697e9 100644 --- a/perf-collect.spec +++ b/perf-collect.spec @@ -7,7 +7,7 @@ block_cipher = None a = Analysis( ['perf-collect.py'], pathex=[], - datas=[('./src/libtsc.so', '.'), ('./events/bdx.txt', '.'), ('./events/clx_skx.txt', '.'), ('./events/icx.txt', '.'), ('./events/icx_nofixedtma.txt', '.'), ('./events/spr_emr.txt', '.'), ('./events/spr_emr_nofixedtma.txt', '.'), ('./events/srf.txt', '.')], + datas=[('./src/libtsc.so', '.'), ('./events/bdx.txt', '.'), ('./events/clx_skx.txt', '.'), ('./events/icx.txt', '.'), ('./events/icx_nofixedtma.txt', '.'), ('./events/spr_emr.txt', '.'), ('./events/spr_emr_nofixedtma.txt', '.'), ('./events/srf.txt', '.'), ('./events/gnr.txt', '.')], hiddenimports=[], 
hookspath=[], hooksconfig={}, diff --git a/perf-postprocess.py b/perf-postprocess.py index a4a996f..7657586 100644 --- a/perf-postprocess.py +++ b/perf-postprocess.py @@ -451,6 +451,8 @@ def get_metric_file_name(microarchitecture, fixed_tma_supported): metric_file = "metric_spr_emr_nofixedtma.json" elif microarchitecture == "sierraforest": metric_file = "metric_srf.json" + elif microarchitecture == "graniterapids": + metric_file = "metric_gnr.json" else: crash("Suitable metric file not found") diff --git a/src/perf_helpers.py b/src/perf_helpers.py index 19e0638..c00b881 100644 --- a/src/perf_helpers.py +++ b/src/perf_helpers.py @@ -92,24 +92,35 @@ def get_sys_devices(): return devs -# get imc and uncore counts +# get relevant uncore device counts # TODO:fix for memory config with some channels populated -def get_imc_cha_upi_count(): +def get_unc_device_counts(): sys_devs = get_sys_devices() - cha_count = 0 - imc_count = 0 - upi_count = 0 + counts = {} if "uncore_cha" in sys_devs: - cha_count = int(sys_devs["uncore_cha"]) - if "uncore_cbox" in sys_devs: - cha_count = int(sys_devs["uncore_cbox"]) + counts["cha"] = int(sys_devs["uncore_cha"]) + elif "uncore_cbox" in sys_devs: # alternate name for cha + counts["cha"] = int(sys_devs["uncore_cbox"]) + else: + counts["cha"] = 0 + if "uncore_upi" in sys_devs: - upi_count = int(sys_devs["uncore_upi"]) - if "uncore_qpi" in sys_devs: - upi_count = int(sys_devs["uncore_qpi"]) + counts["upi"] = int(sys_devs["uncore_upi"]) + elif "uncore_qpi" in sys_devs: # alternate name for upi + counts["upi"] = int(sys_devs["uncore_qpi"]) + else: + counts["upi"] = 0 + if "uncore_imc" in sys_devs: - imc_count = int(sys_devs["uncore_imc"]) - return imc_count, cha_count, upi_count + counts["imc"] = int(sys_devs["uncore_imc"]) + else: + counts["imc"] = 0 + + if "uncore_b2cmi" in sys_devs: + counts["b2cmi"] = int(sys_devs["uncore_b2cmi"]) + else: + counts["b2cmi"] = 0 + return counts # return a sorted list of device ids for a given device type 
pattern, e.g., uncore_cha_, uncore_imc_, etc. @@ -312,6 +323,8 @@ def get_arch_and_name(procinfo): arch = "emeraldrapids" elif model == 175 and cpufamily == 6: arch = "sierraforest" + elif model == 173 and cpufamily == 6: + arch = "graniterapids" return arch, modelname @@ -368,60 +381,41 @@ def get_epoch(start_time): return epoch -# get cgroups +# get_cgroups +# cid: a comma-separated list of container ids +# note: works only for cgroup v2 def get_cgroups(cid): + # check cgroup version + if not os.path.exists("/sys/fs/cgroup/cgroup.controllers"): + crash("cgroup v1 detected, cgroup v2 required") + # get cgroups from /sys/fs/cgroup directory recursively. They must start with 'docker' or 'containerd' and end with '.scope'. + # if cid is provided, only return cgroups that match the provided container ids cids = cid.split(",") - try: - stat = subprocess.Popen( - ["stat", "-fc", "%T", "/sys/fs/cgroup/"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - except subprocess.SubprocessError as e: - crash( - "Cannot determine cgroup version. 
failed to open stat subprocess: " + str(e) - ) - out, err = stat.communicate() - out = out.decode("utf-8").strip() - if out == "tmpfs": - logging.info("cgroup v1 detected") - elif out == "cgroup2fs": - logging.info("cgroup v2 detected") - else: - logging.info("unknown cgroup version " + out) - - try: - p = subprocess.Popen( - ["ps", "-a", "-x", "-o", "cgroup", "--sort=-%cpu"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - except subprocess.SubprocessError as e: - crash("failed to open ps subprocess: " + str(e)) - out, err = p.communicate() - if err: - crash(f"error reading cgroups: {err}") - - cgroups = [ - *dict.fromkeys( - filter( - lambda x: ( # must be container runtime - "docker" in x or "containerd" in x + cgroups = [] + # get all cgroups + for dirpath, dirnames, filenames in os.walk("/sys/fs/cgroup"): + for dirname in dirnames: + if ( + ("docker" in dirname or "containerd" in dirname) + and dirname.endswith(".scope") + and (len(cids) == 0 or any(map(lambda y: y in dirname, cids))) + ): + cgroups.append( + os.path.relpath(os.path.join(dirpath, dirname), "/sys/fs/cgroup") ) - and x.endswith(".scope") # don't include services - and ( # select all or provided cids - len(cids) == 0 or any(map(lambda y: y in x, cids)) - ), - map( - lambda x: x.split(":")[-1], # get trailing cgroup name - filter( # remove extraneous lines - lambda x: x != "" and x != "CGROUP" and x != "-", - out.decode("utf-8").split("\n"), - ), - ), - ) - ) - ] + # associate cgroups with their cpu utilization found in the usage_usec field of the cgroup's cpu.stat file + cgroup_cpu_usage = {} + for cgroup in cgroups: + try: + with open(f"/sys/fs/cgroup/{cgroup}/cpu.stat", "r") as f: + for line in f: + if "usage_usec" in line: + cgroup_cpu_usage[cgroup] = int(line.split()[1]) + except EnvironmentError as e: + logging.warning(str(e)) + # sort cgroups by cpu usage, highest usage first + cgroups = sorted(cgroup_cpu_usage, key=cgroup_cpu_usage.get, reverse=True) + if
len(cgroups) == 0: crash("no matching cgroups found") elif len(cgroups) > 5: