From a95e4191ac849cf301f9e8e3f0c0be44a6a1e104 Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Tue, 28 Nov 2023 13:33:10 -0800 Subject: [PATCH] move LLC under L2 and add pivoted csv's for cpu and socket runmodes --- _version.txt | 2 +- events/metric_icx.json | 72 ++++++++++++++++++------------------ events/metric_spr_emr.json | 76 +++++++++++++++++++------------------- perf-postprocess.py | 69 +++++++++++++++++++++++++++------- 4 files changed, 131 insertions(+), 88 deletions(-) diff --git a/_version.txt b/_version.txt index 0c00f61..17e63e7 100644 --- a/_version.txt +++ b/_version.txt @@ -1 +1 @@ -1.3.10 +1.3.11 diff --git a/events/metric_icx.json b/events/metric_icx.json index 535ed94..b63f7cb 100644 --- a/events/metric_icx.json +++ b/events/metric_icx.json @@ -86,6 +86,42 @@ "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + }, + { + "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]" + }, + { + "name": "metric_Average LLC demand data read miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, { "name": "metric_UPI Data transmit BW (MB/sec) (only data)", "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1" @@ -138,42 +174,6 @@ "name": "metric_memory bandwidth total (MB/sec)", "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1" }, - { - "name": "metric_LLC code read MPI (demand+prefetch)", - "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", - "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]", - "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]" - }, - { - "name": "metric_LLC data read MPI (demand+prefetch)", - "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", - "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", - "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" - }, - { - "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", - "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", - "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", - "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]" - }, - { - "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", - "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", - "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", - "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]" - }, - { - "name": "metric_Average LLC demand data read miss latency (in ns)", - "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" - }, - { - "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", - "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" - }, - { - "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", - "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" - }, { "name": "metric_ITLB (2nd level) MPI", "name-txn": "metric_ITLB (2nd level) misses per txn", diff --git a/events/metric_spr_emr.json b/events/metric_spr_emr.json index 55eab80..e81e6ed 100644 --- a/events/metric_spr_emr.json +++ b/events/metric_spr_emr.json @@ -86,6 +86,44 @@ "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]", + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + }, + { + "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_Average LLC demand data read miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, { "name": "metric_UPI Data transmit BW (MB/sec) (only data)", "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1" @@ -140,44 +178,6 @@ "name": "metric_memory bandwidth total (MB/sec)", "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1" }, - { - "name": "metric_LLC code read MPI (demand+prefetch)", - "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", - "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]", - "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]" - }, - { - "name": "metric_LLC data read MPI (demand+prefetch)", - "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", - "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", - "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" - }, - { - "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", - "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", - "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", - "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]", - "origin": "perfspect" - }, - { - "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", - "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", - "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", - "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]", - "origin": "perfspect" - }, - { - "name": "metric_Average LLC demand data read miss latency (in ns)", - "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" - }, - { - "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", - "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" - }, - { - "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", - "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" - }, { "name": "metric_ITLB (2nd level) MPI", "name-txn": "metric_ITLB (2nd level) misses per txn", diff --git a/perf-postprocess.py b/perf-postprocess.py index 76f61c9..a2d32a0 100644 --- a/perf-postprocess.py +++ b/perf-postprocess.py @@ -39,12 +39,28 @@ def get_extra_out_file(out_file, t): text = "socket" elif t == "sa": text = "socket.average" + elif t == "savg": + text = "socket.avg.pivot" + elif t == "smax": + text = "socket.max.pivot" + elif t == "smin": + text = "socket.min.pivot" + elif t == "sp95": + text = "socket.p95.pivot" elif t == "sr": text = "socket.raw" elif t == "c": text = "cpu" elif t == "ca": text = "cpu.average" + elif t == "cavg": + text = "cpu.avg.pivot" + elif t == "cmax": + text = "cpu.max.pivot" + elif t == "cmin": + text = "cpu.min.pivot" + elif t == "cp95": + text = "cpu.p95.pivot" elif t == "cr": text = "cpu.raw" elif t == "m": @@ -571,16 +587,8 @@ def generate_metrics_time_series(time_series_df, perf_mode, out_file_path): def generate_metrics_averages( - time_series_df: pd.DataFrame, perf_mode: Mode, out_file_path: str + time_series_df: pd.DataFrame, perf_mode: Mode, out_file_path: str, metrics ) -> None: - average_metric_file_name = "" - if perf_mode == Mode.System: - average_metric_file_name = get_extra_out_file(out_file_path, "a") - if perf_mode == Mode.Socket: - average_metric_file_name = get_extra_out_file(out_file_path, "sa") - if perf_mode == Mode.CPU: - average_metric_file_name = get_extra_out_file(out_file_path, "ca") - time_series_df.index.name = "metrics" avgcol = time_series_df.mean(numeric_only=True, axis=1).to_frame().reset_index() p95col = time_series_df.quantile(q=0.95, axis=1).to_frame().reset_index() @@ -591,15 +599,45 @@ def generate_metrics_averages( p95col.columns = ["metrics", "p95"] mincol.columns = ["metrics", "min"] maxcol.columns = ["metrics", "max"] + # merge columns time_series_df = time_series_df.merge(avgcol, on="metrics", how="outer") time_series_df = time_series_df.merge(p95col, on="metrics", how="outer") time_series_df = time_series_df.merge(mincol, on="metrics", how="outer") time_series_df = time_series_df.merge(maxcol, on="metrics", how="outer") + average_metric_file_name = "" + if perf_mode == Mode.System: + average_metric_file_name = get_extra_out_file(out_file_path, "a") + elif perf_mode == Mode.CPU: + average_metric_file_name = get_extra_out_file(out_file_path, "ca") + elif perf_mode == Mode.Socket: + average_metric_file_name = get_extra_out_file(out_file_path, "sa") + time_series_df[["metrics", "avg", "p95", "min", "max"]].to_csv( average_metric_file_name, index=False ) + if perf_mode != Mode.System: + for table, type in [ + [avgcol, "avg"], + [p95col, "p95"], + [mincol, "min"], + [maxcol, "max"], + ]: + table["part"] = table["metrics"].map( + lambda x: int("".join(filter(str.isdigit, x.split(".")[-1]))) + ) + table["metrics"] = table["metrics"].map(lambda x: x.rsplit(".", 1)[0]) + table = table.pivot_table( + index=["metrics"], columns=["part"], values=table.columns[1] + ) + table = table.reindex(index=metrics) + table = table.reindex(sorted(table.columns), axis=1) + + average_metric_file_name = get_extra_out_file( + out_file_path, ("s" if perf_mode == Mode.Socket else "c") + type + ) + table.to_csv(average_metric_file_name) return @@ -994,9 +1032,9 @@ def generate_metrics( verbose, filtered_metrics, metadata, group_to_event, group_to_df, errors ) - time_series_df = pd.DataFrame(time_metrics_result).reindex( - index=list(time_metrics_result[list(time_metrics_result.keys())[0]].keys()) - ) + metrics = list(time_metrics_result[list(time_metrics_result.keys())[0]].keys()) + + time_series_df = pd.DataFrame(time_metrics_result).reindex(index=metrics) if verbose: for error in errors: @@ -1025,7 +1063,12 @@ def generate_metrics( ] generate_metrics_time_series(time_series_df, perf_mode, out_file_path) - generate_metrics_averages(time_series_df, perf_mode, out_file_path) + generate_metrics_averages( + time_series_df, + perf_mode, + out_file_path, + [*dict.fromkeys([e.rsplit(".", 1)[0] for e in metrics])], + ) if perf_mode == Mode.System: write_html(time_series_df, perf_mode, out_file_path, meta_data, pertxn) return