From 7cf78eb25d45a53e97cb143ddd0185eddc10672c Mon Sep 17 00:00:00 2001 From: happy-lx <54952983+happy-lx@users.noreply.github.com> Date: Fri, 22 Sep 2023 10:50:34 +0800 Subject: [PATCH] More rolling (#2319) * util: more rolling support * observe more rolling cnt at the same time * diff 2 or more dbs * fix comment * remove boringutil * fix py * util: move 2 py file to rolling dir --- scripts/rolling/rolling.py | 170 ++++++++++++++++++ scripts/{ => rolling}/rollingplot.py | 0 src/main/scala/utils/PerfCounterUtils.scala | 34 ++++ src/main/scala/xiangshan/XSCore.scala | 1 + .../scala/xiangshan/backend/CtrlBlock.scala | 2 + .../scala/xiangshan/backend/MemBlock.scala | 4 +- .../scala/xiangshan/backend/rob/Rob.scala | 8 + .../cache/dcache/DCacheWrapper.scala | 3 + .../scala/xiangshan/mem/prefetch/FDP.scala | 27 +++ 9 files changed, 248 insertions(+), 1 deletion(-) create mode 100644 scripts/rolling/rolling.py rename scripts/{ => rolling}/rollingplot.py (100%) diff --git a/scripts/rolling/rolling.py b/scripts/rolling/rolling.py new file mode 100644 index 00000000000..e1f9823d96b --- /dev/null +++ b/scripts/rolling/rolling.py @@ -0,0 +1,170 @@ +import sys +import argparse +import sqlite3 +import matplotlib.pyplot as plt +import numpy as np + + +# usage: single db file +# python3 rolling.py plot DB_FILE_PATH [--perf-name PERF_NAME] [--aggregate AGGREGATE] [--interval INTERVAL] [--perf-file PERF_FILE] +# +# usage: diff multiple db files +# python3 rolling.py diff MULTI_DB_FILE_PATH [--perf-name PERF_NAME] [--aggregate AGGREGATE] [--interval INTERVAL] [--perf-file PERF_FILE] +# +# If you only observe one rolling counter, specify the --perf-name parameter. +# If you want to observe multiple counters at the same time, specify the --perf-file parameter, +# pointing to a description file in which each line names one rolling counter; +# a line beginning with '//' is treated as a comment, so you can use it to exclude counters you do not care about. 
+# +# Note that generally speaking, when observing multiple rolling counters, +# the meaning of the x-axis needs to be the same, then you can use the intervalBased mode. +# +# If you want to compare multiple dbs to observe the difference between multiple runs, you can use diff mode. +# This requires specifying the path of a description file. Each line in this description file contains a specific db path. +# +# eg. +# exec emu twice with different parameters and obtained different db files (db0, db1). +# want to observe the changes in IPC and prefetch accuracy. +# create a file named db.txt: +# path to db0 +# path to db1 +# create a file named perf.txt: +# IPC +# L1PrefetchAccuracy +# run `python3 rolling.py diff db.txt --perf-file perf.txt -I (interval in RTL)` +# eg. +# want to observe the IPC rolling in single db (db0). +# run `python3 rolling.py plot path-to-db0 --perf-name IPC` +# + + +class DataSet: + + def __init__(self, db_path): + self.conn = sqlite3.connect(db_path) + self.cursor = self.conn.cursor() + self.xdata = [] + self.ydata = [] + + def derive(self, perf_name, aggregate, clk_itval, hart): + sql = "SELECT xAxisPt, yAxisPt FROM {}_rolling_{}".format(perf_name, hart) + self.cursor.execute(sql) + result = self.cursor.fetchall() + aggcnt = 0 + recordcnt = 0 + aggydata = 0 + aggxdata = 0 + self.xdata = [] + self.ydata = [] + if clk_itval == -1: + # normal mode + # db log in normal mode: (xAxis, ydata) + # xAxis is current position in X Axis, ydata is the Increment value between this point and last point + for row in result: + aggcnt += 1 + aggydata += row[1] + if aggcnt == aggregate: + self.xdata.append(row[0]) + self.ydata.append(aggydata/(row[0]-aggxdata)) + aggcnt = 0 + aggydata = 0 + aggxdata = row[0] + else: + # intervalBased mode, -I interval should be specified + # db log in intervalBased mode: (xdata, ydata) + # xdata, ydata in the Increment value in a certain interval + for row in result: + aggcnt += 1 + aggxdata += row[0] + aggydata += row[1] + 
if aggcnt == aggregate: + self.xdata.append((clk_itval * aggregate) * (recordcnt + 1)) + self.ydata.append(0 if aggydata == 0 else aggxdata/aggydata) + aggcnt = 0 + aggxdata = 0 + aggydata = 0 + recordcnt += 1 + + def plot(self, lb='PERF'): + plt.plot(self.xdata, self.ydata, lw=1, ls='-', label=lb) + + def legend(): + plt.legend() + + def show(): + plt.show() + +def err_exit(msg): + print(msg) + sys.exit(1) + +def check_args(args): + if args.aggregate <= 0: + err_exit("aggregation ratio must be no less than 1") + if not args.perf_name and not args.perf_file: + err_exit("should either specify perf-name or perf-file") + +def plot_dataset(path, perf_name, aggregate, clk_itval, perf_file, db_id=-1): + dataset = DataSet(path) + label = '_' + str(db_id) if db_id != -1 else '' + + if perf_file: + with open(perf_file) as fp: + perfs = fp.readlines() + perfs = [perf.strip() for perf in perfs] + perfs = list(filter(lambda x: not x.startswith('//'), perfs)) + for perf in perfs: + dataset.derive(perf, aggregate, clk_itval, 0) + dataset.plot(perf + label) + else: + dataset.derive(perf_name, aggregate, clk_itval, 0) + dataset.plot(perf_name + label) + +def handle_plot(args): + check_args(args) + + plot_dataset(args.db_path, args.perf_name, args.aggregate, args.interval, args.perf_file) + + DataSet.legend() + DataSet.show() + +def handle_diff(args): + check_args(args) + + db_path = args.db_path + + with open(db_path) as fp: + for (idx, db) in enumerate(fp): + plot_dataset(db.strip(), args.perf_name, args.aggregate, args.interval, args.perf_file, idx) + + DataSet.legend() + DataSet.show() + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="performance rolling plot script for xs") + subparsers = parser.add_subparsers(title='useful sub function', dest='subcommand', help='useful sub function') + + # sub function for single db file + cmd1_parser = subparsers.add_parser('plot', help='for single db file') + cmd1_parser.add_argument('db_path', metavar='db_path', 
type=str, help='path to chiseldb file') + cmd1_parser.add_argument('--perf-name', default=None, type=str, help="name of the performance counter") + cmd1_parser.add_argument('--aggregate', '-A', default=1, type=int, help="aggregation ratio") + cmd1_parser.add_argument('--interval', '-I', default=-1, type=int, help="interval value in the interval based mode") + cmd1_parser.add_argument('--perf-file', '-F', default=None, type=str, help="path to a file including all interested performance counters") + + # sub function for diff multiple db files + cmd2_parser = subparsers.add_parser('diff', help='for diff multiple db files') + cmd2_parser.add_argument('db_path', metavar='muti_db_path', type=str, help="path to a file including all path to chiseldb files") + cmd2_parser.add_argument('--perf-name', default=None, type=str, help="name of the performance counter") + cmd2_parser.add_argument('--aggregate', '-A', default=1, type=int, help="aggregation ratio") + cmd2_parser.add_argument('--interval', '-I', default=-1, type=int, help="interval value in the interval based mode") + cmd2_parser.add_argument('--perf-file', '-F', default=None, type=str, help="path to a file including all interested performance counters") + + args = parser.parse_args() + + if args.subcommand == 'plot': + handle_plot(args) + elif args.subcommand == 'diff': + handle_diff(args) + else: + err_exit('invalid command') \ No newline at end of file diff --git a/scripts/rollingplot.py b/scripts/rolling/rollingplot.py similarity index 100% rename from scripts/rollingplot.py rename to scripts/rolling/rollingplot.py diff --git a/src/main/scala/utils/PerfCounterUtils.scala b/src/main/scala/utils/PerfCounterUtils.scala index 9b02385a4ff..da391aff9fa 100644 --- a/src/main/scala/utils/PerfCounterUtils.scala +++ b/src/main/scala/utils/PerfCounterUtils.scala @@ -255,6 +255,40 @@ object XSPerfRolling extends HasRegularPerfName { rollingTable.log(rollingPt, triggerDB, "", clock, reset) } } + + // event interval based mode 
+ def apply( + perfName: String, + perfCntX: UInt, + perfCntY: UInt, + granularity: Int, + eventTrigger: UInt, + clock: Clock, + reset: Reset + )(implicit p: Parameters) = { + judgeName(perfName) + val env = p(DebugOptionsKey) + if (env.EnableRollingDB && !env.FPGAPlatform) { + val tableName = perfName + "_rolling_" + p(XSCoreParamsKey).HartId.toString + val rollingTable = ChiselDB.createTable(tableName, new RollingEntry(), basicDB=true) + + val xAxisCnt = RegInit(0.U(64.W)) + val yAxisCnt = RegInit(0.U(64.W)) + val eventCnt = RegInit(0.U(64.W)) + xAxisCnt := xAxisCnt + perfCntX + yAxisCnt := yAxisCnt + perfCntY + eventCnt := eventCnt + eventTrigger + + val triggerDB = eventCnt >= granularity.U + when(triggerDB) { + eventCnt := eventTrigger + xAxisCnt := perfCntX + yAxisCnt := perfCntY + } + val rollingPt = new RollingEntry().apply(xAxisCnt, yAxisCnt) + rollingTable.log(rollingPt, triggerDB, "", clock, reset) + } + } } object XSPerfPrint { diff --git a/src/main/scala/xiangshan/XSCore.scala b/src/main/scala/xiangshan/XSCore.scala index 1eaa5c44edb..22d7115aa17 100644 --- a/src/main/scala/xiangshan/XSCore.scala +++ b/src/main/scala/xiangshan/XSCore.scala @@ -459,6 +459,7 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer) ctrlBlock.io.debugTopDown.fromCore.l2MissMatch := io.debugTopDown.l2MissMatch ctrlBlock.io.debugTopDown.fromCore.l3MissMatch := io.debugTopDown.l3MissMatch ctrlBlock.io.debugTopDown.fromCore.fromMem := memBlock.io.debugTopDown.toCore + memBlock.io.debugRolling := ctrlBlock.io.debugRolling // Modules are reset one by one val resetTree = ResetGenNode( diff --git a/src/main/scala/xiangshan/backend/CtrlBlock.scala b/src/main/scala/xiangshan/backend/CtrlBlock.scala index c4269779eac..75562a2afa2 100644 --- a/src/main/scala/xiangshan/backend/CtrlBlock.scala +++ b/src/main/scala/xiangshan/backend/CtrlBlock.scala @@ -306,6 +306,7 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI val fromRob = new 
RobCoreTopDownIO val fromCore = new CoreDispatchTopDownIO } + val debugRolling = new RobDebugRollingIO }) override def writebackSource: Option[Seq[Seq[Valid[ExuOutput]]]] = { @@ -675,6 +676,7 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI io.debugTopDown.fromRob := rob.io.debugTopDown.toCore dispatch.io.debugTopDown.fromRob := rob.io.debugTopDown.toDispatch dispatch.io.debugTopDown.fromCore := io.debugTopDown.fromCore + io.debugRolling := rob.io.debugRolling io.perfInfo.ctrlInfo.robFull := RegNext(rob.io.robFull) io.perfInfo.ctrlInfo.intdqFull := RegNext(intDq.io.dqFull) diff --git a/src/main/scala/xiangshan/backend/MemBlock.scala b/src/main/scala/xiangshan/backend/MemBlock.scala index 543caae58bb..e3e24964194 100644 --- a/src/main/scala/xiangshan/backend/MemBlock.scala +++ b/src/main/scala/xiangshan/backend/MemBlock.scala @@ -28,7 +28,7 @@ import utility._ import xiangshan._ import xiangshan.backend.exu.StdExeUnit import xiangshan.backend.fu._ -import xiangshan.backend.rob.{DebugLSIO, LsTopdownInfo, RobLsqIO, RobPtr} +import xiangshan.backend.rob.{DebugLSIO, LsTopdownInfo, RobLsqIO, RobPtr, RobDebugRollingIO} import xiangshan.cache._ import xiangshan.cache.mmu._ import xiangshan.mem._ @@ -172,6 +172,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W))) val toCore = new MemCoreTopDownIO } + val debugRolling = Flipped(new RobDebugRollingIO) }) override def writebackSource1: Option[Seq[Seq[DecoupledIO[ExuOutput]]]] = Some(Seq(io.mem_to_ooo.writeback)) @@ -921,6 +922,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) io.debugTopDown.toCore.robHeadLoadVio := lsq.io.debugTopDown.robHeadLoadVio io.debugTopDown.toCore.robHeadLoadMSHR := lsq.io.debugTopDown.robHeadLoadMSHR dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay + dcache.io.debugRolling := io.debugRolling val ldDeqCount = 
PopCount(io.ooo_to_mem.issue.take(exuParameters.LduCnt).map(_.valid)) val stDeqCount = PopCount(io.ooo_to_mem.issue.drop(exuParameters.LduCnt).map(_.valid)) diff --git a/src/main/scala/xiangshan/backend/rob/Rob.scala b/src/main/scala/xiangshan/backend/rob/Rob.scala index d72491f529a..75e827fdea4 100644 --- a/src/main/scala/xiangshan/backend/rob/Rob.scala +++ b/src/main/scala/xiangshan/backend/rob/Rob.scala @@ -199,6 +199,10 @@ class RobDispatchTopDownIO extends Bundle { val robHeadLsIssue = Output(Bool()) } +class RobDebugRollingIO extends Bundle { + val robTrueCommit = Output(UInt(64.W)) +} + class RobDeqPtrWrapper(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper { val io = IO(new Bundle { // for commits/flush @@ -443,6 +447,7 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer) val toDispatch = new RobDispatchTopDownIO val robHeadLqIdx = Valid(new LqPtr) } + val debugRolling = new RobDebugRollingIO }) def selectWb(index: Int, func: Seq[ExuConfig] => Boolean): Seq[(Seq[ExuConfig], ValidIO[ExuOutput])] = { @@ -1176,6 +1181,9 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer) io.debugTopDown.toDispatch.robHeadLsIssue := debug_lsIssue(deqPtr.value) io.debugTopDown.robHeadLqIdx.valid := debug_lqIdxValid(deqPtr.value) io.debugTopDown.robHeadLqIdx.bits := debug_microOp(deqPtr.value).lqIdx + + // rolling + io.debugRolling.robTrueCommit := ifCommitReg(trueCommitCnt) /** * DataBase info: diff --git a/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala b/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala index 1d0feafdf48..8d6314d1dec 100644 --- a/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala +++ b/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala @@ -24,6 +24,7 @@ import xiangshan._ import utils._ import utility._ import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes} +import xiangshan.backend.rob.RobDebugRollingIO import 
freechips.rocketchip.tilelink._ import freechips.rocketchip.util.{BundleFieldBase, UIntToOH1} import device.RAMHelper @@ -752,6 +753,7 @@ class DCacheIO(implicit p: Parameters) extends DCacheBundle { val pf_ctrl = Output(new PrefetchControlBundle) val force_write = Input(Bool()) val debugTopDown = new DCacheTopDownIO + val debugRolling = Flipped(new RobDebugRollingIO) } class DCache()(implicit p: Parameters) extends LazyModule with HasDCacheParameters { @@ -1329,6 +1331,7 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame } for (w <- 0 until LoadPipelineWidth) { fdpMonitor.io.pollution.cache_pollution(w) := ldu(w).io.prefetch_info.fdp.pollution } for (w <- 0 until LoadPipelineWidth) { fdpMonitor.io.pollution.demand_miss(w) := ldu(w).io.prefetch_info.fdp.demand_miss } + fdpMonitor.io.debugRolling := io.debugRolling //---------------------------------------- // Bloom Filter diff --git a/src/main/scala/xiangshan/mem/prefetch/FDP.scala b/src/main/scala/xiangshan/mem/prefetch/FDP.scala index c2e4932d08e..422b53ffe7a 100644 --- a/src/main/scala/xiangshan/mem/prefetch/FDP.scala +++ b/src/main/scala/xiangshan/mem/prefetch/FDP.scala @@ -23,6 +23,7 @@ import freechips.rocketchip.tilelink.ClientStates._ import freechips.rocketchip.tilelink.MemoryOpCategories._ import freechips.rocketchip.tilelink.TLPermissions._ import freechips.rocketchip.tilelink.{ClientMetadata, ClientStates, TLPermissions} +import xiangshan.backend.rob.RobDebugRollingIO import utils._ import utility._ import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey} @@ -206,6 +207,7 @@ class FDPrefetcherMonitorBundle()(implicit p: Parameters) extends XSBundle { } val pf_ctrl = Output(new PrefetchControlBundle) + val debugRolling = Flipped(new RobDebugRollingIO) } class FDPrefetcherMonitor()(implicit p: Parameters) extends XSModule { @@ -252,6 +254,31 @@ class FDPrefetcherMonitor()(implicit p: Parameters) extends XSModule { } } + // rolling by instr + XSPerfRolling( + 
"L1PrefetchAccuracyIns", + PopCount(io.accuracy.useful_prefetch), PopCount(io.accuracy.total_prefetch), + 1000, io.debugRolling.robTrueCommit, clock, reset + ) + + XSPerfRolling( + "L1PrefetchLatenessIns", + PopCount(io.timely.late_prefetch), PopCount(io.accuracy.total_prefetch), + 1000, io.debugRolling.robTrueCommit, clock, reset + ) + + XSPerfRolling( + "L1PrefetchPollutionIns", + PopCount(io.pollution.cache_pollution), PopCount(io.pollution.demand_miss), + 1000, io.debugRolling.robTrueCommit, clock, reset + ) + + XSPerfRolling( + "IPCIns", + io.debugRolling.robTrueCommit, 1.U, + 1000, io.debugRolling.robTrueCommit, clock, reset + ) + XSPerfAccumulate("io_refill", io.refill) XSPerfAccumulate("total_prefetch_en", io.accuracy.total_prefetch) XSPerfAccumulate("useful_prefetch_en", PopCount(io.accuracy.useful_prefetch) + io.timely.late_prefetch)