Skip to content

Commit

Permalink
TLB: optimize tlb filter for lower latency and tlb hint (#2366)
Browse files Browse the repository at this point in the history
* TLB: optimize tlb filter for lower latency and tlb hint

WIP: TLB hint will be implemented soon

* add tlb hint wakeup

* TLB: fix bugs of tlb hint

* fix delay hint wakeup

* Revert "fix delay hint wakeup"

This reverts commit 311f33f.

* TLB: Fix wrong condition of ptw_just_back

* TLB: Fix dtlbrepeater resettree

* TLBRepeater: Fix bug in hint-full logic

---------

Co-authored-by: sfencevma <[email protected]>
  • Loading branch information
good-circle and cz4e authored Nov 8, 2023
1 parent b191d68 commit 185e616
Show file tree
Hide file tree
Showing 9 changed files with 374 additions and 45 deletions.
30 changes: 20 additions & 10 deletions src/main/scala/xiangshan/backend/MemBlock.scala
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,6 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
ptw.io.sfence <> sfence
ptw.io.csr.tlb <> tlbcsr
ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr
ptw.io.tlb(0) <> io.fetch_to_mem.itlb

val perfEventsPTW = Wire(Vec(19, new PerfEvent))
if (!coreParams.softPTW) {
Expand All @@ -434,7 +433,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
tlb_prefetch.io // let the module have name in waveform
})
val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
val ptwio = Wire(new VectorTlbPtwIO(exuParameters.LduCnt + exuParameters.StuCnt + 2)) // load + store + hw prefetch
val ptwio = Wire(new VectorTlbPtwIO(exuParameters.LduCnt + 1 + exuParameters.StuCnt + 1)) // load + store + hw prefetch
val dtlb_reqs = dtlb.map(_.requestor).flatten
val dtlb_pmps = dtlb.map(_.pmp).flatten
dtlb.map(_.hartId := io.hartId)
Expand All @@ -446,11 +445,11 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
require(ldtlbParams.outReplace == pftlbParams.outReplace)
require(ldtlbParams.outReplace)

val replace = Module(new TlbReplace(exuParameters.LduCnt + exuParameters.StuCnt + 2, ldtlbParams))
val replace = Module(new TlbReplace(exuParameters.LduCnt + 1 + exuParameters.StuCnt + 1, ldtlbParams))
replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.entry.tag)
} else {
if (ldtlbParams.outReplace) {
val replace_ld = Module(new TlbReplace(exuParameters.LduCnt, ldtlbParams))
val replace_ld = Module(new TlbReplace(exuParameters.LduCnt + 1, ldtlbParams))
replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.entry.tag)
}
if (sttlbParams.outReplace) {
Expand All @@ -467,6 +466,13 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed), init = false.B)
ptwio.resp.ready := true.B

// Per-load-unit flag: load-TLB miss request `i` is valid in the same cycle that the
// registered PTW response (ptw_resp_v / ptw_resp_next) targeting vector(0) hits its vpn.
// The vector is sized by the number of load units rather than a literal 2, because it
// is indexed per load unit where the load-unit tlb_hint signals are driven.
val tlbreplay = WireInit(VecInit(Seq.fill(exuParameters.LduCnt)(false.B)))
// Keep the signal in the generated design so it stays visible for debugging.
dontTouch(tlbreplay)
for (i <- 0 until exuParameters.LduCnt) {
  val ldPtwReq = dtlb_ld(0).ptw.req(i)
  // Same hit check that is used to suppress a PTW request matching the returning response.
  val respHitsReq = ptw_resp_next.data.hit(ldPtwReq.bits.vpn, tlbcsr.satp.asid, allType = true, ignoreAsid = true)
  tlbreplay(i) := ldPtwReq.valid && ptw_resp_next.vector(0) && ptw_resp_v && respHitsReq
}

dtlb.flatMap(a => a.ptw.req)
.zipWithIndex
.foreach{ case (tlb, i) =>
Expand All @@ -475,7 +481,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
else if (i < (exuParameters.LduCnt + 1)) Cat(ptw_resp_next.vector.take(exuParameters.LduCnt + 1)).orR
else if (i < (exuParameters.LduCnt + 1 + exuParameters.StuCnt)) Cat(ptw_resp_next.vector.drop(exuParameters.LduCnt + 1).take(exuParameters.StuCnt)).orR
else Cat(ptw_resp_next.vector.drop(exuParameters.LduCnt + exuParameters.StuCnt + 1)).orR
else Cat(ptw_resp_next.vector.drop(exuParameters.LduCnt + 1 + exuParameters.StuCnt)).orR
ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit &&
ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, allType = true, ignoreAsid = true))
}
Expand All @@ -488,11 +494,10 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.drop(exuParameters.LduCnt + exuParameters.StuCnt + 1)).orR)
}

val dtlbRepeater1 = PTWFilter(ldtlbParams.fenceDelay, ptwio, sfence, tlbcsr, l2tlbParams.dfilterSize)
val dtlbRepeater2 = PTWRepeaterNB(passReady = false, ldtlbParams.fenceDelay, dtlbRepeater1.io.ptw, ptw.io.tlb(1), sfence, tlbcsr)
val dtlbRepeater = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize)
val itlbRepeater2 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr)

lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater1.io.rob_head_miss_in_tlb
lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb

// pmp
val pmp = Module(new PMP())
Expand Down Expand Up @@ -651,6 +656,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
loadUnits(i).io.replay <> lsq.io.replay(i)

loadUnits(i).io.l2_hint <> io.l2_hint
loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id
loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full ||
RegNext(tlbreplay(i)) || RegNext(dtlb_ld(0).tlbreplay(i))

// passdown to lsq (load s2)
lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin
Expand All @@ -661,6 +669,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
lsq.io.l2_hint.valid := io.l2_hint.valid
lsq.io.l2_hint.bits.sourceId := io.l2_hint.bits.sourceId

lsq.io.tlb_hint <> dtlbRepeater.io.hint.get

// alter writeback exception info
io.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err

Expand Down Expand Up @@ -998,7 +1008,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
ResetGenNode(Seq(ResetGenNode(Seq(CellNode(reset_io_frontend))))),
CellNode(reset_io_backend),
ModuleNode(itlbRepeater2),
ModuleNode(dtlbRepeater2),
ModuleNode(dtlbRepeater),
ModuleNode(ptw),
ModuleNode(ptw_to_l2_buffer)
)
Expand All @@ -1011,7 +1021,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)

// top-down info
dcache.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
dtlbRepeater1.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
dtlbRepeater.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
lsq.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
io.debugTopDown.toCore.robHeadMissInDCache := dcache.io.debugTopDown.robHeadMissInDCache
io.debugTopDown.toCore.robHeadTlbReplay := lsq.io.debugTopDown.robHeadTlbReplay
Expand Down
22 changes: 21 additions & 1 deletion src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,26 @@ class TlbPtwIOwithMemIdx(Width: Int = 1)(implicit p: Parameters) extends TlbBund
}
}

/** Per-load-unit TLB hint.
  *
  * One instance exists per load unit (see `TlbHintIO.req`); in MemBlock, entry `i`
  * of the DTLB repeater's hint drives `loadUnits(i).io.tlb_hint`.
  */
class TlbHintReq(implicit p: Parameters) extends TlbBundle {
// Identifier wide enough to index `loadfiltersize` entries.
// NOTE(review): presumably the load-filter entry tracking this miss — confirm.
val id = Output(UInt(log2Up(loadfiltersize).W))
// NOTE(review): name suggests "load filter has no free entry"; MemBlock additionally
// ORs registered tlbreplay conditions into the value seen by the load unit — confirm.
val full = Output(Bool())
}

/** Payload of the TLB hint response (carried as `TlbHintIO.resp`). */
class TLBHintResp(implicit p: Parameters) extends TlbBundle {
// Identifier wide enough to index `loadfiltersize` entries.
val id = Output(UInt(log2Up(loadfiltersize).W))
// Set when a single PTW response matches more than one entry in the filter.
// Example: vaddr 0 = 0x80000000 and vaddr 1 = 0x80010000 are not in the same
// 4K page, so two separate requests are sent to the PTW. If the PTW response
// turns out to be a 2M or 1G huge page covering both addresses, both filter
// entries hit on that one response and both need to be replayed.
val replay_all = Output(Bool())
}

/** TLB hint interface.
  *
  * In MemBlock this bundle is connected as a whole to `lsq.io.tlb_hint`, and the
  * individual `req(i)` entries also drive `loadUnits(i).io.tlb_hint`.
  */
class TlbHintIO(implicit p: Parameters) extends TlbBundle {
// One hint per load unit (`exuParameters.LduCnt` entries).
val req = Vec(exuParameters.LduCnt, new TlbHintReq)
// Valid-qualified response; see TLBHintResp for the meaning of `replay_all`.
val resp = ValidIO(new TLBHintResp)
}

class MMUIOBaseBundle(implicit p: Parameters) extends TlbBundle {
val sfence = Input(new SfenceBundle)
val csr = Input(new TlbCsrBundle)
Expand Down Expand Up @@ -620,7 +640,7 @@ class TlbIO(Width: Int, nRespDups: Int = 1, q: TLBParameters)(implicit p: Parame
val refill_to_mem = Output(new TlbRefilltoMemIO())
val replace = if (q.outReplace) Flipped(new TlbReplaceIO(Width, q)) else null
val pmp = Vec(Width, ValidIO(new PMPReqBundle()))

val tlbreplay = Vec(Width, Output(Bool()))
}

class VectorTlbPtwIO(Width: Int)(implicit p: Parameters) extends TlbBundle {
Expand Down
4 changes: 4 additions & 0 deletions src/main/scala/xiangshan/cache/mmu/MMUConst.scala
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ trait HasTlbConst extends HasXSParameter {
val sectorppnLen = ppnLen - sectortlbwidth
val sectorvpnLen = vpnLen - sectortlbwidth

// Size parameters for the load / store / prefetch TLB filters.
// `loadfiltersize` also fixes the width of the hint `id` fields
// (log2Up(loadfiltersize)) in TlbHintReq and TLBHintResp.
// NOTE(review): presumably these are the entry counts of the per-type PTW filters —
// confirm against the filter implementation.
val loadfiltersize = 16
val storefiltersize = 8
val prefetchfiltersize = 8

val sramSinglePort = true

val timeOutThreshold = 10000
Expand Down
Loading

0 comments on commit 185e616

Please sign in to comment.