From 4fc3a30c9a0fc8037a144fb6b0a1f80effc7b4ef Mon Sep 17 00:00:00 2001 From: "Xu, Zefan" Date: Mon, 2 Dec 2024 11:50:10 +0800 Subject: [PATCH] fix(TLB): avoid freeze when GPF occurs (#3964) L1TLB does not store gpaddr, but gpaddr is needed when a guest page fault occurs. In that situation, L1TLB needs to send a PTW request to get the gpaddr — a mechanism we call getGpa. The getGpa mechanism can only handle one GPF TLB request (the first one) and expects the corresponding TLB entry to still be present in L1TLB. L1TLB replacement uses the PLRU (Pseudo-LRU) algorithm, which may replace items that are not necessarily the least recently used. We found a case where L1TLB replaces that GPF TLB entry even though the entry was accessed recently. This results in a deadlock in the getGpa mechanism, which eventually causes the entire core to freeze. To solve this problem, we decided to prevent any unrelated PTW refills while the getGpa mechanism is working (need_gpa). After solving that problem, we identified that under certain conditions — because the other PTW responses are never refilled — the other TLB requests keep replaying, which triggers PTW requests that occupy the L2TLB request path, preventing the GPF PTW request from being answered and ultimately freezing the processor. To resolve this, we also decided to prevent any unrelated PTW requests while need_gpa is set. This patch additionally changes the code style of some combinational logic signals: using when/otherwise is clearer and easier to understand than a complex logical expression. 
--- src/main/scala/xiangshan/cache/mmu/TLB.scala | 23 +++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/main/scala/xiangshan/cache/mmu/TLB.scala b/src/main/scala/xiangshan/cache/mmu/TLB.scala index 9931392265..0a9da82b12 100644 --- a/src/main/scala/xiangshan/cache/mmu/TLB.scala +++ b/src/main/scala/xiangshan/cache/mmu/TLB.scala @@ -160,7 +160,9 @@ class TLB(Width: Int, nRespDups: Int = 1, Block: Seq[Boolean], q: TLBParameters) } } - val refill = ptw.resp.fire && !(ptw.resp.bits.getGpa) && !flush_mmu + val refill = ptw.resp.fire && !(ptw.resp.bits.getGpa) && !need_gpa && !flush_mmu + // prevent ptw refill when: 1) it's a getGpa request; 2) l1tlb is in need_gpa state; 3) mmu is being flushed. + refill_to_mem := DontCare val entries = Module(new TlbStorageWrapper(Width, q, nRespDups)) entries.io.base_connect(sfence, csr, satp) @@ -439,12 +441,27 @@ class TLB(Width: Int, nRespDups: Int = 1, Block: Seq[Boolean], q: TLBParameters) val ptw_already_back = GatedValidRegNext(ptw.resp.fire) && req_s2xlate === ptw_resp_bits_reg.s2xlate && ptw_resp_bits_reg.hit(get_pn(req_out(idx).vaddr), io.csr.satp.asid, io.csr.vsatp.asid, io.csr.hgatp.vmid, allType = true) val ptw_getGpa = req_need_gpa && hitVec(idx) val need_gpa_vpn_hit = need_gpa_vpn === get_pn(req_out(idx).vaddr) - io.ptw.req(idx).valid := req_out_v(idx) && missVec(idx) && !(ptw_just_back || ptw_already_back || (!need_gpa_vpn_hit && req_out_v(idx) && need_gpa && !resp_gpa_refill && ptw_getGpa)) // TODO: remove the regnext, timing - io.tlbreplay(idx) := req_out_v(idx) && missVec(idx) && (ptw_just_back || ptw_already_back || (!need_gpa_vpn_hit && req_out_v(idx) && need_gpa && !resp_gpa_refill && ptw_getGpa)) + + io.ptw.req(idx).valid := false.B; + io.tlbreplay(idx) := false.B; + + when (req_out_v(idx) && missVec(idx)) { + // NOTE: for an miss tlb request: either send a ptw request, or ask for a replay + when (ptw_just_back || ptw_already_back) { + io.tlbreplay(idx) := true.B; + } 
.elsewhen (need_gpa && !need_gpa_vpn_hit && !resp_gpa_refill) { + // not send any unrelated ptw request when l1tlb is in need_gpa state + io.tlbreplay(idx) := true.B; + } .otherwise { + io.ptw.req(idx).valid := true.B; + } + } + when (io.requestor(idx).req_kill && GatedValidRegNext(io.requestor(idx).req.fire)) { io.ptw.req(idx).valid := false.B io.tlbreplay(idx) := true.B } + io.ptw.req(idx).bits.vpn := get_pn(req_out(idx).vaddr) io.ptw.req(idx).bits.s2xlate := req_s2xlate io.ptw.req(idx).bits.getGpa := ptw_getGpa