Skip to content

Commit

Permalink
feat(DCache): merge CMO requests into DCache TL-A Channel (#3968)
Browse files Browse the repository at this point in the history
* remove previous cmo datapath in memblock.
* add datapath for cmo requests between lsq and dcache.
* add new CMOUnit in MissQueue.
* bump rocket-chip & coupledL2.
  • Loading branch information
bosscharlie authored Dec 2, 2024
1 parent 4fc3a30 commit dc4fac1
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 37 deletions.
2 changes: 1 addition & 1 deletion rocket-chip
2 changes: 0 additions & 2 deletions src/main/scala/top/Configs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,6 @@ class MinimalConfig(n: Int = 1) extends Config(
"dcache",
isKeywordBitsOpt = p.dcacheParametersOpt.get.isKeywordBitsOpt
)),
hasCMO = p.HasCMO && site(EnableCHI),
)),
L2NBanks = 2,
prefetcher = None // if L2 pf_recv_node does not exist, disable SMS prefetcher
Expand Down Expand Up @@ -299,7 +298,6 @@ class WithNKBL2
prefetch = Seq(BOPParameters()) ++
(if (tp) Seq(TPParameters()) else Nil) ++
(if (p.prefetcher.nonEmpty) Seq(PrefetchReceiverParams()) else Nil),
hasCMO = p.HasCMO && site(EnableCHI),
enablePerf = !site(DebugOptionsKey).FPGAPlatform && site(DebugOptionsKey).EnablePerfDebug,
enableRollingDB = site(DebugOptionsKey).EnableRollingDB,
enableMonitor = site(DebugOptionsKey).AlwaysBasicDB,
Expand Down
12 changes: 0 additions & 12 deletions src/main/scala/xiangshan/XSTile.scala
Original file line number Diff line number Diff line change
Expand Up @@ -79,18 +79,6 @@ class XSTile()(implicit p: Parameters) extends LazyModule
case None =>
}

// CMO
l2top.inner.l2cache match {
case Some(l2) =>
l2.cmo_sink_node.foreach(recv => {
recv := memBlock.cmo_sender.get
})
l2.cmo_source_node.foreach(resp => {
memBlock.cmo_reciver.get := resp
})
case None =>
}

val core_l3_tpmeta_source_port = l2top.inner.l2cache match {
case Some(l2) => l2.tpmeta_source_node
case None => None
Expand Down
20 changes: 3 additions & 17 deletions src/main/scala/xiangshan/backend/MemBlock.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModul
import freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple}
import freechips.rocketchip.tile.HasFPUParameters
import freechips.rocketchip.tilelink._
import coupledL2.{PrefetchRecv, CMOReq, CMOResp}
import coupledL2.{PrefetchRecv}
import device.MsiInfoBundle
import utils._
import utility._
Expand Down Expand Up @@ -248,8 +248,6 @@ class MemBlockInlined()(implicit p: Parameters) extends LazyModule
val l3_pf_sender_opt = if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) coreParams.prefetcher.map(_ =>
BundleBridgeSource(() => new huancun.PrefetchRecv)
) else None
val cmo_sender = if (HasCMO) Some(BundleBridgeSource(() => DecoupledIO(new CMOReq))) else None
val cmo_reciver = if (HasCMO) Some(BundleBridgeSink(Some(() => DecoupledIO(new CMOResp)))) else None
val frontendBridge = LazyModule(new FrontendBridge)
// interrupt sinks
val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2))
Expand Down Expand Up @@ -1099,20 +1097,8 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)

lsq.io.maControl <> storeMisalignBuffer.io.sqControl

// lsq to l2 CMO
outer.cmo_sender match {
case Some(x) =>
x.out.head._1 <> lsq.io.cmoOpReq
case None =>
lsq.io.cmoOpReq.ready := false.B
}
outer.cmo_reciver match {
case Some(x) =>
x.in.head._1 <> lsq.io.cmoOpResp
case None =>
lsq.io.cmoOpResp.valid := false.B
lsq.io.cmoOpResp.bits := 0.U.asTypeOf(new CMOResp)
}
lsq.io.cmoOpReq <> dcache.io.cmoOpReq
lsq.io.cmoOpResp <> dcache.io.cmoOpResp

// Prefetcher
val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt
Expand Down
15 changes: 14 additions & 1 deletion src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,15 @@ class AtomicWordIO(implicit p: Parameters) extends DCacheBundle
val block_lr = Input(Bool())
}

/** Request payload for a cache-block management operation (CMO).
  *
  * Used as the payload of the decoupled `cmoOpReq` port on DCacheIO.
  */
class CMOReq(implicit p: Parameters) extends Bundle {
// Operation selector: 0-cbo.clean, 1-cbo.flush, 2-cbo.inval, 3-cbo.zero
// (values 4-7 of the 3-bit field are not given a meaning here).
val opcode = UInt(3.W) // 0-cbo.clean, 1-cbo.flush, 2-cbo.inval, 3-cbo.zero
// Target address of the operation, fixed at 64 bits.
// NOTE(review): presumably a physical address, since it is used as a TileLink
// `toAddress` downstream — confirm against the producer in the LSQ.
val address = UInt(64.W)
}

/** Completion payload for a cache-block management operation (CMO).
  *
  * Used as the payload of the decoupled `cmoOpResp` port on DCacheIO.
  */
class CMOResp(implicit p: Parameters) extends Bundle {
// 64-bit address identifying the operation being acknowledged; it has the
// same width as `CMOReq.address`.
val address = UInt(64.W)
}

// used by load unit
class DCacheLoadIO(implicit p: Parameters) extends DCacheWordIO
{
Expand Down Expand Up @@ -786,6 +795,8 @@ class DCacheIO(implicit p: Parameters) extends DCacheBundle {
val debugTopDown = new DCacheTopDownIO
val debugRolling = Flipped(new RobDebugRollingIO)
val l2_hint = Input(Valid(new L2ToL1Hint()))
val cmoOpReq = Flipped(DecoupledIO(new CMOReq))
val cmoOpResp = DecoupledIO(new CMOResp)
}

private object ArbiterCtrl {
Expand Down Expand Up @@ -1425,6 +1436,8 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame

missReqArb.io.out <> missQueue.io.req
missReadyGen.io.queryMQ <> missQueue.io.queryMQ
io.cmoOpReq <> missQueue.io.cmo_req
io.cmoOpResp <> missQueue.io.cmo_resp

for (w <- 0 until LoadPipelineWidth) { ldu(w).io.mq_enq_cancel := missQueue.io.mq_enq_cancel }

Expand Down Expand Up @@ -1514,7 +1527,7 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame

// in L1DCache, we only expect Grant[Data], CBOAck and ReleaseAck
bus.d.ready := false.B
when (bus.d.bits.opcode === TLMessages.Grant || bus.d.bits.opcode === TLMessages.GrantData) {
when (bus.d.bits.opcode === TLMessages.Grant || bus.d.bits.opcode === TLMessages.GrantData || bus.d.bits.opcode === TLMessages.CBOAck) {
missQueue.io.mem_grant <> bus.d
} .elsewhen (bus.d.bits.opcode === TLMessages.ReleaseAck) {
wb.io.mem_grant <> bus.d
Expand Down
74 changes: 73 additions & 1 deletion src/main/scala/xiangshan/cache/dcache/mainpipe/MissQueue.scala
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import difftest._
import freechips.rocketchip.tilelink.ClientStates._
import freechips.rocketchip.tilelink.MemoryOpCategories._
import freechips.rocketchip.tilelink.TLPermissions._
import freechips.rocketchip.tilelink.TLMessages._
import freechips.rocketchip.tilelink._
import huancun.{AliasKey, DirtyKey, PrefetchKey}
import org.chipsalliance.cde.config.Parameters
Expand Down Expand Up @@ -285,6 +286,63 @@ class MissReqPipeRegBundle(edge: TLEdgeOut)(implicit p: Parameters) extends DCac
}
}

/** Sequencer that carries one CMO request at a time through a TileLink round trip.
  *
  * A request accepted on `io.req` is latched, presented on `io.req_chanA` as the
  * message built by `edge.CacheBlockOperation`, held until a response is accepted
  * on `io.resp_chanD`, and finally reported on `io.resp_to_lsq` together with the
  * latched address. A new request is only accepted once that report has been taken.
  *
  * @param edge TileLink client edge used to build the channel-A message and to
  *             parameterize the channel-A / channel-D bundles
  */
class CMOUnit(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // incoming CMO request
    val req = Flipped(DecoupledIO(new CMOReq))
    // outgoing TileLink channel-A message
    val req_chanA = DecoupledIO(new TLBundleA(edge.bundle))
    // incoming TileLink channel-D response
    val resp_chanD = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
    // completion notification
    val resp_to_lsq = DecoupledIO(new CMOResp)
  })

  // idle -> send request -> wait for response -> report completion -> idle
  val s_idle :: s_sreq :: s_wresp :: s_lsq_resp :: Nil = Enum(4)
  val state = RegInit(s_idle)
  val state_next = WireInit(state)
  // Request captured on acceptance; it stays stable for the rest of the transaction.
  val req = RegEnable(io.req.bits, io.req.fire)

  // Each handshake port is active in exactly one state.
  io.req.ready := state === s_idle
  io.req_chanA.valid := state === s_sreq
  io.resp_chanD.ready := state === s_wresp
  io.resp_to_lsq.valid := state === s_lsq_resp

  // Only the bundle half of the pair returned by the edge helper is used.
  // NOTE(review): the fixed source ID nMissEntries + 1 is presumably chosen so it
  // does not collide with miss-entry IDs — verify against the client ID range.
  io.req_chanA.bits := edge.CacheBlockOperation(
    fromSource = (cfg.nMissEntries + 1).U,
    toAddress = req.address,
    lgSize = (log2Up(cfg.blockBytes)).U,
    opcode = req.opcode
  )._2

  // The completion echoes the address of the latched request.
  io.resp_to_lsq.bits.address := req.address

  // Transition table: (current state, handshake that advances it, successor).
  // With no matching handshake, state_next keeps its default (the current state).
  Seq(
    (s_idle, io.req.fire, s_sreq),
    (s_sreq, io.req_chanA.fire, s_wresp),
    (s_wresp, io.resp_chanD.fire, s_lsq_resp),
    (s_lsq_resp, io.resp_to_lsq.fire, s_idle)
  ).foreach { case (from, advance, to) =>
    when (state === from && advance) {
      state_next := to
    }
  }

  state := state_next

  // A new request must not be presented while one is still in flight.
  assert(state === s_idle || !io.req.valid)
  // A channel-D response may only be presented while the unit is waiting for it.
  assert(state === s_wresp || !io.resp_chanD.valid)
}

class MissEntry(edge: TLEdgeOut, reqNum: Int)(implicit p: Parameters) extends DCacheModule
with HasCircularQueuePtrHelper
{
Expand Down Expand Up @@ -844,6 +902,10 @@ class MissQueue(edge: TLEdgeOut, reqNum: Int)(implicit p: Parameters) extends DC
val resp = Output(new MissResp)
val refill_to_ldq = ValidIO(new Refill)

// cmo req
val cmo_req = Flipped(DecoupledIO(new CMOReq))
val cmo_resp = DecoupledIO(new CMOResp)

val queryMQ = Vec(reqNum, Flipped(new DCacheMQQueryIOBundle))

val mem_acquire = DecoupledIO(new TLBundleA(edge.bundle))
Expand Down Expand Up @@ -898,6 +960,7 @@ class MissQueue(edge: TLEdgeOut, reqNum: Int)(implicit p: Parameters) extends DC
// 128KBL1: FIXME: provide vaddr for l2

val entries = Seq.fill(cfg.nMissEntries)(Module(new MissEntry(edge, reqNum)))
val cmo_unit = Module(new CMOUnit(edge))

val miss_req_pipe_reg = RegInit(0.U.asTypeOf(new MissReqPipeRegBundle(edge)))
val acquire_from_pipereg = Wire(chiselTypeOf(io.mem_acquire))
Expand Down Expand Up @@ -1055,6 +1118,15 @@ class MissQueue(edge: TLEdgeOut, reqNum: Int)(implicit p: Parameters) extends DC
}
}

cmo_unit.io.req <> io.cmo_req
io.cmo_resp <> cmo_unit.io.resp_to_lsq
when (io.mem_grant.valid && io.mem_grant.bits.opcode === TLMessages.CBOAck) {
cmo_unit.io.resp_chanD <> io.mem_grant
} .otherwise {
cmo_unit.io.resp_chanD.valid := false.B
cmo_unit.io.resp_chanD.bits := DontCare
}

io.req.ready := accept
io.mq_enq_cancel := io.req.bits.cancel
io.refill_to_ldq.valid := Cat(entries.map(_.io.refill_to_ldq.valid)).orR
Expand All @@ -1069,7 +1141,7 @@ class MissQueue(edge: TLEdgeOut, reqNum: Int)(implicit p: Parameters) extends DC
XSPerfAccumulate("acquire_fire_from_pipereg", acquire_from_pipereg.fire)
XSPerfAccumulate("pipereg_valid", miss_req_pipe_reg.reg_valid())

val acquire_sources = Seq(acquire_from_pipereg) ++ entries.map(_.io.mem_acquire)
val acquire_sources = Seq(cmo_unit.io.req_chanA, acquire_from_pipereg) ++ entries.map(_.io.mem_acquire)
TLArbiter.lowest(edge, io.mem_acquire, acquire_sources:_*)
TLArbiter.lowest(edge, io.mem_finish, entries.map(_.io.mem_finish):_*)

Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ import xiangshan._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput}
import xiangshan.cache._
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, MemoryOpConstants}
import xiangshan.cache.{CMOReq, CMOResp}
import xiangshan.cache.mmu.{TlbRequestIO, TlbHintIO}
import xiangshan.mem._
import xiangshan.backend._
import xiangshan.backend.rob.RobLsqIO
import coupledL2.{CMOReq, CMOResp}
import xiangshan.backend.fu.FuType

class ExceptionAddrIO(implicit p: Parameters) extends XSBundle {
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ import utils._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheLineIO, DCacheWordIO, MemoryOpConstants}
import xiangshan.cache.{CMOReq, CMOResp}
import xiangshan.backend._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.backend.Bundles.{DynInst, MemExuOutput}
import xiangshan.backend.decode.isa.bitfield.{Riscv32BitInst, XSInstBitFields}
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.FuType
import xiangshan.ExceptionNO._
import coupledL2.{CMOReq, CMOResp}

class SqPtr(implicit p: Parameters) extends CircularQueuePtr[SqPtr](
p => p(XSCoreParamsKey).StoreQueueSize
Expand Down

0 comments on commit dc4fac1

Please sign in to comment.