feat(Zicclsm): minor refactoring misalign and support for vector misalign
Anzooooo committed Dec 6, 2024
1 parent 0502d09 commit 63095de
Showing 18 changed files with 1,085 additions and 551 deletions.
74 changes: 57 additions & 17 deletions src/main/scala/xiangshan/backend/MemBlock.scala
@@ -457,14 +457,17 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)

// misalignBuffer will overwrite the source from ldu if it is about to writeback
val misalignWritebackOverride = Mux(
loadMisalignBuffer.io.writeBack.valid,
loadMisalignBuffer.io.writeBack.bits,
loadUnits(MisalignWBPort).io.ldout.bits
loadUnits(MisalignWBPort).io.ldout.valid,
loadUnits(MisalignWBPort).io.ldout.bits,
loadMisalignBuffer.io.writeBack.bits
)
ldaExeWbReqs(MisalignWBPort).valid := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid
ldaExeWbReqs(MisalignWBPort).bits := misalignWritebackOverride
loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready
ldaExeWbReqs(MisalignWBPort).valid := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid
ldaExeWbReqs(MisalignWBPort).bits := misalignWritebackOverride
loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid
loadMisalignBuffer.io.loadOutValid := loadUnits(MisalignWBPort).io.ldout.valid
loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid
loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready
ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid
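With this change the in-pipeline load result takes priority on the shared writeback port, and the misalign buffer is only granted the port in cycles where the load unit is not writing back. A minimal Chisel sketch of that handshake, using made-up names (WritebackArb, fromLoadUnit, fromMisalignBuf) rather than the real XiangShan IO:

```scala
import chisel3._
import chisel3.util._

class WritebackArb(width: Int) extends Module {
  val io = IO(new Bundle {
    val fromLoadUnit    = Flipped(Decoupled(UInt(width.W))) // priority source
    val fromMisalignBuf = Flipped(Decoupled(UInt(width.W)))
    val out             = Decoupled(UInt(width.W))
  })

  // The port is busy whenever either source has something to write back.
  io.out.valid := io.fromLoadUnit.valid || io.fromMisalignBuf.valid
  // The load unit wins when both are valid.
  io.out.bits  := Mux(io.fromLoadUnit.valid, io.fromLoadUnit.bits, io.fromMisalignBuf.bits)

  io.fromLoadUnit.ready    := io.out.ready
  // The misalign buffer must not dequeue while the load unit is using the port.
  io.fromMisalignBuf.ready := io.out.ready && !io.fromLoadUnit.valid
}
```

The loadOutValid / loadVecOutValid connections above feed the load unit's scalar and vector writeback valids back to the buffer, presumably so it can tell when its own writeback slot was pre-empted.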

// loadUnit will overwrite the source from uncache if it is about to writeback
ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout
@@ -805,6 +808,7 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
dcache.io.lsu.load(0).s0_pc := vSegmentUnit.io.rdcache.s0_pc
dcache.io.lsu.load(0).s1_pc := vSegmentUnit.io.rdcache.s1_pc
dcache.io.lsu.load(0).s2_pc := vSegmentUnit.io.rdcache.s2_pc
dcache.io.lsu.load(0).is128Req := vSegmentUnit.io.rdcache.is128Req
}.otherwise {
loadUnits(i).io.dcache.req.ready := dcache.io.lsu.load(i).req.ready

@@ -816,6 +820,7 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
dcache.io.lsu.load(0).s0_pc := loadUnits(0).io.dcache.s0_pc
dcache.io.lsu.load(0).s1_pc := loadUnits(0).io.dcache.s1_pc
dcache.io.lsu.load(0).s2_pc := loadUnits(0).io.dcache.s2_pc
dcache.io.lsu.load(0).is128Req := loadUnits(0).io.dcache.is128Req
}

// forward
@@ -917,7 +922,7 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
// connect misalignBuffer
loadMisalignBuffer.io.req(i) <> loadUnits(i).io.misalign_buf

if (i == 0) {
if (i == MisalignWBPort) {
loadUnits(i).io.misalign_ldin <> loadMisalignBuffer.io.splitLoadReq
loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp
} else {
@@ -1088,7 +1093,7 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
loadMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
loadMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

lsq.io.flushFrmMaBuf := loadMisalignBuffer.io.flushLdExpBuff
lsq.io.loadMisalignFull := loadMisalignBuffer.io.loadMisalignFull

storeMisalignBuffer.io.redirect <> redirect
storeMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
@@ -1258,16 +1263,22 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
stOut(0).bits := mmioStout.bits
mmioStout.ready := true.B
}

// vec mmio writeback
lsq.io.vecmmioStout.ready := false.B
when (lsq.io.vecmmioStout.valid && !storeUnits(0).io.vecstout.valid) {
stOut(0).valid := true.B
stOut(0).bits := lsq.io.vecmmioStout.bits
lsq.io.vecmmioStout.ready := true.B
}
// when (lsq.io.vecmmioStout.valid && !storeUnits(0).io.vecstout.valid) {
// stOut(0).valid := true.B
// stOut(0).bits := lsq.io.vecmmioStout.bits
// lsq.io.vecmmioStout.ready := true.B
// }
//

// misalign buffer will overwrite stOut(0)
storeMisalignBuffer.io.writeBack.ready := true.B
when (storeMisalignBuffer.io.writeBack.valid) {
val storeMisalignCanWriteBack = !mmioStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid
storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack
storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid
storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid
when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) {
stOut(0).valid := true.B
stOut(0).bits := storeMisalignBuffer.io.writeBack.bits
}
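The store side applies the same idea: the misalign buffer may only claim stOut(0) in cycles where the MMIO path and store unit 0 (scalar and vector) are all idle. A stripped-down sketch of just that gating, with illustrative signal names rather than the real IO:

```scala
import chisel3._
import chisel3.util._

class StoreMisalignWbGate extends Module {
  val io = IO(new Bundle {
    val mmioValid     = Input(Bool())  // e.g. mmioStout.valid
    val stoutValid    = Input(Bool())  // e.g. storeUnits(0).io.stout.valid
    val vecstoutValid = Input(Bool())  // e.g. storeUnits(0).io.vecstout.valid
    val misalignWb    = Flipped(Decoupled(UInt(64.W)))
    val claimPort     = Output(Bool()) // misalign buffer drives the port this cycle
  })

  // Port 0 is free for the misalign buffer only when nobody else is writing back on it.
  val canWriteBack = !io.mmioValid && !io.stoutValid && !io.vecstoutValid
  io.misalignWb.ready := canWriteBack
  io.claimPort        := io.misalignWb.valid && canWriteBack
}
```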
@@ -1448,6 +1459,9 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
(0 until VstuCnt).foreach{i =>
vsMergeBuffer(i).io.fromPipeline := DontCare
vsMergeBuffer(i).io.fromSplit := DontCare

vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush
vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex
}

(0 until VstuCnt).foreach{i =>
@@ -1463,6 +1477,9 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
)
vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data

vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full
vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid

}
(0 until VlduCnt).foreach{i =>
vlSplit(i).io.redirect <> redirect
@@ -1481,12 +1498,35 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
vfofBuffer.io.in(i).bits := io.ooo_to_mem.issueVldu(i).bits
}
(0 until LduCnt).foreach{i=>
vlMergeBuffer.io.fromPipeline(i) <> loadUnits(i).io.vecldout
loadUnits(i).io.vecldout.ready := vlMergeBuffer.io.fromPipeline(i).ready
loadMisalignBuffer.io.vecWriteBack.ready := true.B

if (i == 1) {
when(loadUnits(i).io.vecldout.valid) {
vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits
} .otherwise {
vlMergeBuffer.io.fromPipeline(i).valid := loadMisalignBuffer.io.vecWriteBack.valid
vlMergeBuffer.io.fromPipeline(i).bits := loadMisalignBuffer.io.vecWriteBack.bits
}
} else {
vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits
}
}

(0 until StaCnt).foreach{i=>
if(i < VstuCnt){
vsMergeBuffer(i).io.fromPipeline.head <> storeUnits(i).io.vecstout
storeUnits(i).io.vecstout.ready := true.B
storeMisalignBuffer.io.vecWriteBack(i).ready := vsMergeBuffer(i).io.fromPipeline.head.ready

when(storeUnits(i).io.vecstout.valid) {
vsMergeBuffer(i).io.fromPipeline.head.valid := storeUnits(i).io.vecstout.valid
vsMergeBuffer(i).io.fromPipeline.head.bits := storeUnits(i).io.vecstout.bits
} .otherwise {
vsMergeBuffer(i).io.fromPipeline.head.valid := storeMisalignBuffer.io.vecWriteBack(i).valid
vsMergeBuffer(i).io.fromPipeline.head.bits := storeMisalignBuffer.io.vecWriteBack(i).bits
}
}
}
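The same "pipeline first, misalign buffer fills the idle cycles" selection is repeated for the vector merge-buffer ports in both loops above. If one wanted to factor it out, a reusable helper could look roughly like this; the helper does not exist in the code base, and it simplifies the per-port ready wiring that the real code handles case by case:

```scala
import chisel3._
import chisel3.util._

object PriorityWriteback {
  /** Drive `sink` from `primary` when it is valid, otherwise from `secondary`.
    * `secondary` only sees ready when `primary` is idle.
    */
  def apply[T <: Data](sink: DecoupledIO[T],
                       primary: DecoupledIO[T],
                       secondary: DecoupledIO[T]): Unit = {
    sink.valid      := primary.valid || secondary.valid
    sink.bits       := Mux(primary.valid, primary.bits, secondary.bits)
    primary.ready   := sink.ready
    secondary.ready := sink.ready && !primary.valid
  }
}
```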

3 changes: 2 additions & 1 deletion src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala
@@ -401,7 +401,8 @@ class DCacheWordReqWithVaddr(implicit p: Parameters) extends DCacheWordReq {
class DCacheWordReqWithVaddrAndPfFlag(implicit p: Parameters) extends DCacheWordReqWithVaddr {
val prefetch = Bool()
val vecValid = Bool()

val sqNeedDeq = Bool()

def toDCacheWordReqWithVaddr() = {
val res = Wire(new DCacheWordReqWithVaddr)
res.vaddr := vaddr
2 changes: 1 addition & 1 deletion src/main/scala/xiangshan/cache/mmu/Repeater.scala
@@ -552,7 +552,7 @@ class PTWFilter(Width: Int, Size: Int, FenceDelay: Int)(implicit p: Parameters)

val issue_valid = v(issPtr) && !isEmptyIss && !inflight_full
val issue_filtered = ptwResp_valid && ptwResp_hit(io.ptw.req(0).bits.vpn, io.ptw.req(0).bits.s2xlate, ptwResp)
val issue_fire_fake = issue_valid && (io.ptw.req(0).ready || (issue_filtered && false.B /*timing-opt*/))
val issue_fire_fake = issue_valid && io.ptw.req(0).ready
io.ptw.req(0).valid := issue_valid && !issue_filtered
io.ptw.req(0).bits.vpn := vpn(issPtr)
io.ptw.req(0).bits.s2xlate := s2xlate(issPtr)
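For reference, the dropped right-hand-side term was constant-false (the disabled timing optimisation), so the new expression is equivalent and this is a pure cleanup:

```scala
// issue_valid && (io.ptw.req(0).ready || (issue_filtered && false.B))
//   == issue_valid && (io.ptw.req(0).ready || false.B)
//   == issue_valid && io.ptw.req(0).ready
```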
62 changes: 46 additions & 16 deletions src/main/scala/xiangshan/mem/MemCommon.scala
Expand Up @@ -54,6 +54,28 @@ object genVWmask {
}
}

object genBasemask {
/**
*
* @param addr
* @param sizeEncode
* @return The 16-byte aligned base mask for the given size encoding.
*
* Example:
* Address: 0x80000003, encoding size: 'b11
* Return: 0xff
*/
def apply(addr: UInt, sizeEncode: UInt): UInt = {
LookupTree(sizeEncode, List(
"b00".U -> 0x1.U,
"b01".U -> 0x3.U,
"b10".U -> 0xf.U,
"b11".U -> 0xff.U
))
}
}
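The lookup above amounts to a mask of 2^size consecutive byte enables. A plain-Scala reference model of just that table (illustration only, not part of the RTL; it ignores addr exactly as the lookup does):

```scala
object GenBasemaskModel {
  // Mask of 2^size consecutive byte-enable bits: 0x1, 0x3, 0xf, 0xff for sizes 0..3.
  def apply(sizeEncode: Int): Int = {
    require(sizeEncode >= 0 && sizeEncode <= 3, "size encoding is 2 bits")
    (1 << (1 << sizeEncode)) - 1
  }

  def main(args: Array[String]): Unit = {
    // e.g. the doc-comment example: size 'b11 (8-byte access) -> 0xff
    (0 to 3).foreach(s => println(f"size=$s mask=0x${apply(s)}%x"))
  }
}
```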


object genWdata {
def apply(data: UInt, sizeEncode: UInt): UInt = {
LookupTree(sizeEncode, List(
@@ -171,6 +193,13 @@ class LsPipelineBundle(implicit p: Parameters) extends XSBundle
val schedIndex = UInt(log2Up(LoadQueueReplaySize).W)
// hardware prefetch and fast replay no need to query tlb
val tlbNoQuery = Bool()

// misalign
val isMisalign = Bool()
val isFinalSplit = Bool()
val misalignWith16Byte = Bool()
val misalignNeedWakeUp = Bool()
val updateAddrValid = Bool()
}
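A general Chisel note on additions like these (not something the diff itself shows): once new fields join a widely used pipeline bundle, every producer of the bundle must drive them, typically via explicit defaults or DontCare. A minimal illustration with made-up names:

```scala
import chisel3._

class ExampleFlags extends Bundle {
  val isMisalign   = Bool()
  val isFinalSplit = Bool()
}

class ExampleProducer extends Module {
  val io = IO(Output(new ExampleFlags))
  io := DontCare           // blanket default for fields this producer does not set
  io.isMisalign := false.B // then drive the fields it does own
}
```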

class LdPrefetchTrainBundle(implicit p: Parameters) extends LsPipelineBundle {
@@ -412,28 +441,29 @@ class StoreNukeQueryIO(implicit p: Parameters) extends XSBundle {

class StoreMaBufToSqControlIO(implicit p: Parameters) extends XSBundle {
// from storeMisalignBuffer to storeQueue, control its sbuffer write
val control = Output(new XSBundle {
// control sq to write-into sb
val writeSb = Bool()
val wdata = UInt(VLEN.W)
val wmask = UInt((VLEN / 8).W)
val toStoreQueue = Output(new XSBundle {
// This entry is a cross page
val crossPageWithHit = Bool()
val crossPageCanDeq = Bool()
// High page Paddr
val paddr = UInt(PAddrBits.W)
val vaddr = UInt(VAddrBits.W)
val last = Bool()
val hasException = Bool()
// remove this entry in sq
val removeSq = Bool()

val withSameUop = Bool()
})
// from storeQueue to storeMisalignBuffer, provide detail info of this store
val storeInfo = Input(new XSBundle {
val data = UInt(VLEN.W)
// is the data of the unaligned store ready at sq?
val dataReady = Bool()
// complete a data transfer from sq to sb
val completeSbTrans = Bool()
val toStoreMisalignBuffer = Input(new XSBundle {
val sqPtr = new SqPtr
val doDeq = Bool()

val uop = new DynInst()
})
}
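The reworked interface keeps the request/response pair in one bundle by mixing directions: fields under toStoreQueue are Outputs and fields under toStoreMisalignBuffer are Inputs, presumably from the misalign buffer's point of view as the comment above suggests. A tiny sketch of the idiom with hypothetical field names:

```scala
import chisel3._

class MaBufSqCtrlSketch extends Bundle {
  // Driven by the misalign buffer, consumed by the store queue.
  val toStoreQueue = Output(new Bundle {
    val request = Bool()
  })
  // Driven by the store queue, consumed by the misalign buffer.
  val toStoreMisalignBuffer = Input(new Bundle {
    val done = Bool()
  })
}
```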

class StoreMaBufToVecStoreMergeBufferIO(implicit p: Parameters) extends VLSUBundle{
val mbIndex = Output(UInt(vsmBindexBits.W))
val flush = Output(Bool())
}

// Store byte valid mask write bundle
//
// Store byte valid mask write to SQ takes 2 cycles
4 changes: 2 additions & 2 deletions src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala
@@ -116,7 +116,7 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete
val lqDeqPtr = Output(new LqPtr)
val sqDeqPtr = Output(new SqPtr)
val exceptionAddr = new ExceptionAddrIO
val flushFrmMaBuf = Input(Bool())
val loadMisalignFull = Input(Bool())
val issuePtrExt = Output(new SqPtr)
val l2_hint = Input(Valid(new L2ToL1Hint()))
val tlb_hint = Flipped(new TlbHintIO)
@@ -208,7 +208,7 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete
loadQueue.io.tl_d_channel <> io.tl_d_channel
loadQueue.io.release <> io.release
loadQueue.io.exceptionAddr.isStore := DontCare
loadQueue.io.flushFrmMaBuf := io.flushFrmMaBuf
loadQueue.io.loadMisalignFull := io.loadMisalignFull
loadQueue.io.lqCancelCnt <> io.lqCancelCnt
loadQueue.io.sq.stAddrReadySqPtr <> storeQueue.io.stAddrReadySqPtr
loadQueue.io.sq.stAddrReadyVec <> storeQueue.io.stAddrReadyVec
@@ -37,7 +37,6 @@ class LqExceptionBuffer(implicit p: Parameters) extends XSModule with HasCircula
val io = IO(new Bundle() {
val redirect = Flipped(Valid(new Redirect))
val req = Vec(enqPortNum, Flipped(Valid(new LqWriteBundle)))
val flushFrmMaBuf = Input(Bool())
val exceptionAddr = new ExceptionAddrIO
})

@@ -67,7 +66,7 @@ class LqExceptionBuffer(implicit p: Parameters) extends XSModule with HasCircula
when (req_valid && req.uop.robIdx.needFlush(io.redirect)) {
req_valid := s2_enqueue.asUInt.orR
} .elsewhen (s2_enqueue.asUInt.orR) {
req_valid := req_valid || true.B
req_valid := true.B
}

def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
Expand Down Expand Up @@ -111,10 +110,6 @@ class LqExceptionBuffer(implicit p: Parameters) extends XSModule with HasCircula
io.exceptionAddr.gpaddr := req.gpaddr
io.exceptionAddr.isForVSnonLeafPTE := req.isForVSnonLeafPTE

when(req_valid && io.flushFrmMaBuf) {
req_valid := false.B
}

XSPerfAccumulate("exception", !RegNext(req_valid) && req_valid)

// end