From 6e1689447afb3812cd3f214fe774d86f97c3017a Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Mon, 9 Dec 2024 15:38:29 +0800 Subject: [PATCH] [rtl] connect vrf write with shifter. --- t1/src/Bundles.scala | 3 +- t1/src/Lane.scala | 47 +++++++------------------ t1/src/T1.scala | 35 ++++++------------ t1/src/laneStage/MaskExchangeUnit.scala | 7 ++-- t1/src/laneStage/SlotTokenManager.scala | 29 ++++----------- t1/src/mask/MaskUnit.scala | 15 +++----- t1/src/package.scala | 2 +- t1/src/vrf/VRF.scala | 4 +-- 8 files changed, 39 insertions(+), 103 deletions(-) diff --git a/t1/src/Bundles.scala b/t1/src/Bundles.scala index 492fc195d..5dd255384 100644 --- a/t1/src/Bundles.scala +++ b/t1/src/Bundles.scala @@ -792,6 +792,5 @@ class MaskUnitReadVs1(parameter: T1Parameter) extends Bundle { } class LaneTokenBundle extends Bundle { - val maskResponseRelease: Bool = Output(Bool()) - val maskRequestRelease: Bool = Input(Bool()) + val maskRequestRelease: Bool = Input(Bool()) } diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index 8a481c3ee..d36e7e10c 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -350,9 +350,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ val maskIndexVec: Vec[UInt] = RegInit(VecInit(Seq.fill(parameter.chainingSize)(0.U(log2Ceil(parameter.maskGroupWidth).W)))) - /** the find first one index register in this lane. */ - val ffoIndexReg: UInt = RegInit(0.U(log2Ceil(parameter.vLen / 8).W)) - /** result of reduce instruction. */ val reduceResult: UInt = RegInit(0.U(parameter.datapathWidth.W)) @@ -360,7 +357,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ */ val vrfWriteArbiter: Vec[DecoupledIO[VRFWriteRequest]] = Wire( Vec( - parameter.chainingSize + 2, + parameter.chainingSize + 1, Decoupled( new VRFWriteRequest( parameter.vrfParam.regNumBits, @@ -372,30 +369,15 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ ) ) - val lsuWriteQueue: QueueIO[VRFWriteRequest] = Queue.io(vrfWriteType, 1, flow = true) - // connect lsuWriteQueue.enq - lsuWriteQueue.enq.valid := vrfWriteChannel.valid && !writeFromMask - lsuWriteQueue.enq.bits := vrfWriteChannel.bits - vrfWriteChannel.ready := writeFromMask || lsuWriteQueue.enq.ready - - val maskWriteQueue: QueueIO[VRFWriteRequest] = Queue.io(vrfWriteType, parameter.maskUnitVefWriteQueueSize) - // connect maskWriteQueue.enq - maskWriteQueue.enq.valid := vrfWriteChannel.valid && writeFromMask - maskWriteQueue.enq.bits := vrfWriteChannel.bits - - vrfWriteArbiter(parameter.chainingSize).valid := lsuWriteQueue.deq.valid - vrfWriteArbiter(parameter.chainingSize).bits := lsuWriteQueue.deq.bits - lsuWriteQueue.deq.ready := vrfWriteArbiter(parameter.chainingSize).ready + vrfWriteArbiter(parameter.chainingSize).valid := vrfWriteChannel.valid + vrfWriteArbiter(parameter.chainingSize).bits := vrfWriteChannel.bits + vrfWriteChannel.ready := vrfWriteArbiter(parameter.chainingSize).ready - vrfWriteArbiter(parameter.chainingSize + 1).valid := maskWriteQueue.deq.valid - vrfWriteArbiter(parameter.chainingSize + 1).bits := maskWriteQueue.deq.bits - maskWriteQueue.deq.ready := vrfWriteArbiter(parameter.chainingSize + 1).ready - - val allVrfWriteAfterCheck: Seq[VRFWriteRequest] = Seq.tabulate(parameter.chainingSize + 4) { i => + val allVrfWriteAfterCheck: Seq[VRFWriteRequest] = Seq.tabulate(parameter.chainingSize + 3) { i => RegInit(0.U.asTypeOf(vrfWriteArbiter.head.bits)) } - val afterCheckValid: Seq[Bool] = Seq.tabulate(parameter.chainingSize + 4) { _ => RegInit(false.B) } - val afterCheckDequeueReady: Vec[Bool] = Wire(Vec(parameter.chainingSize + 4, Bool())) + val afterCheckValid: Seq[Bool] = Seq.tabulate(parameter.chainingSize + 3) { _ => RegInit(false.B) } + val afterCheckDequeueReady: Vec[Bool] = Wire(Vec(parameter.chainingSize + 3, Bool())) val afterCheckDequeueFire: Seq[Bool] = afterCheckValid.zip(afterCheckDequeueReady).map { case (v, r) => v && r } /** for each slot, assert when it is asking [[T1]] to change mask */ @@ -595,7 +577,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ maskUnitRequest <> mask.maskReq maskRequestToLSU <> mask.maskRequestToLSU tokenIO <> mask.tokenIO - tokenIO.maskResponseRelease := maskWriteQueue.deq.fire mask.dequeue }.getOrElse(stage3EnqWire) stage3.enqueue <> stage3EnqSelect @@ -850,7 +831,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ // It’s been a long time since I selected it. Need pipe val queueBeforeMaskWrite: QueueIO[VRFWriteRequest] = Queue.io(chiselTypeOf(maskedWriteUnit.enqueue.bits), entries = 1, pipe = true) - val writeSelect: UInt = Wire(UInt((parameter.chainingSize + 4).W)) + val writeSelect: UInt = Wire(UInt((parameter.chainingSize + 3).W)) val writeCavitation: UInt = VecInit(allVrfWriteAfterCheck.map(_.mask === 0.U)).asUInt // 处理 rf @@ -1157,8 +1138,8 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ writeReadyForLsu := vrf.writeReadyForLsu vrfReadyToStore := vrf.vrfReadyToStore tokenManager.crossWriteReports.zipWithIndex.foreach { case (rpt, rptIndex) => - rpt.valid := afterCheckDequeueFire(parameter.chainingSize + 2 + rptIndex) - rpt.bits := allVrfWriteAfterCheck(parameter.chainingSize + 2 + rptIndex).instructionIndex + rpt.valid := afterCheckDequeueFire(parameter.chainingSize + 1 + rptIndex) + rpt.bits := allVrfWriteAfterCheck(parameter.chainingSize + 1 + rptIndex).instructionIndex } // todo: add mask unit write token tokenManager.responseReport.valid := maskUnitRequest.valid @@ -1194,13 +1175,9 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ tokenManager.topWriteEnq.valid := vrfWriteChannel.fire tokenManager.topWriteEnq.bits := vrfWriteChannel.bits.instructionIndex - tokenManager.fromMask := writeFromMask - - tokenManager.lsuWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize) - tokenManager.lsuWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize).instructionIndex - tokenManager.maskWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize + 1) - tokenManager.maskWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize + 1).instructionIndex + tokenManager.topWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize) + tokenManager.topWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize).instructionIndex tokenManager.maskUnitLastReport := lsuLastReport diff --git a/t1/src/T1.scala b/t1/src/T1.scala index 70c515d23..95d333f9e 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -666,20 +666,6 @@ class T1(val parameter: T1Parameter) val completeIndexInstruction: Bool = ohCheck(lsu.lastReport, slots.last.record.instructionIndex, parameter.chainingSize) && !slots.last.state.idle - val vrfWrite: Vec[DecoupledIO[VRFWriteRequest]] = Wire( - Vec( - parameter.laneNumber, - Decoupled( - new VRFWriteRequest( - parameter.vrfParam.regNumBits, - parameter.vrfParam.vrfOffsetBits, - parameter.instructionIndexBits, - parameter.datapathWidth - ) - ) - ) - ) - val freeOR: Bool = VecInit(slots.map(_.state.idle)).asUInt.orR /** slot is ready to accept new instructions. */ @@ -776,13 +762,15 @@ class T1(val parameter: T1Parameter) Some(Seq(maskUnit.readResult(index), lsu.vrfReadResults(index))) ) - val maskTryToWrite = maskUnit.exeResp(index) - // lsu & mask unit write lane - // Mask write has absolute priority because it has a token - lane.vrfWriteChannel.valid := vrfWrite(index).valid || maskTryToWrite.valid - lane.vrfWriteChannel.bits := Mux(maskTryToWrite.valid, maskTryToWrite.bits, vrfWrite(index).bits) - vrfWrite(index).ready := lane.vrfWriteChannel.ready && !maskTryToWrite.valid - lane.writeFromMask := maskTryToWrite.valid + connectVrfAccess( + Seq(parameter.maskUnitReadShifterSize(index), parameter.lsuReadShifterSize(index)), + Seq(parameter.maskUnitReadTokenSize(index), parameter.lsuReadTokenSize(index)) + )( + VecInit(Seq(maskUnit.exeResp(index), lsu.vrfWritePort(index))), + lane.vrfWriteChannel, + 0 + ) + lane.writeFromMask := maskUnit.exeResp(index).fire lsu.offsetReadResult(index).valid := lane.maskUnitRequest.valid && lane.maskRequestToLSU lsu.offsetReadResult(index).bits := lane.maskUnitRequest.bits.source2 @@ -860,7 +848,6 @@ class T1(val parameter: T1Parameter) } maskUnit.tokenIO.zip(laneVec).zipWithIndex.foreach { case ((token, lane), index) => - token.maskResponseRelease := lane.tokenIO.maskResponseRelease lane.tokenIO.maskRequestRelease := token.maskRequestRelease || lsu.tokenIO.offsetGroupRelease(index) } @@ -897,8 +884,6 @@ class T1(val parameter: T1Parameter) io.highBandwidthLoadStorePort <> lsu.axi4Port io.indexedLoadStorePort <> lsu.simpleAccessPorts - // 暂时直接连lsu的写,后续需要处理scheduler的写 - vrfWrite.zip(lsu.vrfWritePort).foreach { case (sink, source) => sink <> source } /** Slot has free entries. */ val free = VecInit(slots.map(_.state.idle)).asUInt @@ -984,7 +969,7 @@ class T1(val parameter: T1Parameter) probeWire.requestRegReady := requestRegDequeue.ready // maskUnitWrite maskUnitWriteReady probeWire.writeQueueEnqVec.zip(maskUnit.exeResp).foreach { case (probe, write) => - probe.valid := write.valid && write.bits.mask.orR + probe.valid := write.fire && write.bits.mask.orR probe.bits := write.bits.instructionIndex } probeWire.instructionValid := maskAnd( diff --git a/t1/src/laneStage/MaskExchangeUnit.scala b/t1/src/laneStage/MaskExchangeUnit.scala index cb8257f83..49faff45f 100644 --- a/t1/src/laneStage/MaskExchangeUnit.scala +++ b/t1/src/laneStage/MaskExchangeUnit.scala @@ -54,8 +54,7 @@ class MaskExchangeUnit(parameter: LaneParameter) extends Module { val maskRequestEnqReady: Bool = !enqIsMaskRequest || maskRequestAllow - dequeue.valid := enqueue.valid && enqSendToDeq - dequeue.bits := enqueue.bits - enqueue.ready := Mux(enqSendToDeq, dequeue.ready, maskRequestEnqReady) - tokenIO.maskResponseRelease := DontCare + dequeue.valid := enqueue.valid && enqSendToDeq + dequeue.bits := enqueue.bits + enqueue.ready := Mux(enqSendToDeq, dequeue.ready, maskRequestEnqReady) } diff --git a/t1/src/laneStage/SlotTokenManager.scala b/t1/src/laneStage/SlotTokenManager.scala index 22eef760b..dadea0fd5 100644 --- a/t1/src/laneStage/SlotTokenManager.scala +++ b/t1/src/laneStage/SlotTokenManager.scala @@ -94,13 +94,7 @@ class SlotTokenManager(parameter: LaneParameter) extends Module { val topWriteEnq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W)))) @public - val fromMask: Bool = IO(Input(Bool())) - - @public - val lsuWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W)))) - - @public - val maskWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W)))) + val topWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W)))) @public val instructionValid: UInt = IO(Output(UInt((2 * parameter.chainingSize).W))) @@ -212,27 +206,16 @@ class SlotTokenManager(parameter: LaneParameter) extends Module { val instructionInWritePipe: UInt = tokenUpdate(writePipeToken, writePipeEnq, writePipeDeq) // lsu & mask write token - val lsuWriteToken: Seq[UInt] = Seq.tabulate(2 * parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W))) - val maskWriteToken: Seq[UInt] = Seq.tabulate(2 * parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W))) + val topWriteToken: Seq[UInt] = Seq.tabulate(2 * parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W))) val topWriteDoEnq: UInt = maskAnd(topWriteEnq.valid, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt - val lsuWriteDoEnq: UInt = - maskAnd(topWriteEnq.valid && !fromMask, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt - - val maskWriteDoEnq: UInt = - maskAnd(topWriteEnq.valid && fromMask, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt - - val lsuWriteDoDeq: UInt = - maskAnd(lsuWriteDeq.valid, indexToOH(lsuWriteDeq.bits, parameter.chainingSize)).asUInt - - val maskWriteDoDeq: UInt = - maskAnd(maskWriteDeq.valid, indexToOH(maskWriteDeq.bits, parameter.chainingSize)).asUInt + val topWriteDoDeq: UInt = + maskAnd(topWriteDeq.valid, indexToOH(topWriteDeq.bits, parameter.chainingSize)).asUInt - val lsuInTopWrite = tokenUpdate(lsuWriteToken, lsuWriteDoEnq, lsuWriteDoDeq) - val maskInTopWrite = tokenUpdate(maskWriteToken, maskWriteDoEnq, maskWriteDoDeq) + val topWrite: UInt = tokenUpdate(topWriteToken, topWriteDoEnq, topWriteDoDeq) - dataInWritePipe := instructionInWritePipe | lsuInTopWrite | maskInTopWrite + dataInWritePipe := instructionInWritePipe | topWrite instructionValid := dataInWritePipe | instructionInSlot } diff --git a/t1/src/mask/MaskUnit.scala b/t1/src/mask/MaskUnit.scala index ab3f8c526..4694ca4a8 100644 --- a/t1/src/mask/MaskUnit.scala +++ b/t1/src/mask/MaskUnit.scala @@ -50,9 +50,9 @@ class MaskUnit(parameter: T1Parameter) extends Module { } @public - val exeResp: Seq[ValidIO[VRFWriteRequest]] = Seq.tabulate(parameter.laneNumber) { _ => + val exeResp: Seq[DecoupledIO[VRFWriteRequest]] = Seq.tabulate(parameter.laneNumber) { _ => IO( - Valid( + Decoupled( new VRFWriteRequest( parameter.vrfParam.regNumBits, parameter.laneParam.vrfOffsetBits, @@ -1078,17 +1078,10 @@ class MaskUnit(parameter: T1Parameter) extends Module { } queue.enq.bits.index := instReg.instructionIndex - // write token - val tokenCounter = RegInit(0.U(log2Ceil(parameter.maskUnitVefWriteQueueSize + 1).W)) - val tokenAllow: Bool = queue.deq.fire - val counterChange: UInt = Mux(tokenAllow, 1.U, -1.S(tokenCounter.getWidth.W).asUInt) - when(tokenAllow ^ tokenIO(index).maskResponseRelease) { - tokenCounter := tokenCounter + counterChange - } // write vrf val writePort = exeResp(index) - queue.deq.ready := !tokenCounter.asBools.last - writePort.valid := tokenAllow + queue.deq.ready := writePort.ready + writePort.valid := queue.deq.valid writePort.bits.last := DontCare writePort.bits.instructionIndex := instReg.instructionIndex writePort.bits.data := Mux(queue.deq.bits.ffoByOther, queue.deq.bits.pipeData, queue.deq.bits.writeData.data) diff --git a/t1/src/package.scala b/t1/src/package.scala index 5eca78e39..412d62ddf 100644 --- a/t1/src/package.scala +++ b/t1/src/package.scala @@ -277,7 +277,7 @@ package object rtl { def connectVrfAccess[T <: Data]( latencyVec: Seq[Int], tokenSizeVec: Seq[Int], - vrfReadLatency: Option[Int] + vrfReadLatency: Option[Int] = None )(sourceVec: Vec[DecoupledIO[T]], sink: DecoupledIO[T], arb: Int, diff --git a/t1/src/vrf/VRF.scala b/t1/src/vrf/VRF.scala index cafaeb5e4..a4cc63bdc 100644 --- a/t1/src/vrf/VRF.scala +++ b/t1/src/vrf/VRF.scala @@ -202,7 +202,7 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar @public val writeCheck: Vec[LSUWriteCheck] = IO( Vec( - parameter.chainingSize + 4, + parameter.chainingSize + 3, Input( new LSUWriteCheck( parameter.regNumBits, @@ -214,7 +214,7 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar ) @public - val writeAllow: Vec[Bool] = IO(Vec(parameter.chainingSize + 4, Output(Bool()))) + val writeAllow: Vec[Bool] = IO(Vec(parameter.chainingSize + 3, Output(Bool()))) /** when instruction is fired, record it in the VRF for chaining. */ @public