diff --git a/configgen/generated/blastoise.json b/configgen/generated/blastoise.json index 290ef86c1..241ea7eff 100644 --- a/configgen/generated/blastoise.json +++ b/configgen/generated/blastoise.json @@ -167,8 +167,9 @@ ] ] ], - "zvbbModuleParameters": [] + "zvbbModuleParameters": [], + "zvkModuleParameters": [] } }, "generator": "org.chipsalliance.t1.rtl.T1" -} \ No newline at end of file +} diff --git a/configgen/generated/machamp.json b/configgen/generated/machamp.json index ceeaf5e59..144ce49fa 100644 --- a/configgen/generated/machamp.json +++ b/configgen/generated/machamp.json @@ -151,8 +151,9 @@ ] ], "floatModuleParameters": [], - "zvbbModuleParameters": [] + "zvbbModuleParameters": [], + "zvkModuleParameters": [] } }, "generator": "org.chipsalliance.t1.rtl.T1" -} \ No newline at end of file +} diff --git a/configgen/generated/psyduck.json b/configgen/generated/psyduck.json index 04a2f3572..9efbc04e4 100644 --- a/configgen/generated/psyduck.json +++ b/configgen/generated/psyduck.json @@ -184,8 +184,25 @@ 3 ] ] + ], + "zvkModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 3 + }, + "generator": "org.chipsalliance.t1.rtl.LaneZvk" + }, + [ + 0, + 1, + 2, + 3 + ] + ] ] } }, "generator": "org.chipsalliance.t1.rtl.T1" -} \ No newline at end of file +} diff --git a/configgen/generated/sandslash.json b/configgen/generated/sandslash.json index 688085fe1..08d52748a 100644 --- a/configgen/generated/sandslash.json +++ b/configgen/generated/sandslash.json @@ -151,8 +151,9 @@ ] ], "floatModuleParameters": [], - "zvbbModuleParameters": [] + "zvbbModuleParameters": [], + "zvkModuleParameters": [] } }, "generator": "org.chipsalliance.t1.rtl.T1" -} \ No newline at end of file +} diff --git a/configgen/src/Main.scala b/configgen/src/Main.scala index 88e3bc326..357707f3a 100644 --- a/configgen/src/Main.scala +++ b/configgen/src/Main.scala @@ -100,7 +100,8 @@ object Main { Seq(0, 1, 2, 3))), floatModuleParameters = Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))), - zvbbModuleParameters = Seq() + zvbbModuleParameters = Seq(), + zvkModuleParameters = Seq(), ) ) if (doEmit) param.emit(targetFile) @@ -151,7 +152,9 @@ object Main { floatModuleParameters = Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))), zvbbModuleParameters = - Seq((SerializableModuleGenerator(classOf[LaneZvbb], LaneZvbbParam(32, 3)), Seq(0, 1, 2, 3))) + Seq((SerializableModuleGenerator(classOf[LaneZvbb], LaneZvbbParam(32, 3)), Seq(0, 1, 2, 3))), + zvkModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneZvk], LaneZvkParam(32, 3)), Seq(0, 1, 2, 3))), ) ) if (doEmit) param.emit(targetFile) @@ -201,7 +204,8 @@ object Main { ), Seq(0, 1, 2, 3))), floatModuleParameters = Seq(), - zvbbModuleParameters = Seq() // TODO + zvbbModuleParameters = Seq(), + zvkModuleParameters = Seq(), ) ) if (doEmit) param.emit(targetFile) @@ -251,7 +255,8 @@ object Main { ), Seq(0, 1, 2, 3))), floatModuleParameters = Seq(), - zvbbModuleParameters = Seq() // TODO + zvbbModuleParameters = Seq(), + zvkModuleParameters = Seq(), ) ) if (doEmit) param.emit(targetFile) diff --git a/t1/src/Bundles.scala b/t1/src/Bundles.scala index e873f946b..90a4f4f55 100644 --- a/t1/src/Bundles.scala +++ b/t1/src/Bundles.scala @@ -621,6 +621,7 @@ class ExecutionUnitRecord(parameter: LaneParameter)(isLastSlot: Boolean) extends val executeIndex: Bool = Bool() val source: Vec[UInt] = Vec(3, UInt(parameter.datapathWidth.W)) val crossReadSource: Option[UInt] = Option.when(isLastSlot)(UInt((parameter.datapathWidth * 2).W)) + val zvkCrossReadSource: Option[UInt] = Option.when(isLastSlot && parameter.zvkEnable)(UInt((parameter.datapathWidth * 2).W)) /** groupCounter need use to update `Lane.maskFormatResultForGroup` */ val groupCounter: UInt = UInt(parameter.groupNumberBits.W) val sSendResponse: Option[Bool] = Option.when(isLastSlot)(Bool()) @@ -725,4 +726,4 @@ class T1Retire(xLen: Int) extends Bundle { val rd: ValidIO[T1RdRetire] = Valid(new T1RdRetire(xLen)) val csr: ValidIO[T1CSRRetire] = Valid(new T1CSRRetire) val mem: ValidIO[EmptyBundle] = Valid(new EmptyBundle) -} \ No newline at end of file +} diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index 6b7dec0c2..d16392e6d 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -62,6 +62,7 @@ class LaneProbe(parameter: LaneParameter) extends Bundle { val instructionValid: UInt = UInt(parameter.chainingSize.W) val crossWriteProbe: Vec[ValidIO[LaneWriteProbe]] = Vec(2, Valid(new LaneWriteProbe(parameter.instructionIndexBits))) + val zvkCrossWriteProbe: Option[Vec[ValidIO[LaneWriteProbe]]] = Option.when(parameter.zvkEnable)(Vec(4, Valid(new LaneWriteProbe(parameter.instructionIndexBits)))) val vrfProbe: VRFProbe = new VRFProbe(parameter.vrfParam) } @@ -86,7 +87,9 @@ case class LaneParameter( laneNumber: Int, chainingSize: Int, crossLaneVRFWriteEscapeQueueSize: Int, + crossLaneVRFWriteEscapeZvkQueueSize: Int, fpuEnable: Boolean, + zvkEnable: Boolean, portFactor: Int, vrfRamType: RamType, decoderParam: DecoderParam, @@ -132,7 +135,7 @@ case class LaneParameter( * * for each number in table below, it represent a [[datapathWidth]] * {{{ - * lane0 | lane1 | ... | lane8 + * lane0 | lane1 | ... | lane7 * offset0 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 * offset1 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 * offset2 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 @@ -178,7 +181,7 @@ case class LaneParameter( val executionQueueSize: Int = 4 /** Parameter for [[VRF]] */ - def vrfParam: VRFParam = VRFParam(vLen, laneNumber, datapathWidth, chainingSize, portFactor, vrfRamType) + def vrfParam: VRFParam = VRFParam(vLen, laneNumber, datapathWidth, chainingSize, portFactor, zvkEnable, vrfRamType) } /** Instantiate [[Lane]] from [[T1]], @@ -210,8 +213,26 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ * TODO: benchmark the usecase for tuning the Ring Bus width. * find a real world case for using `narrow` and `widen` aggressively. */ + // 0: 0.0 - 0.1 + // 1: 0.2 - 0.3 + // 2: 0.4 - 0.5 + // 3: 0.6 - 0.7 + // 4: 1.0 - 1.1 + // 5: 1.2 - 1.3 + // 6: 1.4 - 1.5 + // 7: 1.6 - 1.7 + + // 0: 0.0 - 0.1 - 0.2 - 0.3 + // 1: 0.4 - 0.5 - 0.6 - 0.7 + // 2: 1.0 - 1.1 - 1.2 - 1.3 + // 3: 1.4 - 1.5 - 1.6 - 1.7 + // 4: 2.0 - 2.1 - 2.2 - 2.3 + // 5: 2.4 - 2.5 - 2.6 - 2.7 + // 6: 3.0 - 3.1 - 3.2 - 3.3 + // 7: 3.4 - 3.5 - 3.6 - 3.7 @public val readBusPort: Vec[RingPort[ReadBusData]] = IO(Vec(2, new RingPort(new ReadBusData(parameter)))) + val zvkReadBusPort: Option[Vec[RingPort[ReadBusData]]] = Option.when(parameter.zvkEnable)(IO(Vec(4, new RingPort(new ReadBusData(parameter))))) /** VRF Write Interface. * only used for `narrow` an `widen` @@ -220,6 +241,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ */ @public val writeBusPort: Vec[RingPort[WriteBusData]] = IO(Vec(2, new RingPort(new WriteBusData(parameter)))) + val zvkWriteBusPort: Option[Vec[RingPort[WriteBusData]]] = Option.when(parameter.zvkEnable)(IO(Vec(4, new RingPort(new WriteBusData(parameter))))) /** request from [[T1.decode]] to [[Lane]]. */ @public @@ -301,7 +323,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ @public val loadDataInLSUWriteQueue: UInt = IO(Input(UInt(parameter.chainingSize.W))) - /** How many dataPath will writ by instruction in this lane */ + /** How many dataPath will write by instruction in this lane */ @public val writeCount: UInt = IO(Input(UInt((parameter.vlMaxBits - log2Ceil(parameter.laneNumber) - log2Ceil(parameter.dataPathByteWidth)).W))) @@ -322,6 +344,9 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ // TODO: remove dontTouch(writeBusPort) + if(parameter.zvkEnable) { + dontTouch(zvkWriteBusPort.get) + } /** VRF instantces. */ val vrf: Instance[VRF] = Instantiate(new VRF(parameter.vrfParam)) @@ -442,8 +467,12 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ val readCheckRequestVec: Vec[VRFReadRequest] = Wire(Vec(parameter.chainingSize * 3 + 2, new VRFReadRequest(parameter.vrfParam.regNumBits, parameter.vrfOffsetBits, parameter.instructionIndexBits) )) + val zvkReadCheckRequestVec: Option[Vec[VRFReadRequest]] = Option.when(parameter.zvkEnable)(Wire(Vec(parameter.chainingSize * 3 + 4, + new VRFReadRequest(parameter.vrfParam.regNumBits, parameter.vrfOffsetBits, parameter.instructionIndexBits) + ))) val readCheckResult: Vec[Bool] = Wire(Vec(parameter.chainingSize * 3 + 2, Bool())) + val zvkReadCheckResult: Option[Vec[Bool]] = Option.when(parameter.zvkEnable)(Wire(Vec(parameter.chainingSize * 3 + 4, Bool()))) /** signal used for prohibiting slots to access VRF. * a slot will become inactive when: @@ -467,7 +496,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ val slotCanShift: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool())) /** Which data group is waiting for the result of the cross-lane read */ - val readBusDequeueGroup: UInt = Wire(UInt(parameter.groupNumberBits.W)) + val readBusDequeueGroup: UInt = Wire(UInt(parameter.groupNumberBits.W)) // TODO: readBusDequeueGroup is currently unused /** enqueue valid for execution unit */ val executeEnqueueValid: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool())) @@ -524,6 +553,18 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ pipe = true ) )) + val zvkCrossLaneWriteQueue: Option[Seq[Queue[VRFWriteRequest]]] = Option.when(parameter.zvkEnable)(Seq.tabulate(4)(i => Module( + new Queue( + new VRFWriteRequest( + parameter.vrfParam.regNumBits, + parameter.vrfOffsetBits, + parameter.instructionIndexBits, + parameter.datapathWidth + ), + parameter.crossLaneVRFWriteEscapeZvkQueueSize, + pipe = true + ) + ))) val maskedWriteUnit: Instance[MaskedWrite] = Instantiate(new MaskedWrite(parameter)) val tokenManager: Instance[SlotTokenManager] = Instantiate(new SlotTokenManager(parameter)) slotControl.zipWithIndex.foreach { @@ -671,9 +712,17 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ readCheckRequestVec((parameter.chainingSize - index - 1) * 3 + portIndex) := stage1.vrfCheckRequest(portIndex) stage1.checkResult(portIndex) := readCheckResult((parameter.chainingSize - index - 1) * 3 + portIndex) } + val zvkCheckSize = if (isLastSlot && parameter.zvkEnable) 7 else 5 + if(parameter.zvkEnable) { + Seq.tabulate(zvkCheckSize){ portIndex => + zvkReadCheckRequestVec.get((parameter.chainingSize - index - 1) * 3 + portIndex) := stage1.zvkVrfCheckRequest(portIndex) + stage1.zvkCheckResult(portIndex) := zvkReadCheckResult.get((parameter.chainingSize - index - 1) * 3 + portIndex) + } + } // connect cross read bus if(isLastSlot) { val tokenSize = parameter.crossLaneVRFWriteEscapeQueueSize + val zvKTokenSize = parameter.crossLaneVRFWriteEscapeZvkQueueSize readBusPort.zipWithIndex.foreach {case (readPort, portIndex) => // tx val tokenReg = RegInit(0.U(log2Ceil(tokenSize + 1).W)) @@ -695,6 +744,27 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ // dequeue to cross read unit stage1.readBusDequeue.get(portIndex) <> queue.io.deq } + zvkReadBusPort.get.zipWithIndex.foreach {case (readPort, portIndex) => + // tx + val tokenReg = RegInit(0.U(log2Ceil(zvKTokenSize + 1).W)) + val tokenReady: Bool = tokenReg =/= zvKTokenSize.U + stage1.readBusRequest4.get(portIndex).ready := tokenReady + readPort.deq.valid := stage1.readBusRequest4.get(portIndex).valid && tokenReady + readPort.deq.bits := stage1.readBusRequest4.get(portIndex).bits + val tokenUpdate = Mux(readPort.deq.valid, 1.U, -1.S(tokenReg.getWidth.W).asUInt) + when(readPort.deq.valid ^ readPort.deqRelease) { + tokenReg := tokenReg + tokenUpdate + } + // rx + // rx queue + val queue = Module(new Queue(chiselTypeOf(readPort.deq.bits), zvKTokenSize, pipe=true)) + queue.io.enq.valid := readPort.enq.valid + queue.io.enq.bits := readPort.enq.bits + readPort.enqRelease := queue.io.deq.fire + assert(queue.io.enq.ready || !readPort.enq.valid) + // dequeue to cross read unit + stage1.readBusDequeue4.get(portIndex) <> queue.io.deq + } // cross write writeBusPort.zipWithIndex.foreach {case (writePort, portIndex) => @@ -704,6 +774,19 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ writePort.deq.bits := stage3.crossWritePort.get(portIndex).bits stage3.crossWritePort.get(portIndex).ready := tokenReady + // update token + val tokenUpdate = Mux(writePort.deq.valid, 1.U, -1.S(tokenReg.getWidth.W).asUInt) + when(writePort.deq.valid ^ writePort.deqRelease) { + tokenReg := tokenReg + tokenUpdate + } + } + zvkWriteBusPort.get.zipWithIndex.foreach {case (writePort, portIndex) => + val tokenReg = RegInit(0.U(log2Ceil(zvKTokenSize + 1).W)) + val tokenReady: Bool = tokenReg =/= zvKTokenSize.U + writePort.deq.valid := stage3.crossWritePort4.get(portIndex).valid && tokenReady + writePort.deq.bits := stage3.crossWritePort4.get(portIndex).bits + stage3.crossWritePort4.get(portIndex).ready := tokenReady + // update token val tokenUpdate = Mux(writePort.deq.valid, 1.U, -1.S(tokenReg.getWidth.W).asUInt) when(writePort.deq.valid ^ writePort.deqRelease) { @@ -853,6 +936,22 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ assert(queue.io.enq.ready || !port.enq.valid) port.enqRelease := queue.io.deq.fire } + zvkCrossLaneWriteQueue.get.zipWithIndex.foreach {case (queue, index) => + val port = writeBusPort(index) + // ((counter << 1) >> parameter.vrfParam.vrfOffsetBits).low(3) + val registerIncreaseBase = parameter.vrfParam.vrfOffsetBits - 1 + queue.io.enq.valid := port.enq.valid + queue.io.enq.bits.vd := + // 3: 8 reg => log(2, 8) + slotControl.head.laneRequest.vd + port.enq.bits.counter(registerIncreaseBase + 3 - 1, registerIncreaseBase) + queue.io.enq.bits.offset := port.enq.bits.counter ## index.U(1.W) + queue.io.enq.bits.data := port.enq.bits.data + queue.io.enq.bits.last := DontCare + queue.io.enq.bits.instructionIndex := port.enq.bits.instructionIndex + queue.io.enq.bits.mask := FillInterleaved(2, port.enq.bits.mask) + assert(queue.io.enq.ready || !port.enq.valid) + port.enqRelease := queue.io.deq.fire + } val vfus: Seq[Instance[VFUModule]] = instantiateVFU(parameter.vfuInstantiateParameter)( requestVec, @@ -899,7 +998,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ } // all vrf write - val allVrfWrite: Seq[DecoupledIO[VRFWriteRequest]] = vrfWriteArbiter ++ crossLaneWriteQueue.map(_.io.deq) + val allVrfWrite: Seq[DecoupledIO[VRFWriteRequest]] = vrfWriteArbiter ++ crossLaneWriteQueue.map(_.io.deq) ++ zvkCrossLaneWriteQueue.get.map(_.io.deq) // check all write vrf.writeCheck.zip(allVrfWrite).foreach {case (check, write) => check.vd := write.bits.vd @@ -908,7 +1007,9 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ } vrf.readCheck.zip(readCheckRequestVec).foreach{case (sink, source) => sink := source} + vrf.zvkReadCheck.get.zip(zvkReadCheckRequestVec).foreach{case (sink, source) => sink := source} readCheckResult.zip(vrf.readCheckResult).foreach{case (sink, source) => sink := source} + zvkReadCheckResult.get.zip(vrf.zvkReadCheckResult).foreach{case (sink, source) => sink := source} allVrfWriteAfterCheck.zipWithIndex.foreach { case (req, i) => val check = vrf.writeAllow(i) @@ -1192,6 +1293,10 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ rpt.valid := afterCheckDequeueFire(parameter.chainingSize + 1 + rptIndex) rpt.bits := allVrfWriteAfterCheck(parameter.chainingSize + 1 + rptIndex).instructionIndex } + tokenManager.crossWriteReports.zipWithIndex.foreach {case (rpt, rptIndex) => + rpt.valid := afterCheckDequeueFire(parameter.chainingSize + 1 + rptIndex) + rpt.bits := allVrfWriteAfterCheck(parameter.chainingSize + 1 + rptIndex).instructionIndex + } // todo: add mask unit write token tokenManager.responseReport.valid := laneResponse.valid tokenManager.responseReport.bits := laneResponse.bits.instructionIndex diff --git a/t1/src/LaneZvk.scala b/t1/src/LaneZvk.scala new file mode 100644 index 000000000..7764c680a --- /dev/null +++ b/t1/src/LaneZvk.scala @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl + +import chisel3.experimental.hierarchy.instantiable +import chisel3._ +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ +import org.chipsalliance.t1.rtl.decoder.{BoolField, Decoder} + +object LaneZvkParam { + implicit def rw: upickle.default.ReadWriter[LaneZvkParam] = upickle.default.macroRW +} + +case class LaneZvkParam(datapathWidth: Int, latency: Int) extends VFUParameter with SerializableModuleParameter { + val inputBundle = new LaneZvkRequest(datapathWidth) + val decodeField: BoolField = Decoder.zvbb + val outputBundle = new LaneZvkResponse(datapathWidth) + override val NeedSplit: Boolean = false +} + +class LaneZvkRequest(datapathWidth: Int) extends VFUPipeBundle { + val src = Vec(3, UInt(datapathWidth.W)) + val opcode = UInt(4.W) + val vSew = UInt(2.W) + val shifterSize = UInt(log2Ceil(datapathWidth).W) +} + +class LaneZvkResponse(datapathWidth: Int) extends VFUPipeBundle { + val data = UInt(datapathWidth.W) +} + +@instantiable +class LaneZvk(val parameter: LaneZvkParam) + extends VFUModule(parameter) with SerializableModule[LaneZvkParam]{ + val response: LaneZvkResponse = Wire(new LaneZvkResponse(parameter.datapathWidth)) + val request : LaneZvkRequest = connectIO(response).asTypeOf(parameter.inputBundle) + + val zvbbSrc: UInt = request.src(1) // vs2 + val zvbbRs: UInt = request.src(0) // vs1 or rs1 + val vSew: UInt = UIntToOH(request.vSew) // sew = 0, 1, 2 + + response.data := Mux1H(UIntToOH(request.opcode), Seq( + )) +} + diff --git a/t1/src/T1.scala b/t1/src/T1.scala index afc7ebb68..34e2157f0 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -122,6 +122,14 @@ case class T1Parameter( instruction => instruction.instructionSet.name match { case "rv_v" => true case "rv_zvbb" => if (zvbbEnable) true else false + // Zvk + case "rv_zvkg" => if (zvkEnable) true else false + // case "rv_zvkn" => if (zvkEnable) true else false // TODO: no implementations for SEW=64 + case "rv_zvkned" => if (zvkEnable) true else false + case "rv_zvknha" => if (zvkEnable) true else false + // case "rv_zvknhb" => if (zvkEnable) true else false // TODO: no implementations for SEW=64 + case "rv_zvksed" => if (zvkEnable) true else false + case "rv_zvksh" => if (zvkEnable) true else false case _ => false }} ++ t1customInstructions.map(_.instruction) @@ -143,15 +151,19 @@ case class T1Parameter( /** TODO: configure it. */ val instructionQueueSize: Int = 4 - /** crosslane write token size */ + /** crosslane write token size, unclear how many would be good */ val vrfWriteQueueSize: Int = 4 + val vrfWriteZvkQueueSize: Int = 8 /** does t1 has floating datapath? */ val fpuEnable: Boolean = extensions.contains("Zve32f") - /** support of zvbb */ + /** support of Zvbb */ lazy val zvbbEnable: Boolean = extensions.contains("Zvbb") + /** support of Zvk */ + lazy val zvkEnable: Boolean = extensions.contains("Zvk") + /** how many chaining does T1 support, this is not a parameter yet. */ val chainingSize: Int = 4 @@ -225,7 +237,7 @@ case class T1Parameter( // and the values are their respective delays. val crossLaneConnectCycles: Seq[Seq[Int]] = Seq.tabulate(laneNumber)(_ => Seq(1, 1)) - val decoderParam: DecoderParam = DecoderParam(fpuEnable, zvbbEnable, allInstructions) + val decoderParam: DecoderParam = DecoderParam(fpuEnable, zvbbEnable, zvkEnable, allInstructions) /** paraemter for AXI4. */ val axi4BundleParameter: AXI4BundleParameter = AXI4BundleParameter( @@ -261,7 +273,9 @@ case class T1Parameter( laneNumber = laneNumber, chainingSize = chainingSize, crossLaneVRFWriteEscapeQueueSize = vrfWriteQueueSize, + crossLaneVRFWriteEscapeZvkQueueSize = vrfWriteZvkQueueSize, fpuEnable = fpuEnable, + zvkEnable = zvkEnable, portFactor = vrfBankSize, vrfRamType = vrfRamType, decoderParam = decoderParam, @@ -287,7 +301,7 @@ case class T1Parameter( axi4BundleParameter = axi4BundleParameter, name = "main" ) - def vrfParam: VRFParam = VRFParam(vLen, laneNumber, datapathWidth, chainingSize, vrfBankSize, vrfRamType) + def vrfParam: VRFParam = VRFParam(vLen, laneNumber, datapathWidth, chainingSize, vrfBankSize, zvkEnable, vrfRamType) require(xLen == datapathWidth) def adderParam: LaneAdderParam = LaneAdderParam(datapathWidth, 0) } diff --git a/t1/src/VectorFunctionUnit.scala b/t1/src/VectorFunctionUnit.scala index cf06a66af..cee525256 100644 --- a/t1/src/VectorFunctionUnit.scala +++ b/t1/src/VectorFunctionUnit.scala @@ -106,7 +106,8 @@ case class VFUInstantiateParameter( divfpModuleParameters: Seq[(SerializableModuleGenerator[LaneDivFP, LaneDivFPParam], Seq[Int])], otherModuleParameters: Seq[(SerializableModuleGenerator[OtherUnit, OtherUnitParam], Seq[Int])], floatModuleParameters: Seq[(SerializableModuleGenerator[LaneFloat, LaneFloatParam], Seq[Int])], - zvbbModuleParameters: Seq[(SerializableModuleGenerator[LaneZvbb, LaneZvbbParam], Seq[Int])] + zvbbModuleParameters: Seq[(SerializableModuleGenerator[LaneZvbb, LaneZvbbParam], Seq[Int])], + zvkModuleParameters: Seq[(SerializableModuleGenerator[LaneZvk, LaneZvkParam], Seq[Int])], ) { val genVec: Seq[(SerializableModuleGenerator[_ <: VFUModule, _ <: VFUParameter], Seq[Int])] = logicModuleParameters ++ @@ -117,7 +118,8 @@ case class VFUInstantiateParameter( divfpModuleParameters ++ otherModuleParameters ++ floatModuleParameters ++ - zvbbModuleParameters + zvbbModuleParameters ++ + zvkModuleParameters genVec.foreach { case (_, connect) => connect.foreach(connectIndex => require(connectIndex < slotCount)) diff --git a/t1/src/decoder/Decoder.scala b/t1/src/decoder/Decoder.scala index 3a0299389..112e34f74 100644 --- a/t1/src/decoder/Decoder.scala +++ b/t1/src/decoder/Decoder.scala @@ -13,7 +13,7 @@ import org.chipsalliance.t1.rtl.decoder.attribute._ object DecoderParam { implicit def rwP: upickle.default.ReadWriter[DecoderParam] = upickle.default.macroRW } -case class DecoderParam(fpuEnable: Boolean, zvbbEnable: Boolean, allInstructions: Seq[Instruction]) +case class DecoderParam(fpuEnable: Boolean, zvbbEnable: Boolean, zvkEnable: Boolean, allInstructions: Seq[Instruction]) trait T1DecodeFiled[D <: Data] extends DecodeField[T1DecodePattern, D] with FieldName @@ -225,6 +225,10 @@ object Decoder { override def getTriState(pattern: T1DecodePattern): TriState = pattern.isZvbb.value } + object zvk extends BoolField { + override def getTriState(pattern: T1DecodePattern): TriState = pattern.isZvk.value + } + object topUop extends T1TopUopField { override def genTable(pattern: T1DecodePattern): BitPat = pattern.topUop.value match { case _: TopT0.type => BitPat("b000") @@ -345,6 +349,19 @@ object Decoder { case _: zvbbUop8.type => BitPat("b1000") // andn case _ => BitPat.dontCare(4) } + case zvkCase: ZvkUOPType => + zvkCase match { + case _: zvkUop0.type => BitPat("b0000") // + case _: zvkUop1.type => BitPat("b0001") // + case _: zvkUop2.type => BitPat("b0010") // + case _: zvkUop3.type => BitPat("b0011") // + case _: zvkUop4.type => BitPat("b0100") // + case _: zvkUop5.type => BitPat("b0101") // + case _: zvkUop6.type => BitPat("b0110") // + case _: zvkUop7.type => BitPat("b0111") // + case _: zvkUop8.type => BitPat("b1000") // + case _ => BitPat.dontCare(4) + } case _ => BitPat.dontCare(4) } } @@ -422,6 +439,12 @@ object Decoder { zvbb, ) else Seq() + } ++ { + if (param.zvkEnable) + Seq( + zvk, + ) + else Seq() } def allDecodePattern(param: DecoderParam): Seq[T1DecodePattern] = param.allInstructions.map(T1DecodePattern(_, param)).toSeq.sortBy(_.instruction.name) diff --git a/t1/src/decoder/InstructionDocumentation.scala b/t1/src/decoder/InstructionDocumentation.scala index 86c5a7e35..b506c61c3 100644 --- a/t1/src/decoder/InstructionDocumentation.scala +++ b/t1/src/decoder/InstructionDocumentation.scala @@ -439,5 +439,31 @@ case class InstructionDocumentation(instruction: Instruction, param: DecoderPara case "vwsll.vv" => "TODO!" case "vwsll.vx" => "TODO!" case "vwsll.vi" => "TODO!" + // rv_zvkg + case "vghsh.vv" => "TODO!" + case "vgmul.vv" => "TODO!" + // rv_zvkned + case "vaesdf.vv" => "TODO!" + case "vaesdf.vs" => "TODO!" + case "vaesdm.vv" => "TODO!" + case "vaesdm.vs" => "TODO!" + case "vaesef.vv" => "TODO!" + case "vaesef.vs" => "TODO!" + case "vaesem.vv" => "TODO!" + case "vaesem.vs" => "TODO!" + case "vaesz.vs" => "TODO!" + case "vaeskf1.vi" => "TODO!" + case "vaeskf2.vi" => "TODO!" + // rv_zvknha + case "vsha2ms.vv" => "TODO!" + case "vsha2ch.vv" => "TODO!" + case "vsha2cl.vv" => "TODO!" + // rv_zvksed + case "vsm4k.vi" => "TODO!" + case "vsm4r.vv" => "TODO!" + case "vsm4r.vs" => "TODO!" + // rv_zvksh + case "vsm3c.vi" => "TODO!" + case "vsm3me.vv" => "TODO!" } } diff --git a/t1/src/decoder/T1DecodePattern.scala b/t1/src/decoder/T1DecodePattern.scala index 5c7d10733..3b7d9b3a7 100644 --- a/t1/src/decoder/T1DecodePattern.scala +++ b/t1/src/decoder/T1DecodePattern.scala @@ -108,6 +108,7 @@ case class T1DecodePattern(instruction: Instruction, param: DecoderParam) extend def isVwmacc: isVwmacc = attribute.isVwmacc(this) def isWidenreduce: isWidenreduce = attribute.isWidenreduce(this) def isZvbb: isZvbb = attribute.isZvbb(this) + def isZvk: isZvk = attribute.isZvk(this) def fpExecutionType: FpExecutionType.Type = attribute.FpExecutionType(this) def topUop: TopUop = attribute.TopUop(this) def decoderUop: DecoderUop = attribute.DecoderUop(this) diff --git a/t1/src/decoder/attribute/isItype.scala b/t1/src/decoder/attribute/isItype.scala index 5ba9baf2e..c3db64750 100644 --- a/t1/src/decoder/attribute/isItype.scala +++ b/t1/src/decoder/attribute/isItype.scala @@ -54,6 +54,13 @@ object isItype { // rv_zvbb "vror.vi", "vwsll.vi", + // rv_zvkned + "vaeskf1.vi", + "vaeskf2.vi", + // rv_zvksed + "vsm4k.vi", + // rv_zvksh + "vsm3c.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isUnsigned0.scala b/t1/src/decoder/attribute/isUnsigned0.scala index fb041c3c7..29b2bf9d3 100644 --- a/t1/src/decoder/attribute/isUnsigned0.scala +++ b/t1/src/decoder/attribute/isUnsigned0.scala @@ -146,6 +146,32 @@ object isUnsigned0 { "vwsll.vv", "vwsll.vx", "vwsll.vi", + // rv_zvkg + "vghsh.vv", + "vgmul.vv", + // rv_zvkned + "vaesdf.vv", + "vaesdf.vs", + "vaesdm.vv", + "vaesdm.vs", + "vaesef.vv", + "vaesef.vs", + "vaesem.vv", + "vaesem.vs", + "vaesz.vs", + "vaeskf1.vi", + "vaeskf2.vi", + // rv_zvknha + "vsha2ms.vv", + "vsha2ch.vv", + "vsha2cl.vv", + // rv_zvksed + "vsm4k.vi", + "vsm4r.vv", + "vsm4r.vs", + // rv_zvksh + "vsm3c.vi", + "vsm3me.vv", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isUnsigned1.scala b/t1/src/decoder/attribute/isUnsigned1.scala index cf4f517a0..595ecfbab 100644 --- a/t1/src/decoder/attribute/isUnsigned1.scala +++ b/t1/src/decoder/attribute/isUnsigned1.scala @@ -118,6 +118,32 @@ object isUnsigned1 { "vwsll.vv", "vwsll.vx", "vwsll.vi", + // rv_zvkg + "vghsh.vv", + "vgmul.vv", + // rv_zvkned + "vaesdf.vv", + "vaesdf.vs", + "vaesdm.vv", + "vaesdm.vs", + "vaesef.vv", + "vaesef.vs", + "vaesem.vv", + "vaesem.vs", + "vaesz.vs", + "vaeskf1.vi", + "vaeskf2.vi", + // rv_zvknha + "vsha2ms.vv", + "vsha2ch.vv", + "vsha2cl.vv", + // rv_zvksed + "vsm4k.vi", + "vsm4r.vv", + "vsm4r.vs", + // rv_zvksh + "vsm3c.vi", + "vsm3me.vv", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isVtype.scala b/t1/src/decoder/attribute/isVtype.scala index 7649d715a..0ecb480e5 100644 --- a/t1/src/decoder/attribute/isVtype.scala +++ b/t1/src/decoder/attribute/isVtype.scala @@ -186,6 +186,22 @@ object isVtype { "vrol.vv", "vror.vv", "vwsll.vv", + // rv_zvkg + "vghsh.vv", + "vgmul.vv", + // rv_zvkned + "vaesdf.vv", + "vaesdm.vv", + "vaesef.vv", + "vaesem.vv", + // rv_zvknha + "vsha2ms.vv", + "vsha2ch.vv", + "vsha2cl.vv", + // rv_zvksed + "vsm4r.vv", + // rv_zvksh + "vsm3me.vv", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isZvk.scala b/t1/src/decoder/attribute/isZvk.scala new file mode 100644 index 000000000..459423778 --- /dev/null +++ b/t1/src/decoder/attribute/isZvk.scala @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl.decoder.attribute + +import org.chipsalliance.t1.rtl.decoder.T1DecodePattern + +object isZvk { + def apply(t1DecodePattern: T1DecodePattern): isZvk = + Seq( + y _ -> Y, + n _ -> N, + dc _ -> DC + ).collectFirst { + case (fn, tri) if fn(t1DecodePattern) => isZvk(tri) + }.get + + def y(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = if(t1DecodePattern.param.zvkEnable) Seq( + "vghsh.vv", + "vgmul.vv", + "vaesdf.vv", + "vaesdf.vs", + "vaesdm.vv", + "vaesdm.vs", + "vaesef.vv", + "vaesef.vs", + "vaesem.vv", + "vaesem.vs", + "vaesz.vs", + "vaeskf1.vi", + "vaeskf2.vi", + "vsha2ms.vv", + "vsha2ch.vv", + "vsha2cl.vv", + "vsm4k.vi", + "vsm4r.vv", + "vsm4r.vs", + "vsm3c.vi", + "vsm3me.vv", + ) else Seq() + allMatched.contains(t1DecodePattern.instruction.name) + } + def n(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = t1DecodePattern.param.allInstructions.filter(i => + !(y(t1DecodePattern) || dc(t1DecodePattern)) + ) + allMatched.contains(t1DecodePattern.instruction) + } + + def dc(t1DecodePattern: T1DecodePattern): Boolean = false +} + +case class isZvk(value: TriState) extends BooleanDecodeAttribute { + override val description: String = "goes to [[org.chipsalliance.t1.rtl.LaneZvk]]." +} diff --git a/t1/src/decoder/attribute/zvkUop.scala b/t1/src/decoder/attribute/zvkUop.scala new file mode 100644 index 000000000..6194e3234 --- /dev/null +++ b/t1/src/decoder/attribute/zvkUop.scala @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl.decoder.attribute + +import org.chipsalliance.t1.rtl.decoder.T1DecodePattern + +trait ZvkUOPType extends Uop +object zvkUop0 extends ZvkUOPType // +object zvkUop1 extends ZvkUOPType // +object zvkUop2 extends ZvkUOPType // +object zvkUop3 extends ZvkUOPType // +object zvkUop4 extends ZvkUOPType // +object zvkUop5 extends ZvkUOPType // +object zvkUop6 extends ZvkUOPType // +object zvkUop7 extends ZvkUOPType // +object zvkUop8 extends ZvkUOPType // + +object ZvkUOP { + def apply(t1DecodePattern: T1DecodePattern): Uop = { + Seq( + t0 _ -> zvkUop0, + t1 _ -> zvkUop1, + t2 _ -> zvkUop2, + t3 _ -> zvkUop3, + t4 _ -> zvkUop4, + t5 _ -> zvkUop5, + t6 _ -> zvkUop6, + t7 _ -> zvkUop7, + t8 _ -> zvkUop8, + ).collectFirst { + case (fn, tpe) if fn(t1DecodePattern) => tpe + }.getOrElse(UopDC) + } + def t0(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t1(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t2(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t3(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t4(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t5(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t6(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t7(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t8(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + ) + allMatched.contains(t1DecodePattern.instruction.name) + } +} diff --git a/t1/src/laneStage/LaneExecutionBridge.scala b/t1/src/laneStage/LaneExecutionBridge.scala index cf3cf1c9d..ae3c6684b 100644 --- a/t1/src/laneStage/LaneExecutionBridge.scala +++ b/t1/src/laneStage/LaneExecutionBridge.scala @@ -13,6 +13,7 @@ import org.chipsalliance.t1.rtl.decoder.Decoder class LaneExecuteRequest(parameter: LaneParameter, isLastSlot: Boolean) extends Bundle { val src: Vec[UInt] = Vec(3, UInt(parameter.datapathWidth.W)) val crossReadSource: Option[UInt] = Option.when(isLastSlot)(UInt((parameter.datapathWidth * 2).W)) + val zvkCrossReadSource: Option[UInt] = Option.when(isLastSlot)(UInt((parameter.datapathWidth * 4).W)) val bordersForMaskLogic: Bool = Bool() val mask: UInt = UInt((parameter.datapathWidth / 8).W) val maskForFilter: UInt = UInt((parameter.datapathWidth / 8).W) @@ -149,6 +150,7 @@ class LaneExecutionBridge(parameter: LaneParameter, isLastSlot: Boolean, slotInd executionRecord.maskForFilter := enqueue.bits.maskForFilter executionRecord.source := enqueue.bits.src executionRecord.crossReadSource.foreach(_ := enqueue.bits.crossReadSource.get) + executionRecord.zvkCrossReadSource.foreach(_ := enqueue.bits.zvkCrossReadSource.get) executionRecord.sSendResponse.foreach(_ := enqueue.bits.sSendResponse.get) executionRecord.groupCounter := enqueue.bits.groupCounter executionRecord.decodeResult := enqueue.bits.decodeResult @@ -163,7 +165,7 @@ class LaneExecutionBridge(parameter: LaneParameter, isLastSlot: Boolean, slotInd * it can be vd or src2. */ val doubleCollapse: Option[UInt] = Option.when(isLastSlot) { - val cutCrossReadData: Vec[UInt] = cutUInt(executionRecord.crossReadSource.get, parameter.datapathWidth) + val cutCrossReadData: Vec[UInt] = cutUInt(executionRecord.crossReadSource.get, parameter.datapathWidth) // TODO: zvkCrossReadSource Mux(executionRecord.executeIndex, cutCrossReadData(1), cutCrossReadData(0)) } diff --git a/t1/src/laneStage/LaneStage1.scala b/t1/src/laneStage/LaneStage1.scala index f44826e79..ef8dc156d 100644 --- a/t1/src/laneStage/LaneStage1.scala +++ b/t1/src/laneStage/LaneStage1.scala @@ -9,7 +9,7 @@ import chisel3.probe.{Probe, ProbeValue, define} import chisel3.util._ import chisel3.util.experimental.decode.DecodeBundle import org.chipsalliance.t1.rtl.decoder.Decoder -import org.chipsalliance.t1.rtl.lane.{CrossReadUnit, LaneState, VrfReadPipe} +import org.chipsalliance.t1.rtl.lane.{CrossReadUnit, ZvkCrossReadUnit, LaneState, VrfReadPipe} class LaneStage1Enqueue(parameter: LaneParameter, isLastSlot: Boolean) extends Bundle { val groupCounter: UInt = UInt(parameter.groupNumberBits.W) @@ -46,6 +46,7 @@ class LaneStage1Dequeue(parameter: LaneParameter, isLastSlot: Boolean) extends B // read result val src: Vec[UInt] = Vec(3, UInt(parameter.datapathWidth.W)) val crossReadSource: Option[UInt] = Option.when(isLastSlot)(UInt((parameter.datapathWidth * 2).W)) + val zvkCrossReadSource: Option[UInt] = Option.when(isLastSlot)(UInt((parameter.datapathWidth * 4).W)) // pipe state // for exe stage @@ -83,8 +84,14 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { @public val vrfCheckRequest: Vec[VRFReadRequest] = IO(Vec(readCheckSize, Output(readRequestType))) + val zvkReadCheckSize: Int = if(isLastSlot && parameter.zvkEnable) 7 else 3 + @public + val zvkVrfCheckRequest: Vec[VRFReadRequest] = IO(Vec(zvkReadCheckSize, Output(readRequestType))) + @public val checkResult: Vec[Bool] = IO(Vec(readCheckSize, Input(Bool()))) + @public + val zvkCheckResult: Vec[Bool] = IO(Vec(zvkReadCheckSize, Input(Bool()))) /** VRF read result for each slot, * 3 is for [[source1]] [[source2]] [[source3]] @@ -96,11 +103,19 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { val readBusDequeue: Option[Vec[DecoupledIO[ReadBusData]]] = Option.when(isLastSlot)(IO( Vec(2, Flipped(Decoupled(new ReadBusData(parameter: LaneParameter)))) )) + @public + val readBusDequeue4: Option[Vec[DecoupledIO[ReadBusData]]] = Option.when(isLastSlot & parameter.zvkEnable)(IO( + Vec(4, Flipped(Decoupled(new ReadBusData(parameter: LaneParameter)))) + )) @public val readBusRequest: Option[Vec[DecoupledIO[ReadBusData]]] = Option.when(isLastSlot)(IO(Vec(2, Decoupled(new ReadBusData(parameter))))) + @public + val readBusRequest4: Option[Vec[DecoupledIO[ReadBusData]]] = + Option.when(isLastSlot & parameter.zvkEnable)(IO(Vec(4, Decoupled(new ReadBusData(parameter))))) + val groupCounter: UInt = enqueue.bits.groupCounter // todo: param @@ -124,12 +139,28 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { Option.when(isLastSlot)(Module(new Queue(vrfReadEntryType, readRequestQueueSizeAfterCheck))) val queueAfterCheckMSB: Option[Queue[VRFReadQueueEntry]] = Option.when(isLastSlot)(Module(new Queue(vrfReadEntryType, readRequestQueueSizeAfterCheck))) + val queueAfterCheckZvkLSBLSB: Option[Queue[VRFReadQueueEntry]] = + Option.when(isLastSlot && parameter.zvkEnable)(Module(new Queue(vrfReadEntryType, readRequestQueueSizeAfterCheck))) + val queueAfterCheckZvkLSBMSB: Option[Queue[VRFReadQueueEntry]] = + Option.when(isLastSlot && parameter.zvkEnable)(Module(new Queue(vrfReadEntryType, readRequestQueueSizeAfterCheck))) + val queueAfterCheckZvkMSBLSB: Option[Queue[VRFReadQueueEntry]] = + Option.when(isLastSlot && parameter.zvkEnable)(Module(new Queue(vrfReadEntryType, readRequestQueueSizeAfterCheck))) + val queueAfterCheckZvkMSBMSB: Option[Queue[VRFReadQueueEntry]] = + Option.when(isLastSlot && parameter.zvkEnable)(Module(new Queue(vrfReadEntryType, readRequestQueueSizeAfterCheck))) // read request queue for cross read lsb & msb val queueBeforeCheckLSB: Option[Queue[VRFReadQueueEntry]] = Option.when(isLastSlot)(Module(new Queue(vrfReadEntryType, readRequestQueueSizeBeforeCheck))) val queueBeforeCheckMSB: Option[Queue[VRFReadQueueEntry]] = Option.when(isLastSlot)(Module(new Queue(vrfReadEntryType, readRequestQueueSizeBeforeCheck))) + val queueBeforeCheckZvkLSBLSB: Option[Queue[VRFReadQueueEntry]] = + Option.when(isLastSlot && parameter.zvkEnable)(Module(new Queue(vrfReadEntryType, readRequestQueueSizeBeforeCheck))) + val queueBeforeCheckZvkLSBMSB: Option[Queue[VRFReadQueueEntry]] = + Option.when(isLastSlot && parameter.zvkEnable)(Module(new Queue(vrfReadEntryType, readRequestQueueSizeBeforeCheck))) + val queueBeforeCheckZvkMSBLSB: Option[Queue[VRFReadQueueEntry]] = + Option.when(isLastSlot && parameter.zvkEnable)(Module(new Queue(vrfReadEntryType, readRequestQueueSizeBeforeCheck))) + val queueBeforeCheckZvkMSBMSB: Option[Queue[VRFReadQueueEntry]] = + Option.when(isLastSlot && parameter.zvkEnable)(Module(new Queue(vrfReadEntryType, readRequestQueueSizeBeforeCheck))) // pipe from enqueue val pipeQueue: Queue[LaneStage1Enqueue] = @@ -147,11 +178,33 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { val afterCheckQueueVec: Seq[Queue[VRFReadQueueEntry]] = Seq(queueAfterCheck1, queueAfterCheck2, queueAfterCheckVd) ++ queueAfterCheckLSB ++ queueAfterCheckMSB - val allReadQueueReady: Bool = beforeCheckQueueVec.map(_.io.enq.ready).reduce(_ && _) + val beforeCheckZvkQueueVec: Seq[Queue[VRFReadQueueEntry]] = + Seq(queueBeforeCheck1, queueBeforeCheck2, queueBeforeCheckVd) ++ + queueBeforeCheckZvkLSBLSB ++ queueBeforeCheckZvkLSBMSB ++ + queueBeforeCheckZvkMSBLSB ++ queueBeforeCheckZvkMSBMSB + val afterCheckZvkQueueVec: Seq[Queue[VRFReadQueueEntry]] = + Seq(queueAfterCheck1, queueAfterCheck2, queueAfterCheckVd) ++ + queueAfterCheckZvkLSBLSB ++ queueAfterCheckZvkLSBMSB ++ + queueAfterCheckZvkMSBLSB ++ queueAfterCheckZvkMSBMSB + val allReadQueueReady: Bool = { + val ready = beforeCheckQueueVec.map(_.io.enq.ready).reduce(_ && _) + if(parameter.zvkEnable) { + val zvkReady = beforeCheckZvkQueueVec.map(_.io.enq.ready).reduce(_ && _) + Mux(enqueue.bits.decodeResult(Decoder.crossRead) & enqueue.bits.decodeResult(Decoder.zvk), zvkReady, ready) + } else { + ready + } + } beforeCheckQueueVec.foreach{ q => q.io.enq.bits.instructionIndex := enqueue.bits.instructionIndex q.io.enq.bits.groupIndex := enqueue.bits.groupCounter } + if(parameter.zvkEnable) { + beforeCheckZvkQueueVec.foreach{ q => + q.io.enq.bits.instructionIndex := enqueue.bits.instructionIndex + q.io.enq.bits.groupIndex := enqueue.bits.groupCounter + } + } enqueue.ready := allReadQueueReady && pipeQueue.io.enq.ready @@ -162,6 +215,14 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { after.io.enq.valid := before.io.deq.valid && checkResult(i) after.io.enq.bits := before.io.deq.bits } + if(parameter.zvkEnable) { + beforeCheckZvkQueueVec.zip(afterCheckZvkQueueVec).zipWithIndex.foreach { case ((before, after), i) => + zvkVrfCheckRequest(i) := before.io.deq.bits + before.io.deq.ready := after.io.enq.ready && zvkCheckResult(i) + after.io.enq.valid := before.io.deq.valid && zvkCheckResult(i) + after.io.enq.bits := before.io.deq.bits + } + } // request enqueue queueBeforeCheck1.io.enq.valid := enqueue.fire && enqueue.bits.decodeResult(Decoder.vtype) && !enqueue.bits.skipRead queueBeforeCheck2.io.enq.valid := enqueue.fire && !enqueue.bits.skipRead @@ -169,6 +230,11 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { (queueBeforeCheckLSB ++ queueBeforeCheckMSB).foreach { q => q.io.enq.valid := enqueue.valid && allReadQueueReady && enqueue.bits.decodeResult(Decoder.crossRead) } + if(parameter.zvkEnable) { + (queueBeforeCheckZvkLSBLSB ++ queueBeforeCheckZvkLSBMSB ++ queueBeforeCheckZvkMSBLSB ++ queueBeforeCheckZvkMSBMSB).foreach { q => + q.io.enq.valid := enqueue.valid && allReadQueueReady && enqueue.bits.decodeResult(Decoder.crossRead) && enqueue.bits.decodeResult(Decoder.zvk) + } + } // calculate vs queueBeforeCheck1.io.enq.bits.vs := Mux( @@ -223,6 +289,30 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { q.io.enq.bits.offset := groupCounter(parameter.vrfOffsetBits - 2, 0) ## true.B } + if(parameter.zvkEnable) { + queueBeforeCheckZvkLSBLSB.foreach { q => + q.io.enq.bits.vs := enqueue.bits.vs2 + groupCounter(parameter.groupNumberBits - 2, parameter.vrfOffsetBits - 1) + q.io.enq.bits.readSource := 1.U + q.io.enq.bits.offset := groupCounter(parameter.vrfOffsetBits - 2, 0) ## true.B + } + queueBeforeCheckZvkLSBMSB.foreach { q => + q.io.enq.bits.vs := enqueue.bits.vs2 + groupCounter(parameter.groupNumberBits - 2, parameter.vrfOffsetBits - 1) + q.io.enq.bits.readSource := 1.U + q.io.enq.bits.offset := groupCounter(parameter.vrfOffsetBits - 2, 0) ## true.B + } + queueBeforeCheckZvkMSBLSB.foreach { q => + q.io.enq.bits.vs := enqueue.bits.vs2 + groupCounter(parameter.groupNumberBits - 2, parameter.vrfOffsetBits - 1) + q.io.enq.bits.readSource := 1.U + q.io.enq.bits.offset := groupCounter(parameter.vrfOffsetBits - 2, 0) ## true.B + } + queueBeforeCheckZvkMSBMSB.foreach { q => + q.io.enq.bits.vs := enqueue.bits.vs2 + groupCounter(parameter.groupNumberBits - 2, parameter.vrfOffsetBits - 1) + q.io.enq.bits.readSource := 1.U + q.io.enq.bits.offset := groupCounter(parameter.vrfOffsetBits - 2, 0) ## true.B + } + } + + // read pipe val readPipe0: Instance[VrfReadPipe] = Instantiate(new VrfReadPipe(parameter, arbitrate = false)) val readPipe1: Instance[VrfReadPipe] = Instantiate(new VrfReadPipe(parameter, arbitrate = isLastSlot)) @@ -240,6 +330,10 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { // cross lane queue val dataQueueLSB = Option.when(isLastSlot)(Module(new Queue(UInt(parameter.datapathWidth.W), dataQueueSize))) val dataQueueMSB = Option.when(isLastSlot)(Module(new Queue(UInt(parameter.datapathWidth.W), dataQueueSize))) + val dataQueueZvkLSBLSB = Option.when(isLastSlot && parameter.zvkEnable)(Module(new Queue(UInt(parameter.datapathWidth.W), dataQueueSize))) + val dataQueueZvkLSBMSB = Option.when(isLastSlot && parameter.zvkEnable)(Module(new Queue(UInt(parameter.datapathWidth.W), dataQueueSize))) // TODO + val dataQueueZvkMSBLSB = Option.when(isLastSlot && parameter.zvkEnable)(Module(new Queue(UInt(parameter.datapathWidth.W), dataQueueSize))) // TODO + val dataQueueZvkMSBMSB = Option.when(isLastSlot && parameter.zvkEnable)(Module(new Queue(UInt(parameter.datapathWidth.W), dataQueueSize))) val dataQueueNotFull2: Bool = { val counterReg = RegInit(0.U(log2Ceil(dataQueueSize + 1).W)) @@ -268,6 +362,21 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { blockingHandshake(readPipe2.enqueue, queueAfterCheckVd.io.deq, dataQueueNotFullVd) // contender for cross read + if(parameter.zvkEnable) { + readPipe1.contender.zip(queueAfterCheckZvkLSBLSB).foreach { case (port, queue) => + val dataQueueNotFullLSB: Bool = { + val counterReg = RegInit(0.U(log2Ceil(dataQueueSize + 1).W)) + val doEnq = queue.io.deq.fire + val doDeq = dataQueueZvkLSBLSB.get.io.deq.fire || dataQueueLSB.get.io.deq.fire + val countChange = Mux(doEnq, 1.U, -1.S(log2Ceil(dataQueueSize + 1).W).asUInt) + when(doEnq ^ doDeq) { + counterReg := counterReg + countChange + } + !counterReg(log2Ceil(dataQueueSize)) + } + blockingHandshake(port, queue.io.deq, dataQueueNotFullLSB) + } + } readPipe1.contender.zip(queueAfterCheckLSB).foreach { case (port, queue) => val dataQueueNotFullLSB: Bool = { val counterReg = RegInit(0.U(log2Ceil(dataQueueSize + 1).W)) @@ -281,6 +390,21 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { } blockingHandshake(port, queue.io.deq, dataQueueNotFullLSB) } + if(parameter.zvkEnable) { + readPipe2.contender.zip(queueAfterCheckZvkMSBMSB).foreach { case (port, queue) => + val dataQueueNotFullMSB: Bool = { + val counterReg = RegInit(0.U(log2Ceil(dataQueueSize + 1).W)) + val doEnq = queue.io.deq.fire + val doDeq = dataQueueZvkMSBMSB.get.io.deq.fire || dataQueueMSB.get.io.deq.fire + val countChange = Mux(doEnq, 1.U, -1.S(log2Ceil(dataQueueSize + 1).W).asUInt) + when(doEnq ^ doDeq) { + counterReg := counterReg + countChange + } + !counterReg(log2Ceil(dataQueueSize)) + } + blockingHandshake(port, queue.io.deq, dataQueueNotFullMSB) + } + } readPipe2.contender.zip(queueAfterCheckMSB).foreach { case (port, queue) => val dataQueueNotFullMSB: Bool = { val counterReg = RegInit(0.U(log2Ceil(dataQueueSize + 1).W)) @@ -301,11 +425,18 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { dataQueueVs2.io.enq <> readPipe1.dequeue // pipe1 <> dataQueueLSB dataQueueLSB.zip(readPipe1.contenderDequeue).foreach { case (sink, source) => sink.io.enq <> source } + if(parameter.zvkEnable) { + dataQueueZvkLSBLSB.zip(readPipe1.contenderDequeue).foreach { case (sink, source) => sink.io.enq <> source } + } + // pipe2 <-> dataQueueVd dataQueueVd.io.enq <> readPipe2.dequeue // pipe2 <-> dataQueueMSB dataQueueMSB.zip(readPipe2.contenderDequeue).foreach { case (sink, source) => sink.io.enq <> source } + if(parameter.zvkEnable) { + dataQueueZvkMSBMSB.zip(readPipe2.contenderDequeue).foreach { case (sink, source) => sink.io.enq <> source } + } } else { dataQueueVs2.io.enq <> readPipe1.dequeue dataQueueVd.io.enq <> readPipe2.dequeue @@ -316,6 +447,12 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { Option.when(isLastSlot)(Module(new Queue(UInt((parameter.datapathWidth * 2).W), 1))) val crossReadStageFree: Option[Bool] = Option.when(isLastSlot)(Wire(Bool())) val crossReadUnitOp: Option[Instance[CrossReadUnit]] = Option.when(isLastSlot)(Instantiate(new CrossReadUnit(parameter))) + + val zvkCrossReadResultQueue: Option[Queue[UInt]] = + Option.when(isLastSlot && parameter.zvkEnable)(Module(new Queue(UInt((parameter.datapathWidth * 4).W), 1))) + val zvkCrossReadStageFree: Option[Bool] = Option.when(isLastSlot && parameter.zvkEnable)(Wire(Bool())) + val zvkCrossReadUnitOp: Option[Instance[ZvkCrossReadUnit]] = Option.when(isLastSlot && parameter.zvkEnable)(Instantiate(new ZvkCrossReadUnit(parameter))) + if (isLastSlot) { val dataGroupQueue: Queue[UInt] = Module( @@ -327,6 +464,7 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { // todo: need pipe ? val laneIndexReg = RegInit(enqueue.bits.laneIndex) val crossReadUnit = crossReadUnitOp.get + crossReadUnit.dataInputLSB <> dataQueueLSB.get.io.deq crossReadUnit.dataInputMSB <> dataQueueMSB.get.io.deq crossReadUnit.laneIndex := laneIndexReg @@ -341,7 +479,34 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { assert(dataGroupQueue.io.enq.ready || !dataGroupQueue.io.enq.valid) dataGroupQueue.io.enq.bits := enqueue.bits.groupCounter dataGroupQueue.io.deq.ready := crossReadUnit.dataInputLSB.fire - dequeue.bits.readBusDequeueGroup.get := crossReadUnitOp.get.currentGroup + dequeue.bits.readBusDequeueGroup.get := crossReadUnitOp.get.currentGroup // TODO: readBusDequeueGroup is currently unused + + if(parameter.zvkEnable) { + val zvkDataGroupQueue: Queue[UInt] = + Module( + new Queue( + UInt(parameter.groupNumberBits.W), + readRequestQueueSizeBeforeCheck + readRequestQueueSizeBeforeCheck + dataQueueSize + 2 + ) + ) + val zvkCrossReadUnit = zvkCrossReadUnitOp.get + zvkCrossReadUnit.dataInputLSBLSB <> dataQueueZvkLSBLSB.get.io.deq + zvkCrossReadUnit.dataInputLSBMSB <> dataQueueZvkLSBMSB.get.io.deq + zvkCrossReadUnit.dataInputMSBLSB <> dataQueueZvkMSBLSB.get.io.deq + zvkCrossReadUnit.dataInputMSBMSB <> dataQueueZvkMSBMSB.get.io.deq + zvkCrossReadUnit.laneIndex := laneIndexReg + zvkCrossReadUnit.dataGroup := zvkDataGroupQueue.io.deq.bits + readBusRequest4.get.zip(zvkCrossReadUnit.readBusRequest).foreach { case (sink, source) => sink <> source} + zvkCrossReadUnit.readBusDequeue.get.zip(readBusDequeue4.get).foreach { case (sink, source) => sink <> source} + zvkCrossReadResultQueue.get.io.enq <> zvkCrossReadUnit.crossReadDequeue + zvkCrossReadStageFree.get := zvkCrossReadUnit.crossReadStageFree + + // data group + zvkDataGroupQueue.io.enq.valid := enqueue.fire && enqueue.bits.decodeResult(Decoder.crossRead) + assert(zvkDataGroupQueue.io.enq.ready || !zvkDataGroupQueue.io.enq.valid) + zvkDataGroupQueue.io.enq.bits := enqueue.bits.groupCounter + zvkDataGroupQueue.io.deq.ready := zvkCrossReadUnit.dataInputLSBLSB.fire + } } val source1Select: UInt = Mux( @@ -353,6 +518,7 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { dequeue.bits.groupCounter := pipeQueue.io.deq.bits.groupCounter dequeue.bits.src := VecInit(Seq(source1Select, dataQueueVs2.io.deq.bits, dataQueueVd.io.deq.bits)) dequeue.bits.crossReadSource.foreach(_ := crossReadResultQueue.get.io.deq.bits) + dequeue.bits.zvkCrossReadSource.foreach(_ := zvkCrossReadResultQueue.get.io.deq.bits) dequeue.bits.sSendResponse.foreach(_ := pipeQueue.io.deq.bits.sSendResponse.get) dequeue.bits.decodeResult := pipeQueue.io.deq.bits.decodeResult dequeue.bits.vSew1H := pipeQueue.io.deq.bits.vSew1H @@ -374,7 +540,12 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { dataQueueVs2.io.deq.valid || pipeQueue.io.deq.bits.skipRead, dataQueueVd.io.deq.valid || (pipeQueue.io.deq.bits.decodeResult(Decoder.sReadVD)) ) ++ - crossReadResultQueue.map(_.io.deq.valid || !pipeQueue.io.deq.bits.decodeResult(Decoder.crossRead)) + crossReadResultQueue.map(_.io.deq.valid || !pipeQueue.io.deq.bits.decodeResult(Decoder.crossRead)) ++ { + if(parameter.zvkEnable) zvkCrossReadResultQueue.map(_.io.deq.valid || + (!pipeQueue.io.deq.bits.decodeResult(Decoder.crossRead) && + !pipeQueue.io.deq.bits.decodeResult(Decoder.zvk)) + ) else Seq() + } val allDataQueueValid: Bool = VecInit(dataQueueValidVec).asUInt.andR dequeue.valid := allDataQueueValid && pipeQueue.io.deq.valid dataQueueVs1.ready := allDataQueueValid && dequeue.ready && pipeQueue.io.deq.bits.decodeResult(Decoder.vtype) @@ -382,6 +553,9 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { dataQueueVd.io.deq.ready := allDataQueueValid && dequeue.ready && !pipeQueue.io.deq.bits.decodeResult(Decoder.sReadVD) crossReadResultQueue.foreach(_.io.deq.ready := allDataQueueValid && dequeue.ready && pipeQueue.io.deq.bits.decodeResult(Decoder.crossRead)) + if(parameter.zvkEnable) { + zvkCrossReadResultQueue.foreach(_.io.deq.ready := allDataQueueValid && dequeue.ready && (pipeQueue.io.deq.bits.decodeResult(Decoder.crossRead) && pipeQueue.io.deq.bits.decodeResult(Decoder.zvk))) + } stageValid := pipeQueue.io.deq.valid val stageFinish = !stageValid @@ -400,10 +574,26 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { @public val sSendCrossReadResultMSBProbe = Option.when(isLastSlot)(IO(Output(Probe(Bool())))) @public + val sSendZvkCrossReadResultLSBLSBProbe = Option.when(isLastSlot && parameter.zvkEnable)(IO(Output(Probe(Bool())))) + @public + val sSendZvkCrossReadResultLSBMSBProbe = Option.when(isLastSlot && parameter.zvkEnable)(IO(Output(Probe(Bool())))) + @public + val sSendZvkCrossReadResultMSBLSBProbe = Option.when(isLastSlot && parameter.zvkEnable)(IO(Output(Probe(Bool())))) + @public + val sSendZvkCrossReadResultMSBMSBProbe = Option.when(isLastSlot && parameter.zvkEnable)(IO(Output(Probe(Bool())))) + @public val wCrossReadLSBProbe = Option.when(isLastSlot)(IO(Output(Probe(Bool())))) @public val wCrossReadMSBProbe = Option.when(isLastSlot)(IO(Output(Probe(Bool())))) @public + val wZvkCrossReadLSBLSBProbe = Option.when(isLastSlot && parameter.zvkEnable)(IO(Output(Probe(Bool())))) + @public + val wZvkCrossReadLSBMSBProbe = Option.when(isLastSlot && parameter.zvkEnable)(IO(Output(Probe(Bool())))) + @public + val wZvkCrossReadMSBLSBProbe = Option.when(isLastSlot && parameter.zvkEnable)(IO(Output(Probe(Bool())))) + @public + val wZvkCrossReadMSBMSBProbe = Option.when(isLastSlot && parameter.zvkEnable)(IO(Output(Probe(Bool())))) + @public val vrfReadRequestProbe: Seq[(Bool, Bool)] = Seq.fill(3)((IO(Output(Probe(Bool()))),IO(Output(Probe(Bool()))))) @@ -416,8 +606,19 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { readFinishProbe.foreach(p => define(p, ProbeValue(dataQueueVs2.io.deq.valid))) sSendCrossReadResultLSBProbe.foreach(p => define(p, ProbeValue(crossReadUnitOp.get.crossWriteState.sSendCrossReadResultLSB))) sSendCrossReadResultMSBProbe.foreach(p => define(p, ProbeValue(crossReadUnitOp.get.crossWriteState.sSendCrossReadResultMSB))) + println(sSendCrossReadResultMSBProbe) wCrossReadLSBProbe.foreach(p => define(p, ProbeValue(crossReadUnitOp.get.crossWriteState.wCrossReadLSB))) wCrossReadMSBProbe.foreach(p => define(p, ProbeValue(crossReadUnitOp.get.crossWriteState.wCrossReadMSB))) + if (parameter.zvkEnable) { + sSendZvkCrossReadResultLSBLSBProbe.foreach(p => define(p, ProbeValue(zvkCrossReadUnitOp.get.crossWriteState.sSendCrossReadResult(0)))) + sSendZvkCrossReadResultLSBMSBProbe.foreach(p => define(p, ProbeValue(zvkCrossReadUnitOp.get.crossWriteState.sSendCrossReadResult(1)))) + sSendZvkCrossReadResultMSBLSBProbe.foreach(p => define(p, ProbeValue(zvkCrossReadUnitOp.get.crossWriteState.sSendCrossReadResult(2)))) + sSendZvkCrossReadResultMSBMSBProbe.foreach(p => define(p, ProbeValue(zvkCrossReadUnitOp.get.crossWriteState.sSendCrossReadResult(3)))) + wZvkCrossReadLSBLSBProbe.foreach(p => define(p, ProbeValue(zvkCrossReadUnitOp.get.crossWriteState.wCrossRead(0)))) + wZvkCrossReadLSBMSBProbe.foreach(p => define(p, ProbeValue(zvkCrossReadUnitOp.get.crossWriteState.wCrossRead(1)))) + wZvkCrossReadMSBLSBProbe.foreach(p => define(p, ProbeValue(zvkCrossReadUnitOp.get.crossWriteState.wCrossRead(2)))) + wZvkCrossReadMSBMSBProbe.foreach(p => define(p, ProbeValue(zvkCrossReadUnitOp.get.crossWriteState.wCrossRead(3)))) + } } vrfReadRequestProbe.zipWithIndex.foreach { case((ready, valid), i) => diff --git a/t1/src/laneStage/LaneStage3.scala b/t1/src/laneStage/LaneStage3.scala index 8ccc6fbc1..eee9ad3be 100644 --- a/t1/src/laneStage/LaneStage3.scala +++ b/t1/src/laneStage/LaneStage3.scala @@ -17,6 +17,7 @@ class LaneStage3Enqueue(parameter: LaneParameter, isLastSlot: Boolean) extends B val mask: UInt = UInt((parameter.datapathWidth/8).W) val ffoIndex: UInt = UInt(log2Ceil(parameter.vLen / 8).W) val crossWriteData: Vec[UInt] = Vec(2, UInt(parameter.datapathWidth.W)) + val zvkCrossWriteData: Vec[UInt] = Vec(4, UInt(parameter.datapathWidth.W)) val sSendResponse: Bool = Bool() val ffoSuccess: Bool = Bool() val fpReduceValid: Option[Bool] = Option.when(parameter.fpuEnable && isLastSlot)(Bool()) @@ -56,6 +57,9 @@ class LaneStage3(parameter: LaneParameter, isLastSlot: Boolean) extends Module { @public val crossWritePort: Option[Vec[DecoupledIO[WriteBusData]]] = Option.when(isLastSlot)(IO(Vec(2, Decoupled(new WriteBusData(parameter))))) + @public + val crossWritePort4: Option[Vec[DecoupledIO[WriteBusData]]] = + Option.when(isLastSlot & parameter.zvkEnable)(IO(Vec(4, Decoupled(new WriteBusData(parameter))))) val stageValidReg: Option[Bool] = Option.when(isLastSlot) (RegInit(false.B)) @@ -107,6 +111,16 @@ class LaneStage3(parameter: LaneParameter, isLastSlot: Boolean) extends Module { sendState(index) := true.B } } + crossWritePort4.get.zipWithIndex.foreach { case (port, index) => + port.valid := stageValidReg.get && !sendState(index) + port.bits.mask := pipeEnqueue.get.mask(4 * index + 1, 4 * index) + port.bits.data := pipeEnqueue.get.zvkCrossWriteData(index) + port.bits.counter := pipeEnqueue.get.groupCounter + port.bits.instructionIndex := pipeEnqueue.get.instructionIndex + when(port.fire) { + sendState(index) := true.B + } + } // scheduler synchronization val schedulerFinish: Bool = (sSendResponse ++ wResponseFeedback).reduce(_ && _) @@ -185,4 +199,4 @@ class LaneStage3(parameter: LaneParameter, isLastSlot: Boolean) extends Module { vrfWriteRequest <> vrfWriteQueue.io.deq vrfWriteRequest.bits.offset := vrfPtrReplica.io.deq.bits vrfWriteRequest.valid := vrfPtrReplica.io.deq.valid -} \ No newline at end of file +} diff --git a/t1/src/laneStage/SlotTokenManager.scala b/t1/src/laneStage/SlotTokenManager.scala index ae8bb531e..9be374ac5 100644 --- a/t1/src/laneStage/SlotTokenManager.scala +++ b/t1/src/laneStage/SlotTokenManager.scala @@ -73,6 +73,9 @@ class SlotTokenManager(parameter: LaneParameter) extends Module { @public val crossWriteReports: Vec[ValidIO[UInt]] = IO(Vec(2, Flipped(Valid(UInt(parameter.instructionIndexBits.W))))) + @public + val zvkCrossWriteReports: Option[Vec[ValidIO[UInt]]] = Option.when(parameter.zvkEnable)(IO(Vec(4, Flipped(Valid(UInt(parameter.instructionIndexBits.W)))))) + @public val responseReport: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W)))) diff --git a/t1/src/laneStage/ZvkCrossReadUnit.scala b/t1/src/laneStage/ZvkCrossReadUnit.scala new file mode 100644 index 000000000..bb3490d72 --- /dev/null +++ b/t1/src/laneStage/ZvkCrossReadUnit.scala @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl.lane + +import chisel3._ +import chisel3.experimental.hierarchy.{instantiable, public} +import chisel3.util._ +import org.chipsalliance.t1.rtl.{LaneParameter, ReadBusData} + +class ZvkCrossReadState extends Bundle { + val sSendCrossReadResult: Seq[Bool] = Seq.fill(4)(Bool()) + val wCrossRead: Seq[Bool] = Seq.fill(4)(Bool()) +} + +@instantiable +class ZvkCrossReadUnit(parameter: LaneParameter) extends Module { + @public + val dataInputLSBLSB: DecoupledIO[UInt] = IO(Flipped(Decoupled(UInt(parameter.datapathWidth.W)))) + @public + val dataInputLSBMSB: DecoupledIO[UInt] = IO(Flipped(Decoupled(UInt(parameter.datapathWidth.W)))) + @public + val dataInputMSBLSB: DecoupledIO[UInt] = IO(Flipped(Decoupled(UInt(parameter.datapathWidth.W)))) + @public + val dataInputMSBMSB: DecoupledIO[UInt] = IO(Flipped(Decoupled(UInt(parameter.datapathWidth.W)))) + @public + val laneIndex: UInt = IO(Input(UInt(parameter.laneNumberBits.W))) + @public + val dataGroup: UInt = IO(Input(UInt(parameter.groupNumberBits.W))) + @public + val currentGroup: UInt = IO(Output(UInt(parameter.groupNumberBits.W))) + + @public + val readBusDequeue: Option[Vec[DecoupledIO[ReadBusData]]] = + Option.when(parameter.zvkEnable)( + IO( + Vec(4, Flipped(Decoupled(new ReadBusData(parameter: LaneParameter))) + )) + ) + @public + val readBusRequest: Option[Vec[DecoupledIO[ReadBusData]]] = + Option.when(parameter.zvkEnable)(IO(Vec(4, Decoupled(new ReadBusData(parameter))))) + + @public + val crossReadDequeue: DecoupledIO[UInt] = IO(Decoupled(UInt((parameter.datapathWidth * 4).W))) + @public + val crossReadStageFree: Bool = IO(Output(Bool())) + @public + val crossWriteState = IO(Output(new ZvkCrossReadState)) + + val stageValid: Bool = RegInit(false.B) + val sSendCrossReadResultLSBLSB, sSendCrossReadResultMSBLSB, wCrossReadLSBLSB, wCrossReadMSBLSB = RegInit(true.B) + val sSendCrossReadResultLSBMSB, sSendCrossReadResultMSBMSB, wCrossReadLSBMSB, wCrossReadMSBMSB = RegInit(true.B) + val stateVec: Seq[Bool] = Seq( + sSendCrossReadResultLSBLSB, + sSendCrossReadResultLSBMSB, + sSendCrossReadResultMSBLSB, + sSendCrossReadResultMSBMSB, + wCrossReadLSBLSB, + wCrossReadLSBMSB, + wCrossReadMSBLSB, + wCrossReadMSBMSB, + ) + val sendDataVec: Vec[UInt] = RegInit(VecInit(Seq.fill(4)(0.U(parameter.datapathWidth.W)))) + val groupCounter: UInt = RegInit(0.U(parameter.groupNumberBits.W)) + val receiveDataVec: Vec[UInt] = RegInit(VecInit(Seq.fill(4)(0.U(parameter.datapathWidth.W)))) + val sendState = Seq( + sSendCrossReadResultLSBLSB, + sSendCrossReadResultLSBMSB, + sSendCrossReadResultMSBLSB, + sSendCrossReadResultMSBMSB, + ) + val receiveState = Seq( + wCrossReadLSBLSB, + wCrossReadLSBMSB, + wCrossReadMSBLSB, + wCrossReadMSBMSB, + ) + + readBusRequest.get.zipWithIndex.foreach { case (port, index) => + port.valid := stageValid && !sendState(index) + port.bits.data := sendDataVec(index) + when(port.fire) { sendState(index) := true.B} + } + + readBusDequeue.get.zipWithIndex.foreach { case (port, index) => + when(port.fire) { + receiveState(index) := true.B + receiveDataVec(index) := port.bits.data + } + port.ready := !receiveState(index) + } + val allStateReady: Bool = stateVec.reduce(_ && _) + val stageReady: Bool = !stageValid || (allStateReady && crossReadDequeue.ready) + val allSourceValid: Bool = Seq( + dataInputLSBLSB.valid, + dataInputLSBMSB.valid, + dataInputMSBLSB.valid, + dataInputMSBMSB.valid, + ).reduce(_ && _) + val enqueueFire: Bool = stageReady && allSourceValid + dataInputLSBLSB.ready := allSourceValid && stageReady + dataInputLSBMSB.ready := allSourceValid && stageReady + dataInputMSBLSB.ready := allSourceValid && stageReady + dataInputMSBMSB.ready := allSourceValid && stageReady + + when(enqueueFire ^ crossReadDequeue.fire) { + stageValid := enqueueFire + } + when(enqueueFire) { + stateVec.foreach(_ := false.B) + sendDataVec := VecInit(Seq( + dataInputLSBLSB.bits, + dataInputLSBMSB.bits, + dataInputMSBLSB.bits, + dataInputMSBMSB.bits, + )) + groupCounter := dataGroup + } + currentGroup := groupCounter + crossReadDequeue.bits := receiveDataVec.asUInt + crossReadDequeue.valid := allStateReady && stageValid + crossReadStageFree := (!stageValid) && stateVec.reduce(_ && _) + + crossWriteState.sSendCrossReadResult(0) := sSendCrossReadResultLSBLSB + crossWriteState.sSendCrossReadResult(1) := sSendCrossReadResultLSBMSB + crossWriteState.sSendCrossReadResult(2) := sSendCrossReadResultMSBLSB + crossWriteState.sSendCrossReadResult(3) := sSendCrossReadResultMSBMSB + crossWriteState.wCrossRead(0) := wCrossReadLSBLSB + crossWriteState.wCrossRead(1) := wCrossReadLSBMSB + crossWriteState.wCrossRead(2) := wCrossReadMSBLSB + crossWriteState.wCrossRead(3) := wCrossReadMSBMSB +} diff --git a/t1/src/vrf/VRF.scala b/t1/src/vrf/VRF.scala index 724ef6372..88f503293 100644 --- a/t1/src/vrf/VRF.scala +++ b/t1/src/vrf/VRF.scala @@ -51,6 +51,7 @@ case class VRFParam( datapathWidth: Int, chainingSize: Int, portFactor: Int, + zvkEnable: Boolean, ramType: RamType) extends SerializableModuleParameter { @@ -150,9 +151,15 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar val readCheck: Vec[VRFReadRequest] = IO(Vec(parameter.chainingSize * 3 + 2, Input( new VRFReadRequest(parameter.regNumBits, parameter.vrfOffsetBits, parameter.instructionIndexBits) ))) + @public + val zvkReadCheck: Option[Vec[VRFReadRequest]] = Option.when(parameter.zvkEnable)(IO(Vec(parameter.chainingSize * 3 + 4, Input( + new VRFReadRequest(parameter.regNumBits, parameter.vrfOffsetBits, parameter.instructionIndexBits) + )))) @public val readCheckResult: Vec[Bool] = IO(Vec(parameter.chainingSize * 3 + 2, Output(Bool()))) + @public + val zvkReadCheckResult: Option[Vec[Bool]] = Option.when(parameter.zvkEnable)(IO(Vec(parameter.chainingSize * 3 + 4, Output(Bool())))) /** VRF read results. */ @public @@ -273,6 +280,22 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar checkModule.checkResult }.reduce(_ && _) } + zvkReadCheck.get.zip(zvkReadCheckResult.get).foreach { case (req, res) => + val recordSelect = chainingRecord + // 先找到自的record + val readRecord = + Mux1H(recordSelect.map(_.bits.instIndex === req.instructionIndex), recordSelect.map(_.bits)) + res := + recordSelect.zip(recordValidVec).zipWithIndex.map { + case ((r, f), recordIndex) => + val checkModule = Instantiate(new ChainingCheck(parameter)) + checkModule.read := req + checkModule.readRecord := readRecord + checkModule.record := r + checkModule.recordValid := f + checkModule.checkResult + }.reduce(_ && _) + } val checkSize: Int = readRequests.size val (firstOccupied, secondOccupied) = readRequests.zipWithIndex.foldLeft(