Skip to content

Commit

Permalink
[rtl] support zvk
Browse files Browse the repository at this point in the history
  • Loading branch information
Lucas-Wye committed Aug 21, 2024
1 parent 9192d5f commit 0a75a41
Show file tree
Hide file tree
Showing 25 changed files with 871 additions and 30 deletions.
5 changes: 3 additions & 2 deletions configgen/generated/blastoise.json
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,9 @@
]
]
],
"zvbbModuleParameters": []
"zvbbModuleParameters": [],
"zvkModuleParameters": []
}
},
"generator": "org.chipsalliance.t1.rtl.T1"
}
}
5 changes: 3 additions & 2 deletions configgen/generated/machamp.json
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,9 @@
]
],
"floatModuleParameters": [],
"zvbbModuleParameters": []
"zvbbModuleParameters": [],
"zvkModuleParameters": []
}
},
"generator": "org.chipsalliance.t1.rtl.T1"
}
}
19 changes: 18 additions & 1 deletion configgen/generated/psyduck.json
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,25 @@
3
]
]
],
"zvkModuleParameters": [
[
{
"parameter": {
"datapathWidth": 32,
"latency": 3
},
"generator": "org.chipsalliance.t1.rtl.LaneZvk"
},
[
0,
1,
2,
3
]
]
]
}
},
"generator": "org.chipsalliance.t1.rtl.T1"
}
}
5 changes: 3 additions & 2 deletions configgen/generated/sandslash.json
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,9 @@
]
],
"floatModuleParameters": [],
"zvbbModuleParameters": []
"zvbbModuleParameters": [],
"zvkModuleParameters": []
}
},
"generator": "org.chipsalliance.t1.rtl.T1"
}
}
13 changes: 9 additions & 4 deletions configgen/src/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ object Main {
Seq(0, 1, 2, 3))),
floatModuleParameters =
Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))),
zvbbModuleParameters = Seq()
zvbbModuleParameters = Seq(),
zvkModuleParameters = Seq(),
)
)
if (doEmit) param.emit(targetFile)
Expand Down Expand Up @@ -151,7 +152,9 @@ object Main {
floatModuleParameters =
Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))),
zvbbModuleParameters =
Seq((SerializableModuleGenerator(classOf[LaneZvbb], LaneZvbbParam(32, 3)), Seq(0, 1, 2, 3)))
Seq((SerializableModuleGenerator(classOf[LaneZvbb], LaneZvbbParam(32, 3)), Seq(0, 1, 2, 3))),
zvkModuleParameters =
Seq((SerializableModuleGenerator(classOf[LaneZvk], LaneZvkParam(32, 3)), Seq(0, 1, 2, 3))),
)
)
if (doEmit) param.emit(targetFile)
Expand Down Expand Up @@ -201,7 +204,8 @@ object Main {
),
Seq(0, 1, 2, 3))),
floatModuleParameters = Seq(),
zvbbModuleParameters = Seq() // TODO
zvbbModuleParameters = Seq(),
zvkModuleParameters = Seq(),
)
)
if (doEmit) param.emit(targetFile)
Expand Down Expand Up @@ -251,7 +255,8 @@ object Main {
),
Seq(0, 1, 2, 3))),
floatModuleParameters = Seq(),
zvbbModuleParameters = Seq() // TODO
zvbbModuleParameters = Seq(),
zvkModuleParameters = Seq(),
)
)
if (doEmit) param.emit(targetFile)
Expand Down
3 changes: 2 additions & 1 deletion t1/src/Bundles.scala
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,7 @@ class ExecutionUnitRecord(parameter: LaneParameter)(isLastSlot: Boolean) extends
val executeIndex: Bool = Bool()
val source: Vec[UInt] = Vec(3, UInt(parameter.datapathWidth.W))
val crossReadSource: Option[UInt] = Option.when(isLastSlot)(UInt((parameter.datapathWidth * 2).W))
val zvkCrossReadSource: Option[UInt] = Option.when(isLastSlot && parameter.zvkEnable)(UInt((parameter.datapathWidth * 4).W))
/** groupCounter need use to update `Lane.maskFormatResultForGroup` */
val groupCounter: UInt = UInt(parameter.groupNumberBits.W)
val sSendResponse: Option[Bool] = Option.when(isLastSlot)(Bool())
Expand Down Expand Up @@ -725,4 +726,4 @@ class T1Retire(xLen: Int) extends Bundle {
val rd: ValidIO[T1RdRetire] = Valid(new T1RdRetire(xLen))
val csr: ValidIO[T1CSRRetire] = Valid(new T1CSRRetire)
val mem: ValidIO[EmptyBundle] = Valid(new EmptyBundle)
}
}
125 changes: 120 additions & 5 deletions t1/src/Lane.scala
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class LaneProbe(parameter: LaneParameter) extends Bundle {
val instructionValid: UInt = UInt(parameter.chainingSize.W)

val crossWriteProbe: Vec[ValidIO[LaneWriteProbe]] = Vec(2, Valid(new LaneWriteProbe(parameter.instructionIndexBits)))
val zvkCrossWriteProbe: Option[Vec[ValidIO[LaneWriteProbe]]] = Option.when(parameter.zvkEnable)(Vec(4, Valid(new LaneWriteProbe(parameter.instructionIndexBits))))

val vrfProbe: VRFProbe = new VRFProbe(parameter.vrfParam)
}
Expand All @@ -86,7 +87,9 @@ case class LaneParameter(
laneNumber: Int,
chainingSize: Int,
crossLaneVRFWriteEscapeQueueSize: Int,
crossLaneVRFWriteEscapeZvkQueueSize: Int,
fpuEnable: Boolean,
zvkEnable: Boolean,
portFactor: Int,
vrfRamType: RamType,
decoderParam: DecoderParam,
Expand Down Expand Up @@ -132,7 +135,7 @@ case class LaneParameter(
*
* for each number in table below, it represent a [[datapathWidth]]
* {{{
* lane0 | lane1 | ... | lane8
* lane0 | lane1 | ... | lane7
* offset0 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7
* offset1 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15
* offset2 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23
Expand Down Expand Up @@ -178,7 +181,7 @@ case class LaneParameter(
val executionQueueSize: Int = 4

/** Parameter for [[VRF]] */
def vrfParam: VRFParam = VRFParam(vLen, laneNumber, datapathWidth, chainingSize, portFactor, vrfRamType)
def vrfParam: VRFParam = VRFParam(vLen, laneNumber, datapathWidth, chainingSize, portFactor, zvkEnable, vrfRamType)
}

/** Instantiate [[Lane]] from [[T1]],
Expand Down Expand Up @@ -210,8 +213,26 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
* TODO: benchmark the usecase for tuning the Ring Bus width.
* find a real world case for using `narrow` and `widen` aggressively.
*/
// 0: 0.0 - 0.1
// 1: 0.2 - 0.3
// 2: 0.4 - 0.5
// 3: 0.6 - 0.7
// 4: 1.0 - 1.1
// 5: 1.2 - 1.3
// 6: 1.4 - 1.5
// 7: 1.6 - 1.7

// 0: 0.0 - 0.1 - 0.2 - 0.3
// 1: 0.4 - 0.5 - 0.6 - 0.7
// 2: 1.0 - 1.1 - 1.2 - 1.3
// 3: 1.4 - 1.5 - 1.6 - 1.7
// 4: 2.0 - 2.1 - 2.2 - 2.3
// 5: 2.4 - 2.5 - 2.6 - 2.7
// 6: 3.0 - 3.1 - 3.2 - 3.3
// 7: 3.4 - 3.5 - 3.6 - 3.7
@public
val readBusPort: Vec[RingPort[ReadBusData]] = IO(Vec(2, new RingPort(new ReadBusData(parameter))))
val zvkReadBusPort: Option[Vec[RingPort[ReadBusData]]] = Option.when(parameter.zvkEnable)(IO(Vec(4, new RingPort(new ReadBusData(parameter)))))

/** VRF Write Interface.
* only used for `narrow` and `widen`
Expand All @@ -220,6 +241,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
*/
@public
val writeBusPort: Vec[RingPort[WriteBusData]] = IO(Vec(2, new RingPort(new WriteBusData(parameter))))
val zvkWriteBusPort: Option[Vec[RingPort[WriteBusData]]] = Option.when(parameter.zvkEnable)(IO(Vec(4, new RingPort(new WriteBusData(parameter)))))

/** request from [[T1.decode]] to [[Lane]]. */
@public
Expand Down Expand Up @@ -301,7 +323,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
@public
val loadDataInLSUWriteQueue: UInt = IO(Input(UInt(parameter.chainingSize.W)))

/** How many dataPath will writ by instruction in this lane */
/** How many dataPath will write by instruction in this lane */
@public
val writeCount: UInt =
IO(Input(UInt((parameter.vlMaxBits - log2Ceil(parameter.laneNumber) - log2Ceil(parameter.dataPathByteWidth)).W)))
Expand All @@ -322,6 +344,9 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[

// TODO: remove
dontTouch(writeBusPort)
if(parameter.zvkEnable) {
dontTouch(zvkWriteBusPort.get)
}

/** VRF instances. */
val vrf: Instance[VRF] = Instantiate(new VRF(parameter.vrfParam))
Expand Down Expand Up @@ -442,8 +467,12 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
val readCheckRequestVec: Vec[VRFReadRequest] = Wire(Vec(parameter.chainingSize * 3 + 2,
new VRFReadRequest(parameter.vrfParam.regNumBits, parameter.vrfOffsetBits, parameter.instructionIndexBits)
))
val zvkReadCheckRequestVec: Option[Vec[VRFReadRequest]] = Option.when(parameter.zvkEnable)(Wire(Vec(parameter.chainingSize * 3 + 4,
new VRFReadRequest(parameter.vrfParam.regNumBits, parameter.vrfOffsetBits, parameter.instructionIndexBits)
)))

val readCheckResult: Vec[Bool] = Wire(Vec(parameter.chainingSize * 3 + 2, Bool()))
val zvkReadCheckResult: Option[Vec[Bool]] = Option.when(parameter.zvkEnable)(Wire(Vec(parameter.chainingSize * 3 + 4, Bool())))

/** signal used for prohibiting slots to access VRF.
* a slot will become inactive when:
Expand All @@ -467,7 +496,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
val slotCanShift: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool()))

/** Which data group is waiting for the result of the cross-lane read */
val readBusDequeueGroup: UInt = Wire(UInt(parameter.groupNumberBits.W))
val readBusDequeueGroup: UInt = Wire(UInt(parameter.groupNumberBits.W)) // TODO: readBusDequeueGroup is currently unused

/** enqueue valid for execution unit */
val executeEnqueueValid: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool()))
Expand Down Expand Up @@ -524,6 +553,18 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
pipe = true
)
))
val zvkCrossLaneWriteQueue: Option[Seq[Queue[VRFWriteRequest]]] = Option.when(parameter.zvkEnable)(Seq.tabulate(4)(i => Module(
new Queue(
new VRFWriteRequest(
parameter.vrfParam.regNumBits,
parameter.vrfOffsetBits,
parameter.instructionIndexBits,
parameter.datapathWidth
),
parameter.crossLaneVRFWriteEscapeZvkQueueSize,
pipe = true
)
)))
val maskedWriteUnit: Instance[MaskedWrite] = Instantiate(new MaskedWrite(parameter))
val tokenManager: Instance[SlotTokenManager] = Instantiate(new SlotTokenManager(parameter))
slotControl.zipWithIndex.foreach {
Expand Down Expand Up @@ -671,6 +712,13 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
readCheckRequestVec((parameter.chainingSize - index - 1) * 3 + portIndex) := stage1.vrfCheckRequest(portIndex)
stage1.checkResult(portIndex) := readCheckResult((parameter.chainingSize - index - 1) * 3 + portIndex)
}
// Number of Zvk read-check ports driven by this slot: 7 for the last slot, 5 for every other slot.
// NOTE(review): `parameter.zvkEnable` in this condition is redundant, the value is only used
// inside the `if(parameter.zvkEnable)` below.
// NOTE(review): each slot's window into the shared check vectors starts 3 entries after its
// neighbour's (the `* 3` stride below) while a slot drives at least 5 entries, so adjacent slots
// appear to address overlapping entries — confirm the intended stride / port count.
val zvkCheckSize = if (isLastSlot && parameter.zvkEnable) 7 else 5
if(parameter.zvkEnable) {
// Forward every check request of this slot to the lane-wide request vector and
// route the matching check result back to stage1.
Seq.tabulate(zvkCheckSize){ portIndex =>
zvkReadCheckRequestVec.get((parameter.chainingSize - index - 1) * 3 + portIndex) := stage1.zvkVrfCheckRequest.get(portIndex)
stage1.zvkCheckResult.get(portIndex) := zvkReadCheckResult.get((parameter.chainingSize - index - 1) * 3 + portIndex)
}
}
// connect cross read bus
if(isLastSlot) {
val tokenSize = parameter.crossLaneVRFWriteEscapeQueueSize
Expand All @@ -695,6 +743,30 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
// dequeue to cross read unit
stage1.readBusDequeue.get(portIndex) <> queue.io.deq
}
// Depth of the Zvk cross-lane escape queues; also the maximum number of outstanding
// transfers allowed per Zvk bus port by the token counters below.
val zvKTokenSize = parameter.crossLaneVRFWriteEscapeZvkQueueSize
if(parameter.zvkEnable) {
// One tx/rx pair per Zvk read-bus port.
zvkReadBusPort.get.zipWithIndex.foreach {case (readPort, portIndex) =>
// tx: token-based flow control.
// `tokenReg` counts transfers that were sent but not yet released; sending is
// blocked once it reaches `zvKTokenSize`.
val tokenReg = RegInit(0.U(log2Ceil(zvKTokenSize + 1).W))
val tokenReady: Bool = tokenReg =/= zvKTokenSize.U
stage1.readBusRequest4.get(portIndex).ready := tokenReady
readPort.deq.valid := stage1.readBusRequest4.get(portIndex).valid && tokenReady
readPort.deq.bits := stage1.readBusRequest4.get(portIndex).bits
// +1 when a transfer is sent, otherwise an all-ones value (i.e. -1 modulo the register width).
val tokenUpdate = Mux(readPort.deq.valid, 1.U, -1.S(tokenReg.getWidth.W).asUInt)
// Only update when exactly one of send/release happens; both in the same cycle cancel out.
when(readPort.deq.valid ^ readPort.deqRelease) {
tokenReg := tokenReg + tokenUpdate
}
// rx: incoming bus data is buffered in a queue of `zvKTokenSize` entries.
val queue = Module(new Queue(chiselTypeOf(readPort.deq.bits), zvKTokenSize, pipe=true))
queue.io.enq.valid := readPort.enq.valid
queue.io.enq.bits := readPort.enq.bits
// Report a release on the bus every time an entry leaves the queue.
readPort.enqRelease := queue.io.deq.fire
// The enqueue side has no back-pressure (`enq.ready` is not propagated), so the queue
// must never be full when data arrives.
assert(queue.io.enq.ready || !readPort.enq.valid)
// dequeue to cross read unit
stage1.readBusDequeue4.get(portIndex) <> queue.io.deq
}
}

// cross write
writeBusPort.zipWithIndex.foreach {case (writePort, portIndex) =>
Expand All @@ -710,6 +782,21 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
tokenReg := tokenReg + tokenUpdate
}
}
// Zvk cross write (tx side): forward stage3's Zvk cross-write requests onto the Zvk write bus,
// one token counter per port.
if(parameter.zvkEnable) {
zvkWriteBusPort.get.zipWithIndex.foreach {case (writePort, portIndex) =>
// `tokenReg` counts transfers that were sent but not yet released; sending is
// blocked once it reaches `zvKTokenSize`.
val tokenReg = RegInit(0.U(log2Ceil(zvKTokenSize + 1).W))
val tokenReady: Bool = tokenReg =/= zvKTokenSize.U
writePort.deq.valid := stage3.zvkCrossWritePort.get(portIndex).valid && tokenReady
writePort.deq.bits := stage3.zvkCrossWritePort.get(portIndex).bits
stage3.zvkCrossWritePort.get(portIndex).ready := tokenReady

// update token
// +1 when a transfer is sent, otherwise an all-ones value (i.e. -1 modulo the register width).
val tokenUpdate = Mux(writePort.deq.valid, 1.U, -1.S(tokenReg.getWidth.W).asUInt)
// Only update when exactly one of send/release happens; both in the same cycle cancel out.
when(writePort.deq.valid ^ writePort.deqRelease) {
tokenReg := tokenReg + tokenUpdate
}
}
}
}

stage2.enqueue.valid := stage1.dequeue.valid && executionUnit.enqueue.ready
Expand Down Expand Up @@ -853,6 +940,24 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
assert(queue.io.enq.ready || !port.enq.valid)
port.enqRelease := queue.io.deq.fire
}
// Receive side of the Zvk cross-lane write bus: every Zvk write-bus port feeds its own escape
// queue, where the bus payload is turned into a VRF write request.
//
// Each queue is paired with the Zvk write-bus port of the same index. Reading `writeBusPort`
// here would index a two-entry Vec with 0..3 and would also drive `enqRelease` of the regular
// cross-write ports a second time.
if (parameter.zvkEnable) {
  val zvkQueues = zvkCrossLaneWriteQueue.get
  val zvkPorts = zvkWriteBusPort.get
  // Number of bits needed to encode the port index (2 bits for 4 ports); a 1-bit literal
  // cannot represent indices 2 and 3.
  val portIndexBits = log2Ceil(zvkPorts.size)
  // ((counter << portIndexBits) >> parameter.vrfParam.vrfOffsetBits).low(3)
  val registerIncreaseBase = parameter.vrfParam.vrfOffsetBits - portIndexBits
  zvkQueues.zip(zvkPorts).zipWithIndex.foreach { case ((queue, port), index) =>
    queue.io.enq.valid := port.enq.valid
    queue.io.enq.bits.vd :=
      // 3: 8 reg => log(2, 8)
      slotControl.head.laneRequest.vd + port.enq.bits.counter(registerIncreaseBase + 3 - 1, registerIncreaseBase)
    // The port index selects the datapath-wide slice inside the group addressed by `counter`.
    queue.io.enq.bits.offset := port.enq.bits.counter ## index.U(portIndexBits.W)
    queue.io.enq.bits.data := port.enq.bits.data
    queue.io.enq.bits.last := DontCare
    queue.io.enq.bits.instructionIndex := port.enq.bits.instructionIndex
    // NOTE(review): the mask is still expanded by a factor of 2 — confirm this is the intended
    // expansion for the four-port Zvk path.
    queue.io.enq.bits.mask := FillInterleaved(2, port.enq.bits.mask)
    // The bus offers no back-pressure on this side, so the queue must never be full on arrival.
    assert(queue.io.enq.ready || !port.enq.valid)
    port.enqRelease := queue.io.deq.fire
  }
}

val vfus: Seq[Instance[VFUModule]] = instantiateVFU(parameter.vfuInstantiateParameter)(
requestVec,
Expand Down Expand Up @@ -899,7 +1004,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
}

// all vrf write
val allVrfWrite: Seq[DecoupledIO[VRFWriteRequest]] = vrfWriteArbiter ++ crossLaneWriteQueue.map(_.io.deq)
// `zvkCrossLaneWriteQueue` is `None` when `parameter.zvkEnable` is false, so fold the Option
// into a (possibly empty) Seq instead of calling `.get`, which would throw during elaboration
// for configurations without Zvk.
val allVrfWrite: Seq[DecoupledIO[VRFWriteRequest]] =
  vrfWriteArbiter ++
    crossLaneWriteQueue.map(_.io.deq) ++
    zvkCrossLaneWriteQueue.toSeq.flatten.map(_.io.deq)
// check all write
vrf.writeCheck.zip(allVrfWrite).foreach {case (check, write) =>
check.vd := write.bits.vd
Expand All @@ -909,6 +1014,10 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[

vrf.readCheck.zip(readCheckRequestVec).foreach{case (sink, source) => sink := source}
readCheckResult.zip(vrf.readCheckResult).foreach{case (sink, source) => sink := source}
// Wire the Zvk read-check requests and results between the lane and the VRF.
if (parameter.zvkEnable) {
  // `zvkReadCheckRequestVec` is an Option: unwrap it so that individual requests are paired with
  // the VRF check ports. Zipping against the Option itself yields a single pair whose source is
  // the whole Vec.
  vrf.zvkReadCheck.get.zip(zvkReadCheckRequestVec.get).foreach { case (sink, source) => sink := source }
  // NOTE(review): `vrf.zvkReadCheckResult` is used without `.get` although `vrf.zvkReadCheck`
  // is an Option — confirm its type in VRF and unwrap it here if it is optional as well.
  zvkReadCheckResult.get.zip(vrf.zvkReadCheckResult).foreach{case (sink, source) => sink := source}
}

allVrfWriteAfterCheck.zipWithIndex.foreach { case (req, i) =>
val check = vrf.writeAllow(i)
Expand Down Expand Up @@ -1192,6 +1301,12 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
rpt.valid := afterCheckDequeueFire(parameter.chainingSize + 1 + rptIndex)
rpt.bits := allVrfWriteAfterCheck(parameter.chainingSize + 1 + rptIndex).instructionIndex
}
// Report dequeued cross-lane VRF writes to the token manager when Zvk is enabled.
// NOTE(review): this drives `tokenManager.crossWriteReports` from the entries starting at
// `parameter.chainingSize + 1`, which looks identical to the non-Zvk report wiring directly
// before it. Presumably the Zvk queues (appended after the regular cross-write queues in
// `allVrfWrite`) need their own report ports and index offset — confirm against SlotTokenManager.
if(parameter.zvkEnable) {
tokenManager.crossWriteReports.zipWithIndex.foreach {case (rpt, rptIndex) =>
rpt.valid := afterCheckDequeueFire(parameter.chainingSize + 1 + rptIndex)
rpt.bits := allVrfWriteAfterCheck(parameter.chainingSize + 1 + rptIndex).instructionIndex
}
}
// todo: add mask unit write token
tokenManager.responseReport.valid := laneResponse.valid
tokenManager.responseReport.bits := laneResponse.bits.instructionIndex
Expand Down
Loading

0 comments on commit 0a75a41

Please sign in to comment.