Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[rtl] Slots can always be shifted when slot idle. #400

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 66 additions & 48 deletions t1/src/Lane.scala
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
val writeQueueValid: Bool = IO(Output(Bool()))
val writeReadyForLsu: Bool = IO(Output(Bool()))
val vrfReadyToStore: Bool = IO(Output(Bool()))
val recordFree: Bool = IO(Output(Bool()))

// TODO: remove
dontTouch(writeBusPort)
Expand Down Expand Up @@ -360,14 +361,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
*/
val slotShiftValid: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool()))

/** The slots start to shift in these rules:
* - instruction can only enqueue to the last slot.
* - all slots can only shift at the same time which means:
* if one slot is finished earlier -> 1101,
* it will wait for the first slot to finish -> 1100,
* and then take two cycles to move to xx11.
*/
val slotCanShift: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool()))
val slotHeadFinish: Bool = Wire(Bool())

/** Which data group is waiting for the result of the cross-lane read */
val readBusDequeueGroup: UInt = Wire(UInt(parameter.groupNumberBits.W))
Expand All @@ -378,6 +372,9 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
/** request from slot to vfu. */
val requestVec: Vec[SlotRequestToVFU] = Wire(Vec(parameter.chainingSize, new SlotRequestToVFU(parameter)))

val pipeDecode: Seq[DecodeBundle] =
Seq.tabulate(parameter.chainingSize) { _ => WireDefault(slotControl.head.laneRequest.decodeResult)}

/** response from vfu to slot. */
val responseVec: Vec[ValidIO[VFUResponseToSlot]] = Wire(Vec(parameter.chainingSize, Valid(new VFUResponseToSlot(parameter))))

Expand All @@ -389,13 +386,13 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[

val VFUNotClear: Bool = Wire(Bool())

val slot0EnqueueFire: Bool = Wire(Bool())
val slotEnqueueFireVec: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool()))

/** assert when a instruction is finished in the slot. */
val instructionFinishedVec: Vec[UInt] = Wire(Vec(parameter.chainingSize, UInt(parameter.chainingSize.W)))
/** assert when a instruction is valid in the slot. */
val instructionValidVec: Vec[UInt] = Wire(Vec(parameter.chainingSize, UInt(parameter.chainingSize.W)))

/** assert when a instruction will not use mask unit */
val instructionUnrelatedMaskUnitVec: Vec[UInt] = Wire(Vec(parameter.chainingSize, UInt(parameter.chainingSize.W)))
val instructionUnrelatedMaskUnit = Wire(UInt(parameter.chainingSize.W))

/** queue for cross lane writing.
* TODO: benchmark the size of the queue
Expand Down Expand Up @@ -464,12 +461,25 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
}

if(isLastSlot) {
slotCanShift(index) := pipeClear && pipeFinishVec(index)
} else {
slotCanShift(index) := pipeClear
slotHeadFinish := pipeClear && pipeFinishVec(index)
}

val newInstruction: Bool = slotEnqueueFireVec(index)
// state for each stage
val laneState: LaneState = Wire(new LaneState(parameter))
val readFromScalarReg: UInt = RegInit(0.U(parameter.datapathWidth.W))
val laneStateReg: LaneState = RegInit(0.U.asTypeOf(laneState))
pipeDecode(index) := laneStateReg.decodeResult
val laneStateReady = RegInit(true.B)
val readyToSendData = laneStateReady && !newInstruction
when(pipeClear ^ newInstruction) {
laneStateReady := pipeClear
}
when(pipeClear && (RegNext(newInstruction) || !laneStateReady)) {
laneStateReg := laneState
readFromScalarReg := record.laneRequest.readFromScalar
}

val stage0: LaneStage0 = Module(new LaneStage0(parameter, isLastSlot))
val stage1 = Module(new LaneStage1(parameter, isLastSlot))
val stage2 = Module(new LaneStage2(parameter, isLastSlot))
Expand All @@ -495,9 +505,9 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
laneState.instructionIndex := record.laneRequest.instructionIndex
laneState.maskForMaskGroup := maskForMaskGroup
laneState.ffoByOtherLanes := record.ffoByOtherLanes
laneState.newInstruction.foreach(_ := slot0EnqueueFire)
laneState.newInstruction.foreach(_ := newInstruction)

stage0.enqueue.valid := slotActive(index) && (record.mask.valid || !record.laneRequest.mask)
stage0.enqueue.valid := slotActive(index) && (record.mask.valid || !record.laneRequest.mask) && readyToSendData
stage0.enqueue.bits.maskIndex := maskIndexVec(index)
stage0.enqueue.bits.maskForMaskGroup := record.mask.bits
stage0.enqueue.bits.maskGroupCount := maskGroupCountVec(index)
Expand Down Expand Up @@ -533,12 +543,13 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
val instructionIndex1H: UInt = UIntToOH(
record.laneRequest.instructionIndex(parameter.instructionIndexBits - 2, 0)
)
instructionFinishedVec(index) := 0.U
instructionUnrelatedMaskUnitVec(index) :=
Mux(decodeResult(Decoder.maskUnit) && decodeResult(Decoder.readOnly), 0.U, instructionIndex1H)
instructionValidVec(index) :=
// instruction in record
Mux(slotOccupied(index), indexToOH(record.laneRequest.instructionIndex, parameter.chainingSize), 0.U) |
// instruction in pipe
Mux(!pipeClear, indexToOH(laneStateReg.instructionIndex, parameter.chainingSize), 0.U)
when(slotOccupied(index) && pipeClear && pipeFinishVec(index)) {
slotOccupied(index) := false.B
instructionFinishedVec(index) := instructionIndex1H
}

// stage 1: read stage
Expand All @@ -551,8 +562,8 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
}
stage1.dequeue.bits.readBusDequeueGroup.foreach(data => readBusDequeueGroup := data)

stage1.state := laneState
stage1.readFromScalar := record.laneRequest.readFromScalar
stage1.readFromScalar := readFromScalarReg
stage1.state := laneStateReg
vrfReadRequest(index).zip(stage1.vrfReadRequest).foreach{ case (sink, source) => sink <> source }
vrfReadResult(index).zip(stage1.vrfReadResult).foreach{ case (source, sink) => sink := source }
// connect cross read bus
Expand Down Expand Up @@ -594,13 +605,15 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
tokenReg := tokenReg + tokenUpdate
}
}
instructionUnrelatedMaskUnit :=
(~Mux(decodeResult(Decoder.maskUnit) && decodeResult(Decoder.readOnly), instructionIndex1H, 0.U)).asUInt
}

stage2.enqueue.valid := stage1.dequeue.valid && executionUnit.enqueue.ready
stage1.dequeue.ready := stage2.enqueue.ready && executionUnit.enqueue.ready
executionUnit.enqueue.valid := stage1.dequeue.valid && stage2.enqueue.ready

stage2.state := laneState
stage2.state := laneStateReg
stage2.enqueue.bits.groupCounter := stage1.dequeue.bits.groupCounter
stage2.enqueue.bits.mask := stage1.dequeue.bits.mask
stage2.enqueue.bits.maskForFilter := stage1.dequeue.bits.maskForFilter
Expand All @@ -609,7 +622,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
sink := source
}

executionUnit.state := laneState
executionUnit.state := laneStateReg
executionUnit.enqueue.bits.src := stage1.dequeue.bits.src
executionUnit.enqueue.bits.bordersForMaskLogic :=
(stage1.dequeue.bits.groupCounter === record.lastGroupForInstruction && record.isLastLaneForMaskLogic)
Expand Down Expand Up @@ -639,7 +652,8 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
if (!isLastSlot) {
stage3.enqueue.bits := DontCare
}
stage3.state := laneState
stage3.state := laneStateReg
stage3.state.ffoByOtherLanes := laneState.ffoByOtherLanes
stage3.enqueue.bits.groupCounter := stage2.dequeue.bits.groupCounter
stage3.enqueue.bits.mask := stage2.dequeue.bits.mask
if (isLastSlot) {
Expand Down Expand Up @@ -743,20 +757,15 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[


// VFU
// TODO: reuse logic, adder, multiplier datapath
{
val decodeResultVec: Seq[DecodeBundle] = slotControl.map(_.laneRequest.decodeResult)

vfuConnect(parameter.vfuInstantiateParameter)(
requestVec,
executeEnqueueValid,
decodeResultVec,
executeEnqueueFire,
responseVec,
executeOccupied,
VFUNotClear
)
}
vfuConnect(parameter.vfuInstantiateParameter)(
requestVec,
executeEnqueueValid,
pipeDecode,
executeEnqueueFire,
responseVec,
executeOccupied,
VFUNotClear
)

val maskedWriteUnit: MaskedWrite = Module(new MaskedWrite(parameter))

Expand Down Expand Up @@ -933,8 +942,13 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
})


val slotEnqReady: Vec[Bool] = Wire(Vec(parameter.chainingSize, Bool()))
val requestReady: Bool = slotOccupied.zipWithIndex.foldLeft(false.B) {case (p, (o, i)) =>
slotEnqReady(i) := p || !o
p || !o
}
val slotEnqueueFire: Seq[Bool] = Seq.tabulate(parameter.chainingSize) { slotIndex =>
val enqueueReady: Bool = !slotOccupied(slotIndex)
val enqueueReady: Bool = slotEnqReady(slotIndex)
val enqueueValid: Bool = Wire(Bool())
val enqueueFire: Bool = enqueueReady && enqueueValid
// enqueue from lane request
Expand All @@ -949,7 +963,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
enqueueFire
} else {
// shifter for slot
enqueueValid := slotCanShift(slotIndex + 1) && slotOccupied(slotIndex + 1)
enqueueValid := slotOccupied(slotIndex + 1)
when(enqueueFire) {
slotControl(slotIndex) := slotControl(slotIndex + 1)
maskGroupCountVec(slotIndex) := maskGroupCountVec(slotIndex + 1)
Expand All @@ -960,16 +974,16 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
}
}

val slotDequeueFire: Seq[Bool] = (slotCanShift.head && slotOccupied.head) +: slotEnqueueFire
val slotDequeueFire: Seq[Bool] = (slotHeadFinish && slotOccupied.head) +: slotEnqueueFire
Seq.tabulate(parameter.chainingSize) { slotIndex =>
when(slotEnqueueFire(slotIndex) ^ slotDequeueFire(slotIndex)) {
slotOccupied(slotIndex) := slotEnqueueFire(slotIndex)
}
}
slot0EnqueueFire := slotEnqueueFire.head
slotEnqueueFireVec := VecInit(slotEnqueueFire)

// handshake
laneRequest.ready := !slotOccupied.last && vrf.instructionWriteReport.ready
laneRequest.ready := requestReady

// normal instruction, LSU instruction will be report to VRF.
vrf.lsuInstructionFire := laneRequest.bits.LSUFire
Expand Down Expand Up @@ -1029,21 +1043,25 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
vrf.instructionWriteReport.bits.elementMask := shifterMask

// clear record by instructionFinished
vrf.instructionLastReport := lsuLastReport | (instructionFinished & instructionUnrelatedMaskUnitVec.reduce(_ | _))
vrf.instructionLastReport := lsuLastReport | (instructionFinished & instructionUnrelatedMaskUnit)
vrf.lsuMaskGroupChange := lsuMaskGroupChange
vrf.loadDataInLSUWriteQueue := loadDataInLSUWriteQueue
vrf.crossWriteBusClear := crossWriteBusClear
vrf.dataInWriteQueue :=
crossLaneWriteQueue.map(q => Mux(q.io.deq.valid, indexToOH(q.io.deq.bits.instructionIndex, parameter.chainingSize), 0.U)).reduce(_ | _)|
Mux(topWriteQueue.valid, indexToOH(topWriteQueue.bits.instructionIndex, parameter.chainingSize), 0.U) |
maskedWriteUnit.maskedWrite1H
instructionFinished := instructionFinishedVec.reduce(_ | _)
writeReadyForLsu := vrf.writeReadyForLsu
vrfReadyToStore := vrf.vrfReadyToStore
vrf.lsuWriteCheck.vd := topWriteQueue.bits.vd
vrf.lsuWriteCheck.offset := topWriteQueue.bits.offset
vrf.lsuWriteCheck.instructionIndex := topWriteQueue.bits.instructionIndex
lsuWritAllow := vrf.lsuWriteAllow
recordFree := vrf.recordFree
val instructionValid: UInt = instructionValidVec.reduce(_ | _)
val instructionValidNext: UInt = RegNext(instructionValid, 0.U)
// He was alive in the last cycle, but dead in this cycle.
instructionFinished := instructionValidNext & (~instructionValid).asUInt

/**
* probes
Expand All @@ -1057,7 +1075,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
define(lastSlotOccupiedProbe, ProbeValue(slotOccupied.last))

val vrfInstructionWriteReportReadyProbe = IO(Output(Probe(Bool())))
define(vrfInstructionWriteReportReadyProbe, ProbeValue(vrf.instructionWriteReport.ready))
define(vrfInstructionWriteReportReadyProbe, ProbeValue(true.B))

val slotOccupiedProbe = slotOccupied.map(occupied => {
val occupiedProbe = IO(Output(Probe(Bool())))
Expand Down
10 changes: 7 additions & 3 deletions t1/src/V.scala
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,8 @@ class V(val parameter: VParameter) extends Module with SerializableModule[VParam
*/
val instructionRAWReady: Bool = Wire(Bool())
val allSlotFree: Bool = Wire(Bool())
val existMaskType: Bool = Wire(Bool())
val existMaskType: Bool = Wire(Bool())
val recordReady: Bool = Wire(Bool())

// mask Unit 与lane交换数据
val writeType: VRFWriteRequest = new VRFWriteRequest(
Expand Down Expand Up @@ -1234,7 +1235,7 @@ class V(val parameter: VParameter) extends Module with SerializableModule[VParam
// valid: requestReg.valid
// ready: slotReady && lsu.request.ready && instructionRAWReady
lane.laneRequest.bits.LSUFire := slotReady && requestReg.valid && noOffsetReadLoadStore &&
lsu.request.ready && instructionRAWReady
lsu.request.ready && instructionRAWReady && recordReady
lane.laneRequest.bits.loadStore := isLoadStoreType
// let record in VRF to know there is a store instruction.
lane.laneRequest.bits.store := isStoreType
Expand Down Expand Up @@ -1393,6 +1394,9 @@ class V(val parameter: VParameter) extends Module with SerializableModule[VParam
/** try to issue instruction to which slot. */
val slotToEnqueue: UInt = Mux(specialInstruction, true.B ## 0.U((parameter.chainingSize - 1).W), free1H)

/** all lane have vrf record free */
recordReady := laneVec.map(_.recordFree).reduce(_ && _)

/** for lsu instruction lsu is ready, for normal instructions, lanes are ready. */
val executionReady: Bool = (!isLoadStoreType || lsu.request.ready) && (noOffsetReadLoadStore || allLaneReady)
// - ready to issue instruction
Expand All @@ -1403,7 +1407,7 @@ class V(val parameter: VParameter) extends Module with SerializableModule[VParam
// we detect the hazard and decide should we issue this slide or
// issue the instruction after the slide which already in the slot.
requestRegDequeue.ready := executionReady && slotReady && (!gatherNeedRead || gatherReadFinish) &&
instructionRAWReady
instructionRAWReady && recordReady

// TODO: change to `requestRegDequeue.fire`.
instructionToSlotOH := Mux(requestRegDequeue.ready, slotToEnqueue, 0.U)
Expand Down
9 changes: 4 additions & 5 deletions t1/src/vrf/VRF.scala
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar
val crossWriteAllow: Bool = IO(Output(Bool()))

/** when instruction is fired, record it in the VRF for chaining. */
val instructionWriteReport: DecoupledIO[VRFWriteReport] = IO(Flipped(Decoupled(new VRFWriteReport(parameter))))
val instructionWriteReport: ValidIO[VRFWriteReport] = IO(Flipped(Valid(new VRFWriteReport(parameter))))

val lsuInstructionFire: Bool = IO(Input(Bool()))

Expand All @@ -169,6 +169,7 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar
val lsuMaskGroupChange: UInt = IO(Input(UInt(parameter.chainingSize.W)))
val writeReadyForLsu: Bool = IO(Output(Bool()))
val vrfReadyToStore: Bool = IO(Output(Bool()))
val recordFree: Bool = IO(Output(Bool()))

/** we can only chain LSU instructions, after [[LSU.writeQueueVec]] is cleared. */
val loadDataInLSUWriteQueue: UInt = IO(Input(UInt(parameter.chainingSize.W)))
Expand Down Expand Up @@ -249,12 +250,9 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar
val initRecord: ValidIO[VRFWriteReport] = WireDefault(0.U.asTypeOf(Valid(new VRFWriteReport(parameter))))
initRecord.valid := true.B
initRecord.bits := instructionWriteReport.bits
val freeRecord: UInt = VecInit(chainingRecord.map(!_.valid)).asUInt
val freeRecord: UInt = VecInit(recordValidVec.map(!_)).asUInt
val recordFFO: UInt = ffo(freeRecord)
val recordEnq: UInt = Wire(UInt((parameter.chainingSize + 1).W))
// handle VRF hazard
// TODO: move to [[V]]
instructionWriteReport.ready := true.B
recordEnq := Mux(
// 纯粹的lsu指令的记录不需要ready
instructionWriteReport.valid,
Expand Down Expand Up @@ -382,4 +380,5 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar
(crossWriteOH & record.bits.elementMask) === 0.U
!((!older && waw) && !sameInst && record.valid)
}.reduce(_ && _) || !crossReadNeedCheck
recordFree := !recordValidVec.reduce(_ && _)
}
Loading