Skip to content

Commit

Permalink
[emulator] rewrite Lane Slots probes to latest perf model
Browse files Browse the repository at this point in the history
Signed-off-by: Avimitin <[email protected]>
  • Loading branch information
Avimitin committed Oct 31, 2023
1 parent 9e43c0f commit 898975f
Show file tree
Hide file tree
Showing 5 changed files with 313 additions and 10 deletions.
42 changes: 42 additions & 0 deletions elaborator/src/PerfMonitor.scala
Original file line number Diff line number Diff line change
Expand Up @@ -120,4 +120,46 @@ class LaneMonitor(param: LaneParam) extends PerfMonitor {
val slotOccupied = dpiIn("slotOccupied", Seq.fill(param.slot)(Input(Bool())))
val instructionFinished = dpiIn("instructionFinished", Input(UInt(32.W)))
}

/** Performance monitor for a single slot of a single lane.
  *
  * Every member is a DPI input declared through `dpiIn`; the test bench drives each one
  * from a probe bored out of the design.
  *
  * NOTE(review): the declaration order below matches the parameter order of
  * `lane_slot_monitor` in emulator/src/dpi.cc. Presumably `dpiIn` registration order
  * defines the DPI argument order, so do not reorder these vals without checking
  * `PerfMonitor` and the C++ side.
  */
class LaneSlotMonitor extends PerfMonitor {
// Which lane, and which slot inside that lane, this monitor instance reports on.
val laneIndex = dpiIn("laneIndex", Input(UInt(32.W)))
val slotIndex = dpiIn("slotIndex", Input(UInt(32.W)))

// Ready/valid handshake on the stage 0 enqueue port.
val stage0EnqueueReady = dpiIn("stage0EnqueueReady", Input(Bool()))
val stage0EnqueueValid = dpiIn("stage0EnqueueValid", Input(Bool()))

val changingMaskSet = dpiIn("changingMaskSet", Input(Bool()))

// Slot-level state flags.
val slotActive = dpiIn("slotActive", Input(Bool()))
val slotOccupied = dpiIn("slotOccupied", Input(Bool()))
val pipeFinish = dpiIn("pipeFinish", Input(Bool()))

// Ready/valid handshake on the stage 1 dequeue port.
val stage1DequeueReady = dpiIn("stage1DequeueReady", Input(Bool()))
val stage1DequeueValid = dpiIn("stage1DequeueValid", Input(Bool()))

// NOTE(review): the DPI name is spelled "stage1HasDataOccpied" (sic); the matching
// C++ parameter in dpi.cc uses the same spelling, so it is left untouched here.
val stage1HasDataOccupied = dpiIn("stage1HasDataOccpied", Input(Bool()))
val stage1Finishing = dpiIn("stage1Finishing", Input(Bool()))

// Three ready and three valid inputs, one pair per stage 1 VRF read request.
val stage1VrfReadReadyRequest = dpiIn("stage1VrfReadReadyRequest", Seq.fill(3)(Input(Bool())))
val stage1VrfReadValidRequest = dpiIn("stage1VrfReadValidRequest", Seq.fill(3)(Input(Bool())))

// Ready/valid handshake of the execution unit's VFU request.
val executionUnitVfuRequestReady = dpiIn("executionUnitVfuRequestReady", Input(Bool()))
val executionUnitVfuRequestValid = dpiIn("executionUnitVfuRequestValid", Input(Bool()))

// Ready/valid handshake of the stage 3 VRF write request.
val stage3VrfWriteReady = dpiIn("stage3VrfWriteReady", Input(Bool()))
val stage3VrfWriteValid = dpiIn("stage3VrfWriteValid", Input(Bool()))
}

/** Performance monitor for the last slot of a lane.
  *
  * Inherits every input of [[LaneSlotMonitor]] and appends the inputs below.
  *
  * NOTE(review): the inherited inputs followed by these, in declaration order, match the
  * parameter order of `lane_last_slot_monitor` in emulator/src/dpi.cc; presumably the
  * order is significant for the DPI call, so keep it stable.
  */
class LaneLastSlotMonitor() extends LaneSlotMonitor {
val slotShiftValid = dpiIn("slotShiftValid", Input(Bool()))
val decodeResultIsCrossReadOrWrite = dpiIn("decodeResultIsCrossReadOrWrite", Input(Bool()))
val decodeResultIsScheduler = dpiIn("decodeResultIsScheduler", Input(Bool()))

val stage1ReadFinish = dpiIn("stage1ReadFinish", Input(Bool()))

// Cross-read state flags of stage 1. The DPI names carry no "stage1" prefix,
// unlike the Scala member names.
val stage1sSendCrossReadResultLSB = dpiIn("sSendCrossReadResultLSB", Input(Bool()))
val stage1sSendCrossReadResultMSB = dpiIn("sSendCrossReadResultMSB", Input(Bool()))
val stage1wCrossReadLSB = dpiIn("wCrossReadLSB", Input(Bool()))
val stage1wCrossReadMSB = dpiIn("wCrossReadMSB", Input(Bool()))
}
// End of Lane monitor
81 changes: 79 additions & 2 deletions elaborator/src/TestBench.scala
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,10 @@ class TestBench(generator: SerializableModuleGenerator[V, VParameter]) extends R
otherUnitMonitor.stateValue.ref := read(bore(dut.lsu.otherUnit.stateValueProbe))
// End of [[v.SimpleAccessUnit]] related probe connection

dut.laneVec.zipWithIndex.foreach({ case (lane, i) =>
dut.laneVec.zipWithIndex.foreach({ case (lane, laneIndex) =>
val laneMonitor = Module(new LaneMonitor(LaneParam(generator.parameter.chainingSize)))
laneMonitor.clock.ref := clock.asBool
laneMonitor.index.ref := i.U
laneMonitor.index.ref := laneIndex.U
laneMonitor.laneRequestReady.ref := read(bore(lane.laneRequestReadyProbe))
laneMonitor.laneRequestValid.ref := read(bore(lane.laneRequestValidProbe))
laneMonitor.lastSlotOccupied.ref := read(bore(lane.lastSlotOccupiedProbe))
Expand All @@ -155,5 +155,82 @@ class TestBench(generator: SerializableModuleGenerator[V, VParameter]) extends R
dpi.ref := read(bore(lane.slotOccupiedProbe(index)))
}
laneMonitor.instructionFinished.ref := read(bore(lane.instructionFinishedProbe))

// Attach one perf monitor to every slot of this lane. The last slot exposes extra
// stage 1 probes (declared as Options in LaneStage1), so it gets a LaneLastSlotMonitor;
// every other slot gets a plain LaneSlotMonitor.
lane.slotProbes.zipWithIndex.foreach { case (probes, slotIndex) =>
  // Wires the ports shared by both monitor flavours. LaneLastSlotMonitor extends
  // LaneSlotMonitor, so a single helper serves both branches and they cannot drift apart.
  def connectCommonPorts(slotMonitor: LaneSlotMonitor): Unit = {
    slotMonitor.clock.ref := clock.asBool
    slotMonitor.laneIndex.ref := laneIndex.U
    slotMonitor.slotIndex.ref := slotIndex.U

    slotMonitor.stage0EnqueueReady.ref := read(bore(probes.stage0EnqueueReady))
    slotMonitor.stage0EnqueueValid.ref := read(bore(probes.stage0EnqueueValid))
    slotMonitor.changingMaskSet.ref := read(bore(probes.changingMaskSet))
    slotMonitor.slotActive.ref := read(bore(probes.slotActive))
    slotMonitor.slotOccupied.ref := read(bore(probes.slotOccupied))
    slotMonitor.pipeFinish.ref := read(bore(probes.pipeFinish))

    slotMonitor.stage1DequeueReady.ref := read(bore(probes.stage1Probes.dequeueReadyProbe))
    slotMonitor.stage1DequeueValid.ref := read(bore(probes.stage1Probes.dequeueValidProbe))
    slotMonitor.stage1HasDataOccupied.ref := read(bore(probes.stage1Probes.hasDataOccupiedProbe))
    slotMonitor.stage1Finishing.ref := read(bore(probes.stage1Probes.stageFinishProbe))

    // Pair the i-th ready/valid monitor inputs with the i-th VRF read request probes.
    slotMonitor.stage1VrfReadReadyRequest
      .zip(slotMonitor.stage1VrfReadValidRequest)
      .zipWithIndex
      .foreach { case ((ready, valid), index) =>
        val (readyProbe, validProbe) = probes.stage1Probes.vrfReadRequestProbe(index)
        ready.ref := read(bore(readyProbe))
        valid.ref := read(bore(validProbe))
      }

    slotMonitor.executionUnitVfuRequestReady.ref := read(bore(probes.executionUnitVfuRequestReady))
    slotMonitor.executionUnitVfuRequestValid.ref := read(bore(probes.executionUnitVfuRequestValid))

    slotMonitor.stage3VrfWriteReady.ref := read(bore(probes.stage3VrfWriteReady))
    slotMonitor.stage3VrfWriteValid.ref := read(bore(probes.stage3VrfWriteValid))
  }

  // The cross-read probes only exist on the last slot, so their presence identifies it.
  val isLastSlot = probes.stage1Probes.sSendCrossReadResultLSBProbe.isDefined
  if (isLastSlot) {
    val slotMonitor = Module(new LaneLastSlotMonitor())
    connectCommonPorts(slotMonitor)

    slotMonitor.slotShiftValid.ref := read(bore(probes.slotShiftValid))
    slotMonitor.decodeResultIsCrossReadOrWrite.ref := read(bore(probes.decodeResultIsCrossReadOrWrite))
    slotMonitor.decodeResultIsScheduler.ref := read(bore(probes.decodeResultIsScheduler))

    // These probes are Options; foreach (not map) because only the side effect matters.
    probes.stage1Probes.readFinishProbe.foreach(p => slotMonitor.stage1ReadFinish.ref := read(bore(p)))
    probes.stage1Probes.sSendCrossReadResultLSBProbe.foreach(p => slotMonitor.stage1sSendCrossReadResultLSB.ref := read(bore(p)))
    probes.stage1Probes.sSendCrossReadResultMSBProbe.foreach(p => slotMonitor.stage1sSendCrossReadResultMSB.ref := read(bore(p)))
    probes.stage1Probes.wCrossReadLSBProbe.foreach(p => slotMonitor.stage1wCrossReadLSB.ref := read(bore(p)))
    probes.stage1Probes.wCrossReadMSBProbe.foreach(p => slotMonitor.stage1wCrossReadMSB.ref := read(bore(p)))
  } else {
    val slotMonitor = Module(new LaneSlotMonitor())
    connectCommonPorts(slotMonitor)
  }
}
})
}
114 changes: 107 additions & 7 deletions emulator/src/dpi.cc
Original file line number Diff line number Diff line change
Expand Up @@ -386,18 +386,118 @@ void lane_monitor(const svBitVecVal *index, svLogic laneRequestValid,
.with("lane_request", json{{"valid", (bool)laneRequestValid},
{"ready", (bool)laneRequestReady}})
.with("last_slot_occpied", (bool)lastSlotOccupied)
.with("vrf_instruction_write_report_ready", (bool)vrfInstructionWriteReportReady)
.with("slot_occpied", std::vector{
(bool)slotOccupied0,
(bool)slotOccupied1,
(bool)slotOccupied2,
(bool)slotOccupied3,
})
.with("vrf_instruction_write_report_ready",
(bool)vrfInstructionWriteReportReady)
.with("slot_occpied",
std::vector{
(bool)slotOccupied0,
(bool)slotOccupied1,
(bool)slotOccupied2,
(bool)slotOccupied3,
})
.with("instruction_finished", (int)(*instructionFinished))
.info();
})
}

// DPI callback for a non-last lane slot: records the sampled ready/valid and state
// signals of one slot as a structured "Lane" log entry.
//
// laneIndex / slotIndex identify the lane and the slot inside it; every svLogic argument
// is logged after conversion to bool. The parameter order must stay in sync with the
// inputs declared by LaneSlotMonitor on the Scala side.
void lane_slot_monitor(
    const svBitVecVal *laneIndex, const svBitVecVal *slotIndex,
    svLogic stage0EnqueueReady, svLogic stage0EnqueueValid,
    svLogic changingMaskSet, svLogic slotActive, svLogic slotOccupied,
    svLogic pipeFinish, svLogic stage1DequeueReady, svLogic stage1DequeueValid,
    svLogic stage1HasDataOccpied, svLogic stage1Finishing,
    svLogic stage1VrfReadReadyRequest0, svLogic stage1VrfReadReadyRequest1,
    svLogic stage1VrfReadReadyRequest2, svLogic stage1VrfReadValidRequest0,
    svLogic stage1VrfReadValidRequest1, svLogic stage1VrfReadValidRequest2,
    svLogic executionUnitVfuRequestReady, svLogic executionUnitVfuRequestValid,
    svLogic stage3VrfWriteReady, svLogic stage3VrfWriteValid) {
  TRY({
    Log("Lane")
        .with("index", (int)(*laneIndex))
        .with("slot_index", (int)(*slotIndex))
        .with("stage_0_enqueue", json{{"valid", (bool)stage0EnqueueValid},
                                      {"ready", (bool)stage0EnqueueReady}})
        .with("changing_mask_set", (bool)(changingMaskSet))
        .with("slot_active", (bool)(slotActive))
        .with("slot_occupied", (bool)(slotOccupied))
        .with("pipe_finish", (bool)(pipeFinish))
        .with("stage_1",
              // "dequeue" must be a {key, value} pair whose value is itself an object.
              // Written as {"dequeue", {...}, {...}} it has three elements, which makes
              // nlohmann::json serialise the whole "stage_1" entry as an array.
              json{{"dequeue", json{{"valid", (bool)stage1DequeueValid},
                                    {"ready", (bool)stage1DequeueReady}}},
                   {"has_data_occupied", (bool)stage1HasDataOccpied},
                   {"finishing", (bool)stage1Finishing},
                   {"VRF_read_request",
                    std::vector{
                        json{{"ready", (bool)stage1VrfReadReadyRequest0},
                             {"valid", (bool)stage1VrfReadValidRequest0}},
                        json{{"ready", (bool)stage1VrfReadReadyRequest1},
                             {"valid", (bool)stage1VrfReadValidRequest1}},
                        json{{"ready", (bool)stage1VrfReadReadyRequest2},
                             {"valid", (bool)stage1VrfReadValidRequest2}},
                    }}})
        // The execution unit handshake was sampled by the monitor but never logged.
        .with("execution_unit_vfu_request",
              json{{"valid", (bool)executionUnitVfuRequestValid},
                   {"ready", (bool)executionUnitVfuRequestReady}})
        .with("stage_3_vrf_write", json{{"valid", (bool)stage3VrfWriteValid},
                                        {"ready", (bool)stage3VrfWriteReady}})
        .info();
  })
}

// DPI callback for the last slot of a lane: logs everything lane_slot_monitor logs plus
// the last-slot-only signals (slot shift, decode result flags, stage 1 cross-read state).
//
// laneIndex / slotIndex identify the lane and the slot inside it; every svLogic argument
// is logged after conversion to bool. The parameter order must stay in sync with the
// inputs declared by LaneLastSlotMonitor (inherited inputs first) on the Scala side.
void lane_last_slot_monitor(
    const svBitVecVal *laneIndex, const svBitVecVal *slotIndex,
    svLogic stage0EnqueueReady, svLogic stage0EnqueueValid,
    svLogic changingMaskSet, svLogic slotActive, svLogic slotOccupied,
    svLogic pipeFinish, svLogic stage1DequeueReady, svLogic stage1DequeueValid,
    svLogic stage1HasDataOccpied, svLogic stage1Finishing,
    svLogic stage1VrfReadReadyRequest0, svLogic stage1VrfReadReadyRequest1,
    svLogic stage1VrfReadReadyRequest2, svLogic stage1VrfReadValidRequest0,
    svLogic stage1VrfReadValidRequest1, svLogic stage1VrfReadValidRequest2,
    svLogic executionUnitVfuRequestReady, svLogic executionUnitVfuRequestValid,
    svLogic stage3VrfWriteReady, svLogic stage3VrfWriteValid,
    svLogic slotShiftValid, svLogic decodeResultIsCrossReadOrWrite,
    svLogic decodeResultIsScheduler, svLogic stage1ReadFinish,
    svLogic sSendCrossReadResultLSB, svLogic sSendCrossReadResultMSB,
    svLogic wCrossReadLSB, svLogic wCrossReadMSB) {
  TRY({
    Log("Lane")
        .with("index", (int)(*laneIndex))
        .with("slot_index", (int)(*slotIndex))
        .with("stage_0_enqueue", json{{"valid", (bool)stage0EnqueueValid},
                                      {"ready", (bool)stage0EnqueueReady}})
        .with("changing_mask_set", (bool)(changingMaskSet))
        .with("slot_active", (bool)(slotActive))
        .with("slot_occupied", (bool)(slotOccupied))
        .with("pipe_finish", (bool)(pipeFinish))
        .with("stage_1",
              // "dequeue" must be a {key, value} pair whose value is itself an object.
              // Written as {"dequeue", {...}, {...}} it has three elements, which makes
              // nlohmann::json serialise the whole "stage_1" entry as an array.
              json{{"dequeue", json{{"valid", (bool)stage1DequeueValid},
                                    {"ready", (bool)stage1DequeueReady}}},
                   {"has_data_occupied", (bool)stage1HasDataOccpied},
                   {"finishing", (bool)stage1Finishing},
                   {"read_finish", (bool)stage1ReadFinish},
                   {"sSendCrossReadResultLSB", (bool)sSendCrossReadResultLSB},
                   {"sSendCrossReadResultMSB", (bool)sSendCrossReadResultMSB},
                   {"wCrossReadLSB", (bool)wCrossReadLSB},
                   {"wCrossReadMSB", (bool)wCrossReadMSB},
                   {"VRF_read_request",
                    std::vector{
                        json{{"ready", (bool)stage1VrfReadReadyRequest0},
                             {"valid", (bool)stage1VrfReadValidRequest0}},
                        json{{"ready", (bool)stage1VrfReadReadyRequest1},
                             {"valid", (bool)stage1VrfReadValidRequest1}},
                        json{{"ready", (bool)stage1VrfReadReadyRequest2},
                             {"valid", (bool)stage1VrfReadValidRequest2}},
                    }}})
        // The execution unit handshake was sampled by the monitor but never logged.
        .with("execution_unit_vfu_request",
              json{{"valid", (bool)executionUnitVfuRequestValid},
                   {"ready", (bool)executionUnitVfuRequestReady}})
        .with("stage_3_vrf_write", json{{"valid", (bool)stage3VrfWriteValid},
                                        {"ready", (bool)stage3VrfWriteReady}})
        .with("slot_shift_valid", (bool)slotShiftValid)
        .with("decode_result",
              json{{"is_cross_read_or_write",
                    (bool)decodeResultIsCrossReadOrWrite},
                   {"is_scheduler", (bool)decodeResultIsScheduler}})
        .info();
  })
}

void print_perf_summary() {
auto output_file_path = get_env_arg_default("PERF_output_file", nullptr);
if (output_file_path != nullptr) {
Expand Down
49 changes: 48 additions & 1 deletion v/src/Lane.scala
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
)
)

slotControl.zipWithIndex.foreach {
val slotProbes = slotControl.zipWithIndex.map {
case (record, index) =>
val decodeResult: DecodeBundle = record.laneRequest.decodeResult
val isLastSlot: Boolean = index == 0
Expand Down Expand Up @@ -663,6 +663,53 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
stage3.vrfWriteRequest.ready := vrfWriteFire(index)

pipeClear := !Seq(stage0.stageValid, stage1.stageValid, stage2.stageValid, stage3.stageValid).reduce(_ || _)

// Probes
// Per-slot probe ports. Each val creates one Output(Probe(Bool())) IO and names it after
// the signal plus the slot index, so the names stay unique across slots.
// NOTE(review): statement order presumably fixes the order of the generated ports;
// keep it stable unless the consumers are checked.
object probe {
// Creates one fresh boolean probe output port per call.
def newProbe() = IO(Output(Probe(Bool())))

// Stage 0 enqueue handshake.
val stage0EnqueueReady = newProbe().suggestName(s"stage0EnqueueReady${index}")
val stage0EnqueueValid = newProbe().suggestName(s"stage0EnqueueValid${index}")

val changingMaskSet = newProbe().suggestName(s"changingMaskSet${index}")

// Slot-level state flags.
val slotActive = newProbe().suggestName(s"slotActive${index}")
val slotOccupied = newProbe().suggestName(s"slotOccupied${index}")
val pipeFinish = newProbe().suggestName(s"pipeFinish${index}")

// Slot shift / decode result flags; created for every slot index.
val slotShiftValid = newProbe().suggestName(s"slotShiftValid${index}")
val decodeResultIsCrossReadOrWrite = newProbe().suggestName(s"decodeResultIsCrossReadOrWrite${index}")
val decodeResultIsScheduler = newProbe().suggestName(s"decodeResultIsScheduler${index}")

// Execution unit VFU request handshake.
val executionUnitVfuRequestReady = newProbe().suggestName(s"executionUnitVfuRequestReady${index}")
val executionUnitVfuRequestValid = newProbe().suggestName(s"executionUnitVfuRequestValid${index}")

// Stage 3 VRF write request handshake.
val stage3VrfWriteReady = newProbe().suggestName(s"stage3VrfWriteReady${index}")
val stage3VrfWriteValid = newProbe().suggestName(s"stage3VrfWriteValid${index}")

// Re-export the probes declared inside stage 1 so callers reach them through this object.
val stage1Probes = stage1.stageProbe
}

// Stage 0 enqueue handshake, mask state and activity of this slot.
define(probe.stage0EnqueueReady, ProbeValue(stage0.enqueue.ready))
define(probe.stage0EnqueueValid, ProbeValue(stage0.enqueue.valid))
define(probe.changingMaskSet, ProbeValue(record.mask.valid || !record.laneRequest.mask))
define(probe.slotActive, ProbeValue(slotActive(index)))

// Signals that help explain why the slot is stalled
define(probe.slotOccupied, ProbeValue(slotOccupied(index)))
define(probe.pipeFinish, ProbeValue(pipeFinishVec(index)))

// These three probes are defined for every slot (there is no isLastSlot guard here).
// NOTE(review): the visible test bench only reads them for the last slot.
define(probe.slotShiftValid, ProbeValue(slotShiftValid(index)))
define(probe.decodeResultIsCrossReadOrWrite, ProbeValue(decodeResult(Decoder.crossRead) || decodeResult(Decoder.crossWrite)))
define(probe.decodeResultIsScheduler, ProbeValue(decodeResult(Decoder.scheduler)))

// Execution unit VFU request handshake.
define(probe.executionUnitVfuRequestReady, ProbeValue(executionUnit.vfuRequest.ready))
define(probe.executionUnitVfuRequestValid, ProbeValue(executionUnit.vfuRequest.valid))

// Stage 3 VRF write request handshake.
define(probe.stage3VrfWriteReady, ProbeValue(stage3.vrfWriteRequest.ready))
define(probe.stage3VrfWriteValid, ProbeValue(stage3.vrfWriteRequest.valid))
// Result of the enclosing map: one probe object per slot.
probe
}

// Read Ring
Expand Down
37 changes: 37 additions & 0 deletions v/src/laneStage/LaneStage1.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package v
import chisel3._
import chisel3.util._
import chisel3.util.experimental.decode.DecodeBundle
import chisel3.probe.{Probe, ProbeValue, define}

class LaneStage1Enqueue(parameter: LaneParameter, isLastSlot: Boolean) extends Bundle {
val groupCounter: UInt = UInt(parameter.groupNumberBits.W)
Expand Down Expand Up @@ -329,4 +330,40 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends
when(enqueue.fire ^ dequeue.fire) {
stageValidReg := enqueue.fire
}

// Probe ports exposed by this stage. The cross-read / read-finish probes are only
// created when the stage belongs to the last slot; elsewhere they are None.
object stageProbe {
  // Creates one fresh boolean probe output port per call.
  def newProbe(): Bool = IO(Output(Probe(Bool())))

  // A probe that exists only for the last slot.
  private def lastSlotOnlyProbe(): Option[Bool] = Option.when(isLastSlot)(newProbe())

  // Dequeue handshake.
  val dequeueReadyProbe = newProbe()
  val dequeueValidProbe = newProbe()

  val hasDataOccupiedProbe = newProbe()

  val stageFinishProbe = newProbe()
  val readFinishProbe = lastSlotOnlyProbe()
  val sSendCrossReadResultLSBProbe = lastSlotOnlyProbe()
  val sSendCrossReadResultMSBProbe = lastSlotOnlyProbe()
  val wCrossReadLSBProbe = lastSlotOnlyProbe()
  val wCrossReadMSBProbe = lastSlotOnlyProbe()

  // One (ready, valid) probe pair per VRF read request; the ready probe is created first.
  val vrfReadRequestProbe: Seq[(Bool, Bool)] = Seq.fill(3) {
    val readyProbe = newProbe()
    val validProbe = newProbe()
    (readyProbe, validProbe)
  }
}

// Drive the probes declared in stageProbe from the stage's internal signals.
define(stageProbe.dequeueReadyProbe, ProbeValue(dequeue.ready))
define(stageProbe.dequeueValidProbe, ProbeValue(dequeue.valid))
define(stageProbe.hasDataOccupiedProbe, ProbeValue(stageValid))
define(stageProbe.stageFinishProbe, ProbeValue(stageFinish))

// Last-slot-only probes: each Option is populated exactly when isLastSlot holds.
if (isLastSlot) {
  stageProbe.readFinishProbe.foreach { target =>
    define(target, ProbeValue(readFinish))
  }
  stageProbe.sSendCrossReadResultLSBProbe.foreach { target =>
    define(target, ProbeValue(sSendCrossReadResultLSB.get))
  }
  stageProbe.sSendCrossReadResultMSBProbe.foreach { target =>
    define(target, ProbeValue(sSendCrossReadResultMSB.get))
  }
  stageProbe.wCrossReadLSBProbe.foreach { target =>
    define(target, ProbeValue(wCrossReadLSB.get))
  }
  stageProbe.wCrossReadMSBProbe.foreach { target =>
    define(target, ProbeValue(wCrossReadMSB.get))
  }
}

// Pair the n-th (ready, valid) probes with the n-th VRF read request port.
stageProbe.vrfReadRequestProbe.indices.foreach { port =>
  val (readyProbe, validProbe) = stageProbe.vrfReadRequestProbe(port)
  define(readyProbe, ProbeValue(vrfReadRequest(port).ready))
  define(validProbe, ProbeValue(vrfReadRequest(port).valid))
}
}

0 comments on commit 898975f

Please sign in to comment.