diff --git a/ipemu/src/TestBench.scala b/ipemu/src/TestBench.scala index c3c96b318..897931d49 100644 --- a/ipemu/src/TestBench.scala +++ b/ipemu/src/TestBench.scala @@ -27,6 +27,7 @@ class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) extends RawModule with ImplicitClock with ImplicitReset { + layer.enable(layers.Verification) val omInstance: Instance[TestBenchOM] = Instantiate(new TestBenchOM) val omType: ClassType = omInstance.toDefinition.getClassType @public diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index 6b7dec0c2..bcefeddcc 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -313,9 +313,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ val vrfReadyToStore: Bool = IO(Output(Bool())) @public - val laneProbe = IO(Output(Probe(new LaneProbe(parameter)))) - val probeWire = Wire(new LaneProbe(parameter)) - define(laneProbe, ProbeValue(probeWire)) + val laneProbe = IO(Output(Probe(new LaneProbe(parameter), layers.Verification))) @public val vrfAllocateIssue: Bool = IO(Output(Bool())) @@ -526,315 +524,298 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ )) val maskedWriteUnit: Instance[MaskedWrite] = Instantiate(new MaskedWrite(parameter)) val tokenManager: Instance[SlotTokenManager] = Instantiate(new SlotTokenManager(parameter)) - slotControl.zipWithIndex.foreach { - case (record, index) => - val decodeResult: DecodeBundle = record.laneRequest.decodeResult - val isLastSlot: Boolean = index == 0 - - /** We will ignore the effect of mask since: - * [[Decoder.crossRead]]: We need to read data to another lane - * [[Decoder.crossWrite]]: We need to send cross write report to another lane - * [[Decoder.scheduler]]: We need to synchronize with [[T1]] every group - * [[record.laneRequest.loadStore]]: We need to read data to lsu every group - */ - val alwaysNextGroup: Bool = decodeResult(Decoder.crossRead) || decodeResult(Decoder.crossWrite) || - 
decodeResult(Decoder.nr) || !decodeResult(Decoder.scheduler) || record.laneRequest.loadStore - - // mask not use for mask element - val maskNotMaskedElement = !record.laneRequest.mask || - record.laneRequest.decodeResult(Decoder.maskSource) || - record.laneRequest.decodeResult(Decoder.maskLogic) - - /** onehot value of SEW. */ - val vSew1H: UInt = UIntToOH(record.csr.vSew)(2, 0) - - /** if asserted, the element won't be executed. - * adc: vm = 0; madc: vm = 0 -> s0 + s1 + c, vm = 1 -> s0 + s1 - */ - val skipEnable: Bool = record.laneRequest.mask && - !record.laneRequest.decodeResult(Decoder.maskSource) && - !record.laneRequest.decodeResult(Decoder.maskLogic) && - !alwaysNextGroup - - // register for s0 enqueue, it will move with the slot - // 'maskGroupCountVec' 'maskIndexVec' 'pipeFinishVec' - - if (isLastSlot) { - // todo: Reach vfu - slotActive(index) := slotOccupied(index) - } else { - slotActive(index) := slotOccupied(index) && !slotShiftValid(index) && - !(decodeResult(Decoder.crossRead) || decodeResult(Decoder.crossWrite) || decodeResult(Decoder.widenReduce)) && - decodeResult(Decoder.scheduler) - } + // TODO: do we need to expose the slot to a module? 
+ class Slot(val record: InstructionControlRecord, val index: Int) { + val decodeResult: DecodeBundle = record.laneRequest.decodeResult + val isLastSlot: Boolean = index == 0 + + /** We will ignore the effect of mask since: + * [[Decoder.crossRead]]: We need to read data to another lane + * [[Decoder.crossWrite]]: We need to send cross write report to another lane + * [[Decoder.scheduler]]: We need to synchronize with [[T1]] every group + * [[record.laneRequest.loadStore]]: We need to read data to lsu every group + */ + val alwaysNextGroup: Bool = decodeResult(Decoder.crossRead) || decodeResult(Decoder.crossWrite) || + decodeResult(Decoder.nr) || !decodeResult(Decoder.scheduler) || record.laneRequest.loadStore + + // mask not use for mask element + val maskNotMaskedElement = !record.laneRequest.mask || + record.laneRequest.decodeResult(Decoder.maskSource) || + record.laneRequest.decodeResult(Decoder.maskLogic) + + /** onehot value of SEW. */ + val vSew1H: UInt = UIntToOH(record.csr.vSew)(2, 0) + + /** if asserted, the element won't be executed. 
+ * adc: vm = 0; madc: vm = 0 -> s0 + s1 + c, vm = 1 -> s0 + s1 + */ + val skipEnable: Bool = record.laneRequest.mask && + !record.laneRequest.decodeResult(Decoder.maskSource) && + !record.laneRequest.decodeResult(Decoder.maskLogic) && + !alwaysNextGroup + + // register for s0 enqueue, it will move with the slot + // 'maskGroupCountVec' 'maskIndexVec' 'pipeFinishVec' + + if (isLastSlot) { + // todo: Reach vfu + slotActive(index) := slotOccupied(index) + } else { + slotActive(index) := slotOccupied(index) && !slotShiftValid(index) && + !(decodeResult(Decoder.crossRead) || decodeResult(Decoder.crossWrite) || decodeResult(Decoder.widenReduce)) && + decodeResult(Decoder.scheduler) + } - if(isLastSlot) { - slotCanShift(index) := !slotOccupied(index) - } else { - slotCanShift(index) := true.B - } + if(isLastSlot) { + slotCanShift(index) := !slotOccupied(index) + } else { + slotCanShift(index) := true.B + } - val laneState: LaneState = Wire(new LaneState(parameter)) - val stage0: Instance[LaneStage0] = Instantiate(new LaneStage0(parameter, isLastSlot)) - val stage1: Instance[LaneStage1] = Instantiate(new LaneStage1(parameter, isLastSlot)) - val stage2: Instance[LaneStage2] = Instantiate(new LaneStage2(parameter, isLastSlot)) - val executionUnit: Instance[LaneExecutionBridge] = Instantiate(new LaneExecutionBridge(parameter, isLastSlot, index)) - val stage3: Instance[LaneStage3] = Instantiate(new LaneStage3(parameter, isLastSlot)) - - // slot state - laneState.vSew1H := vSew1H - laneState.loadStore := record.laneRequest.loadStore - laneState.laneIndex := laneIndex - laneState.decodeResult := record.laneRequest.decodeResult - laneState.lastGroupForInstruction := record.lastGroupForInstruction - laneState.isLastLaneForInstruction := record.isLastLaneForInstruction - laneState.instructionFinished := record.instructionFinished - laneState.csr := record.csr - laneState.maskType := record.laneRequest.mask - laneState.maskNotMaskedElement := !record.laneRequest.mask || - 
record.laneRequest.decodeResult(Decoder.maskSource) || - record.laneRequest.decodeResult(Decoder.maskLogic) - laneState.vs1 := record.laneRequest.vs1 - laneState.vs2 := record.laneRequest.vs2 - laneState.vd := record.laneRequest.vd - laneState.instructionIndex := record.laneRequest.instructionIndex - laneState.skipEnable := skipEnable - laneState.ffoByOtherLanes := ffoRecord.ffoByOtherLanes - laneState.additionalRW := record.additionalRW - laneState.skipRead := record.laneRequest.decodeResult(Decoder.other) && - (record.laneRequest.decodeResult(Decoder.uop) === 9.U) - - stage0.enqueue.valid := slotActive(index) && (record.mask.valid || !record.laneRequest.mask) - stage0.enqueue.bits.maskIndex := maskIndexVec(index) - stage0.enqueue.bits.maskForMaskGroup := record.mask.bits - stage0.enqueue.bits.maskGroupCount := maskGroupCountVec(index) - // todo: confirm - stage0.enqueue.bits.elements.foreach { case (k ,d) => - laneState.elements.get(k).foreach(stateData => d := stateData) - } + val laneState: LaneState = Wire(new LaneState(parameter)) + val stage0: Instance[LaneStage0] = Instantiate(new LaneStage0(parameter, isLastSlot)) + val stage1: Instance[LaneStage1] = Instantiate(new LaneStage1(parameter, isLastSlot)) + val stage2: Instance[LaneStage2] = Instantiate(new LaneStage2(parameter, isLastSlot)) + val executionUnit: Instance[LaneExecutionBridge] = Instantiate(new LaneExecutionBridge(parameter, isLastSlot, index)) + val stage3: Instance[LaneStage3] = Instantiate(new LaneStage3(parameter, isLastSlot)) + + // slot state + laneState.vSew1H := vSew1H + laneState.loadStore := record.laneRequest.loadStore + laneState.laneIndex := laneIndex + laneState.decodeResult := record.laneRequest.decodeResult + laneState.lastGroupForInstruction := record.lastGroupForInstruction + laneState.isLastLaneForInstruction := record.isLastLaneForInstruction + laneState.instructionFinished := record.instructionFinished + laneState.csr := record.csr + laneState.maskType := 
record.laneRequest.mask + laneState.maskNotMaskedElement := !record.laneRequest.mask || + record.laneRequest.decodeResult(Decoder.maskSource) || + record.laneRequest.decodeResult(Decoder.maskLogic) + laneState.vs1 := record.laneRequest.vs1 + laneState.vs2 := record.laneRequest.vs2 + laneState.vd := record.laneRequest.vd + laneState.instructionIndex := record.laneRequest.instructionIndex + laneState.skipEnable := skipEnable + laneState.ffoByOtherLanes := ffoRecord.ffoByOtherLanes + laneState.additionalRW := record.additionalRW + laneState.skipRead := record.laneRequest.decodeResult(Decoder.other) && + (record.laneRequest.decodeResult(Decoder.uop) === 9.U) + + stage0.enqueue.valid := slotActive(index) && (record.mask.valid || !record.laneRequest.mask) + stage0.enqueue.bits.maskIndex := maskIndexVec(index) + stage0.enqueue.bits.maskForMaskGroup := record.mask.bits + stage0.enqueue.bits.maskGroupCount := maskGroupCountVec(index) + // todo: confirm + stage0.enqueue.bits.elements.foreach { case (k ,d) => + laneState.elements.get(k).foreach(stateData => d := stateData) + } - // update lane state - when(stage0.enqueue.fire) { - maskGroupCountVec(index) := stage0.updateLaneState.maskGroupCount - // todo: handle all elements in first group are masked - maskIndexVec(index) := stage0.updateLaneState.maskIndex - when(stage0.updateLaneState.outOfExecutionRange) { - slotOccupied(index) := false.B - } + // update lane state + when(stage0.enqueue.fire) { + maskGroupCountVec(index) := stage0.updateLaneState.maskGroupCount + // todo: handle all elements in first group are masked + maskIndexVec(index) := stage0.updateLaneState.maskIndex + when(stage0.updateLaneState.outOfExecutionRange) { + slotOccupied(index) := false.B } + } - // update mask todo: handle maskRequestFireOH - slotMaskRequestVec(index).valid := - record.laneRequest.mask && - ((stage0.enqueue.fire && stage0.updateLaneState.maskExhausted) || !record.mask.valid) - slotMaskRequestVec(index).bits := 
stage0.updateLaneState.maskGroupCount - // There are new masks - val maskUpdateFire: Bool = slotMaskRequestVec(index).valid && maskRequestFireOH(index) - // The old mask is used up - val maskFailure: Bool = stage0.updateLaneState.maskExhausted && stage0.enqueue.fire - // update mask register - when(maskUpdateFire) { - record.mask.bits := maskInput - } - when(maskUpdateFire ^ maskFailure) { - record.mask.valid := maskUpdateFire - } + // update mask todo: handle maskRequestFireOH + slotMaskRequestVec(index).valid := + record.laneRequest.mask && + ((stage0.enqueue.fire && stage0.updateLaneState.maskExhausted) || !record.mask.valid) + slotMaskRequestVec(index).bits := stage0.updateLaneState.maskGroupCount + // There are new masks + val maskUpdateFire: Bool = slotMaskRequestVec(index).valid && maskRequestFireOH(index) + // The old mask is used up + val maskFailure: Bool = stage0.updateLaneState.maskExhausted && stage0.enqueue.fire + // update mask register + when(maskUpdateFire) { + record.mask.bits := maskInput + } + when(maskUpdateFire ^ maskFailure) { + record.mask.valid := maskUpdateFire + } - val instructionIndex1H: UInt = UIntToOH( - record.laneRequest.instructionIndex(parameter.instructionIndexBits - 2, 0) - ) - instructionUnrelatedMaskUnitVec(index) := - Mux(decodeResult(Decoder.maskUnit) && decodeResult(Decoder.readOnly), 0.U, instructionIndex1H) - - // stage 1: read stage - stage1.enqueue.valid := stage0.dequeue.valid - stage0.dequeue.ready := stage1.enqueue.ready - stage1.enqueue.bits.groupCounter := stage0.dequeue.bits.groupCounter - stage1.enqueue.bits.maskForMaskInput := stage0.dequeue.bits.maskForMaskInput - stage1.enqueue.bits.boundaryMaskCorrection := stage0.dequeue.bits.boundaryMaskCorrection - stage1.enqueue.bits.sSendResponse.zip(stage0.dequeue.bits.sSendResponse).foreach { case (sink, source) => - sink := source - } - stage1.dequeue.bits.readBusDequeueGroup.foreach(data => readBusDequeueGroup := data) + val instructionIndex1H: UInt = UIntToOH( + 
record.laneRequest.instructionIndex(parameter.instructionIndexBits - 2, 0) + ) + instructionUnrelatedMaskUnitVec(index) := + Mux(decodeResult(Decoder.maskUnit) && decodeResult(Decoder.readOnly), 0.U, instructionIndex1H) + + // stage 1: read stage + stage1.enqueue.valid := stage0.dequeue.valid + stage0.dequeue.ready := stage1.enqueue.ready + stage1.enqueue.bits.groupCounter := stage0.dequeue.bits.groupCounter + stage1.enqueue.bits.maskForMaskInput := stage0.dequeue.bits.maskForMaskInput + stage1.enqueue.bits.boundaryMaskCorrection := stage0.dequeue.bits.boundaryMaskCorrection + stage1.enqueue.bits.sSendResponse.zip(stage0.dequeue.bits.sSendResponse).foreach { case (sink, source) => + sink := source + } + stage1.dequeue.bits.readBusDequeueGroup.foreach(data => readBusDequeueGroup := data) - stage1.enqueue.bits.elements.foreach { case (k ,d) => - stage0.dequeue.bits.elements.get(k).foreach(stateData => d := stateData) - } - stage0.enqueue.bits.readFromScalar := record.laneRequest.readFromScalar - vrfReadRequest(index).zip(stage1.vrfReadRequest).foreach{ case (sink, source) => sink <> source } - vrfReadResult(index).zip(stage1.vrfReadResult).foreach{ case (source, sink) => sink := source } - // 3: read vs1 vs2 vd - // 2: cross read lsb & msb - val checkSize = if (isLastSlot) 5 else 3 - Seq.tabulate(checkSize){ portIndex => - // parameter.chainingSize - index: slot 0 need 5 port, so reverse connection - readCheckRequestVec((parameter.chainingSize - index - 1) * 3 + portIndex) := stage1.vrfCheckRequest(portIndex) - stage1.checkResult(portIndex) := readCheckResult((parameter.chainingSize - index - 1) * 3 + portIndex) - } - // connect cross read bus - if(isLastSlot) { - val tokenSize = parameter.crossLaneVRFWriteEscapeQueueSize - readBusPort.zipWithIndex.foreach {case (readPort, portIndex) => - // tx - val tokenReg = RegInit(0.U(log2Ceil(tokenSize + 1).W)) - val tokenReady: Bool = tokenReg =/= tokenSize.U - stage1.readBusRequest.get(portIndex).ready := tokenReady - 
readPort.deq.valid := stage1.readBusRequest.get(portIndex).valid && tokenReady - readPort.deq.bits := stage1.readBusRequest.get(portIndex).bits - val tokenUpdate = Mux(readPort.deq.valid, 1.U, -1.S(tokenReg.getWidth.W).asUInt) - when(readPort.deq.valid ^ readPort.deqRelease) { - tokenReg := tokenReg + tokenUpdate - } - // rx - // rx queue - val queue = Module(new Queue(chiselTypeOf(readPort.deq.bits), tokenSize, pipe=true)) - queue.io.enq.valid := readPort.enq.valid - queue.io.enq.bits := readPort.enq.bits - readPort.enqRelease := queue.io.deq.fire - assert(queue.io.enq.ready || !readPort.enq.valid) - // dequeue to cross read unit - stage1.readBusDequeue.get(portIndex) <> queue.io.deq + stage1.enqueue.bits.elements.foreach { case (k ,d) => + stage0.dequeue.bits.elements.get(k).foreach(stateData => d := stateData) + } + stage0.enqueue.bits.readFromScalar := record.laneRequest.readFromScalar + vrfReadRequest(index).zip(stage1.vrfReadRequest).foreach{ case (sink, source) => sink <> source } + vrfReadResult(index).zip(stage1.vrfReadResult).foreach{ case (source, sink) => sink := source } + // 3: read vs1 vs2 vd + // 2: cross read lsb & msb + val checkSize = if (isLastSlot) 5 else 3 + Seq.tabulate(checkSize){ portIndex => + // parameter.chainingSize - index: slot 0 need 5 port, so reverse connection + readCheckRequestVec((parameter.chainingSize - index - 1) * 3 + portIndex) := stage1.vrfCheckRequest(portIndex) + stage1.checkResult(portIndex) := readCheckResult((parameter.chainingSize - index - 1) * 3 + portIndex) + } + // connect cross read bus + if(isLastSlot) { + val tokenSize = parameter.crossLaneVRFWriteEscapeQueueSize + readBusPort.zipWithIndex.foreach {case (readPort, portIndex) => + // tx + val tokenReg = RegInit(0.U(log2Ceil(tokenSize + 1).W)) + val tokenReady: Bool = tokenReg =/= tokenSize.U + stage1.readBusRequest.get(portIndex).ready := tokenReady + readPort.deq.valid := stage1.readBusRequest.get(portIndex).valid && tokenReady + readPort.deq.bits := 
stage1.readBusRequest.get(portIndex).bits + val tokenUpdate = Mux(readPort.deq.valid, 1.U, -1.S(tokenReg.getWidth.W).asUInt) + when(readPort.deq.valid ^ readPort.deqRelease) { + tokenReg := tokenReg + tokenUpdate } + // rx + // rx queue + val queue = Module(new Queue(chiselTypeOf(readPort.deq.bits), tokenSize, pipe=true)) + queue.io.enq.valid := readPort.enq.valid + queue.io.enq.bits := readPort.enq.bits + readPort.enqRelease := queue.io.deq.fire + assert(queue.io.enq.ready || !readPort.enq.valid) + // dequeue to cross read unit + stage1.readBusDequeue.get(portIndex) <> queue.io.deq + } - // cross write - writeBusPort.zipWithIndex.foreach {case (writePort, portIndex) => - val tokenReg = RegInit(0.U(log2Ceil(tokenSize + 1).W)) - val tokenReady: Bool = tokenReg =/= tokenSize.U - writePort.deq.valid := stage3.crossWritePort.get(portIndex).valid && tokenReady - writePort.deq.bits := stage3.crossWritePort.get(portIndex).bits - stage3.crossWritePort.get(portIndex).ready := tokenReady - - // update token - val tokenUpdate = Mux(writePort.deq.valid, 1.U, -1.S(tokenReg.getWidth.W).asUInt) - when(writePort.deq.valid ^ writePort.deqRelease) { - tokenReg := tokenReg + tokenUpdate - } + // cross write + writeBusPort.zipWithIndex.foreach {case (writePort, portIndex) => + val tokenReg = RegInit(0.U(log2Ceil(tokenSize + 1).W)) + val tokenReady: Bool = tokenReg =/= tokenSize.U + writePort.deq.valid := stage3.crossWritePort.get(portIndex).valid && tokenReady + writePort.deq.bits := stage3.crossWritePort.get(portIndex).bits + stage3.crossWritePort.get(portIndex).ready := tokenReady + + // update token + val tokenUpdate = Mux(writePort.deq.valid, 1.U, -1.S(tokenReg.getWidth.W).asUInt) + when(writePort.deq.valid ^ writePort.deqRelease) { + tokenReg := tokenReg + tokenUpdate } } + } - stage2.enqueue.valid := stage1.dequeue.valid && executionUnit.enqueue.ready - stage1.dequeue.ready := stage2.enqueue.ready && executionUnit.enqueue.ready - executionUnit.enqueue.valid := 
stage1.dequeue.valid && stage2.enqueue.ready + stage2.enqueue.valid := stage1.dequeue.valid && executionUnit.enqueue.ready + stage1.dequeue.ready := stage2.enqueue.ready && executionUnit.enqueue.ready + executionUnit.enqueue.valid := stage1.dequeue.valid && stage2.enqueue.ready - stage2.enqueue.bits.elements.foreach { case (k ,d) => - stage1.dequeue.bits.elements.get(k).foreach( pipeData => d := pipeData) - } - stage2.enqueue.bits.groupCounter := stage1.dequeue.bits.groupCounter - stage2.enqueue.bits.mask := stage1.dequeue.bits.mask - stage2.enqueue.bits.maskForFilter := stage1.dequeue.bits.maskForFilter - stage2.enqueue.bits.src := stage1.dequeue.bits.src - stage2.enqueue.bits.sSendResponse.zip(stage1.dequeue.bits.sSendResponse).foreach { case (sink, source) => - sink := source - } - stage2.enqueue.bits.bordersForMaskLogic := executionUnit.enqueue.bits.bordersForMaskLogic + stage2.enqueue.bits.elements.foreach { case (k ,d) => + stage1.dequeue.bits.elements.get(k).foreach( pipeData => d := pipeData) + } + stage2.enqueue.bits.groupCounter := stage1.dequeue.bits.groupCounter + stage2.enqueue.bits.mask := stage1.dequeue.bits.mask + stage2.enqueue.bits.maskForFilter := stage1.dequeue.bits.maskForFilter + stage2.enqueue.bits.src := stage1.dequeue.bits.src + stage2.enqueue.bits.sSendResponse.zip(stage1.dequeue.bits.sSendResponse).foreach { case (sink, source) => + sink := source + } + stage2.enqueue.bits.bordersForMaskLogic := executionUnit.enqueue.bits.bordersForMaskLogic - executionUnit.enqueue.bits.elements.foreach { case (k ,d) => - stage1.dequeue.bits.elements.get(k).foreach( pipeData => d := pipeData) - } - executionUnit.enqueue.bits.src := stage1.dequeue.bits.src - executionUnit.enqueue.bits.bordersForMaskLogic := stage1.dequeue.bits.bordersForMaskLogic - executionUnit.enqueue.bits.mask := stage1.dequeue.bits.mask - executionUnit.enqueue.bits.maskForFilter := stage1.dequeue.bits.maskForFilter - executionUnit.enqueue.bits.groupCounter := 
stage1.dequeue.bits.groupCounter - executionUnit.enqueue.bits.sSendResponse.zip(stage1.dequeue.bits.sSendResponse).foreach { case (sink, source) => - sink := source - } - executionUnit.enqueue.bits.crossReadSource.zip(stage1.dequeue.bits.crossReadSource).foreach { case (sink, source) => - sink := source - } + executionUnit.enqueue.bits.elements.foreach { case (k ,d) => + stage1.dequeue.bits.elements.get(k).foreach( pipeData => d := pipeData) + } + executionUnit.enqueue.bits.src := stage1.dequeue.bits.src + executionUnit.enqueue.bits.bordersForMaskLogic := stage1.dequeue.bits.bordersForMaskLogic + executionUnit.enqueue.bits.mask := stage1.dequeue.bits.mask + executionUnit.enqueue.bits.maskForFilter := stage1.dequeue.bits.maskForFilter + executionUnit.enqueue.bits.groupCounter := stage1.dequeue.bits.groupCounter + executionUnit.enqueue.bits.sSendResponse.zip(stage1.dequeue.bits.sSendResponse).foreach { case (sink, source) => + sink := source + } + executionUnit.enqueue.bits.crossReadSource.zip(stage1.dequeue.bits.crossReadSource).foreach { case (sink, source) => + sink := source + } - executionUnit.ffoByOtherLanes := ffoRecord.ffoByOtherLanes - executionUnit.selfCompleted := ffoRecord.selfCompleted - - // executionUnit <> vfu - requestVec(index) := executionUnit.vfuRequest.bits - executeDecodeVec(index) := executionUnit.executeDecode - responseDecodeVec(index) := executionUnit.responseDecode - executeEnqueueValid(index) := executionUnit.vfuRequest.valid - executionUnit.vfuRequest.ready := executeEnqueueFire(index) - executionUnit.dataResponse := responseVec(index) - - vxsatEnq(index) := Mux( - executionUnit.dataResponse.valid && - (executionUnit.dataResponse.bits.clipFail ## executionUnit.dataResponse.bits.vxsat).orR, - UIntToOH(executionUnit.responseIndex(parameter.instructionIndexBits - 2, 0)), - 0.U(parameter.chainingSize.W) - ) - when(executionUnit.dequeue.valid)(assert(stage2.dequeue.valid)) - stage3.enqueue.valid := executionUnit.dequeue.valid - 
executionUnit.dequeue.ready := stage3.enqueue.ready - stage2.dequeue.ready := executionUnit.dequeue.fire + executionUnit.ffoByOtherLanes := ffoRecord.ffoByOtherLanes + executionUnit.selfCompleted := ffoRecord.selfCompleted + + // executionUnit <> vfu + requestVec(index) := executionUnit.vfuRequest.bits + executeDecodeVec(index) := executionUnit.executeDecode + responseDecodeVec(index) := executionUnit.responseDecode + executeEnqueueValid(index) := executionUnit.vfuRequest.valid + executionUnit.vfuRequest.ready := executeEnqueueFire(index) + executionUnit.dataResponse := responseVec(index) + + vxsatEnq(index) := Mux( + executionUnit.dataResponse.valid && + (executionUnit.dataResponse.bits.clipFail ## executionUnit.dataResponse.bits.vxsat).orR, + UIntToOH(executionUnit.responseIndex(parameter.instructionIndexBits - 2, 0)), + 0.U(parameter.chainingSize.W) + ) + when(executionUnit.dequeue.valid)(assert(stage2.dequeue.valid)) + stage3.enqueue.valid := executionUnit.dequeue.valid + executionUnit.dequeue.ready := stage3.enqueue.ready + stage2.dequeue.ready := executionUnit.dequeue.fire - if (!isLastSlot) { - stage3.enqueue.bits := DontCare - } + if (!isLastSlot) { + stage3.enqueue.bits := DontCare + } - // pipe state from stage0 - stage3.enqueue.bits.decodeResult := stage2.dequeue.bits.decodeResult - stage3.enqueue.bits.instructionIndex := stage2.dequeue.bits.instructionIndex - stage3.enqueue.bits.loadStore := stage2.dequeue.bits.loadStore - stage3.enqueue.bits.vd := stage2.dequeue.bits.vd - stage3.enqueue.bits.ffoByOtherLanes := ffoRecord.ffoByOtherLanes - stage3.enqueue.bits.groupCounter := stage2.dequeue.bits.groupCounter - stage3.enqueue.bits.mask := stage2.dequeue.bits.mask - if (isLastSlot) { - stage3.enqueue.bits.sSendResponse := stage2.dequeue.bits.sSendResponse.get - stage3.enqueue.bits.ffoSuccess := executionUnit.dequeue.bits.ffoSuccess.get - stage3.enqueue.bits.fpReduceValid.zip(executionUnit.dequeue.bits.fpReduceValid).foreach { - case (sink, source) => sink 
:= source - } + // pipe state from stage0 + stage3.enqueue.bits.decodeResult := stage2.dequeue.bits.decodeResult + stage3.enqueue.bits.instructionIndex := stage2.dequeue.bits.instructionIndex + stage3.enqueue.bits.loadStore := stage2.dequeue.bits.loadStore + stage3.enqueue.bits.vd := stage2.dequeue.bits.vd + stage3.enqueue.bits.ffoByOtherLanes := ffoRecord.ffoByOtherLanes + stage3.enqueue.bits.groupCounter := stage2.dequeue.bits.groupCounter + stage3.enqueue.bits.mask := stage2.dequeue.bits.mask + if (isLastSlot) { + stage3.enqueue.bits.sSendResponse := stage2.dequeue.bits.sSendResponse.get + stage3.enqueue.bits.ffoSuccess := executionUnit.dequeue.bits.ffoSuccess.get + stage3.enqueue.bits.fpReduceValid.zip(executionUnit.dequeue.bits.fpReduceValid).foreach { + case (sink, source) => sink := source } - stage3.enqueue.bits.data := executionUnit.dequeue.bits.data - stage3.enqueue.bits.pipeData := stage2.dequeue.bits.pipeData.getOrElse(DontCare) - stage3.enqueue.bits.ffoIndex := executionUnit.dequeue.bits.ffoIndex - executionUnit.dequeue.bits.crossWriteData.foreach(data => stage3.enqueue.bits.crossWriteData := data) - stage2.dequeue.bits.sSendResponse.foreach(_ => stage3.enqueue.bits.sSendResponse := _) - executionUnit.dequeue.bits.ffoSuccess.foreach(_ => stage3.enqueue.bits.ffoSuccess := _) - - if (isLastSlot){ - when(laneResponseFeedback.valid) { - when(laneResponseFeedback.bits.complete) { - ffoRecord.ffoByOtherLanes := true.B - } - } - when(stage3.enqueue.fire) { - executionUnit.dequeue.bits.ffoSuccess.foreach(ffoRecord.selfCompleted := _) - // This group found means the next group ended early - ffoRecord.ffoByOtherLanes := ffoRecord.ffoByOtherLanes || ffoRecord.selfCompleted + } + stage3.enqueue.bits.data := executionUnit.dequeue.bits.data + stage3.enqueue.bits.pipeData := stage2.dequeue.bits.pipeData.getOrElse(DontCare) + stage3.enqueue.bits.ffoIndex := executionUnit.dequeue.bits.ffoIndex + executionUnit.dequeue.bits.crossWriteData.foreach(data => 
stage3.enqueue.bits.crossWriteData := data) + stage2.dequeue.bits.sSendResponse.foreach(_ => stage3.enqueue.bits.sSendResponse := _) + executionUnit.dequeue.bits.ffoSuccess.foreach(_ => stage3.enqueue.bits.ffoSuccess := _) + + if (isLastSlot){ + when(laneResponseFeedback.valid) { + when(laneResponseFeedback.bits.complete) { + ffoRecord.ffoByOtherLanes := true.B } - - laneResponse <> stage3.laneResponse.get - stage3.laneResponseFeedback.get <> laneResponseFeedback + } + when(stage3.enqueue.fire) { + executionUnit.dequeue.bits.ffoSuccess.foreach(ffoRecord.selfCompleted := _) + // This group found means the next group ended early + ffoRecord.ffoByOtherLanes := ffoRecord.ffoByOtherLanes || ffoRecord.selfCompleted } - // --- stage 3 end & stage 4 start --- - // vrfWriteQueue try to write vrf - vrfWriteArbiter(index).valid := stage3.vrfWriteRequest.valid - vrfWriteArbiter(index).bits := stage3.vrfWriteRequest.bits - stage3.vrfWriteRequest.ready := vrfWriteArbiter(index).ready + laneResponse <> stage3.laneResponse.get + stage3.laneResponseFeedback.get <> laneResponseFeedback + } - tokenManager.enqReports(index) := stage0.tokenReport + // --- stage 3 end & stage 4 start --- + // vrfWriteQueue try to write vrf + vrfWriteArbiter(index).valid := stage3.vrfWriteRequest.valid + vrfWriteArbiter(index).bits := stage3.vrfWriteRequest.bits + stage3.vrfWriteRequest.ready := vrfWriteArbiter(index).ready - // probes - probeWire.slots(index).stage0EnqueueReady := stage0.enqueue.ready - probeWire.slots(index).stage0EnqueueValid := stage0.enqueue.valid - probeWire.slots(index).changingMaskSet := record.mask.valid || !record.laneRequest.mask - probeWire.slots(index).slotActive := slotActive(index) - probeWire.slots(index).slotOccupied := slotOccupied(index) - probeWire.slots(index).pipeFinish := !slotOccupied(index) - probeWire.slots(index).slotShiftValid := slotShiftValid(index) - probeWire.slots(index).decodeResultIsCrossReadOrWrite := decodeResult(Decoder.crossRead) || 
decodeResult(Decoder.crossWrite) - probeWire.slots(index).decodeResultIsScheduler := decodeResult(Decoder.scheduler) - probeWire.slots(index).executionUnitVfuRequestReady := executionUnit.vfuRequest.ready - probeWire.slots(index).executionUnitVfuRequestValid := executionUnit.vfuRequest.valid - probeWire.slots(index).stage3VrfWriteReady := stage3.vrfWriteRequest.ready - probeWire.slots(index).stage3VrfWriteValid := stage3.vrfWriteRequest.valid - probeWire.slots(index).writeQueueEnq := stage3.vrfWriteRequest.fire - probeWire.slots(index).writeTag := stage3.vrfWriteRequest.bits.instructionIndex - probeWire.slots(index).writeMask := stage3.vrfWriteRequest.bits.mask - // probeWire.slots(index).probeStage1 := ??? + tokenManager.enqReports(index) := stage0.tokenReport + } + val slots = slotControl.zipWithIndex.map { + case (record: InstructionControlRecord, index: Int) => new Slot(record, index) } - // cross write bus <> write queue crossLaneWriteQueue.zipWithIndex.foreach {case (queue, index) => @@ -1226,15 +1207,40 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ tokenManager.topWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize) tokenManager.topWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize).instructionIndex - // probe wire - probeWire.laneRequestStall := laneRequest.valid && !laneRequest.ready - probeWire.lastSlotOccupied := slotOccupied.last - probeWire.instructionFinished := instructionFinished - probeWire.instructionValid := instructionValid - probeWire.crossWriteProbe.zip(writeBusPort).foreach {case (pb, port) => - pb.valid := port.deq.valid - pb.bits.writeTag := port.deq.bits.instructionIndex - pb.bits.writeMask := port.deq.bits.mask + // Verification probes: the probe wire and all of its drivers are created inside layer.block(layers.Verification) + layer.block(layers.Verification) { + val probeWire = Wire(new LaneProbe(parameter)) + define(laneProbe, ProbeValue(probeWire)) + // per-slot probes: visit each Slot exactly once and drive the entry selected by slot.index + slots.foreach { slot: Slot => + probeWire.slots(slot.index).stage0EnqueueReady := slot.stage0.enqueue.ready +
probeWire.slots(slot.index).stage0EnqueueValid := slot.stage0.enqueue.valid + probeWire.slots(slot.index).changingMaskSet := slot.record.mask.valid || !slot.record.laneRequest.mask + probeWire.slots(slot.index).slotActive := slotActive(slot.index) + probeWire.slots(slot.index).slotOccupied := slotOccupied(slot.index) + probeWire.slots(slot.index).pipeFinish := !slotOccupied(slot.index) + probeWire.slots(slot.index).slotShiftValid := slotShiftValid(slot.index) + probeWire.slots(slot.index).decodeResultIsCrossReadOrWrite := slot.decodeResult(Decoder.crossRead) || slot.decodeResult(Decoder.crossWrite) + probeWire.slots(slot.index).decodeResultIsScheduler := slot.decodeResult(Decoder.scheduler) + probeWire.slots(slot.index).executionUnitVfuRequestReady := slot.executionUnit.vfuRequest.ready + probeWire.slots(slot.index).executionUnitVfuRequestValid := slot.executionUnit.vfuRequest.valid + probeWire.slots(slot.index).stage3VrfWriteReady := slot.stage3.vrfWriteRequest.ready + probeWire.slots(slot.index).stage3VrfWriteValid := slot.stage3.vrfWriteRequest.valid + probeWire.slots(slot.index).writeQueueEnq := slot.stage3.vrfWriteRequest.fire + probeWire.slots(slot.index).writeTag := slot.stage3.vrfWriteRequest.bits.instructionIndex + probeWire.slots(slot.index).writeMask := slot.stage3.vrfWriteRequest.bits.mask + } + // lane-level probes + probeWire.laneRequestStall := laneRequest.valid && !laneRequest.ready + probeWire.lastSlotOccupied := slotOccupied.last + probeWire.instructionFinished := instructionFinished + probeWire.instructionValid := instructionValid + probeWire.crossWriteProbe.zip(writeBusPort).foreach {case (pb, port) => + pb.valid := port.deq.valid + pb.bits.writeTag := port.deq.bits.instructionIndex + pb.bits.writeMask := port.deq.bits.mask + } + probeWire.vrfProbe := probe.read(vrf.vrfProbe) } - probeWire.vrfProbe := probe.read(vrf.vrfProbe) + } diff --git a/t1/src/T1.scala b/t1/src/T1.scala index afc7ebb68..68bf07448 100644 --- a/t1/src/T1.scala
+++ b/t1/src/T1.scala @@ -327,7 +327,7 @@ class T1Interface(parameter: T1Parameter) extends Record { "highBandwidthLoadStorePort" -> new AXI4RWIrrevocable(parameter.axi4BundleParameter), "indexedLoadStorePort" -> new AXI4RWIrrevocable(parameter.axi4BundleParameter.copy(dataWidth=32)), "om" -> Output(Property[AnyClassType]()), - "t1Probe" -> Output(Probe(new T1Probe(parameter))), + "t1Probe" -> Output(Probe(new T1Probe(parameter), layers.Verification)), ) ) } @@ -1721,26 +1721,27 @@ class T1(val parameter: T1Parameter) // don't care有可能会导致先读后写失败 maskUnitReadVec.foreach(_.bits.instructionIndex := slots.last.record.instructionIndex) - /** - * Probes - */ - val probeWire = Wire(new T1Probe(parameter)) - define(io.t1Probe, ProbeValue(probeWire)) - probeWire.instructionCounter := instructionCounter - probeWire.instructionIssue := requestRegDequeue.fire - probeWire.issueTag := requestReg.bits.instructionIndex - probeWire.retireValid := retire - // maskUnitWrite maskUnitWriteReady - probeWire.writeQueueEnq.valid := maskUnitWrite.valid && maskUnitWriteReady - probeWire.writeQueueEnq.bits := maskUnitWrite.bits.instructionIndex - probeWire.writeQueueEnqMask := maskUnitWrite.bits.mask - probeWire.instructionValid := maskAnd( - !slots.last.state.sMaskUnitExecution && !slots.last.state.idle, - indexToOH(slots.last.record.instructionIndex, parameter.chainingSize * 2)).asUInt - probeWire.responseCounter := responseCounter - probeWire.laneProbes.zip(laneVec).foreach { case (p, l) => p := probe.read(l.laneProbe) } - probeWire.lsuProbe := probe.read(lsu.lsuProbe) - + layer.block(layers.Verification) { + /** + * Probes + */ + val probeWire = Wire(new T1Probe(parameter)) + define(io.t1Probe, ProbeValue(probeWire)) + probeWire.instructionCounter := instructionCounter + probeWire.instructionIssue := requestRegDequeue.fire + probeWire.issueTag := requestReg.bits.instructionIndex + probeWire.retireValid := retire + // maskUnitWrite maskUnitWriteReady + probeWire.writeQueueEnq.valid := 
maskUnitWrite.valid && maskUnitWriteReady + probeWire.writeQueueEnq.bits := maskUnitWrite.bits.instructionIndex + probeWire.writeQueueEnqMask := maskUnitWrite.bits.mask + probeWire.instructionValid := maskAnd( + !slots.last.state.sMaskUnitExecution && !slots.last.state.idle, + indexToOH(slots.last.record.instructionIndex, parameter.chainingSize * 2)).asUInt + probeWire.responseCounter := responseCounter + probeWire.laneProbes.zip(laneVec).foreach { case (p, l) => p := probe.read(l.laneProbe) } + probeWire.lsuProbe := probe.read(lsu.lsuProbe) + } // new V Request from core // val requestValidProbe: Bool = IO(Output(Probe(Bool()))) diff --git a/t1/src/laneStage/LaneStage1.scala b/t1/src/laneStage/LaneStage1.scala index f44826e79..e7acab62f 100644 --- a/t1/src/laneStage/LaneStage1.scala +++ b/t1/src/laneStage/LaneStage1.scala @@ -385,43 +385,45 @@ class LaneStage1(parameter: LaneParameter, isLastSlot: Boolean) extends Module { stageValid := pipeQueue.io.deq.valid val stageFinish = !stageValid + // TODO: gather this probe logic into a single Probe Bundle @public - val dequeueReadyProbe = IO(Output(Probe(Bool()))) + val dequeueReadyProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val dequeueValidProbe = IO(Output(Probe(Bool()))) + val dequeueValidProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val hasDataOccupiedProbe = IO(Output(Probe(Bool()))) + val hasDataOccupiedProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val stageFinishProbe = IO(Output(Probe(Bool()))) + val stageFinishProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val readFinishProbe = Option.when(isLastSlot)(IO(Output(Probe(Bool())))) + val readFinishProbe = Option.when(isLastSlot)(IO(Output(Probe(Bool(), layers.Verification)))) @public - val sSendCrossReadResultLSBProbe = Option.when(isLastSlot)(IO(Output(Probe(Bool())))) + val sSendCrossReadResultLSBProbe = Option.when(isLastSlot)(IO(Output(Probe(Bool(), layers.Verification)))) @public - val
sSendCrossReadResultMSBProbe = Option.when(isLastSlot)(IO(Output(Probe(Bool())))) + val sSendCrossReadResultMSBProbe = Option.when(isLastSlot)(IO(Output(Probe(Bool(), layers.Verification)))) @public - val wCrossReadLSBProbe = Option.when(isLastSlot)(IO(Output(Probe(Bool())))) + val wCrossReadLSBProbe = Option.when(isLastSlot)(IO(Output(Probe(Bool(), layers.Verification)))) @public - val wCrossReadMSBProbe = Option.when(isLastSlot)(IO(Output(Probe(Bool())))) + val wCrossReadMSBProbe = Option.when(isLastSlot)(IO(Output(Probe(Bool(), layers.Verification)))) @public - val vrfReadRequestProbe: Seq[(Bool, Bool)] = Seq.fill(3)((IO(Output(Probe(Bool()))),IO(Output(Probe(Bool()))))) - - - define(dequeueReadyProbe, ProbeValue(dequeue.ready)) - define(dequeueValidProbe, ProbeValue(dequeue.valid)) - define(hasDataOccupiedProbe, ProbeValue(stageValid)) - define(stageFinishProbe, ProbeValue(stageFinish)) - - if (isLastSlot) { - readFinishProbe.foreach(p => define(p, ProbeValue(dataQueueVs2.io.deq.valid))) - sSendCrossReadResultLSBProbe.foreach(p => define(p, ProbeValue(crossReadUnitOp.get.crossWriteState.sSendCrossReadResultLSB))) - sSendCrossReadResultMSBProbe.foreach(p => define(p, ProbeValue(crossReadUnitOp.get.crossWriteState.sSendCrossReadResultMSB))) - wCrossReadLSBProbe.foreach(p => define(p, ProbeValue(crossReadUnitOp.get.crossWriteState.wCrossReadLSB))) - wCrossReadMSBProbe.foreach(p => define(p, ProbeValue(crossReadUnitOp.get.crossWriteState.wCrossReadMSB))) - } + val vrfReadRequestProbe: Seq[(Bool, Bool)] = Seq.fill(3)((IO(Output(Probe(Bool(), layers.Verification))),IO(Output(Probe(Bool(), layers.Verification))))) + + layer.block(layers.Verification) { + define(dequeueReadyProbe, ProbeValue(dequeue.ready)) + define(dequeueValidProbe, ProbeValue(dequeue.valid)) + define(hasDataOccupiedProbe, ProbeValue(stageValid)) + define(stageFinishProbe, ProbeValue(stageFinish)) + + if (isLastSlot) { + readFinishProbe.foreach(p => define(p, ProbeValue(dataQueueVs2.io.deq.valid))) + 
sSendCrossReadResultLSBProbe.foreach(p => define(p, ProbeValue(crossReadUnitOp.get.crossWriteState.sSendCrossReadResultLSB))) + sSendCrossReadResultMSBProbe.foreach(p => define(p, ProbeValue(crossReadUnitOp.get.crossWriteState.sSendCrossReadResultMSB))) + wCrossReadLSBProbe.foreach(p => define(p, ProbeValue(crossReadUnitOp.get.crossWriteState.wCrossReadLSB))) + wCrossReadMSBProbe.foreach(p => define(p, ProbeValue(crossReadUnitOp.get.crossWriteState.wCrossReadMSB))) + } - vrfReadRequestProbe.zipWithIndex.foreach { case((ready, valid), i) => - define(ready, ProbeValue(vrfReadRequest(i).ready)) - define(valid, ProbeValue(vrfReadRequest(i).valid)) + vrfReadRequestProbe.zipWithIndex.foreach { case ((ready, valid), i) => + define(ready, ProbeValue(vrfReadRequest(i).ready)) + define(valid, ProbeValue(vrfReadRequest(i).valid)) + } } } diff --git a/t1/src/lsu/LSU.scala b/t1/src/lsu/LSU.scala index 9cb3b7d58..c8c2cd090 100644 --- a/t1/src/lsu/LSU.scala +++ b/t1/src/lsu/LSU.scala @@ -246,9 +246,7 @@ class LSU(param: LSUParameter) extends Module { ) @public - val lsuProbe = IO(Output(Probe(new LSUProbe(param)))) - val probeWire = Wire(new LSUProbe(param)) - define(lsuProbe, ProbeValue(probeWire)) + val lsuProbe = IO(Output(Probe(new LSUProbe(param), layers.Verification))) // read vrf val otherTryReadVrf: UInt = Mux(otherUnit.vrfReadDataPorts.valid, otherUnit.status.targetLane, 0.U) @@ -275,26 +273,31 @@ class LSU(param: LSUParameter) extends Module { write.io.enq.bits.data := Mux(otherTryToWrite(index), otherUnit.vrfWritePort.bits, loadUnit.vrfWritePort(index).bits) write.io.enq.bits.targetLane := (BigInt(1) << index).U loadUnit.vrfWritePort(index).ready := write.io.enq.ready && !otherTryToWrite(index) + } - // probes - probeWire.slots(index).dataVd := write.io.enq.bits.data.vd - probeWire.slots(index).dataOffset := write.io.enq.bits.data.offset - probeWire.slots(index).dataMask := write.io.enq.bits.data.mask - probeWire.slots(index).dataData := write.io.enq.bits.data.data - 
probeWire.slots(index).dataInstruction := write.io.enq.bits.data.instructionIndex - probeWire.slots(index).writeValid := write.io.enq.valid - probeWire.slots(index).targetLane := OHToUInt(write.io.enq.bits.targetLane) + layer.block(layers.Verification) { + val probeWire = Wire(new LSUProbe(param)) + define(lsuProbe, ProbeValue(probeWire)) + writeQueueVec.zipWithIndex.foreach { case (write, index) => + probeWire.slots(index).dataVd := write.io.enq.bits.data.vd + probeWire.slots(index).dataOffset := write.io.enq.bits.data.offset + probeWire.slots(index).dataMask := write.io.enq.bits.data.mask + probeWire.slots(index).dataData := write.io.enq.bits.data.data + probeWire.slots(index).dataInstruction := write.io.enq.bits.data.instructionIndex + probeWire.slots(index).writeValid := write.io.enq.valid + probeWire.slots(index).targetLane := OHToUInt(write.io.enq.bits.targetLane) + } + probeWire.reqEnq := reqEnq.asUInt + + probeWire.storeUnitProbe := probe.read(storeUnit.probe) + probeWire.otherUnitProbe := probe.read(otherUnit.probe) + probeWire.lsuInstructionValid := + // The load unit becomes idle when it writes vrf for the last time. + maskAnd(!loadUnit.status.idle || VecInit(loadUnit.vrfWritePort.map(_.valid)).asUInt.orR, + indexToOH(loadUnit.status.instructionIndex, 2 * param.chainingSize)).asUInt | + maskAnd(!storeUnit.status.idle, indexToOH(storeUnit.status.instructionIndex, 2 * param.chainingSize)).asUInt | + maskAnd(!otherUnit.status.idle, indexToOH(otherUnit.status.instructionIndex, 2 * param.chainingSize)).asUInt } - probeWire.reqEnq := reqEnq.asUInt - - probeWire.storeUnitProbe := probe.read(storeUnit.probe) - probeWire.otherUnitProbe := probe.read(otherUnit.probe) - probeWire.lsuInstructionValid := - // The load unit becomes idle when it writes vrf for the last time. 
- maskAnd(!loadUnit.status.idle || VecInit(loadUnit.vrfWritePort.map(_.valid)).asUInt.orR, - indexToOH(loadUnit.status.instructionIndex, 2 * param.chainingSize)).asUInt | - maskAnd(!storeUnit.status.idle, indexToOH(storeUnit.status.instructionIndex, 2 * param.chainingSize)).asUInt | - maskAnd(!otherUnit.status.idle, indexToOH(otherUnit.status.instructionIndex, 2 * param.chainingSize)).asUInt vrfWritePort.zip(writeQueueVec).foreach { case (p, q) => p.valid := q.io.deq.valid diff --git a/t1/src/lsu/LoadUnit.scala b/t1/src/lsu/LoadUnit.scala index 13ecb3f43..5d2514cf2 100644 --- a/t1/src/lsu/LoadUnit.scala +++ b/t1/src/lsu/LoadUnit.scala @@ -217,28 +217,23 @@ class LoadUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic { */ // Load Unit ready to accpet LSU request @public - val lsuRequestValidProbe = IO(Output(Probe(Bool()))) - define(lsuRequestValidProbe, ProbeValue(lsuRequest.valid)) + val lsuRequestValidProbe = IO(Output(Probe(Bool(), layers.Verification))) // Load Unit is idle @public - val idleProbe = IO(Output(Probe(Bool()))) - define(idleProbe, ProbeValue(status.idle)) + val idleProbe = IO(Output(Probe(Bool(), layers.Verification))) // Tilelink Channel A decouple IO status // ready: channel A is ready to accept signal // valid: Load Unit try to send signal to channel A @public - val tlPortAValidProbe = IO(Output(Probe(Bool()))) - define(tlPortAValidProbe, ProbeValue(memRequest.valid)) + val tlPortAValidProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val tlPortAReadyProbe = IO(Output(Probe(Bool()))) - define(tlPortAReadyProbe, ProbeValue(memRequest.ready)) + val tlPortAReadyProbe = IO(Output(Probe(Bool(), layers.Verification))) // Fail to send signal to tilelink Channel A because of address conflict @public - val addressConflictProbe = IO(Output(Probe(Bool()))) - define(addressConflictProbe, ProbeValue(addressConflict)) + val addressConflictProbe = IO(Output(Probe(Bool(), layers.Verification))) // // Tilelink used for accepting 
signal from receive signal from Channel D // @public @@ -285,37 +280,52 @@ class LoadUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic { // After receiving new cacheline from top, or current item is the last cacheline, // pop out data and transform it to an aligned cacheline, go through alignedDequeue to next level @public - val unalignedCacheLineProbe = IO(Output(Probe(Bool()))) - define(unalignedCacheLineProbe, ProbeValue(unalignedCacheLine.valid)) + val unalignedCacheLineProbe = IO(Output(Probe(Bool(), layers.Verification))) // Used for transmitting data from unalignedCacheline to dataBuffer @public - val alignedDequeueValidProbe = IO(Output(Probe(Bool()))) - define(alignedDequeueValidProbe, ProbeValue(alignedDequeue.valid)) + val alignedDequeueValidProbe = IO(Output(Probe(Bool(), layers.Verification))) + @public - val alignedDequeueReadyProbe = IO(Output(Probe(Bool()))) - define(alignedDequeueReadyProbe, ProbeValue(alignedDequeue.ready)) + val alignedDequeueReadyProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val bufferEnqueueSelectProbe = IO(Output(Probe(chiselTypeOf(bufferEnqueueSelect)))) - define(bufferEnqueueSelectProbe, ProbeValue(bufferEnqueueSelect)) + val bufferEnqueueSelectProbe = IO(Output(Probe(chiselTypeOf(bufferEnqueueSelect), layers.Verification))) // Load Unit can write VRF after writeReadyForLSU is true @public - val writeReadyForLSUProbe: Bool = IO(Output(Probe(chiselTypeOf(writeReadyForLsu)))) - define(writeReadyForLSUProbe, ProbeValue(writeReadyForLsu)) + val writeReadyForLSUProbe: Bool = IO(Output(Probe(chiselTypeOf(writeReadyForLsu), layers.Verification))) + // Write to VRF @public val vrfWriteValidProbe: Seq[Bool] = vrfWritePort.map(port => { - val probe = IO(Output(Probe(Bool()))) - define(probe, ProbeValue(port.valid)) + val probe = IO(Output(Probe(Bool(), layers.Verification))) + layer.block(layers.Verification) { + define(probe, ProbeValue(port.valid)) + } probe - }).toSeq + }) @public val 
vrfWriteReadyProbe: Seq[Bool] = vrfWritePort.map(port => { - val probe = IO(Output(Probe(Bool()))) - define(probe, ProbeValue(port.ready)) + val probe = IO(Output(Probe(Bool(), layers.Verification))) + layer.block(layers.Verification) { + define(probe, ProbeValue(port.ready)) + } probe }).toSeq + + layer.block(layers.Verification) { + define(lsuRequestValidProbe, ProbeValue(lsuRequest.valid)) + define(idleProbe, ProbeValue(status.idle)) + define(tlPortAValidProbe, ProbeValue(memRequest.valid)) + define(tlPortAReadyProbe, ProbeValue(memRequest.ready)) + define(addressConflictProbe, ProbeValue(addressConflict)) + define(unalignedCacheLineProbe, ProbeValue(unalignedCacheLine.valid)) + define(alignedDequeueValidProbe, ProbeValue(alignedDequeue.valid)) + define(alignedDequeueReadyProbe, ProbeValue(alignedDequeue.ready)) + define(bufferEnqueueSelectProbe, ProbeValue(bufferEnqueueSelect)) + define(writeReadyForLSUProbe, ProbeValue(writeReadyForLsu)) + } + } diff --git a/t1/src/lsu/SimpleAccessUnit.scala b/t1/src/lsu/SimpleAccessUnit.scala index aa22ddd39..51516e6d1 100644 --- a/t1/src/lsu/SimpleAccessUnit.scala +++ b/t1/src/lsu/SimpleAccessUnit.scala @@ -193,9 +193,7 @@ class SimpleAccessUnit(param: MSHRParam) extends Module with LSUPublic { // other unit probe @public - val probe = IO(Output(Probe(new MemoryWriteProbe(param)))) - val probeWire = Wire(new MemoryWriteProbe(param)) - define(probe, ProbeValue(probeWire)) + val probe = IO(Output(Probe(new MemoryWriteProbe(param), layers.Verification))) val s0Fire: Bool = Wire(Bool()) val s1Fire: Bool = Wire(Bool()) @@ -1036,39 +1034,25 @@ class SimpleAccessUnit(param: MSHRParam) extends Module with LSUPublic { */ val dataOffset = (s1EnqQueue.io.deq.bits.indexInMaskGroup << dataEEW)(1, 0) ## 0.U(3.W) - probeWire.valid := memWriteRequest.fire - probeWire.index := 2.U - probeWire.data := memWriteRequest.bits.data - probeWire.mask := memWriteRequest.bits.mask - probeWire.address := memWriteRequest.bits.address - @public - val 
lsuRequestValidProbe = IO(Output(Probe(Bool()))) - define(lsuRequestValidProbe, ProbeValue(lsuRequest.valid)) + val lsuRequestValidProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val s0EnqueueValidProbe = IO(Output(Probe(Bool()))) - define(s0EnqueueValidProbe, ProbeValue(s0EnqueueValid)) + val s0EnqueueValidProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val stateIsRequestProbe = IO(Output(Probe(Bool()))) - define(stateIsRequestProbe, ProbeValue(stateIsRequest)) + val stateIsRequestProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val maskCheckProbe = IO(Output(Probe(Bool()))) - define(maskCheckProbe, ProbeValue(maskCheck)) + val maskCheckProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val indexCheckProbe = IO(Output(Probe(Bool()))) - define(indexCheckProbe, ProbeValue(indexCheck)) + val indexCheckProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val fofCheckProbe = IO(Output(Probe(Bool()))) - define(fofCheckProbe, ProbeValue(fofCheck)) + val fofCheckProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val s0FireProbe: Bool = IO(Output(Probe(chiselTypeOf(s0Fire)))) - define(s0FireProbe, ProbeValue(s0Fire)) + val s0FireProbe: Bool = IO(Output(Probe(chiselTypeOf(s0Fire), layers.Verification))) @public - val s1FireProbe: Bool = IO(Output(Probe(chiselTypeOf(s1Fire)))) - define(s1FireProbe, ProbeValue(s1Fire)) + val s1FireProbe: Bool = IO(Output(Probe(chiselTypeOf(s1Fire), layers.Verification))) // @public // val tlPortAReadyProbe = IO(Output(Probe(Bool()))) @@ -1078,15 +1062,12 @@ class SimpleAccessUnit(param: MSHRParam) extends Module with LSUPublic { // define(tlPortAValidProbe, ProbeValue(tlPort.a.valid)) @public - val s1ValidProbe = IO(Output(Probe(Bool()))) - define(s1ValidProbe, ProbeValue(s1Valid)) + val s1ValidProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val sourceFreeProbe = IO(Output(Probe(Bool()))) - define(sourceFreeProbe, ProbeValue(sourceFree)) 
+ val sourceFreeProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val s2FireProbe: Bool = IO(Output(Probe(chiselTypeOf(s2Fire)))) - define(s2FireProbe, ProbeValue(s2Fire)) + val s2FireProbe: Bool = IO(Output(Probe(chiselTypeOf(s2Fire), layers.Verification))) // @public // val tlPortDReadyProbe = IO(Output(Probe(Bool()))) @@ -1097,13 +1078,35 @@ class SimpleAccessUnit(param: MSHRParam) extends Module with LSUPublic { @public - val stateValueProbe: UInt = IO(Output(Probe(chiselTypeOf(state)))) - define(stateValueProbe, ProbeValue(state)) + val stateValueProbe: UInt = IO(Output(Probe(chiselTypeOf(state), layers.Verification))) @public - val vrfWritePortIsValidProbe: Bool = IO(Output(Probe(Bool()))) - define(vrfWritePortIsValidProbe, ProbeValue(vrfWritePort.valid)) + val vrfWritePortIsValidProbe: Bool = IO(Output(Probe(Bool(), layers.Verification))) @public - val vrfWritePortIsReadyProbe: Bool = IO(Output(Probe(Bool()))) - define(vrfWritePortIsReadyProbe, ProbeValue(vrfWritePort.ready)) + val vrfWritePortIsReadyProbe: Bool = IO(Output(Probe(Bool(), layers.Verification))) + + layer.block(layers.Verification) { + val probeWire = Wire(new MemoryWriteProbe(param)) + define(probe, ProbeValue(probeWire)) + probeWire.valid := memWriteRequest.fire + probeWire.index := 2.U + probeWire.data := memWriteRequest.bits.data + probeWire.mask := memWriteRequest.bits.mask + probeWire.address := memWriteRequest.bits.address + define(lsuRequestValidProbe, ProbeValue(lsuRequest.valid)) + define(s0EnqueueValidProbe, ProbeValue(s0EnqueueValid)) + define(stateIsRequestProbe, ProbeValue(stateIsRequest)) + define(maskCheckProbe, ProbeValue(maskCheck)) + define(indexCheckProbe, ProbeValue(indexCheck)) + define(fofCheckProbe, ProbeValue(fofCheck)) + define(s0FireProbe, ProbeValue(s0Fire)) + define(s1FireProbe, ProbeValue(s1Fire)) + define(s1ValidProbe, ProbeValue(s1Valid)) + define(sourceFreeProbe, ProbeValue(sourceFree)) + define(s2FireProbe, ProbeValue(s2Fire)) + 
define(stateValueProbe, ProbeValue(state)) + define(vrfWritePortIsValidProbe, ProbeValue(vrfWritePort.valid)) + define(vrfWritePortIsReadyProbe, ProbeValue(vrfWritePort.ready)) + } + } diff --git a/t1/src/lsu/StoreUnit.scala b/t1/src/lsu/StoreUnit.scala index 69f28a6a0..96c1cf26c 100644 --- a/t1/src/lsu/StoreUnit.scala +++ b/t1/src/lsu/StoreUnit.scala @@ -46,9 +46,7 @@ class StoreUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic { // store unit probe @public - val probe = IO(Output(Probe(new MemoryWriteProbe(param)))) - val probeWire = Wire(new MemoryWriteProbe(param)) - define(probe, ProbeValue(probeWire)) + val probe = IO(Output(Probe(new MemoryWriteProbe(param), layers.Verification))) // stage 0, 处理 vl, mask ... val dataGroupByteSize: Int = param.datapathWidth * param.laneNumber / 8 @@ -272,24 +270,13 @@ class StoreUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic { 0.U(param.cacheLineBits.W) dontTouch(status) - /** - * Probes - */ - probeWire.valid := alignedDequeueFire - probeWire.index := 1.U - probeWire.data := memRequest.bits.data - probeWire.mask := memRequest.bits.mask - probeWire.address := alignedDequeueAddress - // Store Unit is idle @public - val idleProbe = IO(Output(Probe(Bool()))) - define(idleProbe, ProbeValue(status.idle)) + val idleProbe = IO(Output(Probe(Bool(), layers.Verification))) // lsuRequest is valid @public - val lsuRequestValidProbe = IO(Output(Probe(Bool()))) - define(lsuRequestValidProbe, ProbeValue(lsuRequest.valid)) + val lsuRequestValidProbe = IO(Output(Probe(Bool(), layers.Verification))) // @public // val tlPortAIsValidProbe = Seq.fill(param.memoryBankSize)(IO(Output(Probe(Bool())))) @@ -301,19 +288,32 @@ class StoreUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic { // }) @public - val addressConflictProbe = IO(Output(Probe(Bool()))) - define(addressConflictProbe, ProbeValue(addressConflict)) + val addressConflictProbe = IO(Output(Probe(Bool(), layers.Verification))) @public - val 
vrfReadDataPortIsValidProbe = Seq.fill(param.laneNumber)(IO(Output(Probe(Bool())))) + val vrfReadDataPortIsValidProbe = Seq.fill(param.laneNumber)(IO(Output(Probe(Bool(), layers.Verification)))) @public - val vrfReadDataPortIsReadyProbe = Seq.fill(param.laneNumber)(IO(Output(Probe(Bool())))) - vrfReadDataPorts.zipWithIndex.foreach({ case(port, i) => - define(vrfReadDataPortIsValidProbe(i), ProbeValue(port.valid)) - define(vrfReadDataPortIsReadyProbe(i), ProbeValue(port.ready)) - }) + val vrfReadDataPortIsReadyProbe = Seq.fill(param.laneNumber)(IO(Output(Probe(Bool(), layers.Verification)))) @public - val vrfReadyToStoreProbe = IO(Output(Probe(Bool()))) - define(vrfReadyToStoreProbe, ProbeValue(vrfReadyToStore)) + val vrfReadyToStoreProbe = IO(Output(Probe(Bool(), layers.Verification))) + + layer.block(layers.Verification) { + val probeWire = Wire(new MemoryWriteProbe(param)) + define(probe, ProbeValue(probeWire)) + probeWire.valid := alignedDequeueFire + probeWire.index := 1.U + probeWire.data := memRequest.bits.data + probeWire.mask := memRequest.bits.mask + probeWire.address := alignedDequeueAddress + + define(idleProbe, ProbeValue(status.idle)) + define(lsuRequestValidProbe, ProbeValue(lsuRequest.valid)) + define(addressConflictProbe, ProbeValue(addressConflict)) + vrfReadDataPorts.zipWithIndex.foreach({ case(port, i) => + define(vrfReadDataPortIsValidProbe(i), ProbeValue(port.valid)) + define(vrfReadDataPortIsReadyProbe(i), ProbeValue(port.ready)) + }) + define(vrfReadyToStoreProbe, ProbeValue(vrfReadyToStore)) + } } diff --git a/t1/src/vrf/VRF.scala b/t1/src/vrf/VRF.scala index 724ef6372..9a39fd638 100644 --- a/t1/src/vrf/VRF.scala +++ b/t1/src/vrf/VRF.scala @@ -215,6 +215,9 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar @public val loadDataInLSUWriteQueue: UInt = IO(Input(UInt(parameter.chainingSize.W))) + @public + val vrfProbe = IO(Output(Probe(new VRFProbe(parameter), layers.Verification))) + // reset sram val 
sramReady: Bool = RegInit(false.B) val sramResetCount: UInt = RegInit(0.U(log2Ceil(parameter.rfDepth).W)) @@ -560,18 +563,15 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar }.reduce(_ && _) } - /* - * Probe - */ - @public - val vrfProbe = IO(Output(Probe(new VRFProbe(parameter)))) - val probeWire = Wire(new VRFProbe(parameter)) - define(vrfProbe, ProbeValue(probeWire)) - - probeWire.valid := writePipe.valid - probeWire.requestVd := writePipe.bits.vd - probeWire.requestOffset := writePipe.bits.offset - probeWire.requestMask := writePipe.bits.mask - probeWire.requestData := writePipe.bits.data - probeWire.requestInstruction := writePipe.bits.instructionIndex + layer.block(layers.Verification) { + val probeWire = Wire(new VRFProbe(parameter)) + define(vrfProbe, ProbeValue(probeWire)) + + probeWire.valid := writePipe.valid + probeWire.requestVd := writePipe.bits.vd + probeWire.requestOffset := writePipe.bits.offset + probeWire.requestMask := writePipe.bits.mask + probeWire.requestData := writePipe.bits.data + probeWire.requestInstruction := writePipe.bits.instructionIndex + } }