diff --git a/.github/cases/blastoise/default.json b/.github/cases/blastoise/default.json index 0101d2268..c1ac6a116 100644 --- a/.github/cases/blastoise/default.json +++ b/.github/cases/blastoise/default.json @@ -26,7 +26,7 @@ "codegen.vasubu_vv": 92300, "codegen.vasubu_vx": 300458, "codegen.vcompress_vm": 19981, - "codegen.vcpop_m": 1691, + "codegen.vcpop_m": 1661, "codegen.vdiv_vv": 32739, "codegen.vdiv_vx": 225056, "codegen.vdivu_vv": 32835, @@ -499,4 +499,4 @@ "codegen.vfredusum_vs": 99859, "codegen.vfredmax_vs": 99859, "codegen.vfredmin_vs": 99859 -} \ No newline at end of file +} diff --git a/.github/cases/machamp/default.json b/.github/cases/machamp/default.json index 8a0365a29..06799caf2 100644 --- a/.github/cases/machamp/default.json +++ b/.github/cases/machamp/default.json @@ -24,7 +24,7 @@ "codegen.vasubu_vv": 112654, "codegen.vasubu_vx": 334220, "codegen.vcompress_vm": 31353, - "codegen.vcpop_m": 1967, + "codegen.vcpop_m": 1943, "codegen.vdiv_vv": 38992, "codegen.vdiv_vx": 236842, "codegen.vdivu_vv": 39002, @@ -435,4 +435,4 @@ "codegen.vxor_vx": 83889, "codegen.vzext_vf2": 40007, "codegen.vzext_vf4": 6592 -} \ No newline at end of file +} diff --git a/.github/cases/sandslash/default.json b/.github/cases/sandslash/default.json index cd4cb24c5..b78d1fb13 100644 --- a/.github/cases/sandslash/default.json +++ b/.github/cases/sandslash/default.json @@ -24,7 +24,7 @@ "codegen.vasubu_vv": 278200, "codegen.vasubu_vx": 2090344, "codegen.vcompress_vm": 136872, - "codegen.vcpop_m": 4955, + "codegen.vcpop_m": 4931, "codegen.vdiv_vv": 193913, "codegen.vdiv_vx": 727252, "codegen.vdivu_vv": 193676, @@ -435,4 +435,4 @@ "codegen.vxor_vx": 523387, "codegen.vzext_vf2": 167243, "codegen.vzext_vf4": 26777 -} \ No newline at end of file +} diff --git a/t1/src/LaneFFO.scala b/t1/src/LaneFFO.scala index 15c8fc13c..dce652b1c 100644 --- a/t1/src/LaneFFO.scala +++ b/t1/src/LaneFFO.scala @@ -21,7 +21,7 @@ class LaneFFO(datapathWidth: Int) extends Module { @public val maskType: Bool = IO(Input(Bool())) - val truthMask: UInt = Mux(maskType, src.head, -1.S(datapathWidth.W).asUInt) + val truthMask: UInt = Mux(maskType, src.head, -1.S(datapathWidth.W).asUInt) & src(3) val srcData: UInt = truthMask & src(1) val notZero: Bool = srcData.orR val lo: UInt = scanLeftOr(srcData) diff --git a/t1/src/T1.scala b/t1/src/T1.scala index 106a26c4e..08088cda5 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -751,6 +751,7 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa completedVec.foreach(_ := false.B) WARRedResult.valid := false.B unOrderTypeInstruction := unOrderType + dataResult := 0.U.asTypeOf(dataResult) }.elsewhen(control.state.wLast && maskUnitIdle) { // 如果真需要执行的lane会wScheduler,不会提前发出last确认 when(!mixedUnit) { @@ -806,7 +807,7 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa csrRegForMaskUnit.vl > csrRegForMaskUnit.vStart mvToVRF.foreach(d => when(requestRegDequeue.fire){d := writeMv}) // 读后写中的读 - val needWAR = maskTypeInstruction || border || (reduce && !popCount) || readMv + val needWAR = (maskTypeInstruction || border || reduce || readMv) && !popCount val skipLaneData: Bool = decodeResultReg(Decoder.mv) mixedUnit := writeMv || readMv maskReadLaneSelect.head := UIntToOH(writeBackCounter) @@ -1240,8 +1241,9 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa decodeResultReg(Decoder.maskDestination) || decodeResultReg(Decoder.ffo) // How many data path(32 bit) will used by maskDestination instruction. + val maskDestinationByteSize: Bits = csrRegForMaskUnit.vl(log2Ceil(parameter.dLen) - 1, 0) << csrRegForMaskUnit.vSew val maskDestinationUseDataPathSize = - (csrRegForMaskUnit.vl(log2Ceil(parameter.dLen) - 1, 0) << csrRegForMaskUnit.vSew >> 2).asUInt + (maskDestinationByteSize >> 2).asUInt + maskDestinationByteSize(1, 0).orR val lastGroupCountForThisGroup: UInt = maskDestinationUseDataPathSize(log2Ceil(parameter.laneNumber) - 1, 0) val counterForMaskDestination: UInt = if(parameter.laneNumber > 1) { (lastGroupCountForThisGroup - 1.U) | diff --git a/t1/src/laneStage/LaneExecutionBridge.scala b/t1/src/laneStage/LaneExecutionBridge.scala index e10f94d68..f36b6f5c2 100644 --- a/t1/src/laneStage/LaneExecutionBridge.scala +++ b/t1/src/laneStage/LaneExecutionBridge.scala @@ -541,5 +541,8 @@ class LaneExecutionBridge(parameter: LaneParameter, isLastSlot: Boolean, slotInd recordNotExecute)) || reduceLastResponse assert(!queue.io.enq.valid || queue.io.enq.ready) dequeue <> queue.io.deq - updateMaskResult.foreach(_ := !recordQueue.io.deq.bits.sSendResponse.get && queue.io.enq.fire) + updateMaskResult.foreach(_ := + (!recordQueue.io.deq.bits.sSendResponse.get && queue.io.enq.fire) || + (enqueue.fire && enqueue.bits.groupCounter === 0.U) + ) }