From b83d25bd2393cc01d6de3c191466a45dda15d9d4 Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Mon, 22 Jul 2024 14:09:27 +0800 Subject: [PATCH 1/2] [rtl] Remove empty write. --- t1/src/lsu/StoreUnit.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/t1/src/lsu/StoreUnit.scala b/t1/src/lsu/StoreUnit.scala index fb5d9ee4a..69f28a6a0 100644 --- a/t1/src/lsu/StoreUnit.scala +++ b/t1/src/lsu/StoreUnit.scala @@ -58,7 +58,6 @@ class StoreUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic { val lastDataGroupReg: UInt = RegEnable(lastDataGroupForInstruction, 0.U, lsuRequest.valid) val nextDataGroup: UInt = Mux(lsuRequest.valid, 0.U, dataGroup + 1.U) val isLastRead: Bool = dataGroup === lastDataGroupReg - val lastGroupAndNeedAlign: Bool = initOffset.orR && isLastRead // stage1, 读vrf // todo: need hazardCheck? @@ -159,12 +158,12 @@ class StoreUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic { // 存每条cache 的mask, 也许能优化, 暂时先这样 val maskForBufferData: Vec[UInt] = RegInit(VecInit(Seq.fill(8)(0.U(param.lsuTransposeSize.W)))) val maskForBufferDequeue: UInt = maskForBufferData(cacheLineIndexInBuffer) - val tailLeft2: Bool = RegInit(false.B) + val lastDataGroupInDataBuffer: Bool = RegInit(false.B) val alignedDequeueFire: Bool = memRequest.fire // cache 不对齐的时候的上一条残留 val cacheLineTemp: UInt = RegEnable(dataBuffer.head, 0.U((param.lsuTransposeSize * 8).W), alignedDequeueFire) val maskTemp: UInt = RegInit(0.U(param.lsuTransposeSize.W)) - val tailValid: Bool = RegInit(false.B) + val canSendTail: Bool = RegInit(false.B) val isLastCacheLineInBuffer: Bool = cacheLineIndexInBuffer === lsuRequestReg.instructionInformation.nf val bufferWillClear: Bool = alignedDequeueFire && isLastCacheLineInBuffer accessBufferDequeueReady := !bufferValid || (memRequest.ready && isLastCacheLineInBuffer) @@ -176,7 +175,7 @@ class StoreUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic { // 把数据regroup, 然后放去 [[dataBuffer]] when(accessBufferDequeueFire) { maskForBufferData := cutUInt(fillBySeg, param.lsuTransposeSize) - tailLeft2 := lastGroupAndNeedAlign + lastDataGroupInDataBuffer := isLastRead // todo: 只是因为参数恰好是一个方形的, 需要写一个反的 dataBuffer := Mux1H(dataEEWOH, Seq.tabulate(3) { sewSize => // 每个数据块 2 ** sew byte @@ -238,11 +237,12 @@ class StoreUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic { when(lsuRequest.valid || alignedDequeueFire) { maskTemp := Mux(lsuRequest.valid, 0.U, maskForBufferDequeue) - tailValid := Mux(lsuRequest.valid, false.B, bufferValid && tailLeft2 && isLastCacheLineInBuffer) + canSendTail := !lsuRequest.valid && bufferValid && isLastCacheLineInBuffer && lastDataGroupInDataBuffer } // 连接 alignedDequeue - memRequest.valid := bufferValid || tailValid + val needSendTail: Bool = bufferBaseCacheLineIndex === cacheLineNumberReg + memRequest.valid := bufferValid || (canSendTail && needSendTail) // aligned memRequest.bits.data := multiShifter(right = false, multiSize = 8)(dataBuffer.head ## cacheLineTemp, initOffset) >> cacheLineTemp.getWidth From 76f0caebc96f3f1972a1ab85ee001fe0a78b115f Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 22 Jul 2024 07:00:05 +0000 Subject: [PATCH 2/2] [ci] update test case cycle data --- .github/cases/blastoise/default.json | 18 +++++++++--------- .github/cases/machamp/default.json | 14 +++++++------- .github/cases/sandslash/default.json | 14 +++++++------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/.github/cases/blastoise/default.json b/.github/cases/blastoise/default.json index 3a46b2e41..14a4b6fd1 100644 --- a/.github/cases/blastoise/default.json +++ b/.github/cases/blastoise/default.json @@ -5,9 +5,9 @@ "mlir.stripmining": 26736, "asm.mmm": 92573, "asm.smoke": 4867, - "intrinsic.conv2d_less_m2": 2612, + "intrinsic.conv2d_less_m2": 2594, "intrinsic.linear_normalization": 3280, - "intrinsic.softmax": 8348, + "intrinsic.softmax": 8347, "codegen.vaadd_vv": 91859, "codegen.vaadd_vx": 253663, "codegen.vaaddu_vv": 91859, @@ -499,14 +499,14 @@ "codegen.vfredusum_vs": 122275, "codegen.vfredmax_vs": 122275, "codegen.vfredmin_vs": 122275, - "rvv_bench.ascii_to_utf16": 1583921, - "rvv_bench.ascii_to_utf32": 704111, - "rvv_bench.byteswap": 3353234, + "rvv_bench.ascii_to_utf16": 1583663, + "rvv_bench.ascii_to_utf32": 703954, + "rvv_bench.byteswap": 3353148, "rvv_bench.chacha20": 2, - "rvv_bench.mandelbrot": 4056018, - "rvv_bench.memcpy": 2152069, - "rvv_bench.memset": 439171, - "rvv_bench.mergelines": 3338064, + "rvv_bench.mandelbrot": 4055961, + "rvv_bench.memcpy": 2131904, + "rvv_bench.memset": 438545, + "rvv_bench.mergelines": 3337870, "rvv_bench.poly1305": 2, "rvv_bench.strlen": 877539, "rvv_bench.utf8_count": 6340756 diff --git a/.github/cases/machamp/default.json b/.github/cases/machamp/default.json index 000e0c79f..d7cde8503 100644 --- a/.github/cases/machamp/default.json +++ b/.github/cases/machamp/default.json @@ -5,7 +5,7 @@ "mlir.stripmining": 13425, "asm.mmm": 91428, "asm.smoke": 5005, - "intrinsic.conv2d_less_m2": 2612, + "intrinsic.conv2d_less_m2": 2585, "codegen.vaadd_vv": 90595, "codegen.vaadd_vx": 250851, "codegen.vaaddu_vv": 90595, @@ -435,13 +435,13 @@ "codegen.vxor_vx": 63090, "codegen.vzext_vf2": 39398, "codegen.vzext_vf4": 6420, - "rvv_bench.ascii_to_utf16": 1460381, - "rvv_bench.ascii_to_utf32": 631370, - "rvv_bench.byteswap": 3259113, + "rvv_bench.ascii_to_utf16": 1460078, + "rvv_bench.ascii_to_utf32": 631187, + "rvv_bench.byteswap": 3259002, "rvv_bench.chacha20": 2, - "rvv_bench.memcpy": 1905444, - "rvv_bench.memset": 244925, - "rvv_bench.mergelines": 3137885, + "rvv_bench.memcpy": 1875713, + "rvv_bench.memset": 244319, + "rvv_bench.mergelines": 3137759, "rvv_bench.poly1305": 2, "rvv_bench.strlen": 710421, "rvv_bench.utf8_count": 5729721 diff --git a/.github/cases/sandslash/default.json b/.github/cases/sandslash/default.json index 6fa9e360a..32e69fe3a 100644 --- a/.github/cases/sandslash/default.json +++ b/.github/cases/sandslash/default.json @@ -5,7 +5,7 @@ "mlir.stripmining": 3577, "asm.mmm": 91437, "asm.smoke": 3558, - "intrinsic.conv2d_less_m2": 2612, + "intrinsic.conv2d_less_m2": 2576, "codegen.vaadd_vv": 119061, "codegen.vaadd_vx": 336401, "codegen.vaaddu_vv": 119061, @@ -435,13 +435,13 @@ "codegen.vxor_vx": 84837, "codegen.vzext_vf2": 134873, "codegen.vzext_vf4": 19800, - "rvv_bench.ascii_to_utf16": 1371886, - "rvv_bench.ascii_to_utf32": 583519, - "rvv_bench.byteswap": 3556432, + "rvv_bench.ascii_to_utf16": 1371550, + "rvv_bench.ascii_to_utf32": 583318, + "rvv_bench.byteswap": 3556315, "rvv_bench.chacha20": 2, - "rvv_bench.memcpy": 1739436, - "rvv_bench.memset": 131052, - "rvv_bench.mergelines": 3038652, + "rvv_bench.memcpy": 1704869, + "rvv_bench.memset": 130483, + "rvv_bench.mergelines": 3038600, "rvv_bench.poly1305": 2, "rvv_bench.strlen": 715272, "rvv_bench.utf8_count": 4797732