From e844dc2aa3b02795a8b4f23503104bc9e16bec24 Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Mon, 20 May 2024 09:28:55 -0700 Subject: [PATCH] Disable lowering scf.for loop into repeat count, unless the dimension has non-zero offset (#583) * Disable repeat_count * Disabling repeat count unless offset --- mlir/lib/Conversion/AIRToAIESchedulingUtils.cpp | 3 ++- mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie2.mlir | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Conversion/AIRToAIESchedulingUtils.cpp b/mlir/lib/Conversion/AIRToAIESchedulingUtils.cpp index a406df23c..bb70a2fe5 100644 --- a/mlir/lib/Conversion/AIRToAIESchedulingUtils.cpp +++ b/mlir/lib/Conversion/AIRToAIESchedulingUtils.cpp @@ -186,7 +186,8 @@ int air::getRepeatCount(Operation *memcpy_op) { getConstantIntValue(chan_op.getSizes()[0])) { auto const_highest_stride = getConstantIntValue(chan_op.getStrides()[0]); auto const_highest_size = getConstantIntValue(chan_op.getSizes()[0]); - if (*const_highest_stride == 0) { + auto const_highest_offset = getConstantIntValue(chan_op.getOffsets()[0]); + if (*const_highest_stride == 0 && !const_highest_offset) { // Highest dimension data access pattern is repeat. return *const_highest_size; } diff --git a/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie2.mlir b/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie2.mlir index 89fda38e1..c6ac65037 100644 --- a/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie2.mlir +++ b/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie2.mlir @@ -640,15 +640,15 @@ func.func @func9(%arg0: memref<128xf32>, %arg1: memref<128xf32>) { // Tile / memtile DMA repeat count support. // CHECK: aie.device(xcve2802) // CHECK: %[[tileDMA_0_4:.*]] = aie.mem -// CHECK: aie.dma_start(S2MM, 0, ^bb1, ^bb2, repeat_count = 32) +// CHECK: aie.dma_start(S2MM, 0, ^bb1, ^bb2, repeat_count = 1) // CHECK: aie.dma_bd({{.*}} : memref<32x256xi32, 2>, 0, 8192) // CHECK: %[[tileDMA_0_3:.*]] = aie.mem -// CHECK: aie.dma_start(S2MM, 0, ^bb1, ^bb2, repeat_count = 32) +// CHECK: aie.dma_start(S2MM, 0, ^bb1, ^bb2, repeat_count = 1) // CHECK: aie.dma_bd({{.*}} : memref<32x256xi32, 2>, 0, 8192) // CHECK: %[[memTileDMA_2_1:.*]] = aie.memtile_dma -// CHECK: aie.dma_start(MM2S, 0, ^bb1, ^bb3, repeat_count = 32) +// CHECK: aie.dma_start(MM2S, 0, ^bb1, ^bb3, repeat_count = 1) // CHECK: aie.dma_bd({{.*}} : memref<32x256xi32, 1>, 0, 8192) -// CHECK: aie.dma_start(MM2S, 1, ^bb4, ^bb2, repeat_count = 32) +// CHECK: aie.dma_start(MM2S, 1, ^bb4, ^bb2, repeat_count = 1) // CHECK: aie.dma_bd({{.*}} : memref<32x256xi32, 1>, 0, 8192) // CHECK: @func10 #map = affine_map<()[s0] -> (s0 * 32)>