From 51fc89269c1ac53fc53d727ac3f04f54dedf3f06 Mon Sep 17 00:00:00 2001
From: Avimitin
Date: Fri, 23 Feb 2024 16:36:54 +0800
Subject: [PATCH] [tests] fix test case names and type name collision

Signed-off-by: Avimitin
---
 tests/mlir/axpy_masked/axpy_masked.mlir       | 53 +++++++++++
 tests/mlir/axpy_masked/default.nix            |  2 +-
 tests/mlir/maxvl_tail_setvl_front/default.nix |  2 +-
 .../maxvl_tail_setvl_front.mlir               | 56 ++++++++++++
 tests/mlir/rvv_vp_intrinsic_add/default.nix   |  2 +-
 .../rvv_vp_intrinsic_add.mlir                 | 64 ++++++++++++++
 .../rvv_vp_intrinsic_add_scalable/default.nix |  2 +-
 .../rvv_vp_intrinsic_add_scalable.mlir        | 88 +++++++++++++++++++
 8 files changed, 265 insertions(+), 4 deletions(-)
 create mode 100644 tests/mlir/axpy_masked/axpy_masked.mlir
 create mode 100644 tests/mlir/maxvl_tail_setvl_front/maxvl_tail_setvl_front.mlir
 create mode 100644 tests/mlir/rvv_vp_intrinsic_add/rvv_vp_intrinsic_add.mlir
 create mode 100644 tests/mlir/rvv_vp_intrinsic_add_scalable/rvv_vp_intrinsic_add_scalable.mlir

diff --git a/tests/mlir/axpy_masked/axpy_masked.mlir b/tests/mlir/axpy_masked/axpy_masked.mlir
new file mode 100644
index 000000000..20716bf97
--- /dev/null
+++ b/tests/mlir/axpy_masked/axpy_masked.mlir
@@ -0,0 +1,53 @@
+#map0 = affine_map<(d0) -> (d0)>
+#map1 = affine_map<(d0) -> (d0 ceildiv 64)>
+
+memref.global "private" @gv_i32 : memref<4100xi32> // 4100 = 128 * 32 + 4
+
+func.func @test() -> i32 {
+
+  %input1 = memref.get_global @gv_i32 : memref<4100xi32>
+
+  %input2 = memref.get_global @gv_i32 : memref<4100xi32>
+  %output = memref.get_global @gv_i32 : memref<4100xi32>
+
+  %c0 = arith.constant 0 : index
+  %c0_i32 = arith.constant 0 : i32
+  %c0_vector = arith.constant dense<0> : vector<64xi32>
+  %c64 = arith.constant 64 : index
+  %dim = memref.dim %input1, %c0 : memref<4100xi32>
+
+  %a_vector = affine.vector_load %input1[%c0] : memref<4100xi32>, vector<64xi32>
+
+  affine.for %idx = #map0(%c0) to #map1(%dim) {
+    %curlen = arith.muli %idx, %c64 : index
+    %remain = arith.subi %dim, %curlen : index
+    %cmp = arith.cmpi sge, %remain, %c64 : index
+    scf.if %cmp {
+      %x_vector = affine.vector_load %input1[%idx * 64] : memref<4100xi32>, vector<64xi32>
+      %y_vector = affine.vector_load %input2[%idx * 64] : memref<4100xi32>, vector<64xi32>
+      %mul_vector = arith.muli %x_vector, %a_vector : vector<64xi32>
+      %result_vector = arith.addi %mul_vector, %y_vector : vector<64xi32>
+      affine.vector_store %result_vector, %output[%idx * 64] : memref<4100xi32>, vector<64xi32>
+    } else {
+      // TODO: the `vector.create_mask` operation results in the error "spike trapped with trap_illegal_instruction", which needs further analysis.
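+      // Note: `vector.create_mask %remain` would enable only the first %remain
+      // lanes; the constant all-ones mask below is a stand-in that keeps the
+      // tail iteration operating on all 64 lanes until the trap is resolved.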
+      // %mask64 = vector.create_mask %remain : vector<64xi1>
+      %mask64 = arith.constant dense<1> : vector<64xi1>
+      %remain_i32 = arith.index_cast %remain : index to i32
+      %x_vector = vector.maskedload %input1[%curlen], %mask64, %c0_vector : memref<4100xi32>, vector<64xi1>, vector<64xi32> into vector<64xi32>
+      %y_vector = vector.maskedload %input2[%curlen], %mask64, %c0_vector : memref<4100xi32>, vector<64xi1>, vector<64xi32> into vector<64xi32>
+      %mul_vector = arith.muli %x_vector, %a_vector : vector<64xi32>
+      %result_vector = arith.addi %mul_vector, %y_vector : vector<64xi32>
+      vector.maskedstore %output[%curlen], %mask64, %result_vector : memref<4100xi32>, vector<64xi1>, vector<64xi32>
+    }
+  }
+
+  %result = vector.load %output[%c0] : memref<4100xi32>, vector<8xi32>
+
+  %mask_res = arith.constant dense<1> : vector<8xi1>
+  %c1_i32 = arith.constant 1 : i32
+  %evl = arith.constant 8 : i32
+  %res_reduce_add_mask_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %result, %mask_res, %evl) :
+        (i32, vector<8xi32>, vector<8xi1>, i32) -> i32
+
+  return %res_reduce_add_mask_driven : i32
+}
diff --git a/tests/mlir/axpy_masked/default.nix b/tests/mlir/axpy_masked/default.nix
index 3624637f2..64ed99362 100644
--- a/tests/mlir/axpy_masked/default.nix
+++ b/tests/mlir/axpy_masked/default.nix
@@ -1,7 +1,7 @@
 { testcase-env }:
 testcase-env.mkMlirCase {
   caseName = "axpy_masked";
-  src = ./axpy-masked.mlir;
+  src = ./axpy_masked.mlir;
   linkSrcs = [
     ../main.S
   ];
diff --git a/tests/mlir/maxvl_tail_setvl_front/default.nix b/tests/mlir/maxvl_tail_setvl_front/default.nix
index 757dd3e22..672457bd1 100644
--- a/tests/mlir/maxvl_tail_setvl_front/default.nix
+++ b/tests/mlir/maxvl_tail_setvl_front/default.nix
@@ -1,7 +1,7 @@
 { testcase-env }:
 testcase-env.mkMlirCase {
   caseName = "maxvl_tail_setvl_front";
-  src = ./maxvl-tail-setvl-front.mlir;
+  src = ./maxvl_tail_setvl_front.mlir;
   linkSrcs = [
     ../main.S
   ];
diff --git a/tests/mlir/maxvl_tail_setvl_front/maxvl_tail_setvl_front.mlir b/tests/mlir/maxvl_tail_setvl_front/maxvl_tail_setvl_front.mlir
new file mode 100644
index 000000000..b1a3def4c
--- /dev/null
+++ b/tests/mlir/maxvl_tail_setvl_front/maxvl_tail_setvl_front.mlir
@@ -0,0 +1,56 @@
+memref.global "private" @input_A : memref<1500xi32>
+memref.global "private" @input_B : memref<1500xi32>
+memref.global "private" @output : memref<1500xi32>
+
+#map_1 = affine_map<(d)[B, N] -> (N*d + B)>
+
+func.func @test() -> i32 {
+  // for (i = 0; i < n; i++) C[i] = A[i] + B[i]
+  // use MAXVL as a fixed vector length
+  // use setvl to handle the tail
+  %A = memref.get_global @input_A : memref<1500xi32>
+  %B = memref.get_global @input_B : memref<1500xi32>
+  %C = memref.get_global @output : memref<1500xi32>
+  %n = arith.constant 1500 : i32
+
+  // only 2 vector register groups are needed, so use SEW=32, LMUL=8
+  // e32 = 0b010, m8 = 0b011, vscale = [16]
+  %sew = arith.constant 2 : i32
+  %lmul = arith.constant 3 : i32
+  %maxvl = "rvv.setvl"(%n, %sew, %lmul) : (i32, i32, i32) -> i32
+  %maxvl_idx = arith.index_cast %maxvl : i32 to index
+
+  %iter_end = arith.divui %n, %maxvl : i32
+  %rem = arith.remui %n, %maxvl : i32
+
+  %c0 = arith.constant 0 : i32
+  %c0_idx = arith.constant 0 : index
+
+  %tail = arith.cmpi ne, %rem, %c0 : i32
+  scf.if %tail {
+    %new_vl = "rvv.setvl"(%rem, %sew, %lmul) : (i32, i32, i32) -> i32
+
+    %A_vec = "rvv.load"(%A, %c0_idx, %new_vl) : (memref<1500xi32>, index, i32) -> vector<[16]xi32>
+    %B_vec = "rvv.load"(%B, %c0_idx, %new_vl) : (memref<1500xi32>, index, i32) -> vector<[16]xi32>
+    %sum = "rvv.add"(%A_vec, %B_vec, %new_vl) : (vector<[16]xi32>, vector<[16]xi32>, i32) -> vector<[16]xi32>
+    "rvv.store"(%sum, %C, %c0_idx, %new_vl) : (vector<[16]xi32>, memref<1500xi32>, index, i32) -> ()
+  }
+
+  %new_maxvl = "rvv.setvl"(%n, %sew, %lmul) : (i32, i32, i32) -> i32
+  %new_maxvl_idx = arith.index_cast %new_maxvl : i32 to index
+  %iter_end_idx = arith.index_cast %iter_end : i32 to index
+  %rem_idx = arith.index_cast %rem : i32 to index
+  affine.for %i_ = 0 to %iter_end_idx step 1 {
+    // i = REM + i_ * MAXVL; this makes the loop over %i_ a normalized loop
+    %i = affine.apply #map_1(%i_)[%rem_idx, %new_maxvl_idx]
+
+    %A_vec = "rvv.load"(%A, %i, %new_maxvl) : (memref<1500xi32>, index, i32) -> vector<[16]xi32>
+    %B_vec = "rvv.load"(%B, %i, %new_maxvl) : (memref<1500xi32>, index, i32) -> vector<[16]xi32>
+    %sum = "rvv.add"(%A_vec, %B_vec, %new_maxvl) : (vector<[16]xi32>, vector<[16]xi32>, i32) -> vector<[16]xi32>
+    "rvv.store"(%sum, %C, %i, %new_maxvl) : (vector<[16]xi32>, memref<1500xi32>, index, i32) -> ()
+  }
+
+  %ret = arith.constant 0 : i32
+  return %ret : i32
+}
diff --git a/tests/mlir/rvv_vp_intrinsic_add/default.nix b/tests/mlir/rvv_vp_intrinsic_add/default.nix
index 5105a8591..64f7454ba 100644
--- a/tests/mlir/rvv_vp_intrinsic_add/default.nix
+++ b/tests/mlir/rvv_vp_intrinsic_add/default.nix
@@ -1,7 +1,7 @@
 { testcase-env }:
 testcase-env.mkMlirCase {
   caseName = "rvv_vp_intrinsic_add";
-  src = ./rvv-vp-intrinsic-add.mlir;
+  src = ./rvv_vp_intrinsic_add.mlir;
   linkSrcs = [
     ../main.S
   ];
diff --git a/tests/mlir/rvv_vp_intrinsic_add/rvv_vp_intrinsic_add.mlir b/tests/mlir/rvv_vp_intrinsic_add/rvv_vp_intrinsic_add.mlir
new file mode 100644
index 000000000..f000a4bfc
--- /dev/null
+++ b/tests/mlir/rvv_vp_intrinsic_add/rvv_vp_intrinsic_add.mlir
@@ -0,0 +1,64 @@
+// This implementation is based on [this file](https://github.com/buddy-compiler/buddy-mlir/blob/main/examples/RVVExperiment/rvv-vp-intrinsic-add.mlir) from buddy-mlir.
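+//
+// The four cases below accumulate into a single i32 result. The "mask driven"
+// variants pass a 14-of-16-lane mask with EVL = 16, while the "EVL driven"
+// variants pass an all-ones mask with EVL = 14, so both should leave the same
+// two tail lanes disabled.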
+
+memref.global "private" @gv_i32 : memref<20xi32> = dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+                                                          10, 11, 12, 13, 14, 15, 16, 17, 18, 19]>
+func.func @test() -> i32 {
+  %mem_i32 = memref.get_global @gv_i32 : memref<20xi32>
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c1_i32 = arith.constant 1 : i32
+  %mask14 = arith.constant dense<[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]> : vector<16xi1>
+  %mask16 = arith.constant dense<1> : vector<16xi1>
+  %evl14 = arith.constant 14 : i32
+  %evl16 = arith.constant 16 : i32
+
+  %mask = arith.constant dense<1> : vector<16xi1>
+  %evl = arith.constant 16 : i32
+  %output0 = arith.constant 0 : i32
+
+  //===---------------------------------------------------------------------------===//
+  // Case 1: VP Intrinsic Add Operation + Fixed Vector Type + Mask Driven
+  //===---------------------------------------------------------------------------===//
+
+  %vec1 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
+  %vec2 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
+  %res_add_mask_driven = "llvm.intr.vp.add" (%vec2, %vec1, %mask14, %evl16) :
+        (vector<16xi32>, vector<16xi32>, vector<16xi1>, i32) -> vector<16xi32>
+
+  %res_add_mask_driven_reduce_add = "llvm.intr.vp.reduce.add" (%c1_i32, %res_add_mask_driven, %mask, %evl) :
+        (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
+  %output1 = arith.addi %output0, %res_add_mask_driven_reduce_add : i32
+
+  //===---------------------------------------------------------------------------===//
+  // Case 2: VP Intrinsic Add Operation + Fixed Vector Type + EVL Driven
+  //===---------------------------------------------------------------------------===//
+
+  %vec3 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
+  %vec4 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
+  %res_add_evl_driven = "llvm.intr.vp.add" (%vec4, %vec3, %mask16, %evl14) :
+        (vector<16xi32>, vector<16xi32>, vector<16xi1>, i32) -> vector<16xi32>
+
+  %res_add_evl_driven_reduce_add = "llvm.intr.vp.reduce.add" (%c1_i32, %res_add_evl_driven, %mask, %evl) :
+        (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
+  %output2 = arith.addi %output1, %res_add_evl_driven_reduce_add : i32
+
+  //===---------------------------------------------------------------------------===//
+  // Case 3: VP Intrinsic Reduce Add Operation + Fixed Vector Type + Mask Driven
+  //===---------------------------------------------------------------------------===//
+
+  %vec9 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
+  %res_reduce_add_mask_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %vec9, %mask14, %evl16) :
+        (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
+  %output3 = arith.addi %output2, %res_reduce_add_mask_driven : i32
+
+  //===---------------------------------------------------------------------------===//
+  // Case 4: VP Intrinsic Reduce Add Operation + Fixed Vector Type + EVL Driven
+  //===---------------------------------------------------------------------------===//
+
+  %vec10 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
+  %res_reduce_add_evl_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %vec10, %mask16, %evl14) :
+        (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
+  %output4 = arith.addi %output3, %res_reduce_add_evl_driven : i32
+
+  return %output4 : i32
+}
diff --git a/tests/mlir/rvv_vp_intrinsic_add_scalable/default.nix b/tests/mlir/rvv_vp_intrinsic_add_scalable/default.nix
index b5ba4ceb2..030b389f9 100644
--- a/tests/mlir/rvv_vp_intrinsic_add_scalable/default.nix
+++ b/tests/mlir/rvv_vp_intrinsic_add_scalable/default.nix
@@ -1,7 +1,7 @@
 { testcase-env }:
 testcase-env.mkMlirCase {
   caseName = "rvv_vp_intrinsic_add_scalable";
-  src = ./rvv-vp-intrinsic-add-scalable.mlir;
+  src = ./rvv_vp_intrinsic_add_scalable.mlir;
   linkSrcs = [
     ../main.S
   ];
diff --git a/tests/mlir/rvv_vp_intrinsic_add_scalable/rvv_vp_intrinsic_add_scalable.mlir b/tests/mlir/rvv_vp_intrinsic_add_scalable/rvv_vp_intrinsic_add_scalable.mlir
new file mode 100644
index 000000000..5939c0f60
--- /dev/null
+++ b/tests/mlir/rvv_vp_intrinsic_add_scalable/rvv_vp_intrinsic_add_scalable.mlir
@@ -0,0 +1,88 @@
+// This implementation is based on [this file](https://github.com/buddy-compiler/buddy-mlir/blob/main/examples/RVVExperiment/rvv-vp-intrinsic-add-scalable.mlir) from buddy-mlir.
+
+memref.global "private" @gv_i32 : memref<20xi32> = dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+                                                          10, 11, 12, 13, 14, 15, 16, 17, 18, 19]>
+
+func.func @test() -> i32 {
+  %mem_i32 = memref.get_global @gv_i32 : memref<20xi32>
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %evl16 = arith.constant 16 : i32
+  %evl14 = arith.constant 14 : i32
+  %c1_i32 = arith.constant 1 : i32
+
+  // Configure the register.
+  // SEW = 32
+  %sew = arith.constant 2 : i32
+  // LMUL = 4
+  %lmul = arith.constant 2 : i32
+  // AVL = 14 / 16
+  %avl14 = arith.constant 14 : i32
+  %avl16 = arith.constant 16 : i32
+
+  // Load vl elements.
+  %vl14 = rvv.setvl %avl14, %sew, %lmul : i32
+  %vl14_idx = arith.index_cast %vl14 : i32 to index
+  %vl16 = rvv.setvl %avl16, %sew, %lmul : i32
+  %vl16_idx = arith.index_cast %vl16 : i32 to index
+  %load_vec1_i32 = rvv.load %mem_i32[%c0], %vl16 : memref<20xi32>, vector<[8]xi32>, i32
+  %load_vec2_i32 = rvv.load %mem_i32[%c0], %vl16 : memref<20xi32>, vector<[8]xi32>, i32
+
+  // Create the mask.
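+  // Note: `vector.create_mask` enables exactly the lanes whose index is less
+  // than its operand, so the two masks below cover the vl values returned by
+  // `rvv.setvl` above (14 and 16 lanes respectively, assuming VLEN >= 128).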
+  %mask_scalable14 = vector.create_mask %vl14_idx : vector<[8]xi1>
+  %mask_scalable16 = vector.create_mask %vl16_idx : vector<[8]xi1>
+
+  %mask_reduce_sum = arith.constant dense<1> : vector<16xi1>
+  %evl_reduce_sum = arith.constant 16 : i32
+  %output0 = arith.constant 0 : i32
+
+  //===---------------------------------------------------------------------------===//
+  // Case 1: VP Intrinsic Add Operation + Scalable Vector Type + Mask Driven
+  //===---------------------------------------------------------------------------===//
+
+  %res_add_mask_driven = "llvm.intr.vp.add" (%load_vec2_i32, %load_vec1_i32, %mask_scalable14, %vl16) :
+        (vector<[8]xi32>, vector<[8]xi32>, vector<[8]xi1>, i32) -> vector<[8]xi32>
+
+  %res_add_mask_driven_mem = memref.get_global @gv_i32 : memref<20xi32>
+  rvv.store %res_add_mask_driven, %res_add_mask_driven_mem[%c0], %vl16 : vector<[8]xi32>, memref<20xi32>, i32
+
+  %res_add_mask_driven_vec = vector.load %res_add_mask_driven_mem[%c0] : memref<20xi32>, vector<16xi32>
+  %res_add_mask_driven_reduce_add = "llvm.intr.vp.reduce.add" (%c1_i32, %res_add_mask_driven_vec, %mask_reduce_sum, %evl_reduce_sum) :
+        (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
+  %output1 = arith.addi %output0, %res_add_mask_driven_reduce_add : i32
+
+  //===---------------------------------------------------------------------------===//
+  // Case 2: VP Intrinsic Add Operation + Scalable Vector Type + EVL Driven
+  //===---------------------------------------------------------------------------===//
+
+  %res_add_evl_driven = "llvm.intr.vp.add" (%load_vec2_i32, %load_vec1_i32, %mask_scalable16, %vl14) :
+        (vector<[8]xi32>, vector<[8]xi32>, vector<[8]xi1>, i32) -> vector<[8]xi32>
+
+  %res_add_evl_driven_mem = memref.get_global @gv_i32 : memref<20xi32>
+  rvv.store %res_add_evl_driven, %res_add_evl_driven_mem[%c0], %vl16 : vector<[8]xi32>, memref<20xi32>, i32
+
+  %res_add_evl_driven_vec = vector.load %res_add_evl_driven_mem[%c0] : memref<20xi32>, vector<16xi32>
+  %res_add_evl_driven_reduce_add = "llvm.intr.vp.reduce.add" (%c1_i32, %res_add_evl_driven_vec, %mask_reduce_sum, %evl_reduce_sum) :
+        (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
+  %output2 = arith.addi %output1, %res_add_evl_driven_reduce_add : i32
+
+  //===---------------------------------------------------------------------------===//
+  // Case 3: VP Intrinsic Reduce Add Operation + Scalable Vector Type + Mask Driven
+  //===---------------------------------------------------------------------------===//
+
+  %res_reduce_add_mask_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %load_vec1_i32, %mask_scalable14, %vl16) :
+        (i32, vector<[8]xi32>, vector<[8]xi1>, i32) -> i32
+
+  %output3 = arith.addi %output2, %res_reduce_add_mask_driven : i32
+
+  //===---------------------------------------------------------------------------===//
+  // Case 4: VP Intrinsic Reduce Add Operation + Scalable Vector Type + EVL Driven
+  //===---------------------------------------------------------------------------===//
+
+  %res_reduce_add_evl_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %load_vec1_i32, %mask_scalable16, %vl14) :
+        (i32, vector<[8]xi32>, vector<[8]xi1>, i32) -> i32
+
+  %output4 = arith.addi %output3, %res_reduce_add_evl_driven : i32
+
+  return %output4 : i32
+}