Commit
[tests] fix test case name and type name collision
Signed-off-by: Avimitin <[email protected]>
Showing 8 changed files with 265 additions and 4 deletions.
@@ -0,0 +1,53 @@
#map0 = affine_map<(d0) -> (d0)>
#map1 = affine_map<(d0) -> (d0 ceildiv 64)>

memref.global "private" @gv_i32 : memref<4100xi32> // 4100 = 128 * 32 + 4

func.func @test() -> i32 {
  %input1 = memref.get_global @gv_i32 : memref<4100xi32>
  %input2 = memref.get_global @gv_i32 : memref<4100xi32>
  %output = memref.get_global @gv_i32 : memref<4100xi32>

  %c0 = arith.constant 0 : index
  %c0_i32 = arith.constant 0 : i32
  %c0_vector = arith.constant dense<0> : vector<64xi32>
  %c64 = arith.constant 64 : index
  %dim = memref.dim %input1, %c0 : memref<4100xi32>

  %a_vector = affine.vector_load %input1[%c0] : memref<4100xi32>, vector<64xi32>

  affine.for %idx = #map0(%c0) to #map1(%dim) {
    %curlen = arith.muli %idx, %c64 : index
    %remain = arith.subi %dim, %curlen : index
    %cmp = arith.cmpi sge, %remain, %c64 : index
    scf.if %cmp {
      %x_vector = affine.vector_load %input1[%idx * 64] : memref<4100xi32>, vector<64xi32>
      %y_vector = affine.vector_load %input2[%idx * 64] : memref<4100xi32>, vector<64xi32>
      %mul_vector = arith.muli %x_vector, %a_vector : vector<64xi32>
      %result_vector = arith.addi %mul_vector, %y_vector : vector<64xi32>
      affine.vector_store %result_vector, %output[%idx * 64] : memref<4100xi32>, vector<64xi32>
    } else {
      // TODO: `vector.create_mask` operation will result in the error "spike trapped with trap_illegal_instruction", which needs further analysis.
      // %mask64 = vector.create_mask %remain : vector<64xi1>
      %mask64 = arith.constant dense<1> : vector<64xi1>
      %remain_i32 = arith.index_cast %remain : index to i32
      %x_vector = vector.maskedload %input1[%curlen], %mask64, %c0_vector : memref<4100xi32>, vector<64xi1>, vector<64xi32> into vector<64xi32>
      %y_vector = vector.maskedload %input2[%curlen], %mask64, %c0_vector : memref<4100xi32>, vector<64xi1>, vector<64xi32> into vector<64xi32>
      %mul_vector = arith.muli %x_vector, %a_vector : vector<64xi32>
      %result_vector = arith.addi %mul_vector, %y_vector : vector<64xi32>
      vector.maskedstore %output[%curlen], %mask64, %result_vector : memref<4100xi32>, vector<64xi1>, vector<64xi32>
    }
  }

  %result = vector.load %output[%c0] : memref<4100xi32>, vector<8xi32>

  %mask_res = arith.constant dense<1> : vector<8xi1>
  %c1_i32 = arith.constant 1 : i32
  %evl = arith.constant 8 : i32
  %res_reduce_add_mask_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %result, %mask_res, %evl) :
      (i32, vector<8xi32>, vector<8xi1>, i32) -> i32

  return %res_reduce_add_mask_driven : i32
}
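
For reference, a scalar C sketch of what the kernel above computes. The name reference_kernel is hypothetical, and the sketch ignores two details of the test: all three memrefs alias the same @gv_i32 global, and the tail branch currently uses an all-ones mask instead of vector.create_mask.

#include <stdint.h>

#define N 4100   /* matches memref<4100xi32>; 4100 = 128 * 32 + 4 */

/* Scalar reference: every 64-element block of input1 is multiplied lane-wise
 * by the first 64 elements of input1 (%a_vector) and added to input2; the
 * final partial block is what the masked load/store path is meant to cover. */
void reference_kernel(const int32_t *input1, const int32_t *input2,
                      int32_t *output) {
    for (int64_t i = 0; i < N; ++i) {
        int32_t a = input1[i % 64];              /* lane of %a_vector */
        output[i] = input1[i] * a + input2[i];   /* muli then addi */
    }
}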
tests/mlir/maxvl_tail_setvl_front/maxvl_tail_setvl_front.mlir (56 additions, 0 deletions)
@@ -0,0 +1,56 @@
memref.global "private" @input_A : memref<1500xi32>
memref.global "private" @input_B : memref<1500xi32>
memref.global "private" @output : memref<1500xi32>

#map_1 = affine_map<(d)[B, N] -> (N*d + B)>

func.func @test() -> i32 {
  // for (i = 0; i < n; i++) C[i] = A[i] + B[i]
  // Use MAXVL as a fixed vector length.
  // Use setvl for tail processing.
  %A = memref.get_global @input_A : memref<1500xi32>
  %B = memref.get_global @input_B : memref<1500xi32>
  %C = memref.get_global @output : memref<1500xi32>
  %n = arith.constant 1500 : i32

  // Only 2 vector registers are needed, so use SEW=32, LMUL=8.
  // e32 = 0b010, m8 = 0b011, vscale = [16]
  %sew = arith.constant 2 : i32
  %lmul = arith.constant 3 : i32
  %maxvl = "rvv.setvl"(%n, %sew, %lmul) : (i32, i32, i32) -> i32
  %maxvl_idx = arith.index_cast %maxvl : i32 to index

  %iter_end = arith.divui %n, %maxvl : i32
  %rem = arith.remui %n, %maxvl : i32

  %c0 = arith.constant 0 : i32
  %c0_idx = arith.constant 0 : index

  %tail = arith.cmpi ne, %rem, %c0 : i32
  scf.if %tail {
    %new_vl = "rvv.setvl"(%rem, %sew, %lmul) : (i32, i32, i32) -> i32

    %A_vec = "rvv.load"(%A, %c0_idx, %new_vl) : (memref<1500xi32>, index, i32) -> vector<[16]xi32>
    %B_vec = "rvv.load"(%B, %c0_idx, %new_vl) : (memref<1500xi32>, index, i32) -> vector<[16]xi32>
    %sum = "rvv.add"(%A_vec, %B_vec, %new_vl) : (vector<[16]xi32>, vector<[16]xi32>, i32) -> vector<[16]xi32>
    "rvv.store"(%sum, %C, %c0_idx, %new_vl) : (vector<[16]xi32>, memref<1500xi32>, index, i32) -> ()
  }

  %new_maxvl = "rvv.setvl"(%n, %sew, %lmul) : (i32, i32, i32) -> i32
  %new_maxvl_idx = arith.index_cast %new_maxvl : i32 to index
  %iter_end_idx = arith.index_cast %iter_end : i32 to index
  %rem_idx = arith.index_cast %rem : i32 to index
  affine.for %i_ = 0 to %iter_end_idx step 1 {
    // i = REM + i_ * MAXVL, which makes the loop over %i_ a normalized loop.
    %i = affine.apply #map_1(%i_)[%rem_idx, %new_maxvl_idx]

    %A_vec = "rvv.load"(%A, %i, %new_maxvl) : (memref<1500xi32>, index, i32) -> vector<[16]xi32>
    %B_vec = "rvv.load"(%B, %i, %new_maxvl) : (memref<1500xi32>, index, i32) -> vector<[16]xi32>
    %sum = "rvv.add"(%A_vec, %B_vec, %new_maxvl) : (vector<[16]xi32>, vector<[16]xi32>, i32) -> vector<[16]xi32>
    "rvv.store"(%sum, %C, %i, %new_maxvl) : (vector<[16]xi32>, memref<1500xi32>, index, i32) -> ()
  }

  %ret = arith.constant 0 : i32
  return %ret : i32
}
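
The test above handles the remainder first and then iterates over full MAXVL-sized blocks starting at offset REM. A C sketch of this tail-first strip-mining pattern, with hypothetical helper names and assuming setvl simply clamps the request to MAXVL:

#include <stdint.h>

/* process_block stands in for one rvv.load / rvv.add / rvv.store sequence
 * executed with the given vector length vl. */
static void process_block(const int32_t *A, const int32_t *B, int32_t *C,
                          int64_t start, int64_t vl) {
    for (int64_t j = 0; j < vl; ++j)
        C[start + j] = A[start + j] + B[start + j];
}

void add_tail_first(const int32_t *A, const int32_t *B, int32_t *C,
                    int64_t n, int64_t maxvl) {
    int64_t rem = n % maxvl;
    if (rem != 0)                            /* tail handled first, like the scf.if */
        process_block(A, B, C, 0, rem);
    for (int64_t i_ = 0; i_ < n / maxvl; ++i_) {
        int64_t i = rem + i_ * maxvl;        /* i = REM + i_ * MAXVL (#map_1) */
        process_block(A, B, C, i, maxvl);
    }
}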
@@ -0,0 +1,64 @@
// This implementation is based on [this file](https://github.com/buddy-compiler/buddy-mlir/blob/main/examples/RVVExperiment/rvv-vp-intrinsic-add.mlir) from buddy-mlir.

memref.global "private" @gv_i32 : memref<20xi32> = dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                                                           10, 11, 12, 13, 14, 15, 16, 17, 18, 19]>
func.func @test() -> i32 {
  %mem_i32 = memref.get_global @gv_i32 : memref<20xi32>
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c1_i32 = arith.constant 1 : i32
  %mask14 = arith.constant dense<[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]> : vector<16xi1>
  %mask16 = arith.constant dense<1> : vector<16xi1>
  %evl14 = arith.constant 14 : i32
  %evl16 = arith.constant 16 : i32

  %mask = arith.constant dense<1> : vector<16xi1>
  %evl = arith.constant 16 : i32
  %output0 = arith.constant 0 : i32

  //===---------------------------------------------------------------------------===//
  // Case 1: VP Intrinsic Add Operation + Fixed Vector Type + Mask Driven
  //===---------------------------------------------------------------------------===//

  %vec1 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
  %vec2 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
  %res_add_mask_driven = "llvm.intr.vp.add" (%vec2, %vec1, %mask14, %evl16) :
      (vector<16xi32>, vector<16xi32>, vector<16xi1>, i32) -> vector<16xi32>

  %res_add_mask_driven_reduce_add = "llvm.intr.vp.reduce.add" (%c1_i32, %res_add_mask_driven, %mask, %evl) :
      (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
  %output1 = arith.addi %output0, %res_add_mask_driven_reduce_add : i32

  //===---------------------------------------------------------------------------===//
  // Case 2: VP Intrinsic Add Operation + Fixed Vector Type + EVL Driven
  //===---------------------------------------------------------------------------===//

  %vec3 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
  %vec4 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
  %res_add_evl_driven = "llvm.intr.vp.add" (%vec4, %vec3, %mask16, %evl14) :
      (vector<16xi32>, vector<16xi32>, vector<16xi1>, i32) -> vector<16xi32>

  %res_add_evl_driven_reduce_add = "llvm.intr.vp.reduce.add" (%c1_i32, %res_add_evl_driven, %mask, %evl) :
      (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
  %output2 = arith.addi %output1, %res_add_evl_driven_reduce_add : i32

  //===---------------------------------------------------------------------------===//
  // Case 3: VP Intrinsic Reduce Add Operation + Fixed Vector Type + Mask Driven
  //===---------------------------------------------------------------------------===//

  %vec9 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
  %res_reduce_add_mask_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %vec9, %mask14, %evl16) :
      (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
  %output3 = arith.addi %output2, %res_reduce_add_mask_driven : i32

  //===---------------------------------------------------------------------------===//
  // Case 4: VP Intrinsic Reduce Add Operation + Fixed Vector Type + EVL Driven
  //===---------------------------------------------------------------------------===//

  %vec10 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
  %res_reduce_add_evl_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %vec10, %mask16, %evl14) :
      (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
  %output4 = arith.addi %output3, %res_reduce_add_evl_driven : i32

  return %output4 : i32
}
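
The four cases above differ only in whether the trailing lanes are disabled through the mask operand or through the explicit vector length (EVL) operand. A simplified C model of the VP intrinsic semantics used here (a lane is active only if its mask bit is set and its index is below EVL); the names are hypothetical and inactive result lanes of vp.add are really poison rather than merely unwritten:

#include <stdint.h>

enum { LANES = 16 };

/* Model of llvm.intr.vp.reduce.add: the start value plus the active lanes. */
int32_t vp_reduce_add(int32_t start, const int32_t *vec,
                      const uint8_t *mask, int32_t evl) {
    int32_t acc = start;
    for (int32_t i = 0; i < LANES; ++i)
        if (mask[i] && i < evl)
            acc += vec[i];
    return acc;
}

/* Model of llvm.intr.vp.add: only active lanes produce defined results. */
void vp_add(int32_t *dst, const int32_t *a, const int32_t *b,
            const uint8_t *mask, int32_t evl) {
    for (int32_t i = 0; i < LANES; ++i)
        if (mask[i] && i < evl)
            dst[i] = a[i] + b[i];
}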
tests/mlir/rvv_vp_intrinsic_add_scalable/rvv_vp_intrinsic_add_scalable.mlir (88 additions, 0 deletions)
@@ -0,0 +1,88 @@
// This implementation is based on [this file](https://github.com/buddy-compiler/buddy-mlir/blob/main/examples/RVVExperiment/rvv-vp-intrinsic-add-scalable.mlir) from buddy-mlir.

memref.global "private" @gv_i32 : memref<20xi32> = dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                                                           10, 11, 12, 13, 14, 15, 16, 17, 18, 19]>

func.func @test() -> i32 {
  %mem_i32 = memref.get_global @gv_i32 : memref<20xi32>
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %evl16 = arith.constant 16 : i32
  %evl14 = arith.constant 14 : i32
  %c1_i32 = arith.constant 1 : i32

  // Configure the register.
  // SEW = 32
  %sew = arith.constant 2 : i32
  // LMUL = 4
  %lmul = arith.constant 2 : i32
  // AVL = 14 / 16
  %avl14 = arith.constant 14 : i32
  %avl16 = arith.constant 16 : i32

  // Load vl elements.
  %vl14 = rvv.setvl %avl14, %sew, %lmul : i32
  %vl14_idx = arith.index_cast %vl14 : i32 to index
  %vl16 = rvv.setvl %avl16, %sew, %lmul : i32
  %vl16_idx = arith.index_cast %vl16 : i32 to index
  %load_vec1_i32 = rvv.load %mem_i32[%c0], %vl16 : memref<20xi32>, vector<[8]xi32>, i32
  %load_vec2_i32 = rvv.load %mem_i32[%c0], %vl16 : memref<20xi32>, vector<[8]xi32>, i32

  // Create the mask.
  %mask_scalable14 = vector.create_mask %vl14_idx : vector<[8]xi1>
  %mask_scalable16 = vector.create_mask %vl16_idx : vector<[8]xi1>

  %mask_reduce_sum = arith.constant dense<1> : vector<16xi1>
  %evl_reduce_sum = arith.constant 16 : i32
  %output0 = arith.constant 0 : i32

  //===---------------------------------------------------------------------------===//
  // Case 1: VP Intrinsic Add Operation + Scalable Vector Type + Mask Driven
  //===---------------------------------------------------------------------------===//

  %res_add_mask_driven = "llvm.intr.vp.add" (%load_vec2_i32, %load_vec1_i32, %mask_scalable14, %vl16) :
      (vector<[8]xi32>, vector<[8]xi32>, vector<[8]xi1>, i32) -> vector<[8]xi32>

  %res_add_mask_driven_mem = memref.get_global @gv_i32 : memref<20xi32>
  rvv.store %res_add_mask_driven, %res_add_mask_driven_mem[%c0], %vl16 : vector<[8]xi32>, memref<20xi32>, i32

  %res_add_mask_driven_vec = vector.load %res_add_mask_driven_mem[%c0] : memref<20xi32>, vector<16xi32>
  %res_add_mask_driven_reduce_add = "llvm.intr.vp.reduce.add" (%c1_i32, %res_add_mask_driven_vec, %mask_reduce_sum, %evl_reduce_sum) :
      (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
  %output1 = arith.addi %output0, %res_add_mask_driven_reduce_add : i32

  //===---------------------------------------------------------------------------===//
  // Case 2: VP Intrinsic Add Operation + Scalable Vector Type + EVL Driven
  //===---------------------------------------------------------------------------===//

  %res_add_evl_driven = "llvm.intr.vp.add" (%load_vec2_i32, %load_vec1_i32, %mask_scalable16, %vl14) :
      (vector<[8]xi32>, vector<[8]xi32>, vector<[8]xi1>, i32) -> vector<[8]xi32>

  %res_add_evl_driven_mem = memref.get_global @gv_i32 : memref<20xi32>
  rvv.store %res_add_evl_driven, %res_add_evl_driven_mem[%c0], %vl16 : vector<[8]xi32>, memref<20xi32>, i32

  %res_add_evl_driven_vec = vector.load %res_add_evl_driven_mem[%c0] : memref<20xi32>, vector<16xi32>
  %res_add_evl_driven_reduce_add = "llvm.intr.vp.reduce.add" (%c1_i32, %res_add_evl_driven_vec, %mask_reduce_sum, %evl_reduce_sum) :
      (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
  %output2 = arith.addi %output1, %res_add_evl_driven_reduce_add : i32

  //===---------------------------------------------------------------------------===//
  // Case 3: VP Intrinsic Reduce Add Operation + Scalable Vector Type + Mask Driven
  //===---------------------------------------------------------------------------===//

  %res_reduce_add_mask_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %load_vec1_i32, %mask_scalable14, %vl16) :
      (i32, vector<[8]xi32>, vector<[8]xi1>, i32) -> i32

  %output3 = arith.addi %output2, %res_reduce_add_mask_driven : i32

  //===-------------------------------------------------------------------------===//
  // Case 4: VP Intrinsic Reduce Add Operation + Scalable Vector Type + EVL Driven
  //===-------------------------------------------------------------------------===//

  %res_reduce_add_evl_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %load_vec1_i32, %mask_scalable16, %vl14) :
      (i32, vector<[8]xi32>, vector<[8]xi1>, i32) -> i32

  %output4 = arith.addi %output3, %res_reduce_add_evl_driven : i32

  return %output4 : i32
}
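
Both RVV tests encode SEW and LMUL the way the vsetvli instruction does (e32 = 0b010, m4 = 0b010, m8 = 0b011). A simplified C model (hypothetical names, not part of this commit) of how those codes, VLEN, and AVL relate to the value returned by setvl; the real vsetvli rules also permit other results when VLMAX < AVL < 2 * VLMAX:

#include <stdint.h>

/* VLMAX = VLEN / SEW * LMUL, with SEW = 8 << sew_code and LMUL = 1 << lmul_code
 * for the non-fractional encodings used in these tests. */
static int32_t rvv_vlmax(int32_t vlen_bits, int32_t sew_code, int32_t lmul_code) {
    int32_t sew = 8 << sew_code;      /* e32 = 0b010 -> 32-bit elements   */
    int32_t lmul = 1 << lmul_code;    /* m8  = 0b011 -> group of 8 regs   */
    return vlen_bits / sew * lmul;
}

/* Simplified setvl: clamp the requested AVL to VLMAX. */
static int32_t rvv_setvl_model(int32_t avl, int32_t vlen_bits,
                               int32_t sew_code, int32_t lmul_code) {
    int32_t vlmax = rvv_vlmax(vlen_bits, sew_code, lmul_code);
    return avl < vlmax ? avl : vlmax;
}

/* Example: with VLEN = 128, sew_code = 2 and lmul_code = 2 (SEW=32, LMUL=4),
 * VLMAX = 128 / 32 * 4 = 16, so setvl(14) returns 14 and setvl(16) returns 16. */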