[tests] fix test cases name and type name collision
Signed-off-by: Avimitin <[email protected]>
Avimitin committed Feb 26, 2024
1 parent 39e9213 commit 51fc892
Showing 8 changed files with 265 additions and 4 deletions.
53 changes: 53 additions & 0 deletions tests/mlir/axpy_masked/axpy_masked.mlir
@@ -0,0 +1,53 @@
#map0 = affine_map<(d0) -> (d0)>
#map1 = affine_map<(d0) -> (d0 ceildiv 64)>
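// #map0 is the identity map; #map1 gives the strip count, ceil(d0 / 64).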

memref.global "private" @gv_i32 : memref<4100xi32> // 4100 = 128 * 32 + 4
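// Equivalently, 4100 = 64 * 64 + 4, so the strip-mined loop below leaves a
// 4-element tail that exercises the masked path.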

func.func @test() -> i32 {

  %input1 = memref.get_global @gv_i32 : memref<4100xi32>

  %input2 = memref.get_global @gv_i32 : memref<4100xi32>
  %output = memref.get_global @gv_i32 : memref<4100xi32>

  %c0 = arith.constant 0 : index
  %c0_i32 = arith.constant 0 : i32
  %c0_vector = arith.constant dense<0> : vector<64xi32>
  %c64 = arith.constant 64 : index
  %dim = memref.dim %input1, %c0 : memref<4100xi32>

  %a_vector = affine.vector_load %input1[%c0] : memref<4100xi32>, vector<64xi32>
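  // The first 64 elements of the input double as the per-lane multiplier `a`
  // reused by every strip below.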

  affine.for %idx = #map0(%c0) to #map1(%dim) {
    %curlen = arith.muli %idx, %c64 : index
    %remain = arith.subi %dim, %curlen : index
    %cmp = arith.cmpi sge, %remain, %c64 : index
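    // Strips 0..63 have 64 valid elements and take the full-width branch; the
    // final strip starts at 4096 with remain = 4 and takes the masked branch.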
    scf.if %cmp {
      %x_vector = affine.vector_load %input1[%idx * 64] : memref<4100xi32>, vector<64xi32>
      %y_vector = affine.vector_load %input2[%idx * 64] : memref<4100xi32>, vector<64xi32>
      %mul_vector = arith.muli %x_vector, %a_vector : vector<64xi32>
      %result_vector = arith.addi %mul_vector, %y_vector : vector<64xi32>
      affine.vector_store %result_vector, %output[%idx * 64] : memref<4100xi32>, vector<64xi32>
    } else {
      // TODO: the `vector.create_mask` operation results in the error "spike trapped with trap_illegal_instruction", which needs further analysis.
      // %mask64 = vector.create_mask %remain : vector<64xi1>
      %mask64 = arith.constant dense<1> : vector<64xi1>
      %remain_i32 = arith.index_cast %remain : index to i32
      %x_vector = vector.maskedload %input1[%curlen], %mask64, %c0_vector : memref<4100xi32>, vector<64xi1>, vector<64xi32> into vector<64xi32>
      %y_vector = vector.maskedload %input2[%curlen], %mask64, %c0_vector : memref<4100xi32>, vector<64xi1>, vector<64xi32> into vector<64xi32>
      %mul_vector = arith.muli %x_vector, %a_vector : vector<64xi32>
      %result_vector = arith.addi %mul_vector, %y_vector : vector<64xi32>
      vector.maskedstore %output[%curlen], %mask64, %result_vector : memref<4100xi32>, vector<64xi1>, vector<64xi32>
    }
  }

  %result = vector.load %output[%c0] : memref<4100xi32>, vector<8xi32>

  %mask_res = arith.constant dense<1> : vector<8xi1>
  %c1_i32 = arith.constant 1 : i32
  %evl = arith.constant 8 : i32
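  // With a full 8-lane mask and EVL = 8, the reduce below returns the start
  // value (1) plus the sum of output[0..7].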
  %res_reduce_add_mask_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %result, %mask_res, %evl) :
      (i32, vector<8xi32>, vector<8xi1>, i32) -> i32

  return %res_reduce_add_mask_driven : i32
}
2 changes: 1 addition & 1 deletion tests/mlir/axpy_masked/default.nix
@@ -1,7 +1,7 @@
{ testcase-env }:
testcase-env.mkMlirCase {
  caseName = "axpy_masked";
-  src = ./axpy-masked.mlir;
+  src = ./axpy_masked.mlir;
  linkSrcs = [
    ../main.S
  ];
2 changes: 1 addition & 1 deletion tests/mlir/maxvl_tail_setvl_front/default.nix
@@ -1,7 +1,7 @@
{ testcase-env }:
testcase-env.mkMlirCase {
  caseName = "maxvl_tail_setvl_front";
-  src = ./maxvl-tail-setvl-front.mlir;
+  src = ./maxvl_tail_setvl_front.mlir;
  linkSrcs = [
    ../main.S
  ];
56 changes: 56 additions & 0 deletions tests/mlir/maxvl_tail_setvl_front/maxvl_tail_setvl_front.mlir
@@ -0,0 +1,56 @@
memref.global "private" @input_A : memref<1500xi32>
memref.global "private" @input_B : memref<1500xi32>
memref.global "private" @output : memref<1500xi32>

#map_1 = affine_map<(d)[B, N] -> (N*d + B)>

func.func @test() -> i32 {
  // for (i = 0; i < n; i++) C[i] = A[i] + B[i]
  // Use MAXVL as a fixed vector length.
  // Use setvl to do the tail processing.
  %A = memref.get_global @input_A : memref<1500xi32>
  %B = memref.get_global @input_B : memref<1500xi32>
  %C = memref.get_global @output : memref<1500xi32>
  %n = arith.constant 1500 : i32

  // Just need 2 vector registers; use SEW=32, LMUL=8.
  // e32 = 0b010, m8 = 0b011, vscale = [16]
  %sew = arith.constant 2 : i32
  %lmul = arith.constant 3 : i32
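  // Assuming rvv.setvl models vsetvl, it returns min(AVL, VLMAX); with
  // SEW=32 and LMUL=8, VLMAX = VLEN / 4, so for any realistic VLEN the call
  // below yields VLMAX (n = 1500 > VLMAX).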
  %maxvl = "rvv.setvl"(%n, %sew, %lmul) : (i32, i32, i32) -> i32
  %maxvl_idx = arith.index_cast %maxvl : i32 to index


  %iter_end = arith.divui %n, %maxvl : i32
  %rem = arith.remui %n, %maxvl : i32
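  // For illustration, if %maxvl were 128: iter_end = 1500 / 128 = 11 and
  // rem = 1500 mod 128 = 92. (The actual value depends on VLEN.)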

  %c0 = arith.constant 0 : i32
  %c0_idx = arith.constant 0 : index

  %tail = arith.cmpi ne, %rem, %c0 : i32
  scf.if %tail {
    %new_vl = "rvv.setvl"(%rem, %sew, %lmul) : (i32, i32, i32) -> i32

    %A_vec = "rvv.load"(%A, %c0_idx, %new_vl) : (memref<1500xi32>, index, i32) -> vector<[16]xi32>
    %B_vec = "rvv.load"(%B, %c0_idx, %new_vl) : (memref<1500xi32>, index, i32) -> vector<[16]xi32>
    %sum = "rvv.add"(%A_vec, %B_vec, %new_vl) : (vector<[16]xi32>, vector<[16]xi32>, i32) -> vector<[16]xi32>
    "rvv.store"(%sum, %C, %c0_idx, %new_vl) : (vector<[16]xi32>, memref<1500xi32>, index, i32) -> ()
  }

  %new_maxvl = "rvv.setvl"(%n, %sew, %lmul) : (i32, i32, i32) -> i32
  %new_maxvl_idx = arith.index_cast %new_maxvl : i32 to index
  %iter_end_idx = arith.index_cast %iter_end : i32 to index
  %rem_idx = arith.index_cast %rem : i32 to index
  affine.for %i_ = 0 to %iter_end_idx step 1 {
    // i = REM + i_ * MAXVL; this makes the loop over %i_ a normalized loop.
    %i = affine.apply #map_1(%i_)[%rem_idx, %new_maxvl_idx]
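    // Continuing the maxvl = 128 illustration: %i takes 92, 220, ..., 1372,
    // and the final strip covers elements 1372..1499, completing the array.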

    %A_vec = "rvv.load"(%A, %i, %new_maxvl) : (memref<1500xi32>, index, i32) -> vector<[16]xi32>
    %B_vec = "rvv.load"(%B, %i, %new_maxvl) : (memref<1500xi32>, index, i32) -> vector<[16]xi32>
    %sum = "rvv.add"(%A_vec, %B_vec, %new_maxvl) : (vector<[16]xi32>, vector<[16]xi32>, i32) -> vector<[16]xi32>
    "rvv.store"(%sum, %C, %i, %new_maxvl) : (vector<[16]xi32>, memref<1500xi32>, index, i32) -> ()
  }

  %ret = arith.constant 0 : i32
  return %ret : i32
}
2 changes: 1 addition & 1 deletion tests/mlir/rvv_vp_intrinsic_add/default.nix
@@ -1,7 +1,7 @@
{ testcase-env }:
testcase-env.mkMlirCase {
  caseName = "rvv_vp_intrinsic_add";
-  src = ./rvv-vp-intrinsic-add.mlir;
+  src = ./rvv_vp_intrinsic_add.mlir;
  linkSrcs = [
    ../main.S
  ];
64 changes: 64 additions & 0 deletions tests/mlir/rvv_vp_intrinsic_add/rvv_vp_intrinsic_add.mlir
@@ -0,0 +1,64 @@
// This implementation is based on [this file](https://github.com/buddy-compiler/buddy-mlir/blob/main/examples/RVVExperiment/rvv-vp-intrinsic-add.mlir) from buddy-mlir.

memref.global "private" @gv_i32 : memref<20xi32> = dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                                                          10, 11, 12, 13, 14, 15, 16, 17, 18, 19]>
func.func @test() -> i32 {
  %mem_i32 = memref.get_global @gv_i32 : memref<20xi32>
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c1_i32 = arith.constant 1 : i32
  %mask14 = arith.constant dense<[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]> : vector<16xi1>
  %mask16 = arith.constant dense<1> : vector<16xi1>
  %evl14 = arith.constant 14 : i32
  %evl16 = arith.constant 16 : i32

  %mask = arith.constant dense<1> : vector<16xi1>
  %evl = arith.constant 16 : i32
  %output0 = arith.constant 0 : i32

  //===---------------------------------------------------------------------------===//
  // Case 1: VP Intrinsic Add Operation + Fixed Vector Type + Mask Driven
  //===---------------------------------------------------------------------------===//

  %vec1 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
  %vec2 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
  %res_add_mask_driven = "llvm.intr.vp.add" (%vec2, %vec1, %mask14, %evl16) :
      (vector<16xi32>, vector<16xi32>, vector<16xi1>, i32) -> vector<16xi32>
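  // Only the first 14 lanes are enabled by %mask14; per the LLVM VP intrinsic
  // semantics, the disabled result lanes carry no defined value.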

  %res_add_mask_driven_reduce_add = "llvm.intr.vp.reduce.add" (%c1_i32, %res_add_mask_driven, %mask, %evl) :
      (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
  %output1 = arith.addi %output0, %res_add_mask_driven_reduce_add : i32

  //===---------------------------------------------------------------------------===//
  // Case 2: VP Intrinsic Add Operation + Fixed Vector Type + EVL Driven
  //===---------------------------------------------------------------------------===//

  %vec3 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
  %vec4 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
  %res_add_evl_driven = "llvm.intr.vp.add" (%vec4, %vec3, %mask16, %evl14) :
      (vector<16xi32>, vector<16xi32>, vector<16xi1>, i32) -> vector<16xi32>
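  // Here all 16 mask lanes are set but EVL = 14, so lanes 14 and 15 are
  // likewise outside the defined portion of the result.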

  %res_add_evl_driven_reduce_add = "llvm.intr.vp.reduce.add" (%c1_i32, %res_add_evl_driven, %mask, %evl) :
      (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
  %output2 = arith.addi %output1, %res_add_evl_driven_reduce_add : i32

  //===---------------------------------------------------------------------------===//
  // Case 3: VP Intrinsic Reduce Add Operation + Fixed Vector Type + Mask Driven
  //===---------------------------------------------------------------------------===//

  %vec9 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
  %res_reduce_add_mask_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %vec9, %mask14, %evl16) :
      (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
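  // %vec9 holds [0, 1, ..., 15]; only the 14 enabled lanes participate, so
  // this reduce yields 1 (start value) + (0 + 1 + ... + 13) = 92.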
  %output3 = arith.addi %output2, %res_reduce_add_mask_driven : i32

  //===---------------------------------------------------------------------------===//
  // Case 4: VP Intrinsic Reduce Add Operation + Fixed Vector Type + EVL Driven
  //===---------------------------------------------------------------------------===//

  %vec10 = vector.load %mem_i32[%c0] : memref<20xi32>, vector<16xi32>
  %res_reduce_add_evl_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %vec10, %mask16, %evl14) :
      (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
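  // With EVL = 14 instead of a mask, the same 14 lanes are reduced, so this
  // also yields 92.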
  %output4 = arith.addi %output3, %res_reduce_add_evl_driven : i32

  return %output4 : i32
}
2 changes: 1 addition & 1 deletion tests/mlir/rvv_vp_intrinsic_add_scalable/default.nix
@@ -1,7 +1,7 @@
{ testcase-env }:
testcase-env.mkMlirCase {
  caseName = "rvv_vp_intrinsic_add_scalable";
-  src = ./rvv-vp-intrinsic-add-scalable.mlir;
+  src = ./rvv_vp_intrinsic_add_scalable.mlir;
  linkSrcs = [
    ../main.S
  ];
88 changes: 88 additions & 0 deletions tests/mlir/rvv_vp_intrinsic_add_scalable/rvv_vp_intrinsic_add_scalable.mlir
@@ -0,0 +1,88 @@
// This implementation is based on [this file](https://github.com/buddy-compiler/buddy-mlir/blob/main/examples/RVVExperiment/rvv-vp-intrinsic-add-scalable.mlir) from buddy-mlir.

memref.global "private" @gv_i32 : memref<20xi32> = dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                                                          10, 11, 12, 13, 14, 15, 16, 17, 18, 19]>

func.func @test() -> i32 {
  %mem_i32 = memref.get_global @gv_i32 : memref<20xi32>
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %evl16 = arith.constant 16 : i32
  %evl14 = arith.constant 14 : i32
  %c1_i32 = arith.constant 1 : i32

  // Configure the register.
  // SEW = 32
  %sew = arith.constant 2 : i32
  // LMUL = 4
  %lmul = arith.constant 2 : i32
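  // Encoded as in vsetvl: e32 = 0b010 (2), m4 = 0b010 (2).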
  // AVL = 14 / 16
  %avl14 = arith.constant 14 : i32
  %avl16 = arith.constant 16 : i32

  // Load vl elements.
  %vl14 = rvv.setvl %avl14, %sew, %lmul : i32
  %vl14_idx = arith.index_cast %vl14 : i32 to index
  %vl16 = rvv.setvl %avl16, %sew, %lmul : i32
  %vl16_idx = arith.index_cast %vl16 : i32 to index
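  // With SEW = 32 and LMUL = 4, VLMAX = VLEN / 8; assuming VLEN >= 128,
  // setvl returns vl14 = 14 and vl16 = 16.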
  %load_vec1_i32 = rvv.load %mem_i32[%c0], %vl16 : memref<20xi32>, vector<[8]xi32>, i32
  %load_vec2_i32 = rvv.load %mem_i32[%c0], %vl16 : memref<20xi32>, vector<[8]xi32>, i32

  // Create the mask.
  %mask_scalable14 = vector.create_mask %vl14_idx : vector<[8]xi1>
  %mask_scalable16 = vector.create_mask %vl16_idx : vector<[8]xi1>

  %mask_reduce_sum = arith.constant dense<1> : vector<16xi1>
  %evl_reduce_sum = arith.constant 16 : i32
  %output0 = arith.constant 0 : i32

  //===---------------------------------------------------------------------------===//
  // Case 1: VP Intrinsic Add Operation + Scalable Vector Type + Mask Driven
  //===---------------------------------------------------------------------------===//

  %res_add_mask_driven = "llvm.intr.vp.add" (%load_vec2_i32, %load_vec1_i32, %mask_scalable14, %vl16) :
      (vector<[8]xi32>, vector<[8]xi32>, vector<[8]xi1>, i32) -> vector<[8]xi32>

  %res_add_mask_driven_mem = memref.get_global @gv_i32 : memref<20xi32>
  rvv.store %res_add_mask_driven, %res_add_mask_driven_mem[%c0], %vl16 : vector<[8]xi32>, memref<20xi32>, i32

  %res_add_mask_driven_vec = vector.load %res_add_mask_driven_mem[%c0] : memref<20xi32>, vector<16xi32>
  %res_add_mask_driven_reduce_add = "llvm.intr.vp.reduce.add" (%c1_i32, %res_add_mask_driven_vec, %mask_reduce_sum, %evl_reduce_sum) :
      (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
  %output1 = arith.addi %output0, %res_add_mask_driven_reduce_add : i32

  //===---------------------------------------------------------------------------===//
  // Case 2: VP Intrinsic Add Operation + Scalable Vector Type + EVL Driven
  //===---------------------------------------------------------------------------===//

  %res_add_evl_driven = "llvm.intr.vp.add" (%load_vec2_i32, %load_vec1_i32, %mask_scalable16, %vl14) :
      (vector<[8]xi32>, vector<[8]xi32>, vector<[8]xi1>, i32) -> vector<[8]xi32>

  %res_add_evl_driven_mem = memref.get_global @gv_i32 : memref<20xi32>
  rvv.store %res_add_evl_driven, %res_add_evl_driven_mem[%c0], %vl16 : vector<[8]xi32>, memref<20xi32>, i32

  %res_add_evl_driven_vec = vector.load %res_add_evl_driven_mem[%c0] : memref<20xi32>, vector<16xi32>
  %res_add_evl_driven_reduce_add = "llvm.intr.vp.reduce.add" (%c1_i32, %res_add_evl_driven_vec, %mask_reduce_sum, %evl_reduce_sum) :
      (i32, vector<16xi32>, vector<16xi1>, i32) -> i32
  %output2 = arith.addi %output1, %res_add_evl_driven_reduce_add : i32

  //===---------------------------------------------------------------------------===//
  // Case 3: VP Intrinsic Reduce Add Operation + Scalable Vector Type + Mask Driven
  //===---------------------------------------------------------------------------===//

  %res_reduce_add_mask_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %load_vec1_i32, %mask_scalable14, %vl16) :
      (i32, vector<[8]xi32>, vector<[8]xi1>, i32) -> i32
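  // Under the VLEN >= 128 assumption above, the vector holds [0, 1, ..., 15]
  // and the mask enables 14 lanes, so this yields 1 + (0 + 1 + ... + 13) = 92.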

  %output3 = arith.addi %output2, %res_reduce_add_mask_driven : i32

  //===-------------------------------------------------------------------------===//
  // Case 4: VP Intrinsic Reduce Add Operation + Scalable Vector Type + EVL Driven
  //===-------------------------------------------------------------------------===//

  %res_reduce_add_evl_driven = "llvm.intr.vp.reduce.add" (%c1_i32, %load_vec1_i32, %mask_scalable16, %vl14) :
      (i32, vector<[8]xi32>, vector<[8]xi1>, i32) -> i32
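  // EVL-driven variant of Case 3: vl14 = 14 limits the reduction to the same
  // 14 lanes, so this also yields 92.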

  %output4 = arith.addi %output3, %res_reduce_add_evl_driven : i32

  return %output4 : i32
}
