Skip to content

Commit

Permalink
Adds zero dimension check and related tests
Browse files Browse the repository at this point in the history
  • Loading branch information
hmalgewatta committed Nov 19, 2024
1 parent d87893f commit 7b20b6b
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 13 deletions.
20 changes: 20 additions & 0 deletions test/Conversion/amd/invalid_extractslice_to_llvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,26 @@ tt.func @invalid_size_input(%arg0: tensor<256x128xi32, #blocked1> {tt.divisibili

// -----

// Invalid zero source dimension
// The source tensor has extent 0 along dimension 1 (tensor<256x0xi32>), so the
// verifier must reject the op before any slice-size/offset checks run. The
// source-dimension check fires even though the requested result (256x16) is
// also larger than the source along that dimension.
#blocked1 = #triton_gpu.blocked<{sizePerThread = [8, 1], threadsPerWarp = [4, 16], warpsPerCTA = [8, 1], order = [1, 0], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [0, 1]}>
tt.func @invalid_size_input(%arg0: tensor<256x0xi32, #blocked1> {tt.divisibility = 16 : i32}) {
// expected-error @+1 {{source tensor dimension size zero at dimension 1}}
%1 = amdgpu.extract_slice %arg0 [0,0] : tensor<256x0xi32, #blocked1> to tensor<256x16xi32, #blocked1>
tt.return
}

// -----

// Invalid zero result dimension
// The requested result tensor has extent 0 along dimension 1 (tensor<256x0xi32>)
// while the source (256x128) is well-formed; the verifier must reject a
// zero-sized result slice. The result-dimension check is emitted for
// dimension 1, matching the diagnostic asserted below.
#blocked1 = #triton_gpu.blocked<{sizePerThread = [8, 1], threadsPerWarp = [4, 16], warpsPerCTA = [8, 1], order = [1, 0], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [0, 1]}>
tt.func @invalid_size_input(%arg0: tensor<256x128xi32, #blocked1> {tt.divisibility = 16 : i32}) {
// expected-error @+1 {{result tensor dimension size zero at dimension 1}}
%1 = amdgpu.extract_slice %arg0 [0,0] : tensor<256x128xi32, #blocked1> to tensor<256x0xi32, #blocked1>
tt.return
}

// -----

// Invalid offset, not multiple of shapePerTile
#blocked1 = #triton_gpu.blocked<{sizePerThread = [8, 1], threadsPerWarp = [4, 16], warpsPerCTA = [8, 1], order = [1, 0], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [0, 1]}>
tt.func @invalid_offset_input(%arg0: tensor<256x128xi32, #blocked1> {tt.divisibility = 16 : i32}) {
Expand Down
6 changes: 6 additions & 0 deletions third_party/amd/lib/Dialect/TritonAMDGPU/IR/Dialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ LogicalResult ExtractSliceOp::verify() {
for (auto i = 0; i < 2; ++i) {
auto resultDimSize = resultTy.getDimSize(i);
auto srcDimSize = srcTy.getDimSize(i);
if (resultDimSize == 0) {
return emitError("result tensor dimension size zero at dimension ") << i;
}
if (srcDimSize == 0) {
return emitError("source tensor dimension size zero at dimension ") << i;
}
if (resultDimSize > srcDimSize) {
return emitError(
"result shape cannot be larger than input shape at dimension ")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@

using namespace mlir;
using namespace mlir::triton;
using namespace mlir::triton::gpu;
namespace tta = mlir::triton::amdgpu;

// clang-format off
/***
Expand Down Expand Up @@ -66,11 +64,13 @@ struct ExtractSliceOpConversion
auto srcShape = srcTy.getShape();
auto resultTy = cast<RankedTensorType>(op.getType());
auto vals = unpackLLElements(loc, adaptor.getSource(), rewriter);
auto elemsPerThread = mlir::triton::gpu::getElemsPerThread(srcTy);
auto sizePerThread = getSizePerThread(srcLayout);
auto totalSizePerThread = sizePerThread[0] * sizePerThread[1];
auto order = getOrder(srcLayout);
auto shapePerCTA = getShapePerCTATile(srcLayout, srcShape);
auto elemsPerThread = triton::gpu::getElemsPerThread(srcTy);
auto sizePerThread = triton::gpu::getSizePerThread(srcLayout);
auto totalSizePerThread = product<unsigned>(sizePerThread);
auto order = triton::gpu::getOrder(srcLayout);

// Calculate valid total number of workers in each dimension
auto shapePerCTA = triton::gpu::getShapePerCTATile(srcLayout, srcShape);
shapePerCTA[0] =
std::min(static_cast<unsigned>(srcShape[0]), shapePerCTA[0]);
shapePerCTA[1] =
Expand All @@ -85,12 +85,12 @@ struct ExtractSliceOpConversion
auto offsets = op.getStaticOffsets();

// Calculate offsets and sizes in terms of CTA units.
std::vector<long int> CTAOffsets{offsets[0] / shapePerCTA[0],
offsets[1] / shapePerCTA[1]};
std::vector<long int> CTASizes{sizes[0] / shapePerCTA[0],
sizes[1] / shapePerCTA[1]};
std::vector<long int> CTAPerShape{srcShape[0] / shapePerCTA[0],
srcShape[1] / shapePerCTA[1]};
std::array<int64_t,2> CTAOffsets{offsets[0] / shapePerCTA[0],
offsets[1] / shapePerCTA[1]};
std::array<int64_t,2> CTASizes{sizes[0] / shapePerCTA[0],
sizes[1] / shapePerCTA[1]};
std::array<int64_t,2> CTAPerShape{srcShape[0] / shapePerCTA[0],
srcShape[1] / shapePerCTA[1]};

// The diagram above illustrates the graphical representation of the
// skipElems, tensorStride, and lastIdx variables.
Expand Down

0 comments on commit 7b20b6b

Please sign in to comment.