From 05284a2436293e10bba4eace47163b6d2f216a84 Mon Sep 17 00:00:00 2001 From: Jeff Fifield Date: Tue, 25 Jun 2024 15:52:15 -0600 Subject: [PATCH] Remove obsolete ops and passes (#618) * Remove obsolete ops and passes * rm tests --- mlir/include/air/Conversion/AIRPipeline.h | 42 ---- mlir/include/air/Conversion/Passes.td | 13 -- mlir/include/air/Dialect/AIR/AIR.td | 84 ------- mlir/include/air/Transform/Passes.td | 10 - mlir/lib/Conversion/AIRLoweringPass.cpp | 140 ------------ mlir/lib/Conversion/AIRPipeline.cpp | 135 ----------- mlir/lib/Conversion/AIRToAIEPass.cpp | 144 ------------ mlir/lib/Conversion/AIRToAsyncPass.cpp | 21 +- mlir/lib/Conversion/CMakeLists.txt | 1 - mlir/lib/Dialect/AIR/IR/AIRDialect.cpp | 104 --------- mlir/lib/Transform/AIRMiscPasses.cpp | 214 ------------------ .../Conversion/AIRLowering/air_pipeline.mlir | 86 ------- mlir/test/Dialect/AIR/air_pipeline.mlir | 41 ---- 13 files changed, 2 insertions(+), 1033 deletions(-) delete mode 100644 mlir/include/air/Conversion/AIRPipeline.h delete mode 100644 mlir/lib/Conversion/AIRPipeline.cpp delete mode 100644 mlir/test/Conversion/AIRLowering/air_pipeline.mlir delete mode 100644 mlir/test/Dialect/AIR/air_pipeline.mlir diff --git a/mlir/include/air/Conversion/AIRPipeline.h b/mlir/include/air/Conversion/AIRPipeline.h deleted file mode 100644 index a58f862d4..000000000 --- a/mlir/include/air/Conversion/AIRPipeline.h +++ /dev/null @@ -1,42 +0,0 @@ -//===- AIRPipeline.h --------------------------------------------*- C++ -*-===// -// -// Copyright (C) 2022, Xilinx Inc. All rights reserved. -// Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. -// SPDX-License-Identifier: MIT -// -//===----------------------------------------------------------------------===// - -#ifndef AIR_PIPELINE_H -#define AIR_PIPELINE_H - -#include "air/Conversion/PassDetail.h" - -#include "air/Dialect/AIR/AIRDialect.h" - -#include "mlir/Transforms/DialectConversion.h" - -using namespace mlir; - -namespace xilinx { -namespace air { - -class AIRPipeStageConversion : public ConversionPattern { -public: - enum LoweringType { AllocBuffer = 0, PipelineGetPut = 1 }; - - explicit AIRPipeStageConversion(MLIRContext *context, LoweringType type) - : ConversionPattern(xilinx::air::PipelineStageOp::getOperationName(), 10, - context), - loweringType(type) {} - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override; - -private: - LoweringType loweringType; -}; - -} // namespace air -} // namespace xilinx -#endif // AIR_PIPELINE_H \ No newline at end of file diff --git a/mlir/include/air/Conversion/Passes.td b/mlir/include/air/Conversion/Passes.td index 020c39db6..d8ff9d296 100644 --- a/mlir/include/air/Conversion/Passes.td +++ b/mlir/include/air/Conversion/Passes.td @@ -477,19 +477,6 @@ def AIRSplitDevices : Pass<"air-split-devices", "ModuleOp"> { ]; } -def AIRPipelineToAffine : Pass<"air-pipeline-to-affine", "ModuleOp"> { - let summary = "Lower air.pipeline stages to affine.if"; - let constructor = "xilinx::air::createAIRPipelineToAffinePass()"; - let description = [{ - Lower air.pipeline stages to affine.if - }]; - let options = [ - Option<"clLoweringType", "lowering-type", "std::string", - /*default=*/"\"buffers\"", - "Type of lowering to use for core-to-core communication. Can be 'buffer' or 'getput'"> - ]; -} - def InsertEmptyLaunchOverHerd : Pass<"air-insert-launch-and-segment-around-herd", "ModuleOp"> { let summary = "Insert segment and launch ops around herd op"; let constructor = "xilinx::air::createInsertEmptyLaunchOverHerdPass()"; diff --git a/mlir/include/air/Dialect/AIR/AIR.td b/mlir/include/air/Dialect/AIR/AIR.td index a7ece1568..71256f904 100644 --- a/mlir/include/air/Dialect/AIR/AIR.td +++ b/mlir/include/air/Dialect/AIR/AIR.td @@ -244,90 +244,6 @@ def air_HerdTerminatorOp : air_Op<"herd_terminator", [HasParent<"HerdOp">, let assemblyFormat = "attr-dict"; } -def air_HerdPipelineOp : air_Op<"pipeline", [HasParent<"HerdOp">, - AffineScope]>, - Arguments<(ins)>, - Results<(outs)> { - let summary = "Define a pipeline"; - let description = [{ - Define a pipeline within an AIR Herd. - }]; - - let regions = (region SizedRegion<1>:$body); - let hasVerifier = 1; - let assemblyFormat = [{ - attr-dict-with-keyword $body - }]; - let extraClassDeclaration = [{ - // Return a vector of the air.pipeline.stage ops for this pipeline - SmallVector getStages(); - }]; -} - -def air_PipelineStageOp : air_Op<"pipeline.stage", [HasParent<"HerdPipelineOp">]>, - Arguments<(ins Variadic:$opers)>, Results<(outs Variadic:$results)> { - let summary = "Pipeline stage"; - let regions = (region SizedRegion<1>:$body); - let description = [{ - Pipeline stage. - }]; - let hasCustomAssemblyFormat = 1; - let extraClassDeclaration = [{ - // Return the offset of this stage in the pipeline" - unsigned getStageId(); - }]; -} - -def air_PipelineYieldOp : air_Op<"pipeline.yield", [HasParent<"PipelineStageOp">, - Pure, ReturnLike, Terminator]>, - Arguments<(ins Variadic:$opers)>, Results<(outs)> { - let summary = "Yield for air pipeline stages."; - let description = [{ - A terminator operation for regions that appear in the body of - `air.pipeline.stage` operation. The operation takes variable number of - operands and produces no results. The operand number and types must - match the signature of the `air.pipeline` that contains the operation. - }]; - let assemblyFormat = [{ - ($opers^)? attr-dict (`:` type($opers)^)? - }]; -} - -def air_PipelinePutOp : air_Op<"pipeline.put", []>, - Arguments<(ins AnyType:$dst0, AnyType:$dst1, Variadic:$opers)>, Results<(outs)> { - let summary = "Put for air pipeline stages."; - let description = [{ - Experimental operation to represent copying data to another tile. - Currently used internally by air-to-aie pass during pipeline lowering. - }]; - let assemblyFormat = [{ - $dst0 $dst1 (`,` $opers^)? attr-dict `:` type($dst0) `,` type($dst1) (`,` type($opers)^)? - }]; -} - -def air_PipelineGetOp : air_Op<"pipeline.get", []>, - Arguments<(ins AnyType:$src0, AnyType:$src1)>, Results<(outs Variadic:$results)> { - let summary = "Get for air pipeline stages."; - let description = [{ - Experimental operation to represent copying data from another tile. - Currently used internally by air-to-aie pass during pipeline lowering. - }]; - let assemblyFormat = [{ - $src0 $src1 attr-dict `:` type($src0) `,` type($src1) `->` type($results) - }]; -} - -def air_PipelineTerminatorOp : air_Op<"pipeline.terminator", [HasParent<"HerdPipelineOp">, - Pure, Terminator]>, - Arguments<(ins Variadic:$opers)>, Results<(outs)> { - let summary = "Terminator for air pipeline regions."; - let description = [{ - A terminator operation for regions that appear in the body of - `air.pipeline` operation. - }]; - let assemblyFormat = "attr-dict ($opers^ `:` type($opers))?"; -} - def air_DmaMemcpyNdOp: air_Op<"dma_memcpy_nd", [air_AsyncOpInterface, air_MemcpyInterface, diff --git a/mlir/include/air/Transform/Passes.td b/mlir/include/air/Transform/Passes.td index 0ccf8d1e5..b548d5bae 100644 --- a/mlir/include/air/Transform/Passes.td +++ b/mlir/include/air/Transform/Passes.td @@ -1099,11 +1099,6 @@ def AIRExamplePass : Pass<"air-example-pass", "ModuleOp"> { let constructor = "xilinx::air::createAIRExamplePass()"; } -def AIRSpecializeDma : Pass<"air-specialize-dma", "ModuleOp"> { - let summary = "Specialize dma operations"; - let constructor = "xilinx::air::createAIRSpecializeDma()"; -} - def AIRSpecializeDmaBroadcast : Pass<"air-specialize-dma-broadcast", "ModuleOp"> { let summary = "Specialize dma operations for broadcast pattern"; let constructor = "xilinx::air::createAIRSpecializeDmaBroadcast()"; @@ -1114,11 +1109,6 @@ def AIRSpecializeDmaBroadcast : Pass<"air-specialize-dma-broadcast", "ModuleOp"> }]; } -def AIRPromoteUniformL1Dma : Pass<"air-promote-dma", "ModuleOp"> { - let summary = "promote uniform dma operations"; - let constructor = "xilinx::air::createAIRPromoteUniformL1Dma()"; -} - def AIRLinalgNamePass : Pass<"air-linalg-name", "ModuleOp"> { let summary = "Give linalg ops a LinalgTransformMarker string attribute if they don't already have one"; let constructor = "xilinx::air::createAIRLinalgNamePass()"; diff --git a/mlir/lib/Conversion/AIRLoweringPass.cpp b/mlir/lib/Conversion/AIRLoweringPass.cpp index 40d26cccf..ea65d61f6 100644 --- a/mlir/lib/Conversion/AIRLoweringPass.cpp +++ b/mlir/lib/Conversion/AIRLoweringPass.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "air/Conversion/AIRLoweringPass.h" -#include "air/Conversion/AIRPipeline.h" #include "air/Dialect/AIR/AIRDialect.h" #include "air/Dialect/AIRRt/AIRRtDialect.h" #include "air/Dialect/AIRRt/AIRRtOps.h" @@ -279,59 +278,6 @@ class AIRHerdConversion : public ConversionPattern { } }; -class AIRPipelineConversion : public ConversionPattern { -public: - explicit AIRPipelineConversion(MLIRContext *context) - : ConversionPattern(air::HerdPipelineOp::getOperationName(), 1, context) { - } - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - auto pipeOp = cast(op); - Block &bb = pipeOp.getBody().front(); - rewriter.eraseOp(pipeOp.getBody().back().getTerminator()); - bb.getOperations().splice(Block::iterator(op), bb.getOperations()); - rewriter.eraseOp(op); - return success(); - } -}; - -class AIRPipelinePutConversion : public ConversionPattern { -public: - explicit AIRPipelinePutConversion(MLIRContext *context) - : ConversionPattern(air::PipelinePutOp::getOperationName(), 1, context) {} - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - rewriter.eraseOp(op); - return success(); - } -}; - -class AIRPipelineGetConversion : public ConversionPattern { -public: - explicit AIRPipelineGetConversion(MLIRContext *context) - : ConversionPattern(air::PipelineGetOp::getOperationName(), 1, context) {} - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - auto getOp = cast(op); - SmallVector gets; - for (auto r : getOp.getResults()) { - if (auto ty = llvm::dyn_cast(r.getType())) - gets.push_back(rewriter.create( - op->getLoc(), ty, ValueRange{})); - else - return failure(); - } - rewriter.replaceOp(op, gets); - return success(); - } -}; - class AIRWaitAllToAIRRtConversion : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; @@ -1136,32 +1082,6 @@ class AIRLoweringPass : public air::impl::AIRLoweringBase { signalPassFailure(); } - // Replace the PipelineStageOps first, followed by the - // HerdPipelineOps, then run the rest of the patterns. - // This avoids creating invalid intermediate code with respect - // to the herd->pipeline->stages nesting requirements. - - // PipelineStageOp conversion - RewritePatternSet air_pipe_stage_patterns(context); - air_pipe_stage_patterns.insert( - context, air::AIRPipeStageConversion::LoweringType::AllocBuffer); - if (failed(applyPartialConversion(module, target, - std::move(air_pipe_stage_patterns)))) { - emitError(UnknownLoc::get(context), - "error lowering air.pipeline.stage\n"); - signalPassFailure(); - } - - // HerdPipelineOp conversion - RewritePatternSet air_pipe_patterns(context); - air_pipe_patterns.insert(context); - if (failed(applyPartialConversion(module, target, - std::move(air_pipe_patterns)))) { - emitError(UnknownLoc::get(context), "error lowering air.pipeline\n"); - signalPassFailure(); - } - // DMA and HerdOp conversion RewritePatternSet air_patterns(context); @@ -1528,62 +1448,6 @@ class AIRLoweringPass : public air::impl::AIRLoweringBase { } }; -class AIRPipelineToAffinePass - : public air::impl::AIRPipelineToAffineBase { - -public: - AIRPipelineToAffinePass() = default; - AIRPipelineToAffinePass(const AIRPipelineToAffinePass &pass) {} - - void getDependentDialects(::mlir::DialectRegistry ®istry) const override { - registry.insert(); - } - - void runOnOperation() override { - auto module = getOperation(); - auto context = module.getContext(); - - ConversionTarget target(*context); - - target.addLegalDialect< - LLVM::LLVMDialect, func::FuncDialect, arith::ArithDialect, - affine::AffineDialect, scf::SCFDialect, linalg::LinalgDialect, - memref::MemRefDialect, bufferization::BufferizationDialect, - airrt::AIRRtDialect, air::airDialect>(); - - target.addIllegalOp(); - - // PipelineStageOp conversion - RewritePatternSet air_pipe_stage_patterns(context); - auto loweringType = - air::AIRPipeStageConversion::LoweringType::PipelineGetPut; - if (clLoweringType == "buffer") - loweringType = air::AIRPipeStageConversion::LoweringType::AllocBuffer; - air_pipe_stage_patterns.insert(context, - loweringType); - if (failed(applyPartialConversion(module, target, - std::move(air_pipe_stage_patterns)))) { - emitError(UnknownLoc::get(context), - "error lowering air.pipeline.stage\n"); - signalPassFailure(); - } - - SmallVector pipelines; - module.walk([&](air::HerdPipelineOp p) { pipelines.push_back(p); }); - - for (auto p : pipelines) { - auto pipeOp = cast(p); - OpBuilder b(p); - Block &bb = pipeOp.getBody().front(); - IRMapping remap; - bb.getTerminator()->erase(); - for (auto &o : bb) - b.clone(o, remap); - p->erase(); - } - } -}; - } // namespace namespace xilinx { @@ -1593,9 +1457,5 @@ std::unique_ptr createAIRLoweringPass() { return std::make_unique(); } -std::unique_ptr createAIRPipelineToAffinePass() { - return std::make_unique(); -} - } // namespace air } // namespace xilinx diff --git a/mlir/lib/Conversion/AIRPipeline.cpp b/mlir/lib/Conversion/AIRPipeline.cpp deleted file mode 100644 index 8e8484174..000000000 --- a/mlir/lib/Conversion/AIRPipeline.cpp +++ /dev/null @@ -1,135 +0,0 @@ -//===- AIRPipeline.cpp ------------------------------------------*- C++ -*-===// -// -// Copyright (C) 2022, Xilinx Inc. All rights reserved. -// Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. -// SPDX-License-Identifier: MIT -// -//===----------------------------------------------------------------------===// - -#include "air/Conversion/AIRPipeline.h" - -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/IR/IRMapping.h" -#include "mlir/IR/IntegerSet.h" - -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "air-pipeline-conversion" - -using namespace mlir; - -namespace xilinx { -namespace air { - -LogicalResult AIRPipeStageConversion::matchAndRewrite( - Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - xilinx::air::HerdPipelineOp pipeline = - op->getParentOfType(); - - auto direction = pipeline->getAttrOfType("direction"); - - xilinx::air::HerdOp herd = op->getParentOfType(); - if (!herd) { - LLVM_DEBUG(llvm::errs() << "Failed to find herd op for air.pipeline\n"); - return failure(); - } - - Value x = herd.getIds()[0]; - Value y = herd.getIds()[1]; - - auto ctx = op->getContext(); - auto stage = cast(op); - - // Create an affine.if to contain the code for this pipeline stage. - unsigned id = stage.getStageId(); - - bool dir = (direction.str() == "horiz"); - - SmallVector constraints{getAffineDimExpr(dir ? 0 : 1, ctx) - - getAffineConstantExpr(id, ctx), - getAffineDimExpr(dir ? 1 : 0, ctx)}; - SmallVector eqflags{true, false}; - auto int_set = IntegerSet::get(2, 0, constraints, eqflags); - SmallVector int_set_args{x, y}; - affine::AffineIfOp aif = rewriter.create( - stage->getLoc(), int_set, int_set_args, false); - - auto &stageBlock = stage.getBody().front(); - auto &yield = stageBlock.getOperations().back(); - - if (loweringType == LoweringType::AllocBuffer) { - // For each output of the pipeline stage, create a buffer + store - SmallVector bufs; - for (auto o : yield.getOperands()) { - if (RankedTensorType tt = llvm::dyn_cast(o.getType())) { - auto memrefTy = MemRefType::get(tt.getShape(), tt.getElementType()); - rewriter.setInsertionPoint(aif); - auto buf = rewriter.create(op->getLoc(), memrefTy); - rewriter.setInsertionPoint(&yield); - auto to_memref = rewriter.create( - yield.getLoc(), buf.getType(), o); - rewriter.create(yield.getLoc(), to_memref, buf); - rewriter.setInsertionPointAfter(aif); - bufs.push_back( - rewriter.create(aif.getLoc(), buf) - .getResult()); - } - } - rewriter.replaceOp(stage, bufs); - } else if (loweringType == LoweringType::PipelineGetPut) { - SmallVector bufs; - rewriter.setInsertionPoint(aif); - for (auto o : yield.getOperands()) { - if (RankedTensorType tt = llvm::dyn_cast(o.getType())) { - rewriter.setInsertionPoint(&yield); - auto idValPlus = - rewriter.create(op->getLoc(), id + 1); - rewriter.create( - yield.getLoc(), dir ? idValPlus : x, dir ? y : idValPlus, o); - bufs.push_back(o); - } - } - rewriter.replaceOp(stage, bufs); - } - - // Clone the region into the affine.if while remapping the args - IRMapping remap; - rewriter.setInsertionPoint(aif); - auto idVal = rewriter.create(op->getLoc(), id); - remap.map(dir ? x : y, idVal); - - for (int i = 0, e = stageBlock.getNumArguments(); i < e; i++) { - if (loweringType == LoweringType::AllocBuffer) { - remap.map(stageBlock.getArgument(i), operands[i]); - } else if (loweringType == LoweringType::PipelineGetPut) { - auto idValMinus = - rewriter.create(op->getLoc(), id - 1); - rewriter.setInsertionPointToStart(&aif.getBodyRegion().front()); - auto get = rewriter.create( - stage->getLoc(), operands[i].getType(), dir ? idValMinus : x, - dir ? y : idValMinus); - remap.map(stageBlock.getArgument(i), get.getResult(0)); - } - } - - auto &body_region = aif.getBodyRegion(); - stage.getBody().cloneInto(&body_region, body_region.begin(), remap); - body_region.back().getOperations().back().erase(); - body_region.front().getOperations().splice( - body_region.front().getOperations().begin(), - body_region.back().getOperations()); - rewriter.eraseBlock(&body_region.back()); - - // replace the pipeline.yield with affine.yield - rewriter.eraseOp(body_region.front().getTerminator()); - rewriter.setInsertionPointToEnd(&body_region.front()); - rewriter.create(aif.getLoc()); - - return success(); -} - -} // namespace air -} // namespace xilinx \ No newline at end of file diff --git a/mlir/lib/Conversion/AIRToAIEPass.cpp b/mlir/lib/Conversion/AIRToAIEPass.cpp index d085acc56..ff9bffda8 100644 --- a/mlir/lib/Conversion/AIRToAIEPass.cpp +++ b/mlir/lib/Conversion/AIRToAIEPass.cpp @@ -850,147 +850,6 @@ void lowerScfAirTokens(AIE::DeviceOp m) { (void)applyPatternsAndFoldGreedily(m, std::move(patterns)); } -// struct LowerPipeGetPutPattern : public OpRewritePattern { -// using OpRewritePattern::OpRewritePattern; - -// LowerPipeGetPutPattern(MLIRContext *ctx, -// std::map &tileToHerdMap) -// : OpRewritePattern(ctx), tileToHerdMap(tileToHerdMap) {} - -// LogicalResult matchAndRewrite(air::PipelinePutOp put, -// PatternRewriter &rewriter) const override { -// auto aie_device = put->getParentOfType(); -// auto core = put->getParentOfType(); -// assert(aie_device && core); - -// auto herd = tileToHerdMap[core.getTileOp()]; -// auto c = herd.getColOffset(); -// auto r = herd.getRowOffset(); -// auto col_offset = c ? *c : 0; -// auto row_offset = r ? *r : 0; - -// auto other_x = -// cast(put.getDst0().getDefiningOp()); auto other_y -// = cast(put.getDst1().getDefiningOp()); auto -// other_core = getPhysTileOp(aie_device, other_x.value() + col_offset, -// other_y.value() + row_offset) -// .getCoreOp(); -// assert(other_core); - -// air::PipelineGetOp get; -// other_core.walk([&](air::PipelineGetOp pgo) { get = pgo; }); -// assert(get && get->getNumResults() == (put->getNumOperands() - 2)); - -// for (auto p : -// llvm::zip(put->getOperands().drop_front(2), get->getResults())) { - -// auto o = std::get<0>(p); // operand of put -// auto r = std::get<1>(p); // result of get -// // for each ranked tensor put (yielded) by the tile -// if (RankedTensorType tt = -// llvm::dyn_cast(o.getType())) { -// auto memrefTy = MemRefType::get(tt.getShape(), tt.getElementType(), -// {}, -// (int)air::MemorySpace::L1); -// // allocate buffer+lock -// auto buf = allocateBufferOp( -// memrefTy, core.getTileOp(), -// StringAttr::get(aie_device.getContext(), "pipebuf")); -// auto lockOp = allocateLockOp(aie_device, core.getTileOp()); - -// // acquire the lock for write on the put side -// rewriter.setInsertionPoint(put); -// rewriter.create(put->getLoc(), lockOp, 0, -// AIE::LockAction::Acquire); -// rewriter.create(put->getLoc(), o, buf); -// rewriter.create(put->getLoc(), lockOp, 1, -// AIE::LockAction::Release); - -// // acquire the lock for read on the get side -// rewriter.setInsertionPoint(get); -// rewriter.create(get->getLoc(), lockOp, 1, -// AIE::LockAction::Acquire); -// auto loadOp = -// rewriter.create(get->getLoc(), buf); -// rewriter.create(get->getLoc(), lockOp, 0, -// AIE::LockAction::Release); -// r.replaceAllUsesWith(loadOp.getResult()); -// } else { -// llvm::errs() << "error, unsupported air.pipeline.yield operand -// type\n"; assert(0 && "Unsupported"); return failure(); -// } -// } -// rewriter.eraseOp(get); -// rewriter.eraseOp(put); -// return success(); -// } - -// private: -// std::map &tileToHerdMap; -// }; - -// This function replaces PipelinePutOp/PipelineGetOp pairs with a -// shared aie.buffer + aie.lock. This is a single-buffered implementation -// with exclusive access to the buffer controlled by the lock. i.e. FIXME. -// void lowerPipelineGetPut(AIE::DeviceOp &m, -// std::map tileToHerdMap) { -// auto ctx = m->getContext(); -// RewritePatternSet patterns(ctx); -// patterns.insert(ctx, tileToHerdMap); -// (void)applyPatternsAndFoldGreedily(m, std::move(patterns)); -// } - -// struct AllocL1TensorsPattern -// : public OpRewritePattern { -// using OpRewritePattern::OpRewritePattern; - -// AllocL1TensorsPattern(MLIRContext *ctx, -// std::map &tileToHerdMap) -// : OpRewritePattern(ctx), tileToHerdMap(tileToHerdMap) {} - -// LogicalResult matchAndRewrite(bufferization::ToMemrefOp cast, -// PatternRewriter &rewriter) const override { - -// AIE::CoreOp core = cast->getParentOfType(); -// if (!core) -// return failure(); - -// AIE::TileOp tile = core.getTileOp(); -// if (!tile) -// return failure(); - -// MemRefType memrefTy = nullptr; -// memrefTy = llvm::cast(cast.getType()); - -// if (memrefTy.getMemorySpaceAsInt() != (int)air::MemorySpace::L1) -// return failure(); - -// rewriter.setInsertionPointAfter(tile); -// auto herd = tileToHerdMap[core.getTileOp()]; -// int64_t col_offset = 0; -// int64_t row_offset = 0; -// if (herd) { -// auto c = herd.getColOffset(); -// auto r = herd.getRowOffset(); -// col_offset = c ? *c : 0; -// row_offset = r ? *r : 0; -// } -// auto buffer = allocateBufferOp( -// memrefTy, tile, -// cast->getAttrOfType(SymbolTable::getSymbolAttrName()), -// tile.getCol() - col_offset, tile.getRow() - row_offset); - -// rewriter.setInsertionPoint(cast); -// rewriter.create(cast.getLoc(), cast.getOperand(), -// buffer); -// rewriter.replaceOp(cast, buffer->getResults()); -// return success(); -// } - -// private: -// std::map &tileToHerdMap; -// }; - struct AllocL1BuffersPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -3210,8 +3069,6 @@ class AIRToAIEPass : public air::impl::AIRToAIEBase { ctx, tileToHerdMap, BufferId); if (clTestPatterns.find("specialize-affine-if") != std::string::npos) patterns.insert(ctx); - // if (clTestPatterns.find("lower-pipe-get-put") != std::string::npos) - // patterns.insert(ctx, tileToHerdMap); if (clTestPatterns.find("lower-scf-tokens") != std::string::npos) patterns.insert(ctx); @@ -3336,7 +3193,6 @@ class AIRToAIEPass : public air::impl::AIRToAIEBase { lowerAIRMemcpyOp(device, shimDmaAlloc, options); - // lowerPipelineGetPut(device, tileToHerdMap); if (options.insert_trace_packet_flow) createTracePacketFlow(device); diff --git a/mlir/lib/Conversion/AIRToAsyncPass.cpp b/mlir/lib/Conversion/AIRToAsyncPass.cpp index 3b549a493..3e6ef3c16 100644 --- a/mlir/lib/Conversion/AIRToAsyncPass.cpp +++ b/mlir/lib/Conversion/AIRToAsyncPass.cpp @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "air/Conversion/AIRPipeline.h" +#include "air/Conversion/PassDetail.h" + #include "air/Dialect/AIR/AIRDialect.h" #include "air/Dialect/AIRRt/AIRRtDialect.h" #include "air/Dialect/AIRRt/AIRRtOps.h" @@ -123,24 +124,6 @@ class AIRHerdOpConversion : public ConversionPattern { } }; -class AIRPipelineConversion : public ConversionPattern { -public: - explicit AIRPipelineConversion(MLIRContext *context) - : ConversionPattern(air::HerdPipelineOp::getOperationName(), 1, context) { - } - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - auto pipeOp = cast(op); - Block &bb = pipeOp.getBody().front(); - rewriter.eraseOp(pipeOp.getBody().back().getTerminator()); - bb.getOperations().splice(Block::iterator(op), bb.getOperations()); - rewriter.eraseOp(op); - return success(); - } -}; - static func::CallOp convertOpToFunction(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter, StringRef fnName) { diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt index 08bf4d59c..d7c75e84d 100644 --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -10,7 +10,6 @@ add_mlir_library( AIRToAIEPass.cpp AIRToAIESchedulingUtils.cpp AIRToAsyncPass.cpp - AIRPipeline.cpp AIRRtToNpuPass.cpp Passes.cpp diff --git a/mlir/lib/Dialect/AIR/IR/AIRDialect.cpp b/mlir/lib/Dialect/AIR/IR/AIRDialect.cpp index 96308ae33..5bc26c7fa 100644 --- a/mlir/lib/Dialect/AIR/IR/AIRDialect.cpp +++ b/mlir/lib/Dialect/AIR/IR/AIRDialect.cpp @@ -989,110 +989,6 @@ uint64_t HerdOp::getNumRows() { return cast(rows).value(); } -// -// HerdPipelineOp -// - -LogicalResult HerdPipelineOp::verify() { - auto direction = (*this)->getAttrOfType("direction"); - if (!direction) - return emitOpError() << "expects 'direction' attribute"; - - return success(); -} - -SmallVector HerdPipelineOp::getStages() { - SmallVector stages; - for (auto &o : getBody().front().getOperations()) { - if (auto stage = dyn_cast(o)) - stages.push_back(stage); - } - return stages; -} - -// -// PipelineStageOp -// - -ParseResult PipelineStageOp::parse(OpAsmParser &parser, - OperationState &result) { - - SmallVector kernelOperands; - SmallVector kernelArguments; - SmallVector types; - if (succeeded(parser.parseOptionalKeyword("args"))) { - if (parser.parseAssignmentList(kernelArguments, kernelOperands)) - return failure(); - if (parser.parseColonTypeList(types)) - return failure(); - } - - for (int i = 0, e = kernelOperands.size(); i < e; i++) { - kernelArguments[i].type = types[i]; - if (parser.resolveOperand(kernelOperands[i], types[i], result.operands)) - return failure(); - } - - if (parser.parseOptionalAttrDictWithKeyword(result.attributes)) - return failure(); - - Region *body = result.addRegion(); - if (parser.parseRegion(*body, kernelArguments, false)) - return failure(); - - SmallVector retTypes; - if (parser.parseOptionalColon()) - return success(); - - if (parser.parseTypeList(retTypes)) - return failure(); - - result.addTypes(retTypes); - return success(); -} - -void PipelineStageOp::print(OpAsmPrinter &p) { - - if (getNumOperands()) { - auto args = getBody().front().getArguments(); - p << " args("; - for (int i = 0, e = getNumOperands(); i < e; i++) { - if (i) - p << ", "; - p << args[i] << "="; - p << getOperand(i); - } - p << ") : "; - for (int i = 0, e = getNumOperands(); i < e; i++) { - if (i) - p << ", "; - p << getOperand(i).getType(); - } - } - - p << " "; - if ((*this)->getAttrs().size()) { - p << "attributes "; - p.printOptionalAttrDict((*this)->getAttrs()); - p << " "; - } - p.printRegion(getBody(), /*printEntryBlockArgs=*/false); - - if ((*this)->getNumResults()) - p << " : "; - for (Type type : (*this)->getResultTypes()) - p.printType(type); -} - -unsigned PipelineStageOp::getStageId() { - auto stages = getOperation()->getParentOfType().getStages(); - for (unsigned idx = 0; idx < stages.size(); idx++) - if (stages[idx] == *this) - return idx; - llvm_unreachable("Could not find stage in parent"); - return -1; -} - // // Asynchronous execute // diff --git a/mlir/lib/Transform/AIRMiscPasses.cpp b/mlir/lib/Transform/AIRMiscPasses.cpp index c2dd99fed..e928c497f 100644 --- a/mlir/lib/Transform/AIRMiscPasses.cpp +++ b/mlir/lib/Transform/AIRMiscPasses.cpp @@ -111,212 +111,6 @@ void AIRRemoveLinalgNamePass::runOnOperation() { }); } -// AIRPromoteUniformL1Dma -class AIRPromoteUniformL1Dma - : public air::impl::AIRPromoteUniformL1DmaBase { - -public: - AIRPromoteUniformL1Dma() = default; - AIRPromoteUniformL1Dma(const AIRPromoteUniformL1Dma &pass){}; - - void runOnOperation() override; - -private: -}; - -void do_clone(OpBuilder &builder, Operation *op, IRMapping &mapping) { - if (!op) - return; - for (auto o : op->getOperands()) { - if (mapping.contains(o)) - continue; - do_clone(builder, o.getDefiningOp(), mapping); - } - builder.clone(*op, mapping); -} - -void AIRPromoteUniformL1Dma::runOnOperation() { - auto module = getOperation(); - // auto ctx = module.getContext(); - - std::vector erasedOps; - int64_t max_id = -1; - SmallVector memCopies; - module.walk([&](air::DmaMemcpyNdOp memcpyOp) { - memCopies.push_back(memcpyOp); - IntegerAttr attr = memcpyOp->getAttrOfType("id"); - if (!attr) - return; - max_id = std::max(max_id, attr.getInt()); - }); - - for (auto memcpyOp : memCopies) { - auto pipeline = memcpyOp->getParentOfType(); - auto stage = memcpyOp->getParentOfType(); - auto launch = memcpyOp->getParentOfType(); - if (!pipeline || !stage || !launch) - continue; - - // auto direction = pipeline->getAttrOfType("direction"); - auto uniform = stage->getAttrOfType("uniform"); - if (!uniform) - continue; - - auto src_type = llvm::cast(memcpyOp.getSrc().getType()); - auto dst_type = llvm::cast(memcpyOp.getDst().getType()); - auto src_space = src_type.getMemorySpaceAsInt(); - auto dst_space = dst_type.getMemorySpaceAsInt(); - - MemRefType ty = nullptr; - bool to_l1 = (src_space == 0 && dst_space == 2); - bool from_l1 = (src_space == 2 && dst_space == 0); - if (to_l1) - ty = dst_type; - else if (from_l1) - ty = src_type; - else - continue; - - OpBuilder builder(launch); - auto loc = memcpyOp->getLoc(); - auto alloc = builder.create( - loc, MemRefType::get(ty.getShape(), ty.getElementType(), - ty.getLayout().getAffineMap(), 1)); - std::vector launch_operands; - IRMapping remap; - for (unsigned int i = 0; i < launch.getNumKernelOperands(); i++) { - auto arg = launch.getKernelArguments()[i]; - auto oper = launch.getKernelOperand(i); - remap.map(arg, oper); - } - if (to_l1) - remap.map(memcpyOp.getDst(), alloc); - do_clone(builder, memcpyOp.getOperation(), remap); - - launch_operands.insert(launch_operands.begin(), - launch->getOperands().begin(), - launch->getOperands().end()); - launch_operands.push_back(alloc.getResult()); - launch->setOperands(launch_operands); - launch.getBody().front().addArgument(alloc.getType(), loc); - auto sizeAttr = llvm::cast<::mlir::DenseIntElementsAttr>( - launch->getAttr("operand_segment_sizes")); - const uint32_t *it = &*sizeAttr.value_begin(); - auto newAttr = DenseIntElementsAttr::get(sizeAttr.getType(), - {it[0], it[1], it[2], it[3] + 1}); - launch->setAttr("operand_segment_sizes", newAttr); - - builder.setInsertionPoint(memcpyOp); - SmallVector opers{}; - SmallVector mt; - Value a = launch.getKernelArguments()[it[3]]; - builder.create( - loc, SmallVector{}, mt, to_l1 ? memcpyOp.getDst() : a, mt, mt, - mt, to_l1 ? a : memcpyOp.getSrc(), mt, mt, mt); - erasedOps.push_back(memcpyOp); - } - for (auto e : erasedOps) - e->erase(); -} - -// return true if op is a function of v -bool isFuncOf(Operation *op, Value v, std::vector &ops) { - bool r = false; - if (!op) - return r; - - for (auto o : op->getOperands()) { - if ((o == v) || (isFuncOf(o.getDefiningOp(), v, ops))) { - if (std::find(std::begin(ops), std::end(ops), op) == std::end(ops)) - ops.push_back(op); - r = true; - } - } - return r; -} - -// AIRSpecializeDma -class AIRSpecializeDma - : public air::impl::AIRSpecializeDmaBase { - -public: - AIRSpecializeDma() = default; - AIRSpecializeDma(const AIRSpecializeDma &pass){}; - - void runOnOperation() override; - -private: -}; - -void AIRSpecializeDma::runOnOperation() { - auto module = getOperation(); - auto ctx = module.getContext(); - - module.walk([&](air::HerdOp launch) { - launch.walk([&](air::DmaMemcpyNdOp memcpyOp) { - std::vector xOps, yOps; - bool fn_x = isFuncOf(memcpyOp, launch.getIds()[0], xOps); - bool fn_y = isFuncOf(memcpyOp, launch.getIds()[1], yOps); - int64_t herd_size_x = launch.getNumCols(); - int64_t herd_size_y = launch.getNumRows(); - if (fn_x && !fn_y) { - auto loc = memcpyOp->getLoc(); - OpBuilder builder(memcpyOp); - auto pipe = builder.create(loc); - pipe->setAttr("direction", StringAttr::get(ctx, "horiz")); - auto pipe_bb = new Block(); - pipe.getBody().push_back(pipe_bb); - builder.setInsertionPointToEnd(pipe_bb); - builder.create(loc, SmallVector{}); - builder.setInsertionPointToStart(pipe_bb); - for (int x = 0; x < herd_size_x; x++) { - auto stage = builder.create( - loc, SmallVector{}, SmallVector{}); - stage->setAttr("uniform", BoolAttr::get(ctx, true)); - auto stage_bb = new Block(); - stage.getBody().push_back(stage_bb); - auto stage_builder = OpBuilder::atBlockEnd(stage_bb); - auto c_x = stage_builder.create(loc, x); - IRMapping remap; - remap.map(launch.getIds()[0], c_x); - for (auto xop : xOps) - stage_builder.clone(*xop, remap); - stage_builder.create( - loc, SmallVector{}, SmallVector{}); - } - memcpyOp.erase(); - } - if (fn_y && !fn_x) { - auto loc = memcpyOp->getLoc(); - OpBuilder builder(memcpyOp); - auto pipe = builder.create(loc); - pipe->setAttr("direction", StringAttr::get(ctx, "vert")); - auto pipe_bb = new Block(); - pipe.getBody().push_back(pipe_bb); - builder.setInsertionPointToEnd(pipe_bb); - builder.create(loc, SmallVector{}); - builder.setInsertionPointToStart(pipe_bb); - for (int y = 0; y < herd_size_y; y++) { - auto stage = builder.create( - loc, SmallVector{}, SmallVector{}); - stage->setAttr("uniform", BoolAttr::get(ctx, true)); - auto stage_bb = new Block(); - stage.getBody().push_back(stage_bb); - auto stage_builder = OpBuilder::atBlockEnd(stage_bb); - auto c_y = stage_builder.create(loc, y); - IRMapping remap; - remap.map(launch.getIds()[1], c_y); - for (auto yop : yOps) - stage_builder.clone(*yop, remap); - stage_builder.create( - loc, SmallVector{}, SmallVector{}); - } - memcpyOp.erase(); - } - }); - }); -} - // AIRSpecializeDmaBroadcast class AIRSpecializeDmaBroadcast : public air::impl::AIRSpecializeDmaBroadcastBase< @@ -1645,18 +1439,10 @@ std::unique_ptr createAIRExamplePass() { return std::make_unique(); } -std::unique_ptr createAIRSpecializeDma() { - return std::make_unique(); -} - std::unique_ptr createAIRSpecializeDmaBroadcast() { return std::make_unique(); } -std::unique_ptr createAIRPromoteUniformL1Dma() { - return std::make_unique(); -} - std::unique_ptr createAIRLinalgNamePass() { return std::make_unique(); } diff --git a/mlir/test/Conversion/AIRLowering/air_pipeline.mlir b/mlir/test/Conversion/AIRLowering/air_pipeline.mlir deleted file mode 100644 index ce895eaf5..000000000 --- a/mlir/test/Conversion/AIRLowering/air_pipeline.mlir +++ /dev/null @@ -1,86 +0,0 @@ -//===- air_pipeline.mlir ---------------------------------------*- MLIR -*-===// -// -// Copyright (C) 2021-2022, Xilinx Inc. All rights reserved. -// Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. -// SPDX-License-Identifier: MIT -// -//===----------------------------------------------------------------------===// - -// RUN: air-opt -air-to-std %s | FileCheck %s -// CHECK: #set = affine_set<(d0, d1) : (d0 == 0, d1 >= 0)> -// CHECK: #set1 = affine_set<(d0, d1) : (d0 - 1 == 0, d1 >= 0)> -// CHECK: #set2 = affine_set<(d0, d1) : (d0 - 2 == 0, d1 >= 0)> -// CHECK: #set3 = affine_set<(d0, d1) : (d0 - 3 == 0, d1 >= 0)> -// CHECK: affine.for %arg3 = 0 to 4 { -// CHECK: affine.for %arg4 = 0 to 1 { -// CHECK: affine.if #set(%arg3, %arg4) { -// CHECK: affine.if #set1(%arg3, %arg4) { -// CHECK: affine.if #set2(%arg3, %arg4) { -// CHECK: affine.if #set3(%arg3, %arg4) { -#map0 = affine_map<(d0) -> (d0)> -module { - func.func @launch(%m0: memref<1024xf32>, %m1: memref<1024xf32>, %m2: memref<1024xf32>) { - %c4 = arith.constant 4 : index - %c1 = arith.constant 1 : index - air.herd @pipelined_herd tile (%x, %y) in (%sx=%c4, %sy=%c1) args(%op0=%m0, %op1=%m1, %op2=%m2) : memref<1024xf32>,memref<1024xf32>,memref<1024xf32> { - %c1_f32 = arith.constant 1.0 : f32 - %c0 = arith.constant 0 : index - %c1024 = arith.constant 1024 : index - - air.pipeline attributes {direction = "horiz"} { - %output1 = air.pipeline.stage { - %a = memref.alloc() : memref<1024xf32, 2> - %b = memref.alloc() : memref<1024xf32, 2> - air.dma_memcpy_nd (%a[][][], %op0[%c0] [%c0] [%c1024]) {id = 1 : i32} : (memref<1024xf32, 2>, memref<1024xf32>) - air.dma_memcpy_nd (%b[][][], %op1[%c0] [%c0] [%c1024]) {id = 2 : i32} : (memref<1024xf32, 2>, memref<1024xf32>) - %init = tensor.empty () : tensor<1024xf32> - %ta = bufferization.to_tensor %a : memref<1024xf32, 2> - %tb = bufferization.to_tensor %b : memref<1024xf32, 2> - %5 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]} ins(%ta, %tb : tensor<1024xf32>, tensor<1024xf32>) outs(%init : tensor<1024xf32>) { - ^bb0(%a2: f32, %a3: f32, %a4: f32): // no predecessors - %6 = arith.mulf %a2, %a3 : f32 - linalg.yield %6 : f32 - } -> tensor<1024xf32> - air.pipeline.yield %5 : tensor<1024xf32> - } : tensor<1024xf32> - - %output2 = air.pipeline.stage args(%in = %output1) : tensor<1024xf32> { - %init = tensor.empty () : tensor<1024xf32> - %5 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} ins(%in : tensor<1024xf32>) outs(%init : tensor<1024xf32>) { - ^bb0(%a2: f32, %a3: f32): // no predecessors - %one = arith.constant 1.0 : f32 - %6 = arith.addf %a2, %one : f32 - linalg.yield %6 : f32 - } -> tensor<1024xf32> - air.pipeline.yield %5 : tensor<1024xf32> - } : tensor<1024xf32> - - %output3 = air.pipeline.stage args(%in = %output2) : tensor<1024xf32> { - %init = tensor.empty () : tensor<1024xf32> - %5 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} ins(%in : tensor<1024xf32>) outs(%init : tensor<1024xf32>) { - ^bb0(%a2: f32, %a3: f32): // no predecessors - %one = arith.constant 1.0 : f32 - %6 = arith.addf %a2, %one : f32 - linalg.yield %6 : f32 - } -> tensor<1024xf32> - air.pipeline.yield %5 : tensor<1024xf32> - } : tensor<1024xf32> - - air.pipeline.stage args(%in = %output3) : tensor<1024xf32> { - %init = tensor.empty () : tensor<1024xf32> - %5 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} ins(%in : tensor<1024xf32>) outs(%init : tensor<1024xf32>) { - ^bb0(%a2: f32, %a3: f32): // no predecessors - %one = arith.constant 1.0 : f32 - %6 = arith.addf %a2, %one : f32 - linalg.yield %6 : f32 - } -> tensor<1024xf32> - %c = bufferization.to_memref %5 : memref<1024xf32, 2> - air.dma_memcpy_nd (%op2[%c0] [%c0] [%c1024], %c[][][]) {id = 3 : i32} : (memref<1024xf32>, memref<1024xf32, 2>) - air.pipeline.yield - } - air.pipeline.terminator - } - } - return - } -} diff --git a/mlir/test/Dialect/AIR/air_pipeline.mlir b/mlir/test/Dialect/AIR/air_pipeline.mlir deleted file mode 100644 index 18e2cf4c4..000000000 --- a/mlir/test/Dialect/AIR/air_pipeline.mlir +++ /dev/null @@ -1,41 +0,0 @@ -//===- air_pipeline.mlir ---------------------------------------*- MLIR -*-===// -// -// Copyright (C) 2021-2022, Xilinx Inc. All rights reserved. -// Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. -// SPDX-License-Identifier: MIT -// -//===----------------------------------------------------------------------===// - -// RUN: air-opt %s | FileCheck %s -module { - func.func @launch(%arg0: i32) { - %c4 = arith.constant 4 : index - air.herd tile (%arg1, %arg2) in (%arg3=%c4, %arg4=%c4) args(%arg5=%arg0, %arg6=%arg0) : i32,i32 { - %c1_i32 = arith.constant 1 : i32 - // CHECK: air.pipeline attributes {direction = "horiz"} { - air.pipeline attributes {direction = "horiz"} { - // CHECK: %{{.*}} = air.pipeline.stage args(%{{.*}}=%{{.*}}) : i32 { - // CHECK: air.pipeline.yield %{{.*}} : i32 - // CHECK: } : i32 - %output1 = air.pipeline.stage args(%in = %c1_i32) : i32 { - %o = arith.addi %in, %c1_i32 : i32 - air.pipeline.yield %o : i32 - } : i32 - %output2 = air.pipeline.stage args(%in = %output1) : i32 { - %o = arith.addi %in, %c1_i32 : i32 - air.pipeline.yield %o : i32 - } : i32 - %output3 = air.pipeline.stage args(%in = %output2) : i32 { - %o = arith.addi %in, %c1_i32 : i32 - air.pipeline.yield %o : i32 - } : i32 - air.pipeline.stage args(%in = %output3) : i32 { - %o = arith.addi %in, %c1_i32 : i32 - air.pipeline.yield %o : i32 - } - air.pipeline.terminator - } - } - return - } -}