Merge pull request #899 from xmos/feature/memory-analysis
Add memory plan print and op split analysis support
panickal-xmos authored Jun 7, 2024
2 parents a22e567 + 937de31 commit 3c0a431
Showing 14 changed files with 377 additions and 39 deletions.
2 changes: 1 addition & 1 deletion third_party/lib_tflite_micro
Submodule lib_tflite_micro updated 0 files
138 changes: 133 additions & 5 deletions xformer/Analysis/MemoryPlan.cpp
@@ -8,6 +8,7 @@

#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"

#define DEBUG_TYPE "xcore-memory-plan"

@@ -41,9 +42,6 @@ void MemoryPlan::build() {
}

funcOp.walk<WalkOrder::PreOrder>([&](Operation *op) {
operationIds.insert({op, operationIds.size()});
operations.push_back(op);

if (op == funcOp || llvm::isa<quantfork::StatisticsOp>(op)) {
return;
}
@@ -56,6 +54,12 @@
isConstantOp = true;
}

if (!llvm::isa<TFL::NoValueOp, TFL::QConstOp, TFL::ConstOp,
arith::ConstantOp>(op)) {
operationIds.insert({op, operationIds.size()});
operations.push_back(op);
}

for (Value result : op->getResults()) {
if (result.getType().isa<NoneType>()) {
continue;
@@ -146,7 +150,8 @@ int MemoryPlan::getOffset(Value v, int size,
}

std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
int &peakMemoryUsed) {
int &peakMemoryUsed,
int &peakOpId) {
std::vector<int> offsets;
// Copy of valueInfo
auto vInfo = valueInfo;
@@ -266,10 +271,23 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
}

size_t peakUsed = 0;
size_t peakUsedValueID = 0;
size_t maxId = 0;
nonConstantAllocatedValues.clear();
nonConstantOffsets.clear();
LLVM_DEBUG(llvm::dbgs() << "\nAllocated offsets : ");
for (auto i : allocatedValuesOrderedByID) {
offsets.push_back(i.second);
peakUsed = std::max(peakUsed, vInfo[i.first].size + i.second);
if (!vInfo[i.first].isConstant) {
maxId++;
nonConstantAllocatedValues.push_back(i.first);
nonConstantOffsets.push_back(i.second);
size_t currentSize = vInfo[i.first].size + i.second;
if (currentSize >= peakUsed) {
peakUsed = currentSize;
peakOpId = maxId;
}
}
LLVM_DEBUG(llvm::dbgs() << "\nValue " << vInfo[i.first].id << ", size = "
<< vInfo[i.first].size << ", offset = " << i.second
<< ", first = " << vInfo[i.first].firstUsed
@@ -279,7 +297,117 @@
LLVM_DEBUG(llvm::dbgs() << "\n\n");
peakMemoryUsed = peakUsed;

// printf("\npeakmemory %d, vid %d maxid %d, opid %d\n", peakMemoryUsed,
// vInfo[values[peakUsedValueID]].id, maxId, peakOpId);

return offsets;
}

char MemoryPlan::getOrdinalCharacter(int i) {
if (i < 10) {
return '0' + i;
} else if (i < 36) {
return 'a' + (i - 10);
} else if (i < 62) {
return 'A' + (i - 36);
}
return '*';
}

void MemoryPlan::printMemoryPlan() {
llvm::outs() << "\nMEMORY PLAN ANALYSIS\n"
<< "¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯";

// llvm::outs() << "\nAllocated Offsets\n";
// for (int i = 0; i < nonConstantAllocatedValues.size(); ++i) {
// llvm::outs() << llvm::format(
// "\n%c (id=%d): size=%d, offset=%d, first_used=%d last_used=%d",
// getOrdinalCharacter(i), i,
// valueInfo[nonConstantAllocatedValues[i]].size, nonConstantOffsets[i],
// valueInfo[nonConstantAllocatedValues[i]].firstUsed,
// valueInfo[nonConstantAllocatedValues[i]].lastUsed);
// }
// llvm::outs() << "\n";

// llvm::outs() << "\nMemory Plan\n";

constexpr int kLineWidth = 60;
int max_size = kLineWidth;
int max_time = 0;
for (int i = 0; i < nonConstantAllocatedValues.size(); ++i) {
const int offset = nonConstantOffsets[i];
const int last_time_used =
valueInfo[nonConstantAllocatedValues[i]].lastUsed;
const int size = offset + valueInfo[nonConstantAllocatedValues[i]].size;
if (size > max_size) {
max_size = size;
}
if (last_time_used > max_time) {
max_time = last_time_used;
}
}

char line[kLineWidth + 1];
for (int t = 0; t <= max_time; ++t) {
for (int c = 0; c < kLineWidth; ++c) {
line[c] = '.';
}
int memory_use = 0;
for (int i = 0; i < nonConstantAllocatedValues.size(); ++i) {
if ((t < valueInfo[nonConstantAllocatedValues[i]].firstUsed) ||
(t > valueInfo[nonConstantAllocatedValues[i]].lastUsed)) {
continue;
}
const int offset = nonConstantOffsets[i];
if (offset == -1) {
continue;
}
const int size = valueInfo[nonConstantAllocatedValues[i]].size;
memory_use += size;
const int line_start = (offset * kLineWidth) / max_size;
const int line_end = ((offset + size) * kLineWidth) / max_size;
for (int n = line_start; n < line_end; ++n) {
if (line[n] == '.') {
line[n] = getOrdinalCharacter(i);
} else {
line[n] = '!';
}
}
}
line[kLineWidth] = 0;

llvm::outs() << llvm::format(
"\n%-20s %s%d: %s (%dk)",
operations[t]->getName().stripDialect().str().c_str(),
t < 10 ? " " : "", t, (const char *)line, (memory_use + 1023) / 1024);
}
llvm::outs() << "\n";
}

int MemoryPlan::getNextBottomOpId(int opId) {
Block *block = &op->getRegion(0).front();
const LivenessBlockInfo *lvb = liveness.getLiveness(block);
Operation *startOp = lvb->getStartOperation(nonConstantAllocatedValues[opId]);
Operation *endOp =
lvb->getEndOperation(nonConstantAllocatedValues[opId], startOp);
int nextOpId = operationIds[endOp];

if (nextOpId < opId) {
nextOpId = -1;
} else if (nextOpId == opId) {
nextOpId++;
}

if (nextOpId != -1) {
startOp = lvb->getStartOperation(nonConstantAllocatedValues[nextOpId]);
endOp = lvb->getEndOperation(nonConstantAllocatedValues[nextOpId], startOp);
int nextNextOpId = operationIds[endOp];
if (nextNextOpId != nextOpId) {
nextOpId = nextNextOpId;
}
}

return nextOpId;
}

} // namespace mlir::xcore
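
For orientation, below is a hypothetical illustration (op names, sizes, and offsets invented, not taken from the commit) of the kind of report printMemoryPlan emits: one row per operation in program order, a 60-character band in which each live buffer is drawn with its ordinal character from getOrdinalCharacter ('.' marks free arena space, '!' marks an overlap), and the live memory at that step rounded up to kilobytes.

MEMORY PLAN ANALYSIS
¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
conv_2d               0: 000000000000000000001111111111111111........................ (96k)
conv_2d               1: 222222222222222222221111111111111111........................ (96k)
depthwise_conv_2d     2: 222222222222222222223333333333.............................. (80k)
mean                  3: 44444444............3333333333.............................. (48k)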
14 changes: 13 additions & 1 deletion xformer/Analysis/MemoryPlan.h
@@ -41,12 +41,16 @@ class MemoryPlan {
// last buffer that's simultaneously active.
// - This continues until all buffers are placed, and the offsets stored.
std::vector<int> getAllocatedOffsets(const bool overlapOps,
int &peakMemoryUsed);
int &peakMemoryUsed, int &peakOpId);

Operation *getOpWithMaxMemoryUsed();

int getNextBottomOpId(int opId);

// OpSplitPlan getOpSplitPlan();

void printMemoryPlan();

private:
/// Initializes the internal mappings.
void build();
@@ -73,6 +77,8 @@ class MemoryPlan {
int getOffset(Value v, int size, DenseMap<Value, ValueInfo> &valueInfo,
ValuesOrderedByOffset &allocatedOffsets);

char getOrdinalCharacter(int i);

DenseMap<Value, ValueInfo> valueInfo;

std::vector<Value> values;
@@ -83,6 +89,12 @@ class MemoryPlan {
// Stores all operations according to the program sequence.
std::vector<Operation *> operations;

// Stores non-constant values and their allocated offsets.
// These are needed so the memory plan can be printed without being cluttered
// by constant values.
std::vector<Value> nonConstantAllocatedValues;
std::vector<int> nonConstantOffsets;

Liveness liveness;

Operation *op;
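
A minimal sketch of how a pass might drive this extended interface; the wrapper function and its name are assumptions, and only the MemoryPlan methods themselves come from this commit:

// Hypothetical caller; assumes a MemoryPlan instance built for the function
// being analysed (e.g. obtained via getAnalysis<MemoryPlan>() in a pass).
void reportMemoryPlan(MemoryPlan &memPlan, bool overlapOps) {
  int peakMemoryUsed = 0;
  int peakOpId = -1;
  std::vector<int> offsets =
      memPlan.getAllocatedOffsets(overlapOps, peakMemoryUsed, peakOpId);
  memPlan.printMemoryPlan();
  // peakOpId records where the peak was reached (interpretation assumed from
  // the code above); it can seed getNextBottomOpId() when searching for op
  // split points.
  int candidateId = memPlan.getNextBottomOpId(peakOpId);
  (void)offsets;
  (void)candidateId;
}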
24 changes: 18 additions & 6 deletions xformer/Transforms/ConvPatterns.h
@@ -63,11 +63,15 @@ struct BConvArgs {
template <typename ConcreteType, typename ConvOpType, typename ArgsType>
class ReplaceWithXCConv2DBase : public OpRewritePattern<ConvOpType> {
public:
ReplaceWithXCConv2DBase(MLIRContext *context)
: OpRewritePattern<ConvOpType>(context) {}
ReplaceWithXCConv2DBase(std::unordered_set<Operation *> *errorOpsSet,
MLIRContext *context)
: OpRewritePattern<ConvOpType>(context), errorOpsSet_(errorOpsSet) {}

LogicalResult matchAndRewrite(ConvOpType op,
PatternRewriter &rewriter) const override;

public:
std::unordered_set<Operation *> *errorOpsSet_;
};

//
@@ -80,7 +84,9 @@ class ReplaceBConv2DPattern
public:
using BaseType =
ReplaceWithXCConv2DBase<ReplaceBConv2DPattern, lq::Bconv2dOp, BConvArgs>;
ReplaceBConv2DPattern(MLIRContext *context) : BaseType(context) {}
ReplaceBConv2DPattern(std::unordered_set<Operation *> *errorOpsSet,
MLIRContext *context)
: BaseType(errorOpsSet, context) {}

LogicalResult checkIfValid(lq::Bconv2dOp op) const;

@@ -142,7 +148,9 @@ class ReplaceConv2DBase : public ReplaceWithXCConv2DBase<
using BaseType =
ReplaceWithXCConv2DBase<ReplaceConv2DBase<ConcreteType, TFLConvOpType>,
TFLConvOpType, TFLConvArgs>;
ReplaceConv2DBase(MLIRContext *context) : BaseType(context) {}
ReplaceConv2DBase(std::unordered_set<Operation *> *errorOpsSet,
MLIRContext *context)
: BaseType(errorOpsSet, context) {}

LogicalResult checkIfValid(TFLConvOpType op) const { return success(); }

@@ -181,7 +189,9 @@ class ReplaceConv2DPattern
: public ReplaceConv2DBase<ReplaceConv2DPattern, FakeConv2DOp> {
public:
using BaseType = ReplaceConv2DBase<ReplaceConv2DPattern, FakeConv2DOp>;
ReplaceConv2DPattern(MLIRContext *context) : BaseType(context) {}
ReplaceConv2DPattern(std::unordered_set<Operation *> *errorOpsSet,
MLIRContext *context)
: BaseType(errorOpsSet, context) {}

LogicalResult getKernelType(const TFLConvArgs &args, Conv2DType &kt) const;

@@ -240,7 +250,9 @@ class ReplaceDepthwiseConv2DPattern
public:
using BaseType =
ReplaceConv2DBase<ReplaceDepthwiseConv2DPattern, FakeDepthwiseConv2DOp>;
ReplaceDepthwiseConv2DPattern(MLIRContext *context) : BaseType(context) {}
ReplaceDepthwiseConv2DPattern(std::unordered_set<Operation *> *errorOpsSet,
MLIRContext *context)
: BaseType(errorOpsSet, context) {}

LogicalResult getKernelType(const TFLConvArgs &args, Conv2DType &kt) const;

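
With this change every conv pattern receives a pointer to a shared set of already-reported ops, so registration presumably hands one set to all of them. A sketch under that assumption; the enclosing pass, ctx, and func are not part of this diff:

// Hypothetical registration inside a pass's runOnOperation(); the pass
// itself, ctx, and func are assumed context, not shown in this commit.
std::unordered_set<Operation *> errorOps;
RewritePatternSet patterns(ctx);
patterns.insert<ReplaceConv2DPattern, ReplaceDepthwiseConv2DPattern,
                ReplaceBConv2DPattern>(&errorOps, ctx);
if (failed(applyPatternsAndFoldGreedily(func, std::move(patterns))))
  signalPassFailure();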
34 changes: 22 additions & 12 deletions xformer/Transforms/ConvPatternsTFL.cpp
@@ -326,14 +326,19 @@ LogicalResult ReplaceConv2DPattern::getOutputTransformParams(

if (quantError > args.quantErrorThreshold) {
std::stringstream msg;
msg << "Quantization error of " << quantError
msg << std::endl
<< "WARNING: Op left unoptimized!" << std::endl
<< "Reason: Quantization error of " << quantError
<< " larger than set threshold of " << args.quantErrorThreshold
<< ", therefore reverting to reference Conv2D op!" << std::endl
<< "Inspect the output, and if suitable, set a "
"higher threshold with --xcore-conv-err-threshold."
<< ", therefore reverting to reference Conv2D op" << std::endl
<< "Name: " << utils::getLocName(*args.convOp) << std::endl
<< "Solution: Inspect the output, and if suitable, set a "
"higher threshold with --xcore-conv-err-threshold"
<< std::endl;
args.convOp->emitWarning(
utils::getMsgWithLocPrefix(*args.convOp, msg.str()));
if (!errorOpsSet_->count(args.convOp)) {
errorOpsSet_->insert(args.convOp);
llvm::errs() << msg.str();
}
return failure();
} else {
otType = OtType::Channelwise;
@@ -627,15 +632,20 @@ LogicalResult ReplaceDepthwiseConv2DPattern::getOutputTransformParams(
mulAndBiases, qp, true);
if (quantError > args.quantErrorThreshold) {
std::stringstream msg;
msg << "Quantization error of " << quantError
msg << std::endl
<< "WARNING: Op left unoptimized!" << std::endl
<< "Reason: Quantization error of " << quantError
<< " larger than set threshold of " << args.quantErrorThreshold
<< ", therefore reverting to reference DepthwiseConv2D op!"
<< ", therefore reverting to reference DepthwiseConv2D op"
<< std::endl
<< "Inspect the output, and if suitable, set a "
"higher threshold with --xcore-conv-err-threshold."
<< "Name: " << utils::getLocName(*args.convOp) << std::endl
<< "Solution: Inspect the output, and if suitable, set a "
"higher threshold with --xcore-conv-err-threshold"
<< std::endl;
args.convOp->emitWarning(
utils::getMsgWithLocPrefix(*args.convOp, msg.str()));
if (!errorOpsSet_->count(args.convOp)) {
errorOpsSet_->insert(args.convOp);
llvm::errs() << msg.str();
}
return failure();
} else {
otType = OtType::Channelwise;
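
The count()/insert() pair that guards the warning could equally be collapsed into a single call, since std::unordered_set::insert reports whether the element was newly added; a possible simplification, not what the commit does:

if (errorOpsSet_->insert(args.convOp).second) {
  // First time this op failed the threshold check: emit the warning once.
  llvm::errs() << msg.str();
}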
10 changes: 8 additions & 2 deletions xformer/Transforms/Passes.cpp
@@ -11,7 +11,7 @@

namespace mlir::xcore {

void buildXCorePassPipeline(OpPassManager &pm) {
void buildXCorePreOpSplitPassPipeline(OpPassManager &pm) {
// Run pass from LCE to convert Larq ops which are in TFL custom op format to
// Larq dialect
pm.addPass(mlir::TFL::CreateTranslateToLCEPass());
@@ -21,6 +21,9 @@ void buildXCorePassPipeline(OpPassManager &pm) {
pm.addPass(createOptimizeTransposePass());
pm.addPass(createReplaceAvgPoolWithConv2DPass());
pm.addPass(createReplaceFCWithConv2DPass());
}

void buildXCoreRemainingPassPipeline(OpPassManager &pm) {
if (opSplitTensorArenaOption) {
pm.addPass(createOpSplitPass());
}
@@ -58,7 +61,10 @@ void registerXCorePassPipeline() {
mlir::PassPipelineRegistration<> pipeline(
"xcore-tfl-pipeline",
"Run XCore passes for transforming TFLite code into XCore",
[](OpPassManager &passManager) { buildXCorePassPipeline(passManager); });
[](OpPassManager &passManager) {
buildXCorePreOpSplitPassPipeline(passManager);
buildXCoreRemainingPassPipeline(passManager);
});
}

} // namespace mlir::xcore
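
Splitting the builder into pre-op-split and remaining halves lets a driver pause between them, for example to run the memory plan or op split analysis on the partially lowered module, before finishing the lowering. A rough sketch; the driver-side variables (ctx, module) and the two-PassManager structure are assumptions:

// Hypothetical driver; ctx is an MLIRContext* and module a ModuleOp,
// both assumed to exist in the surrounding tool code.
mlir::PassManager pmPre(ctx);
buildXCorePreOpSplitPassPipeline(pmPre);
if (failed(pmPre.run(module)))
  return failure();

// ... memory plan / op split analysis could inspect the module here ...

mlir::PassManager pmRest(ctx);
buildXCoreRemainingPassPipeline(pmRest);
if (failed(pmRest.run(module)))
  return failure();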
3 changes: 2 additions & 1 deletion xformer/Transforms/Passes.h
@@ -16,7 +16,8 @@ namespace xcore {

// Create a single pipeline that will run all the needed passes in the right
// order.
void buildXCorePassPipeline(OpPassManager &pm);
void buildXCorePreOpSplitPassPipeline(OpPassManager &pm);
void buildXCoreRemainingPassPipeline(OpPassManager &pm);

//===----------------------------------------------------------------------===//
// XCore-specific passes