Merge pull request #899 from xmos/feature/memory-analysis
Add memory plan print and op split analysis support
panickal-xmos authored Jun 7, 2024
2 parents a22e567 + 937de31 commit 3c0a431
Showing 14 changed files with 377 additions and 39 deletions.
2 changes: 1 addition & 1 deletion third_party/lib_tflite_micro
Submodule lib_tflite_micro updated 0 files
138 changes: 133 additions & 5 deletions xformer/Analysis/MemoryPlan.cpp
@@ -8,6 +8,7 @@

#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"

#define DEBUG_TYPE "xcore-memory-plan"

@@ -41,9 +42,6 @@ void MemoryPlan::build() {
}

funcOp.walk<WalkOrder::PreOrder>([&](Operation *op) {
operationIds.insert({op, operationIds.size()});
operations.push_back(op);

if (op == funcOp || llvm::isa<quantfork::StatisticsOp>(op)) {
return;
}
@@ -56,6 +54,12 @@
isConstantOp = true;
}

if (!llvm::isa<TFL::NoValueOp, TFL::QConstOp, TFL::ConstOp,
arith::ConstantOp>(op)) {
operationIds.insert({op, operationIds.size()});
operations.push_back(op);
}

for (Value result : op->getResults()) {
if (result.getType().isa<NoneType>()) {
continue;
@@ -146,7 +150,8 @@ int MemoryPlan::getOffset(Value v, int size,
}

std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
int &peakMemoryUsed) {
int &peakMemoryUsed,
int &peakOpId) {
std::vector<int> offsets;
// Copy of valueInfo
auto vInfo = valueInfo;
@@ -266,10 +271,23 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
}

size_t peakUsed = 0;
size_t peakUsedValueID = 0;
size_t maxId = 0;
nonConstantAllocatedValues.clear();
nonConstantOffsets.clear();
LLVM_DEBUG(llvm::dbgs() << "\nAllocated offsets : ");
for (auto i : allocatedValuesOrderedByID) {
offsets.push_back(i.second);
peakUsed = std::max(peakUsed, vInfo[i.first].size + i.second);
if (!vInfo[i.first].isConstant) {
maxId++;
nonConstantAllocatedValues.push_back(i.first);
nonConstantOffsets.push_back(i.second);
size_t currentSize = vInfo[i.first].size + i.second;
if (currentSize >= peakUsed) {
peakUsed = currentSize;
peakOpId = maxId;
}
}
LLVM_DEBUG(llvm::dbgs() << "\nValue " << vInfo[i.first].id << ", size = "
<< vInfo[i.first].size << ", offset = " << i.second
<< ", first = " << vInfo[i.first].firstUsed
@@ -279,7 +297,117 @@
LLVM_DEBUG(llvm::dbgs() << "\n\n");
peakMemoryUsed = peakUsed;

// printf("\npeakmemory %d, vid %d maxid %d, opid %d\n", peakMemoryUsed,
// vInfo[values[peakUsedValueID]].id, maxId, peakOpId);

return offsets;
}

char MemoryPlan::getOrdinalCharacter(int i) {
if (i < 10) {
return '0' + i;
} else if (i < 36) {
return 'a' + (i - 10);
} else if (i < 62) {
return 'A' + (i - 36);
}
return '*';
}

void MemoryPlan::printMemoryPlan() {
llvm::outs() << "\nMEMORY PLAN ANALYSIS\n"
<< "¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯";

// llvm::outs() << "\nAllocated Offsets\n";
// for (int i = 0; i < nonConstantAllocatedValues.size(); ++i) {
// llvm::outs() << llvm::format(
// "\n%c (id=%d): size=%d, offset=%d, first_used=%d last_used=%d",
// getOrdinalCharacter(i), i,
// valueInfo[nonConstantAllocatedValues[i]].size, nonConstantOffsets[i],
// valueInfo[nonConstantAllocatedValues[i]].firstUsed,
// valueInfo[nonConstantAllocatedValues[i]].lastUsed);
// }
// llvm::outs() << "\n";

// llvm::outs() << "\nMemory Plan\n";

constexpr int kLineWidth = 60;
int max_size = kLineWidth;
int max_time = 0;
for (int i = 0; i < nonConstantAllocatedValues.size(); ++i) {
const int offset = nonConstantOffsets[i];
const int last_time_used =
valueInfo[nonConstantAllocatedValues[i]].lastUsed;
const int size = offset + valueInfo[nonConstantAllocatedValues[i]].size;
if (size > max_size) {
max_size = size;
}
if (last_time_used > max_time) {
max_time = last_time_used;
}
}

char line[kLineWidth + 1];
for (int t = 0; t <= max_time; ++t) {
for (int c = 0; c < kLineWidth; ++c) {
line[c] = '.';
}
int memory_use = 0;
for (int i = 0; i < nonConstantAllocatedValues.size(); ++i) {
if ((t < valueInfo[nonConstantAllocatedValues[i]].firstUsed) ||
(t > valueInfo[nonConstantAllocatedValues[i]].lastUsed)) {
continue;
}
const int offset = nonConstantOffsets[i];
if (offset == -1) {
continue;
}
const int size = valueInfo[nonConstantAllocatedValues[i]].size;
memory_use += size;
const int line_start = (offset * kLineWidth) / max_size;
const int line_end = ((offset + size) * kLineWidth) / max_size;
for (int n = line_start; n < line_end; ++n) {
if (line[n] == '.') {
line[n] = getOrdinalCharacter(i);
} else {
line[n] = '!';
}
}
}
line[kLineWidth] = 0;

llvm::outs() << llvm::format(
"\n%-20s %s%d: %s (%dk)",
operations[t]->getName().stripDialect().str().c_str(),
t < 10 ? " " : "", t, (const char *)line, (memory_use + 1023) / 1024);
}
llvm::outs() << "\n";
}

int MemoryPlan::getNextBottomOpId(int opId) {
Block *block = &op->getRegion(0).front();
const LivenessBlockInfo *lvb = liveness.getLiveness(block);
Operation *startOp = lvb->getStartOperation(nonConstantAllocatedValues[opId]);
Operation *endOp =
lvb->getEndOperation(nonConstantAllocatedValues[opId], startOp);
int nextOpId = operationIds[endOp];

if (nextOpId < opId) {
nextOpId = -1;
} else if (nextOpId == opId) {
nextOpId++;
}

if (nextOpId != -1) {
startOp = lvb->getStartOperation(nonConstantAllocatedValues[nextOpId]);
endOp = lvb->getEndOperation(nonConstantAllocatedValues[nextOpId], startOp);
int nextNextOpId = operationIds[endOp];
if (nextNextOpId != nextOpId) {
nextOpId = nextNextOpId;
}
}

return nextOpId;
}

} // namespace mlir::xcore
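
For orientation, below is a hypothetical illustration (op names, sizes, and offsets invented, not taken from the commit) of the kind of report printMemoryPlan emits: one row per operation in program order, a 60-character band in which each live buffer is drawn with its ordinal character from getOrdinalCharacter ('.' marks free arena space, '!' marks an overlap), and the live memory at that step rounded up to kilobytes.

MEMORY PLAN ANALYSIS
¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
conv_2d               0: 000000000000000000001111111111111111........................ (96k)
conv_2d               1: 222222222222222222221111111111111111........................ (96k)
depthwise_conv_2d     2: 222222222222222222223333333333.............................. (80k)
mean                  3: 44444444............3333333333.............................. (48k)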
14 changes: 13 additions & 1 deletion xformer/Analysis/MemoryPlan.h
@@ -41,12 +41,16 @@ class MemoryPlan {
// last buffer that's simultaneously active.
// - This continues until all buffers are placed, and the offsets stored.
std::vector<int> getAllocatedOffsets(const bool overlapOps,
int &peakMemoryUsed);
int &peakMemoryUsed, int &peakOpId);

Operation *getOpWithMaxMemoryUsed();

int getNextBottomOpId(int opId);

// OpSplitPlan getOpSplitPlan();

void printMemoryPlan();

private:
/// Initializes the internal mappings.
void build();
@@ -73,6 +77,8 @@ class MemoryPlan {
int getOffset(Value v, int size, DenseMap<Value, ValueInfo> &valueInfo,
ValuesOrderedByOffset &allocatedOffsets);

char getOrdinalCharacter(int i);

DenseMap<Value, ValueInfo> valueInfo;

std::vector<Value> values;
@@ -83,6 +89,12 @@ class MemoryPlan {
// Stores all operations according to the program sequence.
std::vector<Operation *> operations;

// Stores non-constant values and their allocated offsets.
// These are needed so the memory plan can be printed without being cluttered
// by constant values.
std::vector<Value> nonConstantAllocatedValues;
std::vector<int> nonConstantOffsets;

Liveness liveness;

Operation *op;
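
A minimal sketch of how a pass might drive this extended interface; the wrapper function and its name are assumptions, and only the MemoryPlan methods themselves come from this commit:

// Hypothetical caller; assumes a MemoryPlan instance built for the function
// being analysed (e.g. obtained via getAnalysis<MemoryPlan>() in a pass).
void reportMemoryPlan(MemoryPlan &memPlan, bool overlapOps) {
  int peakMemoryUsed = 0;
  int peakOpId = -1;
  std::vector<int> offsets =
      memPlan.getAllocatedOffsets(overlapOps, peakMemoryUsed, peakOpId);
  memPlan.printMemoryPlan();
  // peakOpId records where the peak was reached (interpretation assumed from
  // the code above); it can seed getNextBottomOpId() when searching for op
  // split points.
  int candidateId = memPlan.getNextBottomOpId(peakOpId);
  (void)offsets;
  (void)candidateId;
}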
24 changes: 18 additions & 6 deletions xformer/Transforms/ConvPatterns.h
@@ -63,11 +63,15 @@ struct BConvArgs {
template <typename ConcreteType, typename ConvOpType, typename ArgsType>
class ReplaceWithXCConv2DBase : public OpRewritePattern<ConvOpType> {
public:
ReplaceWithXCConv2DBase(MLIRContext *context)
: OpRewritePattern<ConvOpType>(context) {}
ReplaceWithXCConv2DBase(std::unordered_set<Operation *> *errorOpsSet,
MLIRContext *context)
: OpRewritePattern<ConvOpType>(context), errorOpsSet_(errorOpsSet) {}

LogicalResult matchAndRewrite(ConvOpType op,
PatternRewriter &rewriter) const override;

public:
std::unordered_set<Operation *> *errorOpsSet_;
};

//
@@ -80,7 +84,9 @@ class ReplaceBConv2DPattern
public:
using BaseType =
ReplaceWithXCConv2DBase<ReplaceBConv2DPattern, lq::Bconv2dOp, BConvArgs>;
ReplaceBConv2DPattern(MLIRContext *context) : BaseType(context) {}
ReplaceBConv2DPattern(std::unordered_set<Operation *> *errorOpsSet,
MLIRContext *context)
: BaseType(errorOpsSet, context) {}

LogicalResult checkIfValid(lq::Bconv2dOp op) const;

@@ -142,7 +148,9 @@ class ReplaceConv2DBase : public ReplaceWithXCConv2DBase<
using BaseType =
ReplaceWithXCConv2DBase<ReplaceConv2DBase<ConcreteType, TFLConvOpType>,
TFLConvOpType, TFLConvArgs>;
ReplaceConv2DBase(MLIRContext *context) : BaseType(context) {}
ReplaceConv2DBase(std::unordered_set<Operation *> *errorOpsSet,
MLIRContext *context)
: BaseType(errorOpsSet, context) {}

LogicalResult checkIfValid(TFLConvOpType op) const { return success(); }

@@ -181,7 +189,9 @@ class ReplaceConv2DPattern
: public ReplaceConv2DBase<ReplaceConv2DPattern, FakeConv2DOp> {
public:
using BaseType = ReplaceConv2DBase<ReplaceConv2DPattern, FakeConv2DOp>;
ReplaceConv2DPattern(MLIRContext *context) : BaseType(context) {}
ReplaceConv2DPattern(std::unordered_set<Operation *> *errorOpsSet,
MLIRContext *context)
: BaseType(errorOpsSet, context) {}

LogicalResult getKernelType(const TFLConvArgs &args, Conv2DType &kt) const;

@@ -240,7 +250,9 @@ class ReplaceDepthwiseConv2DPattern
public:
using BaseType =
ReplaceConv2DBase<ReplaceDepthwiseConv2DPattern, FakeDepthwiseConv2DOp>;
ReplaceDepthwiseConv2DPattern(MLIRContext *context) : BaseType(context) {}
ReplaceDepthwiseConv2DPattern(std::unordered_set<Operation *> *errorOpsSet,
MLIRContext *context)
: BaseType(errorOpsSet, context) {}

LogicalResult getKernelType(const TFLConvArgs &args, Conv2DType &kt) const;

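
With this change every conv pattern receives a pointer to a shared set of already-reported ops, so registration presumably hands one set to all of them. A sketch under that assumption; the enclosing pass, ctx, and func are not part of this diff:

// Hypothetical registration inside a pass's runOnOperation(); the pass
// itself, ctx, and func are assumed context, not shown in this commit.
std::unordered_set<Operation *> errorOps;
RewritePatternSet patterns(ctx);
patterns.insert<ReplaceConv2DPattern, ReplaceDepthwiseConv2DPattern,
                ReplaceBConv2DPattern>(&errorOps, ctx);
if (failed(applyPatternsAndFoldGreedily(func, std::move(patterns))))
  signalPassFailure();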
34 changes: 22 additions & 12 deletions xformer/Transforms/ConvPatternsTFL.cpp
@@ -326,14 +326,19 @@ LogicalResult ReplaceConv2DPattern::getOutputTransformParams(

if (quantError > args.quantErrorThreshold) {
std::stringstream msg;
msg << "Quantization error of " << quantError
msg << std::endl
<< "WARNING: Op left unoptimized!" << std::endl
<< "Reason: Quantization error of " << quantError
<< " larger than set threshold of " << args.quantErrorThreshold
<< ", therefore reverting to reference Conv2D op!" << std::endl
<< "Inspect the output, and if suitable, set a "
"higher threshold with --xcore-conv-err-threshold."
<< ", therefore reverting to reference Conv2D op" << std::endl
<< "Name: " << utils::getLocName(*args.convOp) << std::endl
<< "Solution: Inspect the output, and if suitable, set a "
"higher threshold with --xcore-conv-err-threshold"
<< std::endl;
args.convOp->emitWarning(
utils::getMsgWithLocPrefix(*args.convOp, msg.str()));
if (!errorOpsSet_->count(args.convOp)) {
errorOpsSet_->insert(args.convOp);
llvm::errs() << msg.str();
}
return failure();
} else {
otType = OtType::Channelwise;
@@ -627,15 +632,20 @@ LogicalResult ReplaceDepthwiseConv2DPattern::getOutputTransformParams(
mulAndBiases, qp, true);
if (quantError > args.quantErrorThreshold) {
std::stringstream msg;
msg << "Quantization error of " << quantError
msg << std::endl
<< "WARNING: Op left unoptimized!" << std::endl
<< "Reason: Quantization error of " << quantError
<< " larger than set threshold of " << args.quantErrorThreshold
<< ", therefore reverting to reference DepthwiseConv2D op!"
<< ", therefore reverting to reference DepthwiseConv2D op"
<< std::endl
<< "Inspect the output, and if suitable, set a "
"higher threshold with --xcore-conv-err-threshold."
<< "Name: " << utils::getLocName(*args.convOp) << std::endl
<< "Solution: Inspect the output, and if suitable, set a "
"higher threshold with --xcore-conv-err-threshold"
<< std::endl;
args.convOp->emitWarning(
utils::getMsgWithLocPrefix(*args.convOp, msg.str()));
if (!errorOpsSet_->count(args.convOp)) {
errorOpsSet_->insert(args.convOp);
llvm::errs() << msg.str();
}
return failure();
} else {
otType = OtType::Channelwise;
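
The count()/insert() pair that guards the warning could equally be collapsed into a single call, since std::unordered_set::insert reports whether the element was newly added; a possible simplification, not what the commit does:

if (errorOpsSet_->insert(args.convOp).second) {
  // First time this op failed the threshold check: emit the warning once.
  llvm::errs() << msg.str();
}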
10 changes: 8 additions & 2 deletions xformer/Transforms/Passes.cpp
@@ -11,7 +11,7 @@

namespace mlir::xcore {

void buildXCorePassPipeline(OpPassManager &pm) {
void buildXCorePreOpSplitPassPipeline(OpPassManager &pm) {
// Run pass from LCE to convert Larq ops which are in TFL custom op format to
// Larq dialect
pm.addPass(mlir::TFL::CreateTranslateToLCEPass());
@@ -21,6 +21,9 @@ void buildXCorePassPipeline(OpPassManager &pm) {
pm.addPass(createOptimizeTransposePass());
pm.addPass(createReplaceAvgPoolWithConv2DPass());
pm.addPass(createReplaceFCWithConv2DPass());
}

void buildXCoreRemainingPassPipeline(OpPassManager &pm) {
if (opSplitTensorArenaOption) {
pm.addPass(createOpSplitPass());
}
@@ -58,7 +61,10 @@ void registerXCorePassPipeline() {
mlir::PassPipelineRegistration<> pipeline(
"xcore-tfl-pipeline",
"Run XCore passes for transforming TFLite code into XCore",
[](OpPassManager &passManager) { buildXCorePassPipeline(passManager); });
[](OpPassManager &passManager) {
buildXCorePreOpSplitPassPipeline(passManager);
buildXCoreRemainingPassPipeline(passManager);
});
}

} // namespace mlir::xcore
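
Splitting the builder into pre-op-split and remaining halves lets a driver pause between them, for example to run the memory plan or op split analysis on the partially lowered module, before finishing the lowering. A rough sketch; the driver-side variables (ctx, module) and the two-PassManager structure are assumptions:

// Hypothetical driver; ctx is an MLIRContext* and module a ModuleOp,
// both assumed to exist in the surrounding tool code.
mlir::PassManager pmPre(ctx);
buildXCorePreOpSplitPassPipeline(pmPre);
if (failed(pmPre.run(module)))
  return failure();

// ... memory plan / op split analysis could inspect the module here ...

mlir::PassManager pmRest(ctx);
buildXCoreRemainingPassPipeline(pmRest);
if (failed(pmRest.run(module)))
  return failure();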
3 changes: 2 additions & 1 deletion xformer/Transforms/Passes.h
@@ -16,7 +16,8 @@ namespace xcore {

// Create a single pipeline that will run all the needed passes in the right
// order.
void buildXCorePassPipeline(OpPassManager &pm);
void buildXCorePreOpSplitPassPipeline(OpPassManager &pm);
void buildXCoreRemainingPassPipeline(OpPassManager &pm);

//===----------------------------------------------------------------------===//
// XCore-specific passes