
Commit

Merge pull request #905 from xmos/subop
Sub op
panickal-xmos authored Jul 18, 2024
2 parents 175ca98 + b388071 commit 592afc2
Showing 37 changed files with 187 additions and 137 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release-beta.yml
@@ -112,7 +112,7 @@ jobs:
     if: github.event.pull_request.merged == true
     name: Build release wheels for macOS arm64
     needs: [build-release-archive]
-    runs-on: macos-11
+    runs-on: macos-14
     strategy:
       matrix:
         python-version: [3.9]
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
@@ -122,7 +122,7 @@ jobs:
   macos-arm-release-wheel:
     name: Build release wheels for macOS arm64
     needs: [build-release-archive]
-    runs-on: macos-11
+    runs-on: macos-14
     strategy:
       matrix:
         python-version: [3.9]
6 changes: 6 additions & 0 deletions integration_tests/models/8x8/test_sub/1.sh
@@ -0,0 +1,6 @@
+cp $1 /tmp/
+xcore-opt /tmp/$1 --lce-translate-tfl --mlir-print-ir-after-all -o /tmp/1.tflite >/tmp/1.mlir 2>&1
+cat /tmp/1.mlir | grep -v Tensor > /tmp/2.mlir
+sed -i -e 's/tfl.add/tfl.sub/g' /tmp/2.mlir
+xcore-opt --mlir-io --lce-translate-tfl /tmp/2.mlir -o /tmp/t.tflite
+cp /tmp/t.tflite $1
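From the commands above, this test script appears to generate the sub test model from an existing add-based model: it copies the input .tflite aside, uses xcore-opt to dump the model as textual MLIR, strips the Tensor debug lines, rewrites every tfl.add op to tfl.sub with sed, translates the edited MLIR back to a flatbuffer, and finally overwrites the original file with the resulting sub-based model.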
Binary file not shown. (28 such entries)
94 changes: 60 additions & 34 deletions xformer/Analysis/MemoryPlan.cpp
@@ -161,43 +161,62 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
llvm::DenseSet<Operation *> alreadyVisited;
if (overlapOps) {
for (auto o : operations) {
// We iterate through overlappable ops which have not been visited yet
if (o->hasTrait<OpTrait::xcore::MemoryOverlappable>() &&
!alreadyVisited.contains(o) && o->getOperand(0).hasOneUse()) {
alreadyVisited.insert(o);

llvm::SmallVector<Value> inputVals;
!alreadyVisited.contains(o)) {
auto inVal = o->getOperand(0);
inputVals.push_back(inVal);

auto outVal = o->getResult(0);
auto nextOp = *outVal.getUsers().begin();
// Identify chain of overlappable Ops
while (outVal.hasOneUse() && !alreadyVisited.contains(nextOp) &&
nextOp->hasTrait<OpTrait::xcore::MemoryOverlappable>()) {
inVal = outVal;

// We have binary and unary ops as overlappable
// For binary ops, we might have to overlap with the second operand
// The complicated if condition below is to check for valid one operand
// or two operand cases
if ((o->getNumOperands() == 1 && inVal.hasOneUse() &&
!vInfo[inVal].isConstant) ||
(o->getNumOperands() == 2 &&
(inVal.hasOneUse() && !vInfo[inVal].isConstant ||
o->getOperand(1).hasOneUse() &&
!vInfo[o->getOperand(1)].isConstant))) {
// In case of two operands and first operand is invalid, use the
// second one
if (o->getNumOperands() == 2 &&
(!inVal.hasOneUse() || vInfo[inVal].isConstant)) {
inVal = o->getOperand(1);
}

alreadyVisited.insert(o);
llvm::SmallVector<Value> inputVals;
inputVals.push_back(inVal);
alreadyVisited.insert(nextOp);
outVal = nextOp->getResult(0);
nextOp = *outVal.getUsers().begin();
}

// Set first Used of output Val to the first input Val
vInfo[outVal].firstUsed = vInfo[inputVals[0]].firstUsed;
auto unalignedSizeOutVal =
utils::getShapedTypeSize(outVal.getType().dyn_cast<ShapedType>());
size_t maxSizeNeeded = 0;
for (auto inV : inputVals) {
auto unalignedSizeInV =
utils::getShapedTypeSize(inV.getType().dyn_cast<ShapedType>());
auto unalignedOffset = unalignedSizeOutVal - unalignedSizeInV;
// Align offset up to double word = 8 bytes
auto offset = ((unalignedOffset + 7) / 8) * 8;
maxSizeNeeded = std::max(vInfo[inV].size + offset, maxSizeNeeded);
inOutMap[inV] = {outVal, offset};
auto outVal = o->getResult(0);
auto nextOp = *outVal.getUsers().begin();
// Identify chain of overlappable Ops
while (outVal.hasOneUse() && !alreadyVisited.contains(nextOp) &&
nextOp->hasTrait<OpTrait::xcore::MemoryOverlappable>()) {
inVal = outVal;
inputVals.push_back(inVal);
alreadyVisited.insert(nextOp);
outVal = nextOp->getResult(0);
nextOp = *outVal.getUsers().begin();
}

// Set first Used of output Val to the first input Val
vInfo[outVal].firstUsed = vInfo[inputVals[0]].firstUsed;
auto unalignedSizeOutVal =
utils::getShapedTypeSize(outVal.getType().dyn_cast<ShapedType>());
size_t maxSizeNeeded = 0;
for (auto inV : inputVals) {
auto unalignedSizeInV =
utils::getShapedTypeSize(inV.getType().dyn_cast<ShapedType>());
auto unalignedOffset = unalignedSizeOutVal - unalignedSizeInV;
// Align offset up to double word = 8 bytes
auto offset = ((unalignedOffset + 7) / 8) * 8;
maxSizeNeeded = std::max(vInfo[inV].size + offset, maxSizeNeeded);
inOutMap[inV] = {outVal, offset};
}
// The aligned input val size plus aligned offset might be larger than
// aligned output val size
vInfo[outVal].size = std::max(vInfo[outVal].size, maxSizeNeeded);
}
// The aligned input val size plus aligned offset might be larger than
// aligned output val size
vInfo[outVal].size = std::max(vInfo[outVal].size, maxSizeNeeded);
}
}
}
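The offset arithmetic in the hunk above places an overlappable op's input at the tail of its output buffer: the offset is the difference of the unaligned tensor sizes, rounded up to the next 8-byte (double-word) boundary, and the shared buffer is grown if the overlapped input no longer fits. A minimal standalone sketch of that calculation, with made-up sizes that are not taken from the codebase:

#include <algorithm>
#include <cstddef>
#include <cstdio>

int main() {
  // Made-up unaligned tensor sizes in bytes for one overlappable op.
  const size_t outSize = 1000; // output tensor of the op
  const size_t inSize = 603;   // input tensor overlapped into the output buffer

  // Offset of the input inside the output buffer, aligned up to 8 bytes
  // (double word), as in the planner's comment.
  const size_t unalignedOffset = outSize - inSize;       // 397
  const size_t offset = ((unalignedOffset + 7) / 8) * 8; // 400

  // The shared buffer must cover the input placed at that offset, so it can
  // end up larger than the output tensor alone.
  const size_t combinedSize = std::max(outSize, inSize + offset); // 1003

  std::printf("offset=%zu combined=%zu\n", offset, combinedSize);
  return 0;
}

With these numbers the 397-byte difference rounds up to 400, and the combined buffer grows to 1003 bytes because the overlapped input ends past the output's own 1000 bytes.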
@@ -353,6 +372,7 @@ void MemoryPlan::printMemoryPlan() {
       line[c] = '.';
     }
     int memory_use = 0;
+    int peakSize = 0;
     for (int i = 0; i < nonConstantAllocatedValues.size(); ++i) {
       if ((t < valueInfo[nonConstantAllocatedValues[i]].firstUsed) ||
           (t > valueInfo[nonConstantAllocatedValues[i]].lastUsed)) {
@@ -362,7 +382,12 @@
       if (offset == -1) {
         continue;
       }
+
       const int size = valueInfo[nonConstantAllocatedValues[i]].size;
+      if (peakSize < offset + size) {
+        peakSize = offset + size;
+      }
+
       memory_use += size;
       const int line_start = (offset * kLineWidth) / max_size;
       const int line_end = ((offset + size) * kLineWidth) / max_size;
@@ -377,9 +402,10 @@
     line[kLineWidth] = 0;
 
     llvm::outs() << llvm::format(
-        "\n%-20s %s%d: %s (%dk)",
+        "\n%-20s %s%d: %s (%dk), (%dk)",
         operations[t]->getName().stripDialect().str().c_str(),
-        t < 10 ? " " : "", t, (const char *)line, (memory_use + 1023) / 1024);
+        t < 10 ? " " : "", t, (const char *)line, (memory_use + 1023) / 1024,
+        (peakSize + 1023) / 1024);
   }
   llvm::outs() << "\n";
 }
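The printMemoryPlan changes above additionally track the peak extent (the largest offset + size placed so far) and print it next to the per-step memory use; both figures are rounded up to whole kilobytes with the (x + 1023) / 1024 idiom. A small self-contained illustration with invented offsets and sizes, not real planner output:

#include <algorithm>
#include <cstdio>

int main() {
  // Invented (offset, size) pairs, in bytes, for buffers live at one time step.
  const int offsets[] = {0, 2048, 8192};
  const int sizes[] = {2048, 4096, 1024};

  int memory_use = 0;
  int peakSize = 0;
  for (int i = 0; i < 3; ++i) {
    memory_use += sizes[i];
    peakSize = std::max(peakSize, offsets[i] + sizes[i]);
  }

  // Same rounding-up-to-kilobytes idiom as the printed plan.
  std::printf("(%dk), (%dk)\n", (memory_use + 1023) / 1024,
              (peakSize + 1023) / 1024);
  // Prints "(7k), (9k)": the peak exceeds the summed sizes because the
  // third buffer is placed after a gap, at offset 8192.
  return 0;
}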
2 changes: 1 addition & 1 deletion xformer/Test/add_broadcast.mlir
@@ -1,4 +1,4 @@
-// RUN: xcore-opt --mlir-io %s --xcore-replace-add | FileCheck %s
+// RUN: xcore-opt --mlir-io %s --xcore-replace-addsub | FileCheck %s

// CHECK-LABEL: add_broadcast
func.func @add_broadcast(%arg0: tensor<1x15x1x1x!quant.uniform<i8:f32, 0.0078378040343523026:-1>> {tf_saved_model.index_path = ["input_1"]}) -> (tensor<?x15x5x4x!quant.uniform<i8:f32, 0.033033743500709534:-6>> {tf_saved_model.index_path = ["add"]}) attributes {tf.entry_function = {inputs = "serving_default_input_1:0", outputs = "PartitionedCall:0"}, tf_saved_model.exported_names = ["serving_default"]} {
8 changes: 4 additions & 4 deletions xformer/Transforms/Passes.cpp
@@ -17,13 +17,13 @@ void buildXCorePreOpSplitPassPipeline(OpPassManager &pm) {
   pm.addPass(mlir::TFL::CreateTranslateToLCEPass());
   // Convert dynamic shapes in batch dimension to static
   pm.addPass(createRemoveDynamicShapePass());
-}
-
-void buildXCoreRemainingPassPipeline(OpPassManager &pm) {
   // TFL passes
   pm.addPass(createOptimizeTransposePass());
   pm.addPass(createReplaceAvgPoolWithConv2DPass());
   pm.addPass(createReplaceFCWithConv2DPass());
+}
+
+void buildXCoreRemainingPassPipeline(OpPassManager &pm) {
   if (opSplitTensorArenaOption) {
     pm.addPass(createOpSplitPass());
   }
@@ -36,7 +36,7 @@ void buildXCoreRemainingPassPipeline(OpPassManager &pm) {
   pm.addPass(mlir::createCanonicalizerPass());
 
   // XC passes
-  pm.addPass(createReplaceAddPass());
+  pm.addPass(createReplaceAddSubPass());
   pm.addPass(createReplaceMaxPoolPass());
   pm.addPass(createReplaceMulPass());
   pm.addPass(createReplaceTransposeConvPass());
2 changes: 1 addition & 1 deletion xformer/Transforms/Passes.h
@@ -31,7 +31,7 @@ std::unique_ptr<OperationPass<func::FuncOp>> createOptimizeConv2DPass();
 std::unique_ptr<OperationPass<func::FuncOp>> createOpSplitPass();
 std::unique_ptr<OperationPass<func::FuncOp>> createApplyTFLPatternsPass();
 std::unique_ptr<OperationPass<func::FuncOp>> createRemoveDynamicShapePass();
-std::unique_ptr<OperationPass<func::FuncOp>> createReplaceAddPass();
+std::unique_ptr<OperationPass<func::FuncOp>> createReplaceAddSubPass();
 std::unique_ptr<OperationPass<func::FuncOp>> createReplaceMulPass();
 std::unique_ptr<OperationPass<func::FuncOp>> createReplaceMaxPoolPass();
 std::unique_ptr<OperationPass<func::FuncOp>> createReplaceStridedSlicePass();
95 changes: 0 additions & 95 deletions xformer/Transforms/ReplaceAdd.cpp

This file was deleted.

(Remaining changed files not loaded.)

0 comments on commit 592afc2
