Add beta float support
panickal-xmos committed Oct 25, 2023
1 parent e525588 commit 94fc870
Showing 16 changed files with 224 additions and 48 deletions.
27 changes: 26 additions & 1 deletion xformer/IR/XCoreOps.td
@@ -132,6 +132,31 @@ def XC_MulOp : XC_Op<"mul", [Pure]> {
let results = (outs TensorOf<[QI8]> : $output);
}

def XC_Beta_ActivationF32Op : XC_Op<"beta_activationf32", [Pure]> {
let summary = "Beta ActivationF32 op";

let description = [{Beta ActivationF32 op.}];

let arguments = (ins
TensorOf<[F32]>:$input,
I32Attr:$type
);

let results = (outs TensorOf<[F32]> : $output);
}

def XC_Beta_ConcatF32Op : XC_Op<"beta_concatf32", [Pure]> {
let summary = "Beta ConcatF32 op";

let description = [{Beta ConcatF32 op.}];

let arguments = (ins
Variadic<TensorOf<[F32]>>:$input
);

let results = (outs TensorOf<[F32]> : $output);
}

def XC_Beta_ConvF32Op : XC_Op<"beta_convf32", [Pure]> {
let summary = "Beta ConvF32 op";

@@ -248,7 +273,7 @@ def XC_LookupOp : XC_Op<"lookup", [Pure]> {

let description = [{Lookup table op.}];

let arguments = (ins TensorOf<[QI8]> : $input, TensorOf<[I8]> : $lut, I32Attr : $thread_count);
let arguments = (ins TensorOf<[QI8]> : $input, TensorOf<[I8]> : $lut);

let results = (outs TensorOf<[QI8]> : $output);
}
6 changes: 6 additions & 0 deletions xformer/Transforms/ApplyLoadConstantOpPatterns.cpp
@@ -30,7 +30,12 @@ struct ApplyLoadConstantOpPatterns
void runOnOperation() override;
};

static int totalSize_ = 0;

bool shouldBeLoadedExternally(Attribute values) {
if (totalSize_ > maxLoadExternalSizeOption) {
return false;
}
// values might be UnitAttr or BoolAttr which are too small to be loaded
// externally anyway
auto totalSizeInBits = 0;
@@ -40,6 +45,7 @@ bool shouldBeLoadedExternally(Attribute values) {
(valuesAttr.getNumElements() *
valuesAttr.getType().getElementType().getIntOrFloatBitWidth());
}
totalSize_ += totalSizeInBits / CHAR_BIT;
return totalSizeInBits / CHAR_BIT > loadExternallyIfLargerOption;
}

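The hunk above introduces a running byte total (totalSize_) so that, in addition to the existing per-tensor threshold (loadExternallyIfLargerOption), there is now an overall cap (maxLoadExternalSizeOption, declared in Options.h below) on how much constant data gets marked for external loading. A minimal standalone sketch of how the two thresholds interact; the function name and plain integer types are illustrative, not from this commit:

#include <cstddef>

static std::size_t totalBytesSeen = 0; // mirrors the pass-level totalSize_ counter

// A constant is loaded externally only if the cumulative cap has not yet been
// exceeded and the tensor itself is larger than the per-tensor threshold.
// The total is accumulated whether or not this particular tensor qualifies,
// matching the behaviour of the pass above.
bool shouldLoadExternally(std::size_t sizeInBytes,
                          std::size_t loadExternallyIfLarger, // per-tensor threshold
                          std::size_t maxLoadExternalSize) {  // cumulative cap
  if (totalBytesSeen > maxLoadExternalSize)
    return false;
  totalBytesSeen += sizeInBytes;
  return sizeInBytes > loadExternallyIfLarger;
}
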
15 changes: 13 additions & 2 deletions xformer/Transforms/ApplyXCPatterns.cpp
@@ -30,6 +30,8 @@ struct ApplyXCPatterns
void runOnOperation() override;
};

bool isBetaFloatEnabled() { return enableBetaFloatOption; }

StringAttr getPaddingPlan(PatternRewriter &rewriter, TFL::PadOp padOp) {
DenseIntElementsAttr paddingAttr;
if (!matchPattern(padOp.getPadding(), m_Constant(&paddingAttr))) {
@@ -83,8 +85,17 @@ IntegerAttr getPadValue(PatternRewriter &rewriter, Value inputVal) {
return rewriter.getI32IntegerAttr(padValue);
}

IntegerAttr getThreadCount(PatternRewriter &rewriter) {
return rewriter.getI32IntegerAttr(threadCountOption);
IntegerAttr getActivationType(PatternRewriter &rewriter, Operation *op) {
// TODO: Refactor to use shared header file for enum
if (isa<TFL::EluOp>(op)) {
return rewriter.getI32IntegerAttr(0);
} else if (isa<TFL::LogisticOp>(op)) {
return rewriter.getI32IntegerAttr(1);
} else if (isa<TFL::TanhOp>(op)) {
return rewriter.getI32IntegerAttr(2);
} else {
llvm_unreachable("Unsupported op!");
}
}
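
The TODO above notes that these integer codes should eventually come from a shared header file. A hypothetical sketch of such a header, not part of this commit; the values simply mirror the if/else chain (Elu = 0, Logistic = 1, Tanh = 2):

// beta_float_activation.h (illustrative name only)
#pragma once
#include <cstdint>

enum class BetaFloatActivation : int32_t {
  Elu = 0,      // TFL::EluOp
  Logistic = 1, // TFL::LogisticOp (sigmoid)
  Tanh = 2,     // TFL::TanhOp
};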

DenseElementsAttr getLookupTable(PatternRewriter &rewriter, Operation *op) {
20 changes: 10 additions & 10 deletions xformer/Transforms/ConvPatterns.td
@@ -42,26 +42,26 @@ Pat<(TFL_DepthwiseConv2DOp: $output TensorOf<[QI8]>:$input, TensorOf<[QI8]>:$f,
(IsConstOp $f),
]>;

// TODO: Special case, we only optimize conv with filter width 5, filter height
// 2, and stride height 3
// Special case, we only optimize conv with filter width 3, filter height
// 2, and stride height 2
def Hasfw5fh2
: Constraint<CPred<"$0.getType().cast<ShapedType>().getRank() == 4 && "
"$0.getType().cast<ShapedType>().getDimSize(1) == 5 && "
"$0.getType().cast<ShapedType>().getDimSize(1) == 3 && "
"$0.getType().cast<ShapedType>().getDimSize(2) == 2">>;

// F32 TFL_Conv2D() -> XC_Beta_ConvF32()
def :
Pat<(TFL_Conv2DOp: $output TensorOf<[F32]>:$input, TensorOf<[F32]>:$f, TensorOf<[F32]>:$b, $dh, $dw, $faf, $wf, ConstantAttr<I32Attr, "3">, ConstantAttr<I32Attr, "1">),
Pat<(TFL_Conv2DOp: $output TensorOf<[F32]>:$input, TensorOf<[F32]>:$f, TensorOf<[F32]>:$b, $dh, $dw, $faf, $wf, ConstantAttr<I32Attr, "2">, ConstantAttr<I32Attr, "1">),
(XC_Beta_ConvF32Op $input, $f, $b),
[(Hasfw5fh2 $f)]>;
[(Hasfw5fh2 $f), (isBetaFloatEnabled)]>;

// F32 TFL_TransposeConv2D() -> XC_Beta_TransposeConvF32()
// // F32 TFL_TransposeConv2D() -> XC_Beta_TransposeConvF32()
def :
Pat<(TFL_TransposeConvOp: $output $outshape, TensorOf<[F32]>:$f, TensorOf<[F32]>:$input, TensorOf<[F32]>:$b, $wf, ConstantAttr<I32Attr, "3">, ConstantAttr<I32Attr, "1">, $faf),
Pat<(TFL_TransposeConvOp: $output $outshape, TensorOf<[F32]>:$f, TensorOf<[F32]>:$input, TensorOf<[F32]>:$b, $wf, ConstantAttr<I32Attr, "2">, ConstantAttr<I32Attr, "1">, $faf),
(XC_Beta_TransposeConvF32Op $input, $f, $b),
[(Hasfw5fh2 $f)]>;
[(Hasfw5fh2 $f), (isBetaFloatEnabled)]>;

// F32 TFL_FullyConnected() -> XC_Beta_FcF32()
// // F32 TFL_FullyConnected() -> XC_Beta_FcF32()
def :
Pat<(TFL_FullyConnectedOp: $output TensorOf<[F32]>:$input, TensorOf<[F32]>:$f, $b, $faf, $wf, $knd, $aqi),
(XC_Beta_FcF32Op $input, $f)>;
(XC_Beta_FcF32Op $input, $f), [(isBetaFloatEnabled)]>;
3 changes: 3 additions & 0 deletions xformer/Transforms/Options.h
@@ -9,9 +9,12 @@
namespace mlir {
namespace xcore {

extern llvm::cl::opt<bool> enableBetaFloatOption;
extern llvm::cl::opt<unsigned> threadCountOption;
extern llvm::cl::opt<std::string> flashImageFilenameOption;
extern llvm::cl::opt<unsigned> loadExternallyIfLargerOption;
extern llvm::cl::opt<bool> tileLoadOption;
extern llvm::cl::opt<unsigned> maxLoadExternalSizeOption;
extern llvm::cl::opt<double> convQuantErrorThresholdOption;
extern llvm::cl::opt<bool> convForceErrorCheckOption;
extern llvm::cl::opt<unsigned> convMultiplierFactorOption;
2 changes: 2 additions & 0 deletions xformer/Transforms/ReplaceConv2D.cpp
@@ -157,6 +157,8 @@ struct ReplaceConv2D
void runOnOperation() override;
};

bool isBetaFloatEnabled() { return enableBetaFloatOption; }

namespace convpatterns {
#include "Transforms/GeneratedConvPatterns.inc"
}
18 changes: 11 additions & 7 deletions xformer/Transforms/TranslateToCustomOp.cpp
@@ -14,18 +14,20 @@ namespace mlir {
namespace xcore {

std::vector<uint8_t> Bsign8Op::buildCustomOptions() { return {}; }
std::vector<uint8_t> Beta_ConvF32Op::buildCustomOptions() { return {}; }
std::vector<uint8_t> Beta_TransposeConvF32Op::buildCustomOptions() {
return {};
}
std::vector<uint8_t> Beta_FcF32Op::buildCustomOptions() { return {}; }

std::vector<uint8_t> LookupOp::buildCustomOptions() {
std::vector<uint8_t> Beta_ActivationF32Op::buildCustomOptions() {
flexbuffers::Builder fbb;
fbb.Map([&]() { fbb.Int("tc", (int32_t)getThreadCount()); });
fbb.Map([&]() { fbb.Int("type", (int32_t)getType()); });
fbb.Finish();
return fbb.GetBuffer();
}
std::vector<uint8_t> Beta_ConcatF32Op::buildCustomOptions() { return {}; }
std::vector<uint8_t> Beta_ConvF32Op::buildCustomOptions() { return {}; }
std::vector<uint8_t> Beta_TransposeConvF32Op::buildCustomOptions() {
return {};
}
std::vector<uint8_t> Beta_FcF32Op::buildCustomOptions() { return {}; }
std::vector<uint8_t> LookupOp::buildCustomOptions() { return {}; }
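
Beta_ActivationF32Op is the only beta float op that carries custom options: a flexbuffer map with a single "type" entry holding the code produced by getActivationType. A sketch of how a consumer of the resulting flatbuffer could decode it; the function name is illustrative, while the flexbuffers calls are the standard FlatBuffers API:

#include <cstddef>
#include <cstdint>
#include "flatbuffers/flexbuffers.h"

// Reads back the "type" field written by Beta_ActivationF32Op::buildCustomOptions.
int32_t readActivationType(const uint8_t *custom_options, size_t length) {
  const auto map = flexbuffers::GetRoot(custom_options, length).AsMap();
  return map["type"].AsInt32();
}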

std::vector<uint8_t> AddOp::buildCustomOptions() {
flexbuffers::Builder fbb;
@@ -176,6 +178,8 @@ void TranslateToCustomOp::runOnOperation() {
patterns.insert<RewriteToCustomOp<PadOp>>(ctx);
patterns.insert<RewriteToCustomOp<Pad3To4Op>>(ctx);
patterns.insert<RewriteToCustomOp<StridedSliceOp>>(ctx);
patterns.insert<RewriteToCustomOp<Beta_ActivationF32Op>>(ctx);
patterns.insert<RewriteToCustomOp<Beta_ConcatF32Op>>(ctx);
patterns.insert<RewriteToCustomOp<Beta_ConvF32Op>>(ctx);
patterns.insert<RewriteToCustomOp<Beta_TransposeConvF32Op>>(ctx);
patterns.insert<RewriteToCustomOp<Beta_FcF32Op>>(ctx);
49 changes: 25 additions & 24 deletions xformer/Transforms/WriteFlashImage.cpp
@@ -4,6 +4,7 @@
#include "IR/XCoreOps.h"
#include "Transforms/Options.h"
#include "Utils/FileIO.h"
#include "Utils/TileRamSupport.h"

#include "mlir/Pass/Pass.h"
#include "mlir/Support/FileUtilities.h"
@@ -94,6 +95,8 @@ struct WriteFlashImagePattern : public OpRewritePattern<LoadConstantOp> {
for (int i = 0; i < opNums.size(); i++) {
ownerOp->setOperand(opNums[i], loadFlashOp.getResult(i));
}

loadFlashOp->moveBefore(ownerOp);
loadOp.erase();
} else {
std::vector<char> loadOpData = getTensorData(loadOp);
@@ -103,6 +106,18 @@ struct WriteFlashImagePattern : public OpRewritePattern<LoadConstantOp> {
loadOp.getLoc(), loadOp.getType(), address,
rewriter.getArrayAttr(dataSizes));
rewriter.replaceOp(loadOp, loadFlashOp.getOutput());

// Find all uses of loadFlashOp and find the first Owner op
// so that we can move the loading to just before that op.
mlir::Operation *firstOwnerOp =
loadFlashOp->getResult(0).getUses().begin()->getOwner();
for (const mlir::OpOperand &use : loadFlashOp->getResult(0).getUses()) {
mlir::Operation *op = use.getOwner();
if (op->isBeforeInBlock(firstOwnerOp)) {
firstOwnerOp = op;
}
}
loadFlashOp->moveBefore(firstOwnerOp);
}

tensorsVec_->push_back(tensorData);
@@ -114,27 +129,6 @@ struct WriteFlashImagePattern : public OpRewritePattern<LoadConstantOp> {
std::vector<std::vector<char>> *tensorsVec_;
};

struct MoveLoadOpPattern : public OpRewritePattern<LoadFlashOp> {
MoveLoadOpPattern(MLIRContext *context)
: OpRewritePattern<LoadFlashOp>(context) {}

LogicalResult matchAndRewrite(LoadFlashOp loadFlashOp,
PatternRewriter &rewriter) const override {
// Constants are usually allocated in the beginning of the function.
// Lowering them to load from flash op leads to loading constants from flash
// occurring in the beginning of graph execution before other ops are
// executed, thereby needing a much larger tensor arena.
// We move the op to right before the user op (user op would be conv or
// lookup op etc, any op that is using the constant).
// This is so that when we lower to flatbuffer the loadOp will be located
// in the graph close to the user op.
Operation *ownerOp =
loadFlashOp->getResult(0).getUses().begin()->getOwner();
loadFlashOp->moveBefore(ownerOp);
return success();
}
};
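
The deleted MoveLoadOpPattern above performed this relocation as a separate pattern, always taking the first use it found. The commit folds the same idea into WriteFlashImagePattern, but searches all uses for the earliest one in the block. That search, restated as a standalone helper purely for readability (the helper name is illustrative and, like isBeforeInBlock, it assumes all users sit in the same block):

mlir::Operation *getEarliestUserInBlock(mlir::Value value) {
  mlir::Operation *earliest = value.getUses().begin()->getOwner();
  for (mlir::OpOperand &use : value.getUses()) {
    mlir::Operation *user = use.getOwner();
    if (user->isBeforeInBlock(earliest))
      earliest = user;
  }
  return earliest;
}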

void WriteFlashImage::runOnOperation() {
func::FuncOp f = getOperation();
if (flashImageFilenameOption.empty()) {
@@ -150,12 +144,19 @@
std::vector<std::vector<char>> tensorsVec;
RewritePatternSet patterns(ctx);
patterns.insert<WriteFlashImagePattern>(&tensorsVec, ctx);
patterns.insert<MoveLoadOpPattern>(ctx);
(void)applyPatternsAndFoldGreedily(func, std::move(patterns));

if (tileLoadOption) {
if (failed(utils::writeTileServerDataToFile(flashImageFilenameOption,
tensorsVec))) {
f.emitError("Failed to write tile data!");
signalPassFailure();
return;
}
}
// Write tensor data to flash image file
if (failed(
utils::writeFlashImageToFile(flashImageFilenameOption, tensorsVec))) {
else if (failed(utils::writeFlashImageToFile(flashImageFilenameOption,
tensorsVec))) {
f.emitError("Failed to write flash image!");
signalPassFailure();
return;
17 changes: 14 additions & 3 deletions xformer/Transforms/XCPatterns.td
@@ -14,17 +14,28 @@ include "Utils/Utils.td"
def getLookupTable
: NativeCodeCall<"getLookupTable($_builder, $0.getDefiningOp())">;

def getThreadCount : NativeCodeCall<"getThreadCount($_builder)">;

foreach activationOp =
[TFL_ReluOp, TFL_Relu6Op, TFL_TanhOp, TFL_LogisticOp, TFL_HardSwishOp] in {
def:
Pat<(activationOp
: $output TensorOf<[QI8]>:$input),
(XC_LookupOp $input, (Arith_ConstantOp (getLookupTable
$output)), (getThreadCount))>;
$output)))>;
}

def getActivationType
: NativeCodeCall<"getActivationType($_builder, $0.getDefiningOp())">;

foreach activationOp = [TFL_EluOp, TFL_LogisticOp, TFL_TanhOp] in {
def:
Pat<(activationOp
: $output TensorOf<[F32]>:$input),
(XC_Beta_ActivationF32Op $input, (getActivationType $output)), [(isBetaFloatEnabled)]>;
}

def : Pat<(TFL_ConcatenationOp $input, $axis, $faf),
(XC_Beta_ConcatF32Op $input), [(isBetaFloatEnabled)]>;

def getPadValue : NativeCodeCall<"getPadValue($_builder, $0)">;

def getPaddingPlan
36 changes: 36 additions & 0 deletions xformer/Utils/FileIO.cpp
@@ -2,13 +2,16 @@
// XMOS Public License: Version 1

#include "Utils/FileIO.h"
#include "Utils/TileRamSupport.h"

#include "mlir/Support/FileUtilities.h"
#include "tensorflow/compiler/mlir/lite/flatbuffer_export.h"
#include "tensorflow/compiler/mlir/lite/flatbuffer_import.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ToolOutputFile.h"

#include <iomanip>

namespace mlir {
namespace xcore {
namespace utils {
@@ -35,6 +38,39 @@ LogicalResult writeFlashImageToFile(const std::string &filename,
return utils::writeDataToFile(filename, data);
}

LogicalResult
writeTileServerDataToFile(const std::string &filename,
std::vector<std::vector<char>> tensorsVec) {
// Add header
auto tileHeader = utils::tileRamHeader();
tensorsVec.insert(tensorsVec.begin(), tileHeader);

std::ostringstream out;
out << R"(#ifndef TILESERVERGEN_H
#define TILESERVERGEN_H
const int8_t tile_server_weights[] = {
)";
int lineEnding = 0;
for (auto const &tensor : tensorsVec) {
for (auto const &i : tensor) {
out << (int)i << ", ";
lineEnding++;
if (lineEnding > 80) {
out << "\n";
lineEnding = 0;
}
}
}

out << R"(};
#endif // TILESERVERGEN_H
)";

return utils::writeDataToFile(filename, out.str());
}
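
For reference, the emitted file is an ordinary C/C++ header: the tile RAM header bytes from utils::tileRamHeader() come first, followed by every tensor byte as a signed decimal, with a line break roughly every 80 values. An illustrative fragment with invented byte values:

#ifndef TILESERVERGEN_H
#define TILESERVERGEN_H
const int8_t tile_server_weights[] = {
7, 0, -1, 127, 0, 12, -8, 64, 1, 0,
};
#endif // TILESERVERGEN_H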

LogicalResult getFlatBufferStringFromMLIR(
mlir::ModuleOp module, std::map<std::string, std::string> metadata,
const bool &dontMinify, std::string &flatBufferString) {
4 changes: 4 additions & 0 deletions xformer/Utils/FileIO.h
@@ -16,6 +16,10 @@ LogicalResult writeDataToFile(const std::string &filename, std::string data);
LogicalResult writeFlashImageToFile(const std::string &filename,
std::vector<std::vector<char>> tensorsVec);

LogicalResult
writeTileServerDataToFile(const std::string &filename,
std::vector<std::vector<char>> tensorsVec);

LogicalResult getFlatBufferStringFromMLIR(
mlir::ModuleOp module, std::map<std::string, std::string> metadata,
const bool &dontMinify, std::string &flatBufferString);