Change ld flash to ld weights and add alignment for weights in DDR
panickal-xmos committed Jul 29, 2024
1 parent 5b12afb commit a2a3519
Showing 14 changed files with 122 additions and 116 deletions.
11 changes: 7 additions & 4 deletions xformer/IR/XCoreOps.td
@@ -453,12 +453,15 @@ def XC_LoadConstantOp
let results = (outs AnyTensor : $output);
}

def XC_LoadFlashOp : XC_Op<"ld_flash", [Pure]> {
let summary = "Load from flash op";
def XC_LoadWeightsOp : XC_Op<"ld_weights", [Pure]> {
let summary = "Load weights op";

let description = [{Load from flash op.}];
let description = [{Load weights op.}];

let arguments = (ins I32Attr : $address, I32ArrayAttr : $sizes);
let arguments = (ins I32Attr
: $address, I32ArrayAttr
: $sizes, BoolAttr
: $in_ddr);

let results = (outs Variadic<AnyTensor> : $output);
}
2 changes: 1 addition & 1 deletion xformer/Test/invalid-loadconstantop.mlir
@@ -1,6 +1,6 @@
// RUN: xcore-opt --mlir-io %s --xcore-apply-loadconstantop-patterns -verify-diagnostics

// expected-error@+1 {{Flash image file option should be provided to run this pass!}}
// expected-error@+1 {{Weights file option should be provided to run this pass!}}
func.func @invalid(%arg0: tensor<?x4x8x1x!quant.uniform<i8:f32, 0.0078160231932997704>>) -> tensor<?x32x!quant.uniform<i8:f32, 0.037329975515604019:-13>> attributes {tf.entry_function = {inputs = "flatten_input", outputs = "Identity"}} {
%cst = arith.constant dense<[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]> : tensor<10xi8>
%cst_0 = arith.constant dense<[[11, 12, 13, 14, 15, 16, 17, 18, 19, 10]]> : tensor<1x10xi16>
4 changes: 2 additions & 2 deletions xformer/Test/invalid-loadflashop.mlir
@@ -1,6 +1,6 @@
// RUN: xcore-opt --mlir-io %s --xcore-write-flash-image -verify-diagnostics
// RUN: xcore-opt --mlir-io %s --xcore-write-weights -verify-diagnostics

// expected-error@+1 {{Flash image file option should be provided to run this pass!}}
// expected-error@+1 {{Weights file option should be provided to run this pass!}}
func.func @valid(%arg0: tensor<?x4x8x1x!quant.uniform<i8:f32, 0.0078160231932997704>>) -> tensor<?x32x!quant.uniform<i8:f32, 0.037329975515604019:-13>> attributes {tf.entry_function = {inputs = "flatten_input", outputs = "Identity"}} {
%cst = arith.constant dense<[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]> : tensor<10xi8>
%cst_0 = arith.constant dense<[[11, 12, 13, 14, 15, 16, 17, 18, 19, 10]]> : tensor<1x10xi16>
@@ -1,4 +1,4 @@
// RUN: xcore-opt --mlir-io %s --xcore-write-flash-image --xcore-weights-file=/dev/null | FileCheck %s
// RUN: xcore-opt --mlir-io %s --xcore-write-weights --xcore-weights-file=/dev/null | FileCheck %s

// CHECK-LABEL: valid
func.func @valid(%arg0: tensor<?x4x8x1x!quant.uniform<i8:f32, 0.0078160231932997704>>) -> tensor<?x32x!quant.uniform<i8:f32, 0.037329975515604019:-13>> attributes {tf.entry_function = {inputs = "flatten_input", outputs = "Identity"}} {
@@ -10,8 +10,8 @@ func.func @valid(%arg0: tensor<?x4x8x1x!quant.uniform<i8:f32, 0.0078160231932997
%cst_4 = "tfl.no_value"() {value} : () -> none
%0 = "tfl.reshape"(%arg0, %cst_3) : (tensor<?x4x8x1x!quant.uniform<i8:f32, 0.0078160231932997704>>, tensor<2xi32>) -> tensor<?x32x!quant.uniform<i8:f32, 0.0078160231932997704>>
%1 = "tfl.reshape"(%0, %cst_1) : (tensor<?x32x!quant.uniform<i8:f32, 0.0078160231932997704>>, tensor<4xi64>) -> tensor<?x1x1x32x!quant.uniform<i8:f32, 0.0078160231932997704>>
// CHECK: xc.ld_flash
// CHECK-NOT: xc.ld_flash
// CHECK: xc.ld_weights
// CHECK-NOT: xc.ld_weights
// CHECK: xc.conv2d_v2
%2 = "xc.ld_constant"(%cst) : (tensor<10xi8>) -> tensor<10xi8>
%3 = "xc.ld_constant"(%cst_0) : (tensor<1x10xi16>) -> tensor<1x10xi16>
2 changes: 1 addition & 1 deletion xformer/Transforms/ApplyLoadConstantOpPatterns.cpp
@@ -62,7 +62,7 @@ bool isNotUsedByLoadConstantOp(Value result) {
void ApplyLoadConstantOpPatterns::runOnOperation() {
func::FuncOp f = getOperation();
if (weightsFilenameOption.empty()) {
f.emitError("Flash image file option should be provided to run this pass!");
f.emitError("Weights file option should be provided to run this pass!");
signalPassFailure();
return;
}
2 changes: 1 addition & 1 deletion xformer/Transforms/Passes.cpp
@@ -46,7 +46,7 @@ void buildXCoreRemainingPassPipeline(OpPassManager &pm) {
pm.addPass(createReplaceBroadcastPass());
pm.addPass(createReplaceConcatPass());
pm.addPass(createApplyXCPatternsPass());
// Add to pipeline only if flash image file option is provided
// Add to pipeline only if weights file option is provided
if (!weightsFilenameOption.empty()) {
pm.addPass(createApplyLoadConstantOpPatternsPass());
pm.addPass(createWriteWeightsPass());
5 changes: 3 additions & 2 deletions xformer/Transforms/TranslateToCustomOp.cpp
@@ -121,7 +121,7 @@ std::vector<uint8_t> ConcatOp::buildCustomOptions() {
return fbb.GetBuffer();
}

std::vector<uint8_t> LoadFlashOp::buildCustomOptions() {
std::vector<uint8_t> LoadWeightsOp::buildCustomOptions() {
flexbuffers::Builder fbb;
auto rootMap = fbb.StartMap();
fbb.Int("addr", (int32_t)getAddress());
@@ -130,6 +130,7 @@ std::vector<uint8_t> LoadFlashOp::buildCustomOptions() {
fbb.Int(getSizes().cast<ArrayAttr>()[i].cast<IntegerAttr>().getInt());
}
fbb.EndVector(sizesVec, false, false);
fbb.Bool("ddr", (bool)getInDdr());
fbb.EndMap(rootMap);
fbb.Finish();
return fbb.GetBuffer();
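
On the consumer side, the serialized custom options can be read back with the flexbuffers reader. The following is a minimal sketch; the buffer/length parameters and the surrounding runtime integration are assumptions for illustration, not part of this commit:

#include "flatbuffers/flexbuffers.h"
#include <cstddef>
#include <cstdint>

// Sketch: decoding the custom options written by buildCustomOptions() above.
// `buffer` and `length` are assumed to hold the custom-options blob.
void decodeLoadWeightsOptions(const uint8_t *buffer, size_t length) {
  auto map = flexbuffers::GetRoot(buffer, length).AsMap();
  int32_t address = map["addr"].AsInt32(); // offset into the weights blob
  bool inDdr = map["ddr"].AsBool();        // flag added in this commit
  auto sizes = map["sizes"].AsVector();    // per-output byte sizes
  for (size_t i = 0; i < sizes.size(); ++i) {
    int32_t size = sizes[i].AsInt32();
    (void)size; // the runtime would use this to drive each load
  }
  (void)address;
  (void)inDdr;
}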
@@ -244,7 +245,7 @@ void TranslateToCustomOp::runOnOperation() {
patterns.insert<RewriteToCustomOp<Bsign8Op>>(ctx);
patterns.insert<RewriteToCustomOp<Conv2DV2Op>>(ctx);
patterns.insert<RewriteToCustomOp<MaxPool2DOp>>(ctx);
patterns.insert<RewriteToCustomOp<LoadFlashOp>>(ctx);
patterns.insert<RewriteToCustomOp<LoadWeightsOp>>(ctx);
patterns.insert<RewriteToCustomOp<LookupOp>>(ctx);
patterns.insert<RewriteToCustomOp<SoftmaxOp>>(ctx);
patterns.insert<RewriteToCustomOp<BatchedSoftmaxOp>>(ctx);
64 changes: 33 additions & 31 deletions xformer/Transforms/WriteWeights.cpp
@@ -15,16 +15,16 @@
namespace mlir::xcore {

namespace {
// Write flash image
// Write weights to a file
struct WriteWeights
: public PassWrapper<WriteWeights, OperationPass<func::FuncOp>> {
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(WriteWeights)

void getDependentDialects(DialectRegistry &registry) const final {
registry.insert<XCoreDialect>();
}
StringRef getArgument() const final { return "xcore-write-flash-image"; }
StringRef getDescription() const final { return "Write flash image"; }
StringRef getArgument() const final { return "xcore-write-weights"; }
StringRef getDescription() const final { return "Write weights"; }
void runOnOperation() override;
};

@@ -66,7 +66,11 @@ struct WriteWeightsPattern : public OpRewritePattern<LoadConstantOp> {
address += t.size();
}

if (loadOp.getResult().hasOneUse()) {
// Combine loads into one op only when the load has a single use and the
// weights are not in external memory.
// External memory loads have to be aligned to 32 bytes/256 bits for
// maximum speed.
if (loadOp.getResult().hasOneUse() && !weightsInExternalMemory) {
auto use = loadOp->use_begin();
Operation *ownerOp = use->getOwner();

@@ -87,36 +91,43 @@ struct WriteWeightsPattern : public OpRewritePattern<LoadConstantOp> {
}
}

auto loadFlashOp =
rewriter.create<LoadFlashOp>(loadOp.getLoc(), outputTypes, address,
rewriter.getArrayAttr(dataSizes));
auto loadWeightsOp = rewriter.create<LoadWeightsOp>(
loadOp.getLoc(), outputTypes, address,
rewriter.getArrayAttr(dataSizes), /*in_ddr=*/false);

for (int i = 0; i < opNums.size(); i++) {
ownerOp->setOperand(opNums[i], loadFlashOp.getResult(i));
ownerOp->setOperand(opNums[i], loadWeightsOp.getResult(i));
}

loadFlashOp->moveBefore(ownerOp);
loadWeightsOp->moveBefore(ownerOp);
loadOp.erase();
} else {
std::vector<char> loadOpData = getTensorData(loadOp);
dataSizes.push_back(rewriter.getI32IntegerAttr(loadOpData.size()));
tensorData.insert(tensorData.end(), loadOpData.begin(), loadOpData.end());
auto loadFlashOp = rewriter.create<LoadFlashOp>(
if (weightsInExternalMemory) {
// Pad tensor data to 32-byte alignment
auto alignedSize = ((loadOpData.size() + 31) / 32) * 32;
auto toBePaddedSize = alignedSize - loadOpData.size();
// Pad with zeros
tensorData.insert(tensorData.end(), toBePaddedSize, 0);
}
auto loadWeightsOp = rewriter.create<LoadWeightsOp>(
loadOp.getLoc(), loadOp.getType(), address,
rewriter.getArrayAttr(dataSizes));
rewriter.replaceOp(loadOp, loadFlashOp.getOutput());
rewriter.getArrayAttr(dataSizes), /*in_ddr=*/weightsInExternalMemory);
rewriter.replaceOp(loadOp, loadWeightsOp.getOutput());

// Find all uses of loadFlashOp and find the first Owner op
// Find all uses of loadWeightsOp and find the first Owner op
// so that we can move the loading to just before that op.
mlir::Operation *firstOwnerOp =
loadFlashOp->getResult(0).getUses().begin()->getOwner();
for (const mlir::OpOperand &use : loadFlashOp->getResult(0).getUses()) {
loadWeightsOp->getResult(0).getUses().begin()->getOwner();
for (const mlir::OpOperand &use : loadWeightsOp->getResult(0).getUses()) {
mlir::Operation *op = use.getOwner();
if (op->isBeforeInBlock(firstOwnerOp)) {
firstOwnerOp = op;
}
}
loadFlashOp->moveBefore(firstOwnerOp);
loadWeightsOp->moveBefore(firstOwnerOp);
}

tensorsVec_->push_back(tensorData);
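
The padding above rounds each DDR-resident tensor up to the next multiple of 32 bytes. A quick self-contained check of that arithmetic, with illustrative sizes only:

#include <cassert>
#include <cstddef>

// Sketch: round a tensor's byte size up to a 32-byte boundary, as done
// for weights placed in external memory (DDR) above.
static size_t alignTo32(size_t size) { return ((size + 31) / 32) * 32; }

int main() {
  assert(alignTo32(100) == 128); // 28 zero bytes of padding appended
  assert(alignTo32(64) == 64);   // already aligned, nothing appended
  assert(alignTo32(1) == 32);
  return 0;
}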
@@ -131,33 +142,24 @@ struct WriteWeightsPattern : public OpRewritePattern<LoadConstantOp> {
void WriteWeights::runOnOperation() {
func::FuncOp f = getOperation();
if (weightsFilenameOption.empty()) {
f.emitError("Flash image file option should be provided to run this pass!");
f.emitError("Weights file option should be provided to run this pass!");
signalPassFailure();
return;
}

auto *ctx = &getContext();
func::FuncOp func = getOperation();
// For each LoadOp in the graph, save the tensor data, and replace the LoadOp
// with a LoadFlashOp
// with a LoadWeightsOp
std::vector<std::vector<char>> tensorsVec;
RewritePatternSet patterns(ctx);
patterns.insert<WriteWeightsPattern>(&tensorsVec, ctx);
(void)applyPatternsAndFoldGreedily(func, std::move(patterns));

if (weightsAsArrayOption) {
if (failed(utils::writeTileServerDataToFile(weightsFilenameOption,
tensorsVec,
weightsInExternalMemory))) {
f.emitError("Failed to write tile data!");
signalPassFailure();
return;
}
}
// Write tensor data to flash image file
else if (failed(
utils::writeWeightsToFile(weightsFilenameOption, tensorsVec))) {
f.emitError("Failed to write flash image!");
if (failed(utils::writeWeightsToFile(weightsFilenameOption, tensorsVec,
weightsAsArrayOption,
weightsInExternalMemory))) {
f.emitError("Failed to write weights to file!");
signalPassFailure();
return;
}
97 changes: 51 additions & 46 deletions xformer/Utils/FileIO.cpp
@@ -24,63 +24,68 @@ LogicalResult writeDataToFile(const std::string &filename, std::string data) {
}

LogicalResult writeWeightsToFile(const std::string &filename,
std::vector<std::vector<char>> tensorsVec) {
// Combine data for the tensors
std::string data;
for (auto const &tensor : tensorsVec) {
data += std::string(tensor.data(), tensor.size());
}

return utils::writeDataToFile(filename, data);
}

LogicalResult
writeTileServerDataToFile(const std::string &filename,
std::vector<std::vector<char>> tensorsVec,
bool placeInExternalMemory) {
// Add header
auto tileHeader = utils::tileRamHeader();
tensorsVec.insert(tensorsVec.begin(), tileHeader);

std::ostringstream cOut;
cOut << R"(#include <stdint.h>)";

if (placeInExternalMemory) {
cOut << "\n\n" << R"(__attribute__ ((section(".ExtMem.data"))))" << "\n";
}
std::vector<std::vector<char>> tensorsVec,
bool writeWeightsAsArray,
bool placeInExternalMemory) {
if (writeWeightsAsArray) {
std::ostringstream cOut;
cOut << R"(#include <stdint.h>)";

if (placeInExternalMemory) {
cOut << "\n\n"
<< R"(__attribute__ ((section(".ExtMem.data"))))"
<< "\n";
} else {
// Weights are to be placed in SRAM tile
// Add tile ram server header
auto tileHeader = utils::tileRamServerHeader();
tensorsVec.insert(tensorsVec.begin(), tileHeader);
}

cOut << "const int8_t tile_server_weights[] = {\n";
int lineEnding = 0;
int weightsSize = 0;
for (auto const &tensor : tensorsVec) {
for (auto const &i : tensor) {
cOut << (int)i << ", ";
lineEnding++;
weightsSize++;
if (lineEnding > 80) {
cOut << "\n";
lineEnding = 0;
cOut << "const int8_t weights[] = {\n";
int lineEnding = 0;
int weightsSize = 0;
for (auto const &tensor : tensorsVec) {
for (auto const &i : tensor) {
cOut << (int)i << ", ";
lineEnding++;
weightsSize++;
if (lineEnding > 80) {
cOut << "\n";
lineEnding = 0;
}
}
}
}

cOut << R"(};
cOut << R"(};
)";

if (failed(utils::writeDataToFile(filename + ".c", cOut.str()))) {
return failure();
}
if (failed(utils::writeDataToFile(filename + ".c", cOut.str()))) {
return failure();
}

std::ostringstream hOut;
hOut << R"(#ifndef TILESERVERGEN_H
#define TILESERVERGEN_H
std::ostringstream hOut;
hOut << R"(#ifndef WEIGHTSGEN_H
#define WEIGHTSGEN_H
#define TILE_SERVER_WEIGHTS_SIZE ()" << weightsSize << R"(U)
#define WEIGHTS_SIZE ()"
<< weightsSize << R"(U)
#endif // TILESERVERGEN_H
#endif // WEIGHTSGEN_H
)";

return utils::writeDataToFile(filename + ".h", hOut.str());
return utils::writeDataToFile(filename + ".h", hOut.str());

} else {
// Write data for flash image
// Combine data for the tensors
std::string data;
for (auto const &tensor : tensorsVec) {
data += std::string(tensor.data(), tensor.size());
}

return utils::writeDataToFile(filename, data);
}
}

LogicalResult getFlatBufferStringFromMLIR(
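
For reference, when weightsAsArrayOption is set the function above emits a C source/header pair, and with placeInExternalMemory the array is tagged with the .ExtMem.data section. Below is an abridged sketch of what the generated <filename>.c and <filename>.h could look like; the byte values and total size are made up for illustration:

/* <filename>.c (sketch, DDR case) */
#include <stdint.h>

__attribute__ ((section(".ExtMem.data")))
const int8_t weights[] = {
12, -3, 0, 127, /* ... remaining tensor bytes, each tensor padded to 32 bytes ... */
};

/* <filename>.h (sketch) */
#ifndef WEIGHTSGEN_H
#define WEIGHTSGEN_H
#define WEIGHTS_SIZE (4096U) /* number of bytes emitted into weights[] */
#endif // WEIGHTSGEN_H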
9 changes: 3 additions & 6 deletions xformer/Utils/FileIO.h
@@ -11,12 +11,9 @@ namespace mlir::xcore::utils {
LogicalResult writeDataToFile(const std::string &filename, std::string data);

LogicalResult writeWeightsToFile(const std::string &filename,
std::vector<std::vector<char>> tensorsVec);

LogicalResult
writeTileServerDataToFile(const std::string &filename,
std::vector<std::vector<char>> tensorsVec,
bool placeInExternalMemory);
std::vector<std::vector<char>> tensorsVec,
bool writeWeightsAsArray,
bool placeInExternalMemory);

LogicalResult getFlatBufferStringFromMLIR(
mlir::ModuleOp module, std::map<std::string, std::string> metadata,
2 changes: 1 addition & 1 deletion xformer/Utils/TileRamSupport.cpp
@@ -2,7 +2,7 @@

namespace mlir::xcore::utils {

std::vector<char> tileRamHeader() {
std::vector<char> tileRamServerHeader() {
// TODO: Change flash_t struct to mem_server_header_t
// We are reusing the flash_t struct in lib_tflite_micro as the header
// The header version is stored as one integer
2 changes: 1 addition & 1 deletion xformer/Utils/TileRamSupport.h
@@ -7,7 +7,7 @@ namespace mlir::xcore::utils {

/** Function that creates a tile_ram_header
*/
std::vector<char> tileRamHeader();
std::vector<char> tileRamServerHeader();

} // namespace mlir::xcore::utils
