diff --git a/third_party/lib_nn b/third_party/lib_nn index b43e7a1e6..9bb528273 160000 --- a/third_party/lib_nn +++ b/third_party/lib_nn @@ -1 +1 @@ -Subproject commit b43e7a1e6c52c3ee21874a970e02cda124a862c8 +Subproject commit 9bb528273ea8c73b23ae4af389f553a09ed1dbd8 diff --git a/third_party/lib_tflite_micro b/third_party/lib_tflite_micro index 37310d212..bc21cf4bf 160000 --- a/third_party/lib_tflite_micro +++ b/third_party/lib_tflite_micro @@ -1 +1 @@ -Subproject commit 37310d21265d43fe6d838f8c5bae70e6caedd9a5 +Subproject commit bc21cf4bfa5127dd21d5379ea22ec19fc8642c23 diff --git a/xformer/Transforms/ConvPatterns.h b/xformer/Transforms/ConvPatterns.h index 21ceffdac..fcbffaa81 100644 --- a/xformer/Transforms/ConvPatterns.h +++ b/xformer/Transforms/ConvPatterns.h @@ -7,7 +7,10 @@ #include "IR/XCoreOps.h" #include "larq_compute_engine/mlir/ir/lce_ops.h" -#include "lib_nn/api/Conv2d.hpp" +#include "lib_nn/api/AbstractKernel.hpp" +#include "lib_nn/api/AggregateFn.hpp" +#include "lib_nn/api/MemCpyFn.hpp" +#include "lib_nn/api/OutputTransformFn.hpp" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" @@ -244,16 +247,16 @@ class ReplaceDepthwiseConv2DPattern std::vector &mulsBiasesData) const; }; -template -llvm::SmallVector getAbstractKernelParamsForMultipleThreads( +static llvm::SmallVector getAbstractKernelParamsForMultipleThreads( llvm::SmallVector> imageRegionSplits, const nn::ImageGeometry &Y) { llvm::SmallVector abstractKernelParams; for (auto &regionsplits : imageRegionSplits) { auto ir = nn::ImageRegion(regionsplits[0], regionsplits[1], 0, regionsplits[2], regionsplits[3], Y.depth); - Filter2DParams akParams(Y, ir, VPU_INT8_ACC_PERIOD); - std::string akpStr = akParams.template serialise(); + nn::AbstractKernel ak(Y, ir, VPU_INT8_ACC_PERIOD); + auto akParams = ak.getParams(); + std::string akpStr = std::string((char *)&akParams, sizeof(akParams)); 
abstractKernelParams.push_back(akpStr); } return abstractKernelParams; diff --git a/xformer/Transforms/ConvPatternsLCE.cpp b/xformer/Transforms/ConvPatternsLCE.cpp index e77cc1c60..b0213170d 100644 --- a/xformer/Transforms/ConvPatternsLCE.cpp +++ b/xformer/Transforms/ConvPatternsLCE.cpp @@ -281,14 +281,16 @@ LogicalResult ReplaceBConv2DPattern::getBConv2DValidDirectBinaryParams( llvm::SmallVector &abstractKernelParams, std::vector &weightsData, std::vector &thresholdsData, int &scratchBytes) const { - nn::DerefInputFn::Params imToColParams(args.X, args.K); + nn::DerefInputFn imToCol(args.X, args.K); + auto imToColParams = imToCol.getParams(); std::array filterShape = {args.outputDepth, args.filterHeight, args.filterWidth, args.inputDepth}; nn::Conv2dReorderedWeights rw = nn::MatMulInt8::reorder_kernel_weights( (int8_t *)args.filter.data(), filterShape, 1, args.padValue); - nn::MatMulBinaryDirectFn::Params afParams(args.X, args.K, args.inputDepth); + nn::MatMulBinaryDirectFn af(args.X, args.K, args.inputDepth); + auto afParams = af.getParams(); // adjust the thresholds from xorpopcount space // to xcore space @@ -298,13 +300,13 @@ LogicalResult ReplaceBConv2DPattern::getBConv2DValidDirectBinaryParams( nn::OutputTransformFn::pad_final_access(adjustedThresholds, VPU_INT16_EPV, (int16_t)args.padValue); - std::string mfStr = imToColParams.serialise(); - std::string afStr = afParams.serialise(); - std::string otStr = ""; // otParams.serialise(); + std::string mfStr = + std::string((char *)&imToColParams, sizeof(imToColParams)); + std::string afStr = std::string((char *)&afParams, sizeof(afParams)); + std::string otStr = ""; abstractKernelParams = - getAbstractKernelParamsForMultipleThreads( - args.imageRegionSplits, args.Y); + getAbstractKernelParamsForMultipleThreads(args.imageRegionSplits, args.Y); strParams.push_back(mfStr); strParams.push_back(afStr); strParams.push_back(otStr); @@ -320,7 +322,8 @@ LogicalResult 
ReplaceBConv2DPattern::getBConv2DValidIndirectBinaryParams( llvm::SmallVector &abstractKernelParams, std::vector &weightsData, std::vector &thresholdsData, int &scratchBytes) const { - nn::ImToColValid::Params imToColParams(args.X, args.K, args.inputDepth); + nn::ImToColValid imToCol(args.X, args.K, args.inputDepth); + auto imToColParams = imToCol.getParams(); std::array filterShape = {args.outputDepth, args.filterHeight, args.filterWidth, args.inputDepth}; @@ -330,7 +333,8 @@ LogicalResult ReplaceBConv2DPattern::getBConv2DValidIndirectBinaryParams( const int elementsPerByte = 8; int inputBytes = args.filterHeight * args.filterWidth * args.inputDepth / elementsPerByte; - nn::MatMulBinary::Params afParams(args.outputDepth, inputBytes); + nn::MatMulBinary af(args.outputDepth, inputBytes); + auto afParams = af.getParams(); // adjust the thresholds from xorpopcount space // to xcore space @@ -340,13 +344,13 @@ LogicalResult ReplaceBConv2DPattern::getBConv2DValidIndirectBinaryParams( nn::OutputTransformFn::pad_final_access(adjustedThresholds, VPU_INT16_EPV, (int16_t)args.padValue); - std::string mfStr = imToColParams.serialise(); - std::string afStr = afParams.serialise(); - std::string otStr = ""; // otParams.serialise(); + std::string mfStr = + std::string((char *)&imToColParams, sizeof(imToColParams)); + std::string afStr = std::string((char *)&afParams, sizeof(afParams)); + std::string otStr = ""; abstractKernelParams = - getAbstractKernelParamsForMultipleThreads( - args.imageRegionSplits, args.Y); + getAbstractKernelParamsForMultipleThreads(args.imageRegionSplits, args.Y); strParams.push_back(mfStr); strParams.push_back(afStr); strParams.push_back(otStr); @@ -362,14 +366,16 @@ LogicalResult ReplaceBConv2DPattern::getBConv2DValidDirectInt8Params( llvm::SmallVector &abstractKernelParams, std::vector &weightsData, std::vector &mulsBiasesData, int &scratchBytes) const { - nn::DerefInputFn::Params imToColParams(args.X, args.K); + nn::DerefInputFn imToCol(args.X, 
args.K); + auto imToColParams = imToCol.getParams(); std::array filterShape = {args.outputDepth, args.filterHeight, args.filterWidth, args.inputDepth}; nn::Conv2dReorderedWeights rw = nn::MatMulInt8::reorder_kernel_weights( (int8_t *)args.filter.data(), filterShape, 1, args.padValue); - nn::MatMulBinaryDirectFn::Params afParams(args.X, args.K, args.inputDepth); + nn::MatMulBinaryDirectFn af(args.X, args.K, args.inputDepth); + auto afParams = af.getParams(); int receptiveVolume = args.filterHeight * args.filterWidth * args.inputDepth; nn::MulsAndBias mulAndBiases = nn::OT_int8_clamped::canonicalise_mul_and_bias( @@ -388,16 +394,17 @@ LogicalResult ReplaceBConv2DPattern::getBConv2DValidDirectInt8Params( VPU_INT16_EPV, (int16_t)args.padValue); - nn::OT_int8_clamped::Params otParams((int32_t)args.outputDepth, - qp.initial_shr, qp.final_shr); + nn::OT_int8_clamped ot((int32_t)args.outputDepth, qp.initial_shr, + qp.final_shr); + auto otParams = ot.getParams(); - std::string mfStr = imToColParams.serialise(); - std::string afStr = afParams.serialise(); - std::string otStr = otParams.serialise(); + std::string mfStr = + std::string((char *)&imToColParams, sizeof(imToColParams)); + std::string afStr = std::string((char *)&afParams, sizeof(afParams)); + std::string otStr = std::string((char *)&otParams, sizeof(otParams)); abstractKernelParams = - getAbstractKernelParamsForMultipleThreads( - args.imageRegionSplits, args.Y); + getAbstractKernelParamsForMultipleThreads(args.imageRegionSplits, args.Y); strParams.push_back(mfStr); strParams.push_back(afStr); strParams.push_back(otStr); @@ -413,7 +420,8 @@ LogicalResult ReplaceBConv2DPattern::getBConv2DValidIndirectInt8Params( llvm::SmallVector &abstractKernelParams, std::vector &weightsData, std::vector &mulsBiasesData, int &scratchBytes) const { - nn::ImToColValid::Params imToColParams(args.X, args.K, args.inputDepth); + nn::ImToColValid imToCol(args.X, args.K, args.inputDepth); + auto imToColParams = imToCol.getParams(); 
std::array filterShape = {args.outputDepth, args.filterHeight, args.filterWidth, args.inputDepth}; @@ -424,7 +432,8 @@ LogicalResult ReplaceBConv2DPattern::getBConv2DValidIndirectInt8Params( int inputBytes = args.filterHeight * args.filterWidth * args.inputDepth / elementsPerByte; - nn::MatMulBinary::Params afParams(args.outputDepth, inputBytes); + nn::MatMulBinary af(args.outputDepth, inputBytes); + auto afParams = af.getParams(); int receptiveVolume = args.filterHeight * args.filterWidth * args.inputDepth; nn::MulsAndBias mulAndBiases = nn::OT_int8_clamped::canonicalise_mul_and_bias( @@ -443,16 +452,17 @@ LogicalResult ReplaceBConv2DPattern::getBConv2DValidIndirectInt8Params( VPU_INT16_EPV, (int16_t)args.padValue); - nn::OT_int8_clamped::Params otParams((int32_t)args.outputDepth, - qp.initial_shr, qp.final_shr); + nn::OT_int8_clamped ot((int32_t)args.outputDepth, qp.initial_shr, + qp.final_shr); + auto otParams = ot.getParams(); - std::string mfStr = imToColParams.serialise(); - std::string afStr = afParams.serialise(); - std::string otStr = otParams.serialise(); + std::string mfStr = + std::string((char *)&imToColParams, sizeof(imToColParams)); + std::string afStr = std::string((char *)&afParams, sizeof(afParams)); + std::string otStr = std::string((char *)&otParams, sizeof(otParams)); abstractKernelParams = - getAbstractKernelParamsForMultipleThreads( - args.imageRegionSplits, args.Y); + getAbstractKernelParamsForMultipleThreads(args.imageRegionSplits, args.Y); strParams.push_back(mfStr); strParams.push_back(afStr); strParams.push_back(otStr); diff --git a/xformer/Transforms/ConvPatternsTFL.cpp b/xformer/Transforms/ConvPatternsTFL.cpp index eb4afcafd..dcbd41218 100644 --- a/xformer/Transforms/ConvPatternsTFL.cpp +++ b/xformer/Transforms/ConvPatternsTFL.cpp @@ -309,10 +309,10 @@ LogicalResult ReplaceConv2DPattern::getOutputTransformParams( nn::OutputTransformFn::pad_final_access(serialisedMultipliersAndBiases, VPU_INT16_EPV, (int16_t)args.padValue); - 
nn::OT_int8_channelwise::Params otParams((int32_t)args.outputDepth, - qp.final_shr); + nn::OT_int8_channelwise ot((int32_t)args.outputDepth, qp.final_shr); + auto otParams = ot.getParams(); + otStr = std::string((char *)&otParams, sizeof(otParams)); - otStr = otParams.serialise(); mulsBiasesData = serialisedMultipliersAndBiases; return success(); } @@ -323,10 +323,10 @@ LogicalResult ReplaceConv2DPattern::getOutputTransformParams( nn::OutputTransformFn::serialise_memory(qp.multipliers, qp.biases); nn::OutputTransformFn::pad_final_access( serialisedMultipliersAndBiases, VPU_INT16_EPV, (int16_t)args.padValue); - nn::OT_int8::Params otParams((int32_t)args.outputDepth, qp.initial_shr, - qp.final_shr); + nn::OT_int8 ot((int32_t)args.outputDepth, qp.initial_shr, qp.final_shr); + auto otParams = ot.getParams(); + otStr = std::string((char *)&otParams, sizeof(otParams)); - otStr = otParams.serialise(); mulsBiasesData = serialisedMultipliersAndBiases; } @@ -338,22 +338,24 @@ LogicalResult ReplaceConv2DPattern::getConv2DPaddedIndirectParams( llvm::SmallVector &abstractKernelParams, std::vector &weightsData, int &scratchBytes) const { - nn::ImToColPadded::Params imToColParams(args.X, args.K, args.padding, - args.inputDepth, args.inputZeroPoint); + nn::ImToColPadded imToCol(args.X, args.K, args.padding, args.inputDepth, + args.inputZeroPoint); + auto imToColParams = imToCol.getParams(); std::array filterShape = {args.outputDepth, args.filterHeight, args.filterWidth, args.inputDepth}; nn::Conv2dReorderedWeights rw = nn::MatMulInt8::reorder_kernel_weights( (int8_t *)args.filter.data(), filterShape, 8, args.padValue); int inputBytes = args.filterHeight * args.filterWidth * args.inputDepth; - nn::MatMulInt8::Params afParams(args.outputDepth, inputBytes); + nn::MatMulInt8 af(args.outputDepth, inputBytes); + auto afParams = af.getParams(); - std::string mfStr = imToColParams.serialise(); - std::string afStr = afParams.serialise(); + std::string mfStr = + std::string((char 
*)&imToColParams, sizeof(imToColParams)); + std::string afStr = std::string((char *)&afParams, sizeof(afParams)); abstractKernelParams = - getAbstractKernelParamsForMultipleThreads( - args.imageRegionSplits, args.Y); + getAbstractKernelParamsForMultipleThreads(args.imageRegionSplits, args.Y); strParams.push_back(mfStr); strParams.push_back(afStr); weightsData = rw.weights; @@ -368,21 +370,23 @@ LogicalResult ReplaceConv2DPattern::getConv2DValidIndirectParams( llvm::SmallVector &abstractKernelParams, std::vector &weightsData, int &scratchBytes) const { - nn::ImToColValid::Params imToColParams(args.X, args.K, args.inputDepth); + nn::ImToColValid imToCol(args.X, args.K, args.inputDepth); + auto imToColParams = imToCol.getParams(); std::array filterShape = {args.outputDepth, args.filterHeight, args.filterWidth, args.inputDepth}; nn::Conv2dReorderedWeights rw = nn::MatMulInt8::reorder_kernel_weights( (int8_t *)args.filter.data(), filterShape, 8, args.padValue); int inputBytes = args.filterHeight * args.filterWidth * args.inputDepth; - nn::MatMulInt8::Params afParams(args.outputDepth, inputBytes); + nn::MatMulInt8 af(args.outputDepth, inputBytes); + auto afParams = af.getParams(); - std::string mfStr = imToColParams.serialise(); - std::string afStr = afParams.serialise(); + std::string mfStr = + std::string((char *)&imToColParams, sizeof(imToColParams)); + std::string afStr = std::string((char *)&afParams, sizeof(afParams)); abstractKernelParams = - getAbstractKernelParamsForMultipleThreads( - args.imageRegionSplits, args.Y); + getAbstractKernelParamsForMultipleThreads(args.imageRegionSplits, args.Y); strParams.push_back(mfStr); strParams.push_back(afStr); weightsData = rw.weights; @@ -397,20 +401,22 @@ LogicalResult ReplaceConv2DPattern::getConv2DValidDirectParams( llvm::SmallVector &abstractKernelParams, std::vector &weightsData, int &scratchBytes) const { - nn::DerefInputFn::Params imToColParams(args.X, args.K); + nn::DerefInputFn imToCol(args.X, args.K); + auto 
imToColParams = imToCol.getParams(); std::array filterShape = {args.outputDepth, args.filterHeight, args.filterWidth, args.inputDepth}; nn::Conv2dReorderedWeights rw = nn::MatMulInt8::reorder_kernel_weights( (int8_t *)args.filter.data(), filterShape, 8, args.padValue); - nn::MatMulDirectFn::Params afParams(args.X, args.K, args.inputDepth); + nn::MatMulDirectFn af(args.X, args.K, args.inputDepth); + auto afParams = af.getParams(); - std::string mfStr = imToColParams.serialise(); - std::string afStr = afParams.serialise(); + std::string mfStr = + std::string((char *)&imToColParams, sizeof(imToColParams)); + std::string afStr = std::string((char *)&afParams, sizeof(afParams)); abstractKernelParams = - getAbstractKernelParamsForMultipleThreads( - args.imageRegionSplits, args.Y); + getAbstractKernelParamsForMultipleThreads(args.imageRegionSplits, args.Y); strParams.push_back(mfStr); strParams.push_back(afStr); weightsData = rw.weights; @@ -537,10 +543,10 @@ LogicalResult ReplaceDepthwiseConv2DPattern::getOutputTransformParams( nn::OutputTransformFn::pad_final_access(serialisedMultipliersAndBiases, VPU_INT16_EPV, (int16_t)args.padValue); - nn::OT_int8_channelwise::Params otParams((int32_t)args.outputDepth, - qp.final_shr); + nn::OT_int8_channelwise ot((int32_t)args.outputDepth, qp.final_shr); + auto otParams = ot.getParams(); - otStr = otParams.serialise(); + otStr = std::string((char *)&otParams, sizeof(otParams)); mulsBiasesData = serialisedMultipliersAndBiases; return success(); } @@ -550,10 +556,10 @@ LogicalResult ReplaceDepthwiseConv2DPattern::getOutputTransformParams( nn::OutputTransformFn::serialise_memory(qp.multipliers, qp.biases); nn::OutputTransformFn::pad_final_access( serialisedMultipliersAndBiases, VPU_INT16_EPV, (int16_t)args.padValue); - nn::OT_int8::Params otParams((int32_t)args.outputDepth, qp.initial_shr, - qp.final_shr); + nn::OT_int8 ot((int32_t)args.outputDepth, qp.initial_shr, qp.final_shr); + auto otParams = ot.getParams(); - otStr = 
otParams.serialise(); + otStr = std::string((char *)&otParams, sizeof(otParams)); mulsBiasesData = serialisedMultipliersAndBiases; return success(); } @@ -565,20 +571,22 @@ ReplaceDepthwiseConv2DPattern::getDepthwiseConv2DValidDirectParams( llvm::SmallVector &abstractKernelParams, std::vector &weightsData, int &scratchBytes) const { - nn::DerefInputFn::Params imToColParams(args.X, args.K); + nn::DerefInputFn imToCol(args.X, args.K); + auto imToColParams = imToCol.getParams(); std::array filterShape = {1, args.filterHeight, args.filterWidth, args.inputDepth}; nn::Conv2dReorderedWeights rw = nn::MatMulDirectFn_DW::reorder_kernel_weights( (int8_t *)args.filter.data(), filterShape, args.padValue); - nn::MatMulDirectFn_DW::Params afParams(args.X, args.K); + nn::MatMulDirectFn_DW af(args.X, args.K); + auto afParams = af.getParams(); - std::string mfStr = imToColParams.serialise(); - std::string afStr = afParams.serialise(); + std::string mfStr = + std::string((char *)&imToColParams, sizeof(imToColParams)); + std::string afStr = std::string((char *)&afParams, sizeof(afParams)); abstractKernelParams = - getAbstractKernelParamsForMultipleThreads( - args.imageRegionSplits, args.Y); + getAbstractKernelParamsForMultipleThreads(args.imageRegionSplits, args.Y); strParams.push_back(mfStr); strParams.push_back(afStr); weightsData = rw.weights; @@ -593,21 +601,23 @@ ReplaceDepthwiseConv2DPattern::getDepthwiseConv2DPaddedIndirectParams( llvm::SmallVector &abstractKernelParams, std::vector &weightsData, int &scratchBytes) const { - nn::ImToColPadded::Params imToColParams(args.X, args.K, args.padding, 16, - args.inputZeroPoint); + nn::ImToColPadded imToCol(args.X, args.K, args.padding, 16, + args.inputZeroPoint); + auto imToColParams = imToCol.getParams(); std::array filterShape = {1, args.filterHeight, args.filterWidth, args.inputDepth}; nn::Conv2dReorderedWeights rw = nn::MatMulDirectFn_DW::reorder_kernel_weights( (int8_t *)args.filter.data(), filterShape, args.padValue); - 
nn::MatMulDirectFn_DW::Params afParams(args.K); + nn::MatMulDirectFn_DW af(args.K); + auto afParams = af.getParams(); - std::string mfStr = imToColParams.serialise(); - std::string afStr = afParams.serialise(); + std::string mfStr = + std::string((char *)&imToColParams, sizeof(imToColParams)); + std::string afStr = std::string((char *)&afParams, sizeof(afParams)); abstractKernelParams = - getAbstractKernelParamsForMultipleThreads( - args.imageRegionSplits, args.Y); + getAbstractKernelParamsForMultipleThreads(args.imageRegionSplits, args.Y); strParams.push_back(mfStr); strParams.push_back(afStr); weightsData = rw.weights; diff --git a/xformer/Transforms/ReplaceStridedSlice.cpp b/xformer/Transforms/ReplaceStridedSlice.cpp index 717b6d5a2..6240bc949 100644 --- a/xformer/Transforms/ReplaceStridedSlice.cpp +++ b/xformer/Transforms/ReplaceStridedSlice.cpp @@ -127,11 +127,11 @@ struct ReplaceStridedSlicePattern nn::WindowGeometry({yDiff, xDiff, static_cast(inputDepth)}, {beginY, beginX}, {1, 1, 1}, {strideY, strideX}); - nn::ImToColValid::Params imToColParams(image_geom, window_geom, - static_cast(inputDepth), - /*dont_zero_pad_at_the_end=*/true); - - std::string mfStr = imToColParams.serialise(); + nn::ImToColValid imToCol(image_geom, window_geom, + static_cast(inputDepth), + /*dont_zero_pad_at_the_end=*/true); + auto imToColParams = imToCol.getParams(); + auto mfStr = std::string((char *)&imToColParams, sizeof(imToColParams)); auto binaryObjectStridedSliceOp = rewriter.create( stridedSliceOp.getLoc(), stridedSliceOp.getType(),