diff --git a/third_party/lib_nn b/third_party/lib_nn index c383d28c4..80749b260 160000 --- a/third_party/lib_nn +++ b/third_party/lib_nn @@ -1 +1 @@ -Subproject commit c383d28c45a4556ed9528c3a34a0d3e293a9360b +Subproject commit 80749b2609004f96e6dac26951a57a02dd8fa92b diff --git a/third_party/lib_tflite_micro b/third_party/lib_tflite_micro index b2409cb7c..51aa1f05f 160000 --- a/third_party/lib_tflite_micro +++ b/third_party/lib_tflite_micro @@ -1 +1 @@ -Subproject commit b2409cb7ccedf4808533b92b797e257cfe067960 +Subproject commit 51aa1f05fdc3c4100a38faef2b3da3f6281ab075 diff --git a/xformer/IR/XCoreOps.td b/xformer/IR/XCoreOps.td index a7944e769..8a6dc12e8 100644 --- a/xformer/IR/XCoreOps.td +++ b/xformer/IR/XCoreOps.td @@ -197,6 +197,24 @@ def XC_Beta_FcF32Op : XC_Op<"beta_fcf32", [Pure]> { let results = (outs TensorOf<[F32]> : $output); } +def XC_MaxPool2DOp : XC_Op<"maxpool2d", [Pure]> { + let summary = "MaxPool2D op"; + + let description = [{MaxPool2D op.}]; + + let arguments = (ins + TensorOf<[QI8]>:$input, + StrAttr:$memcpy_fn_param, + StrAttr:$aggregate_fn_param, + StrAttr:$output_transform_fn_param, + I32Attr:$scratch_bytes, + I32Attr:$thread_count, + StrArrayAttr:$abstract_kernel_params + ); + + let results = (outs TensorOf<[QI8]> : $output); +} + def XC_Conv2DV2Op : XC_Op<"conv2d_v2", [Pure]> { let summary = "Conv2D V2 op"; diff --git a/xformer/Transforms/Passes.cpp b/xformer/Transforms/Passes.cpp index a53154bf4..eb5e9de31 100644 --- a/xformer/Transforms/Passes.cpp +++ b/xformer/Transforms/Passes.cpp @@ -30,6 +30,7 @@ void buildXCorePassPipeline(OpPassManager &pm) { // XC passes pm.addPass(createReplaceAddPass()); + pm.addPass(createReplaceMaxPoolPass()); pm.addPass(createReplaceMulPass()); pm.addPass(createReplaceStridedSlicePass()); pm.addPass(createReplaceConv2DPass()); diff --git a/xformer/Transforms/Passes.h b/xformer/Transforms/Passes.h index 5b813e82c..b1d240397 100644 --- a/xformer/Transforms/Passes.h +++ b/xformer/Transforms/Passes.h @@ -31,6 +31,7 @@ std::unique_ptr> createOpSplitPass(); std::unique_ptr> createApplyTFLPatternsPass(); std::unique_ptr> createReplaceAddPass(); std::unique_ptr> createReplaceMulPass(); +std::unique_ptr> createReplaceMaxPoolPass(); std::unique_ptr> createReplaceStridedSlicePass(); std::unique_ptr> createReplaceConv2DPass(); std::unique_ptr> createApplyXCPatternsPass(); diff --git a/xformer/Transforms/ReplaceConv2D.cpp b/xformer/Transforms/ReplaceConv2D.cpp index 47ed454b0..fc97d2745 100644 --- a/xformer/Transforms/ReplaceConv2D.cpp +++ b/xformer/Transforms/ReplaceConv2D.cpp @@ -69,12 +69,11 @@ ReplaceWithXCConv2DBase::matchAndRewrite( std::vector mulsBiasesOrThresholdsData; // Obtain thread count from command-line option - const int threadCount = threadCountOption; llvm::SmallVector strParams; int scratchBytes = 0; // Get image region splits for multiple threads args.imageRegionSplits = utils::getImageRegionThreadSplits( - threadCount, args.Y.height, args.Y.width); + threadCountOption, args.Y.height, args.Y.width); // Obtain serialized params and calculated tensors from lib_nn for the // conv2d kernel type diff --git a/xformer/Transforms/ReplaceMaxPool2D.cpp b/xformer/Transforms/ReplaceMaxPool2D.cpp new file mode 100644 index 000000000..258b4e184 --- /dev/null +++ b/xformer/Transforms/ReplaceMaxPool2D.cpp @@ -0,0 +1,113 @@ +#include "IR/XCoreOps.h" +#include "Transforms/Options.h" + +#include "Utils/ThreadSupport.h" +#include "lib_nn/api/AbstractKernel.hpp" +#include "lib_nn/api/AggregateFn.hpp" +#include "lib_nn/api/MemCpyFn.hpp" +#include "lib_nn/api/OutputTransformFn.hpp" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" + +namespace mlir { +namespace xcore { + +namespace { +struct ReplaceMaxPool2D + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ReplaceMaxPool2D) + void getDependentDialects(DialectRegistry ®istry) const final { + registry.insert(); + } + StringRef getArgument() const final { return "xcore-replace-maxpool2d"; } + StringRef getDescription() const final { + return "Replace TFL MaxPool2D with MaxPool2D for XCore."; + } + void runOnOperation() override; +}; + +struct ReplaceMaxPool2DPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(TFL::MaxPool2DOp mPoolOp, + PatternRewriter &rewriter) const override { + auto inputType = + mPoolOp.getInput().getType().template dyn_cast(); + auto outputType = + mPoolOp.getOutput().getType().template dyn_cast(); + auto inputHeight = inputType.getDimSize(1); + auto inputWidth = inputType.getDimSize(2); + auto inputDepth = inputType.getDimSize(3); + auto outputHeight = outputType.getDimSize(1); + auto outputWidth = outputType.getDimSize(2); + auto outputDepth = outputType.getDimSize(3); + auto splits = utils::getImageRegionThreadSplits(threadCountOption, + outputHeight, outputWidth); + + auto actualThreadCount = splits.size(); + // Create a string array attr from a vector of strings + auto getStringArrayAttr = [&](llvm::SmallVector value) { + auto attrs = llvm::to_vector<8>( + llvm::map_range(value, [&](std::string v) -> Attribute { + return rewriter.getStringAttr(v); + })); + return rewriter.getArrayAttr(attrs); + }; + int32_t scratchByteParam = + nn::MatMulInt8::get_scratch_mem_bytes(mPoolOp.getFilterWidth() * + mPoolOp.getFilterHeight()) + + 32; //[asj] FIXME + nn::ImageGeometry X(inputHeight, inputWidth, inputDepth); + nn::ImageGeometry Y(outputHeight, outputWidth, outputDepth); + llvm::SmallVector akp; + for (auto ®ion : splits) { + nn::ImageRegion ir(region[0], region[1], 0, region[2], region[3], + outputDepth); + nn::AbstractKernel ak(Y, ir, VPU_INT8_ACC_PERIOD); + auto akParams = ak.getParams(); + auto akpStr = std::string((char *)&akParams, sizeof(akParams)); + akp.push_back(akpStr); + } + nn::ImageRegion ir(0, 0, 0, outputHeight, outputWidth, outputDepth); + nn::WindowGeometry window( + mPoolOp.getFilterHeight(), mPoolOp.getFilterWidth(), 1, 0, 0, + mPoolOp.getStrideH(), mPoolOp.getStrideW(), 1, 1, 1); + nn::DerefInputFn mf(X, window); + nn::MatMulDirectFn_DW af(X, window); + // TODO + nn::OT_int8_channelwise ot(outputDepth, 0); + auto mfParams = mf.getParams(); + auto afParams = af.getParams(); + auto otParams = ot.getParams(); + auto mfStr = std::string((char *)&mfParams, sizeof(mfParams)); + auto afStr = std::string((char *)&afParams, sizeof(afParams)); + auto otStr = std::string((char *)&otParams, sizeof(otParams)); + + auto xcMaxPool2DOp = rewriter.create( + mPoolOp.getLoc(), mPoolOp.getType(), mPoolOp.getInput(), + rewriter.getStringAttr(mfStr), rewriter.getStringAttr(afStr), + rewriter.getStringAttr(otStr), + rewriter.getI32IntegerAttr(scratchByteParam), + rewriter.getI32IntegerAttr(actualThreadCount), getStringArrayAttr(akp)); + rewriter.replaceOp(mPoolOp, xcMaxPool2DOp.getOutput()); + return success(); + } +}; + +void ReplaceMaxPool2D::runOnOperation() { + auto *ctx = &getContext(); + func::FuncOp func = getOperation(); + RewritePatternSet patterns(ctx); + patterns.insert(ctx); + (void)applyPatternsAndFoldGreedily(func, std::move(patterns)); +} +} // namespace +std::unique_ptr> createReplaceMaxPoolPass() { + return std::make_unique(); +} + +static PassRegistration pass; + +} // namespace xcore +} // namespace mlir diff --git a/xformer/Transforms/TranslateToCustomOp.cpp b/xformer/Transforms/TranslateToCustomOp.cpp index 12550e927..2a7e74a90 100644 --- a/xformer/Transforms/TranslateToCustomOp.cpp +++ b/xformer/Transforms/TranslateToCustomOp.cpp @@ -131,6 +131,31 @@ std::vector Conv2DV2Op::buildCustomOptions() { return fbb.GetBuffer(); } +std::vector MaxPool2DOp::buildCustomOptions() { + // TODO: Is the alignement messed up? + flexbuffers::Builder fbb; + auto rootMap = fbb.StartMap(); + fbb.String("mp", getMemcpyFnParam().str()); + fbb.String("a", getAggregateFnParam().str()); + fbb.String("o", getOutputTransformFnParam().str()); + int threadCount = (int)getThreadCount(); + auto akpVec = fbb.StartVector("p"); + for (int i = 0; i < threadCount; ++i) { + fbb.String(getAbstractKernelParams() + .cast()[i] + .cast() + .getValue() + .str() + + "00"); + } + fbb.EndVector(akpVec, false, false); + fbb.Int("s", (int32_t)getScratchBytes()); + + fbb.EndMap(rootMap); + fbb.Finish(); + return fbb.GetBuffer(); +} + namespace { /// This pass translates XCore ops to TFLite custom ops. struct TranslateToCustomOp @@ -172,6 +197,7 @@ void TranslateToCustomOp::runOnOperation() { patterns.insert>(ctx); patterns.insert>(ctx); patterns.insert>(ctx); + patterns.insert>(ctx); patterns.insert>(ctx); patterns.insert>(ctx); patterns.insert>(ctx); diff --git a/xformer/WORKSPACE b/xformer/WORKSPACE index b02077222..9c50f9e9b 100644 --- a/xformer/WORKSPACE +++ b/xformer/WORKSPACE @@ -31,7 +31,7 @@ load("@bazel_skylib//lib:paths.bzl", "paths") ############################### Compile Commands ############################### # Hedron's Compile Commands Extractor for Bazel, used to get clangd to work # Replace commit hash with latest version, later setup automatic update tool maybe? -BCCE_HASH = "e16062717d9b098c3c2ac95717d2b3e661c50608" +BCCE_HASH = "eca42c63700fccdc49cf58177e0a96f0f6075a68" http_archive( name = "hedron_compile_commands", url = "https://github.com/hedronvision/bazel-compile-commands-extractor/archive/{hash}.tar.gz".format(hash = BCCE_HASH), diff --git a/xformer/lib_tflite_micro.BUILD b/xformer/lib_tflite_micro.BUILD index 11a06f374..0c83673e5 100644 --- a/xformer/lib_tflite_micro.BUILD +++ b/xformer/lib_tflite_micro.BUILD @@ -28,6 +28,7 @@ filegroup( "lib_tflite_micro/src/tflite-xcore-kernels/xcore_custom_options.cc", "lib_tflite_micro/src/tflite-xcore-kernels/xcore_bsign.cc", "lib_tflite_micro/src/tflite-xcore-kernels/xcore_conv2d_v2.cc", + "lib_tflite_micro/src/tflite-xcore-kernels/xcore_maxpool2d.cc", "lib_tflite_micro/src/tflite-xcore-kernels/xcore_detection_post.cc", "lib_tflite_micro/src/tflite-xcore-kernels/xcore_load_from_flash.cc", "lib_tflite_micro/src/tflite-xcore-kernels/xcore_lookup.cc", diff --git a/xformer/toolchain/BUILD b/xformer/toolchain/BUILD deleted file mode 100644 index 369886450..000000000 --- a/xformer/toolchain/BUILD +++ /dev/null @@ -1,72 +0,0 @@ -# -# Copyright 2015 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -package(default_visibility = ["//visibility:public"]) - -#filegroup( -# name = "toolchain", -# srcs = [ -# ":cc-compiler-linux", -# ], -#) - -#filegroup(name = "clang_suite") - -cc_toolchain_suite( - name = "gnu_suite", - toolchains = { - "k8": ":k8_toolchain", - }, -) - -filegroup(name = "empty") - -cc_toolchain( - name = "k8_toolchain", - toolchain_identifier = "k8-toolchain", - toolchain_config = ":k8_toolchain_config", - all_files = ":empty", - compiler_files = ":empty", - dwp_files = ":empty", - linker_files = ":empty", - objcopy_files = ":empty", - strip_files = ":empty", - supports_param_files = 0, -) - -load(":cc_toolchain_config.bzl", "cc_toolchain_config") - -cc_toolchain_config(name = "k8_toolchain_config") - -#cc_toolchain( -# name = "cc_linux", -# toolchain_config = ":linux_toolchain_config", -# all_files = ":empty", -# compiler_files = ":empty", -# dwp_files = ":empty", -# linker_files = ":empty", -# objcopy_files = ":empty", -# strip_files = ":empty", -# supports_param_files = 0, -#) -# -#toolchain_type(name = "toolchain_type") -# -#toolchain( -# name = "cc-compiler-linux", -# toolchain = ":cc_linux", -# toolchain_type = ":toolchain_type", -# toolchain_config = ":linux_toolchain_config", -###) diff --git a/xformer/toolchain/cc_toolchain_config.bzl b/xformer/toolchain/cc_toolchain_config.bzl deleted file mode 100644 index 77cc66ca5..000000000 --- a/xformer/toolchain/cc_toolchain_config.bzl +++ /dev/null @@ -1,97 +0,0 @@ -# NEW -load("@bazel_tools//tools/build_defs/cc:action_names.bzl", "ACTION_NAMES") -# NEW -load( - "@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl", - "feature", - "flag_group", - "flag_set", - "tool_path", -) - -all_link_actions = [ # NEW - ACTION_NAMES.cpp_link_executable, - ACTION_NAMES.cpp_link_dynamic_library, - ACTION_NAMES.cpp_link_nodeps_dynamic_library, -] - -def _impl(ctx): - tool_paths = [ # NEW - tool_path( - name = "gcc", - path = "/opt/xmos/gcc/11.2.0/bin/gcc", - ), - tool_path( - name = "ld", - path = "/usr/bin/ld", - ), - tool_path( - name = "ar", - path = "/opt/xmos/gcc/11.2.0/bin/gcc-ar", - ), - tool_path( - name = "cpp", - path = "/opt/xmos/gcc/11.2.0/bin/cpp", - ), - tool_path( - name = "gcov", - path = "/bin/false", - ), - tool_path( - name = "nm", - path = "/bin/false", - ), - tool_path( - name = "objdump", - path = "/bin/false", - ), - tool_path( - name = "strip", - path = "/bin/false", - ), - ] - - features = [ # NEW - feature( - name = "default_linker_flags", - enabled = True, - flag_sets = [ - flag_set( - actions = all_link_actions, - flag_groups = ([ - flag_group( - flags = [ - "-lstdc++", - ], - ), - ]), - ), - ], - ), - ] - - return cc_common.create_cc_toolchain_config_info( - ctx = ctx, - features = features, # NEW - cxx_builtin_include_directories = [ - "/opt/xmos/gcc/11.2.0/lib/gcc/x86_64-pc-linux-gnu/11.2.0/include", - "/opt/xmos/gcc/11.2.0/lib/gcc/x86_64-pc-linux-gnu/11.2.0/include-fixed", - "/opt/xmos/gcc/11.2.0/include", - "/usr/include", - ], - toolchain_identifier = "local", - host_system_name = "local", - target_system_name = "local", - target_cpu = "k8", - target_libc = "unknown", - compiler = "gnu", - abi_version = "unknown", - abi_libc_version = "unknown", - tool_paths = tool_paths - ) - -cc_toolchain_config = rule( - implementation = _impl, - attrs = {}, - provides = [CcToolchainConfigInfo], -)