Skip to content

Commit

Permalink
feat(kernel): add naive padding opr
Browse files Browse the repository at this point in the history
GitOrigin-RevId: cf4eaeba27a5a0aefe9c946baa923a448fde100b
  • Loading branch information
megvii-mge committed Nov 9, 2023
1 parent 11c6d3c commit 63a125d
Show file tree
Hide file tree
Showing 17 changed files with 426 additions and 1 deletion.
12 changes: 12 additions & 0 deletions compiler/include/compiler/Dialect/Kernel/IR/AbstractKernels.td
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,18 @@ def GaussianBlurKernel: AbstractKernelBase<"GaussianBlur"> {
);
}

// Abstract kernel for the naive Padding operator.
//   padding_mode  : border rule as a string ("REPLICATE" / "REFLECT" / "CONSTANT")
//   padding_val   : fill value, only consumed by CONSTANT mode
//   front_offsets : number of padded elements before the source, per dimension
//   back_offsets  : number of padded elements after the source, per dimension
def PaddingKernel: AbstractKernelBase<"Padding"> {
    let arguments = (ins
        StrAttr:$padding_mode,
        F32Attr:$padding_val,
        ArrayAttr:$front_offsets,
        ArrayAttr:$back_offsets,

        Arg<AnyMemRef, "", [MemRead]>:$input,
        Arg<AnyMemRef, "", [MemWrite]>:$output
    );
}

def IndexingMultiAxisVecKernel: AbstractKernelBase<"IndexingMultiAxisVec"> {
let arguments = (ins
ArrayAttr:$axis,
Expand Down
1 change: 1 addition & 0 deletions compiler/include/compiler/KernelGen/KernelGen.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ struct KernelPack {
FusedElemwiseKernel,
CVGaussianBlur,
GaussianBlurKernel,
PaddingKernel,
};
static std::pair<std::vector<const KernelFunc*>, const DeduceFunc*> GetKernel(
KernelPack::KernType kernel_type, Arch arch);
Expand Down
9 changes: 9 additions & 0 deletions compiler/include/megbrain/IR/ops.td
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,15 @@ def GaussianBlur: MgbHashableOp<"GaussianBlur", [GaussianBlurParam], [NoSideEffe
let results = (outs AnyType);
}

// MGB-dialect Padding op. The packed PaddingParam carries the border mode and
// the fill value; the per-dimension offset arrays are passed as extra
// (non-param) int32 array attributes.
def Padding: MgbHashableOp<"Padding", [PaddingParam], [NoSideEffect]>{
    let inputs = (ins AnyType:$input);
    let results = (outs AnyType);
    let extraArguments = (ins
        MgbArrayAttr<MgbI32Attr>:$front_offsets,
        MgbArrayAttr<MgbI32Attr>:$back_offsets
    );
}

def IndexingOneHot: MgbHashableOp<"IndexingOneHot", [AxisParam], [NoSideEffect]>{
let inputs = (ins
AnyType:$input,
Expand Down
10 changes: 10 additions & 0 deletions compiler/include/megbrain/IR/param_defs.td
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,16 @@ class GaussianBlurParamBase<string accessor> : MgbPackedParamBase<"GaussianBlur"

def GaussianBlurParam : GaussianBlurParamBase<"param">;

// Border-mode enum mirroring ::megdnn::param::Padding::PaddingMode.
// NOTE(review): the trailing 0 is presumably the default enum index
// (REPLICATE) — confirm against the MgbEnumAttr definition.
def PaddingBorderMode : MgbEnumAttr<"::megdnn::param::Padding", "PaddingMode", ["REPLICATE", "REFLECT", "CONSTANT"], 0>;
// Packed parameter for Padding: mode defaults to CONSTANT, fill value
// defaults to 0.f (the fill value is only meaningful in CONSTANT mode).
class PaddingParamBase<string accessor> : MgbPackedParamBase<"Padding", accessor> {
    let fields = (ins
        MgbDefaultValuedAttr<PaddingBorderMode, "::megdnn::param::Padding::PaddingMode::CONSTANT">:$padding_mode,
        MgbDefaultValuedAttr<MgbF32Attr, "0.f">:$padding_val
    );
}

def PaddingParam : PaddingParamBase<"param">;

def ResizeInterpolationMode : MgbEnumAliasAttr<"::megdnn::param::Resize", "InterpolationMode", WarpPerspectiveV1InterpolationMode>;
def ResizeFormat : MgbEnumAliasAttr<"::megdnn::param::Resize", "Format", ConvolutionFormat>;
class ResizeParamBase<string accessor> : MgbPackedParamBase<"Resize", accessor> {
Expand Down
1 change: 1 addition & 0 deletions compiler/lib/Conversion/MGBToKernel/MGBToKernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,7 @@ void populateMGBToKernelConversionPatterns(
GenericConverter<MGB::WarpAffine, Kernel::WarpAffineKernel>,
GenericConverter<MGB::Resize, Kernel::ResizeKernel>,
GenericConverter<MGB::GaussianBlur, Kernel::GaussianBlurKernel>,
GenericConverter<MGB::Padding, Kernel::PaddingKernel>,
GenericConverter<MGB::MatrixInverse, Kernel::MatrixInvKernel>,

GenericConverter<MGB::GetVarShape, Kernel::GetVarShapeIns>,
Expand Down
15 changes: 15 additions & 0 deletions compiler/lib/Conversion/MGBToKernel/MGBToKernelHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,21 @@ SmallVector<NamedAttribute, 4> ConvertAttr<MGB::WarpPerspective>(
return attrs;
}

//! Convert the attributes of an MGB::Padding op into the NamedAttribute list
//! expected by Kernel::PaddingKernel. GetParamEnum / GetParam are helper
//! macros (presumably defined earlier in this header — they implicitly read
//! from `direct_attr` and append to `attrs`), so the locals below must keep
//! exactly these names.
template <>
SmallVector<NamedAttribute, 4> ConvertAttr<MGB::Padding>(
        DictionaryAttr direct_attr, MLIRContext* context) {
    SmallVector<NamedAttribute, 4> attrs;

    //! enum attribute: border mode (REPLICATE / REFLECT / CONSTANT)
    using PMode = ::megdnn::param::Padding::PaddingMode;
    GetParamEnum(PMode, "padding_mode");

    //! plain attributes: fill value plus the two per-dimension offset arrays
    GetParam("padding_val");
    GetParam("front_offsets");
    GetParam("back_offsets");

    return attrs;
}

template <>
SmallVector<NamedAttribute, 4> ConvertAttr<MGB::Subtensor>(
DictionaryAttr direct_attr, MLIRContext* context) {
Expand Down
2 changes: 2 additions & 0 deletions compiler/lib/Dialect/Kernel/Transforms/KernelRegister.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ INSTANCE_GET_KERNELS(mlir::Kernel::ArgmaxKernel, KernType::ArgmaxKernel)
INSTANCE_GET_KERNELS(mlir::Kernel::IndexingOneHotKernel, KernType::IndexingOneHotKernel)
INSTANCE_GET_KERNELS(mlir::Kernel::FusedElemwiseKernel, KernType::FusedElemwiseKernel)
INSTANCE_GET_KERNELS(mlir::Kernel::GaussianBlurKernel, KernType::GaussianBlurKernel)
INSTANCE_GET_KERNELS(mlir::Kernel::PaddingKernel, KernType::PaddingKernel)

template <class T, typename... Args>
void addBuiltinTemplatesOpr(
Expand Down Expand Up @@ -100,6 +101,7 @@ void addBuiltinTemplatesByOperator(
addBuiltinTemplatesOpr<mlir::Kernel::ConvBackDataKernel>(registry, arch);
addBuiltinTemplatesOpr<mlir::Kernel::FusedElemwiseKernel>(registry, arch);
addBuiltinTemplatesOpr<mlir::Kernel::GaussianBlurKernel>(registry, arch);
addBuiltinTemplatesOpr<mlir::Kernel::PaddingKernel>(registry, arch);
}
} // namespace Kernel
} // namespace mlir
Expand Down
3 changes: 3 additions & 0 deletions compiler/lib/KernelGen/BareMetal/KernelPack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "IndexingOneHot.h"
#include "MatrixInv.h"
#include "MatrixMul.h"
#include "Padding.h"
#include "Pooling.h"
#include "PowC.h"
#include "Reduce.h"
Expand Down Expand Up @@ -96,6 +97,8 @@ struct AllBareKernel {
std::make_shared<BareMetal::GaussianBlurKernel>()};
inner_map[KernelPack::KernType::GaussianBlurKernel] = {
std::make_shared<BareMetal::GaussianBlurKernel>()};
inner_map[KernelPack::KernType::PaddingKernel] = {
std::make_shared<BareMetal::PaddingKernel>()};
}

std::unordered_map<KernelPack::KernType, std::vector<std::shared_ptr<KernelFunc>>>
Expand Down
236 changes: 236 additions & 0 deletions compiler/lib/KernelGen/BareMetal/Padding.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
#include <sstream>

#include "Fp16Common.h"
#include "Padding.h"
#include "Utils/StringTemplate.h"
#include "Utils/Utils.h"

using namespace megcc;
using namespace KernelGen;
using namespace BareMetal;

//! The naive padding kernel is available exactly for the three border modes
//! that the generators in this file can emit code for.
bool PaddingKernel::IsAvailable(TContext* context) const {
    const std::string mode = context->getAttrStr("padding_mode");
    return mode == "REPLICATE" || mode == "CONSTANT" || mode == "REFLECT";
}

//! kernel gen
//! Build a unique kernel symbol encoding every attribute that influences the
//! generated code: the seven front offsets, the border mode, the fill value
//! and the input dtype. back_offsets are deliberately not encoded — the
//! generated kernel derives everything else from the runtime src/dst layouts.
std::string PaddingKernel::GetKernelSymbol(TContext* context) const {
    std::stringstream ss;
    ss << "kernel_padding_front_offset_";
    for (int i = 0; i < 7; ++i) {
        ss << context->getAttrInt("front_offsets:" + std::to_string(i)) << "_";
    }
    ss << context->getAttrStr("padding_mode") << "_"
       << context->getAttrFloat("padding_val") << "_"
       << context->getAttrOprand("operand:0").dtype;
    //! A non-integral padding_val (e.g. 0.5) or a negative offset would put
    //! '.', '-' or '+' into the symbol, which is not a valid C identifier and
    //! would break compilation of the emitted kernel — map them to '_'.
    std::string symbol = ss.str();
    for (auto& c : symbol) {
        if (c == '.' || c == '-' || c == '+') {
            c = '_';
        }
    }
    return symbol;
}

namespace {
//! Emit a C function implementing REPLICATE (edge-clamp) padding: every
//! destination coordinate outside the copied region maps to the nearest
//! source border element (index 0 in front, src_shape-1 in back). A
//! per-dimension index table is precomputed so the main loop is a plain
//! gather; the table is heap-allocated with tinynn_malloc and freed before
//! returning.
std::string gen_replicate_padding(TContext* context, std::string* func_name) {
    *func_name = "replicate_padding";
    std::string func = R"(
static void ${func_name}(
        const size_t ndim, const size_t total_out_nr, const ${dtype}* const src, ${dtype}* const dst,
        const int* front_offsets, const uint32_t* dst_shape, const int* dst_stride, const uint32_t* src_shape, const int* src_stride) {
    uint32_t **idx_tbl = (uint32_t**)tinynn_malloc(sizeof(uint32_t*) * ndim);
    for (size_t i = 0; i < ndim; ++i) {
        idx_tbl[i] = (uint32_t*)tinynn_malloc(sizeof(uint32_t) * dst_shape[i]);
        for (uint32_t idx = 0; idx < dst_shape[i]; ++idx) {
            if (idx < front_offsets[i]) {
                idx_tbl[i][idx] = 0;
            } else if (idx >= front_offsets[i] + src_shape[i]) {
                idx_tbl[i][idx] = src_shape[i] - 1;
            } else {
                idx_tbl[i][idx] = idx - front_offsets[i];
            }
        }
    }
    for(size_t out_index = 0; out_index < total_out_nr; ++out_index) {
        size_t in_index = 0;
        size_t out_index_tmp = out_index;
        for (size_t dim = 0; dim <= ndim - 1; ++dim) {
            size_t dim_index = out_index_tmp / dst_stride[dim];
            out_index_tmp -= dim_index * dst_stride[dim];
            in_index += idx_tbl[dim][dim_index] * src_stride[dim];
        }
        dst[out_index] = src[in_index];
    }
    for (size_t i = 0; i < ndim; ++i) {
        tinynn_free(idx_tbl[i]);
    }
    tinynn_free(idx_tbl);
}
)";
    //! substitute the function name and the C scalar type of the input dtype
    return StringTemplate::StringTemplateArgs()
            .add("func_name", *func_name)
            .add("dtype",
                 Utils::cvt_dtype_specifier(context->getAttrOprand("operand:0").dtype))
            .render(func);
}

//! Emit a C function implementing CONSTANT padding: destination coordinates
//! outside the copied region are filled with ${padding_val}, which is baked
//! into the generated source as a compile-time constant. A per-dimension
//! validity table marks which destination indices fall inside the source so
//! the inner loop can early-out as soon as one dimension is out of range.
std::string gen_constant_padding(TContext* context, std::string* func_name) {
    *func_name = "constant_padding";
    std::string func = R"(
static void ${func_name}(
        const size_t ndim, const size_t total_out_nr, const ${dtype}* const src, ${dtype}* const dst,
        const int* front_offsets, const uint32_t* dst_shape, const int* dst_stride, const uint32_t* src_shape, const int* src_stride) {
    uint8_t **is_valid = (uint8_t**)tinynn_malloc(sizeof(uint8_t*) * ndim);
    for (size_t i = 0; i < ndim; ++i) {
        is_valid[i] = (uint8_t*)tinynn_malloc(sizeof(uint8_t) * dst_shape[i]);
        for (uint32_t idx = 0; idx < dst_shape[i]; ++idx) {
            if (idx < front_offsets[i] || idx >= front_offsets[i] + src_shape[i]) {
                is_valid[i][idx] = 0;
            } else {
                is_valid[i][idx] = 1;
            }
        }
    }
    for(size_t out_index = 0; out_index < total_out_nr; ++out_index) {
        int in_src_valid_area = 1;
        size_t in_index = 0;
        size_t out_index_tmp = out_index;
        for (size_t dim = 0; dim <= ndim - 1; ++dim) {
            size_t dim_index = out_index_tmp / dst_stride[dim];
            out_index_tmp -= dim_index * dst_stride[dim];
            if (!is_valid[dim][dim_index]) {
                in_src_valid_area = 0;
                break;
            }
            in_index += (dim_index - front_offsets[dim]) * src_stride[dim];
        }
        if (in_src_valid_area) {
            dst[out_index] = src[in_index];
        } else {
            dst[out_index] = (${dtype})${padding_val};
        }
    }
    for (size_t i = 0; i < ndim; ++i) {
        tinynn_free(is_valid[i]);
    }
    tinynn_free(is_valid);
}
)";
    //! padding_val is rendered via std::to_string, so it always carries a
    //! decimal representation (e.g. "0.000000") in the generated source
    return StringTemplate::StringTemplateArgs()
            .add("func_name", *func_name)
            .add("padding_val", std::to_string(context->getAttrFloat("padding_val")))
            .add("dtype",
                 Utils::cvt_dtype_specifier(context->getAttrOprand("operand:0").dtype))
            .render(func);
}

//! Emit a C function implementing REFLECT padding: out-of-range destination
//! coordinates are mirrored across the border element without repeating it
//! (front: front_offsets[i] - idx; back: 2 * src_shape[i] - 2 - (idx -
//! front_offsets[i])).
//! NOTE(review): the mirrored index is only in range when each offset is
//! strictly smaller than src_shape[i]; nothing here validates that — confirm
//! the caller/IR guarantees it.
std::string gen_reflect_padding(TContext* context, std::string* func_name) {
    *func_name = "reflect_padding";
    std::string func = R"(
static void ${func_name}(
        const size_t ndim, const size_t total_out_nr, const ${dtype}* const src, ${dtype}* const dst,
        const int* front_offsets, const uint32_t* dst_shape, const int* dst_stride, const uint32_t* src_shape, const int* src_stride) {
    uint32_t **idx_tbl = (uint32_t**)tinynn_malloc(sizeof(uint32_t*) * ndim);
    for (size_t i = 0; i < ndim; ++i) {
        idx_tbl[i] = (uint32_t*)tinynn_malloc(sizeof(uint32_t) * dst_shape[i]);
        for (uint32_t idx = 0; idx < dst_shape[i]; ++idx) {
            if (idx < front_offsets[i]) {
                idx_tbl[i][idx] = front_offsets[i] - idx;
            } else if (idx >= front_offsets[i] + src_shape[i]) {
                idx_tbl[i][idx] = src_shape[i] * 2 - 2 - (idx - front_offsets[i]); //! (src_shape[i] - 1) - (idx - front_offsets[i] - (src_shape[i] - 1))
            } else {
                idx_tbl[i][idx] = idx - front_offsets[i];
            }
        }
    }
    for(size_t out_index = 0; out_index < total_out_nr; ++out_index) {
        size_t in_index = 0;
        size_t out_index_tmp = out_index;
        for (size_t dim = 0; dim <= ndim - 1; ++dim) {
            long long dim_index = out_index_tmp / dst_stride[dim];
            out_index_tmp -= dim_index * dst_stride[dim];
            in_index += idx_tbl[dim][dim_index] * (size_t)src_stride[dim];
        }
        dst[out_index] = src[in_index];
    }
    for (size_t i = 0; i < ndim; ++i) {
        tinynn_free(idx_tbl[i]);
    }
    tinynn_free(idx_tbl);
}
)";
    //! substitute the function name and the C scalar type of the input dtype
    return StringTemplate::StringTemplateArgs()
            .add("func_name", *func_name)
            .add("dtype",
                 Utils::cvt_dtype_specifier(context->getAttrOprand("operand:0").dtype))
            .render(func);
}
} // namespace

//! Assemble the complete C source of the kernel: optional fp16 helpers, the
//! mode-specific padding routine generated above, and the exported entry
//! point that unpacks the tensors/layouts and dispatches to that routine.
std::string PaddingKernel::GetKernelBody(TContext* context) const {
    std::stringstream ss;
    ss << "#include \"utils.h\"\n";
    std::string dtype =
            Utils::cvt_dtype_specifier(context->getAttrOprand("operand:0").dtype);
    if (dtype == "gi_float16_t") {
        //! fp16 dtype needs its typedef/helper definitions emitted first
        ss << gen_fp16_define();
    }
    std::string func_name;
    std::string padding_mode = context->getAttrStr("padding_mode");
    if (padding_mode == "REPLICATE") {
        ss << gen_replicate_padding(context, &func_name);
    } else if (padding_mode == "CONSTANT") {
        ss << gen_constant_padding(context, &func_name);
    } else {
        //! IsAvailable() restricts the mode set; anything else is a bug
        CC_ASSERT(padding_mode == "REFLECT");
        ss << gen_reflect_padding(context, &func_name);
    }
    ss << GenCommonRet() << " " << GetKernelSignature(context);
    //! Entry-point template: the seven front offsets are baked in as
    //! compile-time constants (layouts carry at most MAX_NDIM = 7 dims);
    //! everything else (shapes, strides, element count) comes from the
    //! runtime input/output layouts.
    std::string body_temp = R"({
    ${dtype}* a_data = (${dtype}*)inputs[0]->ptr;
    ${dtype}* c_data = (${dtype}*)outputs[0]->ptr;
    TINYNN_ASSERT(a_data);
    TINYNN_ASSERT(c_data);
    const Tensor* a_tensor = inputs[0];
    const Layout a_layout = a_tensor->layout;
    const Tensor* c_tensor = outputs[0];
    const Layout c_layout = c_tensor->layout;
    size_t nr_elem = 1;
    for (int i = 0; i < c_layout.nr_dim; ++i) {
        nr_elem *= c_layout.dims[i];
    }
    #define MAX_NDIM 7
    int front_offsets[MAX_NDIM];
    #undef MAX_NDIM
    front_offsets[0] = ${front_offset0};
    front_offsets[1] = ${front_offset1};
    front_offsets[2] = ${front_offset2};
    front_offsets[3] = ${front_offset3};
    front_offsets[4] = ${front_offset4};
    front_offsets[5] = ${front_offset5};
    front_offsets[6] = ${front_offset6};
    ${func_name}(a_layout.nr_dim, nr_elem, a_data, c_data, front_offsets, c_layout.dims, c_layout.stride, a_layout.dims, a_layout.stride);
    return TinyNN_SUCCESS;
})";

    ss << StringTemplate::StringTemplateArgs()
                    .add("dtype", dtype)
                    .add("func_name", func_name)
                    .add("front_offset0", context->getAttrInt("front_offsets:0"))
                    .add("front_offset1", context->getAttrInt("front_offsets:1"))
                    .add("front_offset2", context->getAttrInt("front_offsets:2"))
                    .add("front_offset3", context->getAttrInt("front_offsets:3"))
                    .add("front_offset4", context->getAttrInt("front_offsets:4"))
                    .add("front_offset5", context->getAttrInt("front_offsets:5"))
                    .add("front_offset6", context->getAttrInt("front_offsets:6"))
                    .render(body_temp);
    return ss.str();
}

// vim: syntax=cpp.doxygen
20 changes: 20 additions & 0 deletions compiler/lib/KernelGen/BareMetal/Padding.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#pragma once
#include <string>
#include "compiler/KernelGen/KernelGen.h"

namespace megcc {
namespace KernelGen {
namespace BareMetal {

//! Bare-metal code generator for the naive Padding operator.
class PaddingKernel : public KernelFunc {
public:
    //! available iff padding_mode is REPLICATE, REFLECT or CONSTANT
    bool IsAvailable(TContext* context) const override;
    //! unique symbol derived from front offsets, mode, fill value and dtype
    std::string GetKernelSymbol(TContext* context) const override;
    //! emits standalone C source implementing the selected padding mode
    std::string GetKernelBody(TContext* context) const override;
};

} // namespace BareMetal
} // namespace KernelGen
} // namespace megcc

// vim: syntax=cpp.doxygen
Loading

0 comments on commit 63a125d

Please sign in to comment.