diff --git a/compiler/include/compiler/Dialect/Kernel/IR/AbstractKernels.td b/compiler/include/compiler/Dialect/Kernel/IR/AbstractKernels.td index 57c16c8c..496edeff 100644 --- a/compiler/include/compiler/Dialect/Kernel/IR/AbstractKernels.td +++ b/compiler/include/compiler/Dialect/Kernel/IR/AbstractKernels.td @@ -176,6 +176,18 @@ def GaussianBlurKernel: AbstractKernelBase<"GaussianBlur"> { ); } +def PaddingKernel: AbstractKernelBase<"Padding"> { + let arguments = (ins + StrAttr:$padding_mode, + F32Attr:$padding_val, + ArrayAttr:$front_offsets, + ArrayAttr:$back_offsets, + + Arg:$input, + Arg:$output + ); +} + def IndexingMultiAxisVecKernel: AbstractKernelBase<"IndexingMultiAxisVec"> { let arguments = (ins ArrayAttr:$axis, diff --git a/compiler/include/compiler/KernelGen/KernelGen.h b/compiler/include/compiler/KernelGen/KernelGen.h index b3df4097..d100ffd6 100644 --- a/compiler/include/compiler/KernelGen/KernelGen.h +++ b/compiler/include/compiler/KernelGen/KernelGen.h @@ -189,6 +189,7 @@ struct KernelPack { FusedElemwiseKernel, CVGaussianBlur, GaussianBlurKernel, + PaddingKernel, }; static std::pair, const DeduceFunc*> GetKernel( KernelPack::KernType kernel_type, Arch arch); diff --git a/compiler/include/megbrain/IR/ops.td b/compiler/include/megbrain/IR/ops.td index 9cd1eccf..dd172785 100644 --- a/compiler/include/megbrain/IR/ops.td +++ b/compiler/include/megbrain/IR/ops.td @@ -149,6 +149,15 @@ def GaussianBlur: MgbHashableOp<"GaussianBlur", [GaussianBlurParam], [NoSideEffe let results = (outs AnyType); } +def Padding: MgbHashableOp<"Padding", [PaddingParam], [NoSideEffect]>{ + let inputs = (ins AnyType:$input); + let results = (outs AnyType); + let extraArguments = (ins + MgbArrayAttr:$front_offsets, + MgbArrayAttr:$back_offsets + ); +} + def IndexingOneHot: MgbHashableOp<"IndexingOneHot", [AxisParam], [NoSideEffect]>{ let inputs = (ins AnyType:$input, diff --git a/compiler/include/megbrain/IR/param_defs.td b/compiler/include/megbrain/IR/param_defs.td index 5a4495fd..0f7e70bb 100644 --- a/compiler/include/megbrain/IR/param_defs.td +++ b/compiler/include/megbrain/IR/param_defs.td @@ -480,6 +480,16 @@ class GaussianBlurParamBase : MgbPackedParamBase<"GaussianBlur" def GaussianBlurParam : GaussianBlurParamBase<"param">; +def PaddingBorderMode : MgbEnumAttr<"::megdnn::param::Padding", "PaddingMode", ["REPLICATE", "REFLECT", "CONSTANT"], 0>; +class PaddingParamBase : MgbPackedParamBase<"Padding", accessor> { + let fields = (ins + MgbDefaultValuedAttr:$padding_mode, + MgbDefaultValuedAttr:$padding_val + ); +} + +def PaddingParam : PaddingParamBase<"param">; + def ResizeInterpolationMode : MgbEnumAliasAttr<"::megdnn::param::Resize", "InterpolationMode", WarpPerspectiveV1InterpolationMode>; def ResizeFormat : MgbEnumAliasAttr<"::megdnn::param::Resize", "Format", ConvolutionFormat>; class ResizeParamBase : MgbPackedParamBase<"Resize", accessor> { diff --git a/compiler/lib/Conversion/MGBToKernel/MGBToKernel.cpp b/compiler/lib/Conversion/MGBToKernel/MGBToKernel.cpp index ca274a0f..97e79ba9 100644 --- a/compiler/lib/Conversion/MGBToKernel/MGBToKernel.cpp +++ b/compiler/lib/Conversion/MGBToKernel/MGBToKernel.cpp @@ -572,6 +572,7 @@ void populateMGBToKernelConversionPatterns( GenericConverter, GenericConverter, GenericConverter, + GenericConverter, GenericConverter, GenericConverter, diff --git a/compiler/lib/Conversion/MGBToKernel/MGBToKernelHelper.h b/compiler/lib/Conversion/MGBToKernel/MGBToKernelHelper.h index 6e18c922..66363b09 100644 --- a/compiler/lib/Conversion/MGBToKernel/MGBToKernelHelper.h +++ b/compiler/lib/Conversion/MGBToKernel/MGBToKernelHelper.h @@ -363,6 +363,21 @@ SmallVector ConvertAttr( return attrs; } +template <> +SmallVector ConvertAttr( + DictionaryAttr direct_attr, MLIRContext* context) { + SmallVector attrs; + + using PMode = ::megdnn::param::Padding::PaddingMode; + GetParamEnum(PMode, "padding_mode"); + + GetParam("padding_val"); + GetParam("front_offsets"); + GetParam("back_offsets"); + + return attrs; +} + template <> SmallVector ConvertAttr( DictionaryAttr direct_attr, MLIRContext* context) { diff --git a/compiler/lib/Dialect/Kernel/Transforms/KernelRegister.h b/compiler/lib/Dialect/Kernel/Transforms/KernelRegister.h index 58f13324..76dc5889 100644 --- a/compiler/lib/Dialect/Kernel/Transforms/KernelRegister.h +++ b/compiler/lib/Dialect/Kernel/Transforms/KernelRegister.h @@ -51,6 +51,7 @@ INSTANCE_GET_KERNELS(mlir::Kernel::ArgmaxKernel, KernType::ArgmaxKernel) INSTANCE_GET_KERNELS(mlir::Kernel::IndexingOneHotKernel, KernType::IndexingOneHotKernel) INSTANCE_GET_KERNELS(mlir::Kernel::FusedElemwiseKernel, KernType::FusedElemwiseKernel) INSTANCE_GET_KERNELS(mlir::Kernel::GaussianBlurKernel, KernType::GaussianBlurKernel) +INSTANCE_GET_KERNELS(mlir::Kernel::PaddingKernel, KernType::PaddingKernel) template void addBuiltinTemplatesOpr( @@ -100,6 +101,7 @@ void addBuiltinTemplatesByOperator( addBuiltinTemplatesOpr(registry, arch); addBuiltinTemplatesOpr(registry, arch); addBuiltinTemplatesOpr(registry, arch); + addBuiltinTemplatesOpr(registry, arch); } } // namespace Kernel } // namespace mlir diff --git a/compiler/lib/KernelGen/BareMetal/KernelPack.cpp b/compiler/lib/KernelGen/BareMetal/KernelPack.cpp index 9f082f46..05d6c3ab 100644 --- a/compiler/lib/KernelGen/BareMetal/KernelPack.cpp +++ b/compiler/lib/KernelGen/BareMetal/KernelPack.cpp @@ -19,6 +19,7 @@ #include "IndexingOneHot.h" #include "MatrixInv.h" #include "MatrixMul.h" +#include "Padding.h" #include "Pooling.h" #include "PowC.h" #include "Reduce.h" @@ -96,6 +97,8 @@ struct AllBareKernel { std::make_shared()}; inner_map[KernelPack::KernType::GaussianBlurKernel] = { std::make_shared()}; + inner_map[KernelPack::KernType::PaddingKernel] = { + std::make_shared()}; } std::unordered_map>> diff --git a/compiler/lib/KernelGen/BareMetal/Padding.cpp b/compiler/lib/KernelGen/BareMetal/Padding.cpp new file mode 100644 index 00000000..4def40c2 --- /dev/null +++ b/compiler/lib/KernelGen/BareMetal/Padding.cpp @@ -0,0 +1,236 @@ +#include + +#include "Fp16Common.h" +#include "Padding.h" +#include "Utils/StringTemplate.h" +#include "Utils/Utils.h" + +using namespace megcc; +using namespace KernelGen; +using namespace BareMetal; + +bool PaddingKernel::IsAvailable(TContext* context) const { + std::string padding_mode = context->getAttrStr("padding_mode"); + bool mode_ok = + (padding_mode == "REPLICATE" || padding_mode == "CONSTANT" || + padding_mode == "REFLECT"); + return mode_ok; +} + +//! kernel gen +std::string PaddingKernel::GetKernelSymbol(TContext* context) const { + std::stringstream ss; + ss << "kernel_padding_front_offset_"; + for (int i = 0; i < 7; ++i) { + ss << context->getAttrInt("front_offsets:" + std::to_string(i)) << "_"; + } + ss << context->getAttrStr("padding_mode") << "_" + << context->getAttrFloat("padding_val") << "_" + << context->getAttrOprand("operand:0").dtype; + return ss.str(); +} + +namespace { +std::string gen_replicate_padding(TContext* context, std::string* func_name) { + *func_name = "replicate_padding"; + std::string func = R"( + static void ${func_name}( + const size_t ndim, const size_t total_out_nr, const ${dtype}* const src, ${dtype}* const dst, + const int* front_offsets, const uint32_t* dst_shape, const int* dst_stride, const uint32_t* src_shape, const int* src_stride) { + uint32_t **idx_tbl = (uint32_t**)tinynn_malloc(sizeof(uint32_t*) * ndim); + for (size_t i = 0; i < ndim; ++i) { + idx_tbl[i] = (uint32_t*)tinynn_malloc(sizeof(uint32_t) * dst_shape[i]); + for (uint32_t idx = 0; idx < dst_shape[i]; ++idx) { + if (idx < front_offsets[i]) { + idx_tbl[i][idx] = 0; + } else if (idx >= front_offsets[i] + src_shape[i]) { + idx_tbl[i][idx] = src_shape[i] - 1; + } else { + idx_tbl[i][idx] = idx - front_offsets[i]; + } + } + } + + for(size_t out_index = 0; out_index < total_out_nr; ++out_index) { + size_t in_index = 0; + size_t out_index_tmp = out_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + size_t dim_index = out_index_tmp / dst_stride[dim]; + out_index_tmp -= dim_index * dst_stride[dim]; + in_index += idx_tbl[dim][dim_index] * src_stride[dim]; + } + dst[out_index] = src[in_index]; + } + + for (size_t i = 0; i < ndim; ++i) { + tinynn_free(idx_tbl[i]); + } + tinynn_free(idx_tbl); + } + )"; + return StringTemplate::StringTemplateArgs() + .add("func_name", *func_name) + .add("dtype", + Utils::cvt_dtype_specifier(context->getAttrOprand("operand:0").dtype)) + .render(func); +} + +std::string gen_constant_padding(TContext* context, std::string* func_name) { + *func_name = "constant_padding"; + std::string func = R"( + static void ${func_name}( + const size_t ndim, const size_t total_out_nr, const ${dtype}* const src, ${dtype}* const dst, + const int* front_offsets, const uint32_t* dst_shape, const int* dst_stride, const uint32_t* src_shape, const int* src_stride) { + uint8_t **is_valid = (uint8_t**)tinynn_malloc(sizeof(uint8_t*) * ndim); + for (size_t i = 0; i < ndim; ++i) { + is_valid[i] = (uint8_t*)tinynn_malloc(sizeof(uint8_t) * dst_shape[i]); + for (uint32_t idx = 0; idx < dst_shape[i]; ++idx) { + if (idx < front_offsets[i] || idx >= front_offsets[i] + src_shape[i]) { + is_valid[i][idx] = 0; + } else { + is_valid[i][idx] = 1; + } + } + } + + for(size_t out_index = 0; out_index < total_out_nr; ++out_index) { + int in_src_valid_area = 1; + size_t in_index = 0; + size_t out_index_tmp = out_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + size_t dim_index = out_index_tmp / dst_stride[dim]; + out_index_tmp -= dim_index * dst_stride[dim]; + if (!is_valid[dim][dim_index]) { + in_src_valid_area = 0; + break; + } + in_index += (dim_index - front_offsets[dim]) * src_stride[dim]; + } + if (in_src_valid_area) { + dst[out_index] = src[in_index]; + } else { + dst[out_index] = (${dtype})${padding_val}; + } + } + + for (size_t i = 0; i < ndim; ++i) { + tinynn_free(is_valid[i]); + } + tinynn_free(is_valid); + } + )"; + return StringTemplate::StringTemplateArgs() + .add("func_name", *func_name) + .add("padding_val", std::to_string(context->getAttrFloat("padding_val"))) + .add("dtype", + Utils::cvt_dtype_specifier(context->getAttrOprand("operand:0").dtype)) + .render(func); +} + +std::string gen_reflect_padding(TContext* context, std::string* func_name) { + *func_name = "reflect_padding"; + std::string func = R"( + static void ${func_name}( + const size_t ndim, const size_t total_out_nr, const ${dtype}* const src, ${dtype}* const dst, + const int* front_offsets, const uint32_t* dst_shape, const int* dst_stride, const uint32_t* src_shape, const int* src_stride) { + uint32_t **idx_tbl = (uint32_t**)tinynn_malloc(sizeof(uint32_t*) * ndim); + for (size_t i = 0; i < ndim; ++i) { + idx_tbl[i] = (uint32_t*)tinynn_malloc(sizeof(uint32_t) * dst_shape[i]); + for (uint32_t idx = 0; idx < dst_shape[i]; ++idx) { + if (idx < front_offsets[i]) { + idx_tbl[i][idx] = front_offsets[i] - idx; + } else if (idx >= front_offsets[i] + src_shape[i]) { + idx_tbl[i][idx] = src_shape[i] * 2 - 2 - (idx - front_offsets[i]); //! (src_shape[i] - 1) - (idx - front_offsets[i] - (src_shape[i] - 1)) + } else { + idx_tbl[i][idx] = idx - front_offsets[i]; + } + } + } + for(size_t out_index = 0; out_index < total_out_nr; ++out_index) { + size_t in_index = 0; + size_t out_index_tmp = out_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + long long dim_index = out_index_tmp / dst_stride[dim]; + out_index_tmp -= dim_index * dst_stride[dim]; + in_index += idx_tbl[dim][dim_index] * (size_t)src_stride[dim]; + } + dst[out_index] = src[in_index]; + } + + for (size_t i = 0; i < ndim; ++i) { + tinynn_free(idx_tbl[i]); + } + tinynn_free(idx_tbl); + } + )"; + return StringTemplate::StringTemplateArgs() + .add("func_name", *func_name) + .add("dtype", + Utils::cvt_dtype_specifier(context->getAttrOprand("operand:0").dtype)) + .render(func); +} +} // namespace + +std::string PaddingKernel::GetKernelBody(TContext* context) const { + std::stringstream ss; + ss << "#include \"utils.h\"\n"; + std::string dtype = + Utils::cvt_dtype_specifier(context->getAttrOprand("operand:0").dtype); + if (dtype == "gi_float16_t") { + ss << gen_fp16_define(); + } + std::string func_name; + std::string padding_mode = context->getAttrStr("padding_mode"); + if (padding_mode == "REPLICATE") { + ss << gen_replicate_padding(context, &func_name); + } else if (padding_mode == "CONSTANT") { + ss << gen_constant_padding(context, &func_name); + } else { + CC_ASSERT(padding_mode == "REFLECT"); + ss << gen_reflect_padding(context, &func_name); + } + ss << GenCommonRet() << " " << GetKernelSignature(context); + std::string body_temp = R"({ + ${dtype}* a_data = (${dtype}*)inputs[0]->ptr; + ${dtype}* c_data = (${dtype}*)outputs[0]->ptr; + TINYNN_ASSERT(a_data); + TINYNN_ASSERT(c_data); + const Tensor* a_tensor = inputs[0]; + const Layout a_layout = a_tensor->layout; + const Tensor* c_tensor = outputs[0]; + const Layout c_layout = c_tensor->layout; + size_t nr_elem = 1; + for (int i = 0; i < c_layout.nr_dim; ++i) { + nr_elem *= c_layout.dims[i]; + } +#define MAX_NDIM 7 + int front_offsets[MAX_NDIM]; +#undef MAX_NDIM + front_offsets[0] = ${front_offset0}; + front_offsets[1] = ${front_offset1}; + front_offsets[2] = ${front_offset2}; + front_offsets[3] = ${front_offset3}; + front_offsets[4] = ${front_offset4}; + front_offsets[5] = ${front_offset5}; + front_offsets[6] = ${front_offset6}; + + ${func_name}(a_layout.nr_dim, nr_elem, a_data, c_data, front_offsets, c_layout.dims, c_layout.stride, a_layout.dims, a_layout.stride); + + return TinyNN_SUCCESS; +})"; + + ss << StringTemplate::StringTemplateArgs() + .add("dtype", dtype) + .add("func_name", func_name) + .add("front_offset0", context->getAttrInt("front_offsets:0")) + .add("front_offset1", context->getAttrInt("front_offsets:1")) + .add("front_offset2", context->getAttrInt("front_offsets:2")) + .add("front_offset3", context->getAttrInt("front_offsets:3")) + .add("front_offset4", context->getAttrInt("front_offsets:4")) + .add("front_offset5", context->getAttrInt("front_offsets:5")) + .add("front_offset6", context->getAttrInt("front_offsets:6")) + .render(body_temp); + return ss.str(); +} + +// vim: syntax=cpp.doxygen \ No newline at end of file diff --git a/compiler/lib/KernelGen/BareMetal/Padding.h b/compiler/lib/KernelGen/BareMetal/Padding.h new file mode 100644 index 00000000..86bfa79f --- /dev/null +++ b/compiler/lib/KernelGen/BareMetal/Padding.h @@ -0,0 +1,20 @@ +#pragma once +#include +#include "compiler/KernelGen/KernelGen.h" + +namespace megcc { +namespace KernelGen { +namespace BareMetal { + +class PaddingKernel : public KernelFunc { +public: + bool IsAvailable(TContext* context) const override; + std::string GetKernelSymbol(TContext* context) const override; + std::string GetKernelBody(TContext* context) const override; +}; + +} // namespace BareMetal +} // namespace KernelGen +} // namespace megcc + +// vim: syntax=cpp.doxygen diff --git a/compiler/lib/Target/MGB/importer.cpp b/compiler/lib/Target/MGB/importer.cpp index d45734f2..7a3a0fb2 100644 --- a/compiler/lib/Target/MGB/importer.cpp +++ b/compiler/lib/Target/MGB/importer.cpp @@ -1061,6 +1061,32 @@ class Importer { for (int i = 0; i < opr->output().size(); ++i) { m_var2value.emplace(opr->output(i), values.getResult(i)); } + } else if (auto padding_opr = opr->try_cast_final()) { + std::vector front_offsets, back_offsets; + auto&& p = padding_opr->param(); + auto&& out = padding_opr->output(0); + auto&& in = padding_opr->input(0); + front_offsets.push_back(p.front_offset_dim0); + front_offsets.push_back(p.front_offset_dim1); + front_offsets.push_back(p.front_offset_dim2); + front_offsets.push_back(p.front_offset_dim3); + front_offsets.push_back(p.front_offset_dim4); + front_offsets.push_back(p.front_offset_dim5); + front_offsets.push_back(p.front_offset_dim6); + + back_offsets.push_back(p.back_offset_dim0); + back_offsets.push_back(p.back_offset_dim1); + back_offsets.push_back(p.back_offset_dim2); + back_offsets.push_back(p.back_offset_dim3); + back_offsets.push_back(p.back_offset_dim4); + back_offsets.push_back(p.back_offset_dim5); + back_offsets.push_back(p.back_offset_dim6); + + mlir::Value value = m_builder.create( + m_builder.getUnknownLoc(), var_to_shaped_type(out), + m_var2value.at(in), p.padding_mode, p.padding_val, front_offsets, + back_offsets); + m_var2value.emplace(out, value); } else { CC_ABORT << "unsupported mgb operator type " << opr->dyn_typeinfo()->name << "\n"; diff --git a/compiler/test/kernel/common/dnn_proxy_trait.h b/compiler/test/kernel/common/dnn_proxy_trait.h index 3403b87d..c0da38e1 100644 --- a/compiler/test/kernel/common/dnn_proxy_trait.h +++ b/compiler/test/kernel/common/dnn_proxy_trait.h @@ -80,6 +80,7 @@ DEF(Rotate, 2, true, true); DEF(CvtColor, 2, true, true); DEF(WarpAffine, 3, true, false); DEF(GaussianBlur, 2, true, true); +DEF(Padding, 2, true, true); DEF(Resize, 2, true, false); DEF(ResizeBackward, 2, true, false); DEF(IndexingOneHot, 3, true, true); diff --git a/compiler/test/kernel/common/src/cc_fill_attr.cpp b/compiler/test/kernel/common/src/cc_fill_attr.cpp index 60714e39..735b143f 100644 --- a/compiler/test/kernel/common/src/cc_fill_attr.cpp +++ b/compiler/test/kernel/common/src/cc_fill_attr.cpp @@ -435,6 +435,38 @@ KernelGenRet opr_fill_attr( return KernelGen::KernelPack::GetKernel(KernType::GaussianBlurKernel, arch); } +template <> +KernelGenRet opr_fill_attr( + std::unordered_map& attr_map, megdnn::Padding* opr, + const TensorNDArray& tensors, KernelGen::Arch arch, + const std::unordered_map& proxy_attr) { + auto param = opr->param(); + + if (param.padding_mode == ::megdnn::Padding::Param::PaddingMode::CONSTANT) { + attr_map["padding_mode"] = CCAttr("CONSTANT"); + } else if (param.padding_mode == ::megdnn::Padding::Param::PaddingMode::REPLICATE) { + attr_map["padding_mode"] = CCAttr("REPLICATE"); + } else if (param.padding_mode == ::megdnn::Padding::Param::PaddingMode::REFLECT) { + attr_map["padding_mode"] = CCAttr("REFLECT"); + } + FILL_MAP(attr_map, param, padding_val); + attr_map["front_offsets:0"] = param.front_offset_dim0; + attr_map["front_offsets:1"] = param.front_offset_dim1; + attr_map["front_offsets:2"] = param.front_offset_dim2; + attr_map["front_offsets:3"] = param.front_offset_dim3; + attr_map["front_offsets:4"] = param.front_offset_dim4; + attr_map["front_offsets:5"] = param.front_offset_dim5; + attr_map["front_offsets:6"] = param.front_offset_dim6; + attr_map["back_offsets:0"] = param.back_offset_dim0; + attr_map["back_offsets:1"] = param.back_offset_dim1; + attr_map["back_offsets:2"] = param.back_offset_dim2; + attr_map["back_offsets:3"] = param.back_offset_dim3; + attr_map["back_offsets:4"] = param.back_offset_dim4; + attr_map["back_offsets:5"] = param.back_offset_dim5; + attr_map["back_offsets:6"] = param.back_offset_dim6; + return KernelGen::KernelPack::GetKernel(KernType::PaddingKernel, arch); +} + template <> KernelGenRet opr_fill_attr( std::unordered_map& attr_map, megdnn::CVRoicopy* opr, diff --git a/compiler/test/kernel/common/src/cc_proxy.cpp b/compiler/test/kernel/common/src/cc_proxy.cpp index 1f7e0a15..9e1cb0b5 100644 --- a/compiler/test/kernel/common/src/cc_proxy.cpp +++ b/compiler/test/kernel/common/src/cc_proxy.cpp @@ -453,6 +453,7 @@ DEF_CCOPRPROXY(megdnn::Argsort); DEF_CCOPRPROXY(megdnn::ConcatForward); DEF_CCOPRPROXY(megdnn::ArgmaxForward); DEF_CCOPRPROXY(megdnn::GaussianBlurForward); +DEF_CCOPRPROXY(megdnn::PaddingForward); #undef DEF_CCOPRPROXY diff --git a/compiler/test/kernel/common/src/checker.cpp b/compiler/test/kernel/common/src/checker.cpp index 11a3099a..f526553a 100644 --- a/compiler/test/kernel/common/src/checker.cpp +++ b/compiler/test/kernel/common/src/checker.cpp @@ -408,6 +408,7 @@ template class Checker; template class Checker; template class Checker; template class Checker; +template class Checker; //! CV DEF_CV_OPR(megdnn::CVtranspose); diff --git a/compiler/test/kernel/opr/generalIntrinsic/Fp16conv.cpp b/compiler/test/kernel/opr/generalIntrinsic/Fp16conv.cpp index b4b4fa51..5f12feb4 100644 --- a/compiler/test/kernel/opr/generalIntrinsic/Fp16conv.cpp +++ b/compiler/test/kernel/opr/generalIntrinsic/Fp16conv.cpp @@ -7,7 +7,7 @@ using namespace megcc::KernelGen; #if ENABLE_KERNEL_FP16 TEST(GI, Fp16ConvWinogradNCHW88) { Checker checker(Arch::BAREMETAL, 1); - checker.set_epsilon(0.38); //! For CI. When tested individually, the error can be + checker.set_epsilon(0.48); //! For CI. When tested individually, the error can be //! controlled within 1e-3. ConvBiasForward::Param param; param.stride_h = 1; diff --git a/compiler/test/kernel/opr/naive/padding.cpp b/compiler/test/kernel/opr/naive/padding.cpp new file mode 100644 index 00000000..2b1eb024 --- /dev/null +++ b/compiler/test/kernel/opr/naive/padding.cpp @@ -0,0 +1,55 @@ +#include "test/kernel/common/checker.h" +using namespace megdnn; +using namespace megcc::test; +using namespace megcc::KernelGen; + +TEST(NAIVE, Padding) { + Checker checker; + megdnn::Padding::Param param; + using PaddingMode = megdnn::Padding::Param::PaddingMode; + auto run = [&checker, ¶m]() { + for (auto mode : + {PaddingMode::CONSTANT, PaddingMode::REFLECT, PaddingMode::REPLICATE}) { + for (int offset0 : {3, 5}) { + for (int offset1 : {5, 7}) { + param.back_offset_dim0 = 0; + param.back_offset_dim1 = 0; + param.back_offset_dim2 = offset0; + param.back_offset_dim3 = offset1; + + param.front_offset_dim0 = 0; + param.front_offset_dim1 = 0; + param.front_offset_dim2 = offset0; + param.front_offset_dim3 = offset1; + + param.padding_mode = mode; + param.padding_val = 2.f; + + checker.set_param(param); + checker.exec({{1, 1, 30, 30}, {}}); + checker.exec({{1, 3, 30, 30}, {}}); + checker.exec({{3, 3, 30, 30}, {}}); + } + } + } + }; + UniformIntRNG seq(0, 255); + checker.set_rng(0, &seq); + checker.set_dtype(0, dtype::Uint8()); + checker.set_dtype(1, dtype::Uint8()); + run(); + + megcc::test::UniformRNG rng(-30, 30); + checker.set_rng(0, &rng); + checker.set_dtype(0, dtype::Float32()); + checker.set_dtype(1, dtype::Float32()); + run(); + +#if ENABLE_KERNEL_FP16 + megcc::test::Float16PeriodicalRNG rng1; + checker.set_rng(0, &rng1); + checker.set_dtype(0, dtype::Float16()); + checker.set_dtype(1, dtype::Float16()); + run(); +#endif +} \ No newline at end of file