From 7f002008f152b52f24656fdbbaa7c9891520f87b Mon Sep 17 00:00:00 2001
From: Jongsoo Park
Date: Tue, 13 Nov 2018 00:50:35 -0800
Subject: [PATCH] remove ShouldFp32FallbackToNCHW (#13814)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/13814

D10333829 implemented 3D conv in NHWC in fp32 ops so int8 ops don't need
special handling anymore.

Reviewed By: hx89

Differential Revision: D13017666

fbshipit-source-id: 41df449f5e21c4c7134cc5c480e559f8c247069b
---
 caffe2/quantization/server/CMakeLists.txt    |   1 -
 .../server/conv_pool_dnnlowp_op_base.h       |  25 -----
 caffe2/quantization/server/op_wrapper.cc     |  19 ----
 caffe2/quantization/server/op_wrapper.h      | 100 +++--------------
 4 files changed, 12 insertions(+), 133 deletions(-)
 delete mode 100644 caffe2/quantization/server/op_wrapper.cc

diff --git a/caffe2/quantization/server/CMakeLists.txt b/caffe2/quantization/server/CMakeLists.txt
index 15884eb97ba80..ab8596814a2bc 100644
--- a/caffe2/quantization/server/CMakeLists.txt
+++ b/caffe2/quantization/server/CMakeLists.txt
@@ -29,7 +29,6 @@ list(APPEND Caffe2_CPU_SRCS
   "${CMAKE_CURRENT_SOURCE_DIR}/fully_connected_dnnlowp_op.cc"
   "${CMAKE_CURRENT_SOURCE_DIR}/fully_connected_rowwise_dnnlowp_op.cc"
   "${CMAKE_CURRENT_SOURCE_DIR}/lstm_unit_dnnlowp_op.cc"
-  "${CMAKE_CURRENT_SOURCE_DIR}/op_wrapper.cc"
   "${CMAKE_CURRENT_SOURCE_DIR}/pool_dnnlowp_op.cc"
   "${CMAKE_CURRENT_SOURCE_DIR}/quantize_dnnlowp_op.cc"
   "${CMAKE_CURRENT_SOURCE_DIR}/sigmoid_dnnlowp_op.cc"
diff --git a/caffe2/quantization/server/conv_pool_dnnlowp_op_base.h b/caffe2/quantization/server/conv_pool_dnnlowp_op_base.h
index be6f2697725d1..bcca812f3d007 100644
--- a/caffe2/quantization/server/conv_pool_dnnlowp_op_base.h
+++ b/caffe2/quantization/server/conv_pool_dnnlowp_op_base.h
@@ -96,31 +96,6 @@ class ConvPoolDNNLowPOpBase : public ConvPoolOpBase<CPUContext> {
       }
     }
 
-    bool fp32_fallback_to_nchw = ShouldFp32FallbackToNCHW(debug_def());
-    std::vector<float> temp;
-    if (fp32_fallback_to_nchw) {
-      temp.resize(OutputTensorCPU_(0)->numel());
-      int ndim = float_tensor->dim();
-      CAFFE_ENFORCE_GE(ndim, 3);
-      const int N = float_tensor->dim32(0), C = float_tensor->dim32(1);
-      int image_size = 1;
-      for (auto i = 2; i < ndim; ++i) {
-        image_size *= float_tensor->dim32(i);
-      }
-      std::array<int, 2> dims = {C, image_size};
-      std::array<int, 2> axes = {1, 0};
-      for (int n = 0; n < N; ++n) {
-        math::Transpose(
-            2,
-            dims.data(),
-            axes.data(),
-            ref + n * image_size * C,
-            temp.data() + n * image_size * C,
-            &context_);
-      }
-      ref = temp.data();
-    }
-
     dnnlowp::MeasureQuantizationError(
         actual, ref, OutputTensorCPU_(0)->numel(), &quantization_error_stats_);
   }
diff --git a/caffe2/quantization/server/op_wrapper.cc b/caffe2/quantization/server/op_wrapper.cc
deleted file mode 100644
index e42289e1cf6e9..0000000000000
--- a/caffe2/quantization/server/op_wrapper.cc
+++ /dev/null
@@ -1,19 +0,0 @@
-#include "op_wrapper.h"
-
-namespace caffe2 {
-
-bool ShouldFp32FallbackToNCHW(const OperatorDef& def) {
-  if ((def.type() == "Conv" || def.type() == "Int8Conv" ||
-       def.type() == "ConvRelu" || def.type() == "Int8ConvRelu") &&
-      ArgumentHelper::GetSingleArgument<OperatorDef, std::string>(
-          def, "order", "NCHW") == "NHWC") {
-    auto kernels =
-        ArgumentHelper::GetRepeatedArgument<OperatorDef, int>(def, "kernels");
-    if (kernels.size() > 2) {
-      return true;
-    }
-  }
-  return false;
-}
-
-} // namespace caffe2
diff --git a/caffe2/quantization/server/op_wrapper.h b/caffe2/quantization/server/op_wrapper.h
index 883f51600a97d..99ffb89917cfb 100644
--- a/caffe2/quantization/server/op_wrapper.h
+++ b/caffe2/quantization/server/op_wrapper.h
@@ -7,8 +7,6 @@
 namespace caffe2 {
 
-bool ShouldFp32FallbackToNCHW(const OperatorDef& def);
-
 /**
  * Wrap a floating-point operator with quantized inputs with type T.
  * This class is to measure quantization error against fp32 reference.
  */
@@ -23,15 +21,6 @@ class OpWrapper {
       CHECK_NOTNULL(local_input_blobs_.back());
     }
     OperatorDef def = op->debug_def();
-    if (ShouldFp32FallbackToNCHW(def)) {
-      // C2 default Conv operator doesn't support 3D convolution in NHWC
-      Argument* arg = GetMutableArgument("order", false, &def);
-      arg->set_s("NCHW");
-      std::string new_order =
-          ArgumentHelper::GetSingleArgument<OperatorDef, std::string>(
-              def, "order", "");
-      assert(new_order == "NCHW");
-    }
     local_op_.reset(new OpType(def, &local_ws_));
     for (auto name : def.output()) {
       local_output_blobs_.push_back(local_ws_.GetBlob(name));
@@ -42,90 +31,25 @@ class OpWrapper {
   void DequantizeInput() {
     const OperatorDef& def = op_->debug_def();
     CPUContext context(def.device_option());
-    bool fallback_to_nchw = ShouldFp32FallbackToNCHW(def);
 
     for (int i = 0; i < op_->InputSize(); ++i) {
       if (op_->InputIsType<int8::Int8TensorCPU>(i)) {
         const TensorCPU& qtensor = op_->Input<int8::Int8TensorCPU>(i).t;
         TensorCPU *float_tensor =
             BlobGetMutableTensor(local_input_blobs_[i], CPU);
-        if (fallback_to_nchw && i < 2) {
-          // NHWC2NCHW for input
-          std::vector<T> temp(qtensor.numel());
-
-          int ndim = qtensor.dim();
-          std::vector<int> dims(qtensor.sizes().begin(), qtensor.sizes().end());
-          std::vector<int> axes(ndim);
-          axes[0] = 0;
-          axes[1] = ndim - 1;
-          for (auto j = 1; j < ndim - 1; ++j) {
-            axes[j + 1] = j;
-          }
-
-          std::vector<int> new_dims(ndim);
-          for (auto j = 0; j < ndim; ++j) {
-            new_dims[j] = dims[axes[j]];
-          }
-          float_tensor->Resize(new_dims);
-
-          math::Transpose(
-              ndim,
-              dims.data(),
-              axes.data(),
-              qtensor.data<T>(),
-              temp.data(),
-              &context);
-
-          Dequantize<T>(
-              temp.data(),
-              float_tensor->template mutable_data<float>(),
-              qtensor.numel(),
-              dnnlowp::GetInputTensorQuantizationParamsOf(op_, i, qfactory_));
-        } else {
-          // FIXME: doesn't work for bias so we shouldn't quantize bias before
-          // model loading.
-          float_tensor->ResizeLike(qtensor);
-          Dequantize<T>(
-              qtensor.data<T>(),
-              float_tensor->template mutable_data<float>(),
-              qtensor.numel(),
-              dnnlowp::GetInputTensorQuantizationParamsOf(op_, i, qfactory_));
-        }
+        // FIXME: this doesn't work for bias, so we shouldn't quantize bias
+        // before model loading when running a shadow operator in fp32, e.g.
+        // for measuring quantization error.
+        float_tensor->ResizeLike(qtensor);
+        Dequantize<T>(
+            qtensor.data<T>(),
+            float_tensor->template mutable_data<float>(),
+            qtensor.numel(),
+            dnnlowp::GetInputTensorQuantizationParamsOf(op_, i, qfactory_));
       } else {
-        if (fallback_to_nchw && i < 2) {
-          // NHWC2NCHW for input
-          const TensorCPU& in_tensor = op_->Input(i, CPU);
-          TensorCPU* float_tensor =
-              BlobGetMutableTensor(local_input_blobs_[i], CPU);
-
-          int ndim = in_tensor.dim();
-          std::vector<int> dims(
-              in_tensor.sizes().begin(), in_tensor.sizes().end());
-          std::vector<int> axes(ndim);
-          axes[0] = 0;
-          axes[1] = ndim - 1;
-          for (int j = 1; j < ndim - 1; ++j) {
-            axes[j + 1] = j;
-          }
-
-          std::vector<int> new_dims(ndim);
-          for (auto j = 0; j < ndim; ++j) {
-            new_dims[j] = dims[axes[j]];
-          }
-          float_tensor->Resize(new_dims);
-
-          math::Transpose(
-              ndim,
-              dims.data(),
-              axes.data(),
-              in_tensor.data<float>(),
-              float_tensor->mutable_data<float>(),
-              &context);
-        } else {
-          local_input_blobs_[i]->ShareExternal(
-              const_cast<void*>(op_->Inputs()[i]->GetRaw()),
-              op_->Inputs()[i]->meta());
-        }
+        local_input_blobs_[i]->ShareExternal(
+            const_cast<void*>(op_->Inputs()[i]->GetRaw()),
+            op_->Inputs()[i]->meta());
       }
     }
  }
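
For context (not part of the patch above): the removed fallback permuted NHWC-ordered
tensors to NCHW with the axis order {0, ndim-1, 1, ..., ndim-2} before running the fp32
shadow operator. Below is a minimal standalone sketch of that index math only; the names
NhwcToNchwAxes and nhwc_dims are invented for this example and are not taken from the
Caffe2 code base.

#include <cstdio>
#include <vector>

// Build the axis permutation {0, ndim-1, 1, ..., ndim-2}, which maps an
// NHWC-ordered tensor to NCHW order (for a 3D conv: N, D, H, W, C -> N, C, D, H, W).
std::vector<int> NhwcToNchwAxes(int ndim) {
  std::vector<int> axes(ndim);
  axes[0] = 0;
  axes[1] = ndim - 1;
  for (int j = 1; j < ndim - 1; ++j) {
    axes[j + 1] = j;
  }
  return axes;
}

int main() {
  // A 3D-conv activation in NHWC order: N=2, D=4, H=8, W=8, C=16.
  std::vector<int> nhwc_dims = {2, 4, 8, 8, 16};
  std::vector<int> axes = NhwcToNchwAxes(static_cast<int>(nhwc_dims.size()));
  // Permute the dims the same way the data itself would be transposed.
  for (int axis : axes) {
    std::printf("%d ", nhwc_dims[axis]);  // prints: 2 16 4 8 8
  }
  std::printf("\n");
  return 0;
}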