remove ShouldFp32FallbackToNCHW (pytorch#13814)
Summary:
Pull Request resolved: pytorch#13814

D10333829 implemented 3D convolution in NHWC layout for the fp32 ops, so the int8 ops no longer need special handling.

Reviewed By: hx89

Differential Revision: D13017666

fbshipit-source-id: 41df449f5e21c4c7134cc5c480e559f8c247069b
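
For context: the special handling being removed repacked NHWC inputs into NCHW so the fp32 reference op could run, since the default fp32 Conv previously lacked 3D NHWC support. A minimal illustrative sketch of that kind of repacking, in plain C++ rather than the Caffe2 helpers, assuming a 3D-conv input of shape [N, D, H, W, C]:

#include <cstddef>
#include <vector>

// Repack an NHWC buffer of shape [N, D, H, W, C] into NCHW order.
// Illustrative only; the removed code used caffe2's math::Transpose instead.
std::vector<float> NhwcToNchw(const std::vector<float>& src,
                              int N, int D, int H, int W, int C) {
  std::vector<float> dst(src.size());
  const int spatial = D * H * W;  // collapsed spatial extent ("image_size")
  for (int n = 0; n < N; ++n) {
    for (int s = 0; s < spatial; ++s) {
      for (int c = 0; c < C; ++c) {
        // NHWC flat index: (n * spatial + s) * C + c
        // NCHW flat index: (n * C + c) * spatial + s
        dst[(n * C + c) * spatial + s] = src[(n * spatial + s) * C + c];
      }
    }
  }
  return dst;
}

With D10333829 in place, the fp32 ops accept NHWC directly, so this round trip (and its inverse on the output) is no longer needed.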
jspark1105 authored and facebook-github-bot committed Nov 13, 2018
1 parent a7eee0a commit 7f00200
Showing 4 changed files with 12 additions and 133 deletions.
1 change: 0 additions & 1 deletion caffe2/quantization/server/CMakeLists.txt
@@ -29,7 +29,6 @@ list(APPEND Caffe2_CPU_SRCS
"${CMAKE_CURRENT_SOURCE_DIR}/fully_connected_dnnlowp_op.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/fully_connected_rowwise_dnnlowp_op.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/lstm_unit_dnnlowp_op.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/op_wrapper.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/pool_dnnlowp_op.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/quantize_dnnlowp_op.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/sigmoid_dnnlowp_op.cc"
25 changes: 0 additions & 25 deletions caffe2/quantization/server/conv_pool_dnnlowp_op_base.h
@@ -96,31 +96,6 @@ class ConvPoolDNNLowPOpBase : public ConvPoolOpBase<CPUContext> {
}
}

bool fp32_fallback_to_nchw = ShouldFp32FallbackToNCHW(debug_def());
std::vector<float> temp;
if (fp32_fallback_to_nchw) {
temp.resize(OutputTensorCPU_(0)->numel());
int ndim = float_tensor->dim();
CAFFE_ENFORCE_GE(ndim, 3);
const int N = float_tensor->dim32(0), C = float_tensor->dim32(1);
int image_size = 1;
for (auto i = 2; i < ndim; ++i) {
image_size *= float_tensor->dim32(i);
}
std::array<int, 2> dims = {C, image_size};
std::array<int, 2> axes = {1, 0};
for (int n = 0; n < N; ++n) {
math::Transpose(
2,
dims.data(),
axes.data(),
ref + n * image_size * C,
temp.data() + n * image_size * C,
&context_);
}
ref = temp.data();
}

dnnlowp::MeasureQuantizationError(
actual, ref, OutputTensorCPU_(0)->numel(), &quantization_error_stats_);
}
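
The surviving code path compares the quantized output directly against the fp32 shadow output, with no layout conversion in between. As a rough, hypothetical illustration of that kind of comparison (names and metrics are invented here; the actual dnnlowp::MeasureQuantizationError may track different statistics):

#include <cmath>
#include <cstddef>

struct ErrorStats {
  float max_abs_err = 0.f;  // worst-case absolute deviation
  float sum_sq_err = 0.f;   // accumulator for an L2-style summary
};

// Compare a quantized op's (dequantized) output against the fp32 reference.
void MeasureErrorSketch(const float* actual, const float* ref,
                        std::size_t n, ErrorStats* stats) {
  for (std::size_t i = 0; i < n; ++i) {
    const float err = std::fabs(actual[i] - ref[i]);
    if (err > stats->max_abs_err) stats->max_abs_err = err;
    stats->sum_sq_err += err * err;
  }
}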
19 changes: 0 additions & 19 deletions caffe2/quantization/server/op_wrapper.cc

This file was deleted.

100 changes: 12 additions & 88 deletions caffe2/quantization/server/op_wrapper.h
@@ -7,8 +7,6 @@

namespace caffe2 {

bool ShouldFp32FallbackToNCHW(const OperatorDef& def);

/**
* Wrap a floating-point operator with quantized inputs with type T.
* This class is to measure quantization error against fp32 reference.
@@ -23,15 +21,6 @@ class OpWrapper {
CHECK_NOTNULL(local_input_blobs_.back());
}
OperatorDef def = op->debug_def();
if (ShouldFp32FallbackToNCHW(def)) {
// C2 default Conv operator doesn't support 3D convolution in NHWC
Argument* arg = GetMutableArgument("order", false, &def);
arg->set_s("NCHW");
std::string new_order =
ArgumentHelper::GetSingleArgument<OperatorDef, std::string>(
def, "order", "");
assert(new_order == "NCHW");
}
local_op_.reset(new OpType(def, &local_ws_));
for (auto name : def.output()) {
local_output_blobs_.push_back(local_ws_.GetBlob(name));
@@ -42,90 +31,25 @@
void DequantizeInput() {
const OperatorDef& def = op_->debug_def();
CPUContext context(def.device_option());
bool fallback_to_nchw = ShouldFp32FallbackToNCHW(def);

for (int i = 0; i < op_->InputSize(); ++i) {
if (op_->InputIsType<int8::Int8TensorCPU>(i)) {
const TensorCPU& qtensor = op_->Input<int8::Int8TensorCPU>(i).t;
TensorCPU *float_tensor =
BlobGetMutableTensor(local_input_blobs_[i], CPU);
if (fallback_to_nchw && i < 2) {
// NHWC2NCHW for input
std::vector<T> temp(qtensor.numel());

int ndim = qtensor.dim();
std::vector<int> dims(qtensor.sizes().begin(), qtensor.sizes().end());
std::vector<int> axes(ndim);
axes[0] = 0;
axes[1] = ndim - 1;
for (auto j = 1; j < ndim - 1; ++j) {
axes[j + 1] = j;
}

std::vector<int> new_dims(ndim);
for (auto j = 0; j < ndim; ++j) {
new_dims[j] = dims[axes[j]];
}
float_tensor->Resize(new_dims);

math::Transpose(
ndim,
dims.data(),
axes.data(),
qtensor.data<T>(),
temp.data(),
&context);

Dequantize(
temp.data(),
float_tensor->template mutable_data<float>(),
qtensor.numel(),
dnnlowp::GetInputTensorQuantizationParamsOf(op_, i, qfactory_));
} else {
// FIXME: doesn't work for bias so we shouldn't quantize bias before
// model loading.
float_tensor->ResizeLike(qtensor);
Dequantize(
qtensor.data<T>(),
float_tensor->template mutable_data<float>(),
qtensor.numel(),
dnnlowp::GetInputTensorQuantizationParamsOf(op_, i, qfactory_));
}
// FIXME: doesn't work for bias so we shouldn't quantize bias before
// model loading when we're running a shadow operator in fp32 for
// example for measuring quantization error.
float_tensor->ResizeLike(qtensor);
Dequantize(
qtensor.data<T>(),
float_tensor->template mutable_data<float>(),
qtensor.numel(),
dnnlowp::GetInputTensorQuantizationParamsOf(op_, i, qfactory_));
} else {
if (fallback_to_nchw && i < 2) {
// NHWC2NCHW for input
const TensorCPU& in_tensor = op_->Input<Tensor>(i, CPU);
TensorCPU* float_tensor =
BlobGetMutableTensor(local_input_blobs_[i], CPU);

int ndim = in_tensor.dim();
std::vector<int> dims(
in_tensor.sizes().begin(), in_tensor.sizes().end());
std::vector<int> axes(ndim);
axes[0] = 0;
axes[1] = ndim - 1;
for (int j = 1; j < ndim - 1; ++j) {
axes[j + 1] = j;
}

std::vector<int> new_dims(ndim);
for (auto j = 0; j < ndim; ++j) {
new_dims[j] = dims[axes[j]];
}
float_tensor->Resize(new_dims);

math::Transpose(
ndim,
dims.data(),
axes.data(),
in_tensor.data<float>(),
float_tensor->mutable_data<float>(),
&context);
} else {
local_input_blobs_[i]->ShareExternal(
const_cast<void*>(op_->Inputs()[i]->GetRaw()),
op_->Inputs()[i]->meta());
}
local_input_blobs_[i]->ShareExternal(
const_cast<void*>(op_->Inputs()[i]->GetRaw()),
op_->Inputs()[i]->meta());
}
}
}
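
For reference, the Dequantize calls above convert int8 data back to float using the input's quantization parameters. A minimal sketch of the standard affine scheme (real = scale * (q - zero_point)); the struct and field names here are illustrative, not the dnnlowp API:

#include <cstddef>
#include <cstdint>

// Assumed affine quantization parameters.
struct QuantParams {
  float scale;               // step size between representable values
  std::int32_t zero_point;   // quantized value that maps to real 0
};

void DequantizeSketch(const std::uint8_t* q, float* out, std::size_t n,
                      const QuantParams& p) {
  for (std::size_t i = 0; i < n; ++i) {
    out[i] = p.scale * (static_cast<std::int32_t>(q[i]) - p.zero_point);
  }
}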
