From 7f002008f152b52f24656fdbbaa7c9891520f87b Mon Sep 17 00:00:00 2001
From: Jongsoo Park
Date: Tue, 13 Nov 2018 00:50:35 -0800
Subject: [PATCH] remove ShouldFp32FallbackToNCHW (#13814)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/13814

D10333829 implemented 3D conv in NHWC in fp32 ops so int8 ops don't need
special handling anymore.

Reviewed By: hx89

Differential Revision: D13017666

fbshipit-source-id: 41df449f5e21c4c7134cc5c480e559f8c247069b
---
 caffe2/quantization/server/CMakeLists.txt    |   1 -
 .../server/conv_pool_dnnlowp_op_base.h       |  25 -----
 caffe2/quantization/server/op_wrapper.cc     |  19 ----
 caffe2/quantization/server/op_wrapper.h      | 100 +++--------------
 4 files changed, 12 insertions(+), 133 deletions(-)
 delete mode 100644 caffe2/quantization/server/op_wrapper.cc

diff --git a/caffe2/quantization/server/CMakeLists.txt b/caffe2/quantization/server/CMakeLists.txt
index 15884eb97ba80..ab8596814a2bc 100644
--- a/caffe2/quantization/server/CMakeLists.txt
+++ b/caffe2/quantization/server/CMakeLists.txt
@@ -29,7 +29,6 @@ list(APPEND Caffe2_CPU_SRCS
   "${CMAKE_CURRENT_SOURCE_DIR}/fully_connected_dnnlowp_op.cc"
   "${CMAKE_CURRENT_SOURCE_DIR}/fully_connected_rowwise_dnnlowp_op.cc"
   "${CMAKE_CURRENT_SOURCE_DIR}/lstm_unit_dnnlowp_op.cc"
-  "${CMAKE_CURRENT_SOURCE_DIR}/op_wrapper.cc"
   "${CMAKE_CURRENT_SOURCE_DIR}/pool_dnnlowp_op.cc"
   "${CMAKE_CURRENT_SOURCE_DIR}/quantize_dnnlowp_op.cc"
   "${CMAKE_CURRENT_SOURCE_DIR}/sigmoid_dnnlowp_op.cc"
diff --git a/caffe2/quantization/server/conv_pool_dnnlowp_op_base.h b/caffe2/quantization/server/conv_pool_dnnlowp_op_base.h
index be6f2697725d1..bcca812f3d007 100644
--- a/caffe2/quantization/server/conv_pool_dnnlowp_op_base.h
+++ b/caffe2/quantization/server/conv_pool_dnnlowp_op_base.h
@@ -96,31 +96,6 @@ class ConvPoolDNNLowPOpBase : public ConvPoolOpBase<CPUContext> {
       }
     }
 
-    bool fp32_fallback_to_nchw = ShouldFp32FallbackToNCHW(debug_def());
-    std::vector<float> temp;
-    if (fp32_fallback_to_nchw) {
-      temp.resize(OutputTensorCPU_(0)->numel());
-      int ndim = float_tensor->dim();
-      CAFFE_ENFORCE_GE(ndim, 3);
-      const int N = float_tensor->dim32(0), C = float_tensor->dim32(1);
-      int image_size = 1;
-      for (auto i = 2; i < ndim; ++i) {
-        image_size *= float_tensor->dim32(i);
-      }
-      std::array<int, 2> dims = {C, image_size};
-      std::array<int, 2> axes = {1, 0};
-      for (int n = 0; n < N; ++n) {
-        math::Transpose(
-            2,
-            dims.data(),
-            axes.data(),
-            ref + n * image_size * C,
-            temp.data() + n * image_size * C,
-            &context_);
-      }
-      ref = temp.data();
-    }
-
     dnnlowp::MeasureQuantizationError(
         actual, ref, OutputTensorCPU_(0)->numel(), &quantization_error_stats_);
   }
diff --git a/caffe2/quantization/server/op_wrapper.cc b/caffe2/quantization/server/op_wrapper.cc
deleted file mode 100644
index e42289e1cf6e9..0000000000000
--- a/caffe2/quantization/server/op_wrapper.cc
+++ /dev/null
@@ -1,19 +0,0 @@
-#include "op_wrapper.h"
-
-namespace caffe2 {
-
-bool ShouldFp32FallbackToNCHW(const OperatorDef& def) {
-  if ((def.type() == "Conv" || def.type() == "Int8Conv" ||
-       def.type() == "ConvRelu" || def.type() == "Int8ConvRelu") &&
-      ArgumentHelper::GetSingleArgument<OperatorDef, std::string>(
-          def, "order", "NCHW") == "NHWC") {
-    auto kernels =
-        ArgumentHelper::GetRepeatedArgument<OperatorDef, int>(def, "kernels");
-    if (kernels.size() > 2) {
-      return true;
-    }
-  }
-  return false;
-}
-
-} // namespace caffe2
diff --git a/caffe2/quantization/server/op_wrapper.h b/caffe2/quantization/server/op_wrapper.h
index 883f51600a97d..99ffb89917cfb 100644
--- a/caffe2/quantization/server/op_wrapper.h
+++ b/caffe2/quantization/server/op_wrapper.h
@@ -7,8 +7,6 @@
 namespace caffe2 {
 
-bool ShouldFp32FallbackToNCHW(const OperatorDef& def);
-
 /**
  * Wrap a floating-point operator with quantized inputs with type T.
  * This class is to measure quantization error against fp32 reference.
  */
@@ -23,15 +21,6 @@ class OpWrapper {
       CHECK_NOTNULL(local_input_blobs_.back());
     }
     OperatorDef def = op->debug_def();
-    if (ShouldFp32FallbackToNCHW(def)) {
-      // C2 default Conv operator doesn't support 3D convolution in NHWC
-      Argument* arg = GetMutableArgument("order", false, &def);
-      arg->set_s("NCHW");
-      std::string new_order =
-          ArgumentHelper::GetSingleArgument<OperatorDef, std::string>(
-              def, "order", "");
-      assert(new_order == "NCHW");
-    }
     local_op_.reset(new OpType(def, &local_ws_));
     for (auto name : def.output()) {
       local_output_blobs_.push_back(local_ws_.GetBlob(name));
@@ -42,90 +31,25 @@ class OpWrapper {
   void DequantizeInput() {
     const OperatorDef& def = op_->debug_def();
     CPUContext context(def.device_option());
-    bool fallback_to_nchw = ShouldFp32FallbackToNCHW(def);
 
     for (int i = 0; i < op_->InputSize(); ++i) {
       if (op_->InputIsType<int8::Int8TensorCPU>(i)) {
         const TensorCPU& qtensor = op_->Input<int8::Int8TensorCPU>(i).t;
         TensorCPU *float_tensor =
             BlobGetMutableTensor(local_input_blobs_[i], CPU);
-        if (fallback_to_nchw && i < 2) {
-          // NHWC2NCHW for input
-          std::vector<T> temp(qtensor.numel());
-
-          int ndim = qtensor.dim();
-          std::vector<int> dims(qtensor.sizes().begin(), qtensor.sizes().end());
-          std::vector<int> axes(ndim);
-          axes[0] = 0;
-          axes[1] = ndim - 1;
-          for (auto j = 1; j < ndim - 1; ++j) {
-            axes[j + 1] = j;
-          }
-
-          std::vector<int> new_dims(ndim);
-          for (auto j = 0; j < ndim; ++j) {
-            new_dims[j] = dims[axes[j]];
-          }
-          float_tensor->Resize(new_dims);
-
-          math::Transpose(
-              ndim,
-              dims.data(),
-              axes.data(),
-              qtensor.data<T>(),
-              temp.data(),
-              &context);
-
-          Dequantize<T>(
-              temp.data(),
-              float_tensor->template mutable_data<float>(),
-              qtensor.numel(),
-              dnnlowp::GetInputTensorQuantizationParamsOf(op_, i, qfactory_));
-        } else {
-          // FIXME: doesn't work for bias so we shouldn't quantize bias before
-          // model loading.
-          float_tensor->ResizeLike(qtensor);
-          Dequantize<T>(
-              qtensor.data<T>(),
-              float_tensor->template mutable_data<float>(),
-              qtensor.numel(),
-              dnnlowp::GetInputTensorQuantizationParamsOf(op_, i, qfactory_));
-        }
+        // FIXME: this doesn't work for bias, so we shouldn't quantize bias
+        // before model loading when running a shadow operator in fp32, e.g.
+        // for measuring quantization error.
+        float_tensor->ResizeLike(qtensor);
+        Dequantize<T>(
+            qtensor.data<T>(),
+            float_tensor->template mutable_data<float>(),
+            qtensor.numel(),
+            dnnlowp::GetInputTensorQuantizationParamsOf(op_, i, qfactory_));
       } else {
-        if (fallback_to_nchw && i < 2) {
-          // NHWC2NCHW for input
-          const TensorCPU& in_tensor = op_->Input(i, CPU);
-          TensorCPU* float_tensor =
-              BlobGetMutableTensor(local_input_blobs_[i], CPU);
-
-          int ndim = in_tensor.dim();
-          std::vector<int> dims(
-              in_tensor.sizes().begin(), in_tensor.sizes().end());
-          std::vector<int> axes(ndim);
-          axes[0] = 0;
-          axes[1] = ndim - 1;
-          for (int j = 1; j < ndim - 1; ++j) {
-            axes[j + 1] = j;
-          }
-
-          std::vector<int> new_dims(ndim);
-          for (auto j = 0; j < ndim; ++j) {
-            new_dims[j] = dims[axes[j]];
-          }
-          float_tensor->Resize(new_dims);
-
-          math::Transpose(
-              ndim,
-              dims.data(),
-              axes.data(),
-              in_tensor.data<float>(),
-              float_tensor->mutable_data<float>(),
-              &context);
-        } else {
-          local_input_blobs_[i]->ShareExternal(
-              const_cast<void*>(op_->Inputs()[i]->GetRaw()),
-              op_->Inputs()[i]->meta());
-        }
+        local_input_blobs_[i]->ShareExternal(
+            const_cast<void*>(op_->Inputs()[i]->GetRaw()),
+            op_->Inputs()[i]->meta());
       }
     }
  }
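
For context (not part of the patch above): the removed fallback permuted NHWC-ordered
tensors to NCHW with the axis order {0, ndim-1, 1, ..., ndim-2} before running the fp32
shadow operator. Below is a minimal standalone sketch of that index math only; the names
NhwcToNchwAxes and nhwc_dims are invented for this example and are not taken from the
Caffe2 code base.

#include <cstdio>
#include <vector>

// Build the axis permutation {0, ndim-1, 1, ..., ndim-2}, which maps an
// NHWC-ordered tensor to NCHW order (for a 3D conv: N, D, H, W, C -> N, C, D, H, W).
std::vector<int> NhwcToNchwAxes(int ndim) {
  std::vector<int> axes(ndim);
  axes[0] = 0;
  axes[1] = ndim - 1;
  for (int j = 1; j < ndim - 1; ++j) {
    axes[j + 1] = j;
  }
  return axes;
}

int main() {
  // A 3D-conv activation in NHWC order: N=2, D=4, H=8, W=8, C=16.
  std::vector<int> nhwc_dims = {2, 4, 8, 8, 16};
  std::vector<int> axes = NhwcToNchwAxes(static_cast<int>(nhwc_dims.size()));
  // Permute the dims the same way the data itself would be transposed.
  for (int axis : axes) {
    std::printf("%d ", nhwc_dims[axis]);  // prints: 2 16 4 8 8
  }
  std::printf("\n");
  return 0;
}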