Skip to content

Commit

Permalink
quantizedConv computes on integer value
Browse files · Browse the repository at this point in the history
  • Loading branch information
snoopyisadog committed Apr 14, 2021
1 parent d10d1e4 commit 01b60fa
Showing 1 changed file with 41 additions and 27 deletions.
68 changes: 41 additions & 27 deletions src/caffe/layers/conv_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#include "caffe/layers/conv_layer.hpp"
#include "caffe/util/math_functions.hpp"
#define W this->blobs_[0]
#define B this->blobs_[1]

namespace caffe {

Expand Down Expand Up @@ -52,34 +54,42 @@ void ConvolutionLayer<Dtype>::compute_output_shape() {
template <typename Dtype>
void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
// set up quantization parameters: scale + zero_point
const Dtype input_scale = this->input_scale_;
const Dtype output_scale = this->output_scale_;
const Dtype weight_scale = this->weight_scale_;
const Dtype bias_scale = this->bias_scale_;
const Dtype bias_scale = this->bias_scale_; // bias_scale = input_scale * weight_scale
const int input_zero_point = this->input_zero_point_;
const int output_zero_point = this->output_zero_point_;
const int weight_zero_point = this->weight_zero_point_;
const int bias_zero_point = this->bias_zero_point_;
const Dtype saturate = this->saturate_;
const bool quant_in = (input_scale != Dtype(1.0) || input_zero_point != 0);
const bool quant_out = (output_scale != Dtype(1.0) || output_zero_point != 0);
const bool quant_w = (weight_scale != Dtype(1.0) || weight_zero_point != 0);
const bool quant_b = (this->bias_term_&& (bias_scale != Dtype(1.0) || bias_zero_point != 0));
if (quant_w) {
Dtype *qw = this->blobs_[0]->mutable_cpu_data();
caffe_cpu_dequantize<Dtype>(this->blobs_[0]->count(), qw, weight_scale, weight_zero_point);
/*** Quantization Computation
(1) shift input/weight/bias w.r.t corresponding zero_point
(2) compute Convolution+Bias on the integer value range
(3) scale the output by input_scale*weight_scale/output_scale, and
(4) shift the output by output_zero_point
*Assumption is that bias_scale = input_scale*weight_scale
For a floating-value model, only (2) is computed with floating values
***/
const bool shift_input = (input_zero_point != 0);
const bool shift_weight = (weight_zero_point != 0);
const bool shift_bias = (bias_zero_point != 0);
const bool scale_output = (input_scale != Dtype(1.0) || weight_scale != Dtype(1.0) ||
output_scale != Dtype(1.0));
const bool shift_output = (output_zero_point != 0);

if (shift_weight) { // shift the quantized weight
caffe_add_scalar<Dtype>(W->count(), Dtype(-weight_zero_point), W->mutable_cpu_data());
}
if (quant_b) {
Dtype *qb = this->blobs_[1]->mutable_cpu_data();
caffe_cpu_dequantize<Dtype>(this->blobs_[1]->count(), qb, bias_scale, bias_zero_point);
if (shift_bias) {
caffe_add_scalar<Dtype>(B->count(), Dtype(-bias_zero_point), B->mutable_cpu_data());
}

const Dtype* weight = this->blobs_[0]->cpu_data();
for (int i = 0; i < bottom.size(); ++i) {
if (quant_in) {
Dtype* qin = bottom[i]->mutable_cpu_data();
caffe_cpu_dequantize<Dtype>(bottom[i]->count(), qin, input_scale, input_zero_point);
if (shift_input) {
caffe_add_scalar<Dtype>(bottom[i]->count(),
Dtype(-input_zero_point), bottom[i]->mutable_cpu_data());
}

const Dtype* bottom_data = bottom[i]->cpu_data();
Expand All @@ -95,8 +105,14 @@ void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
}

const int count_t = top[i]->count();
if (quant_out) {
caffe_cpu_quantize<Dtype>(count_t, top_data, output_scale, output_zero_point);
if (scale_output) {
//Dtype out_scal = input_scale * weight_scale / output_scale;
Dtype out_scal = bias_scale / output_scale;
caffe_scal<Dtype>(count_t, out_scal, top_data);
caffe_cpu_round<Dtype>(count_t, top_data);
}
if (shift_output) {
caffe_add_scalar<Dtype>(count_t, Dtype(output_zero_point), top_data);
}
if (saturate == ConvolutionParameter_SaturateMethod_Signed)
caffe_cpu_signed_saturate(count_t, top_data);
Expand All @@ -107,19 +123,17 @@ void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
if (saturate == ConvolutionParameter_SaturateMethod_Unsigned_8bit)
caffe_cpu_unsigned_8bit_saturate(count_t, top_data);

if (quant_in) { // restore the quantized input blob
Dtype* qin = bottom[i]->mutable_cpu_data();
caffe_cpu_quantize<Dtype>(bottom[i]->count(), qin, input_scale, input_zero_point);
if (shift_input) { // shift the quantized input blob back to correct range
caffe_add_scalar<Dtype>(bottom[i]->count(),
Dtype(input_zero_point), bottom[i]->mutable_cpu_data());
}
}
// restore quantized weight/bias
if (quant_w) {
Dtype *qw = this->blobs_[0]->mutable_cpu_data();
caffe_cpu_quantize<Dtype>(this->blobs_[0]->count(), qw, weight_scale, weight_zero_point);
// shift quantized weight/bias back to correct range
if (shift_weight) {
caffe_add_scalar<Dtype>(W->count(), Dtype(weight_zero_point), W->mutable_cpu_data());
}
if (quant_b) {
Dtype *qb = this->blobs_[1]->mutable_cpu_data();
caffe_cpu_quantize<Dtype>(this->blobs_[1]->count(), qb, bias_scale, bias_zero_point);
if (shift_bias) {
caffe_add_scalar<Dtype>(B->count(), Dtype(bias_zero_point), B->mutable_cpu_data());
}
}

Expand Down

0 comments on commit 01b60fa

Please sign in to comment.