Skip to content

Commit

Permalink
quantizedConv computes on integer value
Browse files · Browse the repository at this point in the history
  • Loading branch information
snoopyisadog committed Apr 14, 2021
1 parent d10d1e4 commit 01b60fa
Showing 1 changed file with 41 additions and 27 deletions.
68 changes: 41 additions & 27 deletions src/caffe/layers/conv_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#include "caffe/layers/conv_layer.hpp"
#include "caffe/util/math_functions.hpp"
#define W this->blobs_[0]
#define B this->blobs_[1]

namespace caffe {

Expand Down Expand Up @@ -52,34 +54,42 @@ void ConvolutionLayer<Dtype>::compute_output_shape() {
template <typename Dtype>
void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
// set up quantization parameters: scale + zero_point
const Dtype input_scale = this->input_scale_;
const Dtype output_scale = this->output_scale_;
const Dtype weight_scale = this->weight_scale_;
const Dtype bias_scale = this->bias_scale_;
const Dtype bias_scale = this->bias_scale_; // bias_scale = input_scale * weight_scale
const int input_zero_point = this->input_zero_point_;
const int output_zero_point = this->output_zero_point_;
const int weight_zero_point = this->weight_zero_point_;
const int bias_zero_point = this->bias_zero_point_;
const Dtype saturate = this->saturate_;
const bool quant_in = (input_scale != Dtype(1.0) || input_zero_point != 0);
const bool quant_out = (output_scale != Dtype(1.0) || output_zero_point != 0);
const bool quant_w = (weight_scale != Dtype(1.0) || weight_zero_point != 0);
const bool quant_b = (this->bias_term_&& (bias_scale != Dtype(1.0) || bias_zero_point != 0));
if (quant_w) {
Dtype *qw = this->blobs_[0]->mutable_cpu_data();
caffe_cpu_dequantize<Dtype>(this->blobs_[0]->count(), qw, weight_scale, weight_zero_point);
/*** Quantization Computation
(1) shift input/weight/bias w.r.t corresponding zero_point
(2) compute Convolution+Bias on the integer value range
(3) scale the output by input_scale*weight_scale/output_scale, and
(4) shift the output by output_zero_point
*Assumption is that bias_scale = input_scale*weight_scale
For a floating-value model, only (2) is computed with floating values
***/
const bool shift_input = (input_zero_point != 0);
const bool shift_weight = (weight_zero_point != 0);
const bool shift_bias = (bias_zero_point != 0);
const bool scale_output = (input_scale != Dtype(1.0) || weight_scale != Dtype(1.0) ||
output_scale != Dtype(1.0));
const bool shift_output = (output_zero_point != 0);

if (shift_weight) { // shift the quantized weight
caffe_add_scalar<Dtype>(W->count(), Dtype(-weight_zero_point), W->mutable_cpu_data());
}
if (quant_b) {
Dtype *qb = this->blobs_[1]->mutable_cpu_data();
caffe_cpu_dequantize<Dtype>(this->blobs_[1]->count(), qb, bias_scale, bias_zero_point);
if (shift_bias) {
caffe_add_scalar<Dtype>(B->count(), Dtype(-bias_zero_point), B->mutable_cpu_data());
}

const Dtype* weight = this->blobs_[0]->cpu_data();
for (int i = 0; i < bottom.size(); ++i) {
if (quant_in) {
Dtype* qin = bottom[i]->mutable_cpu_data();
caffe_cpu_dequantize<Dtype>(bottom[i]->count(), qin, input_scale, input_zero_point);
if (shift_input) {
caffe_add_scalar<Dtype>(bottom[i]->count(),
Dtype(-input_zero_point), bottom[i]->mutable_cpu_data());
}

const Dtype* bottom_data = bottom[i]->cpu_data();
Expand All @@ -95,8 +105,14 @@ void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
}

const int count_t = top[i]->count();
if (quant_out) {
caffe_cpu_quantize<Dtype>(count_t, top_data, output_scale, output_zero_point);
if (scale_output) {
//Dtype out_scal = input_scale * weight_scale / output_scale;
Dtype out_scal = bias_scale / output_scale;
caffe_scal<Dtype>(count_t, out_scal, top_data);
caffe_cpu_round<Dtype>(count_t, top_data);
}
if (shift_output) {
caffe_add_scalar<Dtype>(count_t, Dtype(output_zero_point), top_data);
}
if (saturate == ConvolutionParameter_SaturateMethod_Signed)
caffe_cpu_signed_saturate(count_t, top_data);
Expand All @@ -107,19 +123,17 @@ void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
if (saturate == ConvolutionParameter_SaturateMethod_Unsigned_8bit)
caffe_cpu_unsigned_8bit_saturate(count_t, top_data);

if (quant_in) { // restore the quantized input blob
Dtype* qin = bottom[i]->mutable_cpu_data();
caffe_cpu_quantize<Dtype>(bottom[i]->count(), qin, input_scale, input_zero_point);
if (shift_input) { // shift the quantized input blob back to correct range
caffe_add_scalar<Dtype>(bottom[i]->count(),
Dtype(input_zero_point), bottom[i]->mutable_cpu_data());
}
}
// restore quantized weight/bias
if (quant_w) {
Dtype *qw = this->blobs_[0]->mutable_cpu_data();
caffe_cpu_quantize<Dtype>(this->blobs_[0]->count(), qw, weight_scale, weight_zero_point);
// shift quantized weight/bias back to correct range
if (shift_weight) {
caffe_add_scalar<Dtype>(W->count(), Dtype(weight_zero_point), W->mutable_cpu_data());
}
if (quant_b) {
Dtype *qb = this->blobs_[1]->mutable_cpu_data();
caffe_cpu_quantize<Dtype>(this->blobs_[1]->count(), qb, bias_scale, bias_zero_point);
if (shift_bias) {
caffe_add_scalar<Dtype>(B->count(), Dtype(bias_zero_point), B->mutable_cpu_data());
}
}

Expand Down

0 comments on commit 01b60fa

Please sign in to comment.