caffe.patch
286 lines (271 loc) · 14.3 KB
diff --git a/cmake/MKLDNN.cmake b/cmake/MKLDNN.cmake
index 43c51f7e..8c234743 100644
--- a/cmake/MKLDNN.cmake
+++ b/cmake/MKLDNN.cmake
@@ -2,6 +2,7 @@
function(Download_MKLDNN)
set(EXTERNAL_DIR ${CMAKE_SOURCE_DIR}/external)
set(MKLDNN_DIR ${EXTERNAL_DIR}/mkldnn)
+ set(CAFFE_ROOT ${MKLDNN_DIR}/../../)
set(MKLDNN_SOURCE_DIR ${MKLDNN_DIR}/src)
set(MKLDNN_BUILD_DIR ${MKLDNN_DIR}/build)
set(MKLDNN_INSTALL_DIR ${MKLDNN_DIR}/install CACHE PATH "Installation path of MKLDNN")
@@ -21,7 +22,9 @@ function(Download_MKLDNN)
CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} -DMKLROOT=${MKL_ROOT_DIR}
#--Download step
GIT_REPOSITORY https://github.com/01org/mkl-dnn.git
- GIT_TAG ${MKLDNN_COMMIT}
+ GIT_TAG ${MKLDNN_COMMIT}
+#--Patch step
+ PATCH_COMMAND git apply ${CAFFE_ROOT}clip.patch
#--Build step
BINARY_DIR ${MKLDNN_BUILD_DIR}
BUILD_COMMAND cmake ${MKLDNN_SOURCE_DIR}
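
Note on the hunk above: the added PATCH_COMMAND applies clip.patch -- expected at the Caffe source root, per the new CAFFE_ROOT variable -- to the mkl-dnn checkout before it is built. Presumably this is what supplies the eltwise_clip algorithm that mkldnn_relu_layer.cpp relies on further down; the contents of clip.patch are not part of this file.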
diff --git a/include/caffe/layers/mkldnn_layers.hpp b/include/caffe/layers/mkldnn_layers.hpp
index 19bcee9d..a4b8ff80 100644
--- a/include/caffe/layers/mkldnn_layers.hpp
+++ b/include/caffe/layers/mkldnn_layers.hpp
@@ -175,6 +175,7 @@ private:
, bwdw_top_diff_primitive, bwdw_bottom_data_primitive;
int32_t width_, height_, width_out_, height_out_, kernel_w_, kernel_h_, stride_w_, stride_h_;
int pad_w_, pad_h_;
+ int pad_b_, pad_r_;
mkldnn::algorithm conv_algorithm;
/* In case of (iter_size > 1) we need additional buffers */
@@ -396,6 +397,7 @@ private:
shared_ptr<primitive> fwd_bottom_data_primitive, bwd_top_diff_primitive;
int32_t num_, width_, height_, channels_;
+ float min, max;
PERFORMANCE_EVENT_ID_DECL(perf_id_fw_);
PERFORMANCE_EVENT_ID_DECL(perf_id_bw_);
};
diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp
index 205be870..7b46aef1 100644
--- a/src/caffe/layers/base_conv_layer.cpp
+++ b/src/caffe/layers/base_conv_layer.cpp
@@ -118,8 +118,10 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
<< "pad_h & pad_w can only be used for 2D convolution.";
CHECK_EQ(0, conv_param.pad_size())
<< "Either pad or pad_h/w should be specified; not both.";
- pad_data[0] = conv_param.pad_h();
- pad_data[1] = conv_param.pad_w();
+ pad_data[0] = conv_param.pad_h(); // top
+ pad_data[1] = conv_param.pad_w(); // left
+ pad_data[2] = conv_param.pad_b(); // bottom
+ pad_data[3] = conv_param.pad_r(); // right
} else {
const int num_pad_dims = conv_param.pad_size();
CHECK(num_pad_dims == 0 || num_pad_dims == 1 ||
@@ -128,7 +130,7 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
<< "(pad specified " << num_pad_dims << " times; "
<< num_spatial_axes_ << " spatial dims).";
const int kDefaultPad = 0;
- for (int i = 0; i < num_spatial_axes_; ++i) {
+ for (int i = 0; i < num_spatial_axes_ + 2; ++i) {
pad_data[i] = (num_pad_dims == 0) ? kDefaultPad :
conv_param.pad((num_pad_dims == 1) ? 0 : i);
}
@@ -152,7 +154,7 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
is_1x1_ = true;
for (int i = 0; i < num_spatial_axes_; ++i) {
is_1x1_ &=
- kernel_shape_data[i] == 1 && stride_data[i] == 1 && pad_data[i] == 0;
+ kernel_shape_data[i] == 1 && stride_data[i] == 1 && pad_data[i] == 0 && pad_data[i+2] == 0;
if (!is_1x1_) { break; }
}
// Configure output channels and groups.
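
Note on the hunks above: for 2D convolution, pad_data now carries four entries -- [top, left, bottom, right] -- and the 1x1 shortcut additionally requires the trailing (bottom/right) pads to be zero. One caveat: when pad is given as a repeated field with one entry per spatial axis (num_pad_dims == num_spatial_axes_), the extended loop bound num_spatial_axes_ + 2 appears to index conv_param.pad(i) past the end of the repeated field; only the 0- and 1-entry forms, and the explicit pad_h/pad_w/pad_b/pad_r fields, look safe.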
diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp
index 82efc241..a33a010e 100644
--- a/src/caffe/layers/conv_layer.cpp
+++ b/src/caffe/layers/conv_layer.cpp
@@ -56,7 +56,8 @@ void ConvolutionLayer<Dtype>::compute_output_shape() {
// i + 1 to skip channel axis
const int input_dim = this->input_shape(i + 1);
const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
- const int output_dim = (input_dim + 2 * pad_data[i] - kernel_extent)
+ // const int output_dim = (input_dim + 2 * pad_data[i] - kernel_extent)
+ const int output_dim = (input_dim + pad_data[i] + pad_data[i+2] - kernel_extent)
/ stride_data[i] + 1;
this->output_shape_.push_back(output_dim);
}
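
With separate leading and trailing pads, the output size follows (a restatement of the integer arithmetic above; floor division intended):

    output = \left\lfloor \frac{input + pad_{begin} + pad_{end} - \big(dilation \cdot (kernel - 1) + 1\big)}{stride} \right\rfloor + 1

For example, input 13, kernel 3, dilation 1, stride 2, pad_h = 0, pad_b = 1 gives (13 + 0 + 1 - 3)/2 + 1 = 6.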
diff --git a/src/caffe/layers/mkldnn_convolution_layer.cpp b/src/caffe/layers/mkldnn_convolution_layer.cpp
index a262b4b3..f20695bd 100644
--- a/src/caffe/layers/mkldnn_convolution_layer.cpp
+++ b/src/caffe/layers/mkldnn_convolution_layer.cpp
@@ -64,7 +64,7 @@ MKLDNNConvolutionLayer<Dtype>::MKLDNNConvolutionLayer(const LayerParameter& para
, bwdd_top_diff_primitive(NULL), bwdd_weights_data_primitive(NULL)
, bwdw_top_diff_primitive(NULL), bwdw_bottom_data_primitive(NULL)
, width_(0), height_(0), width_out_(0), height_out_(0), kernel_w_(0), kernel_h_(0)
- , stride_w_(0), stride_h_(0), pad_w_(0), pad_h_(0),
+ , stride_w_(0), stride_h_(0), pad_w_(0), pad_h_(0), pad_r_(0), pad_b_(0),
bwdw_weights_diff_iter(NULL),
bwdw_bias_diff_iter(NULL),
bwdw_weights_diff_memory_iter(NULL),
@@ -79,9 +79,9 @@ template <typename Dtype>
void MKLDNNConvolutionLayer<Dtype>::compute_output_shape()
{
ConvolutionLayer<Dtype>::compute_output_shape();
- this->height_out_ = (this->height_ + 2 * this->pad_h_ - this->kernel_h_)
+ this->height_out_ = (this->height_ + this->pad_b_ + this->pad_h_ - this->kernel_h_)
/ this->stride_h_ + 1;
- this->width_out_ = (this->width_ + 2 * this->pad_w_ - this->kernel_w_)
+ this->width_out_ = (this->width_ + this->pad_r_ + this->pad_w_ - this->kernel_w_)
/ this->stride_w_ + 1;
}
@@ -95,8 +95,10 @@ void MKLDNNConvolutionLayer<Dtype>::init_properties(const vector<Blob<Dtype>*>&
this->height_ = bottom[0]->height();
this->channels_ = bottom[0]->channels();
this->num_ = bottom[0]->num();
- this->pad_w_ = this->pad_.cpu_data()[1];
this->pad_h_ = this->pad_.cpu_data()[0];
+ this->pad_w_ = this->pad_.cpu_data()[1];
+ this->pad_b_ = this->pad_.cpu_data()[2];
+ this->pad_r_ = this->pad_.cpu_data()[3];
this->kernel_w_ = this->kernel_shape_.cpu_data()[1];
this->kernel_h_ = this->kernel_shape_.cpu_data()[0];
string _conv_algorithm = this->layer_param_.convolution_param().conv_algorithm();
@@ -175,7 +177,8 @@ void MKLDNNConvolutionLayer<Dtype>::InitConvolutionFwd(const vector<Blob<Dtype>*
int32_t kh = this->kernel_h_;
memory::dims convolutionStrides {this->stride_h_, this->stride_w_};
- memory::dims padding {this->pad_h_, this->pad_w_};
+ memory::dims padding1 {this->pad_h_, this->pad_w_};
+ memory::dims padding2 {this->pad_b_, this->pad_r_};
// ---- Initialize memory descriptors (fromat = any) to create convolution descriptor -------------
memory::data_type mpcsn = memory::data_type::f32;
@@ -207,11 +210,11 @@ void MKLDNNConvolutionLayer<Dtype>::InitConvolutionFwd(const vector<Blob<Dtype>*
if (this->bias_term_) {
convFwd_desc.reset(new convolution_forward::desc(propagation, convAlgorithm
, init_bottom_md, init_weights_md, init_bias_md, init_top_md
- , convolutionStrides, padding, padding, padding_kind::zero));
+ , convolutionStrides, padding1, padding2, padding_kind::zero));
} else {
convFwd_desc.reset(new convolution_forward::desc(propagation, convAlgorithm
, init_bottom_md, init_weights_md, init_top_md
- , convolutionStrides, padding, padding, padding_kind::zero));
+ , convolutionStrides, padding1, padding2, padding_kind::zero));
}
shared_ptr<convolution_relu_forward::desc> convReluFwd_desc;
if(relu) convReluFwd_desc.reset(new convolution_relu_forward::desc(*convFwd_desc, negative_slope));
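
Note on the hunk above: MKL-DNN's convolution_forward::desc takes distinct leading and trailing padding arguments (the two consecutive padding parameters in the calls above). The stock code passed the same padding dims for both, forcing symmetric padding; the patch passes {pad_h_, pad_w_} as the top/left pads and {pad_b_, pad_r_} as the bottom/right pads.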
diff --git a/src/caffe/layers/mkldnn_pooling_layer.cpp b/src/caffe/layers/mkldnn_pooling_layer.cpp
index 239ee66a..c5556449 100644
--- a/src/caffe/layers/mkldnn_pooling_layer.cpp
+++ b/src/caffe/layers/mkldnn_pooling_layer.cpp
@@ -109,8 +109,12 @@ void MKLDNNPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom
pad_l_ = pad_r_ = pool_param.pad(1);
}
} else {
- pad_t_ = pad_b_ = pool_param.pad_h();
- pad_l_ = pad_r_ = pool_param.pad_w();
+ // pad_t_ = pad_b_ = pool_param.pad_h();
+ // pad_l_ = pad_r_ = pool_param.pad_w();
+ pad_t_ = pool_param.pad_h();
+ pad_l_ = pool_param.pad_w();
+ pad_b_ = pool_param.pad_b();
+ pad_r_ = pool_param.pad_r();
}
if (!pool_param.has_stride_h()) {
@@ -261,6 +265,7 @@ void MKLDNNPoolingLayer<Dtype>::InitPoolingFwd(const vector<Blob<Dtype>*>& botto
int32_t pl = this->pad_l_;
int32_t pr = this->pad_r_;
+ LOG(INFO) << "info: " << c << ";" << ih << ";" << iw << ";" << oh << ";" << ow << ";" << kh << ";" << kw << ";"<< sh<< ";" << sw<< ";" << pt<< ";" << pb << ";"<< pl<< ";" << pr;
bool bottom_data_is_prv = (const_cast<Dtype*>(bottom[0]->prv_data()) != NULL);
engine cpu_engine = CpuEngine::Instance().get_engine();
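
Note on the hunks above: pooling previously mirrored pad_h/pad_w onto both sides of each axis; each side now comes from its own proto field. The added LOG(INFO) line is a debug trace of the full pooling geometry (channels, input/output sizes, kernel, stride, and all four pads).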
diff --git a/src/caffe/layers/mkldnn_relu_layer.cpp b/src/caffe/layers/mkldnn_relu_layer.cpp
index e21b24a5..7758a562 100644
--- a/src/caffe/layers/mkldnn_relu_layer.cpp
+++ b/src/caffe/layers/mkldnn_relu_layer.cpp
@@ -101,13 +101,26 @@ void MKLDNNReLULayer<Dtype>::InitReLUFwd(const vector<Blob<Dtype>*>& bottom, con
usr_data_mpd.reset(new memory::primitive_desc(*bottom_data_md, cpu_engine));
}
top_data_md = bottom_data_md;
+ float _max = this->layer_param_.relu_param().max();
+ float _min = this->layer_param_.relu_param().min();
// ---- Initialize relu primitive descriptor -------------
//relu_forward::desc reluFwd_desc(propagation, *bottom_data_md, negative_slope);
// MKLDNN is deprecating standalone relu primitive in MKL-DNN.
// Now MKLDNN has eltwise primitive with eltwise_relu algorithm inside.
- eltwise_forward::desc eltwise_reluFwd_desc(propagation, eltwise_relu, *bottom_data_md, negative_slope);
+ eltwise_forward::desc *pdesc = NULL;
+ if (_max || _min) {
+ pdesc = new eltwise_forward::desc(propagation, eltwise_clip, *bottom_data_md, _min, _max);
+ // use eltwise_clip;
+ LOG(INFO) << "info: have relu value: " << _max;
+ } else {
+ pdesc = new eltwise_forward::desc(propagation, eltwise_relu, *bottom_data_md, negative_slope);
+ // use eltwise_relu;
+ LOG(INFO) << "info: do not have relu value: ";
+ }
+ //eltwise_forward::desc eltwise_reluFwd_desc(propagation, eltwise_clip, *bottom_data_md, _min, _max);
+ //eltwise_forward::desc eltwise_reluFwd_desc(propagation, eltwise_relu, *bottom_data_md, negative_slope);
// ---- Determining engine to use -----------------------
std::string subengines = this->layer_param_.engine();
if (subengines == "" || subengines == "MKLDNN")
@@ -117,7 +130,7 @@ void MKLDNNReLULayer<Dtype>::InitReLUFwd(const vector<Blob<Dtype>*>& bottom, con
reluFwd_pd = NULL;
for(; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) {
try {
- reluFwd_pd.reset(new relu_forward::primitive_desc(eltwise_reluFwd_desc,
+ reluFwd_pd.reset(new relu_forward::primitive_desc(*pdesc,
ep.getMKLDNNSubEngine(subEngineIndex)));
}
catch(...) {
@@ -125,6 +138,7 @@ void MKLDNNReLULayer<Dtype>::InitReLUFwd(const vector<Blob<Dtype>*>& bottom, con
}
break;
}
+ delete pdesc;
CHECK(reluFwd_pd);
// --- init primitive and prv_memory descriptors ----------------------
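
The hunk above switches the forward primitive from eltwise_relu to eltwise_clip whenever relu_param.min or relu_param.max is nonzero, so a clip bounded at exactly [0, 0] is indistinguishable from "unset" and falls back to plain ReLU. As a usage sketch against the patched caffe.proto below -- the layer and blob names here are placeholders -- a ReLU6-style clip might be written as:

    layer {
      name: "relu6"
      type: "ReLU"
      bottom: "conv1"
      top: "conv1"
      relu_param { min: 0 max: 6 }  # max != 0, so the eltwise_clip path is taken
    }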
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index 4bf06a6e..a5ecb237 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -518,15 +518,14 @@ void Net<Dtype>::CompileNet(const NetParameter& param,
param_temp.clear_layer(); // Remove layers
CompilationRuleOne(param_temp0, &param_temp);
- NetParameter param_temp2; // temporary compiled param
- param_temp2.CopyFrom(param_temp);
- param_temp2.clear_layer(); // Remove layers
+ // NetParameter param_temp2; // temporary compiled param
+ // param_temp2.CopyFrom(param_temp);
+ // param_temp2.clear_layer(); // Remove layers
+ // CompilationRuleTwo(param_temp, &param_temp2);
- CompilationRuleTwo(param_temp, &param_temp2);
-
- param_compiled->CopyFrom(param_temp2);
+ param_compiled->CopyFrom(param_temp);
param_compiled->clear_layer(); // Remove layers
- CompilationRuleThree(param_temp2, param_compiled);
+ CompilationRuleThree(param_temp, param_compiled);
}
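
Note on the hunk above: CompileNet now feeds the output of CompilationRuleOne straight into CompilationRuleThree, with CompilationRuleTwo commented out. In this codebase CompilationRuleTwo appears to be the rule that fuses Convolution + ReLU into a single MKLDNN primitive; disabling it presumably keeps ReLU layers standalone so the new clip path in mkldnn_relu_layer.cpp can apply.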
template <typename Dtype>
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 1d4608de..e770e3d1 100755
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -992,6 +992,8 @@ message ConvolutionParameter {
// specify both spatial dimensions.
optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only)
optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only)
+ optional uint32 pad_b = 100 [default = 0];
+ optional uint32 pad_r = 101 [default = 0];
optional uint32 kernel_h = 11; // The kernel height (2D only)
optional uint32 kernel_w = 12; // The kernel width (2D only)
optional uint32 stride_h = 13; // The stride height (2D only)
@@ -1525,6 +1527,8 @@ message PoolingParameter {
// specify both spatial dimensions.
optional uint32 pad_h = 5 [default = 0]; // The padding height (2D only)
optional uint32 pad_w = 6 [default = 0]; // The padding width (2D only)
+ optional uint32 pad_b = 100 [default = 0];
+ optional uint32 pad_r = 101 [default = 0];
optional uint32 kernel_h = 7; // The kernel height (2D only)
optional uint32 kernel_w = 8; // The kernel width (2D only)
optional uint32 stride_h = 9 [default = 1]; // The stride height (2D only)
@@ -1680,6 +1684,8 @@ message ReLUParameter {
}
optional Engine engine = 2 [default = DEFAULT];
optional bool fuse = 3 [default = false];
+ optional float max = 4;
+ optional float min = 5;
}
message ReshapeParameter {
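
As a usage sketch of the new padding fields (assuming the patched proto above; names and sizes are placeholders), an asymmetric "SAME"-style 3x3/stride-2 convolution over an even-sized input could pad only the bottom and right edges:

    layer {
      name: "conv1"
      type: "Convolution"
      bottom: "data"
      top: "conv1"
      convolution_param {
        num_output: 64
        kernel_h: 3  kernel_w: 3
        stride_h: 2  stride_w: 2
        pad_h: 0  pad_w: 0  # top / left
        pad_b: 1  pad_r: 1  # bottom / right
      }
    }

For a 224x224 input this yields (224 + 0 + 1 - 3)/2 + 1 = 112 in each dimension.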