From eb2cfcc165167a60f6d79474acb21d81c762c4e1 Mon Sep 17 00:00:00 2001
From: Cyprien Noel
Date: Mon, 29 Jun 2015 15:27:49 -0700
Subject: [PATCH] Remove batch size division by number of devices

Follow expectation of `iter_size` in compounding the effective batch size
by the number of devices.
---
 include/caffe/parallel.hpp |  3 --
 src/caffe/net.cpp          |  3 --
 src/caffe/parallel.cpp     | 56 --------------------------------------
 tools/caffe.cpp            |  3 +-
 4 files changed, 2 insertions(+), 63 deletions(-)

diff --git a/include/caffe/parallel.hpp b/include/caffe/parallel.hpp
index 2b99069dc2d..b1e45ad9c04 100644
--- a/include/caffe/parallel.hpp
+++ b/include/caffe/parallel.hpp
@@ -95,9 +95,6 @@ class P2PSync : public GPUParams<Dtype>, public Solver<Dtype>::Callback,
   static void run(shared_ptr<P2PSync<Dtype> > root,
       const vector<int>& gpus);
 
-  // Divide the batch size by the number of solvers
-  static void divide_batch_size(NetParameter* net);
-
 protected:
   void on_start(Timer* timer, ostringstream* timing);
   void on_gradients_ready(Timer* timer, ostringstream* timing);
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index f1579b85a27..399d255217a 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -40,9 +40,6 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
   // the current NetState.
   NetParameter filtered_param;
   FilterNet(in_param, &filtered_param);
-  if (phase_ == TRAIN) {
-    caffe::P2PSync<Dtype>::divide_batch_size(&filtered_param);
-  }
   if (Caffe::root_solver()) {
     LOG(INFO) << "Initializing net from parameters: " << std::endl
               << filtered_param.DebugString();
diff --git a/src/caffe/parallel.cpp b/src/caffe/parallel.cpp
index 3a934461b98..c1cdd4e3727 100644
--- a/src/caffe/parallel.cpp
+++ b/src/caffe/parallel.cpp
@@ -435,62 +435,6 @@ void P2PSync<Dtype>::run(shared_ptr<P2PSync<Dtype> > root,
   }
 }
 
-template<typename Dtype>
-void P2PSync<Dtype>::divide_batch_size(NetParameter* net) {
-  int solver_count = Caffe::solver_count();
-  for (int i = 0; i < net->layer_size(); ++i) {
-    string m = "Batch size must be divisible by the number of solvers (GPUs)";
-    if (net->layer(i).has_data_param()) {
-      if (net->layer(i).data_param().has_batch_size()) {
-        uint32_t total = net->layer(i).data_param().batch_size();
-        uint32_t batch = total / solver_count;
-        CHECK(batch * solver_count == total) << m;
-        net->mutable_layer(i)->mutable_data_param()->set_batch_size(batch);
-
-        // Also adjust the prefetch count, as it is shared by all solvers
-        uint32_t prefetch = net->layer(i).data_param().prefetch();
-        net->mutable_layer(i)->mutable_data_param()->set_prefetch(
-            prefetch * solver_count);
-      }
-    }
-    if (net->layer(i).has_hdf5_data_param()) {
-      if (net->layer(i).hdf5_data_param().has_batch_size()) {
-        uint32_t total = net->layer(i).hdf5_data_param().batch_size();
-        uint32_t batch = total / solver_count;
-        CHECK(batch * solver_count == total) << m;
-        net->mutable_layer(i)->mutable_hdf5_data_param()->set_batch_size(batch);
-      }
-    }
-    if (net->layer(i).has_image_data_param()) {
-      if (net->layer(i).image_data_param().has_batch_size()) {
-        uint32_t total = net->layer(i).image_data_param().batch_size();
-        uint32_t batch = total / solver_count;
-        CHECK(batch * solver_count == total) << m;
-        net->mutable_layer(i)->mutable_image_data_param()->set_batch_size(
-            batch);
-      }
-    }
-    if (net->layer(i).has_memory_data_param()) {
-      if (net->layer(i).memory_data_param().has_batch_size()) {
-        uint32_t total = net->layer(i).memory_data_param().batch_size();
-        uint32_t batch = total / solver_count;
-        CHECK(batch * solver_count == total) << m;
-        net->mutable_layer(i)->mutable_memory_data_param()->set_batch_size(
-            batch);
-      }
-    }
-    if (net->layer(i).has_window_data_param()) {
-      if (net->layer(i).window_data_param().has_batch_size()) {
-        uint32_t total = net->layer(i).window_data_param().batch_size();
-        uint32_t batch = total / solver_count;
-        CHECK(batch * solver_count == total) << m;
-        net->mutable_layer(i)->mutable_window_data_param()->set_batch_size(
-            batch);
-      }
-    }
-  }
-}
-
 INSTANTIATE_CLASS(Params);
 INSTANTIATE_CLASS(GPUParams);
 INSTANTIATE_CLASS(P2PSync);
diff --git a/tools/caffe.cpp b/tools/caffe.cpp
index 97e6121b79a..3a23802add5 100644
--- a/tools/caffe.cpp
+++ b/tools/caffe.cpp
@@ -21,7 +21,8 @@ using std::ostringstream;
 
 DEFINE_string(gpu, "",
     "Optional; run in GPU mode on given device IDs separated by ','."
-    "Use '-gpu all' to run on all available GPUs.");
+    "Use '-gpu all' to run on all available GPUs. The effective training "
+    "batch size is multiplied by the number of devices.");
 DEFINE_string(solver, "",
     "The solver definition protocol buffer text file.");
 DEFINE_string(model, "",
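
With this change, each device runs the full batch size given in the prototxt,
so the effective training batch size compounds across devices the same way
`iter_size` compounds it across gradient-accumulation steps. A minimal sketch
of the arithmetic, assuming that reading of the commit; the function and
variable names below are illustrative, not part of the Caffe API:

#include <cstdint>
#include <iostream>

// Illustrative helper (not a Caffe function): number of examples consumed
// per solver iteration once every device processes the full prototxt batch.
uint32_t effective_batch_size(uint32_t prototxt_batch, uint32_t iter_size,
                              uint32_t num_devices) {
  return prototxt_batch * iter_size * num_devices;
}

int main() {
  // e.g. batch_size: 32 and iter_size: 2 in the prototxts, trained with
  // `-gpu 0,1,2,3` (4 devices) -> 32 * 2 * 4 = 256
  std::cout << effective_batch_size(32, 2, 4) << std::endl;  // prints 256
  return 0;
}

Previously, divide_batch_size split the prototxt batch size evenly across
solvers (and failed if it was not divisible), keeping the effective batch size
fixed; after this patch, adding GPUs grows it instead, matching `iter_size`.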