From eb2cfcc165167a60f6d79474acb21d81c762c4e1 Mon Sep 17 00:00:00 2001
From: Cyprien Noel
Date: Mon, 29 Jun 2015 15:27:49 -0700
Subject: [PATCH] Remove batch size division by number of devices

Follow expectation of `iter_size` in compounding the effective batch size
by the number of devices.
---
 include/caffe/parallel.hpp |  3 --
 src/caffe/net.cpp          |  3 --
 src/caffe/parallel.cpp     | 56 --------------------------------------
 tools/caffe.cpp            |  3 +-
 4 files changed, 2 insertions(+), 63 deletions(-)

diff --git a/include/caffe/parallel.hpp b/include/caffe/parallel.hpp
index 2b99069dc2d..b1e45ad9c04 100644
--- a/include/caffe/parallel.hpp
+++ b/include/caffe/parallel.hpp
@@ -95,9 +95,6 @@ class P2PSync : public GPUParams<Dtype>, public Solver<Dtype>::Callback,
   static void run(shared_ptr<P2PSync<Dtype> > root,
       const vector<int>& gpus);
 
-  // Divide the batch size by the number of solvers
-  static void divide_batch_size(NetParameter* net);
-
 protected:
   void on_start(Timer* timer, ostringstream* timing);
   void on_gradients_ready(Timer* timer, ostringstream* timing);
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index f1579b85a27..399d255217a 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -40,9 +40,6 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
   // the current NetState.
   NetParameter filtered_param;
   FilterNet(in_param, &filtered_param);
-  if (phase_ == TRAIN) {
-    caffe::P2PSync<Dtype>::divide_batch_size(&filtered_param);
-  }
   if (Caffe::root_solver()) {
     LOG(INFO) << "Initializing net from parameters: " << std::endl
               << filtered_param.DebugString();
diff --git a/src/caffe/parallel.cpp b/src/caffe/parallel.cpp
index 3a934461b98..c1cdd4e3727 100644
--- a/src/caffe/parallel.cpp
+++ b/src/caffe/parallel.cpp
@@ -435,62 +435,6 @@ void P2PSync<Dtype>::run(shared_ptr<P2PSync<Dtype> > root,
   }
 }
 
-template<typename Dtype>
-void P2PSync<Dtype>::divide_batch_size(NetParameter* net) {
-  int solver_count = Caffe::solver_count();
-  for (int i = 0; i < net->layer_size(); ++i) {
-    string m = "Batch size must be divisible by the number of solvers (GPUs)";
-    if (net->layer(i).has_data_param()) {
-      if (net->layer(i).data_param().has_batch_size()) {
-        uint32_t total = net->layer(i).data_param().batch_size();
-        uint32_t batch = total / solver_count;
-        CHECK(batch * solver_count == total) << m;
-        net->mutable_layer(i)->mutable_data_param()->set_batch_size(batch);
-
-        // Also adjust the prefetch count, as it is shared by all solvers
-        uint32_t prefetch = net->layer(i).data_param().prefetch();
-        net->mutable_layer(i)->mutable_data_param()->set_prefetch(
-            prefetch * solver_count);
-      }
-    }
-    if (net->layer(i).has_hdf5_data_param()) {
-      if (net->layer(i).hdf5_data_param().has_batch_size()) {
-        uint32_t total = net->layer(i).hdf5_data_param().batch_size();
-        uint32_t batch = total / solver_count;
-        CHECK(batch * solver_count == total) << m;
-        net->mutable_layer(i)->mutable_hdf5_data_param()->set_batch_size(batch);
-      }
-    }
-    if (net->layer(i).has_image_data_param()) {
-      if (net->layer(i).image_data_param().has_batch_size()) {
-        uint32_t total = net->layer(i).image_data_param().batch_size();
-        uint32_t batch = total / solver_count;
-        CHECK(batch * solver_count == total) << m;
-        net->mutable_layer(i)->mutable_image_data_param()->set_batch_size(
-            batch);
-      }
-    }
-    if (net->layer(i).has_memory_data_param()) {
-      if (net->layer(i).memory_data_param().has_batch_size()) {
-        uint32_t total = net->layer(i).memory_data_param().batch_size();
-        uint32_t batch = total / solver_count;
-        CHECK(batch * solver_count == total) << m;
-        net->mutable_layer(i)->mutable_memory_data_param()->set_batch_size(
-            batch);
-      }
-    }
-    if (net->layer(i).has_window_data_param()) {
-      if (net->layer(i).window_data_param().has_batch_size()) {
-        uint32_t total = net->layer(i).window_data_param().batch_size();
-        uint32_t batch = total / solver_count;
-        CHECK(batch * solver_count == total) << m;
-        net->mutable_layer(i)->mutable_window_data_param()->set_batch_size(
-            batch);
-      }
-    }
-  }
-}
-
 INSTANTIATE_CLASS(Params);
 INSTANTIATE_CLASS(GPUParams);
 INSTANTIATE_CLASS(P2PSync);
diff --git a/tools/caffe.cpp b/tools/caffe.cpp
index 97e6121b79a..3a23802add5 100644
--- a/tools/caffe.cpp
+++ b/tools/caffe.cpp
@@ -21,7 +21,8 @@ using std::ostringstream;
 
 DEFINE_string(gpu, "",
     "Optional; run in GPU mode on given device IDs separated by ','."
-    "Use '-gpu all' to run on all available GPUs.");
+    "Use '-gpu all' to run on all available GPUs. The effective training "
+    "batch size is multiplied by the number of devices.");
 DEFINE_string(solver, "",
     "The solver definition protocol buffer text file.");
 DEFINE_string(model, "",
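
With this change, each device runs the full batch size given in the prototxt,
so the effective training batch size compounds across devices the same way
`iter_size` compounds it across gradient-accumulation steps. A minimal sketch
of the arithmetic, assuming that reading of the commit; the function and
variable names below are illustrative, not part of the Caffe API:

#include <cstdint>
#include <iostream>

// Illustrative helper (not a Caffe function): number of examples consumed
// per solver iteration once every device processes the full prototxt batch.
uint32_t effective_batch_size(uint32_t prototxt_batch, uint32_t iter_size,
                              uint32_t num_devices) {
  return prototxt_batch * iter_size * num_devices;
}

int main() {
  // e.g. batch_size: 32 and iter_size: 2 in the prototxts, trained with
  // `-gpu 0,1,2,3` (4 devices) -> 32 * 2 * 4 = 256
  std::cout << effective_batch_size(32, 2, 4) << std::endl;  // prints 256
  return 0;
}

Previously, divide_batch_size split the prototxt batch size evenly across
solvers (and failed if it was not divisible), keeping the effective batch size
fixed; after this patch, adding GPUs grows it instead, matching `iter_size`.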