Skip to content

Commit

Permalink
Improve broadcast order, start with distant nodes
Browse files: browse the repository at this point in the history.
  • Loading branch information
mhouston authored and cypof committed Jul 14, 2015
1 parent 3b22659 commit 48a7adf
Showing 1 changed file with 1 addition and 5 deletions.
6 changes: 1 addition & 5 deletions src/caffe/parallel.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -305,7 +305,7 @@ void P2PSync<Dtype>::on_start(Timer* timer, ostringstream* timing) {
if (children_.size()) {
timer->Start();
}
- for (int i = 0; i < children_.size(); ++i) {
+ for (int i = children_.size() - 1; i >= 0; i--) {
Dtype* src = data_;
Dtype* dst = children_[i]->data_;

Expand All @@ -319,11 +319,7 @@ void P2PSync<Dtype>::on_start(Timer* timer, ostringstream* timing) {

CUDA_CHECK(cudaMemcpyAsync(dst, src, size_ * sizeof(Dtype), //
cudaMemcpyDeviceToDevice, cudaStreamDefault));
- }
- if (children_.size()) {
CUDA_CHECK(cudaStreamSynchronize(cudaStreamDefault));
- }
- for (int i = 0; i < children_.size(); ++i) {
children_[i]->queue_.push(this);
}
if (children_.size()) {
Expand Down

0 comments on commit 48a7adf

Please sign in to comment.