Skip to content

Commit

Permalink
Tweaks to walking the topology for corner cases
Browse files (browse the repository at this point in the history)
  • Loading branch information
mhouston authored and cypof committed Jul 14, 2015
1 parent e3f59d3 commit 3b22659
Showing 1 changed file with 40 additions and 29 deletions.
69 changes: 40 additions & 29 deletions src/caffe/parallel.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -119,18 +119,21 @@ void DevicePair::compute(const vector<int> devices, vector<DevicePair>* pairs) {
#ifndef CPU_ONLY
vector<int> remaining(devices);

// Group GPUs by board
for (int i = 0; i < remaining.size(); ++i) {
for (int j = i + 1; j < remaining.size(); ++j) {
cudaDeviceProp a, b;
CUDA_CHECK(cudaGetDeviceProperties(&a, remaining[i]));
CUDA_CHECK(cudaGetDeviceProperties(&b, remaining[j]));
if (a.isMultiGpuBoard && b.isMultiGpuBoard) {
if (a.multiGpuBoardGroupID == b.multiGpuBoardGroupID) {
pairs->push_back(DevicePair(remaining[i], remaining[j]));
DLOG(INFO) << "GPU board: " << remaining[i] << ":" << remaining[j];
remaining.erase(remaining.begin() + j);
break;
// Group GPUs by board - some boards can have more than 2 ASICs
for (int d = 0; d < remaining.size(); ++d) {
for (int i = 0; i < remaining.size(); ++i) {
for (int j = i + 1; j < remaining.size(); ++j) {
cudaDeviceProp a, b;
CUDA_CHECK(cudaGetDeviceProperties(&a, remaining[i]));
CUDA_CHECK(cudaGetDeviceProperties(&b, remaining[j]));
if (a.isMultiGpuBoard && b.isMultiGpuBoard) {
if (a.multiGpuBoardGroupID == b.multiGpuBoardGroupID) {
pairs->push_back(DevicePair(remaining[i], remaining[j]));
DLOG(INFO) << "GPU board: " << remaining[i]
<< ":" << remaining[j];
remaining.erase(remaining.begin() + j);
break;
}
}
}
}
Expand All @@ -141,32 +144,40 @@ void DevicePair::compute(const vector<int> devices, vector<DevicePair>* pairs) {
}
DLOG(INFO) << "GPUs paired by boards, remaining: " << s.str();

// Group by P2P accessibility
for (int i = 0; i < remaining.size(); ++i) {
for (int j = i + 1; j < remaining.size(); ++j) {
int access;
CUDA_CHECK(cudaDeviceCanAccessPeer(&access, remaining[i], remaining[j]));
if (access) {
pairs->push_back(DevicePair(remaining[i], remaining[j]));
DLOG(INFO) << "P2P pair: " << remaining[i] << ":" << remaining[j];
remaining.erase(remaining.begin() + j);
break;
// Group by P2P accessibility - P2P group can be larger than 4 boards
for (int d = 0; d < remaining.size(); ++d) {
for (int i = 0; i < remaining.size(); ++i) {
for (int j = i + 1; j < remaining.size(); ++j) {
int access;
CUDA_CHECK(cudaDeviceCanAccessPeer(&access,
remaining[i],
remaining[j]));
if (access) {
pairs->push_back(DevicePair(remaining[i], remaining[j]));
DLOG(INFO) << "P2P pair: " << remaining[i]
<< ":" << remaining[j];
remaining.erase(remaining.begin() + j);
break;
}
}
}
}
s.str("");
for (int i = 0; i < remaining.size(); ++i) {
s << (i ? ", " : "") << remaining[i];
s << (i ? ", " : "") << remaining[i];
}
DLOG(INFO) << "GPUs paired by P2P access, remaining: " << s.str();

// Group remaining
for (int i = 0; i < remaining.size(); ++i) {
for (int j = i + 1; j < remaining.size(); ++j) {
pairs->push_back(DevicePair(remaining[i], remaining[j]));
DLOG(INFO) << "Remaining pair: " << remaining[i] << ":" << remaining[j];
remaining.erase(remaining.begin() + j);
break;
for (int d = 0; d < remaining.size(); ++d) { // try to pair everyone
for (int i = 0; i < remaining.size(); ++i) {
for (int j = i + 1; j < remaining.size(); ++j) {
pairs->push_back(DevicePair(remaining[i], remaining[j]));
DLOG(INFO) << "Remaining pair: " << remaining[i]
<< ":" << remaining[j];
remaining.erase(remaining.begin() + j);
break;
}
}
}
CHECK_EQ(remaining.size(), 1);
Expand Down

0 comments on commit 3b22659

Please sign in to comment.