From e3f59d3a037b00eb8c052d83d9f841b132bd86e4 Mon Sep 17 00:00:00 2001
From: mhouston
Date: Fri, 10 Jul 2015 15:56:10 -0700
Subject: [PATCH] Tweaks to track device in syncedmem

---
Review amendments: the device save/restore calls around cudaFree are now
wrapped in CUDA_CHECK like every other CUDA call in this patch, and the
added lines carry short comments. Hunk sizes are unchanged; the index
hashes below predate these amendments.

 include/caffe/syncedmem.hpp |  5 +++--
 src/caffe/syncedmem.cpp     | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/include/caffe/syncedmem.hpp b/include/caffe/syncedmem.hpp
index 6bdf84ab660..62aadef498d 100644
--- a/include/caffe/syncedmem.hpp
+++ b/include/caffe/syncedmem.hpp
@@ -45,10 +45,10 @@ class SyncedMemory {
  public:
   SyncedMemory()
       : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED),
-        own_cpu_data_(false), own_gpu_data_(false) {}
+        own_cpu_data_(false), own_gpu_data_(false), gpu_device_(-1) {}
   explicit SyncedMemory(size_t size)
       : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED),
-        own_cpu_data_(false), own_gpu_data_(false) {}
+        own_cpu_data_(false), own_gpu_data_(false), gpu_device_(-1) {}
   ~SyncedMemory();
   const void* cpu_data();
   void set_cpu_data(void* data);
@@ -73,6 +73,7 @@ class SyncedMemory {
   SyncedHead head_;
   bool own_cpu_data_;
   bool own_gpu_data_;
+  int gpu_device_;  // device current when gpu_ptr_ was allocated, or -1
 
   DISABLE_COPY_AND_ASSIGN(SyncedMemory);
 };  // class SyncedMemory
diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp
index 029d53a5cb6..a667a867af0 100644
--- a/src/caffe/syncedmem.cpp
+++ b/src/caffe/syncedmem.cpp
@@ -13,7 +13,13 @@ SyncedMemory::~SyncedMemory() {
 
 #ifndef CPU_ONLY
   if (gpu_ptr_ && own_gpu_data_) {
+    int initial_device;  // device current on entry; restored after the free
+    CUDA_CHECK(cudaGetDevice(&initial_device));
+    if (gpu_device_ != -1) {  // -1: no allocation device was recorded
+      CUDA_CHECK(cudaSetDevice(gpu_device_));  // free on allocating device
+    }
     CUDA_CHECK(cudaFree(gpu_ptr_));
+    CUDA_CHECK(cudaSetDevice(initial_device));
   }
 #endif  // CPU_ONLY
 }
@@ -48,6 +54,7 @@ inline void SyncedMemory::to_gpu() {
 #ifndef CPU_ONLY
   switch (head_) {
   case UNINITIALIZED:
+    CUDA_CHECK(cudaGetDevice(&gpu_device_));  // remember allocating device
     CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
     caffe_gpu_memset(size_, 0, gpu_ptr_);
     head_ = HEAD_AT_GPU;
@@ -55,6 +62,7 @@ inline void SyncedMemory::to_gpu() {
     break;
   case HEAD_AT_CPU:
     if (gpu_ptr_ == NULL) {
+      CUDA_CHECK(cudaGetDevice(&gpu_device_));  // remember allocating device
       CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
       own_gpu_data_ = true;
     }
@@ -98,7 +106,13 @@ void SyncedMemory::set_gpu_data(void* data) {
 #ifndef CPU_ONLY
   CHECK(data);
   if (own_gpu_data_) {
+    int initial_device;  // device current on entry; restored after the free
+    CUDA_CHECK(cudaGetDevice(&initial_device));
+    if (gpu_device_ != -1) {  // -1: no allocation device was recorded
+      CUDA_CHECK(cudaSetDevice(gpu_device_));  // free on allocating device
+    }
     CUDA_CHECK(cudaFree(gpu_ptr_));
+    CUDA_CHECK(cudaSetDevice(initial_device));
   }
   gpu_ptr_ = data;
   head_ = HEAD_AT_GPU;
@@ -128,6 +142,7 @@ void* SyncedMemory::mutable_gpu_data() {
 void SyncedMemory::async_gpu_push(const cudaStream_t& stream) {
   CHECK(head_ == HEAD_AT_CPU);
   if (gpu_ptr_ == NULL) {
+    CUDA_CHECK(cudaGetDevice(&gpu_device_));  // remember allocating device
     CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
     own_gpu_data_ = true;
   }