Catch cudaError_t return val (nodiscard in rocm) (pytorch#16399)
Summary:
Pull Request resolved: pytorch#16399

Catch cudaError_t return values in a few places, because the return value is nodiscard in ROCm. Unless we add -Wno-unused-result, discarding it causes a compilation error.

Also in c10/cuda/test, check whether the host has a GPU. We were silently throwing away the error before (so not really testing the CUDA API).
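
For context, a minimal standalone sketch (not part of this commit) of the pattern the diffs below apply: capture the cudaError_t instead of discarding it, so the call compiles under ROCm's nodiscard without -Wno-unused-result, and warn rather than throw where the surrounding code is noexcept. The function name set_device_warn_only is illustrative only.

// Minimal sketch, not from this commit: consume the cudaError_t instead of
// discarding it, so the call compiles cleanly when the return type is
// nodiscard (as under ROCm) without adding -Wno-unused-result.
#include <cstdio>
#include <cuda_runtime.h>

// Warn-and-continue variant, usable from a noexcept function where throwing
// is not an option.
void set_device_warn_only(int device) noexcept {
  cudaError_t err = cudaSetDevice(device);
  if (err != cudaSuccess) {
    std::fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
  }
}

int main() {
  set_device_warn_only(0);
  return 0;
}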

Reviewed By: bddppq

Differential Revision: D13828281

fbshipit-source-id: 587d1cc31c20b836ce9594e3c18f067d322b2934
xw285cornell authored and facebook-github-bot committed Feb 11, 2019
1 parent ab6be72 commit 6fbb2f7
Showing 4 changed files with 19 additions and 6 deletions.
2 changes: 1 addition & 1 deletion c10/cuda/CUDACachingAllocator.cpp
@@ -479,7 +479,7 @@ struct THCCachingAllocator
cuda_events.emplace_back(event, block);
}

-  cudaSetDevice(prev_device);
+  C10_CUDA_CHECK(cudaSetDevice(prev_device));
}

void process_events()
5 changes: 4 additions & 1 deletion c10/cuda/impl/CUDAGuardImpl.h
@@ -39,7 +39,10 @@ struct CUDAGuardImpl final : public c10::impl::DeviceGuardImplInterface {
C10_CUDA_CHECK(cudaSetDevice(d.index()));
}
void uncheckedSetDevice(Device d) const noexcept override {
-  cudaSetDevice(d.index());
+  cudaError_t __err = cudaSetDevice(d.index());
+  if (__err != cudaSuccess) {
+    AT_WARN("CUDA error: ", cudaGetErrorString(__err));
+  }
}
Stream getStream(Device d) const noexcept override {
return getCurrentCUDAStream().unwrap();
14 changes: 12 additions & 2 deletions c10/cuda/impl/CUDATest.cpp
@@ -1,5 +1,6 @@
// Just a little test file to make sure that the CUDA library works

+#include <c10/cuda/CUDAException.h>
#include <c10/cuda/impl/CUDATest.h>

#include <cuda_runtime.h>
@@ -8,9 +9,18 @@ namespace c10 {
namespace cuda {
namespace impl {

+bool has_cuda_gpu() {
+  int count;
+  C10_CUDA_CHECK(cudaGetDeviceCount(&count));
+
+  return count != 0;
+}

int c10_cuda_test() {
-  int r;
-  cudaGetDevice(&r);
+  int r = 0;
+  if (has_cuda_gpu()) {
+    C10_CUDA_CHECK(cudaGetDevice(&r));
+  }
return r;
}

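A hypothetical usage sketch (not part of this commit) of the same GPU-presence guard, written standalone so a check skips cleanly on a GPU-less host instead of swallowing the CUDA error:

// Hypothetical sketch, not from this commit: skip the device query when no
// GPU is present, mirroring the has_cuda_gpu() guard added above.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
  int count = 0;
  if (cudaGetDeviceCount(&count) != cudaSuccess || count == 0) {
    std::printf("no CUDA GPU available, skipping device query\n");
    return 0;
  }
  int device = -1;
  if (cudaGetDevice(&device) != cudaSuccess) {
    std::fprintf(stderr, "cudaGetDevice failed\n");
    return 1;
  }
  std::printf("current device: %d\n", device);
  return 0;
}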
4 changes: 2 additions & 2 deletions caffe2/core/context_gpu.h
@@ -203,7 +203,7 @@ class CAFFE2_CUDA_API CUDAContext final : public BaseContext {
// FinishDeviceComputation must be called on the same cpu thread as
// SwitchToDevice()
void FinishDeviceComputation() override {
-  cudaStreamSynchronize(getCudaObjects().GetStream(gpu_id_));
+  CUDA_ENFORCE(cudaStreamSynchronize(getCudaObjects().GetStream(gpu_id_)));
cudaError_t error = cudaGetLastError();
if (error != cudaSuccess) {
CAFFE_THROW("Encountered CUDA error: ", cudaGetErrorString(error));
@@ -390,7 +390,7 @@ struct CAFFE2_CUDA_API PinnedCPUAllocator final : public at::Allocator {
if (err == cudaErrorInvalidValue) {
free(data);
// Calling cudaGetLastError will reset the cuda error.
-    cudaGetLastError();
+    cudaError_t _err = cudaGetLastError();
} else {
// For all other errors, still do a cuda check.
CUDA_ENFORCE(err);
