From 6fbb2f70f64c0298153ebc11d89d25abf97c3b85 Mon Sep 17 00:00:00 2001
From: Xiaodong Wang <xdwang@fb.com>
Date: Mon, 11 Feb 2019 12:27:12 -0800
Subject: [PATCH] Catch cudaError_t return val (nodiscard in rocm) (#16399)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/16399

Check cudaError_t return values in a few places, because cudaError_t is marked nodiscard in ROCm. Unless -Wno-unused-result is added, discarding these return values results in a compilation error.

Also, in c10/cuda/impl/CUDATest.cpp, check whether the host has a GPU before querying the current device. Previously the error was silently discarded (so the test was not really exercising the CUDA API).

Reviewed By: bddppq

Differential Revision: D13828281

fbshipit-source-id: 587d1cc31c20b836ce9594e3c18f067d322b2934
---
 c10/cuda/CUDACachingAllocator.cpp |  2 +-
 c10/cuda/impl/CUDAGuardImpl.h     |  5 ++++-
 c10/cuda/impl/CUDATest.cpp        | 14 ++++++++++++--
 caffe2/core/context_gpu.h         |  4 ++--
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/c10/cuda/CUDACachingAllocator.cpp b/c10/cuda/CUDACachingAllocator.cpp
index 11047b45b43094..884700beeac4bd 100644
--- a/c10/cuda/CUDACachingAllocator.cpp
+++ b/c10/cuda/CUDACachingAllocator.cpp
@@ -479,7 +479,7 @@ struct THCCachingAllocator
       cuda_events.emplace_back(event, block);
     }
 
-    cudaSetDevice(prev_device);
+    C10_CUDA_CHECK(cudaSetDevice(prev_device));
   }
 
   void process_events()
diff --git a/c10/cuda/impl/CUDAGuardImpl.h b/c10/cuda/impl/CUDAGuardImpl.h
index 3da750bbe6e580..21b7298628e36f 100644
--- a/c10/cuda/impl/CUDAGuardImpl.h
+++ b/c10/cuda/impl/CUDAGuardImpl.h
@@ -39,7 +39,10 @@ struct CUDAGuardImpl final : public c10::impl::DeviceGuardImplInterface {
     C10_CUDA_CHECK(cudaSetDevice(d.index()));
   }
   void uncheckedSetDevice(Device d) const noexcept override {
-    cudaSetDevice(d.index());
+    cudaError_t __err = cudaSetDevice(d.index());
+    if (__err != cudaSuccess) {
+      AT_WARN("CUDA error: ", cudaGetErrorString(__err));
+    }
   }
   Stream getStream(Device d) const noexcept override {
     return getCurrentCUDAStream().unwrap();
diff --git a/c10/cuda/impl/CUDATest.cpp b/c10/cuda/impl/CUDATest.cpp
index f80cb95b045d05..3746d14ae51cf4 100644
--- a/c10/cuda/impl/CUDATest.cpp
+++ b/c10/cuda/impl/CUDATest.cpp
@@ -1,5 +1,6 @@
 // Just a little test file to make sure that the CUDA library works
 
+#include <c10/cuda/CUDAException.h>
 #include <c10/cuda/impl/CUDATest.h>
 
 #include <cuda_runtime.h>
@@ -8,9 +9,18 @@ namespace c10 {
 namespace cuda {
 namespace impl {
 
+bool has_cuda_gpu() {
+  int count;
+  C10_CUDA_CHECK(cudaGetDeviceCount(&count));
+
+  return count != 0;
+}
+
 int c10_cuda_test() {
-  int r;
-  cudaGetDevice(&r);
+  int r = 0;
+  if (has_cuda_gpu()) {
+    C10_CUDA_CHECK(cudaGetDevice(&r));
+  }
   return r;
 }
 
diff --git a/caffe2/core/context_gpu.h b/caffe2/core/context_gpu.h
index 0e62708f165c72..9eb7fe5c83ea9f 100644
--- a/caffe2/core/context_gpu.h
+++ b/caffe2/core/context_gpu.h
@@ -203,7 +203,7 @@ class CAFFE2_CUDA_API CUDAContext final : public BaseContext {
   // FinishDeviceComputation must be called on the same cpu thread as
   // SwitchToDevice()
   void FinishDeviceComputation() override {
-    cudaStreamSynchronize(getCudaObjects().GetStream(gpu_id_));
+    CUDA_ENFORCE(cudaStreamSynchronize(getCudaObjects().GetStream(gpu_id_)));
     cudaError_t error = cudaGetLastError();
     if (error != cudaSuccess) {
       CAFFE_THROW("Encountered CUDA error: ", cudaGetErrorString(error));
@@ -390,7 +390,7 @@ struct CAFFE2_CUDA_API PinnedCPUAllocator final : public at::Allocator {
       if (err == cudaErrorInvalidValue) {
         free(data);
         // Calling cudaGetLastError will reset the cuda error.
-        cudaGetLastError();
+        cudaError_t _err = cudaGetLastError();
       } else {
         // For all other errors, still do a cuda check.
         CUDA_ENFORCE(err);