diff --git a/CMakeLists.txt b/CMakeLists.txt
index fefd839f..9208f28d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -53,6 +53,7 @@ option(ENABLE_IMPLICIT_CONVERSIONS "Enable implicit conversions to-from raw poin
 option(DISABLE_RM "Make ManagedArray a thin wrapper" Off)
 mark_as_advanced(DISABLE_RM)
 option(ENABLE_UM "Use CUDA unified (managed) memory" Off)
+option(ENABLE_BOUNDS_CHECK "Enable bounds checking for chai::ManagedArray::operator[]" Off)
 
 set(ENABLE_TESTS On CACHE BOOL "")
 set(ENABLE_EXAMPLES On CACHE BOOL "")
diff --git a/src/chai/CMakeLists.txt b/src/chai/CMakeLists.txt
index 7821fa3f..64577fd9 100644
--- a/src/chai/CMakeLists.txt
+++ b/src/chai/CMakeLists.txt
@@ -47,6 +47,7 @@ set(CHAI_ENABLE_HIP ${ENABLE_HIP})
 set(CHAI_ENABLE_IMPLICIT_CONVERSIONS ${ENABLE_IMPLICIT_CONVERSIONS})
 set(CHAI_DISABLE_RM ${DISABLE_RM})
 set(CHAI_ENABLE_UM ${ENABLE_UM})
+set(CHAI_ENABLE_BOUNDS_CHECK ${ENABLE_BOUNDS_CHECK})
 
 configure_file(
   ${PROJECT_SOURCE_DIR}/src/chai/config.hpp.in
diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl
index 18f4db7a..ecfe52b5 100644
--- a/src/chai/ManagedArray.inl
+++ b/src/chai/ManagedArray.inl
@@ -46,6 +46,28 @@
 #include "ManagedArray.hpp"
 #include "ArrayManager.hpp"
 
+#if defined(CHAI_ENABLE_BOUNDS_CHECK)
+#if defined(NDEBUG)
+
+#define HOST_ALWAYS_ASSERT(EXP) if (!(EXP)) { \
+  printf("Assert triggered at %s:%d\n", __FILE__, __LINE__); \
+  abort(); \
+}
+
+#define DEVICE_ALWAYS_ASSERT(EXP) if (!(EXP)) { \
+  asm("trap;"); \
+}
+
+#else // defined(NDEBUG)
+
+#include <cassert>
+
+#define HOST_ALWAYS_ASSERT(EXP) assert(EXP)
+#define DEVICE_ALWAYS_ASSERT(EXP) assert(EXP)
+
+#endif // defined(NDEBUG)
+#endif // defined(CHAI_ENABLE_BOUNDS_CHECK)
+
 namespace chai {
 
 template <typename T>
@@ -389,6 +411,20 @@ template <typename T>
 template <typename Idx>
 CHAI_INLINE
 CHAI_HOST_DEVICE T& ManagedArray<T>::operator[](const Idx i) const
 {
+#if defined(CHAI_ENABLE_BOUNDS_CHECK)
+#if defined(__CUDA_ARCH__)
+  DEVICE_ALWAYS_ASSERT(i >= 0 && static_cast<size_t>(i) < m_elems);
+#else
+  //std::cout << "m_elems = " << m_elems << std::endl;
+  //std::cout << "i = " << i << std::endl;
+  //std::cout << "(size_t) i = " << static_cast<size_t>(i) << std::endl;
+
+  //printf("m_elems = %d\n", m_elems);
+  //printf("i = %d\n", i);
+  HOST_ALWAYS_ASSERT(i >= 0 && static_cast<size_t>(i) < m_elems);
+#endif
+#endif // defined(CHAI_ENABLE_BOUNDS_CHECK)
+
   return m_active_pointer[i];
 }
diff --git a/src/chai/config.hpp.in b/src/chai/config.hpp.in
index a7a443fe..310eeaa7 100644
--- a/src/chai/config.hpp.in
+++ b/src/chai/config.hpp.in
@@ -49,5 +49,6 @@
 #cmakedefine CHAI_ENABLE_IMPLICIT_CONVERSIONS
 #cmakedefine CHAI_DISABLE_RM
 #cmakedefine CHAI_ENABLE_UM
+#cmakedefine CHAI_ENABLE_BOUNDS_CHECK
 
 #endif // CHAI_config_HPP
diff --git a/src/util/forall.hpp b/src/util/forall.hpp
index 4cb537f0..d18b62a5 100644
--- a/src/util/forall.hpp
+++ b/src/util/forall.hpp
@@ -62,7 +62,7 @@ template <typename LOOP_BODY>
 void forall_kernel_cpu(int begin, int end, LOOP_BODY body)
 {
   for (int i = 0; i < (end - begin); ++i) {
-    body(i);
+    body(begin+i);
   }
 }
 
diff --git a/tests/integration/managed_array_tests.cpp b/tests/integration/managed_array_tests.cpp
index 842293e7..e271668c 100644
--- a/tests/integration/managed_array_tests.cpp
+++ b/tests/integration/managed_array_tests.cpp
@@ -1394,4 +1394,59 @@ GPU_TEST(ManagedArray, CopyZero)
   array.free();
 }
 
-#endif
\ No newline at end of file
+#endif
+
+#if defined(CHAI_ENABLE_BOUNDS_CHECK)
+
+TEST(ManagedArray, UpperOutOfRangeAccess)
+{
+  chai::ManagedArray<double> array(20);
+  array[19] = 0.0; // Should be fine
+  ASSERT_DEATH_IF_SUPPORTED(array[20] = 0.0, ".*");
+}
+
+TEST(ManagedArray, LowerOutOfRangeAccess)
+{
+  chai::ManagedArray<double> array(20);
+  array[0] = 0.0; // Should be fine
+  ASSERT_DEATH_IF_SUPPORTED(array[-1] = 0.0, ".*");
+}
+
+#if defined(CHAI_ENABLE_CUDA)
+
+GPU_TEST(ManagedArray, UpperOutOfRangeAccessGPU)
+{
+  ASSERT_EQ(cudaSuccess, cudaDeviceReset());
+  ASSERT_EQ(cudaSuccess, cudaDeviceSynchronize());
+
+  chai::ManagedArray<double> array(20);
+
+  ASSERT_DEATH_IF_SUPPORTED(forall(gpu(), 0, 1, [=] __device__ (int) {
+    array[20] = 0.0;
+  }), ".*");
+
+  //cudaError_t errorCode = cudaGetLastError();
+  cudaError_t errorCode = cudaDeviceSynchronize();
+  ASSERT_EQ(cudaErrorAssert, errorCode);
+  ASSERT_EQ(cudaDeviceReset(), cudaSuccess);
+  ASSERT_EQ(cudaSuccess, cudaDeviceSynchronize());
+}
+
+GPU_TEST(ManagedArray, LowerOutOfRangeAccessGPU)
+{
+  chai::ManagedArray<double> array(20);
+
+#if 0
+  forall(gpu(), 0, 1, [=] __device__ (int) {
+    array[-1] = 0.0;
+  });
+
+  //cudaError_t errorCode = cudaGetLastError();
+  cudaError_t errorCode = cudaDeviceSynchronize();
+  ASSERT_EQ(cudaErrorAssert, errorCode);
+  ASSERT_EQ(cudaDeviceReset(), cudaSuccess);
+#endif
+}
+
+#endif // defined(CHAI_ENABLE_CUDA)
+#endif // defined(CHAI_ENABLE_BOUNDS_CHECK)
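
Usage sketch (not part of the diff): assuming CHAI is configured with -DENABLE_BOUNDS_CHECK=On so that CHAI_ENABLE_BOUNDS_CHECK ends up defined in config.hpp, a host-side out-of-range ManagedArray access trips HOST_ALWAYS_ASSERT (printf plus abort() in NDEBUG builds, plain assert() otherwise) instead of silently reading or writing past the allocation; on the device, DEVICE_ALWAYS_ASSERT traps and a subsequent cudaDeviceSynchronize() reports cudaErrorAssert. The array name, element type, and include path below are illustrative only.

    #include "chai/ManagedArray.hpp"

    int main()
    {
      chai::ManagedArray<double> array(20);

      array[19] = 1.0;  // last valid element: passes the bounds check
      array[20] = 1.0;  // one past the end: HOST_ALWAYS_ASSERT fires and the program aborts

      array.free();
      return 0;
    }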