Skip to content

Commit

Permalink
Make out-of-memory abort more informative
Browse files Browse the repository at this point in the history
  • Loading branch information
mjuric committed May 20, 2010
1 parent 6d1cb6f commit fefe9ea
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 47 deletions.
2 changes: 1 addition & 1 deletion galaxy.kdevelop
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
<kdevautoproject>
<general>
<activetarget>src/galfast.x</activetarget>
<useconfiguration>optimized</useconfiguration>
<useconfiguration>debug</useconfiguration>
</general>
<run>
<mainprogram>/home/mjuric/projects/galaxy/debug/src/galfast.x</mainprogram>
Expand Down
2 changes: 1 addition & 1 deletion src/common/cuda_rng.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ namespace prngs
this->nstreams = nstreams;
if(on_gpu)
{
cuxErrCheck( cudaMalloc((void**)&gstate, sizeof(uint32_t)*nstreams*statewidth) );
gstate = cuxNew<uint32_t>(nstreams*statewidth);
cuxErrCheck( cudaMemcpy(gstate, states, sizeof(uint32_t)*nstreams*statewidth, cudaMemcpyHostToDevice) );
} else {
gstate = new uint32_t[nstreams*statewidth];
Expand Down
102 changes: 58 additions & 44 deletions src/common/cux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,43 +152,69 @@ void cuxSmartPtr_impl_t::gc()
}
}

void *cuxSmartPtr_impl_t::syncTo(bool device)
unsigned cuxGetFreeMem(unsigned *totalptr = NULL)
{
#if !CUDA_DEVEMU
// Memory info
unsigned free = 0, total = 0;
cuxErrCheck( (cudaError)cuMemGetInfo(&free, &total) );
if(totalptr) *totalptr = total;
return free;
#else
if(totalptr) *totalptr = 0;
return 0;
#endif
}

void cuxMallocErrCheck_impl(cudaError err, size_t msize, const char *fun, const char *file, const int line)
{
if(onDevice != device)
VERIFY(err == cudaSuccess)
{
std::swap(slave, m_data.ptr);
MLOG(verb1) << "CUDA ERROR: " << cudaGetErrorString(err);
MLOG(verb1) << "CUDA ERROR: Attempted to allocate " << msize / (1<<20) << "MB, " << cuxGetFreeMem() / (1<<20) << "MB free.";
MLOG(verb1) << "CUDA ERROR: In " << fun << " (" << file << ":" << line << ")\n";
}
}

// Allocate m_data.ptr if needed.
if(!m_data.ptr)
#define GC_AND_RETRY_IF_FAIL(x, msize) \
{ \
cudaError err = (x); \
if(err == cudaErrorMemoryAllocation) \
{ \
global_gc(); \
err = (x); \
} \
cuxMallocErrCheck(err, msize); \
}

void *cuxSmartPtr_impl_t::syncTo(bool device)
{
if(onDevice == device && m_data.ptr) { return m_data.ptr; }

// Allocate slave (future master)
if(!slave)
{
if(device)
if(device) // syncing to GPU device
{
//cuxErrCheck( cudaMalloc((void**)&m_data.ptr, memsize()) );
cudaError err = cudaMalloc((void**)&m_data.ptr, memsize());
if(err == cudaErrorMemoryAllocation)
{
global_gc();
err = cudaMalloc((void**)&m_data.ptr, memsize());
}
cuxErrCheck(err);
GC_AND_RETRY_IF_FAIL( cudaMalloc((void**)&slave, memsize()), memsize() );
}
else
else // syncing to host
{
m_data.ptr = new char[memsize()];
slave = new char[memsize()];
//memset(m_data.ptr, 0xff, memsize()); // debugging
}
}

// copy slave -> m_data.ptr (if there's something to copy)
if(onDevice != device && slave)
std::swap(m_data.ptr, slave);
onDevice = device;

if(slave)
{
cudaMemcpyKind dir = device ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost;
// copy slave -> m_data.ptr (if there's something to copy)
cudaMemcpyKind dir = onDevice ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost;
cuxErrCheck( cudaMemcpy(m_data.ptr, slave, memsize(), dir) );
}

onDevice = device;

// assume the sync dirtied up the textures
cleanCudaArray = false;

Expand All @@ -197,17 +223,6 @@ void *cuxSmartPtr_impl_t::syncTo(bool device)
return m_data.ptr;
}

#define GC_AND_RETRY_IF_FAIL(x) \
{ \
cudaError err = (x); \
if(err == cudaErrorMemoryAllocation) \
{ \
global_gc(); \
err = (x); \
} \
cuxErrCheck(err); \
}

cudaArray *cuxSmartPtr_impl_t::getCUDAArray(cudaChannelFormatDesc &channelDesc)
{
ASSERT(channelDesc.x + channelDesc.y + channelDesc.z + channelDesc.w == m_elementSize*8);
Expand All @@ -228,12 +243,12 @@ cudaArray *cuxSmartPtr_impl_t::getCUDAArray(cudaChannelFormatDesc &channelDesc)
if(ex.depth > 1)
{
// 3D arrays
GC_AND_RETRY_IF_FAIL( cudaMalloc3DArray(&cuArray, &channelDesc, ex) );
GC_AND_RETRY_IF_FAIL( cudaMalloc3DArray(&cuArray, &channelDesc, ex), memsize() );
}
else
{
// 2D and 1D arrays
GC_AND_RETRY_IF_FAIL( cudaMallocArray(&cuArray, &channelDesc, ex.width, ex.height) );
GC_AND_RETRY_IF_FAIL( cudaMallocArray(&cuArray, &channelDesc, ex.width, ex.height), memsize() );
}
}

Expand Down Expand Up @@ -654,21 +669,20 @@ const char *cpuinfo()

void abort_on_cuda_error(cudaError err)
{
if(err == cudaSuccess) { return; }

MLOG(verb1) << "CUDA ERROR: " << cudaGetErrorString(err);
//abort();
exit(-100);
VERIFY(err == cudaSuccess)
{
MLOG(verb1) << "CUDA ERROR: " << cudaGetErrorString(err);
}
}

void cuxErrCheck_impl(cudaError err, const char *fun, const char *file, const int line)
{
if(err != cudaSuccess)
VERIFY(err == cudaSuccess)
{
MLOG(verb1) << "CUDA ERROR: " << cudaGetErrorString(err);
MLOG(verb1) << "CUDA ERROR: In " << fun << " (" << file << ":" << line << ")\n";
abort_on_cuda_error(err);
// throw cuxException(err);
}
// throw cuxException(err);
}

#if HAVE_CUDA
Expand Down Expand Up @@ -717,8 +731,8 @@ bool cux_init()
#if !CUDA_DEVEMU
// ensure a CUDA context is created and fetch the active
// device id
void *tmp;
cuxErrCheck( cudaMalloc(&tmp, 1024) );
char *tmp;
tmp = cuxNew<char>(1024);
cuxErrCheck( cudaFree(tmp) );
cuxErrCheck( cudaGetDevice(&dev) );
#endif
Expand Down
5 changes: 4 additions & 1 deletion src/common/cux.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ void cuxErrCheck_impl(cudaError err, const char *fun, const char *file, const in
#define cuxErrCheck(expr) \
cuxErrCheck_impl(expr, __PRETTY_FUNCTION__, __FILE__, __LINE__)

void cuxMallocErrCheck_impl(cudaError err, size_t msize, const char *fun, const char *file, const int line);
#define cuxMallocErrCheck(expr, msize) \
cuxMallocErrCheck_impl(expr, msize, __PRETTY_FUNCTION__, __FILE__, __LINE__)

/**
Compute the size, in bytes, of an aligned (strided) 1/2/3D array.
Expand Down Expand Up @@ -110,7 +113,7 @@ template<typename T>
size_t size = arrayMemSize<T>(nx, ny, nz, align);

T *devptr;
cuxErrCheck( cudaMalloc((void**)&devptr, size) );
cuxMallocErrCheck( cudaMalloc((void**)&devptr, size), size );
return devptr;
}

Expand Down

0 comments on commit fefe9ea

Please sign in to comment.