Skip to content

Commit

Permalink
improve some tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ksimpson-work committed Oct 24, 2024
1 parent f933fe2 commit ec741c8
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 26 deletions.
2 changes: 2 additions & 0 deletions cuda_core/tests/notes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
how to query the devices available on the system
how to copy memory from device to host and vice versa, the copy to copy from take buffers which aren't exposed
25 changes: 19 additions & 6 deletions cuda_core/tests/test_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
from cuda.core.experimental._device import Device
from cuda.core.experimental._utils import handle_return, ComputeCapability, CUDAError, \
precondition
import pytest

@pytest.fixture(scope='module')
def init_cuda():
Device().set_current()

def test_device_initialization():
device = Device()
Expand All @@ -11,6 +16,15 @@ def test_device_repr():
device = Device()
assert str(device).startswith('<Device 0')

def test_device_alloc():
device = Device()
device.set_current()
buffer = device.allocate(1024)
device.sync()
assert buffer.handle != 0
assert buffer.size == 1024
assert buffer.device_id == 0

def test_device_set_current():
device = Device()
device.set_current()
Expand All @@ -20,14 +34,13 @@ def test_device_create_stream():
stream = device.create_stream()
assert stream is not None


def test_pci_bus_id():
device = Device(0)
device = Device()
bus_id = handle_return(cudart.cudaDeviceGetPCIBusId(13, device.device_id))
assert device.pci_bus_id == bus_id[:12].decode()

def test_uuid():
device = Device(0)
device = Device()
driver_ver = handle_return(cuda.cuDriverGetVersion())
if driver_ver >= 11040:
uuid = handle_return(cuda.cuDeviceGetUuid_v2(device.device_id))
Expand All @@ -38,16 +51,16 @@ def test_uuid():
assert device.uuid == expected_uuid

def test_name():
device = Device(0)
device = Device()
name = handle_return(cuda.cuDeviceGetName(128, device.device_id))
name = name.split(b'\0')[0]
assert device.name == name.decode()

def test_compute_capability():
device = Device(0)
device = Device()
major = handle_return(cudart.cudaDeviceGetAttribute(
cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, device.device_id))
minor = handle_return(cudart.cudaDeviceGetAttribute(
cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, device.device_id))
expected_cc = ComputeCapability(major, minor)
assert device.compute_capability == expected_cc
assert device.compute_capability == expected_cc
168 changes: 148 additions & 20 deletions cuda_core/tests/test_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,72 @@
from cuda.core.experimental._device import Device
from cuda import cuda
from cuda.core.experimental._utils import handle_return
import ctypes

class DummyMemoryResource(MemoryResource):
def __init__(self):
@pytest.fixture(scope='module')
def init_cuda():
Device().set_current()

class DummyDeviceMemoryResource(MemoryResource):
def __init__(self, device):
self.device = device
pass

def allocate(self, size, stream=None) -> Buffer:
ptr = handle_return(cuda.cuMemAlloc(size))
return Buffer(ptr=ptr, size=size, mr=self)

def deallocate(self, ptr, size, stream=None):
handle_return(cuda.cuMemFree(ptr))
cuda.cuMemFree(ptr)

@property
def is_device_accessible(self) -> bool:
return True

@property
def is_host_accessible(self) -> bool:
return False

@property
def device_id(self) -> int:
return 0

class DummyHostMemoryResource(MemoryResource):
def __init__(self):
pass

def allocate(self, size, stream=None) -> Buffer:
# Allocate a ctypes buffer of size `size`
ptr = (ctypes.c_byte * size)()
return Buffer(ptr=ptr, size=size, mr=self)

def deallocate(self, ptr, size, stream=None):
#the memory is deallocated per the ctypes deallocation at garbage collection time
pass

@property
def is_device_accessible(self) -> bool:
return False

@property
def is_host_accessible(self) -> bool:
return True

@property
def device_id(self) -> int:
raise RuntimeError("the pinned memory resource is not bound to any GPU")

class DummyUnifiedMemoryResource(MemoryResource):
def __init__(self, device):
self.device = device
pass

def allocate(self, size, stream=None) -> Buffer:
ptr = handle_return(cuda.cuMemAllocManaged(size, cuda.CUmemAttach_flags.CU_MEM_ATTACH_GLOBAL.value))
return Buffer(ptr=ptr, size=size, mr=self)

def deallocate(self, ptr, size, stream=None):
cuda.cuMemFree(ptr)

@property
def is_device_accessible(self) -> bool:
Expand All @@ -28,44 +83,117 @@ def is_host_accessible(self) -> bool:
def device_id(self) -> int:
return 0

def test_buffer_initialization():
dummy_mr = DummyMemoryResource()
class DummyPinnedMemoryResource(MemoryResource):
def __init__(self, device):
self.device = device
pass

def allocate(self, size, stream=None) -> Buffer:
ptr = handle_return(cuda.cuMemAllocHost(size))
return Buffer(ptr=ptr, size=size, mr=self)

def deallocate(self, ptr, size, stream=None):
cuda.cuMemFreeHost(ptr)

@property
def is_device_accessible(self) -> bool:
return True

@property
def is_host_accessible(self) -> bool:
return True

@property
def device_id(self) -> int:
raise RuntimeError("the pinned memory resource is not bound to any GPU")

def buffer_initialization(dummy_mr : MemoryResource):
buffer = dummy_mr.allocate(size=1024)
assert buffer.handle != 0
assert buffer.size == 1024
assert buffer.memory_resource == dummy_mr
assert buffer.is_device_accessible == True
assert buffer.is_host_accessible == True
assert buffer.device_id == 0
assert buffer.is_device_accessible == dummy_mr.is_device_accessible
assert buffer.is_host_accessible == dummy_mr.is_host_accessible
dummy_mr.deallocate(buffer.handle, buffer.size)

def test_buffer_copy_to():
dummy_mr = DummyMemoryResource()
src_buffer = dummy_mr.allocate(size=1024)
dst_buffer = dummy_mr.allocate(size=1024)
def test_buffer_initialization():
device = Device()
device.set_current()
buffer_initialization(DummyDeviceMemoryResource(device))
buffer_initialization(DummyHostMemoryResource())
buffer_initialization(DummyUnifiedMemoryResource(device))
buffer_initialization(DummyPinnedMemoryResource(device))

def buffer_copy_to(dummy_mr : MemoryResource, device : Device, check = False):
src_buffer = dummy_mr.allocate(size=1024)
dst_buffer = dummy_mr.allocate(size=1024)
stream = device.create_stream()

if check:
src_ptr = ctypes.cast(src_buffer.handle, ctypes.POINTER(ctypes.c_byte))
for i in range(1024):
src_ptr[i] = ctypes.c_byte(i)

src_buffer.copy_to(dst_buffer, stream=stream)
# Assuming cuMemcpyAsync is correctly called, we can't directly check the result here
device.sync()

if check:
dst_ptr = ctypes.cast(dst_buffer.handle, ctypes.POINTER(ctypes.c_byte))

for i in range(10):
assert dst_ptr[i] == src_ptr[i]

dummy_mr.deallocate(src_buffer.handle, src_buffer.size)
dummy_mr.deallocate(dst_buffer.handle, dst_buffer.size)

def test_buffer_copy_from():
dummy_mr = DummyMemoryResource()
src_buffer = dummy_mr.allocate(size=1024)
dst_buffer = dummy_mr.allocate(size=1024)
def test_buffer_copy_to():
device = Device()
device.set_current()
buffer_copy_to(DummyDeviceMemoryResource(device), device)
buffer_copy_to(DummyUnifiedMemoryResource(device), device)
buffer_copy_to(DummyPinnedMemoryResource(device), device, check = True)

def buffer_copy_from(dummy_mr : MemoryResource, device, check = False):
src_buffer = dummy_mr.allocate(size=1024)
dst_buffer = dummy_mr.allocate(size=1024)
stream = device.create_stream()

if check:
src_ptr = ctypes.cast(src_buffer.handle, ctypes.POINTER(ctypes.c_byte))
for i in range(1024):
src_ptr[i] = ctypes.c_byte(i)

dst_buffer.copy_from(src_buffer, stream=stream)
# Assuming cuMemcpyAsync is correctly called, we can't directly check the result here
device.sync()

if check:
dst_ptr = ctypes.cast(dst_buffer.handle, ctypes.POINTER(ctypes.c_byte))

for i in range(10):
assert dst_ptr[i] == src_ptr[i]

dummy_mr.deallocate(src_buffer.handle, src_buffer.size)
dummy_mr.deallocate(dst_buffer.handle, dst_buffer.size)

def test_buffer_close():
dummy_mr = DummyMemoryResource()
def test_buffer_copy_from():
device = Device()
device.set_current()
buffer_copy_from(DummyDeviceMemoryResource(device), device)
buffer_copy_from(DummyUnifiedMemoryResource(device), device)
buffer_copy_from(DummyPinnedMemoryResource(device), device, check = True)

def buffer_close(dummy_mr : MemoryResource):
buffer = dummy_mr.allocate(size=1024)
buffer.close()
assert buffer.handle == 0
assert buffer.memory_resource == None

def test_buffer_close():
device = Device()
device.set_current()
buffer_close(DummyDeviceMemoryResource(device))
buffer_close(DummyHostMemoryResource())
buffer_close(DummyUnifiedMemoryResource(device))
buffer_close(DummyPinnedMemoryResource(device))

test_buffer_copy_to()
8 changes: 8 additions & 0 deletions cuda_core/tests/test_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
from cuda import nvrtc
from cuda.core.experimental._program import Program
from cuda.core.experimental._module import ObjectCode, Kernel
from cuda.core.experimental._device import Device

@pytest.fixture(scope='module')
def init_cuda():
Device().set_current()

def test_program_init_valid_code_type():
code = "extern \"C\" __global__ void my_kernel() {}"
Expand Down Expand Up @@ -48,3 +53,6 @@ def test_program_close():
program = Program(code, "c++")
program.close()
assert program.handle is None

Device().set_current()
test_program_compile_valid_target_type()

0 comments on commit ec741c8

Please sign in to comment.