Skip to content

Commit

Permalink
update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ksimpson-work committed Oct 25, 2024
1 parent a860436 commit fc6d176
Show file tree
Hide file tree
Showing 8 changed files with 564 additions and 0 deletions.
9 changes: 9 additions & 0 deletions cuda_core/tests/test_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from cuda.core.experimental._context import Context

def test_context_initialization():
    """Context cannot be constructed directly; the constructor must raise."""
    try:
        Context()
    except NotImplementedError:
        pass  # expected: direct construction is unsupported
    else:
        assert False, "Expected NotImplementedError was not raised"
66 changes: 66 additions & 0 deletions cuda_core/tests/test_device.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from cuda import cuda, cudart
from cuda.core.experimental._device import Device
from cuda.core.experimental._utils import handle_return, ComputeCapability, CUDAError, \
precondition
import pytest

@pytest.fixture(scope='module')
def init_cuda():
    """Module-scoped fixture: make the default device current once per module."""
    Device().set_current()

def test_device_initialization():
    """Constructing the default Device must succeed and yield an object."""
    assert Device() is not None

def test_device_repr():
    """The string form of the default device identifies it as device 0."""
    text = str(Device())
    assert text.startswith('<Device 0')

def test_device_alloc():
    """Allocating 1024 bytes on the current device yields a valid buffer."""
    dev = Device()
    dev.set_current()
    buf = dev.allocate(1024)
    dev.sync()
    assert buf.handle != 0
    assert buf.size == 1024
    assert buf.device_id == 0

def test_device_set_current():
    """set_current() on the default device must complete without raising."""
    Device().set_current()

def test_device_create_stream():
    """A device can mint a new stream object."""
    new_stream = Device().create_stream()
    assert new_stream is not None

def test_pci_bus_id():
    """Device.pci_bus_id matches the runtime API's PCI bus id string."""
    dev = Device()
    # 13 bytes: the 12-character "domain:bus:device.function" id plus a NUL.
    raw = handle_return(cudart.cudaDeviceGetPCIBusId(13, dev.device_id))
    assert dev.pci_bus_id == raw[:12].decode()

def test_uuid():
    """Device.uuid is the canonical 8-4-4-4-12 hex rendering of the driver UUID."""
    dev = Device()
    version = handle_return(cuda.cuDriverGetVersion())
    # cuDeviceGetUuid_v2 only exists from CUDA 11.4 (driver 11040) onward.
    if version >= 11040:
        raw = handle_return(cuda.cuDeviceGetUuid_v2(dev.device_id))
    else:
        raw = handle_return(cuda.cuDeviceGetUuid(dev.device_id))
    hexstr = raw.bytes.hex()
    expected = '-'.join(
        (hexstr[:8], hexstr[8:12], hexstr[12:16], hexstr[16:20], hexstr[20:]))
    assert dev.uuid == expected

def test_name():
    """Device.name matches the driver-reported name, stripped of NUL padding."""
    dev = Device()
    raw = handle_return(cuda.cuDeviceGetName(128, dev.device_id))
    expected = raw.split(b'\0')[0].decode()
    assert dev.name == expected

def test_compute_capability():
    """Device.compute_capability agrees with the runtime attribute queries."""
    dev = Device()
    attrs = cudart.cudaDeviceAttr
    major = handle_return(cudart.cudaDeviceGetAttribute(
        attrs.cudaDevAttrComputeCapabilityMajor, dev.device_id))
    minor = handle_return(cudart.cudaDeviceGetAttribute(
        attrs.cudaDevAttrComputeCapabilityMinor, dev.device_id))
    assert dev.compute_capability == ComputeCapability(major, minor)
38 changes: 38 additions & 0 deletions cuda_core/tests/test_event.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from cuda import cuda
from cuda.core.experimental._event import EventOptions, Event
from cuda.core.experimental._utils import handle_return

def test_is_timing_disabled():
    """Creating an event with enable_timing=False must disable timing."""
    event = Event._init(EventOptions(enable_timing=False))
    # Assert truthiness directly instead of comparing to True (PEP 8 / E712).
    assert event.is_timing_disabled

def test_is_sync_busy_waited():
    """busy_waited_sync=True must be reflected by the created event."""
    event = Event._init(EventOptions(busy_waited_sync=True))
    # Assert truthiness directly instead of comparing to True (PEP 8 / E712).
    assert event.is_sync_busy_waited

def test_is_ipc_supported():
    """IPC events are not implemented yet; requesting one must raise."""
    try:
        Event._init(EventOptions(support_ipc=True))
    except NotImplementedError:
        pass  # expected
    else:
        assert False, "Expected NotImplementedError was not raised"

def test_sync():
    """After sync() returns, the event must report completion."""
    event = Event._init(EventOptions())
    event.sync()
    # Assert truthiness directly instead of comparing to True (PEP 8 / E712).
    assert event.is_done

def test_is_done():
    """A freshly created event with no pending work is already done."""
    event = Event._init(EventOptions())
    # Assert truthiness directly instead of comparing to True (PEP 8 / E712).
    assert event.is_done

def test_handle():
    """The raw event handle is exposed as an int."""
    event = Event._init(EventOptions())
    raw_handle = event.handle
    assert isinstance(raw_handle, int)
77 changes: 77 additions & 0 deletions cuda_core/tests/test_launcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from cuda.core.experimental._launcher import LaunchConfig
from cuda.core.experimental._stream import Stream
from cuda.core.experimental._device import Device
from cuda.core.experimental._utils import handle_return
from cuda import cuda

def test_launch_config_init():
    """LaunchConfig stores grid/block/stream/shmem_size exactly as given."""
    cfg = LaunchConfig(grid=(1, 1, 1), block=(1, 1, 1), stream=None, shmem_size=0)
    assert cfg.grid == (1, 1, 1)
    assert cfg.block == (1, 1, 1)
    assert cfg.stream is None
    assert cfg.shmem_size == 0

    cfg = LaunchConfig(grid=(2, 2, 2), block=(2, 2, 2),
                       stream=Device().create_stream(), shmem_size=1024)
    assert cfg.grid == (2, 2, 2)
    assert cfg.block == (2, 2, 2)
    assert isinstance(cfg.stream, Stream)
    assert cfg.shmem_size == 1024

def test_launch_config_cast_to_3_tuple():
    """_cast_to_3_tuple pads missing dimensions with 1."""
    cfg = LaunchConfig(grid=1, block=1)
    cases = {
        1: (1, 1, 1),
        (1, 2): (1, 2, 1),
        (1, 2, 3): (1, 2, 3),
        # edge cases: larger values pass through unchanged
        999: (999, 1, 1),
        (999, 888): (999, 888, 1),
        (999, 888, 777): (999, 888, 777),
    }
    for given, expected in cases.items():
        assert cfg._cast_to_3_tuple(given) == expected

def test_launch_config_invalid_values():
    """Non-positive grid/block dimensions must be rejected with ValueError.

    The four cases below were previously four copy-pasted try/except blocks
    with no-op ``assert True`` filler; a data-driven loop keeps them in sync.
    """
    bad_configs = (
        dict(grid=0, block=1),
        dict(grid=(0, 1), block=1),
        dict(grid=(1, 1, 1), block=0),
        dict(grid=(1, 1, 1), block=(0, 1)),
    )
    for kwargs in bad_configs:
        try:
            LaunchConfig(**kwargs)
        except ValueError:
            pass  # expected
        else:
            assert False, f"ValueError not raised for {kwargs}"

def test_launch_config_stream():
    """A Stream is accepted as-is; a non-Stream stream value is rejected."""
    stream = Device().create_stream()
    cfg = LaunchConfig(grid=(1, 1, 1), block=(1, 1, 1), stream=stream, shmem_size=0)
    assert cfg.stream == stream

    try:
        LaunchConfig(grid=(1, 1, 1), block=(1, 1, 1),
                     stream="invalid_stream", shmem_size=0)
    except ValueError:
        pass  # expected
    else:
        assert False, "ValueError not raised for an invalid stream object"

def test_launch_config_shmem_size():
    """shmem_size is stored when given and defaults to 0 when omitted."""
    cfg = LaunchConfig(grid=(1, 1, 1), block=(1, 1, 1), stream=None, shmem_size=2048)
    assert cfg.shmem_size == 2048

    cfg = LaunchConfig(grid=(1, 1, 1), block=(1, 1, 1), stream=None)
    assert cfg.shmem_size == 0
199 changes: 199 additions & 0 deletions cuda_core/tests/test_memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
# FILE: test_memory.py

import ctypes

# pytest was used below (@pytest.fixture) without being imported, which
# makes the module fail with NameError at collection time.
import pytest

from cuda import cuda
from cuda.core.experimental._device import Device
from cuda.core.experimental._memory import Buffer, MemoryResource
from cuda.core.experimental._utils import handle_return


@pytest.fixture(scope='module')
def init_cuda():
    """Module-scoped fixture: make the default device current once per module."""
    Device().set_current()

class DummyDeviceMemoryResource(MemoryResource):
    """Toy memory resource backed by plain device memory (cuMemAlloc)."""

    def __init__(self, device):
        # Removed a dead `pass` that followed this assignment.
        self.device = device

    def allocate(self, size, stream=None) -> Buffer:
        """Allocate `size` bytes of device memory wrapped in a Buffer."""
        ptr = handle_return(cuda.cuMemAlloc(size))
        return Buffer(ptr=ptr, size=size, mr=self)

    def deallocate(self, ptr, size, stream=None):
        """Free the device allocation, surfacing driver errors."""
        # Previously the cuMemFree status was silently dropped.
        handle_return(cuda.cuMemFree(ptr))

    @property
    def is_device_accessible(self) -> bool:
        return True

    @property
    def is_host_accessible(self) -> bool:
        return False

    @property
    def device_id(self) -> int:
        return 0

class DummyHostMemoryResource(MemoryResource):
    """Toy memory resource backed by a plain ctypes host buffer."""

    def __init__(self):
        pass

    def allocate(self, size, stream=None) -> Buffer:
        """Allocate `size` bytes of host memory as a ctypes byte array."""
        ptr = (ctypes.c_byte * size)()
        return Buffer(ptr=ptr, size=size, mr=self)

    def deallocate(self, ptr, size, stream=None):
        # Nothing to do: the ctypes array is reclaimed by garbage collection.
        pass

    @property
    def is_device_accessible(self) -> bool:
        return False

    @property
    def is_host_accessible(self) -> bool:
        return True

    @property
    def device_id(self) -> int:
        # Fixed copy-pasted message that referred to the *pinned* resource.
        raise RuntimeError("the host memory resource is not bound to any GPU")

class DummyUnifiedMemoryResource(MemoryResource):
    """Toy memory resource backed by managed (unified) memory."""

    def __init__(self, device):
        # Removed a dead `pass` that followed this assignment.
        self.device = device

    def allocate(self, size, stream=None) -> Buffer:
        """Allocate `size` bytes of managed memory attached globally."""
        ptr = handle_return(cuda.cuMemAllocManaged(
            size, cuda.CUmemAttach_flags.CU_MEM_ATTACH_GLOBAL.value))
        return Buffer(ptr=ptr, size=size, mr=self)

    def deallocate(self, ptr, size, stream=None):
        """Free the managed allocation, surfacing driver errors."""
        # Previously the cuMemFree status was silently dropped.
        handle_return(cuda.cuMemFree(ptr))

    @property
    def is_device_accessible(self) -> bool:
        return True

    @property
    def is_host_accessible(self) -> bool:
        return True

    @property
    def device_id(self) -> int:
        return 0

class DummyPinnedMemoryResource(MemoryResource):
    """Toy memory resource backed by page-locked host memory (cuMemAllocHost)."""

    def __init__(self, device):
        # Removed a dead `pass` that followed this assignment.
        self.device = device

    def allocate(self, size, stream=None) -> Buffer:
        """Allocate `size` bytes of pinned host memory wrapped in a Buffer."""
        ptr = handle_return(cuda.cuMemAllocHost(size))
        return Buffer(ptr=ptr, size=size, mr=self)

    def deallocate(self, ptr, size, stream=None):
        """Free the pinned allocation, surfacing driver errors."""
        # Previously the cuMemFreeHost status was silently dropped.
        handle_return(cuda.cuMemFreeHost(ptr))

    @property
    def is_device_accessible(self) -> bool:
        return True

    @property
    def is_host_accessible(self) -> bool:
        return True

    @property
    def device_id(self) -> int:
        raise RuntimeError("the pinned memory resource is not bound to any GPU")

def buffer_initialization(dummy_mr: MemoryResource):
    """Allocate a buffer from `dummy_mr` and verify its reported properties."""
    buf = dummy_mr.allocate(size=1024)
    assert buf.handle != 0
    assert buf.size == 1024
    assert buf.memory_resource == dummy_mr
    assert buf.is_device_accessible == dummy_mr.is_device_accessible
    assert buf.is_host_accessible == dummy_mr.is_host_accessible
    dummy_mr.deallocate(buf.handle, buf.size)

def test_buffer_initialization():
    """Exercise buffer_initialization against every dummy resource kind."""
    device = Device()
    device.set_current()
    for mr in (DummyDeviceMemoryResource(device),
               DummyHostMemoryResource(),
               DummyUnifiedMemoryResource(device),
               DummyPinnedMemoryResource(device)):
        buffer_initialization(mr)

def buffer_copy_to(dummy_mr: MemoryResource, device: Device, check=False):
    """Copy src -> dst via Buffer.copy_to and verify contents when host-visible.

    `check` may only be True for host-accessible resources, since it
    dereferences the raw handles through ctypes.
    """
    src_buffer = dummy_mr.allocate(size=1024)
    dst_buffer = dummy_mr.allocate(size=1024)
    stream = device.create_stream()

    if check:
        src_ptr = ctypes.cast(src_buffer.handle, ctypes.POINTER(ctypes.c_byte))
        for i in range(1024):
            src_ptr[i] = ctypes.c_byte(i)

    src_buffer.copy_to(dst_buffer, stream=stream)
    device.sync()

    if check:
        dst_ptr = ctypes.cast(dst_buffer.handle, ctypes.POINTER(ctypes.c_byte))
        # Compare the whole buffer; the original only checked the first
        # 10 of the 1024 bytes that were written.
        for i in range(1024):
            assert dst_ptr[i] == src_ptr[i]

    dummy_mr.deallocate(src_buffer.handle, src_buffer.size)
    dummy_mr.deallocate(dst_buffer.handle, dst_buffer.size)

def test_buffer_copy_to():
    """copy_to works for device, unified, and pinned resources."""
    device = Device()
    device.set_current()
    buffer_copy_to(DummyDeviceMemoryResource(device), device)
    buffer_copy_to(DummyUnifiedMemoryResource(device), device)
    # Pinned memory is host-visible, so the copied bytes can be verified.
    buffer_copy_to(DummyPinnedMemoryResource(device), device, check=True)

def buffer_copy_from(dummy_mr: MemoryResource, device, check=False):
    """Copy src -> dst via Buffer.copy_from and verify contents when host-visible.

    `check` may only be True for host-accessible resources, since it
    dereferences the raw handles through ctypes.
    """
    src_buffer = dummy_mr.allocate(size=1024)
    dst_buffer = dummy_mr.allocate(size=1024)
    stream = device.create_stream()

    if check:
        src_ptr = ctypes.cast(src_buffer.handle, ctypes.POINTER(ctypes.c_byte))
        for i in range(1024):
            src_ptr[i] = ctypes.c_byte(i)

    dst_buffer.copy_from(src_buffer, stream=stream)
    device.sync()

    if check:
        dst_ptr = ctypes.cast(dst_buffer.handle, ctypes.POINTER(ctypes.c_byte))
        # Compare the whole buffer; the original only checked the first
        # 10 of the 1024 bytes that were written.
        for i in range(1024):
            assert dst_ptr[i] == src_ptr[i]

    dummy_mr.deallocate(src_buffer.handle, src_buffer.size)
    dummy_mr.deallocate(dst_buffer.handle, dst_buffer.size)

def test_buffer_copy_from():
    """copy_from works for device, unified, and pinned resources."""
    device = Device()
    device.set_current()
    buffer_copy_from(DummyDeviceMemoryResource(device), device)
    buffer_copy_from(DummyUnifiedMemoryResource(device), device)
    # Pinned memory is host-visible, so the copied bytes can be verified.
    buffer_copy_from(DummyPinnedMemoryResource(device), device, check=True)

def buffer_close(dummy_mr: MemoryResource):
    """close() must zero the handle and detach the memory resource."""
    buf = dummy_mr.allocate(size=1024)
    buf.close()
    assert buf.handle == 0
    # `is None`, not `== None` (PEP 8 / E711).
    assert buf.memory_resource is None

def test_buffer_close():
    """close() behaves correctly for every dummy resource kind."""
    device = Device()
    device.set_current()
    for mr in (DummyDeviceMemoryResource(device),
               DummyHostMemoryResource(),
               DummyUnifiedMemoryResource(device),
               DummyPinnedMemoryResource(device)):
        buffer_close(mr)

test_buffer_copy_to()
Loading

0 comments on commit fc6d176

Please sign in to comment.