diff --git a/cuda_ffi/bindings.ml b/cuda_ffi/bindings.ml
index 428cfeb..ee484ed 100644
--- a/cuda_ffi/bindings.ml
+++ b/cuda_ffi/bindings.ml
@@ -102,6 +102,21 @@ module Functions (F : Ctypes.FOREIGN) = struct
   let cu_memset_d32 =
     F.foreign "cuMemsetD32" F.(cu_deviceptr @-> uint32_t @-> size_t @-> returning E.cu_result)
 
+  (** Binding for [cuMemsetD8Async]; the fill value is passed as [uchar]. *)
+  let cu_memset_d8_async =
+    F.foreign "cuMemsetD8Async"
+      F.(cu_deviceptr @-> uchar @-> size_t @-> cu_stream @-> returning E.cu_result)
+
+  (** Binding for [cuMemsetD16Async]; the fill value is passed as [ushort]. *)
+  let cu_memset_d16_async =
+    F.foreign "cuMemsetD16Async"
+      F.(cu_deviceptr @-> ushort @-> size_t @-> cu_stream @-> returning E.cu_result)
+
+  (** Binding for [cuMemsetD32Async]; the fill value is passed as [uint32_t]. *)
+  let cu_memset_d32_async =
+    F.foreign "cuMemsetD32Async"
+      F.(cu_deviceptr @-> uint32_t @-> size_t @-> cu_stream @-> returning E.cu_result)
+
   let cu_module_get_global =
     F.foreign "cuModuleGetGlobal_v2"
       F.(ptr cu_deviceptr @-> ptr size_t @-> cu_module @-> string @-> returning E.cu_result)
diff --git a/cudajit.ml b/cudajit.ml
index 2164e71..221d552 100644
--- a/cudajit.ml
+++ b/cudajit.ml
@@ -499,6 +499,27 @@ let memset_d16 (Deviceptr dev) v ~length =
 let memset_d32 (Deviceptr dev) v ~length =
   check "cu_memset_d32" @@ Cuda.cu_memset_d32 dev v @@ Unsigned.Size_t.of_int length
 
+(** [memset_d8_async dev v ~length stream] forwards to [Cuda.cu_memset_d8_async], converting
+    [length] with [Unsigned.Size_t.of_int], and hands the returned status to [check]. [length] is
+    not validated here. *)
+let memset_d8_async (Deviceptr dev) v ~length stream =
+  let count = Unsigned.Size_t.of_int length in
+  Cuda.cu_memset_d8_async dev v count stream |> check "cu_memset_d8_async"
+
+(** [memset_d16_async dev v ~length stream] forwards to [Cuda.cu_memset_d16_async], converting
+    [length] with [Unsigned.Size_t.of_int], and hands the returned status to [check]. [length] is
+    not validated here. *)
+let memset_d16_async (Deviceptr dev) v ~length stream =
+  let count = Unsigned.Size_t.of_int length in
+  Cuda.cu_memset_d16_async dev v count stream |> check "cu_memset_d16_async"
+
+(** [memset_d32_async dev v ~length stream] forwards to [Cuda.cu_memset_d32_async], converting
+    [length] with [Unsigned.Size_t.of_int], and hands the returned status to [check]. [length] is
+    not validated here. *)
+let memset_d32_async (Deviceptr dev) v ~length stream =
+  let count = Unsigned.Size_t.of_int length in
+  Cuda.cu_memset_d32_async dev v count stream |> check "cu_memset_d32_async"
+
 let module_get_global module_ ~name =
   let open Ctypes in
   let device = allocate_n cu_deviceptr ~count:1 in