Feature: Improve Kernel Decorator (#69)

This pull request addresses issue #68 by changing the implementation of the kernel decorator, so that the function runs multiple times depending on the number of blocks and the number of threads in each block. --------- Co-authored-by: EmilyBourne <[email protected]> Co-authored-by: bauom <[email protected]>
pyccel · Sep 25, 2024 · bb18b0a · bb18b0a
1 parent bdc48e6
commit bb18b0a
Show file tree

Hide file tree

Showing 8 changed files with 181 additions and 3 deletions.
diff --git a/docs/cuda.md b/docs/cuda.md
@@ -43,4 +43,22 @@ def my_kernel():
 my_kernel[1, 1]()
 
 ```
+## Cuda Device Methods
+The following methods are available for CUDA devices in Pyccel and can be called from either kernels or device functions. Currently, the only import syntax supported is:
+```python
+from pyccel import cuda
+```
+Using an alias for the import is not supported, so this is not allowed:
+
+```python
+from pyccel import cuda as py_cu
+```
+
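+| Method | Description |
+|--------|-------------|
+| `threadIdx(dim)` | Returns the index of the current thread within its block in the dimension `dim` (0 for x, 1 for y, 2 for z). |
+| `blockIdx(dim)` | Returns the index of the current block in the dimension `dim` (0 for x, 1 for y, 2 for z). |
+| `blockDim(dim)` | Returns the size of a block in the dimension `dim` (0 for x, 1 for y, 2 for z). |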
+
+
 
diff --git a/pyccel/cuda/cuda_thread_indexing.py b/pyccel/cuda/cuda_thread_indexing.py
@@ -0,0 +1,88 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. #
+#------------------------------------------------------------------------------------------#
+"""
+This module contains all the CUDA thread indexing methods
+"""
+class CudaThreadIndexing:
+    """
+    Class representing the CUDA thread indexing.
+
+    Pure Python stand-in for the CUDA indexing methods. An instance
+    describes a single (block, thread) pair and simply reports the values
+    it was built with. Only one dimension is modelled: every method accepts
+    a `dim` argument but returns the same value whatever `dim` is.
+
+    Parameters
+    ----------
+    block_idx : int
+        The index of the block in the x-dimension.
+
+    thread_idx : int
+        The index of the thread in the x-dimension.
+
+    block_dim : int, default=0
+        The number of threads in each block in the x-dimension. The default
+        of 0 preserves the result of `blockDim` for callers that only
+        provide the two indices.
+    """
+    def __init__(self, block_idx, thread_idx, block_dim=0):
+        self._block_idx = block_idx
+        self._thread_idx = thread_idx
+        self._block_dim = block_dim
+
+    def threadIdx(self, dim):
+        """
+        Get the thread index.
+
+        Return the thread index that was provided when the object was
+        created. The value of `dim` is not used.
+
+        Parameters
+        ----------
+        dim : int
+            The dimension of the indexing. It can be:
+            - 0 for the x-dimension
+            - 1 for the y-dimension
+            - 2 for the z-dimension
+
+        Returns
+        -------
+        int
+            The index of the thread in the specified dimension of its block.
+        """
+        return self._thread_idx
+
+    def blockIdx(self, dim):
+        """
+        Get the block index.
+
+        Return the block index that was provided when the object was
+        created. The value of `dim` is not used.
+
+        Parameters
+        ----------
+        dim : int
+            The dimension of the indexing. It can be:
+            - 0 for the x-dimension
+            - 1 for the y-dimension
+            - 2 for the z-dimension
+
+        Returns
+        -------
+        int
+            The index of the block in the specified dimension.
+        """
+        return self._block_idx
+
+    def blockDim(self, dim):
+        """
+        Get the block dimension.
+
+        Return the block size that was provided when the object was
+        created (0 if none was given). The value of `dim` is not used.
+
+        Parameters
+        ----------
+        dim : int
+            The dimension of the indexing. It can be:
+            - 0 for the x-dimension
+            - 1 for the y-dimension
+            - 2 for the z-dimension
+
+        Returns
+        -------
+        int
+            The size of the block in the specified dimension.
+        """
+        return self._block_dim
+
+
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
@@ -6,6 +6,7 @@
 """
 This module contains all the provided decorator methods.
 """
+from pyccel.cuda.cuda_thread_indexing import CudaThreadIndexing
 import warnings
 
 __all__ = (
@@ -139,7 +140,24 @@ class KernelAccessor:
  def __init__(self, f):
  self._f = f
  def __getitem__(self, args):
- return self._f
+ num_blocks, num_threads = args
+ def internal_loop(*args, **kwargs):
+ """
+ The internal loop for kernel execution.
+
+ The internal loop for kernel execution.
+ """
+ for b in range(num_blocks):
+ for t in range(num_threads):
+ cu = CudaThreadIndexing(b, t)
+ if 'cuda' in self._f.__globals__:
+ self._f.__globals__['cuda'].threadIdx = cu.threadIdx
+ self._f.__globals__['cuda'].blockIdx = cu.blockIdx
+ self._f.__globals__['cuda'].blockDim = cu.blockDim
+ else:
+ self._f.__globals__['cuda'] = cu
+ self._f(*args, **kwargs)
+ return internal_loop
 
  return KernelAccessor(f)
 

diff --git a/tests/pyccel/scripts/kernel/block_idx.py b/tests/pyccel/scripts/kernel/block_idx.py
@@ -0,0 +1,15 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+from pyccel import cuda
+
+# Kernel printing the index of the block that executes it.
+@kernel
+def print_block():
+    print(cuda.blockIdx(0)) # pylint: disable=no-member
+
+# Launch the kernel with 5 blocks of 5 threads each.
+def f():
+    print_block[5,5]()
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
+
diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py
@@ -1,6 +1,6 @@
 # pylint: disable=missing-function-docstring, missing-module-docstring
 from pyccel.decorators import device, kernel
-from pyccel import cuda
+from pyccel  import cuda
 
 @device
 def device_call():

diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py
@@ -1,6 +1,6 @@
 # pylint: disable=missing-function-docstring, missing-module-docstring
 from pyccel.decorators import kernel
-from pyccel import cuda
+from pyccel  import cuda
 
 @kernel
 def say_hello(its_morning : bool):

diff --git a/tests/pyccel/scripts/kernel/thread_idx.py b/tests/pyccel/scripts/kernel/thread_idx.py
@@ -0,0 +1,15 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+from pyccel import cuda
+
+# Kernel printing the index of the thread that executes it.
+# NOTE(review): the kernel is called print_block although it prints the
+# thread index; the name looks copied from block_idx.py.
+@kernel
+def print_block():
+    print(cuda.threadIdx(0)) # pylint: disable=no-member
+
+# Launch the kernel with 5 blocks of 5 threads each.
+def f():
+    print_block[5,5]()
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
+
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
@@ -730,6 +730,8 @@ def test_elemental(language):
  pyccel_test("scripts/decorators_elemental.py", language = language)
 
 #------------------------------------------------------------------------------
+
+
 @pytest.mark.cuda
 def test_hello_kernel(gpu_available):
  types = str
@@ -743,7 +745,29 @@ def test_kernel_collision(gpu_available):
  language="cuda", execute_code=gpu_available)
 
 #------------------------------------------------------------------------------
+def test_block_idx():
+    # Run the kernel script in pure Python and check the printed block indices
+    test_file = get_abs_path("scripts/kernel/block_idx.py")
+    cwd = get_abs_path(os.path.dirname(test_file))
+
+    pyth_out = get_python_output(test_file, cwd)
+
+    # The order in which the threads print is not part of the contract
+    python_block_idx = sorted(map(int, pyth_out.split()))
+
+    # 5 blocks of 5 threads: each block index is printed once per thread.
+    # Comparing the whole output also rejects extra or unexpected values.
+    assert python_block_idx == [i for i in range(5) for _ in range(5)]
+#------------------------------------------------------------------------------
+def test_thread_idx():
+    # Run the kernel script in pure Python and check the printed thread indices
+    test_file = get_abs_path("scripts/kernel/thread_idx.py")
+    cwd = get_abs_path(os.path.dirname(test_file))
+
+    pyth_out = get_python_output(test_file, cwd)
 
+    # The order in which the threads print is not part of the contract
+    python_idx = sorted(map(int, pyth_out.split()))
+
+    # 5 blocks of 5 threads: each thread index is printed once per block.
+    # Comparing the whole output also rejects extra or unexpected values.
+    assert python_idx == [i for i in range(5) for _ in range(5)]
+
+#------------------------------------------------------------------------------
 @pytest.mark.cuda
 def test_device_call(gpu_available):
  types = str