diff --git a/.dict_custom.txt b/.dict_custom.txt index b9240f6215..d0b759b36e 100644 --- a/.dict_custom.txt +++ b/.dict_custom.txt @@ -110,6 +110,8 @@ Valgrind variadic subclasses oneAPI +Cuda +cuda getter setter bitwise @@ -119,3 +121,4 @@ indexable traceback STC gFTL +GPUs diff --git a/.github/actions/coverage_install/action.yml b/.github/actions/coverage_install/action.yml index ac5294e542..5732baee34 100644 --- a/.github/actions/coverage_install/action.yml +++ b/.github/actions/coverage_install/action.yml @@ -15,7 +15,7 @@ runs: - name: Directory Creation run: | INSTALL_DIR=$(cd tests; python -c "import pyccel; print(pyccel.__path__[0])") - SITE_DIR=$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])') + SITE_DIR=$(dirname ${INSTALL_DIR}) echo -e "import coverage; coverage.process_startup()" > ${SITE_DIR}/pyccel_cov.pth echo -e "[run]\nparallel = True\nsource = ${INSTALL_DIR}\ndata_file = $(pwd)/.coverage\n[report]\ninclude = ${INSTALL_DIR}/*\n[xml]\noutput = cobertura.xml" > .coveragerc echo "SITE_DIR=${SITE_DIR}" >> $GITHUB_ENV diff --git a/.github/actions/linux_install/action.yml b/.github/actions/linux_install/action.yml index 8fb5cd8505..0ef9a69b8e 100644 --- a/.github/actions/linux_install/action.yml +++ b/.github/actions/linux_install/action.yml @@ -9,22 +9,22 @@ runs: shell: bash - name: Install fortran run: - sudo apt-get install gfortran + sudo apt-get install -y gfortran shell: bash - name: Install LaPack run: - sudo apt-get install libblas-dev liblapack-dev + sudo apt-get install -y libblas-dev liblapack-dev shell: bash - name: Install MPI run: | - sudo apt-get install libopenmpi-dev openmpi-bin + sudo apt-get install -y libopenmpi-dev openmpi-bin echo "MPI_OPTS=--oversubscribe" >> $GITHUB_ENV shell: bash - name: Install OpenMP run: - sudo apt-get install libomp-dev libomp5 + sudo apt-get install -y libomp-dev libomp5 shell: bash - name: Install Valgrind run: - sudo apt-get install valgrind + sudo apt-get install -y valgrind 
shell: bash diff --git a/.github/actions/pytest_parallel/action.yml b/.github/actions/pytest_parallel/action.yml index c7c77d99c7..f91d84915b 100644 --- a/.github/actions/pytest_parallel/action.yml +++ b/.github/actions/pytest_parallel/action.yml @@ -10,8 +10,8 @@ runs: steps: - name: Test with pytest run: | - mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m parallel -rXx - #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m parallel -rXx + mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m "parallel and not cuda" -rXx + #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m "parallel and not cuda" -rXx shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml index 0b6f0f988d..451fa39e92 100644 --- a/.github/actions/pytest_run/action.yml +++ b/.github/actions/pytest_run/action.yml @@ -51,13 +51,13 @@ runs: working-directory: ./tests id: pytest_3 - name: Test Fortran translations - run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out + run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out shell: ${{ inputs.shell_cmd }} working-directory: ./tests id: pytest_4 - name: Test multi-file Fortran translations run: | - python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out + python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git 
a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml new file mode 100644 index 0000000000..46f90552ed --- /dev/null +++ b/.github/actions/pytest_run_cuda/action.yml @@ -0,0 +1,24 @@ +name: 'Pyccel pytest commands generating Cuda' +inputs: + shell_cmd: + description: 'Specifies the shell command (different for anaconda)' + required: false + default: "bash" + +runs: + using: "composite" + steps: + - name: Ccuda tests with pytest + run: | + # Catch exit 5 (no tests found) + python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out + pyccel-clean + shell: ${{ inputs.shell_cmd }} + working-directory: ./tests + - name: Final step + if: always() + id: status + run: + python ci_tools/json_pytest_output.py -t "Cuda Test Summary" --tests "Cuda tests:${{ steps.pytest_1.outcome }}:tests/s1_outfile.out" + + shell: ${{ inputs.shell_cmd }} diff --git a/.github/actions/python_install/action.yml b/.github/actions/python_install/action.yml new file mode 100644 index 0000000000..f9b720e3e1 --- /dev/null +++ b/.github/actions/python_install/action.yml @@ -0,0 +1,17 @@ +name: 'Python installation commands' + +runs: + using: "composite" + steps: + - name: Install python + run: + sudo apt-get -y install python3-dev + shell: bash + - name: python as python3 + run: + sudo apt-get -y install python-is-python3 + shell: bash + - name: Install Pip + run: + sudo apt-get -y install python3-pip + shell: bash diff --git a/.github/workflows/anaconda_linux.yml b/.github/workflows/anaconda_linux.yml index 5a5384e5ce..525903a54f 100644 --- a/.github/workflows/anaconda_linux.yml +++ b/.github/workflows/anaconda_linux.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: 
python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/anaconda_windows.yml b/.github/workflows/anaconda_windows.yml index 154a4d01e8..0f3f8a04ed 100644 --- a/.github/workflows/anaconda_windows.yml +++ b/.github/workflows/anaconda_windows.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: windows-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml new file mode 100644 index 0000000000..833ebf5d85 --- /dev/null +++ b/.github/workflows/cuda.yml @@ -0,0 +1,83 @@ +name: Cuda unit tests + +on: + workflow_dispatch: + inputs: + python_version: + required: false + type: string + ref: + required: false + type: string + check_run_id: + required: false + type: string + pr_repo: + required: false + type: string + push: + branches: [devel, main] + +env: + COMMIT: ${{ inputs.ref || github.event.ref }} + PEM: ${{ secrets.BOT_PEM }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_CHECK_RUN_ID: ${{ inputs.check_run_id }} + PR_REPO: ${{ inputs.pr_repo || github.repository }} + +jobs: + Cuda: + + runs-on: ubuntu-20.04 + name: Unit tests + + container: nvidia/cuda:11.7.1-devel-ubuntu20.04 + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ env.COMMIT }} + repository: ${{ env.PR_REPO }} + - name: Prepare docker + run: | + apt update && apt install sudo + TZ=Europe/France + ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata + shell: bash + - name: Install python (setup-python action doesn't work with containers) + uses: ./.github/actions/python_install + - name: "Setup" + id: token + run: | + pip install jwt requests + python 
ci_tools/setup_check_run.py cuda + - name: CUDA Version + run: nvcc --version # cuda install check + - name: Install dependencies + uses: ./.github/actions/linux_install + - name: Install Pyccel with tests + run: | + PATH=${PATH}:$HOME/.local/bin + echo "PATH=${PATH}" >> $GITHUB_ENV + python -m pip install --upgrade pip + python -m pip install --user .[test] + shell: bash + - name: Coverage install + uses: ./.github/actions/coverage_install + - name: Ccuda tests with pytest + id: cuda_pytest + uses: ./.github/actions/pytest_run_cuda + - name: Collect coverage information + continue-on-error: True + uses: ./.github/actions/coverage_collection + - name: Save code coverage report + uses: actions/upload-artifact@v3 + with: + name: coverage-artifact + path: .coverage + retention-days: 1 + - name: "Post completed" + if: always() + run: + python ci_tools/complete_check_run.py ${{ steps.cuda_pytest.outcome }} + diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 920b14cf0b..391511329f 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -10,7 +10,7 @@ jobs: waitForWorklows: name: Wait for workflows runs-on: ubuntu-latest - if: github.event.workflow_run.head_branch == 'main' + if: github.event.workflow_run.head_branch == 'main' && github.repository == 'pyccel/pyccel' steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 977d5f9afd..5f340e1088 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -29,7 +29,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index ad39cee725..664ae3aa60 100644 --- 
a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -28,7 +28,7 @@ env: jobs: matrix_prep: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} steps: diff --git a/.github/workflows/macosx.yml b/.github/workflows/macosx.yml index 4768a64efa..f51041c0b8 100644 --- a/.github/workflows/macosx.yml +++ b/.github/workflows/macosx.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: macos-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/pickle.yml b/.github/workflows/pickle.yml index 052028a5cb..cc3864afd2 100644 --- a/.github/workflows/pickle.yml +++ b/.github/workflows/pickle.yml @@ -31,7 +31,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-matrix.outputs.python_version }} matrix: ${{ steps.set-matrix.outputs.matrix }} diff --git a/.github/workflows/pickle_wheel.yml b/.github/workflows/pickle_wheel.yml index 1dc82af503..718dc13dcc 100644 --- a/.github/workflows/pickle_wheel.yml +++ b/.github/workflows/pickle_wheel.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 60c560ffee..827038a279 100644 --- 
a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: windows-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index a5f103a85f..bedda6d43c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,18 @@ # Change Log All notable changes to this project will be documented in this file. +## \[Cuda - UNRELEASED\] + +### Added + +- #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. +- #48 : Fix incorrect handling of imports in `cuda`. +- #59 : Updated `cuda` clash checker. +- #42 : Add support for custom kernel in `cuda`. +- #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. +- #41 : Add support for custom device in `cuda`. +- #64 : Add support for `cuda.device_empty` function. + ## \[UNRELEASED\] ### Added diff --git a/ci_tools/bot_messages/show_tests.txt b/ci_tools/bot_messages/show_tests.txt index adc07e8431..eb15492d2e 100644 --- a/ci_tools/bot_messages/show_tests.txt +++ b/ci_tools/bot_messages/show_tests.txt @@ -2,6 +2,7 @@ The following is a list of keywords which can be used to run tests. Tests in bol - **linux** : Runs the unit tests on a Linux system. - **windows** : Runs the unit tests on a Windows system. - **macosx** : Runs the unit tests on a MacOS X system. +- **cuda** : Runs the cuda unit tests on a Linux system. - **coverage** : Runs the unit tests on a Linux system and checks the coverage of the tests. - **docs** : Checks if the documentation follows the numpydoc format. - **pylint** : Runs pylint on files which are too big to be handled by codacy. 
diff --git a/ci_tools/bot_tools/bot_funcs.py b/ci_tools/bot_tools/bot_funcs.py index 7084a01bb9..1621d1d089 100644 --- a/ci_tools/bot_tools/bot_funcs.py +++ b/ci_tools/bot_tools/bot_funcs.py @@ -23,7 +23,8 @@ 'pyccel_lint': '3.8', 'pylint': '3.8', 'spelling': '3.8', - 'windows': '3.8' + 'windows': '3.8', + 'cuda': '-' } test_names = { @@ -40,15 +41,16 @@ 'pyccel_lint': "Pyccel best practices", 'pylint': "Python linting", 'spelling': "Spelling verification", - 'windows': "Unit tests on Windows" + 'windows': "Unit tests on Windows", + 'cuda': "Unit tests on Linux with cuda" } -test_dependencies = {'coverage':['linux']} +test_dependencies = {'coverage':['linux', 'cuda']} tests_with_base = ('coverage', 'docs', 'pyccel_lint', 'pylint') pr_test_keys = ('linux', 'windows', 'macosx', 'coverage', 'docs', 'pylint', - 'pyccel_lint', 'spelling') + 'pyccel_lint', 'spelling', 'cuda') review_stage_labels = ["needs_initial_review", "Ready_for_review", "Ready_to_merge"] @@ -420,7 +422,7 @@ def is_test_required(self, commit_log, name, key, state): True if the test should be run, False otherwise. 
""" print("Checking : ", name, key) - if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel'): + if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel', 'cuda'): has_relevant_change = lambda diff: any((f.startswith('pyccel/') or f.startswith('tests/')) #pylint: disable=unnecessary-lambda-assignment and f.endswith('.py') and f != 'pyccel/version.py' for f in diff) diff --git a/ci_tools/devel_branch_tests.py b/ci_tools/devel_branch_tests.py index 1102ef9e92..ec67b6c49a 100644 --- a/ci_tools/devel_branch_tests.py +++ b/ci_tools/devel_branch_tests.py @@ -15,3 +15,4 @@ bot.run_tests(['anaconda_linux'], '3.10', force_run = True) bot.run_tests(['anaconda_windows'], '3.10', force_run = True) bot.run_tests(['intel'], '3.9', force_run = True) + bot.run_tests(['cuda'], '-', force_run = True) diff --git a/ci_tools/json_pytest_output.py b/ci_tools/json_pytest_output.py index 409ae76d72..b84f4a4c09 100644 --- a/ci_tools/json_pytest_output.py +++ b/ci_tools/json_pytest_output.py @@ -61,7 +61,7 @@ def mini_md_summary(title, outcome, failed_tests): summary = "" failed_pattern = re.compile(r".*FAILED.*") - languages = ('c', 'fortran', 'python') + languages = ('c', 'fortran', 'python', 'cuda') pattern = {lang: re.compile(r".*\["+lang+r"\]\ \_.*") for lang in languages} for i in p_args.tests: diff --git a/docs/cuda.md b/docs/cuda.md new file mode 100644 index 0000000000..ff68b5c69a --- /dev/null +++ b/docs/cuda.md @@ -0,0 +1,65 @@ +# Getting started GPU + +Pyccel now supports NVIDIA CUDA, empowering users to accelerate numerical computations on GPUs seamlessly. With Pyccel's high-level syntax and automatic code generation, harnessing the power of CUDA becomes effortless. This documentation provides a quick guide to enabling CUDA in Pyccel + +## Cuda Decorator + +### kernel + +The kernel decorator allows the user to declare a CUDA kernel. The kernel can be defined in Python, and the syntax is similar to that of Numba. 
+ +```python +from pyccel.decorators import kernel + +@kernel +def my_kernel(): + pass + +blockspergrid = 1 +threadsperblock = 1 +# Call your kernel function +my_kernel[blockspergrid, threadsperblock]() + +``` + +### device + +Device functions are similar to kernels, but are executed within the context of a kernel. They can be called only from kernels or device functions, and are typically used for operations that are too small to justify launching a separate kernel, or for operations that need to be performed repeatedly within the context of a kernel. + +```python +from pyccel.decorators import device, kernel + +@device +def add(x, y): + return x + y + +@kernel +def my_kernel(): + x = 1 + y = 2 + z = add(x, y) + print(z) + +my_kernel[1, 1]() + +``` + +## Cuda Arrays + +Pyccel provides support for CUDA arrays, enabling efficient data transfer between the host and the GPU device. Here are some of the key functions you can use: + +### cuda.host_empty + +The cuda.host_empty function allocates an empty array on the host. 
+ +```python +from pyccel import cuda + +a = cuda.host_empty(10, 'int') + +for i in range(10): + a[i] = 1 + +if __name__ == '__main__': + print(a) +``` diff --git a/pyccel/ast/class_defs.py b/pyccel/ast/class_defs.py index 425573482b..4318cd1575 100644 --- a/pyccel/ast/class_defs.py +++ b/pyccel/ast/class_defs.py @@ -21,9 +21,13 @@ NumpyImag, NumpyReal, NumpyTranspose, NumpyConjugate, NumpySize, NumpyResultType, NumpyArray) from .numpytypes import NumpyNumericType, NumpyNDArrayType +from .cudatypes import CudaArrayType __all__ = ( 'BooleanClass', + 'CudaArrayClass', + 'IntegerClass', + 'FloatClass', 'ComplexClass', 'DictClass', 'FloatClass', @@ -36,6 +40,7 @@ 'get_cls_base', 'literal_classes', ) +#======================================================================================= #======================================================================================= @@ -180,7 +185,9 @@ #index #count ]) - +CudaArrayClass = ClassDef('cuda.array', + methods=[] + ) #======================================================================================= NumpyArrayClass = ClassDef('numpy.ndarray', @@ -255,8 +262,11 @@ def get_cls_base(class_type): return None elif class_type in literal_classes: return literal_classes[class_type] + elif isinstance(class_type, CudaArrayType): + return CudaArrayClass elif isinstance(class_type, (NumpyNumericType, NumpyNDArrayType)): return NumpyArrayClass + # elif isinstance(class_type, StackArrayType): elif isinstance(class_type, TupleType): return TupleClass elif isinstance(class_type, HomogeneousListType): diff --git a/pyccel/ast/core.py b/pyccel/ast/core.py index 8981ddc160..2758b75be2 100644 --- a/pyccel/ast/core.py +++ b/pyccel/ast/core.py @@ -73,6 +73,7 @@ 'If', 'IfSection', 'Import', + 'IndexedFunctionCall', 'InProgram', 'InlineFunctionDef', 'Interface', @@ -2065,6 +2066,42 @@ def _ignore(cls, c): """ return c is None or isinstance(c, (FunctionDef, *cls._ignored_types)) +class IndexedFunctionCall(FunctionCall): + """ + 
Represents an indexed function call in the code. + + Class representing indexed function calls, encapsulating all + relevant information for such calls within the code base. + + Parameters + ---------- + func : FunctionDef + The function being called. + + args : iterable of FunctionCallArgument + The arguments passed to the function. + + indexes : iterable of TypedAstNode + The indexes of the function call. + + current_function : FunctionDef, optional + The function where the call takes place. + """ + __slots__ = ('_indexes',) + _attribute_nodes = FunctionCall._attribute_nodes + ('_indexes',) + def __init__(self, func, args, indexes, current_function = None): + self._indexes = indexes + super().__init__(func, args, current_function) + + @property + def indexes(self): + """ + Indexes of function call. + + Represents the indexes of the function call + """ + return self._indexes + class ConstructorCall(FunctionCall): """ diff --git a/pyccel/ast/cuda.py b/pyccel/ast/cuda.py new file mode 100644 index 0000000000..f1e50ef7f0 --- /dev/null +++ b/pyccel/ast/cuda.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +CUDA Module +This module provides a collection of classes and utilities for CUDA programming. +""" +from pyccel.ast.core import FunctionCall + +__all__ = ( + 'KernelCall', +) + +class KernelCall(FunctionCall): + """ + Represents a kernel function call in the code. + + The class serves as a representation of a kernel + function call within the codebase. + + Parameters + ---------- + func : FunctionDef + The definition of the function being called. 
+ + args : iterable of FunctionCallArgument + The arguments passed to the function. + + num_blocks : TypedAstNode + The number of blocks. These objects must have a primitive type of `PrimitiveIntegerType`. + + tp_block : TypedAstNode + The number of threads per block. These objects must have a primitive type of `PrimitiveIntegerType`. + + current_function : FunctionDef, optional + The function where the call takes place. + """ + __slots__ = ('_num_blocks','_tp_block') + _attribute_nodes = (*FunctionCall._attribute_nodes, '_num_blocks', '_tp_block') + + def __init__(self, func, args, num_blocks, tp_block, current_function = None): + self._num_blocks = num_blocks + self._tp_block = tp_block + super().__init__(func, args, current_function) + + @property + def num_blocks(self): + """ + The number of blocks in the kernel being called. + + The number of blocks in the kernel being called. + """ + return self._num_blocks + + @property + def tp_block(self): + """ + The number of threads per block. + + The number of threads per block. + """ + return self._tp_block + diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py new file mode 100644 index 0000000000..21a18c1ba5 --- /dev/null +++ b/pyccel/ast/cudaext.py @@ -0,0 +1,154 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +CUDA Extension Module +Provides CUDA functionality for code generation. 
+""" +from .internals import PyccelFunction +from .literals import Nil + +from .datatypes import VoidType +from .core import Module, PyccelFunctionDef +from .numpyext import process_dtype, process_shape +from .cudatypes import CudaArrayType + + + +__all__ = ( + 'CudaSynchronize', + 'CudaNewarray', + 'CudaFull', + 'CudaHostEmpty' +) + +class CudaNewarray(PyccelFunction): + """ + Superclass for nodes representing Cuda array allocation functions. + + Class from which all nodes representing a Cuda function which implies a call + to `Allocate` should inherit. + + Parameters + ---------- + *args : tuple of TypedAstNode + The arguments of the superclass PyccelFunction. + + class_type : NumpyNDArrayType + The type of the new array. + + init_dtype : PythonType, PyccelFunctionDef, LiteralString, str + The actual dtype passed to the Cuda function. + + memory_location : str + The memory location of the new array ('host' or 'device'). + """ + __slots__ = ('_class_type', '_init_dtype', '_memory_location') + name = 'newarray' + + def __init__(self, *args ,class_type, init_dtype, memory_location): + self._class_type = class_type + self._init_dtype = init_dtype + self._memory_location = memory_location + + super().__init__(*args) + +class CudaFull(CudaNewarray): + """ + Represents a call to `cuda.full` for code generation. + + Represents a call to the Cuda function `full` which creates an array + filled with a specified value. + + Parameters + ---------- + shape : TypedAstNode + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + For a 1D array this is either a `LiteralInteger` or an expression. + For a ND array this is a `TypedAstNode` with the class type HomogeneousTupleType. + + fill_value : TypedAstNode + Fill value. + + dtype : PythonType, PyccelFunctionDef, LiteralString, str, optional + Datatype for the constructed array. + If `None` the dtype of the fill value is used. 
+ + order : {'C', 'F'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. + + memory_location : str + The memory location of the new array ('host' or 'device'). + """ + __slots__ = ('_fill_value','_shape') + name = 'full' + + def __init__(self, shape, fill_value, dtype, order, memory_location): + shape = process_shape(False, shape) + init_dtype = dtype + if(dtype is None): + dtype = fill_value.dtype + + dtype = process_dtype(dtype) + + self._shape = shape + rank = len(self._shape) + class_type = CudaArrayType(dtype, rank, order, memory_location) + super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = memory_location) + + +class CudaHostEmpty(CudaFull): + """ + Represents a call to Cuda.host_empty for code generation. + + A class representing a call to the Cuda `host_empty` function. + + Parameters + ---------- + shape : TypedAstNode + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + For a 1D array this is either a `LiteralInteger` or an expression. + For a cuda ND array this is a `TypedAstNode` with the class type HomogeneousTupleType. + + dtype : PythonType, PyccelFunctionDef, LiteralString, str, optional + Datatype for the constructed array. + + order : {'C', 'F'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. + """ + __slots__ = () + name = 'empty' + def __init__(self, shape, dtype='float', order='C'): + memory_location = 'host' + super().__init__(shape, Nil(), dtype, order , memory_location) + +class CudaSynchronize(PyccelFunction): + """ + Represents a call to Cuda.synchronize for code generation. + + This class serves as a representation of the Cuda.synchronize method. 
+ """ + __slots__ = () + _attribute_nodes = () + _shape = None + _class_type = VoidType() + def __init__(self): + super().__init__() + +cuda_funcs = { + 'synchronize' : PyccelFunctionDef('synchronize' , CudaSynchronize), + 'full' : PyccelFunctionDef('full' , CudaFull), + 'host_empty' : PyccelFunctionDef('host_empty' , CudaHostEmpty), +} + +cuda_mod = Module('cuda', + variables=[], + funcs=cuda_funcs.values(), + imports=[] +) + diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py new file mode 100644 index 0000000000..1edbdb0ccc --- /dev/null +++ b/pyccel/ast/cudatypes.py @@ -0,0 +1,131 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/devel/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" Module containing types from the cuda module understood by pyccel +""" +from functools import lru_cache +import numpy as np + +from pyccel.utilities.metaclasses import ArgumentSingleton + +from .datatypes import FixedSizeNumericType, HomogeneousContainerType +from .datatypes import pyccel_type_to_original_type, original_type_to_pyccel_type + +from .numpytypes import NumpyNDArrayType + +__all__ = ('CudaArrayType',) + +class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton): + """ + Class representing the Cuda array type. + + Class representing the Cuda array type + + Parameters + ---------- + dtype : NumpyNumericType | PythonNativeBool | GenericType + The internal datatype of the object (GenericType is allowed for external + libraries, e.g. MPI). + rank : int + The rank of the new NumPy array. + order : str + The order of the memory layout for the new NumPy array. 
+ memory_location : str + The memory location of the new cuda array ('host' or 'device'). + """ + __slots__ = ('_element_type', '_container_rank', '_order', '_memory_location') + + + def __init__(self, dtype, rank, order, memory_location): + assert isinstance(rank, int) + assert order in (None, 'C', 'F') + assert memory_location in ('host', 'device') + + self._element_type = dtype + self._container_rank = rank + self._order = order + self._memory_location = memory_location + super().__init__() + + @property + def memory_location(self): + """ + The memory location of the new array ('host' or 'device'). + + The memory location of the new array ('host' or 'device'). + """ + return self._memory_location + + @lru_cache + def __add__(self, other): + if(isinstance(other, CudaArrayType)): + assert self.memory_location == other.memory_location + + test_type = np.zeros(1, dtype = pyccel_type_to_original_type[self.element_type]) + if isinstance(other, FixedSizeNumericType): + comparison_type = pyccel_type_to_original_type[other]() + elif isinstance(other, CudaArrayType) or (isinstance(other, NumpyNDArrayType) and self.memory_location == "host"): + comparison_type = np.zeros(1, dtype = pyccel_type_to_original_type[other.element_type]) + else: + return NotImplemented + + result_type = original_type_to_pyccel_type[np.result_type(test_type, comparison_type).type] + rank = max(other.rank, self.rank) + if rank < 2: + order = None + else: + other_f_contiguous = other.order == 'F' + self_f_contiguous = self.order == 'F' + order = 'F' if other_f_contiguous and self_f_contiguous else 'C' + return CudaArrayType(result_type, rank, order, self.memory_location) + + @property + def rank(self): + """ + Number of dimensions of the object. + + Number of dimensions of the object. If the object is a scalar then + this is equal to 0. + """ + return self._container_rank + + @property + def order(self): + """ + The data layout ordering in memory. 
+ + Indicates whether the data is stored in row-major ('C') or column-major + ('F') format. This is only relevant if rank > 1. When it is not relevant + this function returns None. + """ + return self._order + def switch_rank(self, new_rank, new_order = None): + """ + Get a type which is identical to this type in all aspects except the rank and/or order. + + Get a type which is identical to this type in all aspects except the rank and/or order. + The order must be provided if the rank is increased from 1. Otherwise it defaults to the + same order as the current type. + + Parameters + ---------- + new_rank : int + The rank of the new type. + + new_order : str, optional + The order of the new type. This should be provided if the rank is increased from 1. + + Returns + ------- + PyccelType + The new type. + """ + new_order = (new_order or self._order) if new_rank > 1 else None + return CudaArrayType(self.element_type, new_rank, new_order, self.memory_location) + def __repr__(self): + dims = ','.join(':'*self._container_rank) + order_str = f'(order={self._order})' if self._order else '' + return f'{self.element_type}[{dims}]{order_str}' diff --git a/pyccel/ast/utilities.py b/pyccel/ast/utilities.py index 1e6c0422ab..e5cd77b168 100644 --- a/pyccel/ast/utilities.py +++ b/pyccel/ast/utilities.py @@ -25,6 +25,7 @@ from .literals import LiteralInteger, LiteralEllipsis, Nil from .mathext import math_mod from .sysext import sys_mod +from .cudaext import cuda_mod from .numpyext import (NumpyEmpty, NumpyArray, numpy_mod, NumpyTranspose, NumpyLinspace) @@ -49,7 +50,8 @@ decorators_mod = Module('decorators',(), funcs = [PyccelFunctionDef(d, PyccelFunction) for d in pyccel_decorators.__all__]) pyccel_mod = Module('pyccel',(),(), - imports = [Import('decorators', decorators_mod)]) + imports = [Import('decorators', decorators_mod), + Import('cuda', cuda_mod)]) # TODO add documentation builtin_import_registry = Module('__main__', diff --git a/pyccel/codegen/codegen.py 
b/pyccel/codegen/codegen.py index a7a02d7804..33721a48e8 100644 --- a/pyccel/codegen/codegen.py +++ b/pyccel/codegen/codegen.py @@ -9,16 +9,18 @@ from pyccel.codegen.printing.fcode import FCodePrinter from pyccel.codegen.printing.ccode import CCodePrinter from pyccel.codegen.printing.pycode import PythonCodePrinter +from pyccel.codegen.printing.cucode import CudaCodePrinter from pyccel.ast.core import FunctionDef, Interface, ModuleHeader from pyccel.utilities.stage import PyccelStage -_extension_registry = {'fortran': 'f90', 'c':'c', 'python':'py'} -_header_extension_registry = {'fortran': None, 'c':'h', 'python':None} +_extension_registry = {'fortran': 'f90', 'c':'c', 'python':'py', 'cuda':'cu'} +_header_extension_registry = {'fortran': None, 'c':'h', 'python':None, 'cuda':'h'} printer_registry = { 'fortran':FCodePrinter, 'c':CCodePrinter, - 'python':PythonCodePrinter + 'python':PythonCodePrinter, + 'cuda':CudaCodePrinter } pyccel_stage = PyccelStage() diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py index c866ee5b1a..d909a5036e 100644 --- a/pyccel/codegen/compiling/compilers.py +++ b/pyccel/codegen/compiling/compilers.py @@ -444,7 +444,10 @@ def compile_shared_library(self, compile_obj, output_folder, verbose = False, sh # Collect compile information exec_cmd, includes, libs_flags, libdirs_flags, m_code = \ self._get_compile_components(compile_obj, accelerators) - linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags] + if self._info['exec'] == 'nvcc': + linker_libdirs_flags = ['-Xcompiler' if l == '-L' else f'"-Wl,-rpath,{l}"' for l in libdirs_flags] + else: + linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags] flags.insert(0,"-shared") diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py index 14087fb567..eb357fab74 100644 --- a/pyccel/codegen/pipeline.py +++ b/pyccel/codegen/pipeline.py @@ -180,9 +180,10 @@ def handle_error(stage): if language is 
None: language = 'fortran' - # Choose Fortran compiler + # Choose Default compiler if compiler is None: - compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', 'GNU') + default_compiler_family = 'nvidia' if language == 'cuda' else 'GNU' + compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', default_compiler_family) fflags = [] if fflags is None else fflags.split() wrapper_flags = [] if wrapper_flags is None else wrapper_flags.split() diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 1c75d83f4f..4831fbab88 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -45,6 +45,7 @@ from pyccel.ast.numpytypes import NumpyInt8Type, NumpyInt16Type, NumpyInt32Type, NumpyInt64Type from pyccel.ast.numpytypes import NumpyFloat32Type, NumpyFloat64Type, NumpyComplex64Type, NumpyComplex128Type from pyccel.ast.numpytypes import NumpyNDArrayType, numpy_precision_map +from pyccel.ast.cudatypes import CudaArrayType from pyccel.ast.type_annotations import VariableTypeAnnotation @@ -60,6 +61,8 @@ from pyccel.codegen.printing.codeprinter import CodePrinter + + from pyccel.errors.errors import Errors from pyccel.errors.messages import (PYCCEL_RESTRICTION_TODO, INCOMPATIBLE_TYPEVAR_TO_FUNC, PYCCEL_RESTRICTION_IS_ISNOT, UNSUPPORTED_ARRAY_RANK) @@ -241,6 +244,7 @@ 'assert', 'numpy_c']} + import_header_guard_prefix = {'Set_extensions' : '_TOOLS_SET'} class CCodePrinter(CodePrinter): @@ -1335,6 +1339,7 @@ def get_declare_type(self, expr): >>> self.get_declare_type(v) 't_ndarray*' """ + from pyccel.codegen.printing.cucode import cu_imports class_type = expr.class_type rank = expr.rank @@ -1347,6 +1352,10 @@ def get_declare_type(self, expr): errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal') self.add_import(c_imports['ndarrays']) dtype = 't_ndarray' + elif isinstance(expr.class_type, CudaArrayType): + self.add_import(c_imports['ndarrays']) + self.add_import(cu_imports['cuda_ndarrays']) + dtype = 't_ndarray' 
else: errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal') elif not isinstance(class_type, CustomDataType): @@ -1472,6 +1481,7 @@ def _print_IndexedElement(self, expr): inds = list(expr.indices) base_shape = base.shape allow_negative_indexes = expr.allows_negative_indexes + if isinstance(base.class_type, NumpyNDArrayType): #set dtype to the C struct types dtype = self.find_in_ndarray_type_registry(expr.dtype) diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py new file mode 100644 index 0000000000..171ecc97fa --- /dev/null +++ b/pyccel/codegen/printing/cucode.py @@ -0,0 +1,180 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Provide tools for generating and handling CUDA code. +This module is designed to interface Pyccel's Abstract Syntax Tree (AST) with CUDA, +enabling the direct translation of high-level Pyccel expressions into CUDA code. +""" + +from pyccel.codegen.printing.ccode import CCodePrinter + +from pyccel.ast.core import Import, Module +from pyccel.ast.literals import Nil + +from pyccel.errors.errors import Errors +from pyccel.ast.cudatypes import CudaArrayType +from pyccel.ast.cudaext import CudaFull + + +errors = Errors() + +__all__ = ["CudaCodePrinter"] + +cu_imports = {n : Import(n, Module(n, (), ())) for n in + ['cuda_ndarrays',] + } + +class CudaCodePrinter(CCodePrinter): + """ + Print code in CUDA format. + + This printer converts Pyccel's Abstract Syntax Tree (AST) into strings of CUDA code. + Navigation through this file utilizes _print_X functions, + as is common with all printers. 
+ + Parameters + ---------- + filename : str + The name of the file being pyccelised. + prefix_module : str + A prefix to be added to the name of the module. + """ + language = "cuda" + + def __init__(self, filename, prefix_module = None): + + errors.set_target(filename) + + super().__init__(filename) + + def _print_Module(self, expr): + self.set_scope(expr.scope) + self._current_module = expr.name + body = ''.join(self._print(i) for i in expr.body) + + global_variables = ''.join(self._print(d) for d in expr.declarations) + + # Print imports last to be sure that all additional_imports have been collected + imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()] + imports = ''.join(self._print(i) for i in imports) + + code = f'{imports}\n\ + {global_variables}\n\ + {body}\n' + + self.exit_scope() + return code + + def function_signature(self, expr, print_arg_names = True): + """ + Get the Cuda representation of the function signature. + + Extract from the function definition `expr` all the + information (name, input, output) needed to create the + function signature and return a string describing the + function. + This is not a declaration as the signature does not end + with a semi-colon. + + Parameters + ---------- + expr : FunctionDef + The function definition for which a signature is needed. + + print_arg_names : bool, default : True + Indicates whether argument names should be printed. + + Returns + ------- + str + Signature of the function. 
+ """ + cuda_decorator = '__global__' if 'kernel' in expr.decorators else \ + '__device__' if 'device' in expr.decorators else '' + c_function_signature = super().function_signature(expr, print_arg_names) + return f'{cuda_decorator} {c_function_signature}' + + def _print_KernelCall(self, expr): + func = expr.funcdef + args = [a.value or Nil() for a in expr.args] + + args = ', '.join(self._print(a) for a in args) + return f"{func.name}<<<{expr.num_blocks}, {expr.tp_block}>>>({args});\n" + + def _print_CudaSynchronize(self, expr): + return 'cudaDeviceSynchronize();\n' + + def _print_ModuleHeader(self, expr): + self.set_scope(expr.module.scope) + self._in_header = True + name = expr.module.name + + funcs = "" + cuda_headers = "" + for f in expr.module.funcs: + if not f.is_inline: + if 'kernel' in f.decorators or 'device' in f.decorators: + cuda_headers += self.function_signature(f) + ';\n' + else: + funcs += self.function_signature(f) + ';\n' + global_variables = ''.join('extern '+self._print(d) for d in expr.module.declarations if not d.variable.is_private) + # Print imports last to be sure that all additional_imports have been collected + imports = [*expr.module.imports, *self._additional_imports.values()] + imports = ''.join(self._print(i) for i in imports) + + self._in_header = False + self.exit_scope() + function_declaration = f'{cuda_headers}\n\ + extern "C"{{\n\ + {funcs}\ + }}\n' + return '\n'.join((f"#ifndef {name.upper()}_H", + f"#define {name.upper()}_H", + imports, + global_variables, + function_declaration, + "#endif // {name.upper()}_H\n")) + def _print_Allocate(self, expr): + variable = expr.variable + if not isinstance(variable.class_type, CudaArrayType): + return super()._print_Allocate(expr) + shape = ", ".join(self._print(i) for i in expr.shape) + if isinstance(variable.class_type, CudaArrayType): + dtype = self.find_in_ndarray_type_registry(variable.dtype) + else: + raise NotImplementedError(f"Don't know how to index {variable.class_type} type") + 
shape_Assign = f"int64_t shape_Assign_{expr.variable.name} [] = {{{shape}}};\n" + + is_view = 'false' if variable.on_heap else 'true' + memory_location = variable.class_type.memory_location + if memory_location in ('device', 'host'): + memory_location = str(memory_location).capitalize() + 'Memory' + else: + memory_location = 'managedMemory' + self.add_import(cu_imports['cuda_ndarrays']) + alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank}, shape_Assign_{expr.variable.name}, {dtype}, {is_view},{memory_location});\n" + return f'{shape_Assign} {alloc_code}' + + def _print_Deallocate(self, expr): + var_code = self._print(expr.variable) + + if not isinstance(expr.variable.class_type, CudaArrayType): + return super()._print_Deallocate(expr) + + if expr.variable.class_type.memory_location == 'host': + return f"cuda_free_host({var_code});\n" + else: + return f"cuda_free({var_code});\n" + + def _print_Assign(self, expr): + rhs = expr.rhs + if isinstance(rhs.class_type, CudaArrayType): + if(isinstance(rhs, (CudaFull))): + # TODO add support for CudaFull + return " \n" + + return super()._print_Assign(expr) + diff --git a/pyccel/codegen/python_wrapper.py b/pyccel/codegen/python_wrapper.py index 9437727042..62c303fa64 100644 --- a/pyccel/codegen/python_wrapper.py +++ b/pyccel/codegen/python_wrapper.py @@ -13,6 +13,7 @@ from pyccel.codegen.printing.fcode import FCodePrinter from pyccel.codegen.wrapper.fortran_to_c_wrapper import FortranToCWrapper from pyccel.codegen.wrapper.c_to_python_wrapper import CToPythonWrapper +from pyccel.codegen.wrapper.cuda_to_c_wrapper import CudaToCWrapper from pyccel.codegen.utilities import recompile_object from pyccel.codegen.utilities import copy_internal_library from pyccel.codegen.utilities import internal_libs @@ -144,6 +145,9 @@ def create_shared_library(codegen, verbose=verbose) timings['Bind C wrapping'] = time.time() - start_bind_c_compiling c_ast = bind_c_mod + elif language == 'cuda': + wrapper = 
CudaToCWrapper() + c_ast = wrapper.wrap(codegen.ast) else: c_ast = codegen.ast diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py index c6140a1a7d..e535e284d4 100644 --- a/pyccel/codegen/utilities.py +++ b/pyccel/codegen/utilities.py @@ -36,13 +36,16 @@ # map internal libraries to their folders inside pyccel/stdlib and their compile objects # The compile object folder will be in the pyccel dirpath internal_libs = { - "ndarrays" : ("ndarrays", CompileObj("ndarrays.c",folder="ndarrays")), - "pyc_math_f90" : ("math", CompileObj("pyc_math_f90.f90",folder="math")), - "pyc_math_c" : ("math", CompileObj("pyc_math_c.c",folder="math")), - "cwrapper" : ("cwrapper", CompileObj("cwrapper.c",folder="cwrapper", accelerators=('python',))), - "numpy_f90" : ("numpy", CompileObj("numpy_f90.f90",folder="numpy")), - "numpy_c" : ("numpy", CompileObj("numpy_c.c",folder="numpy")), - "Set_extensions" : ("STC_Extensions", CompileObj("Set_Extensions.h", folder="STC_Extensions", has_target_file = False)), + "ndarrays" : ("ndarrays", CompileObj("ndarrays.c",folder="ndarrays")), + "cuda_ndarrays": ("cuda_ndarrays", CompileObj("cuda_ndarrays.cu",folder="cuda_ndarrays")), + "pyc_math_f90" : ("math", CompileObj("pyc_math_f90.f90",folder="math")), + "pyc_math_c" : ("math", CompileObj("pyc_math_c.c",folder="math")), + "cwrapper" : ("cwrapper", CompileObj("cwrapper.c",folder="cwrapper", + accelerators=('python',))), + "numpy_f90" : ("numpy", CompileObj("numpy_f90.f90",folder="numpy")), + "numpy_c" : ("numpy", CompileObj("numpy_c.c",folder="numpy")), + "Set_extensions" : ("STC_Extensions", CompileObj("Set_Extensions.h", + folder="STC_Extensions", has_target_file = False)), } internal_libs["cwrapper_ndarrays"] = ("cwrapper_ndarrays", CompileObj("cwrapper_ndarrays.c",folder="cwrapper_ndarrays", accelerators = ('python',), @@ -143,6 +146,9 @@ def copy_internal_library(lib_folder, pyccel_dirpath, extra_files = None): if to_create: # Copy all files from the source to the destination 
shutil.copytree(lib_path, lib_dest_path) + dst_files = [os.path.relpath(os.path.join(root, f), lib_dest_path) \ + for root, dirs, files in os.walk(lib_dest_path) \ + for f in files if not f.endswith('.lock')] # Create any requested extra files if extra_files: for filename, contents in extra_files.items(): diff --git a/pyccel/codegen/wrapper/cuda_to_c_wrapper.py b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py new file mode 100644 index 0000000000..c0e24c7c09 --- /dev/null +++ b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py @@ -0,0 +1,78 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Module describing the code-wrapping class : CudaToPythonWrapper +which creates an interface exposing Cuda code to C. +""" + +from pyccel.ast.bind_c import BindCModule +from pyccel.errors.errors import Errors +from pyccel.ast.bind_c import BindCVariable +from .wrapper import Wrapper + +errors = Errors() + +class CudaToCWrapper(Wrapper): + """ + Class for creating a wrapper exposing Cuda code to C. + + While CUDA is typically compatible with C by default. + this wrapper becomes necessary in scenarios where specific adaptations + or modifications are required to ensure seamless integration with C. + """ + + def _wrap_Module(self, expr): + """ + Create a Module which is compatible with C. + + Create a Module which provides an interface between C and the + Module described by expr. + + Parameters + ---------- + expr : pyccel.ast.core.Module + The module to be wrapped. + + Returns + ------- + pyccel.ast.core.BindCModule + The C-compatible module. 
+ """ + init_func = expr.init_func + if expr.interfaces: + errors.report("Interface wrapping is not yet supported for Cuda", + severity='warning', symbol=expr) + if expr.classes: + errors.report("Class wrapping is not yet supported for Cuda", + severity='warning', symbol=expr) + + variables = [self._wrap(v) for v in expr.variables] + + return BindCModule(expr.name, variables, expr.funcs, + init_func=init_func, + scope = expr.scope, + original_module=expr) + + def _wrap_Variable(self, expr): + """ + Create all objects necessary to expose a module variable to C. + + Create and return the objects which must be printed in the wrapping + module in order to expose the variable to C + + Parameters + ---------- + expr : pyccel.ast.variables.Variable + The module variable. + + Returns + ------- + pyccel.ast.core.BindCVariable + The C-compatible variable. which must be printed in + the wrapping module to expose the variable. + """ + return expr.clone(expr.name, new_class = BindCVariable) + diff --git a/pyccel/commands/console.py b/pyccel/commands/console.py index 596c440ec0..fcbec009de 100644 --- a/pyccel/commands/console.py +++ b/pyccel/commands/console.py @@ -80,7 +80,7 @@ def pyccel(files=None, mpi=None, openmp=None, openacc=None, output_dir=None, com # ... 
backend compiler options group = parser.add_argument_group('Backend compiler options') - group.add_argument('--language', choices=('fortran', 'c', 'python'), help='Generated language') + group.add_argument('--language', choices=('fortran', 'c', 'python', 'cuda'), help='Generated language') group.add_argument('--compiler', help='Compiler family or json file containing a compiler description {GNU,intel,PGI}') diff --git a/pyccel/compilers/default_compilers.py b/pyccel/compilers/default_compilers.py index 166085d22e..d47856773c 100644 --- a/pyccel/compilers/default_compilers.py +++ b/pyccel/compilers/default_compilers.py @@ -185,6 +185,15 @@ }, 'family': 'nvidia', } +#------------------------------------------------------------ +nvcc_info = {'exec' : 'nvcc', + 'language' : 'cuda', + 'debug_flags' : ("-g",), + 'release_flags': ("-O3",), + 'general_flags': ('--compiler-options', '-fPIC',), + 'family' : 'nvidia' + } + #------------------------------------------------------------ def change_to_lib_flag(lib): @@ -288,6 +297,7 @@ def change_to_lib_flag(lib): pgfortran_info.update(python_info) nvc_info.update(python_info) nvfort_info.update(python_info) +nvcc_info.update(python_info) available_compilers = {('GNU', 'c') : gcc_info, ('GNU', 'fortran') : gfort_info, @@ -296,6 +306,7 @@ def change_to_lib_flag(lib): ('PGI', 'c') : pgcc_info, ('PGI', 'fortran') : pgfortran_info, ('nvidia', 'c') : nvc_info, - ('nvidia', 'fortran') : nvfort_info} + ('nvidia', 'fortran') : nvfort_info, + ('nvidia', 'cuda'): nvcc_info} vendors = ('GNU','intel','PGI','nvidia') diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py new file mode 100644 index 0000000000..ae4be32387 --- /dev/null +++ b/pyccel/cuda/__init__.py @@ -0,0 +1,11 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. 
See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" + This module is for exposing the CudaSubmodule functions. +""" +from .cuda_sync_primitives import synchronize +from .cuda_arrays import host_empty + +__all__ = ['synchronize', 'host_empty'] diff --git a/pyccel/cuda/cuda_arrays.py b/pyccel/cuda/cuda_arrays.py new file mode 100644 index 0000000000..5aa34bbf38 --- /dev/null +++ b/pyccel/cuda/cuda_arrays.py @@ -0,0 +1,35 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +This submodule contains cuda_arrays methods for Pyccel. +""" + +def host_empty(shape, dtype = 'float', order = 'C'): + """ + Create an empty array on the host. + + Create an empty array on the host. + + Parameters + ---------- + shape : tuple of int or int + The shape of the array. + + dtype : str, optional + The data type of the array. The default is 'float'. + + order : str, optional + The order of the array. The default is 'C'. + + Returns + ------- + array + The empty array on the host. + """ + import numpy as np + a = np.empty(shape, dtype = dtype, order = order) + return a + + diff --git a/pyccel/cuda/cuda_sync_primitives.py b/pyccel/cuda/cuda_sync_primitives.py new file mode 100644 index 0000000000..f3442fe9e2 --- /dev/null +++ b/pyccel/cuda/cuda_sync_primitives.py @@ -0,0 +1,16 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. 
See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +This submodule contains CUDA methods for Pyccel. +""" + + +def synchronize(): + """ + Synchronize CUDA device execution. + + Synchronize CUDA device execution. + """ + diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 1f640043db..ff413fe443 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -11,6 +11,7 @@ __all__ = ( 'allow_negative_index', 'bypass', + 'device', 'elemental', 'inline', 'private', @@ -19,6 +20,7 @@ 'sympy', 'template', 'types', + 'kernel' ) @@ -109,3 +111,52 @@ def allow_negative_index(f,*args): def identity(f): return f return identity + +def kernel(f): + """ + Decorator for marking a Python function as a kernel. + + This class serves as a decorator to mark a Python function + as a kernel function, typically used for GPU computations. + This allows the function to be indexed with the number of blocks and threads. + + Parameters + ---------- + f : function + The function to which the decorator is applied. + + Returns + ------- + KernelAccessor + A class representing the kernel function. + """ + class KernelAccessor: + """ + Class representing the kernel function. + + Class representing the kernel function. + """ + def __init__(self, f): + self._f = f + def __getitem__(self, args): + return self._f + + return KernelAccessor(f) + +def device(f): + """ + Decorator for marking a function as a GPU device function. + + This decorator is used to mark a Python function as a GPU device function. + + Parameters + ---------- + f : Function + The function to be marked as a device. + + Returns + ------- + f + The function marked as a device. 
+ """ + return f diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py index 79eccc1df2..5fe622c29b 100644 --- a/pyccel/errors/messages.py +++ b/pyccel/errors/messages.py @@ -162,3 +162,11 @@ WRONG_LINSPACE_ENDPOINT = 'endpoint argument must be boolean' NON_LITERAL_KEEP_DIMS = 'keep_dims argument must be a literal, otherwise rank is unknown' NON_LITERAL_AXIS = 'axis argument must be a literal, otherwise pyccel cannot determine which dimension to operate on' +MISSING_KERNEL_CONFIGURATION = 'Kernel launch configuration not specified' +INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch' +INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call' +INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call' +INVAlID_DEVICE_CALL = 'A function decorated with "device" should be called only from a "kernel" or another "device" function.' + + + diff --git a/pyccel/naming/__init__.py b/pyccel/naming/__init__.py index 72c318d3ad..b3e4bbbe0e 100644 --- a/pyccel/naming/__init__.py +++ b/pyccel/naming/__init__.py @@ -10,7 +10,9 @@ from .fortrannameclashchecker import FortranNameClashChecker from .cnameclashchecker import CNameClashChecker from .pythonnameclashchecker import PythonNameClashChecker +from .cudanameclashchecker import CudaNameClashChecker name_clash_checkers = {'fortran':FortranNameClashChecker(), 'c':CNameClashChecker(), - 'python':PythonNameClashChecker()} + 'python':PythonNameClashChecker(), + 'cuda':CudaNameClashChecker()} diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py new file mode 100644 index 0000000000..c7aaa4952f --- /dev/null +++ b/pyccel/naming/cudanameclashchecker.py @@ -0,0 +1,126 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. 
See the LICENSE file or #
+ go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.    #
+#------------------------------------------------------------------------------------------#
+"""
+Handles name clash problems in Cuda
+"""
+from .languagenameclashchecker import LanguageNameClashChecker
+
+class CudaNameClashChecker(LanguageNameClashChecker):
+    """
+    Class containing functions to help avoid problematic names in Cuda.
+
+    A class which provides functionalities to check or propose variable names and
+    verify that they do not cause name clashes. Name clashes may be due to
+    new variables, or due to the use of reserved keywords.
+    """
+
+    # Keywords as mentioned on https://en.cppreference.com/w/c/keyword
+    keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
+        'continue', 'default', 'do', 'double', 'else', 'enum',
+        'extern', 'float', 'for', 'goto', 'if', 'inline', 'int',
+        'long', 'register', 'restrict', 'return', 'short', 'signed',
+        'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
+        'unsigned', 'void', 'volatile', 'while', '_Alignas',
+        '_Alignof', '_Atomic', '_Bool', '_Complex', '_Decimal128',
+        '_Decimal32', '_Decimal64', '_Generic', '_Imaginary',
+        '_Noreturn', '_Static_assert', '_Thread_local', 't_ndarray',
+        'array_create', 'new_slice', 'array_slicing', 'alias_assign',
+        'transpose_alias_assign', 'array_fill', 't_slice',
+        'GET_INDEX_EXP1', 'GET_INDEX_EXP2', 'GET_INDEX_EXP2',
+        'GET_INDEX_EXP3', 'GET_INDEX_EXP4', 'GET_INDEX_EXP5',
+        'GET_INDEX_EXP6', 'GET_INDEX_EXP7', 'GET_INDEX_EXP8',
+        'GET_INDEX_EXP9', 'GET_INDEX_EXP10', 'GET_INDEX_EXP11',
+        'GET_INDEX_EXP12', 'GET_INDEX_EXP13', 'GET_INDEX_EXP14',
+        'GET_INDEX_EXP15', 'NUM_ARGS_H1', 'NUM_ARGS',
+        'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
+        'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
+        'get_index', 'numpy_to_ndarray_strides',
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data',
+
'__global__', '__device__', '__host__','__constant__', '__shared__', + '__managed__','threadIdx', 'blockIdx', 'blockDim', 'gridDim', + 'warpSize', 'cudaMalloc', 'cudaFree', 'cudaMemcpy', 'cudaMemset', + 'cudaMallocHost', 'cudaFreeHost', 'cudaMallocPitch', + 'cudaMallocArray', 'cudaFreeArray', 'cudaHostAlloc', + 'cudaHostRegister', 'cudaHostUnregister', 'cudaHostGetDevicePointer', + 'cudaHostGetFlags', 'cudaDeviceSynchronize', 'cudaDeviceReset', + 'cudaSetDevice', 'cudaGetDeviceCount', 'cudaGetDeviceProperties', + 'cudaChooseDevice', 'cudaSetDeviceFlags', 'cudaGetDevice', + 'cudaStreamCreate', 'cudaStreamDestroy', 'cudaStreamSynchronize', + 'cudaStreamWaitEvent', 'cudaEventCreate', 'cudaEventDestroy', 'cudaEventRecord', + 'cudaEventSynchronize', 'cudaEventElapsedTime', 'cuInit', 'cuDeviceGet', + 'cuDeviceGetCount', 'cuDeviceGetName', + 'cuDeviceComputeCapability', 'cuCtxCreate', 'cuCtxDestroy', + 'cuCtxSynchronize', 'cuModuleLoad', 'cuModuleUnload', + 'cuModuleGetFunction', 'cuModuleGetGlobal', 'cuModuleGetTexRef', + 'cuMemAlloc', 'cuMemFree', 'cuMemcpyHtoD', 'cuMemcpyDtoH', + 'cuMemcpyDtoD', 'cuMemcpyHtoDAsync', 'cuMemcpyDtoHAsync', + 'cuMemcpyDtoDAsync', 'cuMemsetD8', 'cuMemsetD16', 'cuMemsetD32', + 'cuMemsetD2D8', 'cuMemsetD2D16', 'cuMemsetD2D32', 'cuParamSetSize', + 'cuParamSeti', 'cuParamSetf', 'cuParamSetv', 'cuLaunch', 'cuLaunchGrid', + 'cuLaunchGridAsync', 'cuEventCreate', 'cuEventRecord', 'cuEventQuery', + 'cuEventSynchronize', 'cuEventDestroy', 'cuEventElapsedTime', + 'cuStreamCreate', 'cuStreamQuery', 'cuStreamSynchronize', + 'cuStreamDestroy', 'cuFuncSetBlockShape', 'cuFuncSetSharedSize', + 'cuFuncGetAttribute', 'cuTexRefCreate', 'cuTexRefDestroy', + 'cuTexRefSetArray', 'cuTexRefSetAddress', 'cuTexRefSetAddress2D', + 'cuTexRefSetFormat', 'cuTexRefSetAddressMode', 'cuTexRefSetFilterMode', + 'cuTexRefSetFlags', 'cuTexRefGetAddress', 'cuTexRefGetArray', + 'cuTexRefGetAddressMode', 'cuTexRefGetFilterMode', 'cuTexRefGetFormat', + 'cuTexRefGetFlags', 
'cuLaunchKernel', 'cuOccupancyMaxActiveBlocksPerMultiprocessor', + 'cuOccupancyMaxPotentialBlockSize', 'cuOccupancyMaxPotentialBlockSizeWithFlags' + ]) + + def has_clash(self, name, symbols): + """ + Indicate whether the proposed name causes any clashes. + + Checks if a suggested name conflicts with predefined + keywords or specified symbols,returning true for a clash. + This method is crucial for maintaining namespace integrity and + preventing naming conflicts in code generation processes. + + Parameters + ---------- + name : str + The suggested name. + symbols : set + Symbols which should be considered as collisions. + + Returns + ------- + bool + True if the name is a collision. + False if the name is collision free. + """ + return any(name == k for k in self.keywords) or \ + any(name == s for s in symbols) + + def get_collisionless_name(self, name, symbols): + """ + Get a valid name which doesn't collision with symbols or Cuda keywords. + + Find a new name based on the suggested name which will not cause + conflicts with Cuda keywords, does not appear in the provided symbols, + and is a valid name in Cuda code. + + Parameters + ---------- + name : str + The suggested name. + symbols : set + Symbols which should be considered as collisions. + + Returns + ------- + str + A new name which is collision free. 
+ """ + if len(name)>4 and all(name[i] == '_' for i in (0,1,-1,-2)): + # Ignore magic methods + return name + if name[0] == '_': + name = 'private'+name + return self._get_collisionless_name(name, symbols) diff --git a/pyccel/naming/languagenameclashchecker.py b/pyccel/naming/languagenameclashchecker.py index fa672a905b..d6415e6449 100644 --- a/pyccel/naming/languagenameclashchecker.py +++ b/pyccel/naming/languagenameclashchecker.py @@ -19,6 +19,11 @@ class LanguageNameClashChecker(metaclass = Singleton): """ keywords = None + def __init__(self): #pylint: disable=useless-parent-delegation + # This __init__ function is required so the ArgumentSingleton can + # always detect a signature + super().__init__() + def _get_collisionless_name(self, name, symbols): """ Get a name which doesn't collision with keywords or symbols. diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py index f6e9f34f39..6b4143b442 100644 --- a/pyccel/parser/semantic.py +++ b/pyccel/parser/semantic.py @@ -116,6 +116,8 @@ from pyccel.ast.variable import IndexedElement, AnnotatedPyccelSymbol from pyccel.ast.variable import DottedName, DottedVariable +from pyccel.ast.cuda import KernelCall + from pyccel.errors.errors import Errors from pyccel.errors.errors import PyccelSemanticError @@ -133,7 +135,10 @@ PYCCEL_RESTRICTION_LIST_COMPREHENSION_LIMITS, PYCCEL_RESTRICTION_LIST_COMPREHENSION_SIZE, UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE, PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT, - FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC) + FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET, + MISSING_KERNEL_CONFIGURATION, INVAlID_DEVICE_CALL, + INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK) + from pyccel.parser.base import BasicParser from pyccel.parser.syntactic import SyntaxParser @@ -1057,6 +1062,10 
@@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun FunctionCall/PyccelFunction The semantic representation of the call. """ + + if isinstance(func, FunctionDef) and 'device' in func.decorators: + if 'kernel' not in self.scope.decorators and 'device' not in self.scope.decorators: + errors.report(INVAlID_DEVICE_CALL,symbol=expr, severity='fatal') if isinstance(func, PyccelFunctionDef): if use_build_functions: annotation_method = '_build_' + func.cls_name.__name__ @@ -1139,6 +1148,67 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun return new_expr + def _handle_kernel(self, expr, func, args): + """ + Create the node representing the kernel function call. + + Create a FunctionCall or an instance of a PyccelInternalFunction + from the function information and arguments. + + Parameters + ---------- + expr : IndexedFunctionCall + Node has all the information about the function call. + + func : FunctionDef | Interface | PyccelInternalFunction type + The function being called. + + args : iterable of FunctionCallArgument + The arguments passed to the function. + + Returns + ------- + Pyccel.ast.cuda.KernelCall + The semantic representation of the kernel call. 
+ """ + if len(expr.indexes) != 2: + errors.report(INVALID_KERNEL_LAUNCH_CONFIG, + symbol=expr, + severity='fatal') + if len(func.results): + errors.report(f"cuda kernel function '{func.name}' returned a value in violation of the laid-down specification", + symbol=expr, + severity='fatal') + if isinstance(func, FunctionDef) and len(args) != len(func.arguments): + errors.report(f"{len(args)} argument types given, but function takes {len(func.arguments)} arguments", + symbol=expr, + severity='fatal') + if not isinstance(expr.indexes[0], (LiteralInteger)): + if isinstance(expr.indexes[0], PyccelSymbol): + num_blocks = self.get_variable(expr.indexes[0]) + + if not isinstance(num_blocks.dtype, PythonNativeInt): + errors.report(INVALID_KERNEL_CALL_BP_GRID, + symbol = expr, + severity='fatal') + else: + errors.report(INVALID_KERNEL_CALL_BP_GRID, + symbol = expr, + severity='fatal') + if not isinstance(expr.indexes[1], (LiteralInteger)): + if isinstance(expr.indexes[1], PyccelSymbol): + tp_block = self.get_variable(expr.indexes[1]) + if not isinstance(tp_block.dtype, PythonNativeInt): + errors.report(INVALID_KERNEL_CALL_TP_BLOCK, + symbol = expr, + severity='fatal') + else: + errors.report(INVALID_KERNEL_CALL_TP_BLOCK, + symbol = expr, + severity='fatal') + new_expr = KernelCall(func, args, expr.indexes[0], expr.indexes[1]) + return new_expr + def _sort_function_call_args(self, func_args, args): """ Sort and add the missing call arguments to match the arguments in the function definition. 
@@ -2852,6 +2922,23 @@ def _visit_Lambda(self, expr): expr = Lambda(tuple(expr.variables), expr_new) return expr + def _visit_IndexedFunctionCall(self, expr): + name = expr.funcdef + name = self.scope.get_expected_name(name) + func = self.scope.find(name, 'functions') + args = self._handle_function_args(expr.args) + + if func is None: + return errors.report(UNDEFINED_FUNCTION, symbol=expr.funcdef, + bounding_box=(self.current_ast_node.lineno, self.current_ast_node.col_offset), + severity='fatal') + + func = self._annotate_the_called_function_def(func) + if 'kernel' in func.decorators : + return self._handle_kernel(expr, func, args) + else: + return errors.report("Unknown function type", + symbol=expr, severity='fatal') def _visit_FunctionCall(self, expr): name = expr.funcdef try: diff --git a/pyccel/parser/syntactic.py b/pyccel/parser/syntactic.py index 318b765703..0cfe895605 100644 --- a/pyccel/parser/syntactic.py +++ b/pyccel/parser/syntactic.py @@ -64,6 +64,8 @@ from pyccel.ast.type_annotations import SyntacticTypeAnnotation, UnionTypeAnnotation +from pyccel.ast.core import IndexedFunctionCall + from pyccel.parser.base import BasicParser from pyccel.parser.extend_tree import extend_tree from pyccel.parser.utilities import get_default_path @@ -1101,6 +1103,8 @@ def _visit_Call(self, stmt): elif isinstance(func, DottedName): func_attr = FunctionCall(func.name[-1], args) func = DottedName(*func.name[:-1], func_attr) + elif isinstance(func,IndexedElement): + func = IndexedFunctionCall(func.base, args, func.indices) else: raise NotImplementedError(f' Unknown function type {type(func)}') diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu new file mode 100644 index 0000000000..348cb146b3 --- /dev/null +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu @@ -0,0 +1,90 @@ +#include "cuda_ndarrays.h" + +void allocateMemoryOnDevice(void** devPtr, size_t size) +{ + cudaMalloc(devPtr, size); +} + +void 
allocateMemoryOnHost(void** devPtr, size_t size) +{ + *devPtr = malloc(size); +} +t_ndarray cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view , +enum e_memory_locations location) +{ + t_ndarray arr; + void (*fun_ptr_arr[])(void**, size_t) = {allocateMemoryOnHost, allocateMemoryOnDevice}; + + arr.nd = nd; + arr.type = type; + switch (type) + { + case nd_int8: + arr.type_size = sizeof(int8_t); + break; + case nd_int16: + arr.type_size = sizeof(int16_t); + break; + case nd_int32: + arr.type_size = sizeof(int32_t); + break; + case nd_int64: + arr.type_size = sizeof(int64_t); + break; + case nd_float: + arr.type_size = sizeof(float); + break; + case nd_double: + arr.type_size = sizeof(double); + break; + case nd_bool: + arr.type_size = sizeof(bool); + break; + } + arr.is_view = is_view; + arr.length = 1; + cudaMallocManaged(&(arr.shape), arr.nd * sizeof(int64_t)); + for (int32_t i = 0; i < arr.nd; i++) + { + arr.length *= shape[i]; + arr.shape[i] = shape[i]; + } + arr.buffer_size = arr.length * arr.type_size; + cudaMallocManaged(&(arr.strides), nd * sizeof(int64_t)); + for (int32_t i = 0; i < arr.nd; i++) + { + arr.strides[i] = 1; + for (int32_t j = i + 1; j < arr.nd; j++) + arr.strides[i] *= arr.shape[j]; + } + if (!is_view) + (*fun_ptr_arr[location])(&(arr.raw_data), arr.buffer_size); + return (arr); +} + +int32_t cuda_free_host(t_ndarray arr) +{ + if (arr.shape == NULL) + return (0); + free(arr.raw_data); + arr.raw_data = NULL; + cudaFree(arr.shape); + arr.shape = NULL; + cudaFree(arr.strides); + arr.strides = NULL; + return (1); +} + +__host__ __device__ +int32_t cuda_free(t_ndarray arr) +{ + if (arr.shape == NULL) + return (0); + cudaFree(arr.raw_data); + arr.raw_data = NULL; + cudaFree(arr.shape); + arr.shape = NULL; + cudaFree(arr.strides); + arr.strides = NULL; + return (0); +} \ No newline at end of file diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h new file mode 100644 index 
0000000000..9a29be594d --- /dev/null +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h @@ -0,0 +1,23 @@ +#ifndef CUDA_NDARRAYS_H +# define CUDA_NDARRAYS_H + +# include +#include "../ndarrays/ndarrays.h" + + + +enum e_memory_locations +{ + HostMemory, + DeviceMemory +}; + + + +t_ndarray cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view , +enum e_memory_locations location); +int32_t cuda_free_host(t_ndarray arr); +__host__ __device__ +int32_t cuda_free(t_ndarray arr); + +#endif \ No newline at end of file diff --git a/pyccel/stdlib/ndarrays/ndarrays.c b/pyccel/stdlib/ndarrays/ndarrays.c index bceaeea429..c6502b93bb 100644 --- a/pyccel/stdlib/ndarrays/ndarrays.c +++ b/pyccel/stdlib/ndarrays/ndarrays.c @@ -46,6 +46,7 @@ void print_ndarray_memory(t_ndarray nd) case nd_bool: printf("[%d]", nd.nd_bool[i]); break; + #ifndef __NVCC__ case nd_cfloat: { double real = creal(nd.nd_cfloat[i]); @@ -60,6 +61,8 @@ void print_ndarray_memory(t_ndarray nd) printf("[%lf%+lfj]", real, imag); break; } + #endif + } ++i; } @@ -248,7 +251,7 @@ void _array_fill_double(double c, t_ndarray arr) for (int32_t i = 0; i < arr.length; i++) arr.nd_double[i] = c; } - +#ifndef __NVCC__ void _array_fill_cfloat(float complex c, t_ndarray arr) { if (c == 0) @@ -267,6 +270,7 @@ void _array_fill_cdouble(double complex c, t_ndarray arr) for (int32_t i = 0; i < arr.length; i++) arr.nd_cdouble[i] = c; } +#endif /* ** deallocation @@ -489,7 +493,7 @@ bool is_same_shape(t_ndarray a, t_ndarray b) } return (true); } - +#ifndef __NVCC__ #define COPY_DATA_FROM_(SRC_TYPE) \ void copy_data_from_##SRC_TYPE(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp) \ { \ @@ -648,7 +652,7 @@ void copy_data(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp case nd_double: copy_data_from_double(ds, src, offset, elem_wise_cp); break; - + #ifndef __NVCC__ case nd_cfloat: copy_data_from_cfloat(ds, src, offset, elem_wise_cp); break; @@ -656,9 +660,11 @@ void 
copy_data(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp case nd_cdouble: copy_data_from_cdouble(ds, src, offset, elem_wise_cp); break; + #endif } } + void array_copy_data(t_ndarray *dest, t_ndarray src, uint32_t offset) { unsigned char *d = (unsigned char*)dest->raw_data; @@ -675,6 +681,7 @@ void array_copy_data(t_ndarray *dest, t_ndarray src, uint32_t offset) copy_data(&dest, src, offset, true); } } +#endif /* ** sum of ndarray @@ -747,8 +754,10 @@ NUMPY_SUM_(int32, int64_t, int32) NUMPY_SUM_(int64, int64_t, int64) NUMPY_SUM_(float32, float, float) NUMPY_SUM_(float64, double, double) +#ifndef __NVCC__ NUMPY_SUM_(complex64, float complex, cfloat) NUMPY_SUM_(complex128, double complex, cdouble) +#endif #define NUMPY_AMAX_(NAME, TYPE, CTYPE) \ TYPE numpy_amax_##NAME(t_ndarray arr) \ @@ -782,8 +791,10 @@ NUMPY_AMAX_(int32, int64_t, int32) NUMPY_AMAX_(int64, int64_t, int64) NUMPY_AMAX_(float32, float, float) NUMPY_AMAX_(float64, double, double) +#ifndef __NVCC__ NUMPY_AMAX_(complex64, float complex, cfloat) NUMPY_AMAX_(complex128, double complex, cdouble) +#endif #define NUMPY_AMIN_(NAME, TYPE, CTYPE) \ TYPE numpy_amin_##NAME(t_ndarray arr) \ @@ -817,6 +828,8 @@ NUMPY_AMIN_(int32, int64_t, int32) NUMPY_AMIN_(int64, int64_t, int64) NUMPY_AMIN_(float32, float, float) NUMPY_AMIN_(float64, double, double) +#ifndef __NVCC__ NUMPY_AMIN_(complex64, float complex, cfloat) NUMPY_AMIN_(complex128, double complex, cdouble) +#endif diff --git a/pyccel/stdlib/ndarrays/ndarrays.h b/pyccel/stdlib/ndarrays/ndarrays.h index 11bbfbf455..c2f1293bef 100644 --- a/pyccel/stdlib/ndarrays/ndarrays.h +++ b/pyccel/stdlib/ndarrays/ndarrays.h @@ -11,6 +11,10 @@ # include # include +# ifdef __NVCC__ + #include +# endif + /* mapping the function array_fill to the correct type */ # define array_fill(c, arr) _Generic((c), int64_t : _array_fill_int64,\ int32_t : _array_fill_int32,\ @@ -80,6 +84,7 @@ typedef enum e_order order_c, } t_order; + typedef struct s_ndarray { /* raw data 
buffer*/ @@ -92,8 +97,14 @@ typedef struct s_ndarray float *nd_float; double *nd_double; bool *nd_bool; + #ifndef __NVCC__ double complex *nd_cdouble; float complex *nd_cfloat; + #endif + #ifdef __NVCC__ + cuDoubleComplex *nd_cdouble; + cuFloatComplex *nd_cfloat; + #endif }; /* number of dimensions */ int32_t nd; @@ -128,8 +139,10 @@ void _array_fill_int64(int64_t c, t_ndarray arr); void _array_fill_float(float c, t_ndarray arr); void _array_fill_double(double c, t_ndarray arr); void _array_fill_bool(bool c, t_ndarray arr); +#ifndef __NVCC__ void _array_fill_cfloat(float complex c, t_ndarray arr); void _array_fill_cdouble(double complex c, t_ndarray arr); +#endif /* slicing */ /* creating a Slice object */ @@ -149,6 +162,7 @@ int32_t free_pointer(t_ndarray* dump); int64_t get_index(t_ndarray arr, ...); /* data converting between numpy and ndarray */ + int64_t *numpy_to_ndarray_strides(int64_t *np_strides, int type_size, int nd); int64_t *numpy_to_ndarray_shape(int64_t *np_shape, int nd); void print_ndarray_memory(t_ndarray nd); @@ -164,8 +178,10 @@ int64_t numpy_sum_int32(t_ndarray arr); int64_t numpy_sum_int64(t_ndarray arr); float numpy_sum_float32(t_ndarray arr); double numpy_sum_float64(t_ndarray arr); +#ifndef __NVCC__ float complex numpy_sum_complex64(t_ndarray arr); double complex numpy_sum_complex128(t_ndarray arr); +#endif /*numpy max/amax */ @@ -176,8 +192,10 @@ int64_t numpy_amax_int32(t_ndarray arr); int64_t numpy_amax_int64(t_ndarray arr); float numpy_amax_float32(t_ndarray arr); double numpy_amax_float64(t_ndarray arr); +#ifndef __NVCC__ float complex numpy_amax_complex64(t_ndarray arr); double complex numpy_amax_complex128(t_ndarray arr); +#endif /* numpy min/amin */ @@ -188,7 +206,9 @@ int64_t numpy_amin_int32(t_ndarray arr); int64_t numpy_amin_int64(t_ndarray arr); float numpy_amin_float32(t_ndarray arr); double numpy_amin_float64(t_ndarray arr); +#ifndef __NVCC__ float complex numpy_amin_complex64(t_ndarray arr); double complex 
numpy_amin_complex128(t_ndarray arr); +#endif #endif diff --git a/pyccel/stdlib/numpy/numpy_c.c b/pyccel/stdlib/numpy/numpy_c.c index 7c9ecbbf6b..bc56214772 100644 --- a/pyccel/stdlib/numpy/numpy_c.c +++ b/pyccel/stdlib/numpy/numpy_c.c @@ -17,8 +17,10 @@ double fsign(double x) return SIGN(x); } +#ifndef __NVCC__ /* numpy.sign for complex */ double complex csign(double complex x) { return x ? ((!creal(x) && cimag(x) < 0) || (creal(x) < 0) ? -1 : 1) : 0; } +#endif diff --git a/pyccel/stdlib/numpy/numpy_c.h b/pyccel/stdlib/numpy/numpy_c.h index e72cf3ad57..c2a16a5516 100644 --- a/pyccel/stdlib/numpy/numpy_c.h +++ b/pyccel/stdlib/numpy/numpy_c.h @@ -15,6 +15,8 @@ long long int isign(long long int x); double fsign(double x); +#ifndef __NVCC__ double complex csign(double complex x); +#endif #endif diff --git a/pyproject.toml b/pyproject.toml index adffdd13dd..29fbba3da1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,8 @@ include = [ "pyccel/stdlib/**/*.c", "pyccel/stdlib/**/*.f90", "pyccel/extensions/STC/include", - "pyccel/extensions/gFTL/include/v2" + "pyccel/extensions/gFTL/include/v2", + "pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu" ] exclude = [ "pyccel/extensions/STC/src", diff --git a/pytest.ini b/pytest.ini index 42eb0d72ba..3792ab65f9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -9,3 +9,4 @@ markers = python: test to generate python code xdist_incompatible: test which compiles a file also compiled by another test external: test using an external dll (problematic with conda on Windows) + cuda: test to generate cuda code diff --git a/tests/conftest.py b/tests/conftest.py index 79144b6978..4e74d1ec7a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,6 +21,17 @@ def language(request): return request.param +@pytest.fixture( params=[ + pytest.param("fortran", marks = pytest.mark.fortran), + pytest.param("c", marks = pytest.mark.c), + pytest.param("python", marks = pytest.mark.python), + pytest.param("cuda", marks = pytest.mark.cuda) + ], + 
scope = "session" +) +def language_with_cuda(request): + return request.param + def move_coverage(path_dir): for root, _, files in os.walk(path_dir): for name in files: @@ -48,6 +59,15 @@ def pytest_runtest_teardown(item, nextitem): def pytest_addoption(parser): parser.addoption("--developer-mode", action="store_true", default=github_debugging, help="Show tracebacks when pyccel errors are raised") + parser.addoption("--gpu_available", action="store_true", + default=False, help="enable GPU tests") + +def pytest_generate_tests(metafunc): + if "gpu_available" in metafunc.fixturenames: + if metafunc.config.getoption("gpu_available"): + metafunc.parametrize("gpu_available", [True]) + else: + metafunc.parametrize("gpu_available", [False]) def pytest_sessionstart(session): # setup_stuff diff --git a/tests/cuda/test_device_semantic.py b/tests/cuda/test_device_semantic.py new file mode 100644 index 0000000000..5723991961 --- /dev/null +++ b/tests/cuda/test_device_semantic.py @@ -0,0 +1,31 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import pytest + +from pyccel import epyccel +from pyccel.decorators import device +from pyccel.errors.errors import Errors, PyccelSemanticError +from pyccel.errors.messages import (INVAlID_DEVICE_CALL,) + + +@pytest.mark.cuda +def test_invalid_device_call(): + def invalid_device_call(): + @device + def device_call(): + pass + def fake_kernel_call(): + device_call() + + fake_kernel_call() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_device_call, language="cuda") + + assert errors.has_errors() + + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert INVAlID_DEVICE_CALL == error_info.message diff --git a/tests/cuda/test_kernel_semantic.py b/tests/cuda/test_kernel_semantic.py new file mode 100644 index 0000000000..00b74c3bea --- /dev/null +++ b/tests/cuda/test_kernel_semantic.py @@ -0,0 +1,176 @@ +# pylint: 
disable=missing-function-docstring, missing-module-docstring +import pytest + +from pyccel import epyccel +from pyccel.decorators import kernel +from pyccel.errors.errors import Errors, PyccelSemanticError +from pyccel.errors.messages import (INVALID_KERNEL_CALL_TP_BLOCK, + INVALID_KERNEL_CALL_BP_GRID, + INVALID_KERNEL_LAUNCH_CONFIG) + + +@pytest.mark.cuda +def test_invalid_block_number(): + def invalid_block_number(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1.0 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_block_number, language="cuda") + + assert errors.has_errors() + + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_CALL_BP_GRID == error_info.message + + +@pytest.mark.cuda +def test_invalid_thread_per_block(): + def invalid_thread_per_block(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + threads_per_block = 1.0 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_thread_per_block, language="cuda") + assert errors.has_errors() + assert errors.num_messages() == 1 + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_CALL_TP_BLOCK == error_info.message + + +@pytest.mark.cuda +def test_invalid_launch_config_high(): + def invalid_launch_config_high(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + third_param = 1 + kernel_call[blocks_per_grid, threads_per_block, third_param]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_launch_config_high, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = 
[*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message + + +@pytest.mark.cuda +def test_invalid_launch_config_low(): + def invalid_launch_config_low(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + kernel_call[blocks_per_grid]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_launch_config_low, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message + + +@pytest.mark.cuda +def test_invalid_arguments_for_kernel_call(): + def invalid_arguments(): + @kernel + def kernel_call(arg : int): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_arguments, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "0 argument types given, but function takes 1 arguments" == error_info.message + + +@pytest.mark.cuda +def test_invalid_arguments_for_kernel_call_2(): + def invalid_arguments_(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block](1) + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_arguments_, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "1 argument types given, but function takes 0 arguments" == error_info.message + + +@pytest.mark.cuda +def test_kernel_return(): + def 
kernel_return(): + @kernel + def kernel_call(): + return 7 + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block](1) + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(kernel_return, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "cuda kernel function 'kernel_call' returned a value in violation of the laid-down specification" == error_info.message diff --git a/tests/epyccel/modules/cuda_module.py b/tests/epyccel/modules/cuda_module.py new file mode 100644 index 0000000000..bb7ae6b98a --- /dev/null +++ b/tests/epyccel/modules/cuda_module.py @@ -0,0 +1,13 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import numpy as np + +g = np.float64(9.81) +r0 = np.float32(1.0) +rmin = 0.01 +rmax = 1.0 + +skip_centre = True + +method = 3 + +tiny = np.int32(4) diff --git a/tests/epyccel/test_base.py b/tests/epyccel/test_base.py index c22064d321..413f79eef1 100644 --- a/tests/epyccel/test_base.py +++ b/tests/epyccel/test_base.py @@ -7,128 +7,128 @@ from utilities import epyccel_test -def test_is_false(language): - test = epyccel_test(base.is_false, lang=language) +def test_is_false(language_with_cuda): + test = epyccel_test(base.is_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_is_true(language): - test = epyccel_test(base.is_true, lang=language) +def test_is_true(language_with_cuda): + test = epyccel_test(base.is_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_compare_is(language): - test = epyccel_test(base.compare_is, lang=language) +def test_compare_is(language_with_cuda): + test = epyccel_test(base.compare_is, lang=language_with_cuda) test.compare_epyccel( True, True ) test.compare_epyccel( True, False ) 
test.compare_epyccel( False, True ) test.compare_epyccel( False, False ) -def test_compare_is_not(language): - test = epyccel_test(base.compare_is_not, lang=language) +def test_compare_is_not(language_with_cuda): + test = epyccel_test(base.compare_is_not, lang=language_with_cuda) test.compare_epyccel( True, True ) test.compare_epyccel( True, False ) test.compare_epyccel( False, True ) test.compare_epyccel( False, False ) -def test_compare_is_int(language): - test = epyccel_test(base.compare_is_int, lang=language) +def test_compare_is_int(language_with_cuda): + test = epyccel_test(base.compare_is_int, lang=language_with_cuda) test.compare_epyccel( True, 1 ) test.compare_epyccel( True, 0 ) test.compare_epyccel( False, 1 ) test.compare_epyccel( False, 0 ) -def test_compare_is_not_int(language): - test = epyccel_test(base.compare_is_not_int, lang=language) +def test_compare_is_not_int(language_with_cuda): + test = epyccel_test(base.compare_is_not_int, lang=language_with_cuda) test.compare_epyccel( True, 1 ) test.compare_epyccel( True, 0 ) test.compare_epyccel( False, 1 ) test.compare_epyccel( False, 0 ) -def test_not_false(language): - test = epyccel_test(base.not_false, lang=language) +def test_not_false(language_with_cuda): + test = epyccel_test(base.not_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not_true(language): - test = epyccel_test(base.not_true, lang=language) +def test_not_true(language_with_cuda): + test = epyccel_test(base.not_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_eq_false(language): - test = epyccel_test(base.eq_false, lang=language) +def test_eq_false(language_with_cuda): + test = epyccel_test(base.eq_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_eq_true(language): - test = epyccel_test(base.eq_true, lang=language) +def test_eq_true(language_with_cuda): + test = 
epyccel_test(base.eq_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_neq_false(language): - test = epyccel_test(base.eq_false, lang=language) +def test_neq_false(language_with_cuda): + test = epyccel_test(base.eq_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_neq_true(language): - test = epyccel_test(base.eq_true, lang=language) +def test_neq_true(language_with_cuda): + test = epyccel_test(base.eq_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not(language): - test = epyccel_test(base.not_val, lang=language) +def test_not(language_with_cuda): + test = epyccel_test(base.not_val, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not_int(language): - test = epyccel_test(base.not_int, lang=language) +def test_not_int(language_with_cuda): + test = epyccel_test(base.not_int, lang=language_with_cuda) test.compare_epyccel( 0 ) test.compare_epyccel( 4 ) -def test_compare_is_nil(language): - test = epyccel_test(base.is_nil, lang=language) +def test_compare_is_nil(language_with_cuda): + test = epyccel_test(base.is_nil, lang=language_with_cuda) test.compare_epyccel( None ) -def test_compare_is_not_nil(language): - test = epyccel_test(base.is_not_nil, lang=language) +def test_compare_is_not_nil(language_with_cuda): + test = epyccel_test(base.is_not_nil, lang=language_with_cuda) test.compare_epyccel( None ) -def test_cast_int(language): - test = epyccel_test(base.cast_int, lang=language) +def test_cast_int(language_with_cuda): + test = epyccel_test(base.cast_int, lang=language_with_cuda) test.compare_epyccel( 4 ) - test = epyccel_test(base.cast_float_to_int, lang=language) + test = epyccel_test(base.cast_float_to_int, lang=language_with_cuda) test.compare_epyccel( 4.5 ) -def test_cast_bool(language): - test = epyccel_test(base.cast_bool, lang=language) +def 
test_cast_bool(language_with_cuda): + test = epyccel_test(base.cast_bool, lang=language_with_cuda) test.compare_epyccel( True ) -def test_cast_float(language): - test = epyccel_test(base.cast_float, lang=language) +def test_cast_float(language_with_cuda): + test = epyccel_test(base.cast_float, lang=language_with_cuda) test.compare_epyccel( 4.5 ) - test = epyccel_test(base.cast_int_to_float, lang=language) + test = epyccel_test(base.cast_int_to_float, lang=language_with_cuda) test.compare_epyccel( 4 ) -def test_if_0_int(language): - test = epyccel_test(base.if_0_int, lang=language) +def test_if_0_int(language_with_cuda): + test = epyccel_test(base.if_0_int, lang=language_with_cuda) test.compare_epyccel( 22 ) test.compare_epyccel( 0 ) -def test_if_0_real(language): - test = epyccel_test(base.if_0_real, lang=language) +def test_if_0_real(language_with_cuda): + test = epyccel_test(base.if_0_real, lang=language_with_cuda) test.compare_epyccel( 22.3 ) test.compare_epyccel( 0.0 ) -def test_same_int(language): - test = epyccel_test(base.is_same_int, lang=language) +def test_same_int(language_with_cuda): + test = epyccel_test(base.is_same_int, lang=language_with_cuda) test.compare_epyccel( 22 ) - test = epyccel_test(base.isnot_same_int, lang=language) + test = epyccel_test(base.isnot_same_int, lang=language_with_cuda) test.compare_epyccel( 22 ) -def test_same_float(language): - test = epyccel_test(base.is_same_float, lang=language) +def test_same_float(language_with_cuda): + test = epyccel_test(base.is_same_float, lang=language_with_cuda) test.compare_epyccel( 22.2 ) - test = epyccel_test(base.isnot_same_float, lang=language) + test = epyccel_test(base.isnot_same_float, lang=language_with_cuda) test.compare_epyccel( 22.2 ) @pytest.mark.parametrize( 'language', [ @@ -150,28 +150,28 @@ def test_same_complex(language): test = epyccel_test(base.isnot_same_complex, lang=language) test.compare_epyccel( complex(2,3) ) -def test_is_types(language): - test = 
epyccel_test(base.is_types, lang=language) +def test_is_types(language_with_cuda): + test = epyccel_test(base.is_types, lang=language_with_cuda) test.compare_epyccel( 1, 1.0 ) -def test_isnot_types(language): - test = epyccel_test(base.isnot_types, lang=language) +def test_isnot_types(language_with_cuda): + test = epyccel_test(base.isnot_types, lang=language_with_cuda) test.compare_epyccel( 1, 1.0 ) -def test_none_is_none(language): - test = epyccel_test(base.none_is_none, lang=language) +def test_none_is_none(language_with_cuda): + test = epyccel_test(base.none_is_none, lang=language_with_cuda) test.compare_epyccel() -def test_none_isnot_none(language): - test = epyccel_test(base.none_isnot_none, lang=language) +def test_none_isnot_none(language_with_cuda): + test = epyccel_test(base.none_isnot_none, lang=language_with_cuda) test.compare_epyccel() -def test_pass_if(language): - test = epyccel_test(base.pass_if, lang=language) +def test_pass_if(language_with_cuda): + test = epyccel_test(base.pass_if, lang=language_with_cuda) test.compare_epyccel(2) -def test_pass2_if(language): - test = epyccel_test(base.pass2_if, lang=language) +def test_pass2_if(language_with_cuda): + test = epyccel_test(base.pass2_if, lang=language_with_cuda) test.compare_epyccel(0.2) test.compare_epyccel(0.0) @@ -192,15 +192,15 @@ def test_use_optional(language): test.compare_epyccel() test.compare_epyccel(6) -def test_none_equality(language): - test = epyccel_test(base.none_equality, lang=language) +def test_none_equality(language_with_cuda): + test = epyccel_test(base.none_equality, lang=language_with_cuda) test.compare_epyccel() test.compare_epyccel(6) -def test_none_none_equality(language): - test = epyccel_test(base.none_none_equality, lang=language) +def test_none_none_equality(language_with_cuda): + test = epyccel_test(base.none_none_equality, lang=language_with_cuda) test.compare_epyccel() -def test_none_literal_equality(language): - test = epyccel_test(base.none_literal_equality, 
lang=language) +def test_none_literal_equality(language_with_cuda): + test = epyccel_test(base.none_literal_equality, lang=language_with_cuda) test.compare_epyccel() diff --git a/tests/epyccel/test_epyccel_modules.py b/tests/epyccel/test_epyccel_modules.py index ad8ae0bd75..223f741bf0 100644 --- a/tests/epyccel/test_epyccel_modules.py +++ b/tests/epyccel/test_epyccel_modules.py @@ -200,3 +200,16 @@ def test_awkward_names(language): assert mod.function() == modnew.function() assert mod.pure() == modnew.pure() assert mod.allocate(1) == modnew.allocate(1) + +def test_cuda_module(language_with_cuda): + import modules.cuda_module as mod + + modnew = epyccel(mod, language=language_with_cuda) + + atts = ('g', 'r0', 'rmin', 'rmax', 'skip_centre', + 'method', 'tiny') + for att in atts: + mod_att = getattr(mod, att) + modnew_att = getattr(modnew, att) + assert mod_att == modnew_att + assert type(mod_att) is type(modnew_att) diff --git a/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py b/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py new file mode 100644 index 0000000000..308970e9d0 --- /dev/null +++ b/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py @@ -0,0 +1,19 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel import cuda + +def addition_cuda_host_2Darrays(): + a = cuda.host_empty((10,10)) + b = cuda.host_empty((10,10)) + + for i in range(10): + for j in range(10): + a[i][j] = 1 + b[i][j] = 1 + b = b + a + b = b + 1 + + print(b) + +if __name__ == '__main__': + addition_cuda_host_2Darrays() + diff --git a/tests/pyccel/scripts/kernel/cuda_host_array_addition.py b/tests/pyccel/scripts/kernel/cuda_host_array_addition.py new file mode 100644 index 0000000000..d0f61881cf --- /dev/null +++ b/tests/pyccel/scripts/kernel/cuda_host_array_addition.py @@ -0,0 +1,16 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel import cuda + +def addition_cuda_host_arrays(): + a = 
cuda.host_empty(3) + b = cuda.host_empty(3) + + for i in range(3): + b[i] = 1 + a[i] = 1 + + b = b + a + print(b) + +if __name__ == '__main__': + addition_cuda_host_arrays() diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py new file mode 100644 index 0000000000..a4762a6242 --- /dev/null +++ b/tests/pyccel/scripts/kernel/device_test.py @@ -0,0 +1,18 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import device, kernel +from pyccel import cuda + +@device +def device_call(): + print("Hello from device") + +@kernel +def kernel_call(): + device_call() + +def f(): + kernel_call[1,1]() + cuda.synchronize() + +if __name__ == '__main__': + f() diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py new file mode 100644 index 0000000000..b6901b25a1 --- /dev/null +++ b/tests/pyccel/scripts/kernel/hello_kernel.py @@ -0,0 +1,19 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import kernel +from pyccel import cuda + +@kernel +def say_hello(its_morning : bool): + if(its_morning): + print("Hello and Good morning") + else: + print("Hello and Good afternoon") + +def f(): + its_morning = True + say_hello[1,1](its_morning) + cuda.synchronize() + +if __name__ == '__main__': + f() + diff --git a/tests/pyccel/scripts/kernel/host_array.py b/tests/pyccel/scripts/kernel/host_array.py new file mode 100644 index 0000000000..e686521e43 --- /dev/null +++ b/tests/pyccel/scripts/kernel/host_array.py @@ -0,0 +1,10 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel import cuda +def f(): + a = cuda.host_empty(10) + + for i in range(10): + a[i] = 1 + print(a) +if __name__ == '__main__': + f() diff --git a/tests/pyccel/scripts/kernel/kernel_name_collision.py b/tests/pyccel/scripts/kernel/kernel_name_collision.py new file mode 100644 index 0000000000..ac7abe25ae --- 
/dev/null +++ b/tests/pyccel/scripts/kernel/kernel_name_collision.py @@ -0,0 +1,8 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import kernel + +@kernel +def do(): + pass + +do[1,1]() diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index ec1e846549..f8638a3b3c 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -294,7 +294,7 @@ def compare_pyth_fort_output( p_output, f_output, dtype=float, language=None): #------------------------------------------------------------------------------ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True, cwd = None, pyccel_commands = "", output_dtype = float, - language = None, output_dir = None): + language = None, output_dir = None, execute_code = True): """ Run pyccel and compare the output to ensure that the results are equivalent @@ -394,13 +394,14 @@ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True, compile_fortran(cwd, output_test_file, dependencies) elif language == 'c': compile_c(cwd, output_test_file, dependencies) - - lang_out = get_lang_output(output_test_file, language) - compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language) + if execute_code: + lang_out = get_lang_output(output_test_file, language) + compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language) #============================================================================== # UNIT TESTS #============================================================================== + def test_relative_imports_in_project(language): base_dir = os.path.dirname(os.path.realpath(__file__)) @@ -728,6 +729,46 @@ def test_multiple_results(language): def test_elemental(language): pyccel_test("scripts/decorators_elemental.py", language = language) +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_hello_kernel(gpu_available): + types = str + 
pyccel_test("scripts/kernel/hello_kernel.py", + language="cuda", output_dtype=types , execute_code=gpu_available) + +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_kernel_collision(gpu_available): + pyccel_test("scripts/kernel/kernel_name_collision.py", + language="cuda", execute_code=gpu_available) + +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_host_array(gpu_available): + types = float + pyccel_test("scripts/kernel/host_array.py", pyccel_commands = '--verbose', + language="cuda", output_dtype=types, execute_code=gpu_available) + +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_cuda_host_array_addition(gpu_available): + types = float + pyccel_test("scripts/kernel/cuda_host_array_addition.py", + language="cuda", output_dtype=types, execute_code=gpu_available) +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_cuda_host_2d_array_addition(gpu_available): + types = float + pyccel_test("scripts/kernel/cuda_host_2d_array_addition.py", + language="cuda", output_dtype=types, execute_code=gpu_available) + +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_device_call(gpu_available): + types = str + pyccel_test("scripts/kernel/device_test.py", + language="cuda", output_dtype=types, execute_code=gpu_available) + #------------------------------------------------------------------------------ def test_print_strings(language): types = str